aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v3dv/dal_numbering.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v3dv/dal_numbering.rb')
-rw-r--r--lib/sisu/v3dv/dal_numbering.rb470
1 files changed, 0 insertions, 470 deletions
diff --git a/lib/sisu/v3dv/dal_numbering.rb b/lib/sisu/v3dv/dal_numbering.rb
deleted file mode 100644
index f5a09ef6..00000000
--- a/lib/sisu/v3dv/dal_numbering.rb
+++ /dev/null
@@ -1,470 +0,0 @@
-# encoding: utf-8
-=begin
-
- * Name: SiSU
-
- * Description: a framework for document structuring, publishing and search
-
- * Author: Ralph Amissah
-
- * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.
-
- * License: GPL 3 or later:
-
- SiSU, a framework for document structuring, publishing and search
-
- Copyright (C) Ralph Amissah
-
- This program is free software: you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation, either version 3 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program. If not, see <http://www.gnu.org/licenses/>.
-
- If you have Internet connection, the latest version of the GPL should be
- available at these locations:
- <http://www.fsf.org/licensing/licenses/gpl.html>
- <http://www.gnu.org/copyleft/gpl.html>
-
- <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
-
- * SiSU uses:
- * Standard SiSU markup syntax,
- * Standard SiSU meta-markup syntax, and the
- * Standard SiSU object citation numbering and system
-
- * Hompages:
- <http://www.jus.uio.no/sisu>
- <http://www.sisudoc.org>
-
- * Download:
- <http://www.jus.uio.no/sisu/SiSU/download.html>
-
- * Ralph Amissah
- <ralph@amissah.com>
- <ralph.amissah@gmail.com>
-
- ** Description: system environment, resource control and configuration details
-
-=end
-module SiSU_DAL_Numbering
- class Numbering
- attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment
- def initialize(md,data)
- @md,@data=md,data
- @obj=@type=@ocn=@lv=@name=@index=@comment=nil
- end
- def numbering_song
- data=@data
- data=number_plaintext_para(data)
- data=auto_number_heading_ie_title(data.compact) #tr issue
- data=ocn(data.compact) #watch
- data=xml(data.compact)
- data=minor_numbering(data.compact)
- data,tags_map,ocn_html_seg_map=name_para_seg_filename(data)
- data=set_heading_top(data) unless @md.set_heading_top
- [data,tags_map,ocn_html_seg_map]
- end
- def number_plaintext_para(data)
- @tuned_file=[]
- data.each do |dob|
- if (dob.of !=:block \
- && dob.of !=:comment \
- && dob.of !=:layout) \
- && dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX
- dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
- end
- unless dob.obj.class==Array
- dob.obj=dob.obj.gsub(/^\s+/,'').
- gsub(/\s$/,"\n")
- end
- @tuned_file << dob
- end
- @tuned_file=@tuned_file.flatten
- end
- def number_sub_heading(dob,num,title_no)
- unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
- dob.obj=case dob.name
- when /-/; dob.obj.gsub(/^/,"#{title_no} ")
- when /^#/; dob.obj.gsub(/^/,"#{title_no} ")
- when /^[a-z_\.]+/; dob.obj.gsub(/^/,"#{title_no} ")
- else
- dob.name=title_no if dob.name=~/^$/ #where title contains title number
- dob.obj.gsub(/^/,"#{title_no} ") if title_no =~/\d+/ #main, where title number is to be provided #watch changed placement
- end
- if @md.toc_lev_limit \
- and @md.toc_lev_limit < num
- dob.obj=dob.obj.gsub(/^/,'!_ ') #bold line, watch
- end
- end
- dob
- end
- def heading_tag_clean(heading_tag)
- heading_tag=heading_tag.gsub(/[ ]+/,'_').
- gsub(/["']/,'').
- gsub(/[\/]/,'-').
- gsub(/#{Mx[:fa_bold_o]}|#{Mx[:fa_bold_c]}/,'').
- gsub(/#{Mx[:fa_italics_o]}|#{Mx[:fa_italics_c]}/,'').
- gsub(/#{Mx[:fa_underscore_o]}|#{Mx[:fa_underscore_c]}/,'').
- gsub(/#{Mx[:fa_cite_o]}|#{Mx[:fa_cite_c]}/,'').
- gsub(/#{Mx[:fa_insert_o]}|#{Mx[:fa_insert_c]}/,'').
- gsub(/#{Mx[:fa_strike_o]}|#{Mx[:fa_strike_c]}/,'').
- gsub(/#{Mx[:fa_superscript_o]}|#{Mx[:fa_superscript_c]}/,'').
- gsub(/#{Mx[:fa_subscript_o]}|#{Mx[:fa_subscript_c]}/,'').
- gsub(/#{Mx[:fa_hilite_o]}|#{Mx[:fa_hilite_c]}/,'').
- gsub(/#{Mx[:gl_bullet]}/,'')
- end
- def auto_number_heading_ie_title(data) #also does some segment naming
- @tuned_file=[]
- if defined? @md.make.num_top \
- and @md.make.num_top \
- and @md.make.num_top !~/^$/
- input||=@md.make.num_top
- end
- num_top=(input ? input.to_i : nil)
- t_no1=t_no2=t_no3=t_no4=0
- if num_top
- no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3)
- end
- t_not=0
- chapter_number_counter=0
- data=data.compact
- data.each do |dob| #@md.seg_names << [additions to segment names]
- title_no=nil
- dob=SiSU_DAL_DocumentStructureExtract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require
- if dob.is ==:heading \
- && dob.autonum_ \
- and defined? @md.make.num_top \
- and @md.make.num_top !~/^$/
- if dob.lv=='1' \
- and dob.obj =~/^#\s|\s#(?:\s|$)/
- chapter_number_counter +=1
- dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} ").
- gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
- end
- if dob.ln==no1
- @subnumber=1
- @subnumber=0 if dob.ln==no1
- end
- if dob.ln.to_s =~/^[1-6]/ \
- and not dob.toc_ \
- and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
- if dob.ln==no1
- t_no1+=1; t_no2=0; t_no3=0
- title_no="#{t_no1}"
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(title_no)
- if dob.ln==no1
- dob.name="#{title_no}" if not dob.name
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
- tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
- tag=heading_tag_clean(tag)
- dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs
- dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \
- ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
- : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later
- end
- if dob.ln !=no1 \
- and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
- dob.name ="#{title_no}" if not dob.name
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
- dob.obj=dob.obj.gsub(/^/,"#{title_no}. ")
- end
- @md.seg_names << title_no
- end
- if dob.ln!=no1 \
- and dob.name!~/^[a-z_\.]+$/ \
- and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
- dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ")
- end
- end
- if dob.ln==no1 #watch because here you change dob.name
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
- end
- if dob.ln==no2 #watch because here you change dob.name
- t_no2+=1; t_no3=0
- title_no="#{t_no1}.#{t_no2}"
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
- dob=number_sub_heading(dob,no2,title_no)
- end
- if dob.ln==no3 #watch because here you change dob.name
- t_no3+=1
- title_no="#{t_no1}.#{t_no2}.#{t_no3}"
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
- dob=number_sub_heading(dob,no3,title_no)
- end
- elsif dob.ln.to_s =~/^[1-6]/ \
- and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
- dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
- end
- elsif dob.is ==:heading \
- and dob.autonum_ \
- and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
- #here lies a bug, as is nil when run from -Dv --update, FIX
- if (dob.name.nil? or dob.name.empty?) \
- and dob.ln.to_s =~/^[1-9]/ \
- and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
- dob.name=$1
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
- end
- if @md.toc_lev_limit
- end
- elsif defined? dob.name \
- and dob.name
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
- end
- dob.tags=dob.tags.uniq if defined? dob.tags
- @tuned_file << dob
- end
- @tuned_file=@tuned_file.flatten
- end
- def ocn(data) #and auto segment numbering increment
- @tuned_file=SiSU_DAL_DocumentStructureExtract::OCN.new(@md,data).ocn
- @tuned_file
- end
- def xml(data)
- @tuned_file=SiSU_DAL_DocumentStructureExtract::XML.new(@md,data).dom
- @tuned_file
- end
- def minor_numbering(data) #and auto segment numbering increment
- @tuned_file=[]
- number_small,letter_small=0,0
- letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
- data.each do |dob|
- if dob.of ==:heading \
- || dob.of ==:heading_insert \
- || dob.of ==:para \
- || dob.of ==:block
- if dob.is ==:heading \
- and dob.ln.to_s=~/^[1-9]/ #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
- number_small,letter_small=0,0
- elsif dob.is ==:para
- if dob.obj =~/^#[ 1]/ \
- and dob.obj !~/^#\s+(?:~#)?$/
- letter_small=0
- number_small=0 if dob.obj =~ /^#1/
- number_small+=1
- dob.obj=dob.obj.gsub(/^#[ 1]/,"#{number_small}. ")
- end
- if dob.obj =~/^_# /
- dob.obj=dob.obj.gsub(/^_# /,"#{letter[letter_small]}. ")
- dob.indent='1'
- letter_small+=1
- end
- end
- end
- @tuned_file << dob
- end
- @tuned_file=@tuned_file.flatten
- end
- def name_para_seg_filename(data) #segment naming, remaining
- # paragraph name/numbering rules
- # manual naming overrides, manual naming may be
- # alpha-numeric characters mixed,
- # numeric only (a number), if
- # all segments have been named,
- # the numbers used are over 1000 or
- # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
- # [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
- # auto-naming takes the form of giving numbers to segments
- # the rules for which are as follows
- # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
- # otherwise the level 4 segment number from the embedded document structure info is used
- # if there is none a sequential number is designated, preceded by an underscore
- @tuned_file,@unique_auto_name=[],[]
- tags={}
- art_filename_auto=1
- @counter=1
- if not @md.seg_autoname_safe and @md.opt.cmd =~/[MV]/
- puts 'manual segment names, numbers used as names, risk warning (segmented html)'
- end
- ocn_html_seg=[]
- data.each do |dob|
- if dob.is==:heading \
- && dob.ln \
- and dob.ln.to_s =~/^[456]/
- if dob.ln==4 \
- and not dob.name \
- and not @md.set_heading_seg
- @md.set_heading_seg=true
- end
- if dob.name !~/^\S+/ \
- and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
- possible_seg_name=$1
- possible_seg_name=possible_seg_name.gsub(/(?:[:,-]|\W)/,'.').
- gsub(/\.$/,'')
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(possible_seg_name)
- dob.name=possible_seg_name
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
- @md.seg_names << possible_seg_name
- else puts 'warn, there may be a conflicting numbering scheme' if @md.opt.cmd =~/[VM]/
- end
- end
- if dob.ln==4 \
- and dob.name #extract segment name from embedded document structure info
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(dob.name)
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
- @md.seg_names << dob.name
- end
- end
- if dob.ln==4 \
- and not dob.name #if still no segment name, provide a numerical one
- pf='_' #pg='' #may use e.g. '' or '~' or '_'
- segn_auto="#{pf}#{art_filename_auto.to_s}"
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(segn_auto)
- dob.name=segn_auto
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
- @md.seg_names << segn_auto
- else puts 'segment name (numbering) error'
- end
- art_filename_auto+=1
- end
- if dob.ln==4 \
- and not dob.name #should not occur
- puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
- end
- end
- if (dob.is ==:heading \
- || dob.is ==:heading_insert) \
- && dob.ln==4
- @seg=dob.name
- end
- @tuned_file << if dob.is==:heading \
- && (@md.pagenew || @md.pagebreak)
- m=dob.ln.to_s
- dob_tmp=[]
- if @md.pagenew.inspect =~/#{m}/
- dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) << dob
- elsif @md.pagebreak.inspect =~/#{m}/
- dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) << dob
- end
- para_result=unless dob_tmp.length > 0; dob
- else dob_tmp
- end
- else dob
- end
- if defined? dob.ocn \
- and dob.ocn
- @segname=((dob.is==:heading || dob.is==:heading_insert) && dob.ln==4 && (defined? dob.name)) \
- ? (dob.name)
- : @segname
- tags["#{dob.ocn}"]={ segname: @segname }
- ocn_html_seg[dob.ocn]=if (dob.is==:heading || dob.is==:heading_insert)
- x=if dob.ln =~/[1-3]/
- { seg: nil, level: dob.ln }
- else #elsif dob.ln =~/[4-6]/
- { seg: @seg, level: dob.ln }
- end
- else
- { seg: @seg, level: nil }
- end
- end
- dob.tags=dob.tags.uniq if defined? dob.tags
- if defined? dob.tags \
- and dob.tags.length > 0
- #@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
- #? (dob.name) \
- #: @segname
- dob.tags.each do |x|
- tags[x]={ ocn: dob.ocn.to_s, segname: @segname }
- end
- end
- dob
- end
- ocn_html_seg.each_with_index do |ocn,i|
- if ocn \
- and ocn[:level].to_s=~/[1-3]/
- ocn_seg=nil
- (1..4).each do |x|
- if ocn_html_seg[i+x] and ocn_html_seg[i+x][:level]==4
- ocn[:seg]=ocn_html_seg[i+x][:seg]
- end
- end
- end
- end
- if @md.seg_names.length > 0
- @md.set_heading_seg=true
- end
- tuned_file=@tuned_file.flatten
- [tuned_file,tags,ocn_html_seg]
- end
- def set_heading_top(data) #% make sure no false positives
- unless @md.set_heading_top
- puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |t_o|
- unless @md.set_heading_top
- if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
- and t_o !~/\A\s*\Z/m
- @md.set_heading_top=true
- if defined? @md.title \
- and @md.title \
- and defined? @md.title.full \
- and defined? @md.creator \
- and @md.creator
- head=@md.title.main ? ([@lv='1',@obj=@md.title.main]) : ([@lv='1',@obj='[no title provided]'])
- @tuned_file << head
- end
- end
- end
- @tuned_file << t_o
- end
- @tuned_file=@tuned_file.flatten
- end
- end
- def set_heading_seg(data) #% make sure no false positives
- unless @md.set_heading_seg
- puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |dob|
- unless @md.set_heading_seg
- if defined? dob.ln and dob.ln.to_s !~/^[123]/m \
- and dob.obj !~/\A\s*\Z/m \
- and dob.is !=:layout
- @md.set_heading_seg=true
- head=if @md.title.main ; dob.ln,dob.name,dob.obj=4,'seg',@md.title.main
- else dob.ln,dob.name,dob.obj=4,'seg','[segment]'
- end
- @tuned_file << head
- end
- end
- @tuned_file << dob
- end
- @tuned_file=@tuned_file.flatten
- end
- end
- def set_header_title(data) #% make sure no false positives
- unless @md.set_header_title
- puts "\t no document title provided, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |t_o|
- unless @md.set_header_title
- if t_o !~/^%{1,2}\s/m \
- and t_o !~/\A\s*\Z/m
- @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
- @md.title.main=@md.heading_seg_first
- @md.set_header_title=true
- end
- end
- @tuned_file << t_o
- end
- @tuned_file=@tuned_file.flatten
- end
- end
- end
-end
-__END__