From 7372f56054259457f77c64cbdb34e736531cfc0e Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 4 Jul 2009 11:57:29 -0400 Subject: move lib to version 1 directory, (lib/sisu/v1) and make related changes --- lib/sisu/v1/dal_numbering.rb | 368 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 lib/sisu/v1/dal_numbering.rb (limited to 'lib/sisu/v1/dal_numbering.rb') diff --git a/lib/sisu/v1/dal_numbering.rb b/lib/sisu/v1/dal_numbering.rb new file mode 100644 index 00000000..05123bbc --- /dev/null +++ b/lib/sisu/v1/dal_numbering.rb @@ -0,0 +1,368 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_numbering + class Numbering + def initialize(md,data) + @md,@data=md,data + end + def numbering_song + data=@data + data=number_plaintext_para(data) + data=name_endnote_seg(data) #tr issue + data=auto_number_heading_ie_title(data) #tr issue + data=ocn(data) #watch + data=minor_numbering(data) + data=name_para_seg_filename(data) + data=set_heading_seg(data) unless @md.set_heading_seg + data=set_heading_top(data) unless @md.set_heading_top + data=set_header_title(data) unless @md.set_header_title + data + end + def number_plaintext_para(data) + @tuned_file=[] + data.each do |para| + if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ + para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks + end + para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u + para.gsub!(/^\s+|\s$/,"\n") + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + def name_endnote_seg(data) + tuned_file=data + if @md.flag_auto_endnotes \ + and @md.flag_separate_endnotes_make + tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}" + end + tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON + tuned_file=tuned_file.flatten + end + def owner_details_seg + data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details" + end + def number_sub_heading(para,num,title_no) + case para + when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ") + when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ") + when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/ + para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) + when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/ + para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #where title contains title number + else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided + end + if @md.toc_lev_limit \ + and @md.toc_lev_limit < num + para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch + end + para + end + def auto_number_heading_ie_title(data) #also does some segment naming + @tuned_file=[] + if @md.markup =~/num_top/ \ + or @md.num_top # watch, 2003w23 + input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup + input||=@md.num_top if @md.num_top !~/^$/ + end + num_top=input.to_i + t_no1=t_no2=t_no3=t_no4=0 + no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3) + t_not=0 + data.each do |para| #@md.seg_names << [additions to segment names] + if (@md.markup =~/num_top/ \ + or (@md.num_top \ + and @md.num_top !~/^$/)) \ + and para !~/^#{Rx[:meta]}/ + if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \ + and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/) + t_not+=1 #; t_no2=0; t_no3=0 + para.gsub!(/^(#{Mx[:lv_o]}(?:#{no1}|#{no2}|#{no3}|#{no4})):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") + end + if para =~/#{Mx[:lv_o]}#{no1}:/ + @subnumber=1 + @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/ + end + if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \ + and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \ + and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ + if para =~/^#{Mx[:lv_o]}#{no1}:/ + t_no1+=1; t_no2=0; t_no3=0 + title_no="#{t_no1}" + if not @md.seg_names.nil? \ + and not @md.seg_names.include?(title_no) + para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) + para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. }) + unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review + para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ") + end + @md.seg_names << title_no + #else puts "warning segment name #{title_no} already exists" + end + unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required + para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i, + %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) + end + para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch + para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ") + end + if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/ + t_no2+=1; t_no3=0 + title_no="#{t_no1}.#{t_no2}" + para=number_sub_heading(para,no2,title_no) + end + if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/ + t_no3+=1 + title_no="#{t_no1}.#{t_no2}.#{t_no3}" + para=number_sub_heading(para,no3,title_no) + end + elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005 + para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ") + para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}") + para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}") + end + elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 + if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d + name_num=$1 + para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}") + end + if @md.toc_lev_limit + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + def ocn(data) #and auto segment numbering increment + @tuned_file=[] + object_array=SiSU_document_structure::OCN.new(@md,data).ocn + object_array.each do |o| + @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor + else o.txt + end + end + @tuned_file=@tuned_file.flatten + end + def minor_numbering(data) #and auto segment numbering increment + @tuned_file=[] + number_small,letter_small=0,0 + letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) + data.each do |para| + if para =~/\w|\S|<|\(/ + if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|||||<\/tr>|
|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #  added with Tune.code #ยก + if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) + end + if para =~/^#[ 1]/ + letter_small=0 + number_small=0 if para =~ /^#1/ + number_small+=1 + para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004 + end + if para =~/^_# / + para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004 + letter_small+=1 + end + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + def name_para_seg_filename(data) + # paragraph name/numbering rules + # manual naming overrides, manual naming may be + # alpha-numeric characters mixed, + # numeric only (a number), if + # all segments have been named, + # the numbers used are over 1000 or + # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) + # [for now a warning is printed for such documents on use of maintenance or very-verbose flag] + # auto-naming takes the form of giving numbers to segments + # the rules for which are as follows + # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) + # otherwise the level 4 segment number from the embedded document structure info is used + # if there is none a sequential number is designated, preceded by an underscore + @tuned_file=[] + art_filename_auto=1 + @counter=1 + @unique_auto_name=[] + if not @md.seg_autoname_safe and @md.cmd =~/[MV]/ + puts 'manual segment names, numbers used as names, risk warning (segmented html)' + end + data.each do |para| + para=SiSU_document_structure::Structure.new(@md,para).structure_markup + if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/ + if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \ + and not @md.set_heading_seg + @md.set_heading_seg=true + end + if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name + pattern=$1 + pattern.gsub!(/(?:[:,-]|\W)/,'.') + pattern.gsub!(/\.$/,'') + if not @md.seg_names.nil? \ + and not @md.seg_names.include?(pattern) + para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}") + @md.seg_names << pattern + else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ + end + end + if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info + pattern=$1 + pattern.gsub!(/(?:[:,-]|\W)/,'.') + pattern.gsub!(/\.$/,'') + if not @md.seg_names.nil? \ + and not @md.seg_names.include?(pattern) + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}") + @md.seg_names << pattern + else + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}") + @md.seg_names << "~#{pattern}" + end + end + if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one + if not @md.seg_names.nil? \ + and not @md.seg_names.include?(art_filename_auto) + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}}) + @md.seg_names << art_filename_auto + else puts 'segment name (numbering) error' + end + art_filename_auto+=1 + end + end + @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \ + and (@md.pagenew or @md.pagebreak) + m=$1 #watch ref~ + para_tmp=[] + if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para + elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para + end + para_result=unless para_tmp.length > 0; para + else para_tmp + end + else para + end + end + if @md.seg_names.length > 0 + @md.set_heading_seg=true + end + @tuned_file=@tuned_file.flatten + end + def set_heading_top(data) #% make sure no false positives + unless @md.set_heading_top + puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/ + @tuned_file=[] + data.each do |para| + unless @md.set_heading_top + if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \ + and para !~/\A\s*\Z/m + @md.set_heading_top=true + head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}" + else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]" + end + @tuned_file << head + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + end + def set_heading_seg(data) #% make sure no false positives + unless @md.set_heading_seg + puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/ + @tuned_file=[] + data.each do |para| + unless @md.set_heading_seg + if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \ + and para !~/\A\s*\Z/m \ + and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/ + @md.set_heading_seg=true + head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]" + else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]" + end + @tuned_file << head + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + end + def set_header_title(data) #% make sure no false positives + unless @md.set_header_title + puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/ + @tuned_file=[] + data.each do |para| + unless @md.set_header_title + if para !~/^%{1,2}\s/m \ + and para !~/\A\s*\Z/m + @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" + @md.title=@md.heading_seg_first + @md.set_header_title=true + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + end + end +end +__END__ -- cgit v1.2.3