diff options
author | Ralph Amissah <ralph@amissah.com> | 2008-09-07 23:16:47 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2008-09-07 23:16:47 -0400 |
commit | 3655940354be602e6e69e1e8445d157bcb89c965 (patch) | |
tree | 733b6c51e9245054b8157afbf792af7e2d3d20f7 | |
parent | minor unicode for a type of cross (diff) |
book index (markup adjustment, dal module); dal numbering moved to separate module; version date
-rw-r--r-- | CHANGELOG | 8 | ||||
-rw-r--r-- | conf/sisu/version.yml | 4 | ||||
-rw-r--r-- | lib/sisu/v0/constants.rb | 1 | ||||
-rw-r--r-- | lib/sisu/v0/dal.rb | 335 | ||||
-rw-r--r-- | lib/sisu/v0/dal_idx.rb | 201 | ||||
-rw-r--r-- | lib/sisu/v0/dal_numbering.rb | 374 | ||||
-rw-r--r-- | lib/sisu/v0/dal_syntax.rb | 2 | ||||
-rw-r--r-- | lib/sisu/v0/param.rb | 7 | ||||
-rw-r--r-- | lib/sisu/v0/plaintext.rb | 2 |
9 files changed, 598 insertions, 336 deletions
@@ -9,7 +9,7 @@ Reverse Chronological: %% STABLE MANIFEST -%% sisu_0.69.0.orig.tar.gz (2008-08-28:34/4) +%% sisu_0.69.0.orig.tar.gz (2008-09-07:35/7) http://www.jus.uio.no/sisu/pkg/src/sisu_0.69.0.orig.tar.gz sisu_0.69.0.orig.tar.gz sisu_0.69.0-1.dsc @@ -37,12 +37,18 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.69.0.orig.tar.gz * db table field lengths (varchar), increased + * book type index + * markup adjustment, book type index introduced + * dal, book type indexing module + * middle layer, url representation changed, downstream code affected * dummy headings removed when not used, latex/pdf, odf, html scroll * internal, greater use of passing of hash to formatting modules + * dal, numbering moved to separate module + %% sisu_0.68.0.orig.tar.gz (2008-07-22:29/2) http://www.jus.uio.no/sisu/pkg/src/sisu_0.68.0.orig.tar.gz cecae8b828aa187b22354f2ee9e8a8b5244dd9f4 1509605 sisu_0.68.0.orig.tar.gz diff --git a/conf/sisu/version.yml b/conf/sisu/version.yml index 70d4f342..d3ddf9f7 100644 --- a/conf/sisu/version.yml +++ b/conf/sisu/version.yml @@ -1,5 +1,5 @@ --- :version: 0.69.0-beta -:date_stamp: 2008w34/4 -:date: "2008-08-28" +:date_stamp: 2008w35/7 +:date: "2008-09-07" :project: SiSU diff --git a/lib/sisu/v0/constants.rb b/lib/sisu/v0/constants.rb index f352ab7c..3f050df7 100644 --- a/lib/sisu/v0/constants.rb +++ b/lib/sisu/v0/constants.rb @@ -90,6 +90,7 @@ Mx[:gl_bullet]= "#{Mx[:gl_o]}●#{Mx[:gl_c]}" #non substantive text sort: <-#> <~#> Mx[:pa_non_object_dummy_heading]="#{Mx[:pa_o]}-##{Mx[:pa_c]}" #unnumbered paragraph, delete when not required [used in dummy headings, eg. for segmented html] (place marker at end of paragraph) Mx[:pa_non_object_no_heading]="#{Mx[:pa_o]}~##{Mx[:pa_c]}" #unnumbered paragraph (place marker at end of paragraph) +Mx[:idx_o]='▢ '; Mx[:idx_c]='▢ ' # Mx[:nbsp]= '▭ ' Mx[:br_line]= "#{Mx[:mk_o]}br#{Mx[:mk_c]}" Mx[:br_paragraph]= "#{Mx[:mk_o]}br#{Mx[:mk_c]}" diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb index 25b7528e..aa4758b7 100644 --- a/lib/sisu/v0/dal.rb +++ b/lib/sisu/v0/dal.rb @@ -65,6 +65,8 @@ module SiSU_DAL require "#{SiSU_lib}/param" require "#{SiSU_lib}/dal_syntax" require "#{SiSU_lib}/dal_doc_str" + require "#{SiSU_lib}/dal_idx" + require "#{SiSU_lib}/dal_numbering" require "#{SiSU_lib}/i18n" require "#{SiSU_lib}/shared_sem" include SiSU_Env @@ -211,7 +213,8 @@ module SiSU_DAL data=character_check(data) data=images(data) data=SiSU_document_structure::Tables.new(@md,data).tables - data=numbering_song(data) #tr issue + data=SiSU_numbering::Numbering.new(@md,data).numbering_song + data=SiSU_book_index::Book_index.new(data).indexing_song if @md.book_index data=endnotes(data) data=object_digest(data) meta=metadata(data) @@ -449,7 +452,7 @@ module SiSU_DAL end def substitutions_and_insertions?(data) data_expand=[] - if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content preceeds it) + if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content precedes it) data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'') data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'') end @@ -507,332 +510,6 @@ module SiSU_DAL end end end - def numbering_song(data) - data=number_plaintext_para(data) - data=name_endnote_seg(data) #tr issue - data=auto_number_heading_ie_title(data) #tr issue - data=ocn(data) #watch - data=minor_numbering(data) - data=name_para_seg_filename(data) - data=set_heading_seg(data) unless @md.set_heading_seg - data=set_heading_top(data) unless @md.set_heading_top - data=set_header_title(data) unless @md.set_header_title - data - end - def number_plaintext_para(data) - @tuned_file=[] - data.each do |para| - if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ - para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks - end - para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u - para.gsub!(/^\s+|\s$/,"\n") - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - def name_endnote_seg(data) - @tuned_file=[] - data.each do |para| - para.gsub!(/<:3>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']} <p /><br />\r -#{@@endnote['seg_name_3']} <p /> -#{@@endnote['special_align_close']} - WOK - ) - para.gsub!(/<:2>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']} <p /><br />\r -#{@@endnote['seg_name_2']} <p /> -#{@@endnote['special_align_close']} - WOK - ) - para.gsub!(/<:1>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']} <p /><br />\r -#{@@endnote['seg_name_1']} <p /> -#{@@endnote['special_align_close']} - WOK - ) - @tuned_file << para - end - # debug 2003w46 adding revision control info - if @md.flag_auto_endnotes \ - and @md.flag_separate_endnotes_make - @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}" - end - @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON - @tuned_file=@tuned_file.flatten - end - def owner_details_seg - data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details" - end - def number_sub_heading(para,num,title_no) - case para - when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ") - when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ") - when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/ - para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) - when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/ - para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #where title contains title number - else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided - end - if @md.toc_lev_limit \ - and @md.toc_lev_limit < num - para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch - end - para - end - def auto_number_heading_ie_title(data) #also does some segment naming - @tuned_file=[] - if @md.markup =~/num_top/ \ - or @md.num_top # watch, 2003w23 - input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup - input||=@md.num_top if @md.num_top !~/^$/ - end - num_top=input.to_i - t_no1=t_no2=t_no3=t_no4=0 - no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3) - t_not=0 - data.each do |para| #@md.seg_names << [additions to segment names] - if (@md.markup =~/num_top/ \ - or (@md.num_top \ - and @md.num_top !~/^$/)) \ - and para !~/^#{Rx[:meta]}/ - if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \ - and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/) - t_not+=1 #; t_no2=0; t_no3=0 - para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") - para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") - para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") - para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") - end - if para =~/#{Mx[:lv_o]}#{no1}:/ - @subnumber=1 - @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/ - end - if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \ - and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \ - and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ - if para =~/^#{Mx[:lv_o]}#{no1}:/ - t_no1+=1; t_no2=0; t_no3=0 - title_no="#{t_no1}" - if not @md.seg_names.nil? \ - and not @md.seg_names.include?(title_no) - para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) - para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. }) - unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review - para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ") - end - @md.seg_names << title_no - #else puts "warning segment name #{title_no} already exists" - end - unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required - para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i, - %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) - end - para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch - para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ") - end - if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/ - t_no2+=1; t_no3=0 - title_no="#{t_no1}.#{t_no2}" - para=number_sub_heading(para,no2,title_no) - end - if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/ - t_no3+=1 - title_no="#{t_no1}.#{t_no2}.#{t_no3}" - para=number_sub_heading(para,no3,title_no) - end - elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005 - para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ") - para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}") - para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}") - end - elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 - if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d - name_num=$1 - para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}") - end - if @md.toc_lev_limit - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - def ocn(data) #and auto segment numbering increment - @tuned_file=[] - object_array=SiSU_document_structure::OCN.new(@md,data).ocn - object_array.each do |o| - @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor - else o.txt - end - end - @tuned_file=@tuned_file.flatten - end - def minor_numbering(data) #and auto segment numbering increment - @tuned_file=[] - number_small,letter_small=0,0 - letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) - data.each do |para| - if para =~/\w|\S|<|\(/ - if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here # added with Tune.code #¡ - if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) - end - if para =~/^#[ 1]/ - letter_small=0 - number_small=0 if para =~ /^#1/ - number_small+=1 - para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004 - end - if para =~/^_# / - para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004 - letter_small+=1 - end - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - def name_para_seg_filename(data) - # paragraph name/numbering rules - # manual naming overrides, manual naming may be - # alpha-numeric characters mixed, - # numeric only (a number), if - # all segments have been named, - # the numbers used are over 1000 or - # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) - # [for now a warning is printed for such documents on use of maintenance or very-verbose flag] - # auto-naming takes the form of giving numbers to segments - # the rules for which are as follows - # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) - # otherwise the level 4 segment number from the embedded document structure info is used - # if there is none a sequential number is designated, preceded by an underscore - @tuned_file=[] - art_filename_auto=1 - @counter=1 - @unique_auto_name=[] - if not @md.seg_autoname_safe and @md.cmd =~/[MV]/ - puts 'manual segment names, numbers used as names, risk warning (segmented html)' - end - data.each do |para| - para=SiSU_document_structure::Structure.new(@md,para).structure_markup - if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/ - if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \ - and not @md.set_heading_seg - @md.set_heading_seg=true - end - if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name - pattern=$1 - pattern.gsub!(/(?:[:,-]|\W)/,'.') - pattern.gsub!(/\.$/,'') - if not @md.seg_names.nil? \ - and not @md.seg_names.include?(pattern) - para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}") - @md.seg_names << pattern - else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ - end - end - if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info - pattern=$1 - pattern.gsub!(/(?:[:,-]|\W)/,'.') - pattern.gsub!(/\.$/,'') - if not @md.seg_names.nil? \ - and not @md.seg_names.include?(pattern) - para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}") - @md.seg_names << pattern - else - para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}") - @md.seg_names << "~#{pattern}" - end - end - if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one - if not @md.seg_names.nil? \ - and not @md.seg_names.include?(art_filename_auto) - para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}}) - @md.seg_names << art_filename_auto - else puts 'segment name (numbering) error' - end - art_filename_auto+=1 - end - end - @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \ - and (@md.pagenew or @md.pagebreak) - m=$1 #watch ref~ - para_tmp=[] - if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para - elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para - end - para_result=unless para_tmp.length > 0; para - else para_tmp - end - else para - end - end - if @md.seg_names.length > 0 - @md.set_heading_seg=true - end - @tuned_file=@tuned_file.flatten - end - def set_heading_top(data) #% make sure no false positives - unless @md.set_heading_top - puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/ - @tuned_file=[] - data.each do |para| - unless @md.set_heading_top - if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \ - and para !~/\A\s*\Z/m - @md.set_heading_top=true - head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}" - else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]" - end - @tuned_file << head - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - end - def set_heading_seg(data) #% make sure no false positives - unless @md.set_heading_seg - puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/ - @tuned_file=[] - data.each do |para| - unless @md.set_heading_seg - if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \ - and para !~/\A\s*\Z/m \ - and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/ - @md.set_heading_seg=true - head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]" - else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]" - end - @tuned_file << head - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - end - def set_header_title(data) #% make sure no false positives - unless @md.set_header_title - puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/ - @tuned_file=[] - data.each do |para| - unless @md.set_header_title - if para !~/^%{1,2}\s/m \ - and para !~/\A\s*\Z/m - @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" - @md.title=@md.heading_seg_first - @md.set_header_title=true - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - end def endnotes(data) @tuned_file=[] endnote_no,endnote_ref=1,1 @@ -1058,7 +735,7 @@ module SiSU_DAL para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch para_plus_en=para.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m) para_tail=if para =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m - /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1] + /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.*?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1] else '' end para_plus_en << para_tail diff --git a/lib/sisu/v0/dal_idx.rb b/lib/sisu/v0/dal_idx.rb new file mode 100644 index 00000000..5e07396a --- /dev/null +++ b/lib/sisu/v0/dal_idx.rb @@ -0,0 +1,201 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + #___# + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + <http://www.fsf.org/licensing/licenses/gpl.html> + <http://www.gnu.org/copyleft/gpl.html> + + <http://www.jus.uio.no/sisu/gpl.fsf/toc.html> + <http://www.jus.uio.no/sisu/gpl.fsf/doc.html> + <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt> + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + <http://www.jus.uio.no/sisu> + <http://www.sisudoc.org> + + * Download: + <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah + <ralph@amissah.com> + <ralph.amissah@gmail.com> + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_book_index + class Book_index + def initialize(data) + @data=data + end + def indexing_song + @rgx_idx=/#{Mx[:idx_o]}(?:.+?)#{Mx[:idx_c]}\s*/ + #@rgx_idx=/\s*#{Mx[:idx_o]}(?:.+?)#{Mx[:idx_c]}\s*/ + @rgx_idx_ocn_init=/#{Mx[:idx_o]}(.+?)#{Mx[:idx_c]}\s*#{Mx[:id_o]}~(\d+)\S+?#{Mx[:id_c]}/ + @rgx_idx_ocn=/(.+?)~(\d+)/ + @data=extract_book_index(@data) + @data=clean_index(@data) + @data + end + def extract_book_index(data) + tuned_file=[] + idx_array=[] + data.each do |para| + idx_array << @rgx_idx_ocn_init.match(para)[0].gsub(@rgx_idx_ocn_init,'\1~\2') if para =~ @rgx_idx_ocn_init + tuned_file << para if para + end + idx_array.each do |i| + i.gsub!(@rgx_idx_ocn_init,'\1~\2') + end + idx_array=construct_idx_array(idx_array) if idx_array.length > 0 + if idx_array.length > 0 + the_idx=construct_book_index(idx_array) + screen_print(the_idx) + end + tuned_file + end + def construct_idx_array(idx_array) + idx_lst=[] + idx_array.each do |idx| + idx_list,ocn=@rgx_idx_ocn.match(idx)[1,2] + idx_lst <<=if idx_list =~/;/ + g=idx_list.scan(/[^;]+/) + idxl=[] + g.each do |i| + idxl << { :rough_idx => i, :ocn => ocn } + end + idxl + else { :rough_idx => idx_list, :ocn => ocn } + end + end + idx_lst.flatten! + idx_lst + end + def construct_book_index(idx_array) + @the_idx={} + idx_array.each do |idx| + idx_lst=idx[:rough_idx].scan(/[^|:]+/) + if idx_lst[0] =~/.+?\+\d+/ + use,plus=/(.+?)\+(\d+)/.match(idx_lst[0])[1,2] + else use=idx_lst[0] + end + @the_idx[use]={} unless @the_idx[use] and defined? @the_idx[use] + idx_lst.each do |i| + i.strip! + i,r=/(.+?)\+(\d+)/.match(i)[1,2] if i =~/.+?\+\d+/ + x=if idx_lst.length == 1 or idx_lst[0] == i + @the_idx[use]['a1']=[] unless @the_idx[use]['a1'] and defined? @the_idx[use]['a1'] + x=if r + @the_idx[use]['a1'] << { :ocn => idx[:ocn], :range => "#{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" } + "#{i} #{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" + else + @the_idx[use]['a1'] << { :ocn => idx[:ocn] } + "#{i} #{idx[:ocn]}" + end + else + @the_idx[use]['b1']={} unless @the_idx[use]['b1'] and defined? @the_idx[use]['b1'] + @the_idx[use]['b1'][i]=[] unless @the_idx[use]['b1'][i] and defined? @the_idx[use]['b1'][i] + x=if r + @the_idx[use]['b1'][i] << { :ocn => idx[:ocn], :range => "#{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" } + "#{idx_lst[0]}:#{i} #{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" + else + @the_idx[use]['b1'][i] << { :ocn => idx[:ocn] } + "#{idx_lst[0]}:#{i} #{idx[:ocn]}" + end + end + end + end + the_idx=@the_idx.sort + #p the_idx; p '-----' + the_idx + end + def screen_print(the_idx) + the_idx.each do |i| + i.each do |x| + if x.class == String + print "\n" + x + ', ' + elsif x.class == Array + p 'array error? -->' + print x + elsif x.class == Hash + if x['a1'].class == Array + x['a1'].each do |a| + if a[:range] + print a[:range] + ', ' + elsif a[:ocn] + print a[:ocn] + ', ' + else p 'error' + end + end + end + if x['b1'] + m=x['b1'] + m=m.sort + m.each do |k,y| + if k !~/a1/ + print "\n\t" + k + ', ' + #p y + y.each do |z| + if z[:range] + print z[:range] + ', ' + elsif z[:ocn] + print z[:ocn] + ', ' + else p 'error' + end + end + end + end + end + end + end + end + end + def clean_index(data) + tuned_file=[] + data.each do |para| + para.gsub!(@rgx_idx,'') + tuned_file << para + end + tuned_file + end + end +end + +__END__ + diff --git a/lib/sisu/v0/dal_numbering.rb b/lib/sisu/v0/dal_numbering.rb new file mode 100644 index 00000000..e14b87d7 --- /dev/null +++ b/lib/sisu/v0/dal_numbering.rb @@ -0,0 +1,374 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + #___# + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + <http://www.fsf.org/licensing/licenses/gpl.html> + <http://www.gnu.org/copyleft/gpl.html> + + <http://www.jus.uio.no/sisu/gpl.fsf/toc.html> + <http://www.jus.uio.no/sisu/gpl.fsf/doc.html> + <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt> + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + <http://www.jus.uio.no/sisu> + <http://www.sisudoc.org> + + * Download: + <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah + <ralph@amissah.com> + <ralph.amissah@gmail.com> + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_numbering + class Numbering + def initialize(md,data) + @md,@data=md,data + end + def numbering_song + data=@data + data=number_plaintext_para(data) + #data=name_endnote_seg(data) #tr issue + data=auto_number_heading_ie_title(data) #tr issue + data=ocn(data) #watch + data=minor_numbering(data) + data=name_para_seg_filename(data) + data=set_heading_seg(data) unless @md.set_heading_seg + data=set_heading_top(data) unless @md.set_heading_top + data=set_header_title(data) unless @md.set_header_title + data + end + def number_plaintext_para(data) + @tuned_file=[] + data.each do |para| + if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ + para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks + end + para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u + para.gsub!(/^\s+|\s$/,"\n") + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + def name_endnote_seg(data) + @tuned_file=[] + if @md.flag_auto_endnotes \ + and @md.flag_separate_endnotes_make + @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}" + end + @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON + @tuned_file=@tuned_file.flatten + end + def owner_details_seg + data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details" + end + def number_sub_heading(para,num,title_no) + case para + when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ") + when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ") + when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/ + para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) + when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/ + para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #where title contains title number + else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided + end + if @md.toc_lev_limit \ + and @md.toc_lev_limit < num + para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch + end + para + end + def auto_number_heading_ie_title(data) #also does some segment naming + @tuned_file=[] + if @md.markup =~/num_top/ \ + or @md.num_top # watch, 2003w23 + input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup + input||=@md.num_top if @md.num_top !~/^$/ + end + num_top=input.to_i + t_no1=t_no2=t_no3=t_no4=0 + no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3) + t_not=0 + data.each do |para| #@md.seg_names << [additions to segment names] + if (@md.markup =~/num_top/ \ + or (@md.num_top \ + and @md.num_top !~/^$/)) \ + and para !~/^#{Rx[:meta]}/ + if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \ + and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/) + t_not+=1 #; t_no2=0; t_no3=0 + para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") + para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") + para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") + para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") + end + if para =~/#{Mx[:lv_o]}#{no1}:/ + @subnumber=1 + @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/ + end + if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \ + and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \ + and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ + if para =~/^#{Mx[:lv_o]}#{no1}:/ + t_no1+=1; t_no2=0; t_no3=0 + title_no="#{t_no1}" + if not @md.seg_names.nil? \ + and not @md.seg_names.include?(title_no) + para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) + para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. }) + unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review + para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ") + end + @md.seg_names << title_no + #else puts "warning segment name #{title_no} already exists" + end + unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required + para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i, + %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) + end + para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch + para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ") + end + if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/ + t_no2+=1; t_no3=0 + title_no="#{t_no1}.#{t_no2}" + para=number_sub_heading(para,no2,title_no) + end + if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/ + t_no3+=1 + title_no="#{t_no1}.#{t_no2}.#{t_no3}" + para=number_sub_heading(para,no3,title_no) + end + elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005 + para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ") + para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}") + para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}") + end + elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 + if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d + name_num=$1 + para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}") + end + if @md.toc_lev_limit + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + def ocn(data) #and auto segment numbering increment + @tuned_file=[] + object_array=SiSU_document_structure::OCN.new(@md,data).ocn + object_array.each do |o| + @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor + else o.txt + end + end + @tuned_file=@tuned_file.flatten + end + def minor_numbering(data) #and auto segment numbering increment + @tuned_file=[] + number_small,letter_small=0,0 + letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) + data.each do |para| + if para =~/\w|\S|<|\(/ + if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here # added with Tune.code #¡ + if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) + end + if para =~/^#[ 1]/ + letter_small=0 + number_small=0 if para =~ /^#1/ + number_small+=1 + para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004 + end + if para =~/^_# / + para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004 + letter_small+=1 + end + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + def name_para_seg_filename(data) + # paragraph name/numbering rules + # manual naming overrides, manual naming may be + # alpha-numeric characters mixed, + # numeric only (a number), if + # all segments have been named, + # the numbers used are over 1000 or + # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) + # [for now a warning is printed for such documents on use of maintenance or very-verbose flag] + # auto-naming takes the form of giving numbers to segments + # the rules for which are as follows + # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) + # otherwise the level 4 segment number from the embedded document structure info is used + # if there is none a sequential number is designated, preceded by an underscore + @tuned_file=[] + art_filename_auto=1 + @counter=1 + @unique_auto_name=[] + if not @md.seg_autoname_safe and @md.cmd =~/[MV]/ + puts 'manual segment names, numbers used as names, risk warning (segmented html)' + end + data.each do |para| + para=SiSU_document_structure::Structure.new(@md,para).structure_markup + if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/ + if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \ + and not @md.set_heading_seg + @md.set_heading_seg=true + end + if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name + pattern=$1 + pattern.gsub!(/(?:[:,-]|\W)/,'.') + pattern.gsub!(/\.$/,'') + if not @md.seg_names.nil? \ + and not @md.seg_names.include?(pattern) + para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}") + @md.seg_names << pattern + else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ + end + end + if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info + pattern=$1 + pattern.gsub!(/(?:[:,-]|\W)/,'.') + pattern.gsub!(/\.$/,'') + if not @md.seg_names.nil? \ + and not @md.seg_names.include?(pattern) + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}") + @md.seg_names << pattern + else + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}") + @md.seg_names << "~#{pattern}" + end + end + if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one + if not @md.seg_names.nil? \ + and not @md.seg_names.include?(art_filename_auto) + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}}) + @md.seg_names << art_filename_auto + else puts 'segment name (numbering) error' + end + art_filename_auto+=1 + end + end + @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \ + and (@md.pagenew or @md.pagebreak) + m=$1 #watch ref~ + para_tmp=[] + if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para + elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para + end + para_result=unless para_tmp.length > 0; para + else para_tmp + end + else para + end + end + if @md.seg_names.length > 0 + @md.set_heading_seg=true + end + @tuned_file=@tuned_file.flatten + end + def set_heading_top(data) #% make sure no false positives + unless @md.set_heading_top + puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/ + @tuned_file=[] + data.each do |para| + unless @md.set_heading_top + if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \ + and para !~/\A\s*\Z/m + @md.set_heading_top=true + head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}" + else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]" + end + @tuned_file << head + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + end + def set_heading_seg(data) #% make sure no false positives + unless @md.set_heading_seg + puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/ + @tuned_file=[] + data.each do |para| + unless @md.set_heading_seg + if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \ + and para !~/\A\s*\Z/m \ + and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/ + @md.set_heading_seg=true + head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]" + else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]" + end + @tuned_file << head + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + end + def set_header_title(data) #% make sure no false positives + unless @md.set_header_title + puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/ + @tuned_file=[] + data.each do |para| + unless @md.set_header_title + if para !~/^%{1,2}\s/m \ + and para !~/\A\s*\Z/m + @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" + @md.title=@md.heading_seg_first + @md.set_header_title=true + end + end + @tuned_file << para + end + @tuned_file=@tuned_file.flatten + end + end + end +end +__END__ diff --git a/lib/sisu/v0/dal_syntax.rb b/lib/sisu/v0/dal_syntax.rb index acdec0e4..80635f36 100644 --- a/lib/sisu/v0/dal_syntax.rb +++ b/lib/sisu/v0/dal_syntax.rb @@ -326,7 +326,7 @@ module SiSU_Syntax line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>|\d+)\^(\S+?)\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript single word, watch digit added line.gsub!(/<[:e]\s+(.+?)!?>/,"#{Mx[:en_a_o]}\\1#{Mx[:en_a_c]}") #not tested line.gsub!(/^\s*_\*\s*/,"#{Mx[:gl_bullet]}") #bullets, shortcut - #line.gsub!(/^\s*_(\*+)\s*/,"#{Mx[:gl_bullet]}") #bullets, shortcut + line.gsub!(/=\{(.+?)\}/,"#{Mx[:idx_o]}\\1#{Mx[:idx_c]}") # line.gsub!(/^\s*_([1-9])\*\s*/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}#{Mx[:gl_bullet]}") #bullets, shortcut #line.gsub!(/^\s*_([1-9])(\*+)\s*/,"#{Mx[:fa_o]}:i\\1#{Mx[:fa_c]}#{Mx[:fa_o]}\\2#{Mx[:fa_c_o]}") #bullets, shortcut line.gsub!(/^\s*_([1-9])\s+/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}") #indent diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb index b211f5c1..7994487d 100644 --- a/lib/sisu/v0/param.rb +++ b/lib/sisu/v0/param.rb @@ -119,13 +119,13 @@ module SiSU_Param @doc={ :lv=>[] } @doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','','' @@publisher='SiSU scribe' - attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag + attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag,:book_index def initialize(fns_array,opt) @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@sfx=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@dc_title=@html_title=@subtitle=@subtitle_tex=@creator_home=@dc_creator=@translator=@illustrator=@prepared_by=@digitized_by=@dc_subject=@dc_description=@dc_publisher=@dc_contributor=@dc_date=@dc_date_created=@dc_date_issued=@dc_date_available=@dc_date_valid=@dc_date_modified=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@dc_type=@dc_format=@dc_identifier=@dc_source=@dc_language=@language_original=@dc_relation=@dc_coverage=@dc_rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_auto_heading_num=@make_bold=@make_italic=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@creator_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@man_synopsis=nil @man_section=1 @man_name='man page "name/whatis" information not provided, set in header @man: name=[whatis information]' @data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data - @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo=false,false,false,false,false,false + @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_index=false,false,false,false,false,false,false @seg_autoname_safe=true @sem_tag=false @markup_instruction,@markup_declared,@image='','','' #check which other values should be set to empty rather than nil @@ -576,6 +576,9 @@ module SiSU_Param end end end + if not @book_index and para =~/^=\{(.+?)\}\s*$/ + @book_index=true + end unless @code_flag case para when /~\{\s+.+?\}~/m #% processing diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb index 4dd808fc..fb4b1115 100644 --- a/lib/sisu/v0/plaintext.rb +++ b/lib/sisu/v0/plaintext.rb @@ -321,7 +321,7 @@ WOK para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') para.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1') para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links - para.gsub!(/ /,' ') # decide on + para.gsub!(/ |#{Mx[:nbsp]}/,' ') # decide on para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') |