diff options
author | Ralph Amissah <ralph@amissah.com> | 2008-07-22 20:00:59 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2008-07-22 20:00:59 -0400 |
commit | d29a3e5469d8468084641c385ebf16948f7c2437 (patch) | |
tree | 1a2306de3c5c1055dbc0374b95f31c335f5da85c /lib/sisu/v0/shared_xml.rb | |
parent | action response query fix (diff) |
sisu-0.68.0 proposed
* middle layer document representation changed (this accounts for the substantial size of the patch)
* texpdf: multiple document sizes, as specified in config
* numerous small fixes
[on the whole, this should be easier to maintain]
Diffstat (limited to 'lib/sisu/v0/shared_xml.rb')
-rw-r--r-- | lib/sisu/v0/shared_xml.rb | 110 |
1 file changed, 42 insertions, 68 deletions
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index c93eff5b..3c34e67f 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -59,75 +59,27 @@ =end module SiSU_text_parts - class Split_text_object + require "#{SiSU_lib}/shared_structure" + class Split_text_object < SiSU_Structure::Split_text_object require "#{SiSU_lib}/param" require "#{SiSU_lib}/xml_format" include SiSU_Viz include SiSU_XML_format @@alt_id_count=0 @@dp=nil - attr_reader :format,:text,:ocn,:lev_para_ocn - def initialize(md,para) - @md,@para=md,para - @format,@ocn='null','null' - #@format,@ocn=nil,nil - @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - end - def lev_segname_para_ocn #using shared_txt instead, watch #% watch closely - if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if /^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,segname,@text,@ocn=$1,$2,$3,$4 - @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /^([1-6])~(\S+)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,segname,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" - @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@text,@ocn=$1,$2,"x#{@@alt_id_count}" - elsif /^(?:<:i([1-9])>\s*_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn="_#{$1}\*",$2,$3,$4 - elsif /^(_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /<:(i[1-9])>\s*(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - end - else - if 
/(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @text,@ocn=$1,$2 - end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 - @text=/(.+?)/m.match(@para)[1] - end - if /^(\d)~\S*\s+(.+)/m.match(@para) - @format,@text=$1,$2 - end - end - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn) - else - SiSU_XML_format::Format_scroll.new(@md,@format,@text,"<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>") - end - self - end def lev_segname_para - if @para =~/^(\d~|<:.+?>).+/ - if /^([1-6])~(\S+)\s+(\S.+)/m.match(@para) + if @para =~/^#{Mx[:lv_o]}(?:\d:|<:.+?>).+/ + if /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+)/m.match(@para) @format,segname,@text=$1,$2,$3 - @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+)/m.match(@para) + elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+)/m.match(@para) @format,@text=$1,$2 elsif /<:(.+?)>\s*(\S.+?)/m.match(@para) @format,@text=$1,$2 - elsif /^([1-6])~(\S+)\s+(\S.+?)/m.match(@para) + elsif /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para) @@alt_id_count+=1 @format,segname,@text=$1,$2,$3 - @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+?)/m.match(@para) + #@format="#@format:#{segname}" # + elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para) @@alt_id_count+=1 @format,@text=$1,$2 end @@ -135,17 +87,19 @@ module SiSU_text_parts if /(.+?)/m.match(@para) @text=$1 end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 + if @para !~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$|^$/ #added 2002w06 @text=/(.+?)/m.match(@para)[1] end if /^(\d)~\S*\s+(.+)/m.match(@para) @format,@text=$1,$2 end end - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + @format="#@format:#{segname}" # +#follow this search beneath for heading_body1-6 + @lev_para_ocn=if @para 
=~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn) else - SiSU_XML_format::Format_scroll.new(@md,@format,@text,"<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>") + SiSU_XML_format::Format_scroll.new(@md,@format,@text,"#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}") end self end @@ -396,19 +350,40 @@ module SiSU_XML_munge def markup(para='') wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip - para.gsub!(/(^|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') - para.gsub!(/<:pb>\s*/,'') + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />') + para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') + para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') + #para.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'<em>\1</em>') #reinstate + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<sup>\1</sup>') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<sub>\1</sub>') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'<ins>\1</ins>') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'<cite>\1</cite>') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>') + para.gsub!(/<:pb>\s*/,'') #Fix para.gsub!(/<+[-~]#>+/,'') - para.gsub!(/<0;\w\d+;[um]\d+><#@dp:#@dp>/,'') - if para !~/^<:code>/ + para.gsub!(/#{Mx[:id_o]}0;\w\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') + if para !~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #embeds a red-bullet image --> 
+ para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>') + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />') + para.gsub!(/#{Mx[:br_page]}\s*/,'') + para.gsub!(/#{Mx[:br_page_new]}\s*/,'') + para.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); para.gsub!(/<[-~]#>/,'') para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/, %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4}) para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/, %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1}) - para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, + para.gsub!(/(^|#{Mx[:gl_c]}|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3}) para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later @@ -429,7 +404,7 @@ module SiSU_XML_munge 
para.gsub!(/<br(\s*\/)?>/,'<br />') para.gsub!(/<:pb>\s*/,'') para.gsub!(/<[-~]#>/,'') - para.gsub!(/(^|\s)&\s+/,'\1& ') #sort + para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/, "<image.path>#{@dir.url.images_local}\/\\1</image.path>") @@ -452,7 +427,7 @@ module SiSU_XML_munge para.gsub!(/<:\S+?>/,'') #<-- temporary para.gsub!(/<[-~]#>/,'') - para.gsub!(/(^|\s)&\s+/,'\1& ') #sort + para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/, "<image.path>#{@dir.url.images_local}\/\\1</image.path>") @@ -728,4 +703,3 @@ module SiSU_Tables require "#{SiSU_lib}/xml_tables" end __END__ - |