diff options
author | Ralph Amissah <ralph@amissah.com> | 2008-07-22 20:03:57 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2008-07-22 20:03:57 -0400 |
commit | 812a2ffaf4ff90abc6225e1dde8f4fbb6c92c2c0 (patch) | |
tree | 67efd138f4f690c75efd38d75d53e1b72c0024e2 /lib/sisu/v0/odf.rb | |
parent | Updated sisu-0.67.5 (diff) | |
parent | changelog and version updated (diff) |
Merge branch 'upstream' into debian/sid
Diffstat (limited to 'lib/sisu/v0/odf.rb')
-rw-r--r-- | lib/sisu/v0/odf.rb | 190 |
1 files changed, 79 insertions, 111 deletions
diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index 1883bdba..09c67ff6 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -68,6 +68,7 @@ module SiSU_ODF require "#{SiSU_lib}/odf_format" include SiSU_ODF_format require "#{SiSU_lib}/shared_txt" + require "#{SiSU_lib}/shared_structure" @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 class Source require 'zlib' @@ -100,55 +101,6 @@ module SiSU_ODF end end private - class Split_text_object <Source - require "#{SiSU_lib}/odf_format" - include SiSU_Viz - include SiSU_ODF_format - @@dp=nil - @@alt_id_count=0 - attr_reader :format,:lev,:text,:ocn,:lev_para_ocn - def initialize(para) - @para=para - @format,@ocn='null','null' - #@format,@ocn=nil,nil - @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - end - def lev_segname_para_ocn - @text=nil - if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5 - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,@text,@ocn=$1,$2,$3,$4 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}" - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" - end - else - if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @text,@ocn=$1,$2 - end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ - @text=/(.+?)/im.match(@para)[1] - end - if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para) - @format,@lev,@text=$1,$2,$3 - end - end - format=@format.dup - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - SiSU_ODF_format::Format_text_object.new(format,@text,@ocn) - else - SiSU_ODF_format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>") - end - self - end - end class Scroll <Source require "#{SiSU_lib}/defaults" require "#{SiSU_lib}/shared_txt" @@ -160,8 +112,8 @@ module SiSU_ODF @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array @vz=SiSU_Env::Get_init.instance.skin @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - @serial=/\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>\s*/ + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + @serial=/\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}\s*/ @tab="\t" @url_brace=SiSU_Viz::Skin.new.url_decoration @br=if @md.cmd =~/M/; "\n" @@ -176,12 +128,12 @@ module SiSU_ODF end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') - notes=para.scan(/~\{(\d+\s+.+?)\s*<#@dp>\}~/) + notes=para.scan(/#{Mx[:en_a_o]}(\d+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/) @n=[] notes.each do |n| #high cost to deal with <br> appropriately within odf, consider n=n.dup.to_s - if n =~/<:?br(?: \/)?>/ - fix=n.split(/<:?br(?: \/)?>/) #watch #added + if n =~/#{Mx[:br_line]}/ + fix=n.split(/#{Mx[:br_line]}/) #watch #added fix.each do |x| if x =~/\S+/; @n << x end @@ -210,8 +162,8 @@ module SiSU_ODF end def heading(para,no) para.gsub!(@serial,'') - para.gsub!(/<:name#\S+?>/,'') - para.gsub!(/^([1-6])~\S*\s/,'') + para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') + para.gsub!(/^#{Mx[:lv_o]}(\d):\S*?#{Mx[:lv_c]}\s*/,'') m=/#{$1}/ breakpage='' if @md.fns \ @@ -307,19 +259,19 @@ module SiSU_ODF def normal(para) #P1 - P3 para.gsub!(@serial,'') para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, - %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration + '<text:a xlink:type="simple" xlink:href="\1">\1</text:a>\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, '\1<text:a xlink:type="simple" xlink:href="\2">\2</text:a>\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works #%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) para=case para - when /^<:i([1-9])>\s/m + when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/m m=$1 - para.gsub!(/^<:i#{m}>\s/m,'') + para.gsub!(/^#{Mx[:pa_o]}:i#{m}#{Mx[:pa_c]}/m,'') %{<text:p text:style-name="P1#{m}">#{para}</text:p>} else %{<text:p text:style-name="P1">#{para}</text:p>} end @@ -331,24 +283,24 @@ module SiSU_ODF end def footnote(para) @astx||=10000 - para.gsub!(/<#@dp>([}\]]~)/,'\1') + para.gsub!(/#{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]}|#{Mx[:en_a_c]})/,'\1') #para.gsub!(/<br \/><:i[1-9]>/,'<br />') - if para =~/~\{\d+\s+/ - para=para.gsub(/~\{(\d+)\s+(.+?)\}~/,'<text:note text:id="ftn\1" text:note-class="footnote"><text:note-citation>\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>') + if para =~/#{Mx[:en_a_o]}\d+\s+/ + para=para.gsub(/#{Mx[:en_a_o]}(\d+)\s+(.+?)#{Mx[:en_a_c]}/,'<text:note text:id="ftn\1" text:note-class="footnote"><text:note-citation>\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>') end - if para=~/~\[[*+]\d+\s/ #editor notes, squre bracket series - asterisk=para.scan(/~\[([*+]\d+)\s+(.+?)\]~/) + if para=~/#{Mx[:en_b_o]}[*+]\d+\s/ #editor notes, squre bracket series + asterisk=para.scan(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)#{Mx[:en_b_c]}/) asterisk.each do |x| a=x[0].gsub(/([*+])/,"\\\\\\1") - para=para.gsub(/~\[(#{a})\s+(.+?)\]~/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>}) + para=para.gsub(/#{Mx[:en_b_o]}(#{a})\s+(.+?)#{Mx[:en_b_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>}) @astx+=1 end end - if para=~/~\{[*+]+\s/ - asterisk=para.scan(/~\{([*+]+)\s+(.+?)\}~/) + if para=~/#{Mx[:en_a_o]}[*+]+\s/ + asterisk=para.scan(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/) asterisk.each do |x| a=x[0].gsub(/([*+])/,"\\\\\\1") - para=para.gsub(/~\{(#{a})\s+(.+?)\}~/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>}) + para=para.gsub(/#{Mx[:en_a_o]}(#{a})\s+(.+?)#{Mx[:en_a_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>}) @astx+=1 end end @@ -358,15 +310,16 @@ module SiSU_ODF para.gsub!(/&nbsp;| /,' ') para.gsub!(/</,'<'); para.gsub!(/>/,'>') para.gsub!(/<(text:span text:style-name="T[1-5]"|\/text:span)>/,'<\1>') #works, not ideal + para.gsub!(/#{Mx[:br_line]}/,'<br />') para.gsub!(/<br(?:\s+\/)?>/,'<br />') #para.gsub!(/\s\s/,'  ') para end def poem(para) #P4 #same as group para.gsub!(@serial,'') - para.gsub!(/<:verse(?:-end)?>\s*/m,'') + para.gsub!(/#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}\s*/m,'') parray=[] - para.split(/<:?br(?: \/)?>/).each do |parablock| + para.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parray << %{<text:p text:style-name="P4">#{parablock}</text:p>} if parablock =~/\S+/ end @@ -374,9 +327,9 @@ module SiSU_ODF end def group(para) #P4 #same as verse para.gsub!(@serial,'') - para.gsub!(/<:group(?:-end)?>\s*/m,'') + para.gsub!(/#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}\s*/m,'') parray=[] - para.split(/<:?br(?: \/)?>/).each do |parablock| + para.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parablock.gsub!(/<text:a xlink:type="simple" xlink:href="(.+?)">/m,'<text:a xlink:type="simple" xlink:href="\1">') parablock.gsub!(/<(\/text:a)>/,'<\1>') @@ -390,21 +343,21 @@ module SiSU_ODF end def code(para) #P5 para.gsub!(@serial,'') - para.gsub!(/<:code(?:-end)?>\s*/m,'') + para.gsub!(/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}\s*/m,'') para.gsub!(/\s\s/,'  ') parray=[] - para.split(/<:?br(?: \/)?>/).each do |parablock| + para.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parablock.gsub!(/^\s*$/,'<br />') parablock.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, - %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration + '<text:a xlink:type="simple" xlink:href="\1">\1</text:a>\2') #http ftp matches escaped, no decoration parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/ end para=parray.join + '<text:p text:style-name="Standard"/>' para end def table(para) # - if para =~/<!Th?.+/ # tables come as single block + if para =~/#{Mx[:gr_o]}Th?.+/ # tables come as single block table=SiSU_ODF_format::Table.new(@md,para) para=table.table_split end @@ -414,7 +367,7 @@ module SiSU_ODF n=lv - 1 n3=lv + 2 lv=nil if lv == 0 - para=unless para=~/^<:code>/ + para=unless para=~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para=if para =~/\{\s*\S+?\.(?:png|jpg|gif)\s.+?\}(?:(?:https?|file|ftp):\S+|image)/; image(para) elsif para =~/\{.+?\}(?:(?:https?|file|ftp):\S+|image)/; text_link(para) else para @@ -431,16 +384,16 @@ module SiSU_ODF when 5; heading(para,'5') << @br*2 when 6; heading(para,'6') << @br*2 end - elsif para =~ /<:verse(?:-end)?>/ + elsif para =~ /#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}/ @@odf[:body] << poem(para) @@odf[:body] << @br*2 - elsif para =~ /<:group(?:-end)?>/ + elsif para =~ /#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}/ @@odf[:body] << group(para) @@odf[:body] << @br*2 - elsif para =~ /<:code(?:-end)?>/ + elsif para =~ /#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/ @@odf[:body] << code(para) @@odf[:body] << @br*2 - elsif para =~ /<!Th?/ #elsif para =~ /<!Th?¡/u + elsif para =~ /#{Mx[:gr_o]}Th?/ #elsif para =~ /<!Th?¡/u @@odf[:body] << table(para) @@odf[:body] << @br*2 else @@ -457,7 +410,7 @@ module SiSU_ODF wordlist end def markup(data) # Used for major markup instructions - safe_characters=/[^a-zA-Z0-9}{\/?,."';:)(><\-_&!@%~#\]\[*=$| \n+`¡]/u + safe_characters=/[^a-zA-Z0-9}{\/?,."';:)(><\-_&!@%~#\]\[*=$| \n+`#{Mx[:tc_p]}]/u dir=SiSU_Env::Info_env.new(@md.fns) @data_mod,@endnotes,@level,@cont,@copen,@odf_contents_close=Array.new(6){[]} @rcdc=false @@ -474,10 +427,10 @@ module SiSU_ODF word=para.scan(/\S+|\n/) if word word.each do |w| # _ - / # | : ! ^ ~ - unless w =~/<~\S+?;\S+?;\S+?><#@dp:#@dp>|<[:!][^<>]+?>|^<\/?(?:del|ins|sub|sup|cite)>$/ + unless w =~/#{Mx[:id_o]}~\S+?;\S+?;\S+?#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|#{Mx[:gr_o]}.+?#{Mx[:gr_c]}|<[:!][^<>]+?>/ w.gsub!(/^<([^<>][^<>][^<>][^<>]+?)>$/,'<\1>') #refix end - unless para =~/^(?:0~|%+ )/m + unless para =~/^(?:#{Rx[:meta]}|%+ )/m w.gsub!(/&#(?:126|152);/,'~') #126 usual if w !~/&\S{1,7};/ \ or w =~/ / @@ -489,24 +442,32 @@ module SiSU_ODF para=para_array.join(' ') para=para.strip end - if para =~/<:code>/ #code-block: angle brackets special characters + if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters para.gsub!(/(^|[^}])_</m,'\1<'); para.gsub!(/(^|[^}])_>/m,'\1>') para.gsub!(/(^|[^}])_</m,'\1<'); para.gsub!(/(^|[^}])_>/m,'\1>') end - para.gsub!(/^(<:i[1-9]>\s+)?_\*\s+/,'\1<draw:frame draw:style-name="gr1" text:anchor-type="as-char" svg:width="0.22cm" svg:height="0.22cm" draw:z-index="2"><draw:image xlink:href="Pictures/bullet_09.png" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad"/></draw:frame> ') # bullet_09.png + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'<ins>\1</ins>') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'<cite>\1</cite>') + para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') + para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}[~-]##{Mx[:mk_c]}/,'') + para.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}\s*)?#{Mx[:gl_bullet]}/,'\1<draw:frame draw:style-name="gr1" text:anchor-type="as-char" svg:width="0.22cm" svg:height="0.22cm" draw:z-index="2"><draw:image xlink:href="Pictures/bullet_09.png" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad"/></draw:frame> ') # bullet_09.png #para.gsub!(/^(<:i[1-9]>\s+)?_\*\s+/,'\1<text:span text:style-name="T6">●</text:span> ') #bullet #para.gsub!(/^(<:i[1-9]>\s+)?_\*\s+/,'\1● ') # bullet utf8, make smaller if used #para.gsub!(/^_\*\s+/,'<text:span text:style-name="T6">●</text:span> ') #bullet - para.gsub!(/^(<:i[1-9]>)\s+_\*\s+/,'\1 <text:span text:style-name="T6">·</text:span> ') #bullet - para.gsub!(/<:?br>/,'<br />') - para.gsub!(/<:p[bn]>/,'<text:p text:style-name="P8"> </text:p>') + para.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})\s*#{Mx[:gl_bullet]}/,'\1 <text:span text:style-name="T6">·</text:span> ') #bullet + para.gsub!(/^#{Mx[:gl_bullet]}/,'<text:span text:style-name="T6">·</text:span> ') #bullet + para.gsub!(/#{Mx[:br_line]}/,'<br />') + para.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/,'<text:p text:style-name="P8"> </text:p>') para.gsub!(/©/,'©') #too arbitrary para.gsub!(/.+?<-#>/,'') # remove dummy headings (used by html) #check - para.gsub!(/<b>(.+?)<\/b>/,'<text:span text:style-name="T1">\1</text:span>') - para.gsub!(/<i>(.+?)<\/i>/,'<text:span text:style-name="T2">\1</text:span>') - para.gsub!(/<u>(.+?)<\/u>/,'<text:span text:style-name="T3">\1</text:span>') - para.gsub!(/<sup>(.+?)<\/sup>/,'<text:span text:style-name="T4">\1</text:span>') - para.gsub!(/<sub>(.+?)<\/sub>/,'<text:span text:style-name="T5">\1</text:span>') + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<text:span text:style-name="T1">\1</text:span>') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<text:span text:style-name="T2">\1</text:span>') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<text:span text:style-name="T3">\1</text:span>') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<text:span text:style-name="T4">\1</text:span>') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<text:span text:style-name="T5">\1</text:span>') para.gsub!(/`/,"'") para.gsub!(//u,'-') para.gsub!(/·/u,'*') @@ -520,10 +481,10 @@ module SiSU_ODF para.gsub!(/\44/,'$') #$ watch #para.gsub!(/^·/,'_*') #$ watch #para.gsub!(/·/,'*') #$ watch - para.gsub!(/<:p[bn]>/,'') # remove page breaks - para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>/,'') # remove empty lines + para.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/,'') # remove page breaks + para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') # remove empty lines para.gsub!(/<a href=".+?">(.+?)<\/a>/,'\1') - para.gsub!(/<:name#\S+?>/,'') # remove name links + para.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links # para.gsub!(/<a href=".+?">(.+?)<\/a>/im,'\1') #para.gsub!(/ /,' ') # decide on #para.gsub!(/\{(\S+?\.(?:png|jpg)) .+?\}(?:http:\/\/\S+|image)/," [ \\1 ]") #"[ #{@env.url.images_local}\/\\1 ]") @@ -533,38 +494,44 @@ module SiSU_ODF #para.gsub!(/^(\{\S+?\.(?:png|jpg)\s+.+?"(.*?)"\s*\}\S+)/,"\\1 \n [image: \"\\2\"]") wordlist=para.scan(/\S+/) para=tidywords(wordlist).join(' ').strip - if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers + if para =~/^#{Rx[:meta]}(\S+)\s+(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; odf_metadata(d_meta) end end @rcdc=true if @rcdc==false \ - and (para =~/~metadata/ or para =~/1~meta\s+Document Information/) - if para !~/(^0~|<ENDNOTES>|<EOF>)/ + and (para =~/~metadata/ or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_x]}\s*Document Information/) + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change paranum=para[@regx,3] @p_num=SiSU_ODF_format::Paragraph_number.new(paranum) end - @sto=Split_text_object.new(para).lev_segname_para_ocn + @sto=SiSU_Structure::Split_text_object.new(@md,para).odt #<office:annotation><dc:date>yyyy-mm-ddT00:00:00</dc:date><text:p>#{ocn}</text:p></office:annotation> #followed immediately by paragraph closure ### problem in scroll, it appears tables are getting paragraph numbers unless @rcdc - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m \ and para=~/\S+/ para=case @sto.format - when /^(1)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(1):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para - when /^(2)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(2):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para - when /^(3)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(3):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para - when /^(4)~(\S+)/; odf_structure(para,$1,@sto.ocn,$2) + when /^(4):(\S+)/ # work on see Split_text_object + odf_structure(para,$1,@sto.ocn,$2) # work on see SiSU_text_parts::Split_text_object para - when /^(5)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(5):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para - when /^(6)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(6):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para #@sto.lev_para_ocn.heading_body6 #when /^(i1)$/ @@ -610,12 +577,14 @@ module SiSU_ODF if para =~ /^(4)~(\S+)/ odf_structure(para,$1,@sto.ocn,$2) para - elsif para =~/<~(\d+);m\d+;[mdv]\d+><#@dp:#@dp>$/ + elsif para =~/#{Mx[:id_o]}~(\d+);m\d+;[mdv]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ odf_structure(para,nil,nil,nil) #watch may be problematic para end end para.gsub!(/<!.+!>/,' ') if para ## Clean Prepared Text + para.gsub!(/#{Mx[:gr_o]}.+?#{Mx[:gr_c]}/,' ') if para ## CHECK Clean Prepared Text + para.gsub!(/#{Mx[:tc_o]}.+?#{Mx[:tc_c]}/,' ') if para ## CHECK Clean Prepared Text para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text end end @@ -668,7 +637,6 @@ module SiSU_ODF %{<style:style style:name="P8" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:break-before="page"/></style:style>#@br} + %{<style:style style:name="P9" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:break-after="page"/></style:style>#@br} + %{<style:style style:name="P10" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0.199cm" fo:margin-bottom="0.199cm" fo:line-height="150%" fo:text-align="justify" style:justify-single-word="false"/></style:style>#@br} + # P1 - %{<style:style style:name="P11" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0.199cm" fo:margin-bottom="0.199cm" fo:line-height="150%" fo:margin-left="1cm" fo:margin-right="0cm" fo:text-align="justify" style:justify-single-word="false" fo:text-indent="0cm" style:auto-text-indent="false"/></style:style>#@br} + %{<style:style style:name="P12" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0.199cm" fo:margin-bottom="0.199cm" fo:line-height="150%" fo:margin-left="2cm" fo:margin-right="0cm" fo:text-align="justify" style:justify-single-word="false" fo:text-indent="0cm" style:auto-text-indent="false"/></style:style>#@br} + %{<style:style style:name="P13" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0.199cm" fo:margin-bottom="0.199cm" fo:line-height="150%" fo:margin-left="3cm" fo:margin-right="0cm" fo:text-align="justify" style:justify-single-word="false" fo:text-indent="0cm" style:auto-text-indent="false"/></style:style>#@br} + |