diff options
author | Ralph Amissah <ralph@amissah.com> | 2008-09-16 00:36:14 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2008-09-16 00:36:14 -0400 |
commit | bc9228c42269bfb4d451ca2d2d92a6a12afb094f (patch) | |
tree | 42b666a81ef350d0ff6153a49d159b3a1c348959 /lib/sisu/v0/odf.rb | |
parent | Updated sisu-0.68.0 (diff) | |
parent | fixes: alphabet list (concordance, dal_idx), and file types (dal_expand_inser... (diff) |
Merge branch 'upstream' into debian/sid
Diffstat (limited to 'lib/sisu/v0/odf.rb')
-rw-r--r-- | lib/sisu/v0/odf.rb | 129 |
1 files changed, 64 insertions, 65 deletions
diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index 09c67ff6..05a6272c 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -128,7 +128,7 @@ module SiSU_ODF end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') - notes=para.scan(/#{Mx[:en_a_o]}(\d+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/) + notes=para.scan(/#{Mx[:en_a_o]}(\d+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/)[1] @n=[] notes.each do |n| #high cost to deal with <br> appropriately within odf, consider n=n.dup.to_s @@ -222,13 +222,13 @@ module SiSU_ODF end def image(para) para.gsub!(@serial,'') - m=para.scan(/(\{\s*(.+?)\}((?:https?|file|ftp)\S+|image))/) + m=para.scan(/(#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}((?:https?|file|ftp)\S+|image))/) if m; m.each do |i| cont,url=i[1],i[2] cont.gsub!(/([)(\]\[])/,"\\\\\\1") cont.gsub!(/([+?])/,"\\\\\\1") # incorrect handling of + url.gsub!(/([+?])/,"\\\\\\1") - para.sub!(/\{\s*#{cont}\}#{url}/m,image_odf(i)) #watch + para.sub!(/#{Mx[:lnk_o]}\s*#{cont}\s*#{Mx[:lnk_c]}#{url}/m,image_odf(i)) #watch para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix end m=nil @@ -242,14 +242,14 @@ module SiSU_ODF end def text_link(para) para.gsub!(@serial,'') - m=para.scan(/(\{([^}]+?)\}((?:https?|file|ftp)\S+?))([;.,]?(?=\s|$))/) #sort + m=para.scan(/(#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}((?:https?|file|ftp)\S+?))([;.,]?(?=\s|$))/) #sort if m m.each do |i| txt,url,trail=i[1],i[2] txt.gsub!(/([)(\]\[])/,"\\\\\\1") - txt.gsub!(/([+?])/,"\\\\\\1") # problems with + + txt.gsub!(/([+?*])/,"\\\\\\1") # problems with + url.gsub!(/([+?])/,"\\\\\\1") # problems with + - para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url + para.gsub!(/#{Mx[:lnk_o]}\s*#{txt}#{Mx[:lnk_c]}#{url}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix end m=nil @@ -260,14 +260,14 @@ module SiSU_ODF para.gsub!(@serial,'') para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, '<text:a xlink:type="simple" xlink:href="\1">\1</text:a>\2') #http ftp matches escaped, no decoration - para.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, + para.gsub!(/((?:^|\s)#{Mx[:lnk_c]})((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, '\1<text:a xlink:type="simple" xlink:href="\2">\2</text:a>\3') #special case \{ e.g. \}http://url para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works #%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, - %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) + %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) if para !~/http:\/\// # improve upon, document crash where url contains '@' symbol para=case para when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/m m=$1 @@ -283,31 +283,31 @@ module SiSU_ODF end def footnote(para) @astx||=10000 - para.gsub!(/#{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]}|#{Mx[:en_a_c]})/,'\1') + para.gsub!(/#{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/,'\1') #para.gsub!(/<br \/><:i[1-9]>/,'<br />') - if para =~/#{Mx[:en_a_o]}\d+\s+/ + if para =~/#{Mx[:en_a_o]}\d+\s+/ para=para.gsub(/#{Mx[:en_a_o]}(\d+)\s+(.+?)#{Mx[:en_a_c]}/,'<text:note text:id="ftn\1" text:note-class="footnote"><text:note-citation>\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>') end - if para=~/#{Mx[:en_b_o]}[*+]\d+\s/ #editor notes, squre bracket series - asterisk=para.scan(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)#{Mx[:en_b_c]}/) + if para=~/#{Mx[:en_a_o]}[*+]+\s/ + asterisk=para.scan(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/) asterisk.each do |x| a=x[0].gsub(/([*+])/,"\\\\\\1") - para=para.gsub(/#{Mx[:en_b_o]}(#{a})\s+(.+?)#{Mx[:en_b_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>}) + para=para.gsub(/#{Mx[:en_a_o]}(#{a})\s+(.+?)#{Mx[:en_a_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>}) @astx+=1 end end - if para=~/#{Mx[:en_a_o]}[*+]+\s/ - asterisk=para.scan(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/) + if para=~/#{Mx[:en_b_o]}[*+]\d+\s/ + asterisk=para.scan(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)#{Mx[:en_b_c]}/) asterisk.each do |x| a=x[0].gsub(/([*+])/,"\\\\\\1") - para=para.gsub(/#{Mx[:en_a_o]}(#{a})\s+(.+?)#{Mx[:en_a_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>}) + para=para.gsub(/#{Mx[:en_b_o]}(#{a})\s+(.+?)#{Mx[:en_b_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>}) @astx+=1 end end para end def group_clean(para) - para.gsub!(/&nbsp;| /,' ') + para.gsub!(/&nbsp;| |#{Mx[:nbsp]}/,' ') para.gsub!(/</,'<'); para.gsub!(/>/,'>') para.gsub!(/<(text:span text:style-name="T[1-5]"|\/text:span)>/,'<\1>') #works, not ideal para.gsub!(/#{Mx[:br_line]}/,'<br />') @@ -358,18 +358,36 @@ module SiSU_ODF end def table(para) # if para =~/#{Mx[:gr_o]}Th?.+/ # tables come as single block - table=SiSU_ODF_format::Table.new(@md,para) + txt_obj={:table =>para} + table=SiSU_ODF_format::Table.new(@md,txt_obj) para=table.table_split end end - def odf_structure(para='',lv='',ocn='',hname='') #% Used to extract the structure of a document + def odf_structure(md,t_o) + @md,@t_o=md,t_o + @md,@t_o=md,t_o + if t_o.class == Hash + para =t_o[:txt] || nil + lv =t_o[:lv] || nil + ocn =t_o[:ocn] || nil + hname =t_o[:h_name] || nil + #@h_name =t_o[:h_name] || nil + #elsif t_o.class == Array + # @txt =txt[0] + #elsif t_o.class == String + # @txt =txt + else + #@one,@two,@three=one,two,three + p t_o.class + p caller + end lv=lv.to_i n=lv - 1 n3=lv + 2 lv=nil if lv == 0 para=unless para=~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ - para=if para =~/\{\s*\S+?\.(?:png|jpg|gif)\s.+?\}(?:(?:https?|file|ftp):\S+|image)/; image(para) - elsif para =~/\{.+?\}(?:(?:https?|file|ftp):\S+|image)/; text_link(para) + para=if para =~/#{Mx[:lnk_o]}\s*\S+?\.(?:png|jpg|gif)\s.+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\S+|image)/; image(para) + elsif para =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\S+|image)/; text_link(para) else para end else para @@ -421,15 +439,15 @@ module SiSU_ODF bullet=image_src('bullet_09.png') cp("#{bullet}/bullet_09.png","#{@env.path.odf}/Pictures/.") #if image_src('bullet_09.png') data.each do |para| + para.gsub!(/#{Mx[:id_o]}~0;0:0;x\d+#{Mx[:id_c]}/,'') # if book index? remove #p para if para =~safe_characters and @md.cmd =~/V/ #KEEP #para.gsub!(/<(~\d+;(?:\w|[0-6]:)\d+;\w\d+)><(#@dp:#@dp)>/,'<\1><\2>') + para='' if para =~/#{Mx[:lv_o]}\d+:.*?#{Mx[:lv_c]}.+?#{Mx[:pa_non_object_dummy_heading]}/ para_array=[] + para.gsub!(/</,'<'); para.gsub!(/>/,'>') word=para.scan(/\S+|\n/) if word word.each do |w| # _ - / # | : ! ^ ~ - unless w =~/#{Mx[:id_o]}~\S+?;\S+?;\S+?#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|#{Mx[:gr_o]}.+?#{Mx[:gr_c]}|<[:!][^<>]+?>/ - w.gsub!(/^<([^<>][^<>][^<>][^<>]+?)>$/,'<\1>') #refix - end unless para =~/^(?:#{Rx[:meta]}|%+ )/m w.gsub!(/&#(?:126|152);/,'~') #126 usual if w !~/&\S{1,7};/ \ @@ -468,10 +486,10 @@ module SiSU_ODF para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<text:span text:style-name="T3">\1</text:span>') para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<text:span text:style-name="T4">\1</text:span>') para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<text:span text:style-name="T5">\1</text:span>') - para.gsub!(/`/,"'") para.gsub!(//u,'-') + para.gsub!(/ /u, ' ') # space identify + para.gsub!(/ /u, ' ') # space identify para.gsub!(/·/u,'*') - para.gsub!(/[“”]/u,'""') para.gsub!(/[–—]/u,'-') #— – chk para.gsub!(/ < /i,'<') para.gsub!(/\\copy(?:right)?\b/,'©') @@ -485,7 +503,7 @@ module SiSU_ODF para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') # remove empty lines para.gsub!(/<a href=".+?">(.+?)<\/a>/,'\1') para.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links -# para.gsub!(/<a href=".+?">(.+?)<\/a>/im,'\1') + #para.gsub!(/<a href=".+?">(.+?)<\/a>/im,'\1') #para.gsub!(/ /,' ') # decide on #para.gsub!(/\{(\S+?\.(?:png|jpg)) .+?\}(?:http:\/\/\S+|image)/," [ \\1 ]") #"[ #{@env.url.images_local}\/\\1 ]") #para.gsub!(/<!TZ.+/,'') @@ -515,51 +533,35 @@ module SiSU_ODF and para=~/\S+/ para=case @sto.format when /^(1):(\S*)/ - odf_structure(para,$1,@sto.ocn,$2) + txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2} + odf_structure(@md,txt_obj) para when /^(2):(\S*)/ - odf_structure(para,$1,@sto.ocn,$2) + txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2} + odf_structure(@md,txt_obj) para when /^(3):(\S*)/ - odf_structure(para,$1,@sto.ocn,$2) + txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2} + odf_structure(@md,txt_obj) para when /^(4):(\S+)/ # work on see Split_text_object - odf_structure(para,$1,@sto.ocn,$2) + txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2} + odf_structure(@md,txt_obj) # work on see SiSU_text_parts::Split_text_object para when /^(5):(\S*)/ - odf_structure(para,$1,@sto.ocn,$2) + txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2} + odf_structure(@md,txt_obj) para when /^(6):(\S*)/ - odf_structure(para,$1,@sto.ocn,$2) + txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2} + odf_structure(@md,txt_obj) para - #@sto.lev_para_ocn.heading_body6 - #when /^(i1)$/ - # #formatMono.gsubBody - # #para=@sto[:lev_para_ocn].scrIndent1 - #when /^(i2)$/ - # formatMono.gsubBody - # para=@sto[:lev_para_ocn].scrIndent2 - #when /^(center)$/ - # para.gsub!(/(.+)/, - # %{<center>(\\1)</center>}) - # para=@sto[:lev_para_ocn].scrPara - #when /^(b|bold)$/ - # para.gsub!(/(.+)/, - # %{<b>(\\1)</b>}) - # para=@sto[:lev_para_ocn].scrPara - #when /null/ # see whether u can improve - # if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/) - # #formatMono.gsubBody - # #para=@sto[:lev_para_ocn].scrPara - # end - else odf_structure(para,nil,nil,nil) #watch may be problematic + else + txt_obj={:txt =>para} + odf_structure(@md,txt_obj) #watch may be problematic para end - elsif para =~/(.*)<!#!>(.*)/ - one,two=$1,$2 - format_text=SiSU_ODF_format::Format_text_object.new(one,two) - para=format_text.seg_no_paranum end if para =~/<a name="n\d+">/ \ and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/ # -endnote @@ -568,17 +570,14 @@ module SiSU_ODF if (para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/) # i don't get the condition for no paranum end - if para =~/<:center>/ - one,two=/(.*)<:center>(.*)/.match(para)[1,2] - format_text=SiSU_ODF_format::Format_text_object.new(one,two) - para=format_text.center - end else if para =~ /^(4)~(\S+)/ - odf_structure(para,$1,@sto.ocn,$2) + txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2} + odf_structure(@md,txt_obj) para elsif para =~/#{Mx[:id_o]}~(\d+);m\d+;[mdv]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ - odf_structure(para,nil,nil,nil) #watch may be problematic + txt_obj={:txt =>para} + odf_structure(@md,txt_obj) #watch may be problematic para end end |