From 7010772e78d181be3a8ba304387d94b8f4825ea7 Mon Sep 17 00:00:00 2001
From: Ralph Amissah <ralph@amissah.com>
Date: Sat, 9 Oct 2010 22:51:24 -0400
Subject: odt: fix corner-case breakage in matching and representing "&nbsp;",
 "&" and urls (+changelog)

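* odf: map "&nbsp;" to "&#160;" and escape a bare "&" as "&amp;"
 (prior to this fix, odt output for viral_spiral in sisu-markup-samples was
 broken)
* shared_metadata: apply the same "&nbsp;" and "&" handling to metadata, and
 stop plain url matches at whitespace, quotes and angle brackets

Gives more consistent OpenDocument Text results.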
---
 lib/sisu/v2/odf.rb             |  6 +++---
 lib/sisu/v2/shared_metadata.rb | 14 +++++++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/sisu/v2/odf.rb b/lib/sisu/v2/odf.rb
index 41229249..ad1f45d1 100644
--- a/lib/sisu/v2/odf.rb
+++ b/lib/sisu/v2/odf.rb
@@ -461,9 +461,9 @@ module SiSU_ODF
             word.each do |w| # _ - / # | : ! ^ ~
               unless dob =~/^(?:#{Rx[:meta]}|%+ )/m
                 w.gsub!(/&#(?:126|152);/,'~') #126 usual
-                if w !~/&\S{1,7};/ \
-                or w =~/&nbsp;/
-                  w.gsub!(/&/,'&amp;') #watch &nbsp;
+                w.gsub!(/&nbsp;/,'&#160;')
+                if w !~/&\S{2,7}?;/
+                  w.gsub!(/&/,'&amp;')
                 end
                 w.gsub!(/(&\S{1,7};)+&/,'\1&amp;') #could break things
               end
diff --git a/lib/sisu/v2/shared_metadata.rb b/lib/sisu/v2/shared_metadata.rb
index 69a92070..e589c598 100644
--- a/lib/sisu/v2/shared_metadata.rb
+++ b/lib/sisu/v2/shared_metadata.rb
@@ -476,6 +476,18 @@ WOK
         if @inf.class==String
           @inf.gsub!(/</,'&lt;'); @inf.gsub!(/>/,'&gt;')
           @inf.gsub!(/&lt;br(?: \/)?&gt;/,'<br />')
+          if @inf =~/&/
+            inf_array=[]
+            word=@inf.scan(/\S+|\n/)
+            word.each do |w| # _ - / # | : ! ^ ~
+              w.gsub!(/&nbsp;/,'&#160;')
+              if w !~/&\S{2,7}?;/
+                w.gsub!(/&/,'&amp;')
+              end
+              inf_array << w
+            end
+            @inf=inf_array.join(' ')
+          end
           @inf.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,
             '<text:a xlink:type="simple" xlink:href="\1">\1</text:a>') #http ftp matches escaped, no decoration
           @inf.gsub!(/(#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
@@ -484,7 +496,7 @@ WOK
             @inf.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
               %{#{url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{url_brace.xml_close}}) #http ftp matches with decoration
           else
-            @inf.gsub!(/(https?:\/\/\S+)/,
+            @inf.gsub!(/(https?:\/\/[^<>'"\s]+)/,
               %{#{url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{url_brace.xml_close}}) #http ftp matches with decoration
           end
           @inf.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,
-- 
cgit v1.2.3