From 84957a4260170c3b81a690853ba5b865f59e1217 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 19 Oct 2014 21:05:55 -0400 Subject: v5 v6: html, remove trailing backslash for empty linebreak & paragraph,

--- lib/sisu/v5/db_sqltxt.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/sisu/v5/db_sqltxt.rb') diff --git a/lib/sisu/v5/db_sqltxt.rb b/lib/sisu/v5/db_sqltxt.rb index ffb5966b..6585fd66 100644 --- a/lib/sisu/v5/db_sqltxt.rb +++ b/lib/sisu/v5/db_sqltxt.rb @@ -62,11 +62,11 @@ module SiSU_DbText def special_character_escape(str) str=str.gsub(/'/,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql - gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"
\n"). - gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,''). #check - gsub(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2'). - gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2'). - gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1') + gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"<br>\n"). + gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check + gsub(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/m,'[image: \1] \2'). + gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/m,'\1\2'). + gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/m,'\1') end def clean_searchable_text_from_document_objects(arr) txt_arr,en=[],[] -- cgit v1.2.3 From 2c73f3060f9678f751c236fe17863d443f6a650f Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 19 Oct 2014 21:13:52 -0400 Subject: v5 v6: db, text search & display field, footnotes moved to end of text object * cleaner, more useful search results * cleaner text search field * separate footnote fields redundant for search purposes --- lib/sisu/v5/db_sqltxt.rb | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'lib/sisu/v5/db_sqltxt.rb') diff --git a/lib/sisu/v5/db_sqltxt.rb b/lib/sisu/v5/db_sqltxt.rb index 6585fd66..3f6cf951 100644 --- a/lib/sisu/v5/db_sqltxt.rb +++ b/lib/sisu/v5/db_sqltxt.rb @@ -60,7 +60,7 @@ module SiSU_DbText class Prepare def special_character_escape(str) - str=str.gsub(/'/,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") + str=str.gsub(/'/m,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"<br>\n"). gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check @@ -80,13 +80,29 @@ module SiSU_DbText gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,''). gsub(/ \s+/m,' ') #p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/ - s + txt_arr << s end - txt_arr << arr << en - #txt_arr=txt_arr.flatten + txt_arr=txt_arr << en txt=txt_arr.flatten.join("\n") - txt=special_character_escape(txt) - txt + special_character_escape(txt) + end + def clean_document_objects_body(arr) + txt_arr,en,en_arr=[],[],[] + arr=(arr.is_a?(String)) ? [ arr ] : arr + arr.each do |s| + en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m) + s=s.gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m,'\1'). + gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,''). + gsub(/ \s+/m,' ') + txt_arr << s + end + en.flatten.each do |e| + e=e.sub(/^(\d+)\s*/,'\1 ') + en_arr << e + end + txt_arr=txt_arr << en_arr + txt=txt_arr.flatten.join("\n<br>") + special_character_escape(txt) end def clean_searchable_text_from_document_source(arr) txt_arr,en=[],[] -- cgit v1.2.3