diff options
author | Ralph Amissah <ralph@amissah.com> | 2008-09-10 21:24:31 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2008-09-10 21:24:31 -0400 |
commit | 291080c0495f59f031bf5c0de2482f1bc7df59f2 (patch) | |
tree | 6e8a1d334be8979f5c9e77687e2ec8ee7fc633b2 /lib/sisu/v0/concordance.rb | |
parent | book index, sample html file and to screen sisu markup sample added (-M flag) (diff) |
primarily on book index where (markup) provided
* book index, html, tex/pdf, xml
* texpdf no ocn if ocn is 0
* odf, plaintext: if there is a book index, remove it ... do the same for other outputs for which it is not relevant
* concordance, better matches
* constants: dal special character for hardspace changed, because of a (ruby) regex bug when replacing it in xml; odd, but moving on
Note: still to be fixed: html seg headers for endnotes and for index
Diffstat (limited to 'lib/sisu/v0/concordance.rb')
-rw-r--r-- | lib/sisu/v0/concordance.rb | 22 |
1 files changed, 12 insertions, 10 deletions
diff --git a/lib/sisu/v0/concordance.rb b/lib/sisu/v0/concordance.rb index 485f3df5..fad91d14 100644 --- a/lib/sisu/v0/concordance.rb +++ b/lib/sisu/v0/concordance.rb @@ -121,8 +121,8 @@ WOK end def create head_banner=SiSU_HTML_Format::Head_toc.new(@md) -minitoc=SiSU_HTML_minitoc::Toc_mini.new(@md,@data).songsheet -toc='<div class="toc">' + minitoc.to_s + '</div>' + minitoc=SiSU_HTML_minitoc::Toc_mini.new(@md,@data).songsheet + toc='<div class="toc">' + minitoc.to_s + '</div>' <<WOK <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> @@ -199,7 +199,8 @@ WOK @rxp_t3=Regexp.new('^T3') @rxp_excluded1=/(?:https?|file|ftp):\/\/\S+/ @rxp_excluded0=/^(?:#{Mx[:fa_bold_o]}|#{Mx[:fa_italics_o]})?(?:to\d+|\d+| |#{Mx[:br_endnotes]}|EOF|#{Mx[:br_eof]}|thumb_\S+|snap_\S+|_+|-+|[(]?(?:ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx)[).]?|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|#@dp|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)(?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})?$/mi #this regex causes and cures a stack dump in ruby 1.9 !!! - @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|#{Mx[:gr_o]}code#{Mx[:gr_o]}.+?#{Mx[:gr_o]}code-end#{Mx[:gr_o]}|<\S+?>|#{Mx[:id_o]}\S+?#{Mx[:id_c]}|\w+|[a-zA-Z]+}mi + @rgx_splitlist=%r{[—.,;:-]|#{Mx[:id_o]}\S+?#{Mx[:id_c]}}mi + @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|#{Mx[:gr_o]}code#{Mx[:gr_o]}.+?#{Mx[:gr_o]}code-end#{Mx[:gr_o]}|<\S+?>|\w+|[a-zA-Z]+}mi rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error end end @@ -223,13 +224,13 @@ WOK @sfx='.html' #used for hardlinks, previous setting @sfx='', web server takes care of suffix @word_location_seg=wordlocation.gsub(/(.+?)\#(\d+)/,"#{@md.fnl[:pre]}\\1#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}#\\2") unless wordlocation.nil? 
case @wordlocation - when @rxp_t1 - %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } - when @rxp_t2 - %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } - when @rxp_t3 - %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } - else %{<a href="#@word_location_seg">#@show</a>, } + when @rxp_t1 + %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } + when @rxp_t2 + %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } + when @rxp_t3 + %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } + else %{<a href="#@word_location_seg">#@show</a>, } end end def map_para @@ -243,6 +244,7 @@ WOK end if toy =~/\d+/ \ and toy !~/^0$/ + line=line.split(@rgx_splitlist).join(' ') #%take in word or other match for word in line.scan(@rgx_scanlist) #%take in word or other match #word.gsub!(@rxp_clean,'') word.gsub!(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}(?:http)?/,'') |