diff options
Diffstat (limited to 'lib/sisu/0.52/html_segments.rb')
-rw-r--r-- | lib/sisu/0.52/html_segments.rb | 471 |
1 files changed, 471 insertions, 0 deletions
# Recovered from: diff --git a/lib/sisu/0.52/html_segments.rb b/lib/sisu/0.52/html_segments.rb
# (new file mode 100644, index 00000000..765c012e, 471 added lines)
# NOTE(review): the listing this text was taken from had its line breaks and
# runs of whitespace collapsed; literal whitespace inside strings and regexps
# below should be confirmed against the repository copy.
=begin
 * Name: SiSU information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
   * http://www.jus.uio.no/sisu
   * http://www.jus.uio.no/sisu/SiSU/download.html

 * Description: html segment generation, processing

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah

 * License: GPL 2 or later

  Summary of GPL 2

  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation; either version 2 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
    http://www.fsf.org/licenses/gpl.html
    http://www.gnu.org/copyleft/gpl.html
    http://www.jus.uio.no/sisu/gpl2.fsf

  SiSU was first released to the public on January 4th 2005

  SiSU uses:

  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

  © Ralph Amissah 1997, current 2007.
  All Rights Reserved.

 * Ralph Amissah: ralph@amissah.com
                  ralph.amissah@gmail.com
=end
module SiSU_HTML_seg
  require SiSU_lib + '/shared_html'
  require SiSU_lib + '/html'
  require SiSU_lib + '/html_promo'
  # Builds the segmented html output: one file per level-4 ("4~name") heading.
  #
  # All working state is held in class variables, so every Seg instance created
  # during a run (the methods below create many short-lived ones) reads and
  # writes the same buffers. #songsheet is the public entry point; everything
  # after +protected+ is only called from within this class.
  class Seg
    @@seg,@@seg_subtoc,@@seg_endnotes,@@seg_ad={},{},{},{}
    @@seg_name,@@seg_name_html,@@seg_name_php,@@segtocband=[],[],[],[]
    @@filename_seg=@@filename_segphp=@@seg_url=@@fn=@@to_lev4=@@get_hash_to=@@get_hash_fn=''
    @@loop_count=@@seg_total=@@tracker=0
    @@is4=@@is3=@@is2=@@is1=0            # heading-level flags, 0 or 1 (note: 0 is truthy in ruby, always compare with == 1)
    @@header1=@@header2=@@header3=@@header4=0
    @@seg[:dot_nav],@@seg[:tocband],@@seg[:title],@@seg[:headers],@@seg[:main],@@seg[:tail],@@seg[:credits],@@seg_subtoc_array,@@seg_endnotes_array,@@heading_endnotes_array,@@seg[:endnote_all]=Array.new(11){[]}
    @@seg[:header_endnotes]=''
    @@tablehead,@@number_of_cols=0,0
    @@flag_group=false
    @@dp=nil
    attr_reader :seg_name_html,:seg_name_html_tracker
    # data:: the document objects to process (an enumerable of paragraphs for
    #        #songsheet, a single paragraph for the per-paragraph helpers)
    # md::   the document metadata/parameter object
    def initialize(data='',md='')
      @data,@md=data,md
      @vz=SiSU_Env::Get_init.instance.skin
      @seg_name_html=@@seg_name_html
      @seg_name_html_tracker=@@tracker
      @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern   # looked up once, then shared by all instances
    end
    # Entry point: collects sub-tocs and endnotes, writes the segment files,
    # then resets the shared state. Any StandardError is handed to
    # SiSU_Errors::Info_error; @@seg_name is always cleared afterwards.
    def songsheet
      begin
        Seg.new(@data,@md).get_subtoc_endnotes
        Seg.new(@data,@md).articles
        Seg.new.cleanup # (((( added ))))
        #### (((( END )))) ####
      rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error
      ensure
        @@seg_name=[]
      end
    end
    protected
    # Walks @data twice: first to collect the segment names from the "4~name"
    # headings, then to track which heading levels are open and, at every new
    # level-4 heading or <ENDNOTES>/<EOF> marker, write out the segment
    # collected so far and start the header of the next one.
    def articles
      data=@data
      tracking,newfile=0,0
      @@is4=@@is3=@@is2=@@is1=0
      printed_endnote_seg='n'
      @h_sfx='.php' if @md.file_type =~/php/
      @h_sfx='.html' if @md.file_type =~/html/ #used in creating file, not to be omitted.
      end_marker=/^<ENDNOTES>|^<EOF>/
      # output path of the segment file for a given segment name
      seg_path=lambda { |name| "#{@md.dir_out}/#{@md.fnl[:pre]}#{name}#{@md.fnl[:mid]}#@h_sfx#{@md.fnl[:post]}" }
      data.each do |para|
        next unless para =~/^4~/
        seg_name=para[/^4~(\S+)/,1]
        @@seg_name << seg_name
        @@seg_ad[seg_name]=para[/.+?<:\d\s+(.+)\s*?>/,1] #watch
      end
      @@seg_name_html=@@seg_name
      @@seg_total=@@seg_name.length
      testforartnum=@@seg_name_html
      tell=SiSU_Screen::Ansi.new(@md.cmd,@@seg_name.length)
      tell.segmented unless @md.cmd =~/q/
      data.each do |para|
        if para =~/^4~.+/ #watch
          @@header4=if para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
            para.to_s[/^4~(?:\S+\s+)?(.+?)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,1]
          else para.to_s[/^4~(?:\S+\s+)?(.+)/,1]
          end
          @@is4=newfile=1
        end
        if para =~/^3~.+/
          @@header3=para.to_s[/^3~(?:~\S+\s+)?(.+)/,1]
          @@is4,@@is3=0,1
        end
        if para =~/^2~.+/
          @@header2=para.to_s[/^2~(?:~\S+\s+)?(.+)/,1]
          @@is4,@@is3,@@is2=0,0,1
        end
        if para =~/^1~.+/
          @@header1=para.to_s[/^1~(?:~\S+\s+)?(.+)/,1]
          @@is4,@@is3,@@is2,@@is1=0,0,0,1
        end
        boundary=(para =~ end_marker)
        if @@is4 == 1 or boundary
          if newfile == 1 or boundary
            newfile=0
            if para =~/^4~\S+/ or boundary # @@level4
              if tracking != 0
                # a previous segment is pending: finish it, write it, then start the next
                File.mkpath(@md.dir_out) unless FileTest.directory?(@md.dir_out) #bug - added specifically for nav! not needed by regular seg, check !!!
                Seg.new('',@md).tail
                @@filename_seg=File.new(seg_path.call(@@seg_name_html[tracking-1]),'w') if @@seg_name_html[tracking-1]
                if @@seg_name_html[tracking-1] =~/endnotes/
                  Seg.new.output('endnotes')
                else Seg.new.output
                end
                Seg.new.reinitialise
                Seg.new(para,@md).header_art
                Seg.new(para,@md).head
                if @@seg_name_html[tracking] =~/metadata/ # this is for metadata
                  # creates the metadata segment file empty and discards the header just built
                  @@filename_seg=File.new(seg_path.call(@@seg_name_html[tracking]),'w')
                  Seg.new.reinitialise
                  @@filename_seg.close #%(((( EOF )))) -->
                end
              else
                # very first segment: nothing to flush yet
                Seg.new(para,@md).header_art
                Seg.new(para,@md).head
              end
            end
            tracking=tracking + 1
          end
          @@get_hash_to=$1 if para =~/.+?<a name="(\d+)">.*/ # changed 2002w42, again w44 ! & again 2003w16
          @@get_hash_fn=$1 if para =~/^4~(\S+)/
          Seg.new(para,@md).markup
          Seg.new(para,@md).txt
          if testforartnum[tracking-1] =~/endnote/ and printed_endnote_seg == 'n'
            Seg.new(para,@md).endnote
            printed_endnote_seg='y'
          end
        end
      end
    end
    # For each item of @data: adds the dot navigation control when the item is
    # a heading (levels 0-6), and (re)sets the segment title/html head.
    def header_art
      @data.each do |para|
        format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md)
        if para =~/^[0-6]~/ #2004w27/5
          @@seg[:dot_nav] << if @@tracker < @@seg_total-1
            format_head_seg.dot_control_pre_next
          else format_head_seg.dot_control_pre
          end
        end
        ads=SiSU_HTML_promo::Ad.new(@md)
        @@seg[:title]=format_head_seg.head << ads.div.major
      end
    end
    # Builds the toc/navigation band and the heading lines (levels 1-4 that are
    # currently flagged open) for the segment being started, prepares the
    # endnotes-page header, and advances @@tracker.
    def head
      clean=/<!.*?!>|<:.*?>|<~\d+;(?:[ohum]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
      ocn=/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/   # captures the paragraph number of a heading
      note_ref=/ <a name="-[\d*+]+" href="#_[\d*+]+"> <sup>[\d*+]+<\/sup> <\/a>/
      format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md)
      unless @md.flag_pdf
        if @@tracker < @@seg_total-1
          if @@tracker == 0; @@segtocband << format_head_seg.toc_next3
          else @@segtocband << format_head_seg.toc_pre_next3
          end
        elsif @@tracker == @@seg_total
          # NOTE(review): unlike the pdf branch below (plain else), nothing is
          # added when @@tracker == @@seg_total-1 - confirm this is intended
          @@segtocband << format_head_seg.toc_pre3
        end
      else # identical code without .pdf
        if @@tracker < @@seg_total-1
          if @@tracker == 0; @@segtocband << format_head_seg.toc_next2
          else @@segtocband << format_head_seg.toc_pre_next2
          end
        else @@segtocband << format_head_seg.toc_pre2
        end
      end
      @p_num ||= ''
      if @@is1 == 1
        @dc_creator=%{<b><sup>©</sup> #{@md.dc_creator}</b>\n} if @md.dc_creator.to_s =~/\S/
        @@seg[:tocband] << format_head_seg.navigation_band(@@segtocband,@@seg[:dot_nav])
        @@seg[:headers] << format_head_seg.seg_head_escript if SiSU_HTML_Format_type::Head_seg.method_defined? :seg_head_escript #debug PHP move up in text #bug
        @@seg[:headers] << format_head_seg.title_banner(@md.title,@md.subtitle,@dc_creator).gsub(clean,'')
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,@@header1[ocn,1] || '')
        format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,@@header1,@p_num.ocn_display)
        @@seg[:headers] << format_seg.title_header1.gsub(clean,'')
        @@header1.gsub!(note_ref,'')   # in place: the heading is reused for later segments
      end
      if @@is2 == 1
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,@@header2[ocn,1] || '')
        format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,@@header2,@p_num.ocn_display)
        @@seg[:headers] << format_seg.title_header2.gsub(clean,'')
        @@header2.gsub!(note_ref,'')
      end
      if @@is3 == 1
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,@@header3[ocn,1] || '')
        format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,@@header3,@p_num.ocn_display)
        @@seg[:headers] << format_seg.title_header3.gsub(clean,'')
        @@header3.gsub!(note_ref,'')
      end
      if @@is4 == 1
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,@@header4[ocn,1] || '')
        format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,@@header4,@p_num.ocn_display)
        @@seg[:headers] << format_seg.title_header4.gsub(clean,'')
      end
      @@seg[:header_endnotes]=format_head_seg.title_endnote(@md.title,@md.subtitle,@dc_creator,@@seg[:dot_nav])
      @@tracker=@@tracker+1
    end
    # Converts each item of @data (skipping level-0 "0~" lines) to its html form
    # and appends it to @@seg[:main]. Grouped text (code/alt/verse/group) is
    # collected until its "-end" marker and then handled as one paragraph.
    def markup
      @debug=[]
      data=@data.dup #bugwatch tied
      @group_collect=[]
      group_open=/<:(?:code|alt|verse|group)>/
      group_close=/<:(?:code|alt|verse|group)-end>/
      data.each do |para|
        # unused in this method; NOTE(review): construction kept in case it has side effects - confirm and remove
        format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md)
        next if para =~/^0~/
        # capture group 1 is the paragraph number (indexing the matched string
        # with [1] would return its second character instead)
        paranum=para[/.+?<~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,1]
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) if paranum
        if para =~ group_open or @@flag_group
          if para =~ group_open
            @group_collect << @vz.margin_txt_0 + para
            @@flag_group=true
          elsif para =~ group_close # neither ideal nor necessary sort later
            @group_collect << para.gsub(group_close,'')
          else @group_collect << para
          end
          if para =~ group_close
            para = @group_collect.join
            @@flag_group=false
            @group_collect=[]
          end
        end
        if para !~/^[0-9]~/
          if para =~/(.*)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/
            one,two=$1,$2
            format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
            para=format_seg.no_paranum
          end
        end
        if para[/<~(\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp)>$/]
          @sto=SiSU_HTML::Source::Split_text_object.new(@md,para).lev_segname_para_ocn
          format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,@sto.text) if @sto.format =~/i[12]|_1?\*|<:i[12]>\s*_\*|null/
          para=case @sto.format # work area 2003w29 ||@|def lev_segname_para_ocn|
          when /^4~\S+/ then @sto.seg_lev_para_ocn.header4 # work on see Split_text_object
          when /^5~(?:~\S+)?/ then @sto.seg_lev_para_ocn.header5
          when /^6~(?:~\S+)?/ then @sto.seg_lev_para_ocn.header6
          when /^_\*$/ then @sto.seg_lev_para_ocn.bullet
          when /^_1\*$/
            format_txt_obj.gsub_body
            @sto.seg_lev_para_ocn.bullet_indent1
          when /^i1$/
            format_txt_obj.gsub_body
            @sto.seg_lev_para_ocn.indent1
          when /^i2$/
            format_txt_obj.gsub_body
            @sto.seg_lev_para_ocn.indent2
          when /^(?:verse|group|alt)$/
            @sto.seg_lev_para_ocn.para
          when /^code$/
            @sto.seg_lev_para_ocn.code
          when /null/
            if para =~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/
              para # already carries a margin, leave as is
            else
              format_txt_obj.gsub_body
              if para =~/^<!TZ!>/ then @sto.seg_lev_para_ocn.table_end
              else @sto.seg_lev_para_ocn.para
              end
            end
          else para
          end
        elsif para =~/¡|<!T[hZ]?/
          table=SiSU_HTML_shared::Table.new(para)
          para=table.table
        end
        if @md.flag_separate_endnotes
          para.gsub!(/"\s+href="#_(\d+)">/,%{" href=\"endnotes#{@md.sfx}#_\\1">}) #endnote- twice #removed file type
        end
        if para !~/#{@vz.margin_txt_w1}|#{@vz.margin_txt_w2}/
          if para[/(.*)<~0;(?:u|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/] #% watch u & m?
            one,two=$1,$2
            format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
            para=format_seg.seg_no_paranum #% undefined
          end
          para.gsub!(/\s*(-\{{2}~\d+|<:e[:_]\d+>).*/,'') #potentially dagerous - removes all paragraphs with <!e_!> #?? workpoint
          if para =~/<a name="_\d+" href="#-\d+"> <sup>/ #endnote- note-
            format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,para)
            para=format_seg.no_paranum
          end
        end
        if para =~/^4~\S+|4~!/
          para.gsub!(/4~\S+|<:[-_\w\d]?(-.+?-)?>|4~!.+/,'') #sort seg headers
          @@seg[:main] << para
          @@seg[:main] << @@seg_subtoc[@@get_hash_fn] #% insertion of sub-toc
        else
          para.gsub!(/<:[-_\w\d]?(-.+?-)?>|4~!.+/,'')
          @@seg[:main] << para unless @@flag_group   # held back while a group is still being collected
        end
      end
    end
    # Intentionally empty (called from #articles for every paragraph).
    def txt
    end
    # Intentionally empty (called once from #articles for the endnotes segment).
    def endnote
    end
    # Builds the foot of the current segment: the segment's own endnotes (when
    # auto endnotes are on), a spacer table, and the credits/closing html.
    def tail
      format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md)
      if @md.flag_auto_endnotes
        @@seg[:tail] << format_head_seg.endnote_mark
        @@seg[:tail] << @@seg_endnotes[@@get_hash_fn] #endnotes deposited at end of individual segments||@|EXTRACTION OF ENDNOTES|
      end
      @@seg[:tail] << '<table summary="whitespace"><tr><td> </td></tr></table>'
      ads = SiSU_HTML_promo::Ad.new(@md)
      @@seg[:credits] << format_head_seg.credit << ads.div.close << ads.display << format_head_seg.html_close
    end
    # Writes the collected segment parts to the open segment file and closes it.
    #
    # type:: anything matching /endnote/ writes the endnotes header and the
    #        collected endnotes instead of the regular headers and main text.
    #
    # Nothing is written unless a title string has been set (after
    # #reinitialise the title is an empty array); in that case a file left
    # open by the caller is still closed so the handle is not leaked.
    def output(type='')
      title=@@seg[:title]
      if title.respond_to?(:to_str) and title =~/\S/
        begin
          @@filename_seg << title
          #@@filename_seg << @@seg[:dot_nav] #places dot control at very top of segment
          @@filename_seg << @@seg[:tocband]
          if type !~/endnote/
            @@filename_seg << @@seg[:headers]
            @@filename_seg << @@seg[:main]
          else
            @@filename_seg << @@seg[:header_endnotes]
            @@filename_seg << @@seg[:endnote_all]
          end
          @@filename_seg << @@seg[:tail]
          @@filename_seg << @@seg[:tocband]
          @@filename_seg << @@seg[:credits]
        ensure
          @@filename_seg.close unless @@filename_seg.closed?   # also on a failed write
        end
      elsif @@filename_seg.respond_to?(:closed?) and not @@filename_seg.closed?
        @@filename_seg.close
      end
    end
    # Empties the per-segment buffers ready for the next segment.
    def reinitialise
      @@seg[:title],@@seg[:dot_nav],@@segtocband,@@seg[:tocband],@@seg[:headers],@@seg[:main],@@seg[:tail],@@seg[:credits]=Array.new(8){[]}
    end
    # Resets per-segment and per-document state at the end of a run.
    def cleanup
      reinitialise
      @@seg_total,@@tracker=0,0
      @@seg_endnotes,@@seg_subtoc={},{}
      @@seg_endnotes_array,@@seg_subtoc_array,@@heading_endnotes_array=[],[],[]
      @@seg[:endnote_all]=[]
    end
    # First pass over @data (paragraphs are modified in place):
    # * files the endnotes and the level 5/6 sub-toc entries collected so far
    #   under the name of the segment they belong to,
    # * formats each endnote for the segment tail and for the combined
    #   endnotes page, and removes the endnote markup from the paragraph.
    def get_subtoc_endnotes #get endnotes & sub-table of contents subtoc
      subtoc_entry=/^[56]~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/
      @data.each do |para|
        para.gsub!(/<a name=\"h\d.*?\">(.+?)<\/a>/mi,'\1')
        if @md.flag_auto_endnotes
          if para =~/^[1234]~/ and not @@fn.empty?
            @@seg_endnotes[@@fn] = []
            @@seg_endnotes[@@fn] << @@seg_endnotes_array
            @@seg_endnotes_array=[] if para=~/^4~/
          end
        end
        if para =~/^4~/
          #% EXTRACTION OF SUB-TOCs
          @@seg_subtoc[@@fn]=@@seg_subtoc_array
          @@seg_subtoc_array=[]
          #% SEGMENT NAME, after EXTRACTION OF ENDNOTES & SUB-TOCs
          if para =~/^4~(\S+).+?<~(\d+);(?:[oh]|4:)\d+;\w\d+><#@dp:#@dp>$/
            @@fn,@@to_lev4=$1,$2 # changed 2004w07 #endnotes and sub-tocs
          elsif para =~/^4~(\S+)/
            @@fn,@@to_lev4=$1,'nonum' # changed 2005w13
          end
        end
        if para =~ subtoc_entry
          para.gsub!(/ <\/a>/,' ')
          case para # series changed 2002w42
          when /^5~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/ #remove [u]? req by pg texts, revist
            one,two=$1,$2
            format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
            para=format_seg.subtoc_lev5
          when /^6~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/
            one,two=$1,$2
            format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
            para=format_seg.subtoc_lev6
          end
          @@seg_subtoc_array << para
        end
        if @md.flag_auto_endnotes
          if para =~/~[{\[][\d*+]+ <a name="_[\d*+]+"/ # endnote-
            # ~{ }~ notes first, then ~[* ]~, then ~[+ ]~
            notes=[/~\{.+?\}\~/m,/~\[[*]\d+\s.+?\]\~/m,/~\[[+]\d+\s.+?\]\~/m].map { |re| para.scan(re) }.flatten
            notes.each do |note|
              note_match_seg=note.dup
              e_n=note_match_seg[/(?:~\{[\d*+]+|~\[[*+]\d+)\s+(.+?)[}\]]~/m,1]
              e_n.split(/<br \/>/).each do |e|
                format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,e)
                @@seg_endnotes_array << if e =~/<:i[12]>/
                  format_seg.endnote_body_seg_tail_indent
                else format_seg.endnote_body_seg_tail
                end
              end
              #% creation of separate end segment/page of all endnotes referenced back to reference segment
              parts=/(?:~\{[\d*+]+|~\[[*+]\d+)\s+(.+?href=")(#-[\d*+]+".+)[}\]]~/mi
              one=note_match_seg[parts,1] #note~ [a name]
              two=note_match_seg[parts,2] #note-
              format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
              @@seg[:endnote_all] << format_seg.endnote_seg_body(@@fn) #BUG WATCH 200408
            end
            para.gsub!(/~[{\[].+?[}\]]~\s*/m,' ')
          end
        end
      end
    end
  end
end
__END__