-*- mode: org -*- #+TITLE: sisu sst #+DESCRIPTION: documents - structuring, various output representations & search #+FILETAGS: :sisu:sst: #+AUTHOR: Ralph Amissah #+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] #+COPYRIGHT: Copyright (C) 2015 - 2021 Ralph Amissah #+LANGUAGE: en #+STARTUP: content hideblocks hidestars noindent entitiespretty #+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t #+PROPERTY: header-args :exports code #+PROPERTY: header-args+ :noweb yes #+PROPERTY: header-args+ :eval no #+PROPERTY: header-args+ :results no #+PROPERTY: header-args+ :cache no #+PROPERTY: header-args+ :padline no #+PROPERTY: header-args+ :mkdirp yes * sst_from_xml.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/sst_from_xml.rb" <> module SiSU_sstFromXML require_relative 'se' # se.rb class Convert begin require 'rexml/document' include REXML rescue LoadError SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia). error('rexml/document NOT FOUND (LoadError)') end def initialize(opt) @opt=opt @sisu,@sisu_base=[],[] @ver=SiSU_Env::InfoVersion.instance.get_version end def tell(filename,type) SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], "XML #{type} to SiSU sst", "#{filename} --> #{filename}.sst" ).green_hi_blue end def read xml_to_sisu end def markup_head(text) text.strip! text.gsub!(/(?:\s*\n|\s\s+)/,' ') text.gsub!(/
(.+?)<\/header>/,'\1') text.gsub!(/<(\w+)>(.+?)<\/\w+>/,'@\1: \2') text.gsub!(/
<(\w+)>(.+?)<\/\w+><\/header>/,'@\1: \2') text.gsub!(/\s +/,' ') text.strip! text + "\n\n" end def markup(text) text.strip! text.gsub!(/(?:\s*\n|\s\s+)/,' ') text.gsub!(/(.+?)<\/text>/,':A~ \1') text.gsub!(/(.+?)<\/text>/,':B~ \1') text.gsub!(/(.+?)<\/text>/,':C~ \1') text.gsub!(/(.+?)<\/text>/,'1~ \1') text.gsub!(/(.+?)<\/text>/,'2~ \1') text.gsub!(/(.+?)<\/text>/,'3~ \1') text.gsub!(/(.+?)<\/text>/,'\1') text.gsub!(/(.+?)<\/endnote>/,'~{ \1 }~') text.gsub!(/
/,'
') text.gsub!(/(.+?)<\/i>/,'/{\1}/') text.gsub!(/(.+?)<\/b>/,'*{\1}*') text.gsub!(/(.+?)<\/u>/,'_{\1}_') text.gsub!(/(\s*.+?\s*)<\/sem:\1>/,';{ \2 };\1') text.gsub!(/(\s*.+?\s*)<\/sem:\1>/,':{ \2 }:\1') text.gsub!(/(\s*.+?\s*)<\/sem:\1>/,'\1:{ \2 }:\1') text.gsub!(/(\s*.+?\s*)<\/sem:\1>/,'\1:{ \2 }:\1') text.gsub!(/\s +/,' ') text.strip! text + "\n\n" end def sax out_file=File.new(@output_file_name,'w') head=@doc.root.get_elements('//head/header') body=@doc.root.get_elements('//object/text') out_file.puts "% SiSU text #{@ver.version} (generated from a SiSU XML markup representation)\n\n" head.each do |x| if x.name=='header' head=markup_head(x.to_s) out_file.puts head end end body.each do |x| if x.name=='text' body=markup(x.to_s) out_file.puts body end end end def node sax end def dom raise "#{__FILE__}::#{__LINE__} xml dom representation to sst not yet implemented (experimental simple xml representations sax and node to sst are in place)." end def xml_to_sisu unless @opt.files.empty? @opt.files.each do |xml| @sisu_base=[] if xml =~/\.sx[sdn]\.xml$/ begin @doc_str=IO.readlines(xml,'').join("\n") @output=File.new("#{xml}.sst",'w') @doc=REXML::Document.new(@doc_str) @output_file_name="#{Dir.pwd}/#{xml}.sst" @el=[] rescue REXML::ParseException end end if xml =~/\.sxs\.xml$/ unless @opt.act[:quiet][:set]==:on tell(xml,'sax') end sax elsif xml =~/\.sxd\.xml$/ unless @opt.act[:quiet][:set]==:on tell(xml,'dom') end dom elsif xml =~/\.sxn\.xml$/ unless @opt.act[:quiet][:set]==:on tell(xml,'node') end node else puts "filename not recognised: << #{xml} >>" end @output << @sisu_base end else puts '.xml file for conversion to sisu expected' end puts @opt.files.inspect end end end __END__ #+END_SRC * sst_to_s_xml_sax.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/sst_to_s_xml_sax.rb" <> module SiSU_SimpleXML_ModelSax require_relative 'se_hub_particulars' # se_hub_particulars.rb include SiSU_Particulars require_relative 'dp' # dp.rb include SiSU_Param require_relative 'se' # se.rb 
include SiSU_Env require_relative 'ao_doc_str' # ao_doc_str.rb require_relative 'xml_shared' # xml_shared.rb include SiSU_XML_Munge require_relative 'shared_sem' # shared_sem.rb require_relative 'xml_format' # xml_format.rb include SiSU_XML_Format require_relative 'rexml' # rexml.rb include SiSU_Rexml @@alt_id_count=0 @@tablefoot='' class Convert @@fns=nil def initialize(opt) @opt=opt @particulars=SiSU_Particulars::CombinedSingleton.instance.get_env_md(opt) end def read begin @md=@particulars.md #bug, relies on info persistence, assumes -m has previously been run @env=@particulars.env SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], 'invert', 'XML SAX', "#{@md.fns} -> #{@md.fn[:sxs]}" ).colorize unless @opt.act[:quiet][:set]==:on if (@opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) SiSU_Screen::Ansi.new( @opt.act[:color_state][:set], @opt.fns, "#{Dir.pwd}/#{@md.fn[:sxs]}" ).flow end unless @@fns==@opt.fns @@fns=@opt.fns @@fns_array=[] end @fns_array=if @@fns_array.empty?; read_fnm else @@fns_array.dup #check end SiSU_SimpleXML_ModelSax::Convert::Songsheet.new(@fns_array,@particulars).songsheet rescue SiSU_Errors::Rescued.new($!,$@,@opt.cmd,@opt.fns).location do __LINE__.to_s + ':' + __FILE__ end ensure #file closed in songsheet end end def read_fnm ao=[] if FileTest.file?("#{Dir.pwd}/#{@opt.fns}") ao=IO.readlines("#{Dir.pwd}/#{@opt.fns}","\n\n") else STDERR.puts 'Error' end end private class Songsheet def initialize(data,particulars) @data,@particulars,@env,@md=data,particulars,particulars.env,particulars.md end def songsheet begin SiSU_SimpleXML_ModelSax::Convert::Scroll.new(@data,@particulars).songsheet if (@md.opt.act[:verbose][:set]==:on \ || @md.opt.act[:verbose_plus][:set]==:on \ || @md.opt.act[:maintenance][:set]==:on) SiSU_SimpleXML_ModelSax::Convert::Tidy.new(@md,@env).xml # test wellformedness, comment out when not in use end SiSU_Rexml::Rexml.new(@md,@md.fn[:sxs]).xml if @md.opt.act[:maintenance][:set]==:on # test rexml 
parsing, comment out when not in use #debug rescue SiSU_Errors::Rescued.new($!,$@,@md.opt.cmd,@md.fns).location do __LINE__.to_s + ':' + __FILE__ end ensure end end end class Scroll require_relative 'txt_shared' # txt_shared.rb require_relative 'css' # css.rb include SiSU_TextUtils @@xml={ body: [], open: [], close: [], head: [] } def initialize(data='',particulars='') @data,@env,@md=data,particulars.env,particulars.md @regx=/^(?:#{Mx[:mk_o]}:p[bn]#{Mx[:mk_c]}\s*)?(?:#{Mx[:lv_o]}[1-9]:(\S*)#{Mx[:lv_c]})?(.+)/ @tab="\t" if @md @trans=SiSU_XML_Munge::Trans.new(@md) end @sys=SiSU_Env::SystemCall.new end def songsheet pre markup post publish end protected def embedded_endnotes(para='') para.gsub!(/~\{(.+?)\}~/,'\1 ') para.gsub!(/~\[([*+])\s+(.+?)\]~/,'\2 ') end def xml_head(meta) txt=meta.text txt.gsub!(/\/{(.+?)}\//,'\1') txt.gsub!(/[*!]{(.+?)}[*!]/,'\1') txt.gsub!(/_{(.+?)}_/,'\1') txt.gsub!(/-{(.+?)}-/,'\1') txt.gsub!(//,'
') txt.gsub!(/ & /,' and ') @@xml[:head] <<< #{@tab*2}<#{meta.el}> #{@tab*3}#{txt} #{@tab*2} #{@tab}
WOK end def xml_sc(md='') sc=if @md.sc_info < #{@md.sc_filename} #{@md.sc_number} #{@md.sc_date} WOK else '' end @@xml[:sc]=sc end def xml_structure(para='',lv='',hname='') #extracted endnotes lv=lv.to_i lv=nil if lv==0 embedded_endnotes(para) if para[@regx] paragraph="#{para[@regx,2]}" util=SiSU_TextUtils::Wrap.new(paragraph,70) wrapped=util.line_wrap end @@xml[:body] << "#{@tab*0}" if para[@regx] @@xml[:body] << "#{@tab*1}" << "\n" if para[@regx] @@xml[:body] << if lv; %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} << "\n" elsif wrapped =~/\A%%?\s+/; %{\n} # comments else %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} # main text, contents, body KEEP end @@xml[:body] << "#{@endnotes}" if @endnotes # main text, endnotes KEEP @@xml[:body] << "#{@tab*0}" << "\n" if para[@regx] @endnotes=[] end def block_structure(para='') para.gsub!(/<:block(?:-end)?>/,'') para.strip! @@xml[:body] << %{#{@tab*0}} @@xml[:body] << %{#{@tab*1}#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" end def group_structure(para='') para.gsub!(/<:group(?:-end)?>/,'') para.strip! @@xml[:body] << %{#{@tab*0}} @@xml[:body] << %{#{@tab*1}#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" end def poem_structure(para='') para.gsub!(/<:verse(?:-end)?>/,'') para.strip! @@xml[:body] << %{#{@tab*0}} @@xml[:body] << %{#{@tab*1}#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" << "\n" end def code_structure(para='') para.gsub!(/<:code(?:-end)?>/,'') para.strip! 
@@xml[:body] << %{#{@tab*0}} @@xml[:body] << %{#{@tab*1}#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" << "\n" end def table_structure(table='') #tables @@xml[:body] << %{#{@tab*0}} @@xml[:body] << %{#{@tab*1}#{table}\n#{@tab*1}\n} # unless lv # main text, contents, body KEEP @@xml[:body] << "#{@tab*0}" << "\n" #if para[@regx] @endnotes=[] end def tidywords(wordlist) wordlist.each do |x| x.gsub!(/&/,'&') unless x =~/&\S+;/ end end def xml_clean(para) para.gsub!(/#{Mx[:gl_o]}[1-9]:\S*?#{Mx[:gl_c]}/,'') #Danger, watch para end def markup data=[] xml_sc(@md) @endnotes,@level,@cont,@copen,@xml_contents_close=[],[],[],[],[] @rcdc=false (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @xml_contents_close[x]='' } @data.each do |para| data << SiSU_AO_DocumentStructureExtract::Structure.new(@md,para).structure #takes on Mx marks end data.each do |para| if para !~/^\s*(?:%+ |<:code>)/ if @md.sem_tag and para =~/[:;]\{|\}[:;]/ para=@trans.xml_semantic_tags(para) end if para =~/[:;]\{|\}[:;]/ para=SiSU_Sem::Tags.new(para,@md).rm.all end end para=@trans.markup_light(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 para.gsub!(/^@(\S+?):/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}") if para =~/\A#{Mx[:lv_o]}@(\S+?)#{Mx[:lv_c]}\s*(.+?)\Z/m # for headers d_meta=SiSU_TextUtils::HeaderScan.new(@md,para).meta if d_meta; xml_head(d_meta) end end para='' if para=~/#{Mx[:lv_o]}@\S+?#{Mx[:lv_c]}/ if @rcdc==false \ and (para =~/~metadata/ or para =~/^1~meta\s+Document Information/) @rcdc=true end if para !~/(^@\S+?:|^\s*$||)/ @sto=SiSU_text_parts::SplitTextObject.new(@md,para).lev_segname_para unless @rcdc SiSU_XML_Format::FormatScroll.new(@md,@sto.text) if @sto.format =~/i[1-9]|ordinary/ case @sto.format when /^(1):(\S*)/ xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body1 when /^(2):(\S*)/ xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body2 
when /^(3):(\S*)/ xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body3 when /^(4):(\S*)/ # work on see SplitTextObject xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body4 when /^(5):(\S*)/ xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body5 when /^(6):(\S*)/ xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body6 else if para =~ /<:verse>/ para=poem_structure(para) elsif para =~ /<:group>/ para=group_structure(para) elsif para =~ /<:code>/ para.gsub!(//,'>') para=code_structure(para) elsif para =~// \ and para =~/^(-\{{2}~\d+|)/ # -endnote para='' end if para =~/.*<:#>.*$/ para=case para when /<:i1>/ format_text=FormatTextObject.new(para,'') format_text.scr_inden_ocn_e_no_paranum when /<:i2>/ format_text=FormatTextObject.new(para,'') format_text.scr_inden_ocn_e_no_paranum end end if para =~/<:center>/ one,two=/(.*)<:center>(.*)/.match(para)[1,2] format_text=FormatTextObject.new(one,two) para=format_text.center end end para.gsub!(/<:\S+?>/,'') para.gsub!(//,'') ## Clean Prepared Text #bugwatch reinstate para end para end 6.downto(4) do |x| y=x - 1; v=x - 3 @@xml[:body] << "#{@tab*5}\n#{@tab*y}\n" if @level[x]==true end 3.downto(1) do |x| y=x - 1 @@xml[:body] << "#{@tab*y}\n" if @level[x]==true end end def pre rdf=SiSU_XML_Tags::RDF.new(@md) dir=SiSU_Env::InfoEnv.new @@xml[:head],@@xml[:body]=[],[] css=SiSU_Env::CSS_Select.new(@md).xml_sax encoding=if @sys.locale =~/utf-?8/i then '' else '' end @@xml[:open] =< #{rdf.comment_xml_sax} WOK @@xml[:head] << "\n" @@xml[:body] << "\n" end def post @@xml[:head] << @@xml[:sc] @@xml[:head] << "\n" @@xml[:body] << "\n" @@xml[:close] = "\n" end def publish content=[] content << @@xml[:open] << @@xml[:head] << @@xml[:body] << @@xml[:metadata] content << @@xml[:owner_details] if @md.stmp =~/\w\w/ content << @@xml[:tail] << @@xml[:close] Output.new(content.join,@md).xml @@xml={} end end class Output def initialize(data,md) 
@data,@md=data,md end def xml @sisu=[] @data.each do |para| para.gsub!(/<:\S+?>/,'') para.gsub!(//,'') para="#{para}\n" unless para.empty? @sisu << para end new_file_data=@sisu.join @sisu=new_file_data.scan(/.+/) SiSU_Env::FileOp.new(@md).mkdir filename_sxm=SiSU_Env::FileOp.new(@md,@md.fn[:sxs]).mkfile_pwd if filename_sxm.is_a?(File) @sisu.each {|para| filename_sxm.puts para} filename_sxm.close else puts 'file not created, is directory writable?' end end end class Tidy def initialize(md,dir) @md,@env=md,dir @prog=SiSU_Env::InfoProgram.new end def xml if @prog.tidy !=false #note values can be other than true if (@md.opt.act[:verbose_plus][:set]==:on \ || @md.opt.act[:maintenance][:set]==:on) SiSU_Screen::Ansi.new( @md.opt.act[:color_state][:set], 'invert', 'Using XML Tidy', 'check document structure' ).colorize unless @md.opt.act[:quiet][:set]==:on SiSU_Screen::Ansi.new( @md.opt.act[:color_state][:set], '', '', 'check document structure' ) tell.grey_open unless @md.opt.act[:quiet][:set]==:on tidyfile='/dev/null' #don't want one or screen output, check for alternative flags tidy =SiSU_Env::SystemCall.new("#{Dir.pwd}/#{@md.fn[:sxs]}",tidyfile) tidy.well_formed? 
tell.p_off unless @md.opt.act[:quiet][:set]==:on end end end end end end __END__ #+END_SRC * sst_identify_markup.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/sst_identify_markup.rb" <> module SiSU_Markup class MarkupInform attr_accessor :version def initialize(version,message,declared_markup='',declared_type='') @version,@message,@declared_markup,@declared_type=version,message,declared_markup,declared_type end def version @version end def message @message end def declared_version @declared_markup end def declared_type @declared_type end def history MarkupHistory.new(@version).query end end class MarkupIdentify def initialize(opt) @opt=opt @description='This is a script attempts to identify the version of markup used in SiSU (and provides information on changes in markup)' end def help print <= 200 if y =~ /(?:~{\*+|~\[\*|~\[\+)\s/ version='0.42' markup=MarkupInform.new(version,'0.42' + oldlinks,@declared_markup,@declared_type) break end end if (y =~/^1~/ and f =~/(?:\.sst|\.ssm|\.ssi)/) \ and not @flag_38 version='0.37' markup=MarkupInform.new(version,'0.37 is substantially 0.16 - 0.36 markup with new file-extension' + oldlinks,@declared_markup,@declared_type) break end if y =~/^1~/ \ and f =~/\.([rs])([123])/ \ and not @flag_38 t,n=$1,$2 version='0.16' instruct=if t =~/r/ " (change file extension from .#{t}#{n} to .ssm)" else " (change file extension from .#{t}#{n} to .sst)" end markup=MarkupInform.new(version,'0.16 - 0.36' + instruct + links,@declared_markup,@declared_type) break end if y =~/^0\{~/ \ and not @flag_38 version='0.1' markup=MarkupInform.new(version,'0.1 - 0.15',@declared_markup,@declared_type) break end if y =~/^0\{{3}/ \ and not @flag_38 markup=MarkupInform.new('circa. 1997','old, check date',@declared_markup,@declared_type) break end markup='Not a recognised file type ' end end markup else MarkupHistory.new(@opt).help_query end end def determine_markup_version if @opt.fns.nil? \ or @opt.fns.empty? 
MarkupHistory.new(@opt).help_identify end if File.exist?(@opt.fns) if @opt.fns =~/\.(?:sst|ssm|ssi|s[123i]|r[123])/ markup=identify #(@opt.fns) if defined? markup.version unless @opt.act[:quiet][:set]==:on message=unless markup.declared_version.empty? "#{@opt.fns}\n markup Type Declared as SiSU #{markup.declared_version} #{markup.declared_type}\n appears to be SiSU #{markup.version}" else "Markup Type Appears to be SiSU #{markup.version}\n in file #{@opt.fns}" end puts message puts %{"sisu --query-#{markup.version}" for a brief description of markup type} end end else puts 'file-type not recognised: ' + @opt.fns end else puts 'file not found: ' + @opt.fns end (defined? markup.version) \ ? markup.version : 'markup type/version not determined' end def markup_version? if @opt.fns.empty? @opt.files.each do |fns| @opt.fns=fns determine_markup_version end else determine_markup_version end end end class MarkupHistory def initialize(opt) @opt=opt end def sisu_3_0 < f=$* cf=f[0].to_s f.shift match_and_replace=[] unless f.length > 0; f=Dir.glob("[a-z]*.ss?") #restricted to sisu type files, it need not be end puts "SiSU files:" puts f f.each do |x| SiSU_Markup::MarkupIdentify.new(x).markup_version? end #+END_SRC * sst_do_inline_footnotes.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/sst_do_inline_footnotes.rb" <> module SiSU_ConvertFootnotes require_relative 'se' # se.rb include SiSU_Env require_relative 'dp' # dp.rb include SiSU_Param require_relative 'ao_syntax' # ao_syntax.rb include SiSU_AO_Syntax require_relative 'i18n' # i18n.rb class Instantiate < SiSU_Param::Parameters::Instructions @@flag={} #Beware!! 
def initialize @@flag['table_to']=false @@counter=@@column=@@columns=@@flag_vocab=0 @@endnote={} @@endnote_array=@@word_mode=[] @@endnote_call_counter=1 @@line_mode='' end end class Source #{@md.fns}.fn" ).txt_red unless @md.opt.act[:quiet][:set]==:on ao.each {|s| ao_array << "#{s.strip}\n\n" unless s.strip.empty?} ao_array else SiSU_Screen::Ansi.new( @md.opt.act[:color_state][:set], '*WARN* no footnote conversion done, problem with source file', 'to override use --convert=footnote-force (this is not advised)' ).warn unless @md.opt.act[:quiet][:set]==:on '' end end def read_fnm ao=[] ao=(FileTest.file?(@fnm)) \ ? (File.open(@fnm){ |f| ao=Marshal.load(f)}) : (SiSU_ConvertFootnotes::Source.new(@opt).create_ao) #watch end end class Output def initialize(md,data) @md,@data=md,data @my_make=SiSU_Env::CreateFile.new(@md.fns) SiSU_Env::InfoEnv.new(@md.fns) @hard="#{Dir.pwd}/#{@md.fns}.fn" end def hard_output filename_note=@my_make.file_note @data.each {|s| filename_note.puts s.strip + "\n\n" unless s.strip.empty?} end end class Make @@endnote={} @@endnote_array=@@word_mode=[] @@endnote_call_counter=1 @@comment='%' @@flag={ ['table_to']=>false } def initialize(md,data) @md,@data=md,data @@word_mode=[] @env=SiSU_Env::InfoEnv.new(@md.fns) l=SiSU_Env::StandardiseLanguage.new(@md.opt.lng).language @language=l[:n] @translate=SiSU_Translate::Source.new(@md,@language) end def reset @@counter=@@column=@@columns=@@flag_vocab=0 @@endnote={} @@endnote_array=@@word_mode=[] @@endnote_call_counter=1 @@line_mode='' end def song reset data=@data @metafile="#{@env.processing_path.ao}/#{@md.fns}.meta" SiSU_Env::CreateFile.new(@md.fns) data=data.join.split("\n\n") data_new=[] data.each do |x| data_new << (x =~ /\n\n/m) \ ? 
(x.split(/\n\n+/)) : x end data=data_new.flatten data=SiSU_ConvertFootnotes::Make.new(@md,data).character_check data=SiSU_ConvertFootnotes::Make.new(@md,data).endnotes SiSU_ConvertFootnotes::Output.new(@md,data).hard_output reset data end protected def vocabulary data=@data tuned_file,vocab_insert=[],[] data.each do |para| if para =~/^1~/ \ and @@flag_vocab==0 vocab_insert << '@vocabulary: lex' << "\n\n" << para tuned_file << vocab_insert unless para.nil? @@flag_vocab=1 else tuned_file << para unless para.nil? end end tuned_file end def character_check reset data=@data @tuned_file=[] endnote_no=1 data.each do |para| para.strip! para.gsub!(/^[{~}]\s*$/,'') para.gsub!(/^#{@@comment}.*/,'') #remove comment and divider #% para.gsub!(/<~#>|~#\s*/,'~#') para.gsub!(/-#\s*/,'-#') para.gsub!(/(~\{ )\s+/,'\1') para.gsub!(/ \/\//,'
') #added 2004w29 para.gsub!(/
/,'
') #needed by xml, xhtml etc. para.gsub!(/`/,"'") para.gsub!(/\342\200\231/,"'") #if para =~/’/ #Avoid #‘ ’ #“ ” para.gsub!(/\t/,' ') para.gsub!(/�/,' ') #watch, replace with char code para.gsub!(/[“”]/,'""') para.gsub!(/[­–—]/,'-') #— – chk para.gsub!(/·/,'*') para.gsub!(/\\copy(?:right)?\b/,'©') para.gsub!(/\\trademark\b|\\tm\b/,'®') para.gsub!(/\44/,'$') #$ watch para=para + "\n" case para when /\^~/ # endnotes #% Note must do this first (earlier loop) and then enter gathered data into ~^\d+ sub_para=para.dup @@endnote_array << sub_para.gsub!(/\n/,'').gsub!(/\^~\s+(.+)\s*/,'~{ \1 }~').strip endnote_no+=1 para=nil if para =~/\^~ .+/ #removes 'binary' endnote now in endnote array for later insertion end @tuned_file << para unless para.nil? end @tuned_file end def name_endnote_seg data=@data @tuned_file=[] data.each do |para| para.gsub!(/<:3>\s*<:ee>/, "#{@@endnote['special_align']}


\r " + "#{@@endnote['seg_name_3']}

" + "#{@@endnote['special_align_close']}") para.gsub!(/<:2>\s*<:ee>/, "#{@@endnote['special_align']}


\r " + "#{@@endnote['seg_name_2']}

" + "#{@@endnote['special_align_close']}") para.gsub!(/<:1>\s*<:ee>/, "#{@@endnote['special_align']}


\r " + "#{@@endnote['seg_name_1']}

" + "#{@@endnote['special_align_close']}") @tuned_file << para end if @md.flag_auto_endnotes \ and @md.flag_separate_endnotes_make @tuned_file << "\n1~endnotes Endnotes" #prob numbering, revisit end @tuned_file << "\n" @tuned_file end def owner_details_seg data << '1~owner.details Owner Details' end def number_sub_heading(para,num,title_no) case para when /#{num}~- / then para.gsub!(/#{num}~- /,"#{title_no} ") when /^#{num}~#\s*/ then para.gsub!(/^#{num}~#\s*/,"#{title_no} ") when /^#{num}~[a-z_\.]+ / para.gsub!(/^#{num}~([a-z_\.]+)\s+(.+)/i,%{#{num}~\\1 #{title_no} \\2 <:name##{title_no}>}) else para.gsub!(/^#{num}~ /,"#{num}~#{title_no} #{title_no} ") #main end if @md.toc_lev_limit \ and @md.toc_lev_limit < num para.gsub!(/^[2-6]~(?:~\S+)?\s*/,'!_ ') end para end def set_heading_top #% make sure no false positives unless @md.set_heading_top if (@md.opt.act[:verbose_plus][:set]==:on \ || @md.opt.act[:maintenance][:set]==:on) puts "\tdocument contains no top level heading, (will have to manufacture one)" end data=@data @tuned_file=[] data.each do |para| unless @md.set_heading_top if para !~/^(?:@\S+:|0~\S+)\s/m \ and para !~/\A\s*\Z/m @md.set_heading_top=true head=(@md.title.full) \ ? (":A~ #{@md.title.full}") : (':A~ [no title provided]') @tuned_file << head end end @tuned_file << para end @tuned_file end end def set_heading_seg #% make sure no false positives unless @md.set_heading_seg if (@md.opt.act[:verbose_plus][:set]==:on \ || @md.opt.act[:maintenance][:set]==:on) puts "\tdocument contains no segment level, (will have to manufacture one)" end data=@data @tuned_file=[] data.each do |para| unless @md.set_heading_seg if para !~/^(?:@\S+:|0~\S+|:[ABC]~)/m \ and para !~/\A\s*\Z/m \ and para !~/<:p[bn]>/ @md.set_heading_seg=true head=(@md.title.full) \ ? 
("1~seg [#{@md.title.full}]") : ('1~seg [segment]') @tuned_file << head end end @tuned_file << para end @tuned_file end end def set_header_title #% make sure no false positives unless @md.set_header_title if (@md.opt.act[:verbose_plus][:set]==:on \ || @md.opt.act[:maintenance][:set]==:on) puts "\t no document title provided, (will have to manufacture one)" end data=@data @tuned_file=[] data.each do |para| unless @md.set_header_title if para !~/^%{1,2}\s/m \ and para !~/\A\s*\Z/m @tuned_file << "0~title #{@md.heading_seg_first}" @md.title.full=@md.heading_seg_first @md.set_header_title=true end end @tuned_file << para end @tuned_file end end def endnotes #% endnote work zone data=@data @tuned_file=[] endnote_ref=1 data.each do |para| case para # manually numbered endnotes --> when /~\{\s+.+?\}~/ # auto-numbered endnotes --> para.gsub!(/\s*\}~/,' }~') # required 2003w31 @word_mode=para.scan(/\S+/) word_mode=SiSU_ConvertFootnotes::Make.new(@md,@word_mode).endnote_call_number para=word_mode.join(' ') endnote_ref+=1 when /~\^(?:\s|$)|<:e>/ #%Note inserts endnotes previously gathered from /^(|[-~]\{{3})/ (in earlier loop) word_mode=para.scan(/\S+/) word_mode=SiSU_ConvertFootnotes::Make.new(@md,word_mode).endnote_call_number para=word_mode.join(' ') endnote_ref+=1 end @tuned_file << para end @tuned_file end def endnote_call_number data=@data data.each do |word| case word when /~\{/ unless word =~/~\{\*+/ @@endnote_call_counter+=1 end when /~\^|<:e>/ word.gsub!(/~\^|<:e>/,"#{@@endnote_array[@@endnote_call_counter-1]}") @@endnote_call_counter+=1 end end end def strip_clean_extra_spaces(s) # ao output tuned s=s.dup s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') s=s.gsub(/ [ ]+/,' ') s=s.gsub(/^ [ ]+/,'') s=s.gsub(/ [ ]+$/,'') s=s.gsub(/(<\/[bi]>')[ ]+(s )/,'\1\2') end def strip_clean_of_markup(s) # used for digest, define rules, make same as in db clean s=s.dup s=s.gsub(/(?:<\/?[ib]>|^:[A-C]~\S+|^[1-6]~\S+|~\{\d+\s.+?\}~)/,'') # markup and endnotes removed #% same as db clean --> 
s=s.gsub(/(.+?)<\/del>/,'DELETED(\1)') # deletions s=s.gsub(/(\d+)<\/sup>/,'[\1]') s=s.gsub(/(?: \\;|#{Mx[:nbsp]})+/,' ') #checking source Mx not necessary s=s.gsub(/\{.+?\.(?:png|jpg|gif).+?\}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search s=s.gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image] ') # else image names found in search, re-check s=s.gsub(/\s\s+/,' ') s=s.strip end end end __END__ @particulars=SiSU_Particulars::CombinedSingleton.instance.get_all(opt) ao_array=@particulars.ao_array # ao file drawn here #+END_SRC * sst_convert_markup.rb #+BEGIN_SRC ruby :tangle "../lib/sisu/sst_convert_markup.rb" <> module SiSU_Modify require_relative 'sst_identify_markup' # sst_identify_markup.rb require_relative 'sst_from_xml' # sst_from_xml.rb require_relative 'utils_response' # utils_response.rb class ConvertMarkup include SiSU_Response def initialize(opt) @opt=opt @description='This is a script that contains canned text conversions for reuse' @warn='WARNING, PROCEED AT YOUR OWN RISK, will make file changes.' end def current_match_and_replace convert_37_to_38 end def message(text) response='' unless @opt.cmd=~/QQ/ \ or @opt.act[:quiet][:set]==:on response=response?(%{#{ text}\nProceed? 
}) end end def help print < ./#{s}" FileUtils::cp("#{pwd}/#{f}","#{pwd}/#{s}") else "File already exists, < #{s} > will not overwrite" end end end end def convert_to_simple_xml_model_sax SiSU_SimpleXML_ModelSax::Convert.new(@opt).read end def convert_to_simple_xml_model_dom SiSU_simple_xml_model_dom::Convert.new(@opt).read end def convert_to_simple_xml_model_node SiSU_simple_xml_model_node::Convert.new(@opt).read end def convert_kdi_to_sst SiSU_Kdissert::Convert.new(@opt).read end def convert_s_xml_to_sst SiSU_sstFromXML::Convert.new(@opt).read end def convert_footnotes require_relative 'sst_do_inline_footnotes' SiSU_ConvertFootnotes::Source.new(@opt).read end def conversion #%% do it --------------------------> if @opt.files \ and @opt.files.length > 0 mr=nil #%% changes to make m match, r replace --------------------------> if @opt.selections.str =~/--help/ then help elsif @opt.selections.str =~/(?:convert|to)[=-](?:xml |sxs|sax|sxd|dom|sxn|node)/ ext=case @opt.selections.str when /(?:convert|to)[=-](?:xml|sxs|sax)/ then '.sxs.xml' when /(?:convert|to)[=-](?:sxd|dom)/ then '.sxd.xml' when /(?:convert|to)[=-](?:sxn|node)/ then '.sxn.xml' end message("#{@opt.files.inspect}\n\nWARNING, PROCEED AT YOUR OWN RISK,\noverwriting any equivalent file with the extension #{ext}") mr=case @opt.selections.str when /(?:convert|to)[=-](?:sxs|sax|xml )/ then convert_to_simple_xml_model_sax when /(?:convert|to)[=-](?:sxd|dom)/ then convert_to_simple_xml_model_dom when /(?:convert|to)[=-](?:sxn|node)/ then convert_to_simple_xml_model_node else help end else mr=case @opt.selections.str when /(?:(?:37)?to-?38|--(?:convert|to)[=-](?:current|0.38))/ then convert_37_to_38 when /(?:(?:38)?to-?37|--(?:convert|to)[=-](?:0.37))/ then convert_38_to_37 when /(?:36to37)/ then convert_filename_36_to_37 when /(?:convert|from)[=-]kdi/ then convert_kdi_to_sst when /(?:(?:convert|from)[=-])?(?:xml_to_sst|xml2sst|sxml|sxs|sxd|sxd)/ then convert_s_xml_to_sst when /(?:convert|to)[=-]footnotes/ then 
convert_footnotes when /convert|default/ then current_match_and_replace else help end end unless @opt.selections.str =~/kdi/ match_and_replace=mr #start_processing =/not used in this example/i end_processing =/END\s+OF\s+FILE/ i=@opt.fns if i =~/(?:\.sst|\.ssm|\.ssi)$/ @new,@matched,@flag_start,@flag_end,@empty1,@empty2=true,false,false,false,false,false o="#{i}.bk" #o is for old markup_version=SiSU_Markup::MarkupIdentify.new(@opt).markup_version? if (@opt.selections.str=~/37/ and markup_version=~/0.38/) \ or (@opt.selections.str=~/current|38/ and markup_version=~/0.37/) puts "#{i} #{markup_version}" file=File.open(i,'r') cont=file.readlines file.close cont.each do |y| match_and_replace.each do |m,r,w| if y =~m \ and y =~w if @new @new=false File.unlink(o) if File.exist?(o) File.rename(i,o) File.unlink(i) if File.exist?(i) @file=File.new(i,'w') @matched=true break end end end end if @matched puts "conversion match in #{i}" unless @opt.act[:quiet][:set]==:on @flag_start=true cont.each do |y| if y =~end_processing @flag_end=true end if @flag_start \ and not @flag_end match_and_replace.each do |m,r,w| if y =~m \ and y =~w puts m.inspect + ' -> ' + r unless @opt.act[:quiet][:set]==:on if (@opt.act[:verbose][:set]==:on \ || @opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) puts "in: #{y}" end y.gsub!(m,r) if m and r if (@opt.act[:verbose][:set]==:on \ || @opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) puts "out: #{y}" end end end end @empty1=(y=~/^\s*$/) \ ? true : false @file.puts y unless (@empty1==true and @empty2==true) @empty2=(y=~/^\s*$/) \ ? 
true : false end @file.close else puts "NO conversion match in #{i}" unless @opt.act[:quiet][:set]==:on end else if (@opt.act[:verbose][:set]==:on \ || @opt.act[:verbose_plus][:set]==:on \ || @opt.act[:maintenance][:set]==:on) puts "Requested conversion #{@opt.selections.str} markup #{markup_version} identified in #{i}" end end end end else puts 'this routine makes permanent changes to the contents of the files matched, as instructed within [no matches]' end end end end #%% files to match for this conversion set -------------------------> require_relative 'hub_options' # hub_options.rb argv=$* base_path=Dir.pwd @opt=SiSU_Commandline::Options.new(argv,base_path) case @opt.selections.str when /=kdi/ SiSU_Modify::ConvertMarkup.new(@opt).conversion when /(?:36|37|38)?to-?(?:37|38)|--convert|--to|--from|default/ @opt.files.each do |fns| @opt.fns=fns SiSU_Modify::ConvertMarkup.new(@opt).conversion end else @opt.selections.str='--help' SiSU_Modify::ConvertMarkup.new(@opt).help end __END__ #+END_SRC * document header #+NAME: sisu_document_header #+BEGIN_SRC text #encoding: utf-8 =begin - Name: SiSU - Description: documents, structuring, processing, publishing, search sst - Author: Ralph Amissah - Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019, 2020, 2021, Ralph Amissah, All Rights Reserved. - License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. If you have an Internet connection, the latest version of the GPL should be available at these locations: <https://www.gnu.org/licenses/> <https://www.gnu.org/licenses/gpl.html> - SiSU uses: - Standard SiSU markup syntax, - Standard SiSU meta-markup syntax, and the - Standard SiSU object citation numbering and system - Homepages: - Git =end #+END_SRC