From 804a103722aa7731ca7f2062ee2ebf533607e6aa Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 3 Oct 2012 00:11:08 -0400 Subject: v4: 4.0.0 new branch & version & changelog "opened" --- lib/sisu/v2/shared_xml.rb | 706 ---------------------------------------------- 1 file changed, 706 deletions(-) delete mode 100644 lib/sisu/v2/shared_xml.rb (limited to 'lib/sisu/v2/shared_xml.rb') diff --git a/lib/sisu/v2/shared_xml.rb b/lib/sisu/v2/shared_xml.rb deleted file mode 100644 index e82f51bf..00000000 --- a/lib/sisu/v2/shared_xml.rb +++ /dev/null @@ -1,706 +0,0 @@ -# coding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Download: - - - * Ralph Amissah - - - - ** Description: common file for xml generation -=end -module SiSU_XML_munge - class Trans - require "#{SiSU_lib}/defaults" # defaults.rb - def initialize(md) - @md=md - @sys=SiSU_Env::System_call.new - @dir=SiSU_Env::Info_env.new(@md.fns) - @brace_url=SiSU_Viz::Skin.new.url_decoration - if @md.sem_tag - @ab ||=semantic_tags.default - end - end - def semantic_tags - def default - { - :pub => 'publication', - :conv => 'convention', - :vol => 'volume', - :pg => 'page', - :cty => 'city', - :org => 'organization', - :uni => 'university', - :dept => 'department', - :fac => 'faculty', - :inst => 'institute', - :co => 'company', - :com => 'company', - :conv => 'convention', - :dt => 'date', - :y => 'year', - :m => 'month', - :d => 'day', - :ti => 'title', - :au => 'author', - :ed => 'editor', #editor? - :v => 'version', #edition - :n => 'name', - :fn => 'firstname', - :mn => 'middlename', - :ln => 'lastname', - :in => 'initials', - :qt => 'quote', - :ct => 'cite', - :ref => 'reference', - :ab => 'abreviation', - :def => 'define', - :desc => 'description', - :trans => 'translate', - } - end - self - end - def char_enc #character encode - def utf8(dob='') - if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn - str=if defined? 
dob.obj; dob.obj - elsif dob.class==String; dob - end - if str - #¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü - #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷ - str.gsub!(//um,'>') # '>' # > - str.gsub!(/¢/um,'¢') # '¢' # ¢ - str.gsub!(/£/um,'£') # '£' # £ - str.gsub!(/¥/um,'¥') # '¥' # ¥ - str.gsub!(/§/um,'§') # '§' # § - str.gsub!(/©/um,'©') # '©' # © - str.gsub!(/ª/um,'ª') # 'ª' # ª - str.gsub!(/«/um,'«') # '«' # « - str.gsub!(/®/um,'®') # '®' # ® - str.gsub!(/°/um,'°') # '°' # ° - str.gsub!(/±/um,'±') # '±' # ± - str.gsub!(/²/um,'²') # '²' # ² - str.gsub!(/³/um,'³') # '³' # ³ - str.gsub!(/µ/um,'µ') # 'µ' # µ - str.gsub!(/¶/um,'¶') # '¶' # ¶ - str.gsub!(/¹/um,'¹') # '¹' # ¹ - str.gsub!(/º/um,'º') # 'º' # º - str.gsub!(/»/um,'»') # '»' # » - str.gsub!(/¼/um,'¼') # '¼' # ¼ - str.gsub!(/½/um,'½') # '½' # ½ - str.gsub!(/¾/um,'¾') # '¾' # ¾ - str.gsub!(/×/um,'×') # '×' # × - str.gsub!(/÷/um,'÷') # '÷' # ÷ - str.gsub!(/¿/um,'¿') # '¿' # ¿ - str.gsub!(/À/um,'À') # 'À' # À - str.gsub!(/Á/um,'Á') # 'Á' # Á - str.gsub!(/Â/um,'Â') # 'Â' #  - str.gsub!(/Ã/um,'Ã') # 'Ã' # à - str.gsub!(/Ä/um,'Ä') # 'Ä' # Ä - str.gsub!(/Å/um,'Å') # 'Å' # Å - str.gsub!(/Æ/um,'Æ') # 'Æ' # Æ - str.gsub!(/Ç/um,'Ç') # 'Ç' # Ç - str.gsub!(/È/um,'È') # 'È' # È - str.gsub!(/É/um,'É') # 'É' # É - str.gsub!(/Ê/um,'Ê') # 'Ê' # Ê - str.gsub!(/Ë/um,'Ë') # 'Ë' # Ë - str.gsub!(/Ì/um,'Ì') # 'Ì' # Ì - str.gsub!(/Í/um,'Í') # 'Í' # Í - str.gsub!(/Î/um,'Î') # 'Î' # Î - str.gsub!(/Ï/um,'Ï') # 'Ï' # Ï - str.gsub!(/Ð/um,'Ð') # 'Ð' # Ð - str.gsub!(/Ñ/um,'Ñ') # 'Ñ' # Ñ - str.gsub!(/Ò/um,'Ò') # 'Ò' # Ò - str.gsub!(/Ó/um,'Ó') # 'Ó' # Ó - str.gsub!(/Ô/um,'Ô') # 'Ô' # Ô - str.gsub!(/Õ/um,'Õ') # 'Õ' # Õ - str.gsub!(/Ö/um,'Ö') # 'Ö' # Ö - str.gsub!(/Ø/um,'Ø') # 'Ø' # Ø - str.gsub!(/Ù/um,'Ù') # 'Ù' # Ù - str.gsub!(/Ú/um,'Ú') # 'Ú' # Ú - str.gsub!(/Û/um,'Û') # 'Û' # Û - str.gsub!(/Ü/um,'Ü') # 'Ü' # Ü - str.gsub!(/Ý/um,'Ý') # 'Ý' # Ý - str.gsub!(/Þ/um,'Þ') # 'Þ' # Þ - str.gsub!(/ß/um,'ß') # 'ß' # ß - str.gsub!(/à/um,'à') # 'à' # à - 
str.gsub!(/á/um,'á') # 'á' # á - str.gsub!(/â/um,'â') # 'â' # â - str.gsub!(/ã/um,'ã') # 'ã' # ã - str.gsub!(/ä/um,'ä') # 'ä' # ä - str.gsub!(/å/um,'å') # 'å' # å - str.gsub!(/æ/um,'æ') # 'æ' # æ - str.gsub!(/ç/um,'ç') # 'ç' # ç - str.gsub!(/è/um,'è') # 'è' # è - str.gsub!(/é/um,'é') # '´' # é - str.gsub!(/ê/um,'ê') # 'ˆ' # ê - str.gsub!(/ë/um,'ë') # 'ë' # ë - str.gsub!(/ì/um,'ì') # 'ì' # ì - str.gsub!(/í/um,'í') # '´' # í - str.gsub!(/î/um,'î') # 'î' # î - str.gsub!(/ï/um,'ï') # 'ï' # ï - str.gsub!(/ð/um,'ð') # 'ð' # ð - str.gsub!(/ñ/um,'ñ') # 'ñ' # ñ - str.gsub!(/ò/um,'ò') # 'ò' # ò - str.gsub!(/ó/um,'ó') # 'ó' # ó - str.gsub!(/ô/um,'ô') # 'ô' # ô - str.gsub!(/õ/um,'õ') # 'õ' # õ - str.gsub!(/ö/um,'ö') # 'ö' # ö - str.gsub!(/ø/um,'ø') # 'ø' # ø - str.gsub!(/ù/um,'ú') # 'ù' # ú - str.gsub!(/ú/um,'û') # 'ú' # û - str.gsub!(/û/um,'ü') # 'û' # ü - str.gsub!(/ü/um,'ý') # 'ü' # ý - str.gsub!(/þ/um,'þ') # 'þ' # þ - str.gsub!(/ÿ/um,'ÿ') # 'ÿ' # ÿ - str.gsub!(/‘/um,'‘') # '‘' # ‘ - str.gsub!(/’/um,'’') # '’' # ’ - str.gsub!(/“/um,'“') # “ # “ - str.gsub!(/”/um,'”') # ” # ” - str.gsub!(/–/um,'–') # – # – - str.gsub!(/—/um,'—') # — # — - str.gsub!(/∝/um,'∝') # ∝ # ∝ - str.gsub!(/∞/um,'∞') # ∞ # ∞ - str.gsub!(/™/um,'™') # ™ # ™ - str.gsub!(/✠/um,'✠') # ✗ # ✠ - str.gsub!(/ /um,' ') # space identify - str.gsub!(/ /um,' ') # space identify - end - dob=if defined? dob.obj - dob.obj=str - dob - elsif dob.class==String; dob - end - dob - end - end - def html(dob='') - if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? 
Iñtërnâtiônàlizætiøn - dob.obj.gsub!(/ /u,' ') # space identify - dob.obj.gsub!(/ /u,' ') # space identify - else - dob.obj.gsub!(/¢/u,'¢') # ¢ - dob.obj.gsub!(/£/u,'£') # £ - dob.obj.gsub!(/¥/u,'¥') # ¥ - dob.obj.gsub!(/§/u,'§') # § - dob.obj.gsub!(/©/u,'©') # © - dob.obj.gsub!(/ª/u,'ª') # ª - dob.obj.gsub!(/«/u,'«') # « - dob.obj.gsub!(/®/u,'®') # ® - dob.obj.gsub!(/°/u,'°') # ° - dob.obj.gsub!(/±/u,'±') # ± - dob.obj.gsub!(/²/u,'²') # ² - dob.obj.gsub!(/³/u,'³') # ³ - dob.obj.gsub!(/µ/u,'µ') # µ - dob.obj.gsub!(/¶/u,'¶') # ¶ - dob.obj.gsub!(/¹/u,'¹') # ¹ - dob.obj.gsub!(/º/u,'º') # º - dob.obj.gsub!(/»/u,'»') # » - dob.obj.gsub!(/¼/u,'¼') # ¼ - dob.obj.gsub!(/½/u,'½') # ½ - dob.obj.gsub!(/¾/u,'¾') # ¾ - dob.obj.gsub!(/×/u,'×') # × - dob.obj.gsub!(/÷/u,'÷') # ÷ - dob.obj.gsub!(/¿/u,'¿') # ¿ - dob.obj.gsub!(/À/u,'À') # À - dob.obj.gsub!(/Á/u,'Á') # Á - dob.obj.gsub!(/Â/u,'Â') #  - dob.obj.gsub!(/Ã/u,'Ã') # à - dob.obj.gsub!(/Ä/u,'Ä') # Ä - dob.obj.gsub!(/Å/u,'Å') # Å - dob.obj.gsub!(/Æ/u,'Æ') # Æ - dob.obj.gsub!(/Ç/u,'Ç') # Ç - dob.obj.gsub!(/È/u,'È') # È - dob.obj.gsub!(/É/u,'É') # É - dob.obj.gsub!(/Ê/u,'Ê') # Ê - dob.obj.gsub!(/Ë/u,'Ë') # Ë - dob.obj.gsub!(/Ì/u,'Ì') # Ì - dob.obj.gsub!(/Í/u,'Í') # Í - dob.obj.gsub!(/Î/u,'Î') # Î - dob.obj.gsub!(/Ï/u,'Ï') # Ï - dob.obj.gsub!(/Ð/u,'Ð') # Ð - dob.obj.gsub!(/Ñ/u,'Ñ') # Ñ - dob.obj.gsub!(/Ò/u,'Ò') # Ò - dob.obj.gsub!(/Ó/u,'Ó') # Ó - dob.obj.gsub!(/Ô/u,'Ô') # Ô - dob.obj.gsub!(/Õ/u,'Õ') # Õ - dob.obj.gsub!(/Ö/u,'Ö') # Ö - dob.obj.gsub!(/Ø/u,'Ø') # Ø - dob.obj.gsub!(/Ù/u,'Ù') # Ù - dob.obj.gsub!(/Ú/u,'Ú') # Ú - dob.obj.gsub!(/Û/u,'Û') # Û - dob.obj.gsub!(/Ü/u,'Ü') # Ü - dob.obj.gsub!(/Ý/u,'Ý') # Ý - dob.obj.gsub!(/Þ/u,'Þ') # Þ - dob.obj.gsub!(/ß/u,'ß') # ß - dob.obj.gsub!(/à/u,'à') # à - dob.obj.gsub!(/á/u,'á') # á - dob.obj.gsub!(/â/u,'â') # â - dob.obj.gsub!(/ã/u,'ã') # ã - dob.obj.gsub!(/ä/u,'ä') # ä - dob.obj.gsub!(/å/u,'å') # å - dob.obj.gsub!(/æ/u,'æ') # æ - dob.obj.gsub!(/ç/u,'ç') # ç - dob.obj.gsub!(/è/u,'è') 
# è - dob.obj.gsub!(/é/u,'´') # é - dob.obj.gsub!(/ê/u,'ˆ') # ê - dob.obj.gsub!(/ë/u,'ë') # ë - dob.obj.gsub!(/ì/u,'ì') # ì - dob.obj.gsub!(/í/u,'´') # í - dob.obj.gsub!(/î/u,'î') # î - dob.obj.gsub!(/ï/u,'ï') # ï - dob.obj.gsub!(/ð/u,'ð') # ð - dob.obj.gsub!(/ñ/u,'ñ') # ñ - dob.obj.gsub!(/ò/u,'ò') # ò - dob.obj.gsub!(/ó/u,'ó') # ó - dob.obj.gsub!(/ô/u,'ô') # ô - dob.obj.gsub!(/õ/u,'õ') # õ - dob.obj.gsub!(/ö/u,'ö') # ö - dob.obj.gsub!(/ø/u,'ø') # ø - dob.obj.gsub!(/ù/u,'ù') # ú - dob.obj.gsub!(/ú/u,'ú') # û - dob.obj.gsub!(/û/u,'û') # ü - dob.obj.gsub!(/ü/u,'ü') # ý - dob.obj.gsub!(/þ/u,'þ') # þ - dob.obj.gsub!(/ÿ/u,'ÿ') # ÿ - dob.obj.gsub!(/‘/u,'&#lsquo;') # ‘ # ‘ - dob.obj.gsub!(/’/u,'&#rsquo;') # ’ # ’ - dob.obj.gsub!(/“/u,'“') # “ # “ - dob.obj.gsub!(/”/u,'”') # ” # ” - dob.obj.gsub!(/–/u,'–') # – # – - dob.obj.gsub!(/—/u,'—') # — # — - dob.obj.gsub!(/∝/u,'∝') # ∝ # ∝ - dob.obj.gsub!(/∞/u,'∞') # ∞ # ∞ - dob.obj.gsub!(/™/u,'™') # ™ # ™ - dob.obj.gsub!(/✠/u,'✠') # ✠ - #par.obja.gsub!(/✠/u '†') # † # † incorrect replacement † - dob.obj.gsub!(/ /u,' ') # space identify - dob.obj.gsub!(/ /u,' ') # space identify - end - end - self - end - def tidywords(wordlist) - wordlist.each do |x| - #imperfect solution will not catch all possible cases - x.gsub!(/&/,'&') unless x =~/&\S+;/ - x.gsub!(/&([A-Z])/,'&\1') - end - end - def markup(dob='') - wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 - dob.obj=tidywords(wordlist).join(' ').strip - unless dob.is=='table' - dob.obj.gsub!(/#{Mx[:br_line]}/u,'
') - dob.obj.gsub!(/#{Mx[:br_paragraph]}/u,'
') - dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') - end - dob.obj.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') - dob.obj.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') - dob.obj.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); dob.obj.gsub!(/\s+>(\s+|$)/,' >\1') - #dob.obj.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'\1') #reinstate - dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/m,'\1') - dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/m,'\1') - dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1') - dob.obj.gsub!(/<:pb>\s*/,'') #Fix - dob.obj.gsub!(/<+[-~]#>+/,'') - if dob.is !~/^code/ - #embeds a red-bullet image --> - dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') - dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') unless dob.is=='table' - dob.obj.gsub!(/#{Mx[:br_page]}\s*/,'') - dob.obj.gsub!(/#{Mx[:br_page_new]}\s*/,'') - dob.obj.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); dob.obj.gsub!(/<[-~]#>/,'') - dob.obj.gsub!(/href="#{Xx[:segment]}/m,'href="') - dob.obj.gsub!(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\.\.\/\S+?)#{Mx[:rel_c]}/, - '\1') - dob.obj.gsub!(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}:(\S+?)#{Mx[:rel_c]}/, - '\1') - dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\S+?)#{Mx[:rel_c]}/, - '\1') - dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, - %{[\\1] \\4}) - dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, - %{\\1}) - dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/, - %{[\\1] \\4}) - dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}image/, - %{\\1}) - dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, - '\1') #watch, compare html_tune - dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, - %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}) - dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, - '\1') #escaped urls not linked, deal with later - else - dob.obj.gsub!(/(^|[^}])_/m,'\1>') #code-block: angle brackets special characters - dob.obj.gsub!(/(^|[^}])_/m,'\1>') - end - if dob.of=='group' - dob.obj.gsub!(/#{Mx[:gl_bullet]}/,'● ') - end - dob.obj.gsub!(/#{Mx[:url_o]}([a-zA-Z0-9._-]+\@\S+?\.[a-zA-Z0-9._-]+)#{Mx[:url_c]}/, - %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}) - dob.obj.gsub!(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}") - dob.obj.gsub!(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}") - 
dob.obj.gsub!(/ |#{Mx[:nbsp]}/m,' ') - dob - end - def markup_light(dob='') - dob.obj.gsub!(/\/\{(.+?)\}\//,'\1') - dob.obj.gsub!(/[*!]\{(.+?)\}[*!]/,'\1') - dob.obj.gsub!(/_\{(.+?)\}_/,'\1') - dob.obj.gsub!(/-\{(.+?)\}-/,'\1') - dob.obj.gsub!(//,'
') - dob.obj.gsub!(/<:pb>\s*/,'') - dob.obj.gsub!(/<[-~]#>/,'') - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort - dob.obj.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax - dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/, - "#{@dir.url.images_local}\/\\1") - dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ') - wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 - dob.obj=tidywords(wordlist).join(' ').strip - dob - end - def markup_fictionbook(dob='') - dob.obj.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]') - dob.obj.gsub!(/\/\{(.+?)\}\//,'\1') - dob.obj.gsub!(/[*!]\{(.+?)\}[*!]/,'\1') - dob.obj.gsub!(/_\{(.+?)\}_/,'\1') - dob.obj.gsub!(/-\{(.+?)\}-/,'\1') - dob.obj.gsub!(//,'
') - dob.obj.gsub!(/<:pb>\s*/,'') - dob.obj.gsub!(/<[-~]#>/,'') - #temporary --> - dob.obj.gsub!(/<:\S+?>/,'') - #<-- temporary - dob.obj.gsub!(/<[-~]#>/,'') - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort - dob.obj.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax - dob.obj.gsub!(/(#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/, - "#{@dir.url.images_local}\/\\1") - dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ') - wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 - dob.obj=tidywords(wordlist).join(' ').strip - dob - end - def markup_group(dob='') - dob.obj.gsub!(//,'>') - dob.obj.gsub!(/<:?br(?:\s+\/)?>/,'
') - dob.obj.gsub!(/<(link xmlns:xlink=".+?")>/,'<\1>') - dob.obj.gsub!(/<(\/link)>/,'<\1>') - dob.obj.gsub!(/<(\/?en)>/,'<\1>') - dob - end - def xml_sem_block_paired(matched) # colon depth: many, recurs - matched.gsub!(/\b(au):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{\\2}) - matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{\\2}) - matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/\b(dt):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m, %{\\2}) - matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'\2') - end - def xml_semantic_tags(dob) - if @md.sem_tag - dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } - dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } - dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } - dob.obj.gsub!(/:\{(.+?)\}:au\b/m, %{\\1}) - dob.obj.gsub!(/:\{(.+?)\}:n\b/m, %{\\1}) - dob.obj.gsub!(/:\{(.+?)\}:ti\b/m, %{\\1}) - dob.obj.gsub!(/:\{(.+?)\}:ref\b/m, %{\\1}) - dob.obj.gsub!(/:\{(.+?)\}:desc\b/m, %{\\1}) - dob.obj.gsub!(/:\{(.+?)\}:cty\b/m, %{\\1}) - dob.obj.gsub!(/:\{(.+?)\}:org\b/m, %{\\1}) - dob.obj.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'\1') - dob.obj.gsub!(/;\{([^}]+(?![;]))\};ti\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};qt\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};ref\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};ed\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};v\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};desc\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};def\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};trans\b/m, %{\\1}) - 
dob.obj.gsub!(/;\{([^}]+(?![;]))\};y\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};ab\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};pg\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};in\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};uni\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};fac\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};inst\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};dept\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};org\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};com?\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};cty\b/m, %{\\1}) - dob.obj.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'\1') - end - dob - end - end -end -module SiSU_XML_tags #Format - require "#{SiSU_lib}/param" # param.rb - include SiSU_Param - include SiSU_Viz - class RDF - def initialize(md='',seg_name=[],tracker=0) - @full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords='' - @md=md - @rdfurl=%{ rdf:about="http://www.jus.uio.no/lm/toc"\n} - if defined? @md.title.full \ - and @md.title.full # DublinCore 1 - title - @rdf_title=%{ dc.title="#{seg_name}#{@md.title.full}"\n} - @full_title=%{ \n} - end - if defined? @md.creator.author \ - and @md.creator.author=~/\S+/ # DublinCore 2 - creator/author (author) - @rdf_author=%{ dc.author="#{@md.creator.author}"\n} - content=meta_content_clean(@md.creator.author) - @author=%{ \n} - end - if defined? @md.classify.subject \ - and @md.classify.subject=~/\S+/ # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) 
- @rdf_subject=%{ dc.subject="#{@md.classify.subject}"\n} - content=meta_content_clean(@md.classify.subject) - @subject=%{ \n} - end - if defined? @md.notes.description \ - and @md.notes.description=~/\S+/ # DublinCore 4 - description - @rdf_description=%{ dc.description="#{@md.notes.description}"\n} - content=meta_content_clean(@md.notes.description) - @description=%{ \n} - end - if defined? @md.publisher \ - and @md.publisher # DublinCore 5 - publisher (current copy published by) - @rdf_publisher=%{ dc.publisher="#{@md.publisher}"\n} - content=meta_content_clean(@md.publisher) - @publisher=%{ \n} - end - if defined? @md.creator.contributor \ - and @md.creator.contributor=~/\S+/ # DublinCore 6 - contributor - @rdf_contributor=%{ dc.contributor="#{@md.creator.contributor}"\n} - content=meta_content_clean(@md.creator.contributor) - @contributor=%{ \n} - end - if defined? @md.date.published \ - and @md.date.published=~/\S+/ # DublinCore 7 - date year-mm-dd - @rdf_date=%{ dc.date="#{@md.date.published}"\n} - @date=%{ \n} # fix @md.date_scheme - end - if defined? @md.date.created \ - and @md.date.created=~/\S+/ # DublinCore 7 - date.created year-mm-dd - @rdf_date_created=%{ dc.date.created="#{@md.date.created}"\n} - @date_created=%{ \n} - end - if defined? @md.date.issued \ - and @md.date.issued=~/\S+/ # DublinCore 7 - date.issued year-mm-dd - @rdf_date_issued=%{ dc.date.issued="#{@md.date.issued}"\n} - @date_issued=%{ \n} - end - if defined? @md.date.available \ - and @md.date.available=~/\S+/ # DublinCore 7 - date.available year-mm-dd - @rdf_date_available=%{ dc.date.available="#{@md.date.available}"\n} - @date_available=%{ \n} - end - if defined? @md.date.valid \ - and @md.date.valid=~/\S+/ # DublinCore 7 - date.valid year-mm-dd - @rdf_date_valid=%{ dc.date.valid="#{@md.date.valid}"\n} - @date_valid=%{ \n} - end - if defined? 
@md.date.modified \ - and @md.date.modified=~/\S+/ # DublinCore 7 - date.modified year-mm-dd - @rdf_date_modified=%{ dc.date.modified="#{@md.date.modified}"\n} - @date_modified=%{ \n} - end - if defined? @md.type \ - and @md.type # DublinCore 8 - type (genre eg. report, convention etc) - @rdf_type=%{ dc.type="#{@md.type}"\n} - content=meta_content_clean(@md.type) - @type=%{ \n} - end - if defined? @md.classify.format \ - and @md.classify.format=~/\S+/ # DublinCore 9 - format (use your mime type) - @rdf_format=%{ dc.format="#{@md.classify.format}"\n} - content=meta_content_clean(@md.classify.format) - @format=%{ \n} - end - if defined? @md.classify.identifier \ - and @md.classify.identifier=~/\S+/ # DublinCore 10 - identifier (your identifier, could use urn which is free) - @rdf_identifier=%{ dc.identifier="#{@md.classify.identifier}"\n} - content=meta_content_clean(@md.classify.identifier) - @identifier=%{ \n} - end - if defined? @md.original.source \ - and @md.original.source=~/\S+/ # DublinCore 11 - source (document source) - @rdf_source=%{ dc.source="#{@md.original.source}"\n} - content=meta_content_clean(@md.original.source) - @source=%{ \n} - end - if defined? @md.title.language \ - and @md.title.language=~/\S+/ # DublinCore 12 - language (English) - @rdf_language=%{ dc.language="#{@md.title.language}"\n} - @language=%{ \n} - end - if defined? @md.original.language \ - and @md.original.language=~/\S+/ - @rdf_language_original=%{ dc.language="#{@md.original.language}"\n} - @language_original=%{ \n} - end - if defined? @md.classify.relation \ - and @md.classify.relation=~/\S+/ # DublinCore 13 - relation - @rdf_relation=%{ dc.relation="#{@md.classify.relation}"\n} - content=meta_content_clean(@md.classify.relation) - @relation=%{ \n} - end - if defined? 
@md.classify.coverage \ - and @md.classify.coverage=~/\S+/ # DublinCore 14 - coverage - @rdf_coverage=%{ dc.coverage="#{@md.classify.coverage}"\n} - content=meta_content_clean(@md.classify.coverage) - @coverage=%{ \n} - end - if defined? @md.rights.all \ - and @md.rights.all # DublinCore 15 - rights - @rdf_rights=%{ dc.rights="#{@md.rights.all}"\n} - content=meta_content_clean(@md.rights.all) - @rights=%{ \n} - end - content=meta_content_clean(@md.keywords) - @keywords=%{ \n} if @md.keywords - @vz=SiSU_Env::Get_init.instance.skin - end - def meta_content_clean(content='') - content - unless content.nil? - content.tr!('"',"'") - content=SiSU_XML_munge::Trans.new(@md).char_enc.utf8(content) - end - content - end - def rdfseg #segHead - rdftoc - end - def comment_xml(extra='') - generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version] - lastdone="Last Generated on: #{Time.now}" - rubyv="Ruby version: #{@md.ruby_version}" - sc=if @md.sc_info - "Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}" - else '' - end - if extra.empty? -< -WOK - else -< -WOK - end - end - def comment_xml_sax - desc='SiSU XML, SAX type representation' - comment_xml(desc) - end - def comment_xml_node - desc='SiSU XML, Node type representation' - comment_xml(desc) - end - def comment_xml_dom - desc='SiSU XML, DOM type representation' - comment_xml(desc) - end - def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better -<