about | summary | refs | log | tree | commit | diff | homepage
path: root/data/sisu/v3dv/conf/convert/sisu_convert
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2012-03-19 22:07:29 -0400
committer Ralph Amissah <ralph@amissah.com> 2012-03-19 22:07:33 -0400
commit 6811ac91f21a434fc7d967c11e1b20f33918c6ea (patch)
tree 30f39674ca96a79f8a604a9f02d571f24320e221 /data/sisu/v3dv/conf/convert/sisu_convert
parent v3: 3.2.0 version & changelog "opened" (diff)
v3: 3.2 branch is main (v3dv --> v3); dev (v3dv) branch directories removed
* v3dv (3.2) "merged" into v3 (previously 3.1) (& removed)
  * conf/sisu/v3dv --> conf/sisu/v3
  * data/sisu/v3dv --> data/sisu/v3
  * lib/sisu/v3dv --> lib/sisu/v3
  * bin/sisu* (v3dv references changed to v3)
  * (--dev modifier (superfluous for the time being) runs main v3 branch)
Diffstat (limited to 'data/sisu/v3dv/conf/convert/sisu_convert')
-rw-r--r-- data/sisu/v3dv/conf/convert/sisu_convert 519
1 files changed, 0 insertions, 519 deletions
diff --git a/data/sisu/v3dv/conf/convert/sisu_convert b/data/sisu/v3dv/conf/convert/sisu_convert
deleted file mode 100644
index a3a12189..00000000
--- a/data/sisu/v3dv/conf/convert/sisu_convert
+++ /dev/null
@@ -1,519 +0,0 @@
-#!/usr/bin/env ruby
-# = sisu - SiSU information Structuring Universe
-#
-# Copyright (c) Ralph Amissah 1997,2004
-#
-# Ralph Amissah mailto:ralph@amissah.com
-#
-# * Name: SiSU information Structuring Universe
-# * Author: Ralph@Amissah.com
-# * Description: document conversion tool, to sisu from other formats
-# * License: GPL 3 or later
-# * Notes: word conversion uses wvWare and wvSiSU.xml (a modified/stripped wvHtml.xml)
-# * http://wvware.sourceforge.net/
-# * http://sourceforge.net/projects/wvware
-# * <url:sisu.lnk>|sisu.lnk|@|^|
-# * <url:sisu>
-module CONVERT
- class MyOutput
- def initialize(data, filename, instruct)
- @data=data.compact
- @filename=filename
- @instruct=instruct
- end
- def headerBasic
- <<WOK
-% SiSU 2.0
-
-@title:
- :subtitle:
-
-@creator:
- :author:
-
-@classify:
- :topic_register:
-
-@date:
- :published:
-
-@rights:
- :copyright:
- :license:
-
-WOK
- end
- def headerDefault
- <<WOK
-% SiSU 2.0
-
-@title:
- :subtitle:
-
-@creator:
- :author:
-
-@classify:
- :topic_register:
-
-@date:
- :published:
-
-@rights:
- :copyright:
- :license:
-
-WOK
- end
- def hardOutput
- pre = Array.new
- case @instruct
- when /default/
- pre << headerDefault
- else
- pre << headerBasic
- end
- @filename_wv=File.new(%{,,#{@filename}.sst},'w+')
- @filename_wv << pre
- @data.each do |x|
- y = x.split("\n")
- y.each do |z| # cleaner output this way
- z.strip!
- @filename_wv.puts "#{z}\n\n" unless z =~/^$/
- end
- end
- end
- end
- class WareWord97
- def initialize(data, filename, instruct)
- @data=data
- @filename=filename
- @instruct=instruct
- end
- def songsheet
- data=@data
- print "Convert to SiSU file from Word97 << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>>
- data=WareWord97.new(data.collect,@filename,@instruct).strip
- data=WareWord97.new(data.collect,@filename,@instruct).strip
- data=WareWord97.new(data.collect,@filename,@instruct).markup_rules
- data=MyOutput.new(data.collect,@filename,@instruct).hardOutput
- end
- def strip
- data=@data
- tuned_file=Array.new
- endnote_no=1
- data.each do |para|
- para.strip!
- para.gsub!(/<u>\s*<\/u>/,'')
- para.gsub!(/<\/u>\s*<u>/,'')
- para.gsub!(/<b>\s*<\/b>/,'')
- para.gsub!(/<\/b>\s*<b>/,'')
- para.gsub!(/<i>\s*<\/i>/,'')
- para.gsub!(/<\/i>\s*<i>/,'')
- tuned_file << para unless para == nil
- end
- tuned_file
- end
- def markup_rules
- data=@data
- tuned_file=Array.new
- endnote_no=1
- data.each do |para|
- para.strip!
- para.gsub!(/\s+/,' ')
- para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i,'1~ \1 \2') #watch case insensitivity
- para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i,':C~ \1 \2') #watch case insensitivity
- para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i,'3~ \1 \2') #numeric, decide what to do, can be different
- para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i,'2~ \1 \2') #numeric, decide what to do, can be different
- para.gsub!(/^<b>(\d.+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different
- #para.gsub!(/^<b>([\d.]+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different
- para.gsub!(/<u>(.+?)<\/u>/,'_{\1}_')
- para.gsub!(/<b>(.+?)<\/b>/,'!{\1}!')
- para.gsub!(/<i>(.+?)<\/i>/,'/{\1}/')
- tuned_file << para unless para == nil
- end
- tuned_file
- end
- end
- class Html
- def initialize(data, filename, instruct)
- @data=data
- @filename=filename
- @instruct=instruct
- end
- def songsheet
- data=@data
- print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>>
- #data=Html.new(data.collect, @filename, @instruct).space_paragraphs
- #data=Html.new(data.split(''), @filename, @instruct).space_paragraphs
- data=Html.new(data.join.split(/\n\n+/), @filename, @instruct).space_paragraphs
- #data=Html.new(data.split("\n"), @filename, @instruct).space_paragraphs
- #data=Html.new(data.collect.join.split("\n"), @filename, @instruct).space_paragraphs
- data=Html.new(data.collect, @filename, @instruct).multiline
- data=Html.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules
- data=MyOutput.new(data.collect, @filename, @instruct).hardOutput
- end
- def space_paragraphs
- #data=@data.join.split(/\n/)
- data=@data
- #p data.length
- tuned_file=Array.new
- data.each do |para|
- para.strip!
- para.gsub!(/\r/,'')
- #para.gsub!(/\n/, ' ') #PROBLEM, serious time issues on a few files also for \n (or multiline matches which is less surprising), edit out if necessary
- para.gsub!(/<\/?p>/i,'zZz')
- para.gsub!(/<\/?\s*p(?:\s+ALIGN=.+?)?>/i,'zZz') #all manner of <p> para.gsub!(/<\/?p>/i, "\n\n")
- para.gsub!(/<p\s+(class|align).+?>/i,'zZz') #
- para.gsub!(/<\/p>/i,'zZz') # repeat actually
- para.gsub!(/<(?:dir|tr|br)>/i,'zZz') #
- #para.gsub!(/<(?:\/\s*)?(?:dir|tr|br)>/i, "zZz") #
- para.gsub!(/(<\/center>)/i,'\1zZz')
- para.gsub!(/(<\/h[1-6]>)/i,'\1zZz')
- para.gsub!(/ \s+/i,' ')
- para.gsub!(/(?:\s*zZz\s*)+/i,'zZz') #
- tuned_file << para unless para == nil
- end
- tuned_file
- end
- def blockquotes(sub='') # SERIOUS PROBLEM INTRODUCED, some blockquotes go missing !, quite unacceptable, debug, for now not used
- res=Array.new
- sub.each do |x|
- if x=~/(<\/blockquote>)/i
- m = $1
- res << x[/(.+?)#{m}/mi,1].gsub!(/zZz/,'zZz_1 ') if x =~/.+?#{m}/mi
- res << x[/#{m}(.+)/mi,1]
- else
- res << x #[/(.+)/mi,1]
- end
- end
- res.join
- end
- def multiline
- data=@data
- tuned_file=Array.new
- data.each do |para|
- para.gsub!(/\n/,' ')
- para.gsub!(/ \s+/mi,' ')
- #ALL HERE could be very time EXPENSIVE but tamed? compromise ... /mi
- para.gsub!(/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>')
- para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i,'zZz<\1>\2 \3</\1>')
- #para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/center>zZz(.+?)?<\/\1>/i,'zZz<\1>\2 \3</\1>')
- para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/\1>/i,'zZz<\1>\2</\1>')
- para.gsub!(/<(h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i,'zZz<\1>\2</\1>zZz') #does catch some h1, h2 etc, too expensive to have biu
- #para.gsub!(/<([biu]|h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i,'zZz<\1>\2 \3</\1>') #may go too far? useful for h1 h2 etc, remove biu?
- #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>')
- #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>')
- ### SERIOUS PROBLEM INTRODUCED
- # sub = para.split(/<blockquote>/i)
- # para = blockquotes(sub) if sub.length > 0 #check was on >1 could have serious repercussions 2004w29
- para.gsub!(/zZz(\s*zZz)*/,"\n\n")
- tuned_file << para << "\n\n" unless para == nil
- end
- tuned_file
- end
- def markup_rules
- @@flag_blockquote=false
- data=@data
- tuned_file=Array.new
- data.each do |para|
- if para=~/<a href="(http:\/\/.+?)">/i
- #p para.grep(/<a href="(http:\/\/.+?)">/i)
- #m=$1
- #para.gsub!(/(?:&lt;\s*)?<a href="#{m}">#{m}<\/a>(?:\s*&gt;)?\.?/i, "#{m}")
- para.gsub!(/(?:&lt;\s*)?<a href="(http:\/\/.+?)">http:\/\/.+?<\/a>(?:\s*&gt;)?\.?/i,'\1') #risk that url & url are not to match
- #para.gsub!(/(?:&lt;\s*)?<a href="(\w+\.html)">(http:\/\/.+?\/\1)<\/a>(?:\s*&gt;)?\.?/i, "\\2") #does not match
- end
- if para=~/<BLOCKQUOTE>/i
- @@flag_blockquote=true
- end
- if @@flag_blockquote
- para.gsub!(/^/,'_1 ') unless para.empty? or para =~/^\s*<\/?blockquote?>\s*$/i
- end
- if para=~/<\/BLOCKQUOTE>/i
- @@flag_blockquote=false
- end
- para.gsub!(/<\/?blockquote?>/i,'')
- ### clean
- para.gsub!(/^\s+/i,'')
- para.gsub!(/<([bui]|em|su[pb])>\s*<\/\1>/i,'')
- para.gsub!(/<\/?center>/i,'')
- para.gsub!(/\s*<\/dir>/i,'')
- para.gsub!(/<hr>/i,'')
- para.gsub!(/\s*<a href=".+?\.html#(?:[a-z_]+)?(?:[a-z0-9_-]|\*)+">\[(\*+)\]<\/a>/i,'^{[\1]}^ ') #other endnote marker
- para.gsub!(/<a href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"(?:\s+name=".+?")?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i,'~^ ') #endnote marker
- para.gsub!(/<a name=".+?"\s+href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i,'~^ ') #endnote marker
- para.gsub!(/<a name="(?:[a-z$]+)?[0-9_-]+">\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i,'^~ ') #endnote
- #para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') #
- para.gsub!(/<h([1-6])(?: align=.+?)?>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') #
- para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i,'4~ \1 \2') #watch case insensitivity
- para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i,'3~ \1 \2') #watch case insensitivity
- para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i,'6~ \1 \2') #numeric, decide what to do, can be different
- para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i,'5~ \1 \2') #numeric, decide what to do, can be different
- para.gsub!(/^<b>(\d+\.?)(.+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different
- #<a name="ii"></a><B>
- para.gsub!(/^(<a name=".+?">)(?:<small>)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i,'5~ \2 \1') #watch
- para.gsub!(/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/,'5~ \2 \1') #watch
- para.gsub!(/^(\s+|<p>)?(<a name=".+?">)(\s*|<\/a>)?<b>/i,'5~ \2 \1') #watch
- para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') #
- para.gsub!(/^<b>\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i,'4~ \1\2') # wish it all were less messy
- para.gsub!(/^<i>\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i,'5~ \1\2') # wish it all were less messy
- para.gsub!(/<\/?[biu]>/i,'') if para =~/[1-6]\{/
- para.gsub!(/<u>\s*(.+?)\s*<\/u>/i,'_{\1}_')
- para.gsub!(/<(b|strong)>\s*(.+?)\s*<\/\1>/i,'*{\2}*')
- para.gsub!(/<(i|em)>\s*(.+?)\s*<\/\1>/i,'/{\2}/')
- para.gsub!(/<sup>\s*(.+?)\s*<\/sup>/i,'^{\1}^')
- para.gsub!(/(([\/\*!_])\{.+?\}\2)\s\s+/i,'\1 ')
- para.gsub!(/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i,'\1\3 ')
- para.gsub!(/(([\/\*!_])\{.+?\}\2)(["'])\s+/i,'\1\3 ')
- para.gsub!(/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i,'\1 \3')
- para.gsub!(/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i,'\1 \3')
- para.gsub!(/([a-z0-9])(([\/\*_])\{.+?\}\3)/i,' \1 \2') #eg this/{problem}/
- para.gsub!(/([\/\*_])\{([,.;; ]+)\}\1/i,'\2') #eg /{,}/ or *{ }* etc.
- para.gsub!(/ \s+/i,' ')
- #para.gsub!(/\/\{\*\{/i, '*{/{')
- #para.gsub!(/\}\*\}\//i, '}/}*')
- para.gsub!(/&quot;/i,'"')
- para.gsub!(/&amp;/i,'and')
- para.gsub!(/<!doctype html public .+/i,'')
- para.gsub!(/<\/?(?:html|head|body|font|small)>/i,'')
- para.gsub!(/<\/(?:title)>/i,'')
- para.gsub!(/<title>/i,'#{~title? ')
- para.gsub!(/<blockquote>(.+?)<\/blockquote>/mi,"\n\n_1 \\1\n\n")
- para.gsub!(/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i,'')
- para.gsub!(/~^\s+\.\s*/i,'.~^ ') #check vim equiv # %s/\~e\s\+\.\s*/.\~e /c
- para.gsub!(/\s+~^\s+/i,'~^ ')
- para.gsub!(/ \s+/i,' ')
- para.gsub!(/\s+$/i,'')
- para.gsub!(/^(?:<\/[bi]>)+$/i,'')
- para.gsub!(/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i,'5~ \1\2') # wish it all were less messy
- para.gsub!(/^(?:<\/?(?:[ib]|em)>\s*)+$/i,'') # cleaning up left over <i> etc.
- para.gsub!(/<(?:i|em)>\s*(.+)/i,'/{\1}/') # using up left over <i>
- para.gsub!(/<b>\s*(.+)/i,'*{\1}*') # using up left over <b>
- para.gsub!(/<dd>([\d.]+)/i,'5~ \1')
- para.gsub!(/<dd>(?:&nbsp;)+([\d.]+)/i,'6~ \1')
- para.gsub!(/<dd>(\([a-z]\))/i,'7~ \1')
- para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">(.+?)(<\/a>)/i,'\1\3\2\4')
- para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">/i,'\1\3\2')
- para.gsub!(/http\/\/(\S+)/i,'http:\/\/\1')
- para.gsub!(/\s*<a href="\S+?">(http:\/\/\S+?)<\/a>\s*/i,' \1 ')
- para.gsub!(/([a-zA-Z.,!?;:])([*\/_-]\{)/,'\1 \2')
- para.gsub!(/^\s*(&nbsp;){10,12}/i,'_2 ')
- para.gsub!(/^\s*(&nbsp;){4,5}/i,'_1 ')
- para.gsub!(/&#9;/,' ') #check
- ## glyphs & tildes
- para.gsub!(/&iexcl;/, '¡') #'Inverted exclamation
- para.gsub!(/&cent;/, '¢') #'Cent sign ¢
- para.gsub!(/&pound;/, '£') #'Pound sign £
- para.gsub!(/&curren;/, '¤') #'General currency sign
- para.gsub!(/&yen;/, '¥') #'Yen sign ¥
- para.gsub!(/&brvbar;/, '¦') #'Broken vertical bar
- para.gsub!(/&sect;/, '§') #'Section sign §
- para.gsub!(/&uml;/, '¨') #'Umlaut
- para.gsub!(/&copy;/, '©') #'Copyright ©
- para.gsub!(/&ordf;/, 'ª') #'Feminine ordinal ª
- para.gsub!(/&laquo;/, '«') #'Left angle quote «
- para.gsub!(/&not;/, '¬') #'Not sign
- para.gsub!(/&shy;/, '­') #'Soft hyphen
- para.gsub!(/&reg;/, '®') #'Registered trademark ®
- para.gsub!(/&macr;/, '¯') #'Macron accent
- para.gsub!(/&deg;/, '°') #'Degree sign °
- para.gsub!(/&plusmin;/,'±') #'Plus or minus ±
- para.gsub!(/&sup2;/, '²') #'Superscript 2 ²
- para.gsub!(/&sup3;/, '³') #'Superscript 3 ³
- para.gsub!(/&acute;/, '') #'Acute accent
- para.gsub!(/&micro;/, 'µ') #'Micro sign (Greek mu) µ
- para.gsub!(/&para;/, '¶') #'Paragraph sign ¶
- para.gsub!(/&middot;/, '·') #'Middle dot
- para.gsub!(/&cedil;/, '¸') #'Cedilla
- para.gsub!(/&sup1;/, '¹') #'Superscript 1 ¹
- para.gsub!(/&ordm;/, 'º') #'Masculine ordinal º
- para.gsub!(/&raquo;/, '»') #'Right angle quote
- para.gsub!(/&frac14;/, '¼') #'Fraction one quarter ¼
- para.gsub!(/&frac12;/, '½') #'Fraction on half ½
- para.gsub!(/&frac34;/, '¾') #'Fraction three quarters ¾
- para.gsub!(/&iquest;/, '¿') #'Inverted question mark ¿
- para.gsub!(/&Agrave;/, 'À') #'Capital A, grave accent À
- para.gsub!(/&Aacute;/, 'Á') #'Capital A, acute accent Á
- para.gsub!(/&Acirc;/, 'Â') #'Capital A, circumflex accent Â
- para.gsub!(/&Atilde;/, 'Ã') #'Capital A, tilde Ã
- para.gsub!(/&Auml;/, 'Ä') #'Capital A, umlaut Ä
- para.gsub!(/&Aring;/, 'Å') #'Capital A, ring Å
- para.gsub!(/&AElig;/, 'Æ') #'Capital AE ligature Æ
- para.gsub!(/&Ccedil;/, 'Ç') #'Capital C, cedilla Ç
- para.gsub!(/&Egrave;/, 'È') #'Capital E, grave accent È
- para.gsub!(/&Eacute;/, 'É') #'Capital E, acute accent É
- para.gsub!(/&Ecirc;/, 'Ê') #'Capital E, circumflex accent Ê
- para.gsub!(/&Euml;/, 'Ë') #'Capital E, umlaut Ë
- para.gsub!(/&Igrave;/, 'Ì') #'Capital I, grave accent Ì
- para.gsub!(/&Iacute;/, 'Í') #'Capital I, acute accent Í
- para.gsub!(/&Icirc;/, 'Î') #'Capital I, circumflex accent Î
- para.gsub!(/&Iuml;/, 'Ï') #'Capital I, umlaut Ï
- para.gsub!(/&ETH;/, 'Ð') #'Capital eth, Icelandic
- para.gsub!(/&Ntilde;/, 'Ñ') #'Capital N, tilde Ñ
- para.gsub!(/&Ograve;/, 'Ò') #'Capital O, grave accent Ò
- para.gsub!(/&Oacute;/, 'Ó') #'Capital O, acute accent Ó
- para.gsub!(/&Ocirc;/, 'Ô') #'Capital O, circumflex accent Ô
- para.gsub!(/&Otilde;/, 'Õ') #'Capital O, tilde Õ
- para.gsub!(/&Ouml;/, 'Ö') #'Capital O, umlaut Ö
- para.gsub!(/&times;/, '×') #'Multiply sign ×
- para.gsub!(/&Oslash;/, 'Ø') #'Capital O, slash Ø
- para.gsub!(/&Ugrave;/, 'Ù') #'Capital U, grave accent Ù
- para.gsub!(/&Uacute;/, 'Ú') #'Capital U, acute accent Ú
- para.gsub!(/&Ucirc;/, 'Û') #'Capital U, circumflex accent Û
- para.gsub!(/&Uuml;/, 'Ü') #'Capital U, umlaut Ü
- para.gsub!(/&Yacute;/, 'Ý') #'Capital Y, acute accent Ý
- para.gsub!(/&THORN;/, 'Þ') #'Capital thorn, Icelandic Þ
- para.gsub!(/&szlig;/, 'ß') #'Small sz ligature, German ß
- para.gsub!(/&agrave;/, 'à') #'Small a, grave accent à
- para.gsub!(/&aacute;/, 'á') #'Small a, acute accent á
- para.gsub!(/&acirc;/, 'â') #'Small a, circumflex accent â
- para.gsub!(/&atilde;/, 'ã') #'Small a, tilde ã
- para.gsub!(/&auml;/, 'ä') #'Small a, umlaut ä
- para.gsub!(/&aring;/, 'å') #'Small a, ring å
- para.gsub!(/&aelig;/, 'æ') #'Small ae ligature æ
- para.gsub!(/&ccedil;/, 'ç') #'Small c, cedilla ç
- para.gsub!(/&egrave;/, 'è') #'Small e, grave accent è
- para.gsub!(/&eacute;/, 'é') #'Small e, acute accent é
- para.gsub!(/&ecirc;/, 'ê') #'Small e, circumflex accent ê
- para.gsub!(/&euml;/, 'ë') #'Small e, umlaut ë
- para.gsub!(/&igrave;/, 'ì') #'Small i, grave accent ì
- para.gsub!(/&iacute;/, 'í') #'Small i, acute accent í
- para.gsub!(/&icirc;/, 'î') #'Small i, circumflex accent î
- para.gsub!(/&iuml;/, 'ï') #'Small i, umlaut ï
- para.gsub!(/&eth;/, 'ð') #'Small eth, Icelandic ð
- para.gsub!(/&ntilde;/, 'ñ') #'Small n, tilde ñ
- para.gsub!(/&ograve;/, 'ò') #'Small o, grave accent ò
- para.gsub!(/&oacute;/, 'ó') #'Small o, acute accent ó
- para.gsub!(/&ocirc;/, 'ô') #'Small o, circumflex accent ô
- para.gsub!(/&otilde;/, 'õ') #'Small o, tilde õ
- para.gsub!(/&ouml;/, 'ö') #'Small o, umlaut ö
- para.gsub!(/&divide;/, '÷') #'Divide sign ÷
- para.gsub!(/&oslash;/, 'ø') #'Small o, slash ø
- para.gsub!(/&ugrave;/, 'ù') #'Small u, grave accent ù
- para.gsub!(/&uacute;/, 'ú') #'Small u, acute accent ú
- para.gsub!(/&ucirc;/, 'û') #'Small u, circumflex accent û
- para.gsub!(/&uuml;/, 'ü') #'Small u, umlaut ü
- para.gsub!(/&yacute;/, 'ý') #'Small y, acute accent ý
- para.gsub!(/&thorn;/, 'þ') #'Small thorn, Icelandic þ
- para.gsub!(/&yuml;/, 'ÿ') #'Smally y, umlaut ÿ
- ##
- para.gsub!(/\s\s+/,' ')
- para.gsub!(/\t+/,' ')
- #para.gsub!(/ +/,' ')
- #para.gsub!(/^(?:<(?:\/)?[bi]>)+$/i, '')
- tuned_file << para unless para == nil
- end
- tuned_file
- end
- end
- class Default < Html
- def initialize(data, filename, instruct)
- @data=data
- @filename=filename
- @instruct=instruct
- end
- def songsheet
- data=@data
- print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>>
- data=Default.new(data.collect, @filename, @instruct).space_paragraphs
- data=Default.new(data.collect, @filename, @instruct).multiline
- data=Default.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules
- data=Default.new(data.collect, @filename, @instruct).markup_default
- data=MyOutput.new(data.collect, @filename, @instruct).hardOutput
- end
- def markup_default
- data=@data
- tuned_file=Array.new
- data.each do |para|
- para.gsub!(/<i>(Id\.?)(\s|$)/i,'/\{\1\}\2/')
- para.gsub!(/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i,'\1')
- para.gsub!(/\/\{Id\.\s*<\/LI>\s*\}\//i,'/{Id.}/')
- tuned_file << para unless para == nil
- end
- tuned_file
- end
- end
-end
-def help
- puts <<WOK
-conversion program
-initial SiSU markup from other file formats
-
- zxy_convert --word does initial conversion from word97 to sisu markup, expects [filename].doc (can also use --doc)
- zxy_convert --html does initial conversion from html to sisu markup, expects [filename].html
- zxy_convert --default does initial conversion from defalt html to sisu markup, expects [filename].html
-
-WOK
-end
-def do_word(argv, instruct)
- argv.each do |f|
- if f =~/.+?\.doc$/
- @argv << f[/(.+?)\.doc$/, 1]
- else
- print "not .doc? << #{f} >> "
- end
- end
- @argv.each do |filename|
- system(%{wvWare -x #{@dir.path.home}/.sisu/convert/wvSiSU.xml #{filename}.doc > #{filename}.wv})
- file_array=IO.readlines("#{filename}.wv", "")
- CONVERT::WareWord97.new(file_array, filename, instruct).songsheet # metaverse created here
- end
-end
-def do_html(argv, instruct)
- argv.each do |f|
- if f =~/.+?\.html?$/
- @argv << f[/(.+?)\.html?$/, 1]
- else
- print "not .html? << #{f} >> "
- end
- end
- @argv.each do |filename|
- file_end=if FileTest.file?("#{filename}.html")
- 'html'
- elsif FileTest.file?("#{filename}.htm")
- 'htm'
- end
- file_array=IO.readlines("#{filename}.#{file_end}","\n\r")
- CONVERT::Html.new(file_array,filename,instruct).songsheet # metaverse created here
- end
-end
-def do_default(argv, instruct)
- argv.each do |f|
- if f =~/.+?\.html$/
- @argv << f[/(.+?)\.html$/, 1]
- else
- print "not .html? << #{f} >> "
- end
- end
- @argv.each do |filename|
- file_array=IO.readlines("#{filename}.html", "\n\r")
- CONVERT::Default.new(file_array, filename, instruct).songsheet # metaverse created here
- end
-end
-def cases(argv, instruct)
- case instruct
- when/^--(word(97)?|doc)$/i #creates minimal sisu_small.gz package to send
- do_word(argv, instruct)
- when/^--(html)$/i #creates sisu.gz package to send
- do_html(argv, instruct)
- when/^--(default)$/i #creates sisu.gz package to send
- do_default(argv, instruct)
- else
- help
- end
-end
-$KCODE='u'
-branch='v2'
-@argv=Array.new
-argv=$*
-SiSU_version_dir=(argv.inspect=~/--v1/) ? 'v1' : 'v2'
-SiSU_lib="sisu/#{SiSU_version_dir}"
-require "#{SiSU_lib}/sysenv"
-include SiSU_Env
-@dir=SiSU_Env::Info_env.new
-instruct = "#{argv[0].to_s}"
-argv.shift
-instruct.chomp!
-instruct = "help" if instruct.nil? or instruct == "";
-cases(argv, instruct)