From 65477054fd798728bf186aa2938727ddddbe86a5 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 22 May 2007 02:06:46 +0100 Subject: Imported upstream version 0.52.7 --- data/sisu/conf/convert/sisu_convert | 392 ++++++++++++++++++++++++++++++++++++ 1 file changed, 392 insertions(+) create mode 100644 data/sisu/conf/convert/sisu_convert (limited to 'data/sisu/conf/convert/sisu_convert') diff --git a/data/sisu/conf/convert/sisu_convert b/data/sisu/conf/convert/sisu_convert new file mode 100644 index 00000000..d7876083 --- /dev/null +++ b/data/sisu/conf/convert/sisu_convert @@ -0,0 +1,392 @@ +#!/usr/bin/env ruby +# = sisu - SiSU information Structuring Universe +# +# Copyright (c) Ralph Amissah 1997,2004 +# +# Ralph Amissah mailto:ralph@amissah.com +# +# * Name: SiSU information Structuring Universe +# * Author: Ralph@Amissah.com +# * Description: document conversion tool, to sisu from other formats +# * arch-tag: document conversion tool to sisu markup +# * $Date: 2004/10/16 15:51:06 $ +# * $Id: sisu_convert,v 1.37 2004/10/16 15:51:06 ralph Exp $ +# * License: GPL 2 or later +# * Notes: word conversion uses wvWare and wvSiSU.xml (a modified/stripped wvHtml.xml) +# * http://wvware.sourceforge.net/ +# * http://sourceforge.net/projects/wvware +# * |sisu.lnk|@|^| +# * +# * |zxy_param.rb|@|^| +module CONVERT + class MyOutput + def initialize(data, filename, instruct) + @data=data.compact + @filename=filename + @instruct=instruct + end + def headerBasic + <\n" #: <<#{@@html_title}>> + data=WareWord97.new(data.collect, @filename, @instruct).strip + data=WareWord97.new(data.collect, @filename, @instruct).strip + data=WareWord97.new(data.collect, @filename, @instruct).markup_rules + data=MyOutput.new(data.collect, @filename, @instruct).hardOutput + end + def strip + data=@data + tuned_file=Array.new + endnote_no=1 + data.each do |para| + para.strip! + para.gsub!(/\s*<\/u>/, '') + para.gsub!(/<\/u>\s*/, '') + para.gsub!(/\s*<\/b>/, '') + para.gsub!(/<\/b>\s*/, '') + para.gsub!(/\s*<\/i>/, '') + para.gsub!(/<\/i>\s*/, '') + tuned_file << para unless para == nil + end + tuned_file + end + def markup_rules + data=@data + tuned_file=Array.new + endnote_no=1 + data.each do |para| + para.strip! + para.gsub!(/\s+/, ' ') + para.gsub!(/^(Chapter|Article)(.+?)<\/b>/i, "4{ \\1 \\2") #watch case insensitivity + para.gsub!(/^(Part|Section|Book)(.+?)<\/b>/i, "3{ \\1 \\2") #watch case insensitivity + para.gsub!(/^(\d+\.\d+\.\d+)(.+?)<\/b>/i, "6{ \\1 \\2") #numeric, decide what to do, can be different + para.gsub!(/^(\d+\.\d+)(.+?)<\/b>/i, "5{ \\1 \\2") #numeric, decide what to do, can be different + para.gsub!(/^(\d+)(.+?)<\/b>/i, "4{ \\1 \\2") #numeric, decide what to do, can be different + para.gsub!(/(.+?)<\/u>/, "_{\\1}_") + para.gsub!(/(.+?)<\/b>/, "*{\\1}*") + para.gsub!(/(.+?)<\/i>/, "/{\\1}/") + tuned_file << para unless para == nil + end + tuned_file + end + end + class Html + def initialize(data, filename, instruct) + @data=data + @filename=filename + @instruct=instruct + end + def songsheet + data=@data + print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.er9 >\n" #: <<#{@@html_title}>> + #data=Html.new(data.collect, @filename, @instruct).space_paragraphs + #data=Html.new(data.split(''), @filename, @instruct).space_paragraphs + data=Html.new(data.split("\n"), @filename, @instruct).space_paragraphs + #data=Html.new(data.collect.join.split("\n"), @filename, @instruct).space_paragraphs + data=Html.new(data.collect, @filename, @instruct).multiline + data=Html.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules + data=MyOutput.new(data.collect, @filename, @instruct).hardOutput + end + def space_paragraphs + #data=@data.join.split(/\n/) + data=@data + #p data.length + tuned_file=Array.new + data.each do |para| + para.strip! + para.gsub!(/\r/, '') + #para.gsub!(/\n/, ' ') #PROBLEM, serious time issues on a few files also for \n (or multiline matches which is less surprising), edit out if necessary + para.gsub!(/<\/?p>/i, 'zZz') + para.gsub!(/<\/?\s*p(?:\s+ALIGN=.+?)?>/i, "zZz") #all manner of

para.gsub!(/<\/?p>/i, "\n\n") + para.gsub!(//i, "zZz") # + para.gsub!(/<\/p>/i, "zZz") # repeat actually + para.gsub!(/<(?:dir|tr|br)>/i, "zZz") # + #para.gsub!(/<(?:\/\s*)?(?:dir|tr|br)>/i, "zZz") # + para.gsub!(/(<\/center>)/i, "\\1zZz") + para.gsub!(/(<\/h[1-6]>)/i, "\\1zZz") + para.gsub!(/ \s+/i, ' ') + para.gsub!(/(?:\s*zZz\s*)+/i, "zZz") # + tuned_file << para unless para == nil + end + tuned_file + end + def blockquotes(sub='') # SERIOUS PROBLEM INTRODUCED, some blockquotes go missing !, quite unacceptable, debug, for now not used + res=Array.new + sub.each do |x| + if x=~/(<\/blockquote>)/i + m = $1 + res << x[/(.+?)#{m}/mi, 1].gsub!(/zZz/,"zZz_1 ") if x =~/.+?#{m}/mi + res << x[/#{m}(.+)/mi, 1] + else + res << x #[/(.+)/mi, 1] + end + end + res.join + end + def multiline + data=@data + tuned_file=Array.new + data.each do |para| + para.gsub!(/\n/, ' ') + para.gsub!(/ \s+/mi, ' ') + #ALL HERE could be very time EXPENSIVE but tamed? compromise ... /mi + para.gsub!(/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3") + para.gsub!(/<([biu]|h[1-6])>(?:

|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i, "zZz<\\1>\\2 \\3") + #para.gsub!(/<([biu]|h[1-6])>(?:
|zZz)+(.+?)<\/center>zZz(.+?)?<\/\1>/i, "zZz<\\1>\\2 \\3") + para.gsub!(/<([biu]|h[1-6])>(?:
|zZz)+(.+?)<\/\1>/i, "zZz<\\1>\\2") + para.gsub!(/<(h[1-6])>(.+?)(?:
|zZz)+<\/\1>/i, "zZz<\\1>\\2zZz") #does catch some h1, h2 etc, too expensive to have biu + #para.gsub!(/<([biu]|h[1-6])>(.+?)(?:
|zZz)+<\/\1>/i, "zZz<\\1>\\2 \\3") #may go too far? useful for h1 h2 etc, remove biu? + #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3") + #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3") + ### SERIOUS PROBLEM INTRODUCED + # sub = para.split(/
/i) + # para = blockquotes(sub) if sub.length > 0 #check was on >1 could have serious repercussions 2004w29 + para.gsub!(/zZz(\s*zZz)*/, "\n\n") + tuned_file << para << "\n\n" unless para == nil + end + tuned_file + end + def markup_rules + data=@data + tuned_file=Array.new + data.each do |para| + if para=~//i + #p para.grep(//i) + #m=$1 + #para.gsub!(/(?:<\s*)?#{m}<\/a>(?:\s*>)?\.?/i, "#{m}") + para.gsub!(/(?:<\s*)?http:\/\/.+?<\/a>(?:\s*>)?\.?/i, "\\1") #risk that url & url are not to match + #para.gsub!(/(?:<\s*)?(http:\/\/.+?\/\1)<\/a>(?:\s*>)?\.?/i, "\\2") #does not match + end + ### clean + para.gsub!(/^\s+/i, '') + para.gsub!(/<([bui]|em|su[pb])>\s*<\/\1>/i, '') + para.gsub!(/<\/?center>/i, '') + para.gsub!(/\s*<\/dir>/i, '') + para.gsub!(/
/i, '') + para.gsub!(/\s*
\[(\*+)\]<\/a>/i, "^{[\\1]}^ ") #other endnote marker + para.gsub!(/\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~e ') #endnote marker + para.gsub!(/\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~e ') #endnote marker + para.gsub!(/\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i, '~{{ ') #endnote + #para.gsub!(/\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") # + para.gsub!(/\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") # + para.gsub!(/^(Chapter|Article)(.+?)<\/b>/i, "4{ \\1 \\2") #watch case insensitivity + para.gsub!(/^(Part|Section|Book)(.+?)<\/b>/i, "3{ \\1 \\2") #watch case insensitivity + para.gsub!(/^(\d+\.\d+\.\d+)(.+?)<\/b>/i, "6{ \\1 \\2") #numeric, decide what to do, can be different + para.gsub!(/^(\d+\.\d+)(.+?)<\/b>/i, "5{ \\1 \\2") #numeric, decide what to do, can be different + para.gsub!(/^(\d+)(.+?)<\/b>/i, "4{ \\1 \\2") #numeric, decide what to do, can be different + # + para.gsub!(/^()(?:)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i, "5{ \\2 \\1") #watch + para.gsub!(/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/, "5{ \\2 \\1") #watch + para.gsub!(/^(\s+|

)?()(\s*|<\/a>)?/i, "5{ \\2 \\1") #watch + para.gsub!(/\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") # + para.gsub!(/^\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i, "4{ \\1\\2") # wish it all were less messy + para.gsub!(/^\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i, "5{ \\1\\2") # wish it all were less messy + para.gsub!(/<\/?[biu]>/i, '') if para =~/[1-6]\{/ + para.gsub!(/\s*(.+?)\s*<\/u>/i, "_{\\1}_") + para.gsub!(/<(b|strong)>\s*(.+?)\s*<\/\1>/i, "*{\\2}*") + para.gsub!(/<(i|em)>\s*(.+?)\s*<\/\1>/i, "/{\\2}/") + para.gsub!(/\s*(.+?)\s*<\/sup>/i, "^{\\1}^") + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s\s+/i, "\\1 ") + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i, "\\1\\3 ") + para.gsub!(/(([\/\*!_])\{.+?\}\2)(["'])\s+/i, "\\1\\3 ") + para.gsub!(/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i, "\\1 \\3") + para.gsub!(/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i, "\\1 \\3") + para.gsub!(/([a-z0-9])(([\/\*_])\{.+?\}\3)/i, " \\1 \\2") #eg this/{problem}/ + para.gsub!(/([\/\*_])\{([,.;; ]+)\}\1/i, "\\2") #eg /{,}/ or *{ }* etc. + para.gsub!(/ \s+/i, ' ') + #para.gsub!(/\/\{\*\{/i, '*{/{') + #para.gsub!(/\}\*\}\//i, '}/}*') + para.gsub!(/"/i, '"') + para.gsub!(/&/i, 'and') + para.gsub!(//i, '') + para.gsub!(/<\/(?:title)>/i, '') + para.gsub!(//i, '#{~title? ') + para.gsub!(/<blockquote>(.+?)<\/blockquote>/mi, "\n\n_1 \\1\n\n") + para.gsub!(/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i, '') + para.gsub!(/~e\s+\.\s*/i, ".~e ") #check vim equiv # %s/\~e\s\+\.\s*/.\~e /c + para.gsub!(/\s+~e\s+/i, "~e ") + para.gsub!(/ \s+/i, ' ') + para.gsub!(/\s+$/i, '') + para.gsub!(/^(?:<\/[bi]>)+$/i, '') + para.gsub!(/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i, "5{ \\1\\2") # wish it all were less messy + para.gsub!(/^(?:<\/?(?:[ib]|em)>\s*)+$/i, '') # cleaning up left over <i> etc. + para.gsub!(/<(?:i|em)>\s*(.+)/i, "/{\\1}/") # using up left over <i> + para.gsub!(/<b>\s*(.+)/i, "*{\\1}*") # using up left over <b> + #para.gsub!(/^(?:<(?:\/)?[bi]>)+$/i, '') + tuned_file << para unless para == nil + end + tuned_file + end + end + class Default < Html + def initialize(data, filename, instruct) + @data=data + @filename=filename + @instruct=instruct + end + def songsheet + data=@data + print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.er9 >\n" #: <<#{@@html_title}>> + data=Default.new(data.collect, @filename, @instruct).space_paragraphs + data=Default.new(data.collect, @filename, @instruct).multiline + data=Default.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules + data=Default.new(data.collect, @filename, @instruct).markup_default + data=MyOutput.new(data.collect, @filename, @instruct).hardOutput + end + def markup_default + data=@data + tuned_file=Array.new + data.each do |para| + para.gsub!(/<i>(Id\.?)(\s|$)/i, "/\{\\1\}\\2/") + para.gsub!(/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i, "\\1") + para.gsub!(/\/\{Id\.\s*<\/LI>\s*\}\//i, '/{Id.}/') + tuned_file << para unless para == nil + end + tuned_file + end + end +end +def help + puts <<WOK +conversion program +initial SiSU markup from other file formats + + zxy_convert --word does initial conversion from word97 to sisu markup, expects [filename].doc (can also use --doc) + zxy_convert --html does initial conversion from html to sisu markup, expects [filename].html + zxy_convert --default does initial conversion from defalt html to sisu markup, expects [filename].html + +WOK +end +def doWord(argv, instruct) + argv.each do |f| + if f =~/.+?\.doc$/ + @argv << f[/(.+?)\.doc$/, 1] + else + print "not .doc? << #{f} >> " + end + end + @argv.each do |filename| + system(%{wvWare -x #{@dir.home}/.sisu/convert/wvSiSU.xml #{filename}.doc > #{filename}.wv}) + file_array=IO.readlines("#{filename}.wv", "") + CONVERT::WareWord97.new(file_array, filename, instruct).songsheet # metaverse created here + end +end +def doHtml(argv, instruct) + argv.each do |f| + if f =~/.+?\.html$/ + @argv << f[/(.+?)\.html$/, 1] + else + print "not .html? << #{f} >> " + end + end + @argv.each do |filename| + file_array=IO.readlines("#{filename}.html", "\n\r") + CONVERT::Html.new(file_array, filename, instruct).songsheet # metaverse created here + end +end +def doDefault(argv, instruct) + argv.each do |f| + if f =~/.+?\.html$/ + @argv << f[/(.+?)\.html$/, 1] + else + print "not .html? << #{f} >> " + end + end + @argv.each do |filename| + file_array=IO.readlines("#{filename}.html", "\n\r") + CONVERT::Default.new(file_array, filename, instruct).songsheet # metaverse created here + end +end +def cases(argv, instruct) + case instruct + when/^--(word(97)?|doc)$/i #creates minimal sisu_small.gz package to send + doWord(argv, instruct) + when/^--(html)$/i #creates sisu.gz package to send + doHtml(argv, instruct) + when/^--(default)$/i #creates sisu.gz package to send + doDefault(argv, instruct) + else + help + end +end +require 'zxy_sysenv.rb' +include SiSU_Env +@dir=SiSU_Env::Info_dir.new +@argv=Array.new +argv=$* +instruct = "#{argv[0].to_s}" +argv.shift +instruct.chomp! +instruct = "help" if instruct.nil? or instruct == ""; +cases(argv, instruct) -- cgit v1.2.3