# encoding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011, 2012, 2013 Ralph Amissah, All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see http://www.gnu.org/licenses/.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Git
* Ralph Amissah
** Description: modules shared by flatfile output generators
=end
module SiSU_TextUtils
class Wrap
  # Greedy word-wrapper for flat-file (plain text) output.
  #
  # para       - String to wrap (or an Array of Strings when #array_wrap is used).
  # n_char_max - Integer width limit; a word is moved to a new line once the
  #              current line length plus the word length exceeds
  #              n_char_max - n_indent (default 76).
  # n_indent   - Integer number of spaces placed in front of every line after
  #              the first (default 0).
  # n_hang     - Integer number of spaces placed in front of the first line;
  #              when nil/false the value of n_indent is used.
  def initialize(para='',n_char_max=76,n_indent=0,n_hang=nil)
    @para,@n_char_max,@n_indent=para,n_char_max,n_indent
    @br="\n"
    @n_hang=n_hang || @n_indent
  end
  # Wraps @para and returns the result as a single String.
  #
  # Pre-processing: every newline in the paragraph is rewritten to ' \ '
  # (space, backslash, space) and every Mx[:br_nl] marker becomes "\n\n",
  # which forces a new output line.
  #
  # The paragraph is worked on as a local copy, so @para is left untouched and
  # repeated calls give the same result.
  #
  # Raises NoMethodError if @para does not respond to gsub (e.g. nil).
  #
  # NOTE(review): the newline patterns below were written in the original as
  # regexp literals broken across two source lines; that may be markup lost
  # from the file rather than an intentional newline match - confirm against
  # the upstream source.
  def line_wrap
    indent_join="#{@br}#{' '*@n_indent}"
    text=@para.gsub(/\n/,' \\ ').
      gsub(/#{Mx[:br_nl]}/,"\n\n")
    out=['']
    line=0
    text.scan(/\n\n|\s+\\\s+|\n|\S+/m).each do |word|
      if word =~/\n\n/
        # Forced break. This test must come before the single-newline test,
        # which would otherwise swallow the token and drop the break.
        line += 1
        out[line]=''
        next
      elsif word =~/\n/
        # Stray newline token: contributes nothing, only cleans the current line.
        out[line]=out[line].gsub(/\n/,'')
        next
      end
      if (out[line].length + word.length) > (@n_char_max - @n_indent) \
      && out[line] =~/\S+/
        # Current line is full (and not blank): tidy it and start the next one.
        out[line]=out[line].squeeze(' ')
        line += 1
      end
      current=out[line]
      out[line]=if current.nil?
        word.strip                 # first word on a fresh line
      elsif current !~/\S+$/m
        "#{current}#{word}"        # line is empty or ends in whitespace
      else
        "#{current} #{word}"       # separate from the previous word
      end
    end
    (' '*@n_hang) + out.join(indent_join)
  end
  # Wraps with both the first-line and follow-on indent set to 2 spaces.
  # Overwrites @n_indent and @n_hang on this instance.
  def line_wrap_indent1
    @n_indent,@n_hang=2,2
    line_wrap
  end
  # Wraps with a 2-space first line and 4-space follow-on lines.
  # Overwrites @n_indent and @n_hang on this instance.
  def line_wrap_endnote
    @n_indent,@n_hang=4,2
    line_wrap
  end
  # When @para is an Array, wraps each element with this instance's settings
  # and returns the Array of wrapped Strings (also kept in @arr).
  # When @para is not an Array, returns @arr unchanged (nil unless set before).
  def array_wrap
    return @arr unless @para.is_a?(Array)
    @arr=@para.map do |line|
      self.class.new(line,@n_char_max,@n_indent,@n_hang).line_wrap
    end
  end
end
class HeaderScan
  # Ordered header rules, tried top to bottom by #start_is_match.
  # Each entry is [tag pattern, label, type, attrib]:
  #   tag pattern - regexp source matched between Mx[:meta_o] and Mx[:meta_c]
  #   label       - label handed to #header; nil means "use the matched tag"
  #   type/attrib - passed through to #header unchanged
  # The "dc N" remarks are carried over from the original code
  # (presumably Dublin Core element numbers - confirm).
  HEADER_RULES=[
    ['title',                      nil,              'meta',   'dc'],       #dc 1 (text comes from @md.title.full)
    ['creator|author',             'creator',        'meta',   'dc'],       #dc 2
    ['subject',                    nil,              'meta',   'dc'],       #dc 3
    ['description',                nil,              'meta',   'dc'],       #dc 4
    ['publisher',                  nil,              'meta',   'dc'],       #dc 5
    ['contributor',                nil,              'meta',   'dc'],       #dc 6
    ['date',                       nil,              'meta',   'dc'],       #dc 7
    ['date\.created',              nil,              'meta',   'extra'],
    ['date\.issued',               nil,              'meta',   'extra'],
    ['date\.available',            nil,              'meta',   'extra'],
    ['date\.valid',                nil,              'meta',   'extra'],
    ['date\.modified',             nil,              'meta',   'extra'],
    ['type',                       nil,              'meta',   'dc'],       #dc 8
    ['format',                     nil,              'meta',   'dc'],       #dc 9
    ['identifier',                 nil,              'meta',   'dc'],       #dc 10
    ['source',                     nil,              'meta',   'dc'],       #dc 11
    ['language',                   nil,              'meta',   'dc'],       #dc 12
    ['relation',                   nil,              'meta',   'dc'],       #dc 13
    ['coverage',                   nil,              'meta',   'dc'],       #dc 14
    ['rights',                     nil,              'meta',   'dc'],       #dc 15
    ['keywords',                   nil,              'meta',   'extra'],
    ['copyright',                  nil,              'meta',   'extra'],
    ['translator|translated_by',   'translator',     '',       ''],
    ['illustrator|illustrated_by', 'illustrator',    '',       ''],
    ['prepared_by',                nil,              'meta',   'extra'],
    ['digitized_by',               nil,              'meta',   'extra'],
    ['comments?',                  nil,              'meta',   'extra'],
    ['abstract',                   nil,              'meta',   'extra'],
    ['tags?',                      nil,              'meta',   'extra'],
    ['catalogue',                  nil,              'meta',   'extra'],
    ['class(?:ify)?_loc',          'classify_loc',   'meta',   'extra'],
    ['class(?:ify)?_dewey',        'classify_dewey', 'meta',   'extra'],
    ['class(?:ify)?_pg',           'classify_pg',    'meta',   'extra'],
    ['class(?:ify)?_isbn',         'classify_isbn',  'meta',   'extra'],
    ['toc|structure',              'structure',      'process','instruct'],
    ['level|page|markup',          'markup',         'process','instruct'],
    ['bold',                       nil,              'process','instruct'],
    ['italics|itali[sz]e',         'italicize',      'process','instruct'],
    ['vocabulary|wordlist',        'vocabulary',     'process','instruct'],
    ['css|stylesheet',             'css',            'process','instruct'],
    ['links',                      nil,              'process','instruct'],
    ['prefix',                     nil,              'process','instruct'], #add a & b
    ['suffix',                     nil,              'process','instruct'],
    ['information',                nil,              'process','instruct'],
    ['contact',                    nil,              'process','instruct'],
    ['rcs|cvs',                    'version',        'process','instruct'],
  ].freeze
  # md   - object that answers title.full (only consulted for a title header).
  # para - the text to be scanned for a header line.
  def initialize(md,para)
    @md,@p=md,para
  end
  # Returns [tag, tag_content, {tag => tag_content}] when both tag and
  # tag_content are given, otherwise nil. type and attrib are accepted for
  # signature compatibility and are not used.
  # (The original body referenced the undefined names dc_tag/dc_content and
  # therefore raised NameError whenever it was called.)
  def extract(tag,tag_content,type,attrib)
    return nil unless tag && tag_content
    [tag,tag_content,{tag=>tag_content}]
  end
  # Stores the details of a recognised header on this object and returns self,
  # so the result can be queried through the reader methods below.
  def header(tag,tag_content,type='',attrib='') #this will break stuff and must be tested thoroughly 20060825
    @tag,@tag_content,@type,@attrib=tag,tag_content,type,attrib
    self
  end
  # Readers for the values stored by #header. They are defined once on the
  # class; the original re-defined them inside every #header call.
  def label #element
    @tag
  end
  alias element label
  alias el label
  def type
    @type
  end
  def text
    @tag_content
  end
  alias info text #element text
  def attribute
    @attrib
  end
  alias attrib attribute
  # Tries each HEADER_RULES entry in order against @p. On the first match it
  # calls #header and returns its result (self); returns nil if nothing matches.
  # Rule order is significant when @p holds several lines.
  def start_is_match
    HEADER_RULES.each do |tag_rx,label,type,attrib|
      next unless /^#{Mx[:meta_o]}(#{tag_rx})#{Mx[:meta_c]}\s*(.+?)$/ === @p
      found=Regexp.last_match
      # The title header takes its text from the document metadata, not from @p.
      content=(tag_rx == 'title') ? @md.title.full : found[2]
      return header(label || found[1],content,type,attrib)
    end
    nil
  end
  # Returns the result of #start_is_match when @p starts a line with something
  # delimited by Mx[:meta_o] ... Mx[:meta_c], otherwise nil.
  def meta
    start_is_match if @p =~/^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/
  end
  alias dublin meta
end
end
__END__