# encoding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
metadata harvest, extract authors and their writings from document set
* Author: Ralph Amissah
* Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: simple xml representation (sax style)
=end
module SiSU_Harvest_Authors
require_relative 'author_format' # author_format.rb
# Entry point of the author harvest: scans the source files named in the
# options, passes their @title/@creator/@date paragraphs to Harvest, merges
# the result into the author index and has OutputIndex write it out as HTML.
class Songsheet
  # Author index handed to Index on every run (shared by all instances).
  @@the_idx_authors={}
  # opt: run options; opt.files and opt.f_pths are read here
  # env: environment object, only forwarded to Harvest
  def initialize(opt,env)
    @opt=opt
    @env=env
    @file_list=opt.files
  end
  # Prints a heading, harvests every entry of @opt.f_pths (hashes with the keys
  # :f, :pth and :lng_is) and returns whatever OutputIndex#html_songsheet returns.
  def songsheet
    puts 'authors:'
    harvested={}
    @opt.f_pths.each do |src|
      headers_by_lang={}
      basename=src[:f]
      path=src[:pth] + '/' + src[:f]
      File.open(path,'r') do |fh|
        # the file is consumed in chunks terminated by an empty line
        fh.each_line("\n\n") do |para|
          if para =~/^@(?:title|creator|date):(?:\s|$)/m
            (headers_by_lang[src[:lng_is]] ||= []) << para
            next
          end
          # any other header paragraph is skipped
          next if para =~/^@\S+?:(?:\s|$)/m
          # NOTE(review): `^` matches at every line start, so this pattern also
          # accepts any chunk that contains an empty line, not only chunks that
          # begin with one - confirm whether `\A` was intended.
          next if para =~/^(?:\s*\n|%+ )/
          break
        end
      end
      headers_by_lang.each_pair do |lang,paras|
        harvested[lang] ||= []
        harvester=SiSU_Harvest_Authors::Harvest.new(@opt,@env,paras,path,basename,harvested,lang)
        harvested=harvester.extract_harvest
      end
    end
    indexer=SiSU_Harvest_Authors::Index.new(harvested,@@the_idx_authors)
    the_idx=indexer.construct_book_author_index
    SiSU_Harvest_Authors::OutputIndex.new(@opt,the_idx).html_print.html_songsheet
  end
end
# Extracts title, subtitle, author and publication year from the header
# paragraphs of one document and appends an entry for it to the per-language
# index.
class Harvest
  # Header patterns; capture group 1 holds the value of interest.
  AUTHOR_RGX=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m
  TITLE_RGX=/^@title:[ ]+(.+)/
  SUBTITLE_RGX=/^@title:.+?:subtitle:[ ]+(.+?)\n/m
  DATE_RGX=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m
  # Source-file extension, with an optional ~lang marker in front of it.
  SRC_SUFFIX_RGX=/(?:~[a-z]{2,3})?\.ss[mt]$/
  # opt:       run options (stored, not read here)
  # env:       must answer output_dir_structure.by_language_code?
  # data:      array of header paragraphs (strings) of one document
  # filename:  path of the document, copied into the entry
  # name:      file name of the document; its .sst/.ssm suffix is stripped
  # idx_array: { lang => [entry, ...] }, extended in place and returned
  # lang:      key under which the entry is filed
  def initialize(opt,env,data,filename,name,idx_array,lang)
    @opt,@env,@data,@filename,@name,@idx_array,@lang=opt,env,data,filename,name,idx_array,lang
  end
  # Scans the paragraphs and, when both a title and an author were found,
  # appends { filename:, file:, date:, title:, author:, page:, lang: } to
  # idx_array[lang]. Returns idx_array.
  def extract_harvest
    idx_array,lang=@idx_array,@lang
    @title=@subtitle=@fulltitle=@author_format=@date=nil
    @authors=[]
    @data.each do |para|
      if (m=TITLE_RGX.match(para)) then @title=m[1] end
      if (m=SUBTITLE_RGX.match(para)) then @subtitle=m[1] end
      if (m=AUTHOR_RGX.match(para)) then @author_format=m[1] end
      if (m=DATE_RGX.match(para)) then @date=m[1] end
      # stop once every field is known (the original tested @author, which is
      # never assigned, so it never stopped early)
      break if @title && @subtitle && @author_format && @date
    end
    # a subtitle can be present without an inline title; do not concatenate onto nil
    @fulltitle=(@title && @subtitle) ? "#{@title} - #{@subtitle}" : @title
    # tolerate callers that did not pre-seed the list for this language
    idx_array[lang] ||= []
    if @title && @author_format
      creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
      @authors,@authorship=creator[:authors],creator[:authorship]
      file=@name.sub(SRC_SUFFIX_RGX,'')
      page=if @env.output_dir_structure.by_language_code?
        "#{lang}/sisu_manifest.html"
      else
        "sisu_manifest.#{lang}.html"
      end
      idx_array[lang] << { filename: @filename, file: file, date: @date, title: @fulltitle, author: creator, page: page, lang: lang }
    end
    idx_array[lang]=idx_array[lang].flatten
    idx_array
  end
end
# Groups harvested document entries by author, per language.
class Index
  # idx_array: { lang => [entry, ...] }; each entry is a hash with the keys
  #            :filename, :file, :author, :title, :date, :page and :lang, where
  #            entry[:author][:last_first_format_a] lists the author names
  # the_idx:   hash to accumulate into; it is mutated in place and returned
  #
  # The accumulator is kept per instance. Rebinding a class variable here made
  # a later Index.new redirect the output of instances created earlier.
  def initialize(idx_array,the_idx)
    @idx_array,@the_idx=idx_array,the_idx
  end
  # Returns txt with its first character capitalised and the rest unchanged;
  # an empty string yields an empty string.
  def capital(txt)
    return '' if txt.empty?
    txt[0].capitalize + txt[1..-1]
  end
  # Files every entry under each of its (stripped) author names:
  #   the_idx[lang][author][:md] << entry fields
  # Returns the accumulator hash.
  def construct_book_author_index
    @idx_array.each_pair do |lang,entries|
      by_author=(@the_idx[lang] ||= {})
      entries.each do |idx|
        idx[:author][:last_first_format_a].each do |author|
          key=author.strip
          by_author[key]={ md: [] } if by_author[key].nil?
          by_author[key][:md] << { filename: idx[:filename], file: idx[:file], author: idx[:author], title: idx[:title], date: idx[:date], page: idx[:page], lang: idx[:lang] }
        end
      end
    end
    @the_idx
  end
end
# Writes the author index held in @the_idx to one HTML file per language
# (html_print.html_songsheet) and can list it on stdout (screen_print.cycle).
# NOTE(review): several heredoc/string bodies in this class look truncated in
# this copy of the file (for example a `WOK` terminator with no matching
# opener) - restore the markup templates from upstream before changing logic.
class OutputIndex
require_relative 'i18n' # i18n.rb
# opt:     run options; opt.base_stub and opt.cmd are read by this class
# the_idx: index hash; the HTML methods treat its top-level keys as language codes
def initialize(opt,the_idx)
@opt,@the_idx=opt,the_idx
@env=SiSU_Env::InfoEnv.new
@rc=SiSU_Env::GetInit.instance.sisu_yaml.rc
# '9' followed by A-Z; the first element is shifted off and becomes the starting @letter
@alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
@letter=@alph.shift
@vz=SiSU_Env::GetInit.instance.skin
end
# Opens one output file per language for writing and keeps the handle in
# @output[lng][:html]. The target directory is created when missing and the
# file location is printed to stdout as a file:// URL.
def html_file_open
@the_idx.keys.each do |lng|
@output ||={}
@output[lng] ||={}
harvest_pth,file='',''
# directory layout and file name depend on the configured output structure
if @env.output_dir_structure.by_language_code?
harvest_pth="#{@env.path.webserv}/#{@opt.base_stub}/#{lng}/manifest"
file="#{harvest_pth}/authors.html"
else
harvest_pth="#{@env.path.webserv}/#{@opt.base_stub}/manifest"
file="#{harvest_pth}/authors.#{lng}.html"
end
FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth)
puts "file://#{file}"
@output[lng][:html]=File.new(file,'w')
end
end
# Closes every language's :html handle, and :html_mnt when it holds a File.
# NOTE(review): nothing in the visible code assigns :html_mnt - confirm where
# (or whether) it is opened.
def html_file_close
@the_idx.keys.each do |lng|
@output[lng][:html].close
@output[lng][:html_mnt].close if @output[lng][:html_mnt].class==File
end
end
# Defines the HTML-writing helpers below as nested methods and returns self,
# so that callers can chain html_print.html_songsheet.
def html_print
# Runs the complete write sequence: open, head, alphabet bar, body, tail, close.
def html_songsheet
html_file_open
html_head
html_alph
html_body
html_tail
html_file_close
end
# Computes the stylesheet path and topics file name for the configured output
# structure and builds the string of harvested language names.
# lng:  language code of the page being written
# type: a value matching /maintenance/ selects the local 'harvest.css'
def html_head_adjust(lng,type='')
css_path,topics='',''
if @env.output_dir_structure.by_language_code?
css_path=(type !~/maintenance/) \
? '../../_sisu/css/harvest.css'
: 'harvest.css'
topics='topics.html'
elsif @env.output_dir_structure.by_filetype?
css_path=(type !~/maintenance/) \
? '../_sisu/css/harvest.css'
: 'harvest.css'
topics="topics.#{lng}.html"
elsif @env.output_dir_structure.by_filename?
css_path=(type !~/maintenance/) \
? '../_sisu/css/harvest.css'
: 'harvest.css'
topics="topics.#{lng}.html"
end
ln=SiSU_i18n::Languages.new.language.list
harvest_languages=''
# NOTE(review): the block parameter below reuses the name of the method
# parameter lng. The `else` branch is followed by a bare predicate call whose
# result is discarded - `elsif` or a plain `else` was presumably intended.
# harvest_pth and file are not used in the visible code; presumably the
# truncated markup linked to them - confirm against upstream.
@the_idx.keys.each do |lng|
if @env.output_dir_structure.by_language_code?
harvest_pth="../../#{lng}/manifest"
file="#{harvest_pth}/authors.html"
else @env.output_dir_structure.by_filetype?
harvest_pth='.'
file="#{harvest_pth}/authors.#{lng}.html"
end
l=ln[lng][:t]
harvest_languages += %{#{l} }
end
# NOTE(review): the page-head template that followed appears truncated in this
# copy (heredoc opener and most of its body are missing).
sv=SiSU_Env::InfoVersion.instance.get_version
<SiSU Metadata Harvest - Authors
WOK
end
# Appends the page head to each language's output.
# NOTE(review): :html_mnt is appended to whenever @opt.cmd.inspect contains
# 'M', without the File check used in html_tail and do_html.
def html_head
@the_idx.keys.each do |lng|
@output[lng][:html_mnt] << html_head_adjust(lng,'maintenance') if @opt.cmd.inspect =~/M/
@output[lng][:html] << html_head_adjust(lng)
end
end
# Joins one fragment per element of @alph (digit elements contribute an empty
# string) and appends the result to each language's output.
def html_alph
a=[]
a << '
'
@alph.each do |x|
a << ((x =~/[0-9]/) \
? ''
: %{#{x}, })
end
a=a.join
@the_idx.keys.each do |lng|
@output[lng][:html_mnt] << a if @opt.cmd.inspect =~/M/
@output[lng][:html] << a
end
end
# Appends the closing markup to each language's output.
# NOTE(review): the heredoc opener and body look truncated in this copy.
def html_tail
a =<
#{@vz.credits_sisu}
WOK
@the_idx.keys.each do |lng|
@output[lng][:html_mnt] << a if @output[lng][:html_mnt].class==File
@output[lng][:html] << a
end
end
# Appends html to the :html output of lng, and to :html_mnt when it holds a File.
def do_html(lng,html)
@output[lng][:html_mnt] << html if @output[lng][:html_mnt].class==File
@output[lng][:html] << html
end
# Advances @letter through @alph until it is no longer below the first
# non-space character of string[0].
# NOTE(review): the markup literal and part of the control flow look truncated
# in this copy.
def do_string_name(lng,attrib,string)
f=/^(\S)/.match(string[0])[1]
if @letter < f
while @letter < f
if @alph.length > 0
@letter=@alph.shift
if @output[lng][:html_mnt].class==File
@output[lng][:html_mnt] << %{\n
}
else break
end
end
end
end
# Walks the sorted entries of every language and writes them out.
# NOTE(review): most of this method body is missing in this copy.
def html_body
the_idx=@the_idx
the_idx.each_pair do |lng,lng_array|
lng_array.sort.each do |a|
do_string_name(lng,'',a)
name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
x = %{
}]))
: work
end
works.sort_by {|x| x[0]}.each do |x|
@output[lng][:html] << x[1]
@output[lng][:html_mnt] << x[2] if @output[lng][:html_mnt].class==File
end
end
end
end
self
end
# Defines cycle as a nested method and returns self, so that callers can chain
# screen_print.cycle.
def screen_print
# Prints each sorted key of @the_idx followed by the :file of every entry in
# its :md list, tab-indented.
# NOTE(review): this reads @the_idx as name => { md: [...] }, whereas the HTML
# methods treat the top-level keys as language codes - confirm which shape
# callers pass.
def cycle
the_idx=@the_idx
the_idx.sort.each do |a|
puts a[0]
a[1][:md].each do |x|
puts "\t" + x[:file]
end
end
end
self
end
end
end
__END__