From 4b51bc00cda70d3c118401a74f1704df38c947a3 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 1 Feb 2011 09:48:30 -0500 Subject: v3 introduced as development branch, invoked using "sisu --v3 [instructions] --- lib/sisu/v3/harvest_authors.rb | 316 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) create mode 100644 lib/sisu/v3/harvest_authors.rb (limited to 'lib/sisu/v3/harvest_authors.rb') diff --git a/lib/sisu/v3/harvest_authors.rb b/lib/sisu/v3/harvest_authors.rb new file mode 100644 index 00000000..53003bb4 --- /dev/null +++ b/lib/sisu/v3/harvest_authors.rb @@ -0,0 +1,316 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + metadata harvest, extract authors and their writings from document set + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: simple xml representation (sax style) + +=end +module HARVEST_authors + require "#{SiSU_lib}/author_format" # author_format.rb + @@the_idx_authors=[] + class Songsheet + def initialize(opt) + @opt=opt + @file_list=opt.files + @env=SiSU_Env::Info_env.new + end + def songsheet + files,idx_array=[],[] + @file_list.each do |f| + (f =~/.+?\.ss[tm]$/) \ + ? (files << f[/(.+?\.ss[tm])$/,1]) \ + : (print "not .sst or .ssm ? << #{f} >> ") + end + files.each do |filename| + file_array=[] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|date):(?:\s|$)/m + file_array << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|%+ )/ + else break + end + end + end + idx_array=HARVEST_authors::Harvest.new(file_array,filename,idx_array).extract_harvest + end + the_idx=HARVEST_authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index + HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet + puts "file://#{@env.path.output_md_harvest}/harvest_authors.html" + puts "file://#{@env.path.pwd}/harvest_authors.html" if @opt.cmd.inspect =~/M/ + end + end + class Harvest + def initialize(data,filename,idx_array) + @data,@filename,@idx_array=data,filename,idx_array + end + def extract_harvest + data,filename,idx_array=@data,@filename,@idx_array + @title,@subtitle,@fulltitle,@author,@author_format,@date=nil,nil,nil,nil,nil,nil + @authors=[] + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ 
]+)(\d{4})/m + data.each do |para| + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + if para=~ rgx[:date] + @date=rgx[:date].match(para)[1] + end + break if @title and @subtitle and @author and @date + end + @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title + if @title and @author_format + creator=FORMAT::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if filename=~/~[a-z]{2,3}\.ss[mt]$/ + lang='.' + /~([a-z]{2,3})\.ss[mt]$/.match(filename)[1] + filename.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + lang='' + filename.sub(/\.ss[mt]$/,'') + end + page="sisu_manifest#{lang}.html" + idx_array <<= { :filename => filename, :file => file, :date => @date, :title => @fulltitle, :author => creator, :page => page } + else + #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" + end + idx_array.flatten! + idx_array + end + end + class Index + def initialize(idx_array,the_idx) + @idx_array,@the_idx=idx_array,the_idx + @@the_idx_authors=@the_idx + end + def capital(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def construct_book_author_index + idx_array=@idx_array + idx_array.each do |idx| + idx[:author][:last_first_format_a].each do |author| + author.strip! 
+ if @@the_idx_authors[author].class==NilClass + @@the_idx_authors[author]={:md => []} + end + @@the_idx_authors[author][:md] << { :filename => idx[:filename], :file => idx[:file], :author => idx[:author], :title => idx[:title], :date => idx[:date], :page => idx[:page] } + end + end + @the_idx=@@the_idx_authors + end + end + class Output_index + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::Info_env.new + @rc=Get_init.instance.sisu_yaml.rc + @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @letter=@alph.shift + @vz=SiSU_Env::Get_init.instance.skin + end + def html_file_open + @output={} + @output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_authors.html",'w') + @output[:html_mnt]=(@opt.cmd.inspect =~/M/) \ + ? File.new("#{@env.path.pwd}/harvest_authors.html",'w') \ + : nil + end + def html_file_close + @output[:html].close + @output[:html_mnt].close if @output[:html_mnt].class==File + end + def html_print + def html_songsheet + html_file_open + html_head + html_alph + html_body + html_tail + html_file_close + end + def html_head_adjust(type='') + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' \ + : 'harvest.css' + sv=SiSU_Env::Info_version.instance.get_version + < + + +SiSU Metadata Harvest - Authors + + + + + + + + + + + + +

SiSU Metadata Harvest - Authors

+

[ HOME ] also see SiSU Metadata Harvest - Topics

+

#{@env.widget_static.search_form}

+
+WOK + end + def html_head + @output[:html_mnt] << html_head_adjust('maintenance') if @opt.cmd.inspect =~/M/ + @output[:html] << html_head_adjust + end + def html_alph + a=[] + a << '

' + @alph.each do |x| + a << ((x =~/[0-9]/) ? '' : %{#{x}, }) + end + @output[:html_mnt] << a.join if @output[:html_mnt].class==File + @output[:html] << a.join + end + def html_tail + a=[] + a <<< + + + + + + +#{@vz.credits_sisu} + + +WOK + @output[:html_mnt] << a if @output[:html_mnt].class==File + @output[:html] << a + end + def do_html(html) + @output[:html_mnt] << html if @output[:html_mnt].class==File + @output[:html] << html + end + def do_string(attrib,string) + html=%{

#{string}

} + do_html(html) + end + def do_string_name(attrib,string) + f=/^(\S)/.match(string[0])[1] + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[:html_mnt].class==File + @output[:html_mnt] << %{\n

#{@letter}

} + end + @output[:html] << %{\n

#{@letter}

} + else break + end + end + end + end + def html_body + the_idx=@the_idx + the_idx.sort.each do |a| + do_string_name('',a) + name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + x = %{

#{a[0]}

} + if @output[:html_mnt].class==File + @output[:html_mnt] << x + end + @output[:html] << x + works=[] + a[1][:md].each do |x| + work=[ "#{x[:date]} #{x[:title]}", %{

#{x[:date]} #{x[:title]}, #{x[:author][:authors_s]}

} ] + works<<=(@output[:html_mnt].class==File) \ + ? (work.concat([%{

[src]  #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]} -- [#{x[:file]}.sst]

}])) \ + : work + end + works.sort_by {|x| x[0]}.each do |x| + @output[:html] << x[1] + @output[:html_mnt] << x[2] if @output[:html_mnt].class==File + end + end + end + self + end + def screen_print + def cycle + the_idx=@the_idx + the_idx.sort.each do |a| + puts a[0] + a[1][:md].each do |x| + puts "\t" + x[:file] + end + end + end + self + end + end +end +__END__ -- cgit v1.2.3