From a6651effc29a5805599657e3cbeb6c44de62be1e Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Thu, 9 Jun 2011 22:12:11 -0400 Subject: v3: harvest, work for alt output structures; for each language * paths to document manifests etc. for the possible output dir structures * author and topic lists for available documents, by availability in each language --- lib/sisu/v3/harvest_authors.rb | 233 ++++++++++++++++++++++++----------------- 1 file changed, 136 insertions(+), 97 deletions(-) (limited to 'lib/sisu/v3/harvest_authors.rb') diff --git a/lib/sisu/v3/harvest_authors.rb b/lib/sisu/v3/harvest_authors.rb index 75d2a810..c59a9111 100644 --- a/lib/sisu/v3/harvest_authors.rb +++ b/lib/sisu/v3/harvest_authors.rb @@ -59,47 +59,45 @@ =end module HARVEST_authors require_relative 'author_format' # author_format.rb - @@the_idx_authors=[] class Songsheet - def initialize(opt) - @opt=opt + @@the_idx_authors={} + def initialize(opt,env) + @opt,@env=opt,env @file_list=opt.files - @env=SiSU_Env::Info_env.new end def songsheet - files,idx_array=[],[] - @file_list.each do |f| - (f =~/.+?\.ss[tm]$/) \ - ? (files << f[/(.+?\.ss[tm])$/,1]) \ - : (print "not .sst or .ssm ? << #{f} >> ") - end - files.each do |filename| - file_array=[] + puts 'authors:' + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] File.open(filename,'r') do |file| file.each_line("\n\n") do |line| if line =~/^@(?:title|creator|date):(?:\s|$)/m - file_array << line + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line elsif line =~/^@\S+?:(?:\s|$)/m \ or line =~/^(?:\s*\n|%+ )/ else break end end end - idx_array=HARVEST_authors::Harvest.new(file_array,filename,idx_array).extract_harvest + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||= [] + idx_array=HARVEST_authors::Harvest.new(@opt,@env,a,filename,name,idx_array,lang).extract_harvest + end end the_idx=HARVEST_authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet - harvest_pth="#{@env.path.webserv}/#{@env.stub_md_harvest}" - puts "file://#{harvest_pth}/harvest_authors.html" - puts "file://#{@env.path.pwd}/harvest_authors.html" if @opt.cmd.inspect =~/M/ end end class Harvest - def initialize(data,filename,idx_array) - @data,@filename,@idx_array=data,filename,idx_array + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt,@env,@data,@filename,@name,@idx_array,@lang=opt,env,data,filename,name,idx_array,lang end def extract_harvest - data,filename,idx_array=@data,@filename,@idx_array + data,filename,name,idx_array,lang=@data,@filename,@name,@idx_array,@lang @title,@subtitle,@fulltitle,@author,@author_format,@date=nil,nil,nil,nil,nil,nil @authors=[] rgx={} @@ -123,22 +121,25 @@ module HARVEST_authors break if @title and @subtitle and @author and @date end @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title - if @title and @author_format + if @title \ + and @author_format creator=FORMAT::Author.new(@author_format.strip).author_details @authors,@authorship=creator[:authors],creator[:authorship] - file=if filename=~/~[a-z]{2,3}\.ss[mt]$/ - lang='.' + /~([a-z]{2,3})\.ss[mt]$/.match(filename)[1] - filename.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') else - lang='' - filename.sub(/\.ss[mt]$/,'') + name.sub(/\.ss[mt]$/,'') end - page="sisu_manifest#{lang}.html" - idx_array <<= { filename: filename, file: file, date: @date, title: @fulltitle, author: creator, page: page } + page=if @env.output_dir_structure.by_language_code? + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<= { filename: filename, file: file, date: @date, title: @fulltitle, author: creator, page: page, lang: lang } else #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" end - idx_array.flatten! + idx_array[lang].flatten! idx_array end end @@ -152,38 +153,55 @@ module HARVEST_authors end def construct_book_author_index idx_array=@idx_array - idx_array.each do |idx| - idx[:author][:last_first_format_a].each do |author| - author.strip! - if @@the_idx_authors[author].class==NilClass - @@the_idx_authors[author]={ md: [] } + idx_array.each_pair do |lang,idx_array| + @@the_idx_authors[lang] ||= {} + idx_array.each do |idx| + idx[:author][:last_first_format_a].each do |author| + author.strip! + if @@the_idx_authors[lang][author].class==NilClass + @@the_idx_authors[lang][author]={ md: [] } + end + @@the_idx_authors[lang][author][:md] << { filename: idx[:filename], file: idx[:file], author: idx[:author], title: idx[:title], date: idx[:date], page: idx[:page], lang: idx[:lang] } end - @@the_idx_authors[author][:md] << { filename: idx[:filename], file: idx[:file], author: idx[:author], title: idx[:title], date: idx[:date], page: idx[:page] } end end @the_idx=@@the_idx_authors end end class Output_index + require_relative 'i18n' # i18n.rb + require 'fileutils' + include FileUtils def initialize(opt,the_idx) @opt,@the_idx=opt,the_idx @env=SiSU_Env::Info_env.new - @rc=Get_init.instance.sisu_yaml.rc + @rc=SiSU_Env::Get_init.instance.sisu_yaml.rc @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] @letter=@alph.shift @vz=SiSU_Env::Get_init.instance.skin end def html_file_open - @output={} - harvest_pth="#{@env.path.webserv}/#{@env.stub_md_harvest}" - @output[:html]=File.new("#{harvest_pth}/harvest_authors.html",'w') - @output[:html_mnt]=(@opt.cmd.inspect =~/M/) \ - ? File.new("#{@env.path.pwd}/harvest_authors.html",'w') \ - : nil + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by_language_code? + harvest_pth="#{@env.path.webserv}/#{@opt.base_stub}/#{lng}/manifest" + file="#{harvest_pth}/authors.html" + else + harvest_pth="#{@env.path.webserv}/#{@opt.base_stub}/manifest" + file="#{harvest_pth}/authors.#{lng}.html" + end + mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth) + puts "file://#{file}" + @output[lng][:html]=File.new(file,'w') + end end def html_file_close - @output[:html].close - @output[:html_mnt].close if @output[:html_mnt].class==File + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close if @output[lng][:html_mnt].class==File + end end def html_print def html_songsheet @@ -194,19 +212,36 @@ module HARVEST_authors html_tail html_file_close end - def html_head_adjust(type='') - css_path=if @env.output_dir_structure.by_language_code? + def html_head_adjust(lng,type='') + css_path,topics='','' + if @env.output_dir_structure.by_language_code? css_path=(type !~/maintenance/) \ ? '../../_sisu/css/harvest.css' \ : 'harvest.css' + topics='topics.html' elsif @env.output_dir_structure.by_filetype? css_path=(type !~/maintenance/) \ ? '../_sisu/css/harvest.css' \ : 'harvest.css' + topics="topics.#{lng}.html" elsif @env.output_dir_structure.by_filename? css_path=(type !~/maintenance/) \ ? '../_sisu/css/harvest.css' \ : 'harvest.css' + topics="topics.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lng| + if @env.output_dir_structure.by_language_code? + harvest_pth="../../#{lng}/manifest" + file="#{harvest_pth}/authors.html" + else @env.output_dir_structure.by_filetype? + harvest_pth='.' + file="#{harvest_pth}/authors.#{lng}.html" + end + l=ln[lng][:t] + harvest_languages += %{#{l}   } end sv=SiSU_Env::Info_version.instance.get_version <

SiSU Metadata Harvest - Authors

-

[ HOME ] also see SiSU Metadata Harvest - Topics

+

[ HOME ] also see SiSU Metadata Harvest - Topics

#{@env.widget_static.search_form}


+

#{harvest_languages}

+
WOK end def html_head - @output[:html_mnt] << html_head_adjust('maintenance') if @opt.cmd.inspect =~/M/ - @output[:html] << html_head_adjust + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << html_head_adjust(lng,'maintenance') if @opt.cmd.inspect =~/M/ + @output[lng][:html] << html_head_adjust(lng) + end end def html_alph a=[] a << '

' @alph.each do |x| - a << ((x =~/[0-9]/) ? '' : %{#{x}, }) + a << ((x =~/[0-9]/) \ + ? '' \ + : %{#{x}, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a if @opt.cmd.inspect =~/M/ + @output[lng][:html] << a end - @output[:html_mnt] << a.join if @output[:html_mnt].class==File - @output[:html] << a.join end def html_tail - a=[] - a <<< @@ -260,27 +303,25 @@ WOK WOK - @output[:html_mnt] << a if @output[:html_mnt].class==File - @output[:html] << a - end - def do_html(html) - @output[:html_mnt] << html if @output[:html_mnt].class==File - @output[:html] << html + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a if @output[lng][:html_mnt].class==File + @output[lng][:html] << a + end end - def do_string(attrib,string) - html=%{

#{string}

} - do_html(html) + def do_html(lng,html) + @output[lng][:html_mnt] << html if @output[lng][:html_mnt].class==File + @output[lng][:html] << html end - def do_string_name(attrib,string) + def do_string_name(lng,attrib,string) f=/^(\S)/.match(string[0])[1] if @letter < f while @letter < f if @alph.length > 0 @letter=@alph.shift - if @output[:html_mnt].class==File - @output[:html_mnt] << %{\n

#{@letter}

} + if @output[lng][:html_mnt].class==File + @output[lng][:html_mnt] << %{\n

#{@letter}

} end - @output[:html] << %{\n

#{@letter}

} + @output[lng][:html] << %{\n

#{@letter}

} else break end end @@ -288,37 +329,35 @@ WOK end def html_body the_idx=@the_idx - the_idx.sort.each do |a| - do_string_name('',a) - name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') - x = %{

#{a[0]}

} - if @output[:html_mnt].class==File - @output[:html_mnt] << x - end - @output[:html] << x - works=[] - a[1][:md].each do |x| - if @env.output_dir_structure.by_language_code? - manifest_pth="#{@env.path.output}/#{x[:file]}" - manifest_at_local=@env.path.output + '/' + 'en' + '/' + 'manifest/' + x[:file] + '.manifest.html' - manifest_at=x[:file] + '.manifest.html' - elsif @env.output_dir_structure.by_filetype? - #manifest_pth_2="#{@env.path.output}/#{x[:file]}" - manifest_name=x[:file] - manifest_at_local=manifest_name + 'manifest.html' - manifest_at=x[:file] + '.manifest.html' - elsif @env.output_dir_structure.by_filename? - manifest_at_local="#{@env.path.output}/#{x[:file]}/#{x[:page]}" - manifest_at="../#{x[:file]}/#{x[:page]}" + the_idx.each_pair do |lng,lng_array| + lng_array.sort.each do |a| + do_string_name(lng,'',a) + name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + x = %{

#{a[0]}

} + if @output[lng][:html_mnt].class==File + @output[lng][:html_mnt] << x + end + @output[lng][:html] << x + works=[] + a[1][:md].each do |x| + if @env.output_dir_structure.by_language_code? + manifest_pth="#{@env.path.output}/#{x[:file]}" + manifest_at=x[:file] + '.html' + elsif @env.output_dir_structure.by_filetype? + manifest_name=x[:file] + manifest_at=x[:file] + '.' + lng + '.html' + elsif @env.output_dir_structure.by_filename? + manifest_at="../#{x[:file]}/#{x[:page]}" + end + work=[ "#{x[:date]} #{x[:title]}", %{

#{x[:date]} #{x[:title]}, #{x[:author][:authors_s]}

} ] + works<<=(@output[lng][:html_mnt].class==File) \ + ? (work.concat([%{

[src]  #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]} -- [#{x[:file]}.sst]

}])) \ + : work + end + works.sort_by {|x| x[0]}.each do |x| + @output[lng][:html] << x[1] + @output[lng][:html_mnt] << x[2] if @output[lng][:html_mnt].class==File end - work=[ "#{x[:date]} #{x[:title]}", %{

#{x[:date]} #{x[:title]}, #{x[:author][:authors_s]}

} ] - works<<=(@output[:html_mnt].class==File) \ - ? (work.concat([%{

[src]  #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]} -- [#{x[:file]}.sst]

}])) \ - : work - end - works.sort_by {|x| x[0]}.each do |x| - @output[:html] << x[1] - @output[:html_mnt] << x[2] if @output[:html_mnt].class==File end end end -- cgit v1.2.3