From 61b9cb507a5be25b69ca870f3ec379ef1863c124 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 12 Oct 2014 10:29:43 -0400 Subject: v5 v6: moved files: harvest* to html_harvest* --- data/doc/sisu/CHANGELOG_v5 | 2 + data/doc/sisu/CHANGELOG_v6 | 2 + lib/sisu/v5/harvest.rb | 114 ---- lib/sisu/v5/harvest_author_format.rb | 114 ---- lib/sisu/v5/harvest_authors.rb | 470 ---------------- lib/sisu/v5/harvest_topics.rb | 900 ------------------------------ lib/sisu/v5/html_harvest.rb | 114 ++++ lib/sisu/v5/html_harvest_author_format.rb | 114 ++++ lib/sisu/v5/html_harvest_authors.rb | 470 ++++++++++++++++ lib/sisu/v5/html_harvest_topics.rb | 900 ++++++++++++++++++++++++++++++ lib/sisu/v5/hub_actions.rb | 2 +- lib/sisu/v6/harvest.rb | 114 ---- lib/sisu/v6/harvest_author_format.rb | 114 ---- lib/sisu/v6/harvest_authors.rb | 470 ---------------- lib/sisu/v6/harvest_topics.rb | 900 ------------------------------ lib/sisu/v6/html_harvest.rb | 114 ++++ lib/sisu/v6/html_harvest_author_format.rb | 114 ++++ lib/sisu/v6/html_harvest_authors.rb | 470 ++++++++++++++++ lib/sisu/v6/html_harvest_topics.rb | 900 ++++++++++++++++++++++++++++++ lib/sisu/v6/hub_actions.rb | 2 +- 20 files changed, 3202 insertions(+), 3198 deletions(-) delete mode 100644 lib/sisu/v5/harvest.rb delete mode 100644 lib/sisu/v5/harvest_author_format.rb delete mode 100644 lib/sisu/v5/harvest_authors.rb delete mode 100644 lib/sisu/v5/harvest_topics.rb create mode 100644 lib/sisu/v5/html_harvest.rb create mode 100644 lib/sisu/v5/html_harvest_author_format.rb create mode 100644 lib/sisu/v5/html_harvest_authors.rb create mode 100644 lib/sisu/v5/html_harvest_topics.rb delete mode 100644 lib/sisu/v6/harvest.rb delete mode 100644 lib/sisu/v6/harvest_author_format.rb delete mode 100644 lib/sisu/v6/harvest_authors.rb delete mode 100644 lib/sisu/v6/harvest_topics.rb create mode 100644 lib/sisu/v6/html_harvest.rb create mode 100644 lib/sisu/v6/html_harvest_author_format.rb create mode 100644 lib/sisu/v6/html_harvest_authors.rb create mode 100644 lib/sisu/v6/html_harvest_topics.rb diff --git a/data/doc/sisu/CHANGELOG_v5 b/data/doc/sisu/CHANGELOG_v5 index 133a621a..0e15c942 100644 --- a/data/doc/sisu/CHANGELOG_v5 +++ b/data/doc/sisu/CHANGELOG_v5 @@ -38,6 +38,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_5.7.0.orig.tar.xz sisu_5.7.0.orig.tar.xz sisu_5.7.0-1.dsc +* moved files: harvest* to html_harvest* + * --maintenance revisited, bin/sisu, hub, urls --maintenance, fix required * db, --maintenance sql, separated out postgresql & sqlite diff --git a/data/doc/sisu/CHANGELOG_v6 b/data/doc/sisu/CHANGELOG_v6 index 4d2a70e0..62abf4d9 100644 --- a/data/doc/sisu/CHANGELOG_v6 +++ b/data/doc/sisu/CHANGELOG_v6 @@ -28,6 +28,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_6.3.0.orig.tar.xz sisu_6.3.0.orig.tar.xz sisu_6.3.0-1.dsc +* moved files: harvest* to html_harvest* + * --maintenance revisited, bin/sisu, hub, urls --maintenance, fix required * db, --maintenance sql, separated out postgresql & sqlite diff --git a/lib/sisu/v5/harvest.rb b/lib/sisu/v5/harvest.rb deleted file mode 100644 index 87f81bc9..00000000 --- a/lib/sisu/v5/harvest.rb +++ /dev/null @@ -1,114 +0,0 @@ -# encoding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - harvest metadata from document corpus (suitable for medium sized sites) - (concept example, [to remove size constraint: implement SQL equivalent]) - - * Author: Ralph Amissah - - * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, - All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Git - - - - * Ralph Amissah - - - - ** Description: system environment, resource control and configuration details - -=end -module SiSU_Harvest - @@the_idx_topics,@@the_idx_authors={},{} - class Source - require_relative 'hub_options' # hub_options.rb - require_relative 'harvest_topics' # harvest_topics.rb - require_relative 'harvest_authors' # harvest_authors.rb - require_relative 'se' # se.rb - include SiSU_Env - def initialize(opt) - @opt=opt - @env=SiSU_Env::InfoEnv.new - end - def read - begin - harvest_pth=@env.path.webserv + '/' + @opt.base_stub - FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth) - cases(@opt,@env) - rescue - ensure - SiSU_Env::CreateSite.new(@opt).cp_css - end - end - def help - puts <. - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Git - - - - * Ralph Amissah - - - - ** Description: system environment, resource control and configuration details - -=end -module SiSU_FormatAuthor - class Author - def initialize(author_param) - @author_param=author_param - end - def author_details - @authors,@author_array=[],[] - authors=@author_param.scan(/[^;]+/) - authors.each do |a| - a=a.strip - if a =~/"(.+?)"/ - @authors << { the: $1 } - @author_array << $1.upcase - else #if a =~/,/ - x=a.scan(/[^,]+/) - x[0]=x[0].strip - x[1]=x[1].strip if x[1] - if x.length==1 - @authors << { the: x[0] } - @author_array << x[0].upcase - elsif x.length==2 - @authors << { the: x[0], others: x[1] } - @author_array << "#{x[0].upcase}, #{x[1]}" - else #p x.length - end - end - end - l = @authors.length - authors_string='' - @authors.each_with_index do |a,i| - authors_string += if a[:others] - if (l - i) > 1 - "#{a[:others]} #{a[:the]}, " - else - "#{a[:others]} #{a[:the]}" - end - else - if (l - i) > 2 - "#{a[:the]}, " - else - "#{a[:the]}" - end - end - end - { - last_first_a: authors, - last_first_format_a: @author_array, - authors_h: @authors, - authors_s: authors_string, - authors_param: @author_param - } - end - end -end -__END__ diff --git a/lib/sisu/v5/harvest_authors.rb b/lib/sisu/v5/harvest_authors.rb deleted file mode 100644 index 8db14eee..00000000 --- a/lib/sisu/v5/harvest_authors.rb +++ /dev/null @@ -1,470 +0,0 @@ -# encoding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - metadata harvest, extract authors and their writings from document set - - * Author: Ralph Amissah - - * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, - All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Git - - - - * Ralph Amissah - - - - ** Description: simple xml representation (sax style) - -=end -module SiSU_HarvestAuthors - require_relative 'harvest_author_format' # harvest_author_format.rb - class Songsheet - @@the_idx_authors={} - def initialize(opt,env) - @opt,@env=opt,env - @file_list=opt.files - end - def songsheet - idx_array={} - @opt.f_pths.each do |y| - lang_hash_file_array={} - name=y[:f] - filename=y[:pth] + '/' + y[:f] - File.open(filename,'r') do |file| - file.each_line("\n\n") do |line| - if line =~/^@(?:title|creator|date):(?:\s|$)/m - lang_hash_file_array[y[:lng_is]] ||= [] - lang_hash_file_array[y[:lng_is]] << line - elsif line =~/^@\S+?:(?:\s|$)/m \ - or line =~/^(?:\s*\n|%+ )/ - else break - end - end - end - lang_hash_file_array.each_pair do |lang,a| - idx_array[lang] ||= [] - idx_array=SiSU_HarvestAuthors::Harvest.new( - @opt, - @env, - a, - filename, - name, - idx_array, - lang - ).extract_harvest - end - end - the_idx=SiSU_HarvestAuthors::Index.new( - idx_array, - @@the_idx_authors - ).construct_book_author_index - SiSU_HarvestAuthors::OutputIndex.new( - @opt, - the_idx - ).html_print.html_songsheet - end - end - class Harvest - def initialize(opt,env,data,filename,name,idx_array,lang) - @opt, @env,@data,@filename,@name,@idx_array,@lang= - opt,env, data, filename, name, idx_array, lang - end - def extract_harvest - data, filename, name, idx_array, lang = - @data,@filename,@name,@idx_array,@lang - @title=@subtitle=@fulltitle=@author=@author_format=@date=nil - @authors=[] - rgx={} - rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m - rgx[:title]=/^@title:[ ]+(.+)/ - rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m - rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m - data.each do |para| - if para=~ rgx[:title] - @title=rgx[:title].match(para)[1] - end - if para=~ rgx[:subtitle] - @subtitle=rgx[:subtitle].match(para)[1] - end - if para=~ rgx[:author] - @author_format=rgx[:author].match(para)[1] - end - if para=~ rgx[:date] - @date=rgx[:date].match(para)[1] - end - break if @title && @subtitle && @author && @date - end - @fulltitle=@subtitle \ - ? (@title + ' - ' + @subtitle) - : @title - if @title \ - and @author_format - creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details - @authors,@authorship=creator[:authors],creator[:authorship] - file=if name=~/~[a-z]{2,3}\.ss[mt]$/ - name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') - else - name.sub(/\.ss[mt]$/,'') - end - page=if @env.output_dir_structure.by? == :language - "#{lang}/sisu_manifest.html" - else - "sisu_manifest.#{lang}.html" - end - idx_array[lang] <<= { - filename: filename, - file: file, - date: @date, - title: @fulltitle, - author: creator, - page: page, - lang: lang - } - else - #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" - end - idx_array[lang]=idx_array[lang].flatten - idx_array - end - end - class Index - def initialize(idx_array,the_idx) - @idx_array,@the_idx=idx_array,the_idx - @@the_idx_authors=@the_idx - end - def capital(txt) - txt[0].chr.capitalize + txt[1,txt.length] - end - def construct_book_author_index - idx_array=@idx_array - idx_array.each_pair do |lang,idx_arr| - @@the_idx_authors[lang] ||= {} - idx_arr.each do |idx| - idx[:author][:last_first_format_a].each do |author| - author=author.strip - if @@the_idx_authors[lang][author].is_a?(NilClass) - @@the_idx_authors[lang][author]={ md: [] } - end - @@the_idx_authors[lang][author][:md] << { - filename: idx[:filename], - file: idx[:file], - author: idx[:author], - title: idx[:title], - date: idx[:date], - page: idx[:page], - lang: idx[:lang] - } - end - end - end - @the_idx=@@the_idx_authors - end - end - class OutputIndex - require_relative 'i18n' # i18n.rb - def initialize(opt,the_idx) - @opt,@the_idx=opt,the_idx - @env=SiSU_Env::InfoEnv.new - @rc=SiSU_Env::GetInit.new.sisu_yaml.rc - @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] - @alph=@alphabet_list.dup - @letter=@alph.shift - @vz=SiSU_Viz::Defaults.new - end - def html_file_open - @the_idx.keys.each do |lng| - @output ||={} - @output[lng] ||={} - harvest_pth,file='','' - if @env.output_dir_structure.by? == :language - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub + '/' \ - + lng + '/' \ - + 'manifest' - file="#{harvest_pth}/authors.html" - elsif @env.output_dir_structure.by? == :filetype - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub + '/' \ - + 'manifest' - file="#{harvest_pth}/authors.#{lng}.html" - elsif @env.output_dir_structure.by? == :filename - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub - file="#{harvest_pth}/authors.#{lng}.html" - end - FileUtils::mkdir_p(harvest_pth) \ - unless FileTest.directory?(harvest_pth) - fileinfo=(@opt.act[:verbose][:set]==:on \ - || @opt.act[:verbose_plus][:set]==:on \ - || @opt.act[:urls_selected][:set]==:on \ - || @opt.act[:maintenance][:set]==:on) \ - ? ("file://#{file}") : '' - SiSU_Screen::Ansi.new( - @opt.act[:color_state][:set], - "harvest authors (#{@opt.files.length} files)", - fileinfo - ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on - @output[lng][:html]=File.new(file,'w') - end - end - def html_file_close - @the_idx.keys.each do |lng| - @output[lng][:html].close - @output[lng][:html_mnt].close \ - if @output[lng][:html_mnt].is_a?(File) - end - end - def html_print - def html_songsheet - html_file_open - html_head - html_alph - html_body - html_tail - html_file_close - end - def html_head_adjust(lng,type='') - css_path,topics='','' - if @env.output_dir_structure.by? == :language - css_path=(type !~/maintenance/) \ - ? '../../_sisu/css/harvest.css' - : 'harvest.css' - topics='topics.html' - elsif @env.output_dir_structure.by? == :filetype - css_path=(type !~/maintenance/) \ - ? '../_sisu/css/harvest.css' - : 'harvest.css' - topics="topics.#{lng}.html" - elsif @env.output_dir_structure.by? == :filename - css_path=(type !~/maintenance/) \ - ? './_sisu/css/harvest.css' - : 'harvest.css' - topics="topics.#{lng}.html" - end - ln=SiSU_i18n::Languages.new.language.list - harvest_languages='' - @the_idx.keys.each do |lg| - if @env.output_dir_structure.by? == :language - harvest_pth="../../#{lg}/manifest" - file="#{harvest_pth}/authors.html" - elsif @env.output_dir_structure.by? == :filetype - harvest_pth='.' - file="#{harvest_pth}/authors.#{lg}.html" - elsif @env.output_dir_structure.by? == :filename - harvest_pth='.' - file="#{harvest_pth}/authors.#{lg}.html" - end - l=ln[lg][:t] - harvest_languages += - %{#{l}   } - end - sv=SiSU_Env::InfoVersion.instance.get_version - if @env.output_dir_structure.by? == :language - home_pth='../..' - output_structure_by= - '(output organised by language & filetype)' - elsif @env.output_dir_structure.by? == :filetype - home_pth='..' - output_structure_by= - '(output organised by filetype)' - elsif @env.output_dir_structure.by? == :filename - home_pth='.' - output_structure_by= - '(output organised by filename)' - else - home_pth='.' - output_structure_by='(output organised by ?)' - end - < - - - -SiSU Metadata Harvest - Authors - - - - - - - - - - - - -

SiSU Metadata Harvest - Authors #{output_structure_by}

-

[ HOME ] also see SiSU Metadata Harvest - Topics

-

#{@env.widget_static.search_form}

-
-

#{harvest_languages}

-
-WOK - end - def html_head - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] \ - << html_head_adjust(lng,'maintenance') \ - if @opt.act[:maintenance][:set]==:on - @output[lng][:html] \ - << html_head_adjust(lng) - end - end - def html_alph - a=[] - a << '

' - @alph.each do |x| - a << ((x =~/[0-9]/) \ - ? '' - : %{#{x}, }) - end - a=a.join - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a \ - if @opt.act[:maintenance][:set]==:on - @output[lng][:html] << a - end - end - def html_tail - a =< - - - - - - -#{@vz.credits_sisu} - - -WOK - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a \ - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html] << a - end - end - def do_html(lng,html) - @output[lng][:html_mnt] << html \ - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html] << html - end - def do_string_name(lng,attrib,string) - f=/^(\S)/.match(string[0])[1] - if @lng != lng - @alph=@alphabet_list.dup - @letter=@alph.shift - @lng = lng - end - if @letter < f - while @letter < f - if @alph.length > 0 - @letter=@alph.shift - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html_mnt] \ - << %{\n

#{@letter}

} - end - @output[lng][:html] \ - << %{\n

#{@letter}

} - else break - end - end - end - end - def html_body - the_idx=@the_idx - the_idx.each_pair do |lng,lng_array| - lng_array.sort.each do |a| - do_string_name(lng,'',a) - name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') - x = %{

#{a[0]}

} - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html_mnt] << x - end - @output[lng][:html] << x - lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert - works=[] - a[1][:md].each do |i| - manifest_at=if @env.output_dir_structure.by? == :language - i[:file] + Sfx[:html] - elsif @env.output_dir_structure.by? == :filetype - i[:file] + lang_code_insert + Sfx[:html] - elsif @env.output_dir_structure.by? == :filename - './' + i[:file] + '/' + i[:page] - else '' #error - end - work=[ - "#{i[:date]} #{i[:title]}", - %{

#{i[:date]} #{i[:title]}, #{i[:author][:authors_s]}

} - ] - works<<=(@output[lng][:html_mnt].is_a?(File)) \ - ? (work.concat([%{

[src]  #{i[:date]} #{i[:title]}, #{i[:author][:authors_s]} -- [#{i[:file]}.sst]

}])) - : work - end - works.sort_by {|y| y[0]}.each do |z| - @output[lng][:html] << z[1] - @output[lng][:html_mnt] << z[2] \ - if @output[lng][:html_mnt].is_a?(File) - end - end - end - end - self - end - def screen_print - def cycle - the_idx=@the_idx - the_idx.sort.each do |a| - puts a[0] - a[1][:md].each do |x| - puts "\t" + x[:file] - end - end - end - self - end - end -end -__END__ diff --git a/lib/sisu/v5/harvest_topics.rb b/lib/sisu/v5/harvest_topics.rb deleted file mode 100644 index b2e83683..00000000 --- a/lib/sisu/v5/harvest_topics.rb +++ /dev/null @@ -1,900 +0,0 @@ -# encoding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - metadata harvest, extract topics and associated writings from document set - (topics use topic_register header) - - * Author: Ralph Amissah - - * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, - All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Git - - - - * Ralph Amissah - - - - ** Description: simple xml representation (sax style) - -=end -module SiSU_HarvestTopics - require_relative 'harvest_author_format' # harvest_author_format.rb - include SiSU_Viz - class Songsheet - @@the_idx_topics={} - def initialize(opt,env) - @opt,@env=opt,env - @file_list=opt.files - end - def songsheet - idx_array={} - @opt.f_pths.each do |y| - lang_hash_file_array={} - name=y[:f] - filename=y[:pth] + '/' + y[:f] - File.open(filename,'r') do |file| - file.each_line("\n\n") do |line| - if line =~/^@(?:title|creator|classify):(?:\s|$)/m - lang_hash_file_array[y[:lng_is]] ||= [] - lang_hash_file_array[y[:lng_is]] << line - elsif line =~/^@\S+?:(?:\s|$)/m \ - or line =~/^(?:\s*\n|\s*$|%+ )/ - else break - end - end - end - lang_hash_file_array.each_pair do |lang,a| - idx_array[lang] ||=[] - idx_array=SiSU_HarvestTopics::Harvest.new( - @opt, - @env, - a, - filename, - name, - idx_array, - lang - ).extract_harvest - end - end - the_hash=SiSU_HarvestTopics::Index.new( - @opt, - @env, - idx_array, - @@the_idx_topics - ).song - SiSU_HarvestTopics::OutputIndex.new( - @opt, - the_hash - ).html_print.html_songsheet - end - end - class Mix - def spaces - Ax[:spaces] - end - end - class Harvest - def initialize(opt,env,data,filename,name,idx_array,lang) - @opt, @env,@data,@filename,@name,@idx_array,@lang= - opt,env, data, filename, name, idx_array, lang - end - def extract_harvest - data, filename, name, idx_array, lang= - @data,@filename,@name,@idx_array,@lang - @idx_lst=@title=@subtitle=@fulltitle=@author=@author_format=nil - rgx={} - rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m - rgx[:title]=/^@title:[ ]+(.+)/ - rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m - rgx[:idx]=/^@classify:.+?:topic_register:[ ]+(.+?)(?:\n\n|\n\s+:\S|\n%)/m - data.each do |para| - if para=~ rgx[:idx] - @idx_list=(rgx[:idx].match(para)[1]).split(/\s*\n\s*/).join - end - if para=~ rgx[:title] - @title=rgx[:title].match(para)[1] - end - if para=~ rgx[:subtitle] - @subtitle=rgx[:subtitle].match(para)[1] - end - if para=~ rgx[:author] - @author_format=rgx[:author].match(para)[1] - end - break if @title && @subtitle && @author && @idx_lst - end - @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title - if @title \ - and @author_format \ - and @idx_list - creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details - @authors,@authorship=creator[:authors],creator[:authorship] - file=if name=~/~[a-z]{2,3}\.ss[mt]$/ - name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') - else - name.sub(/\.ss[mt]$/,'') - end - page=if @env.output_dir_structure.by? == :language - "#{lang}/sisu_manifest.html" - else - "sisu_manifest.#{lang}.html" - end - idx_array[lang] <<=if @idx_list =~/;/ - g=@idx_list.scan(/[^;]+/) - idxl=[] - g.each do |i| - i=i.strip - idxl << { - filename: filename, - file: file, - rough_idx: i, - title: @fulltitle, - author: creator, - page: page, - lang: lang - } - end - idxl - else { - filename: filename, - file: file, - rough_idx: @idx_list, - title: @fulltitle, - author: creator, - page: page, - lang: lang, - } - end - else - if (@opt.act[:verbose_plus][:set]==:on \ - || @opt.act[:maintenance][:set]==:on) - p "missing required field in #{@filename} - [title]: <<#{@title}>>; [author]: <<#{@author_format}>>; [idx]: <<#{@idx_list}>>" - end - end - idx_array[lang]=idx_array[lang].flatten - idx_array - end - end - class Index < Mix - def initialize(opt,env,idx_array,the_idx) - @opt, @env,@idx_array,@the_idx= - opt,env, idx_array, the_idx - @@the_idx_topics=@the_idx - end - def song - the_idx=construct_book_topic_keys - construct_book_topic_hash(the_idx) - end - def capital(txt) - txt_a=txt.scan(/\S+/) - tx='' - txt_a.each do |t| - tx += t[0].chr.capitalize + t[1,txt.length] + ' ' - end - tx.strip - end - def capital_(txt) - txt[0].chr.capitalize + txt[1,txt.length] - end - def contents(idx,lang) - names='' - idx[:author][:last_first_format_a].each do |n| - s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') - names=if @env.output_dir_structure.by? == :language - names += %{#{n}, } - else - names += %{#{n}, } - end - end - { - filename: idx[:filename], - file: idx[:file], - author: names, - title: idx[:title], - page: idx[:page] - } - end - def capital_(txt) - txt[0].chr.capitalize + txt[1,txt.length] - end - def key_create(c,alt) - x=nil - x=if c.length==6 - c[0].to_s + '|' + - capital(c[1][0].to_s) + '|' + - capital(c[2][0].to_s) + '|' + - capital(c[3][0].to_s) + '|' + - capital(alt.to_s) - elsif c.length==5 - c[0].to_s + '|' + - capital(c[1][0].to_s) + '|' + - capital(c[2][0].to_s) + '|' + - capital(alt.to_s) - elsif c.length==4 - c[0].to_s + '|' + - capital(c[1][0].to_s) + '|' + - capital(alt.to_s) - elsif c.length==3 - c[0].to_s + '|' + - capital(alt.to_s) - end - end - def construct_book_topic_keys - idx_array=@idx_array - @idx_a=[] - @the_a=[] - idx_array.each_pair do |lang,idx_arr| - @@the_idx_topics[lang] ||= {} - idx_arr.each do |idx| - if idx[:rough_idx] - idx_lst=idx[:rough_idx].scan(/[^:]+/) - else - puts "no topic register in: << #{idx[:filename]} >>" - next - end - idx_a=[] - idx_lst.each do |c| - idx_a << c.scan(/[^|\n]+/m) - end - idx_a << contents(idx,lang) - @idx_a << [lang] + idx_a - end - end - @idx_a.each do |c| - if c.length > 1 \ - and c.is_a?(Array) - if c[2].is_a?(Hash) - c[1].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[2]] if v - end - end - end - if c.length > 2 \ - and c.is_a?(Array) - if c[3].is_a?(Hash) - c[2].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[3]] if v - end - end - end - if c.length > 3 \ - and c.is_a?(Array) - if c[4].is_a?(Hash) - c[3].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[4]] if v - end - end - end - if c.length > 4 \ - and c.is_a?(Array) - if c[5].is_a?(Hash) - c[4].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[5]] if v - end - end - end - if c.length > 5 \ - and c.is_a?(Array) - if c[6].is_a?(Hash) - c[5].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[6]] if v - end - end - end - end - @the_a.sort_by { |x| x[0] } #; y.each {|z| puts z} - end - def construct_book_topic_hash(t) - @the_h={} - t.each do |z| - x=z[0].scan(/[^|]+/) - depth=x.length - extract=(depth-1) - k=case extract - when 4 - { x[0] => { x[1] => { x[2] => { x[3] => { x[4] => z[1] } } } } } - when 3 - { x[0] => { x[1] => { x[2] => { x[3] => z[1] } } } } - when 2 - { x[0] => { x[1] => { x[2] => z[1] } } } - when 1 - { x[0] => { x[1] => z[1] } } - when 0 - { x[0] => z[1] } - end - if extract >= 0 - k.each_pair do |x0,y0| - if extract == 0 - @the_h[x0] ||={ md: [] } - @the_h[x0][:md] << y0 - else - @the_h[x0] ||={} - end - #puts spaces*0 + x0 - if extract >= 1 - y0.each_pair do |x1,y1| - if extract == 1 - @the_h[x0][x1] ||={ md: [] } - @the_h[x0][x1][:md] << y1 - else - @the_h[x0][x1] ||={} - end - #puts spaces*1 + x1 - if extract >= 2 - y1.each_pair do |x2,y2| - if extract == 2 - @the_h[x0][x1][x2] ||={ md: [] } - @the_h[x0][x1][x2][:md] << y2 - else - @the_h[x0][x1][x2] ||={} - end - #puts spaces*2 + x2 - if extract >= 3 - y2.each_pair do |x3,y3| - if extract == 3 - @the_h[x0][x1][x2][x3] ||={ md: [] } - @the_h[x0][x1][x2][x3][:md] << y3 - else - @the_h[x0][x1][x2][x3] ||={} - end - #puts spaces*3 + x3 - if extract == 4 - y3.each_pair do |x4,y4| - if extract == 4 - @the_h[x0][x1][x2][x3][x4] ||={ md: [] } - @the_h[x0][x1][x2][x3][x4][:md] << y4 - else - @the_h[x0][x1][x2][x3][x4] ||={} - end - #puts spaces*4 + x4 - if extract == 5 - y4.each_pair do |x5,y5| - if extract == 5 - @the_h[x0][x1][x2][x3][x4][x5] ||={ md: [] } - @the_h[x0][x1][x2][x3][x4][x5][:md] << y5 - end - #puts spaces*5 + x5 - end - end - end - end - end - end - end - end - end - end - end - end - end - #@the_h.each_pair { |x,y| p x; p y } - @the_h - end - def traverse_base - @the_h.each_pair do |x0,y0| - puts spaces*0 + x0 if x0.is_a?(String) - if y0.is_a?(Hash) - y0.each_pair do |x1,y1| - puts spaces*1 + x1 if x1.is_a?(String) - if y1.is_a?(Hash) - y1.each_pair do |x2,y2| - puts spaces*2 + x2 if x2.is_a?(String) - if y2.is_a?(Hash) - y2.each_pair do |x3,y3| - puts spaces*3 + x3 if x3.is_a?(String) - if y3.is_a?(Hash) - y3.each_pair do |x4,y4| - puts spaces*4 + x4 if x4.is_a?(String) - if y4.is_a?(Hash) - y4.each_pair do |x5,y5| - puts spaces*5 + x5 if x5.is_a?(String) - end - end - end - end - end - end - end - end - end - end - end - end - def traverse - @the_h.each_pair do |x0,y0| - puts spaces*0 + x0 if x0.is_a?(String) - if y0.is_a?(Hash) - if y0.has_key?(:md) - y0[:md].each { |x| puts spaces*5 + x[:title] } - end - y0.each_pair do |x1,y1| - puts spaces*1 + x1 if x1.is_a?(String) - if y1.is_a?(Hash) - if y1.has_key?(:md) - y1[:md].each { |x| puts spaces*5 + x[:title] } - end - y1.each_pair do |x2,y2| - puts spaces*2 + x2 if x2.is_a?(String) - if y2.is_a?(Hash) - if y2.has_key?(:md) - y2[:md].each { |x| puts spaces*5 + x[:title] } - end - y2.each_pair do |x3,y3| - puts spaces*3 + x3 if x3.is_a?(String) - if y3.is_a?(Hash) - if y3.has_key?(:md) - y3[:md].each { |x| puts spaces*5 + x[:title] } - end - y3.each_pair do |x4,y4| - puts spaces*4 + x4 if x4.is_a?(String) - if y4.is_a?(Hash) - if y4.has_key?(:md) - y4[:md].each { |x| puts spaces*5 + x[:title] } - end - y4.each_pair do |x5,y5| - puts spaces*5 + x4 if x4.is_a?(String) - end - end - end - end - end - end - end - end - end - end - end - end - end - class OutputIndex < Mix - require_relative 'i18n' # i18n.rb - def initialize(opt,the_idx) - @opt,@the_idx=opt,the_idx - @env=SiSU_Env::InfoEnv.new - @rc=SiSU_Env::GetInit.new.sisu_yaml.rc - @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] - @alph=@alphabet_list.dup - @letter=@alph.shift - @vz=SiSU_Viz::Defaults.new - end - def html_file_open - @the_idx.keys.each do |lng| - @output ||={} - @output[lng] ||={} - harvest_pth,file='','' - if @env.output_dir_structure.by? == :language - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub + '/' \ - + lng + '/' \ - + 'manifest' - file=harvest_pth + '/' + 'topics.html' - elsif @env.output_dir_structure.by? == :filetype - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub + '/' \ - + 'manifest' - file=harvest_pth + '/' + 'topics.' + lng + '.html' - elsif @env.output_dir_structure.by? == :filename - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub - file=harvest_pth + '/' + 'topics.' + lng + '.html' - end - FileUtils::mkdir_p(harvest_pth) \ - unless FileTest.directory?(harvest_pth) - fileinfo=(@opt.act[:verbose][:set]==:on \ - || @opt.act[:verbose_plus][:set]==:on \ - || @opt.act[:urls_selected][:set]==:on \ - || @opt.act[:maintenance][:set]==:on) \ - ? ("file://#{file}") - : '' - SiSU_Screen::Ansi.new( - @opt.act[:color_state][:set], - "harvest topics(#{@opt.files.length} files)", - fileinfo - ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on - @output[lng][:html]=File.new(file,'w') - if @opt.act[:maintenance][:set]==:on - @output[lng][:html_mnt]=File.new("#{@env.path.pwd}/topics.html",'w') - end - end - end - def html_file_close - @the_idx.keys.each do |lng| - @output[lng][:html].close - @output[lng][:html_mnt].close if @output[lng][:html_mnt].is_a?(File) - end - end - def html_print - def html_songsheet - #traverse - html_file_open - html_head - html_alph - html_body_traverse - html_tail - html_file_close - end - def html_body_traverse - @the_idx.each_pair do |x0,y0| - lng=x0 - if x0.is_a?(String) - #do_string_name(lng,'lev0',x0) - #puts spaces*0 + x0 - end - if y0.is_a?(Hash) - if y0.has_key?(:md) - y0[:md].each do |x| - #do_hash(lng,attrib,x) #lv==0 ? - #puts spaces*5 + x[:title] - end - end - y0.each_pair do |x1,y1| - if x1.is_a?(String) - do_string_name(lng,'lev0',x1) - #puts spaces*1 + x1 - end - if y1.is_a?(Hash) - if y1.has_key?(:md) - y1[:md].each do |x| - do_hash(lng,0,x) - #puts spaces*5 + x[:title] - end - end - y1.each_pair do |x2,y2| - if x2.is_a?(String) - do_string(lng,'lev1',x2) - #puts spaces*2 + x2 - end - if y2.is_a?(Hash) - if y2.has_key?(:md) - y2[:md].each do |x| - do_hash(lng,1,x) - #puts spaces*5 + x[:title] - end - end - y2.each_pair do |x3,y3| - if x3.is_a?(String) - do_string(lng,'lev2',x3) - #puts spaces*3 + x3 - end - if y3.is_a?(Hash) - if y3.has_key?(:md) - y3[:md].each do |x| - do_hash(lng,2,x) - #puts spaces*5 + x[:title] - end - end - y3.each_pair do |x4,y4| - if x4.is_a?(String) - do_string(lng,'lev3',x4) - #puts spaces*4 + x4 - end - if y4.is_a?(Hash) - if y4.has_key?(:md) - y4[:md].each do |x| - do_hash(lng,3,x) - #puts spaces*5 + x[:title] - end - end - y4.each_pair do |x5,y5| - if x5.is_a?(String) - do_string(lng,'lev4',x5) - #puts spaces*5 + x5 - end - end - end - end - end - end - end - end - end - end - end - end - end - def html_head_adjust(lng,type='') - css_path,authors='','' - if @env.output_dir_structure.by? == :language - css_path=(type !~/maintenance/) \ - ? '../../_sisu/css/harvest.css' - : 'harvest.css' - authors='authors.html' - elsif @env.output_dir_structure.by? == :filetype - css_path=(type !~/maintenance/) \ - ? '../_sisu/css/harvest.css' - : 'harvest.css' - authors="authors.#{lng}.html" - elsif @env.output_dir_structure.by? == :filename - css_path=(type !~/maintenance/) \ - ? './_sisu/css/harvest.css' - : 'harvest.css' - authors="authors.#{lng}.html" - end - ln=SiSU_i18n::Languages.new.language.list - harvest_languages='' - @the_idx.keys.each do |lg| - if @env.output_dir_structure.by? == :language - harvest_pth="../../#{lg}/manifest" - file=harvest_pth + '/' + 'topics.html' - elsif @env.output_dir_structure.by? == :filetype - harvest_pth='.' - file=harvest_pth + '/' + 'topics.' + lg + '.html' - elsif @env.output_dir_structure.by? == :filename - harvest_pth='.' - file=harvest_pth + '/topics.' + lg + '.html' - end - l=ln[lg][:t] - harvest_languages += - %{#{l}   } - end - sv=SiSU_Env::InfoVersion.instance.get_version - if @env.output_dir_structure.by? == :language - home_pth='../..' - output_structure_by='(output organised by language & filetype)' - elsif @env.output_dir_structure.by? == :filetype - home_pth='..' - output_structure_by='(output organised by filetype)' - elsif @env.output_dir_structure.by? == :filename - home_pth='.' - output_structure_by='(output organised by filename)' - else - home_pth='.' - output_structure_by='(output organised by ?)' - end - < - - - -SiSU Metadata Harvest - Topics - - - - - - - - - - - - -

SiSU Metadata Harvest - Topics #{output_structure_by}

-

[ HOME ] also see SiSU Metadata Harvest - Authors

-

#{@env.widget_static.search_form}

-
-

#{harvest_languages}

-
-WOK - end - def html_head - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] \ - << html_head_adjust(lng,'maintenance') \ - if @opt.act[:maintenance][:set]==:on - @output[lng][:html] << html_head_adjust(lng) - end - end - def html_alph - a=[] - a << '

' - @alph.each do |x| - a << ((x =~/[0-9]/) \ - ? '' - : %{#{x}, }) - end - a=a.join - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a \ - if @opt.act[:maintenance][:set]==:on - @output[lng][:html] << a - end - end - def html_tail - a =< - - - - - - -#{@vz.credits_sisu} - - -WOK - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a \ - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html] << a - end - end - def do_html(lng,html) - @output[lng][:html] << html - end - def do_html_maintenance(lng,html) - @output[lng][:html_mnt] << html \ - if @output[lng][:html_mnt].is_a?(File) - end - def do_string(lng,attrib,string) - html=%{

#{string}

} - do_html(lng,html) - do_html_maintenance(lng,html) \ - if @output[lng][:html_mnt].is_a?(File) - end - def do_string_default(lng,attrib,string) - html=%{

#{string}

} - do_html(lng,html) - end - def do_string_maintenance(lng,attrib,string) - html=%{

#{string}

} - do_html_maintenance(lng,html) \ - if @output[lng][:html_mnt].is_a?(File) - end - def do_string_name(lng,attrib,string) - f=/^(\S)/.match(string)[1] - if @lng != lng - @alph=@alphabet_list.dup - @letter=@alph.shift - @lng = lng - end - if @letter < f - while @letter < f - if @alph.length > 0 - @letter=@alph.shift - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html_mnt] \ - << %{\n

#{@letter}

} - end - @output[lng][:html] \ - << %{\n

#{@letter}

} - else break - end - end - end - name=string.strip.gsub(/\s+/,'_') - html=%{

#{string}

} - do_html(lng,html) - do_html_maintenance(lng,html) \ - if @output[lng][:html_mnt].is_a?(File) - end - def do_array(lng,lv,array) - lv+=1 - array.each do |b| - do_case(lng,lv,b) - end - end - def do_hash_md(lng,attrib,hash) - lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert - manifest_at=if @env.output_dir_structure.by? == :language - hash[:file] + Sfx[:html] - elsif @env.output_dir_structure.by? == :filetype - hash[:file] + lang_code_insert + Sfx[:html] - elsif @env.output_dir_structure.by? == :filename - "./#{hash[:file]}/#{hash[:page]}" - else '' #error - end - html=%{#{hash[:title]} - #{hash[:author]}} - do_string_default(lng,attrib,html) - end - def do_hash_md_maintenance(lng,attrib,hash) - if @output[lng][:html_mnt].is_a?(File) #should not be run for presentation output - html=%{[src]  #{hash[:title]} - #{hash[:author]}} - do_string_maintenance(lng,attrib,html) - end - end - def do_hash(lng,lv,hash) - lv+=1 - key=[] - hash.each_key do |m| - if m == :md - do_case(lng,lv,hash[m]) - elsif m != :title \ - and m != :author \ - and m != :filename \ - and m != :file \ - and m != :rough_idx \ - and m != :page - key << m - elsif m == :title - do_hash_md(lng,'work',hash) - do_hash_md_maintenance(lng,'work',hash) - end - end - if key.length > 0 - key.sort.each do |m| - attrib="lev#{lv}" - lv==0 ? do_string_name(lng,attrib,m) : do_string(lng,attrib,m) - do_case(lng,lv,hash[m]) - end - end - end - def do_case(lng,lv,a) - case a - when String - attrib="lev#{lv}" - if a=~/S/ - lv==0 ? do_string_name(lng,attrib,a) : do_string(lng,attrib,a) - end - when Array - do_array(lng,lv,a) - when Hash - do_hash(lng,lv,a) - end - end - #def html_body - # the_idx=@the_idx - # the_idx.each_pair do |lng,lng_array| - # lng_array.sort.each do |a| - # do_case(lng,-1,a) - # end - # end - #end - self - end - end -end -__END__ -terms -|_ t{tl1} -|_ {fa}[fa]{filenames and other details} - | |_ {tl2} -|_ {fa}[fa]{filenames and other details} - | | |_{tl3} -|_ {fa}[fa]{filenames and other details} - | | | |_{tl4} - {fa}[fa]{filenames and other details} - | | | | - | | | |_{tl4a} - {fa}[fa]{filenames and other details} - | | | | - | | | |_{tl4b} - {fa}[fa]{filenames and other details} - | | | | - | | | |_ ... - | | | - | | |_{tl3a} - {fa}[fa]{filenames and other details} - | | - | |_{tl2a} - {fa}[fa]{filenames and other details} - | - |_ t{tl1a} -|_ {fa}[fa]{filenames and other details} - |_ ... diff --git a/lib/sisu/v5/html_harvest.rb b/lib/sisu/v5/html_harvest.rb new file mode 100644 index 00000000..c3a4030b --- /dev/null +++ b/lib/sisu/v5/html_harvest.rb @@ -0,0 +1,114 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + harvest metadata from document corpus (suitable for medium sized sites) + (concept example, [to remove size constraint: implement SQL equivalent]) + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, + All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Git + + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_Harvest + @@the_idx_topics,@@the_idx_authors={},{} + class Source + require_relative 'hub_options' # hub_options.rb + require_relative 'html_harvest_topics' # html_harvest_topics.rb + require_relative 'html_harvest_authors' # html_harvest_authors.rb + require_relative 'se' # se.rb + include SiSU_Env + def initialize(opt) + @opt=opt + @env=SiSU_Env::InfoEnv.new + end + def read + begin + harvest_pth=@env.path.webserv + '/' + @opt.base_stub + FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth) + cases(@opt,@env) + rescue + ensure + SiSU_Env::CreateSite.new(@opt).cp_css + end + end + def help + puts <. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Git + + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_FormatAuthor + class Author + def initialize(author_param) + @author_param=author_param + end + def author_details + @authors,@author_array=[],[] + authors=@author_param.scan(/[^;]+/) + authors.each do |a| + a=a.strip + if a =~/"(.+?)"/ + @authors << { the: $1 } + @author_array << $1.upcase + else #if a =~/,/ + x=a.scan(/[^,]+/) + x[0]=x[0].strip + x[1]=x[1].strip if x[1] + if x.length==1 + @authors << { the: x[0] } + @author_array << x[0].upcase + elsif x.length==2 + @authors << { the: x[0], others: x[1] } + @author_array << "#{x[0].upcase}, #{x[1]}" + else #p x.length + end + end + end + l = @authors.length + authors_string='' + @authors.each_with_index do |a,i| + authors_string += if a[:others] + if (l - i) > 1 + "#{a[:others]} #{a[:the]}, " + else + "#{a[:others]} #{a[:the]}" + end + else + if (l - i) > 2 + "#{a[:the]}, " + else + "#{a[:the]}" + end + end + end + { + last_first_a: authors, + last_first_format_a: @author_array, + authors_h: @authors, + authors_s: authors_string, + authors_param: @author_param + } + end + end +end +__END__ diff --git a/lib/sisu/v5/html_harvest_authors.rb b/lib/sisu/v5/html_harvest_authors.rb new file mode 100644 index 00000000..427ef4d3 --- /dev/null +++ b/lib/sisu/v5/html_harvest_authors.rb @@ -0,0 +1,470 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + metadata harvest, extract authors and their writings from document set + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, + All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Git + + + + * Ralph Amissah + + + + ** Description: simple xml representation (sax style) + +=end +module SiSU_HarvestAuthors + require_relative 'html_harvest_author_format' # html_harvest_author_format.rb + class Songsheet + @@the_idx_authors={} + def initialize(opt,env) + @opt,@env=opt,env + @file_list=opt.files + end + def songsheet + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|date):(?:\s|$)/m + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|%+ )/ + else break + end + end + end + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||= [] + idx_array=SiSU_HarvestAuthors::Harvest.new( + @opt, + @env, + a, + filename, + name, + idx_array, + lang + ).extract_harvest + end + end + the_idx=SiSU_HarvestAuthors::Index.new( + idx_array, + @@the_idx_authors + ).construct_book_author_index + SiSU_HarvestAuthors::OutputIndex.new( + @opt, + the_idx + ).html_print.html_songsheet + end + end + class Harvest + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt, @env,@data,@filename,@name,@idx_array,@lang= + opt,env, data, filename, name, idx_array, lang + end + def extract_harvest + data, filename, name, idx_array, lang = + @data,@filename,@name,@idx_array,@lang + @title=@subtitle=@fulltitle=@author=@author_format=@date=nil + @authors=[] + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m + data.each do |para| + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + if para=~ rgx[:date] + @date=rgx[:date].match(para)[1] + end + break if @title && @subtitle && @author && @date + end + @fulltitle=@subtitle \ + ? (@title + ' - ' + @subtitle) + : @title + if @title \ + and @author_format + creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + name.sub(/\.ss[mt]$/,'') + end + page=if @env.output_dir_structure.by? == :language + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<= { + filename: filename, + file: file, + date: @date, + title: @fulltitle, + author: creator, + page: page, + lang: lang + } + else + #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" + end + idx_array[lang]=idx_array[lang].flatten + idx_array + end + end + class Index + def initialize(idx_array,the_idx) + @idx_array,@the_idx=idx_array,the_idx + @@the_idx_authors=@the_idx + end + def capital(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def construct_book_author_index + idx_array=@idx_array + idx_array.each_pair do |lang,idx_arr| + @@the_idx_authors[lang] ||= {} + idx_arr.each do |idx| + idx[:author][:last_first_format_a].each do |author| + author=author.strip + if @@the_idx_authors[lang][author].is_a?(NilClass) + @@the_idx_authors[lang][author]={ md: [] } + end + @@the_idx_authors[lang][author][:md] << { + filename: idx[:filename], + file: idx[:file], + author: idx[:author], + title: idx[:title], + date: idx[:date], + page: idx[:page], + lang: idx[:lang] + } + end + end + end + @the_idx=@@the_idx_authors + end + end + class OutputIndex + require_relative 'i18n' # i18n.rb + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::InfoEnv.new + @rc=SiSU_Env::GetInit.new.sisu_yaml.rc + @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @alph=@alphabet_list.dup + @letter=@alph.shift + @vz=SiSU_Viz::Defaults.new + end + def html_file_open + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by? == :language + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + lng + '/' \ + + 'manifest' + file="#{harvest_pth}/authors.html" + elsif @env.output_dir_structure.by? == :filetype + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + 'manifest' + file="#{harvest_pth}/authors.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + file="#{harvest_pth}/authors.#{lng}.html" + end + FileUtils::mkdir_p(harvest_pth) \ + unless FileTest.directory?(harvest_pth) + fileinfo=(@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:urls_selected][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) \ + ? ("file://#{file}") : '' + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "harvest authors (#{@opt.files.length} files)", + fileinfo + ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on + @output[lng][:html]=File.new(file,'w') + end + end + def html_file_close + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close \ + if @output[lng][:html_mnt].is_a?(File) + end + end + def html_print + def html_songsheet + html_file_open + html_head + html_alph + html_body + html_tail + html_file_close + end + def html_head_adjust(lng,type='') + css_path,topics='','' + if @env.output_dir_structure.by? == :language + css_path=(type !~/maintenance/) \ + ? '../../_sisu/css/harvest.css' + : 'harvest.css' + topics='topics.html' + elsif @env.output_dir_structure.by? == :filetype + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + css_path=(type !~/maintenance/) \ + ? './_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lg| + if @env.output_dir_structure.by? == :language + harvest_pth="../../#{lg}/manifest" + file="#{harvest_pth}/authors.html" + elsif @env.output_dir_structure.by? == :filetype + harvest_pth='.' + file="#{harvest_pth}/authors.#{lg}.html" + elsif @env.output_dir_structure.by? == :filename + harvest_pth='.' + file="#{harvest_pth}/authors.#{lg}.html" + end + l=ln[lg][:t] + harvest_languages += + %{#{l}   } + end + sv=SiSU_Env::InfoVersion.instance.get_version + if @env.output_dir_structure.by? == :language + home_pth='../..' + output_structure_by= + '(output organised by language & filetype)' + elsif @env.output_dir_structure.by? == :filetype + home_pth='..' + output_structure_by= + '(output organised by filetype)' + elsif @env.output_dir_structure.by? == :filename + home_pth='.' + output_structure_by= + '(output organised by filename)' + else + home_pth='.' + output_structure_by='(output organised by ?)' + end + < + + + +SiSU Metadata Harvest - Authors + + + + + + + + + + + + +

SiSU Metadata Harvest - Authors #{output_structure_by}

+

[ HOME ] also see SiSU Metadata Harvest - Topics

+

#{@env.widget_static.search_form}

+
+

#{harvest_languages}

+
+WOK + end + def html_head + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] \ + << html_head_adjust(lng,'maintenance') \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] \ + << html_head_adjust(lng) + end + end + def html_alph + a=[] + a << '

' + @alph.each do |x| + a << ((x =~/[0-9]/) \ + ? '' + : %{#{x}, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << a + end + end + def html_tail + a =< + + + + + + +#{@vz.credits_sisu} + + +WOK + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << a + end + end + def do_html(lng,html) + @output[lng][:html_mnt] << html \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << html + end + def do_string_name(lng,attrib,string) + f=/^(\S)/.match(string[0])[1] + if @lng != lng + @alph=@alphabet_list.dup + @letter=@alph.shift + @lng = lng + end + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] \ + << %{\n

#{@letter}

} + end + @output[lng][:html] \ + << %{\n

#{@letter}

} + else break + end + end + end + end + def html_body + the_idx=@the_idx + the_idx.each_pair do |lng,lng_array| + lng_array.sort.each do |a| + do_string_name(lng,'',a) + name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + x = %{

#{a[0]}

} + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] << x + end + @output[lng][:html] << x + lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert + works=[] + a[1][:md].each do |i| + manifest_at=if @env.output_dir_structure.by? == :language + i[:file] + Sfx[:html] + elsif @env.output_dir_structure.by? == :filetype + i[:file] + lang_code_insert + Sfx[:html] + elsif @env.output_dir_structure.by? == :filename + './' + i[:file] + '/' + i[:page] + else '' #error + end + work=[ + "#{i[:date]} #{i[:title]}", + %{

#{i[:date]} #{i[:title]}, #{i[:author][:authors_s]}

} + ] + works<<=(@output[lng][:html_mnt].is_a?(File)) \ + ? (work.concat([%{

[src]  #{i[:date]} #{i[:title]}, #{i[:author][:authors_s]} -- [#{i[:file]}.sst]

}])) + : work + end + works.sort_by {|y| y[0]}.each do |z| + @output[lng][:html] << z[1] + @output[lng][:html_mnt] << z[2] \ + if @output[lng][:html_mnt].is_a?(File) + end + end + end + end + self + end + def screen_print + def cycle + the_idx=@the_idx + the_idx.sort.each do |a| + puts a[0] + a[1][:md].each do |x| + puts "\t" + x[:file] + end + end + end + self + end + end +end +__END__ diff --git a/lib/sisu/v5/html_harvest_topics.rb b/lib/sisu/v5/html_harvest_topics.rb new file mode 100644 index 00000000..59812c68 --- /dev/null +++ b/lib/sisu/v5/html_harvest_topics.rb @@ -0,0 +1,900 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + metadata harvest, extract topics and associated writings from document set + (topics use topic_register header) + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, + All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Git + + + + * Ralph Amissah + + + + ** Description: simple xml representation (sax style) + +=end +module SiSU_HarvestTopics + require_relative 'html_harvest_author_format' # html_harvest_author_format.rb + include SiSU_Viz + class Songsheet + @@the_idx_topics={} + def initialize(opt,env) + @opt,@env=opt,env + @file_list=opt.files + end + def songsheet + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|classify):(?:\s|$)/m + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|\s*$|%+ )/ + else break + end + end + end + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||=[] + idx_array=SiSU_HarvestTopics::Harvest.new( + @opt, + @env, + a, + filename, + name, + idx_array, + lang + ).extract_harvest + end + end + the_hash=SiSU_HarvestTopics::Index.new( + @opt, + @env, + idx_array, + @@the_idx_topics + ).song + SiSU_HarvestTopics::OutputIndex.new( + @opt, + the_hash + ).html_print.html_songsheet + end + end + class Mix + def spaces + Ax[:spaces] + end + end + class Harvest + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt, @env,@data,@filename,@name,@idx_array,@lang= + opt,env, data, filename, name, idx_array, lang + end + def extract_harvest + data, filename, name, idx_array, lang= + @data,@filename,@name,@idx_array,@lang + @idx_lst=@title=@subtitle=@fulltitle=@author=@author_format=nil + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:idx]=/^@classify:.+?:topic_register:[ ]+(.+?)(?:\n\n|\n\s+:\S|\n%)/m + data.each do |para| + if para=~ rgx[:idx] + @idx_list=(rgx[:idx].match(para)[1]).split(/\s*\n\s*/).join + end + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + break if @title && @subtitle && @author && @idx_lst + end + @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title + if @title \ + and @author_format \ + and @idx_list + creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + name.sub(/\.ss[mt]$/,'') + end + page=if @env.output_dir_structure.by? == :language + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<=if @idx_list =~/;/ + g=@idx_list.scan(/[^;]+/) + idxl=[] + g.each do |i| + i=i.strip + idxl << { + filename: filename, + file: file, + rough_idx: i, + title: @fulltitle, + author: creator, + page: page, + lang: lang + } + end + idxl + else { + filename: filename, + file: file, + rough_idx: @idx_list, + title: @fulltitle, + author: creator, + page: page, + lang: lang, + } + end + else + if (@opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + p "missing required field in #{@filename} - [title]: <<#{@title}>>; [author]: <<#{@author_format}>>; [idx]: <<#{@idx_list}>>" + end + end + idx_array[lang]=idx_array[lang].flatten + idx_array + end + end + class Index < Mix + def initialize(opt,env,idx_array,the_idx) + @opt, @env,@idx_array,@the_idx= + opt,env, idx_array, the_idx + @@the_idx_topics=@the_idx + end + def song + the_idx=construct_book_topic_keys + construct_book_topic_hash(the_idx) + end + def capital(txt) + txt_a=txt.scan(/\S+/) + tx='' + txt_a.each do |t| + tx += t[0].chr.capitalize + t[1,txt.length] + ' ' + end + tx.strip + end + def capital_(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def contents(idx,lang) + names='' + idx[:author][:last_first_format_a].each do |n| + s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + names=if @env.output_dir_structure.by? == :language + names += %{#{n}, } + else + names += %{#{n}, } + end + end + { + filename: idx[:filename], + file: idx[:file], + author: names, + title: idx[:title], + page: idx[:page] + } + end + def capital_(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def key_create(c,alt) + x=nil + x=if c.length==6 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(c[2][0].to_s) + '|' + + capital(c[3][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==5 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(c[2][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==4 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==3 + c[0].to_s + '|' + + capital(alt.to_s) + end + end + def construct_book_topic_keys + idx_array=@idx_array + @idx_a=[] + @the_a=[] + idx_array.each_pair do |lang,idx_arr| + @@the_idx_topics[lang] ||= {} + idx_arr.each do |idx| + if idx[:rough_idx] + idx_lst=idx[:rough_idx].scan(/[^:]+/) + else + puts "no topic register in: << #{idx[:filename]} >>" + next + end + idx_a=[] + idx_lst.each do |c| + idx_a << c.scan(/[^|\n]+/m) + end + idx_a << contents(idx,lang) + @idx_a << [lang] + idx_a + end + end + @idx_a.each do |c| + if c.length > 1 \ + and c.is_a?(Array) + if c[2].is_a?(Hash) + c[1].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[2]] if v + end + end + end + if c.length > 2 \ + and c.is_a?(Array) + if c[3].is_a?(Hash) + c[2].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[3]] if v + end + end + end + if c.length > 3 \ + and c.is_a?(Array) + if c[4].is_a?(Hash) + c[3].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[4]] if v + end + end + end + if c.length > 4 \ + and c.is_a?(Array) + if c[5].is_a?(Hash) + c[4].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[5]] if v + end + end + end + if c.length > 5 \ + and c.is_a?(Array) + if c[6].is_a?(Hash) + c[5].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[6]] if v + end + end + end + end + @the_a.sort_by { |x| x[0] } #; y.each {|z| puts z} + end + def construct_book_topic_hash(t) + @the_h={} + t.each do |z| + x=z[0].scan(/[^|]+/) + depth=x.length + extract=(depth-1) + k=case extract + when 4 + { x[0] => { x[1] => { x[2] => { x[3] => { x[4] => z[1] } } } } } + when 3 + { x[0] => { x[1] => { x[2] => { x[3] => z[1] } } } } + when 2 + { x[0] => { x[1] => { x[2] => z[1] } } } + when 1 + { x[0] => { x[1] => z[1] } } + when 0 + { x[0] => z[1] } + end + if extract >= 0 + k.each_pair do |x0,y0| + if extract == 0 + @the_h[x0] ||={ md: [] } + @the_h[x0][:md] << y0 + else + @the_h[x0] ||={} + end + #puts spaces*0 + x0 + if extract >= 1 + y0.each_pair do |x1,y1| + if extract == 1 + @the_h[x0][x1] ||={ md: [] } + @the_h[x0][x1][:md] << y1 + else + @the_h[x0][x1] ||={} + end + #puts spaces*1 + x1 + if extract >= 2 + y1.each_pair do |x2,y2| + if extract == 2 + @the_h[x0][x1][x2] ||={ md: [] } + @the_h[x0][x1][x2][:md] << y2 + else + @the_h[x0][x1][x2] ||={} + end + #puts spaces*2 + x2 + if extract >= 3 + y2.each_pair do |x3,y3| + if extract == 3 + @the_h[x0][x1][x2][x3] ||={ md: [] } + @the_h[x0][x1][x2][x3][:md] << y3 + else + @the_h[x0][x1][x2][x3] ||={} + end + #puts spaces*3 + x3 + if extract == 4 + y3.each_pair do |x4,y4| + if extract == 4 + @the_h[x0][x1][x2][x3][x4] ||={ md: [] } + @the_h[x0][x1][x2][x3][x4][:md] << y4 + else + @the_h[x0][x1][x2][x3][x4] ||={} + end + #puts spaces*4 + x4 + if extract == 5 + y4.each_pair do |x5,y5| + if extract == 5 + @the_h[x0][x1][x2][x3][x4][x5] ||={ md: [] } + @the_h[x0][x1][x2][x3][x4][x5][:md] << y5 + end + #puts spaces*5 + x5 + end + end + end + end + end + end + end + end + end + end + end + end + end + #@the_h.each_pair { |x,y| p x; p y } + @the_h + end + def traverse_base + @the_h.each_pair do |x0,y0| + puts spaces*0 + x0 if x0.is_a?(String) + if y0.is_a?(Hash) + y0.each_pair do |x1,y1| + puts spaces*1 + x1 if x1.is_a?(String) + if y1.is_a?(Hash) + y1.each_pair do |x2,y2| + puts spaces*2 + x2 if x2.is_a?(String) + if y2.is_a?(Hash) + y2.each_pair do |x3,y3| + puts spaces*3 + x3 if x3.is_a?(String) + if y3.is_a?(Hash) + y3.each_pair do |x4,y4| + puts spaces*4 + x4 if x4.is_a?(String) + if y4.is_a?(Hash) + y4.each_pair do |x5,y5| + puts spaces*5 + x5 if x5.is_a?(String) + end + end + end + end + end + end + end + end + end + end + end + end + def traverse + @the_h.each_pair do |x0,y0| + puts spaces*0 + x0 if x0.is_a?(String) + if y0.is_a?(Hash) + if y0.has_key?(:md) + y0[:md].each { |x| puts spaces*5 + x[:title] } + end + y0.each_pair do |x1,y1| + puts spaces*1 + x1 if x1.is_a?(String) + if y1.is_a?(Hash) + if y1.has_key?(:md) + y1[:md].each { |x| puts spaces*5 + x[:title] } + end + y1.each_pair do |x2,y2| + puts spaces*2 + x2 if x2.is_a?(String) + if y2.is_a?(Hash) + if y2.has_key?(:md) + y2[:md].each { |x| puts spaces*5 + x[:title] } + end + y2.each_pair do |x3,y3| + puts spaces*3 + x3 if x3.is_a?(String) + if y3.is_a?(Hash) + if y3.has_key?(:md) + y3[:md].each { |x| puts spaces*5 + x[:title] } + end + y3.each_pair do |x4,y4| + puts spaces*4 + x4 if x4.is_a?(String) + if y4.is_a?(Hash) + if y4.has_key?(:md) + y4[:md].each { |x| puts spaces*5 + x[:title] } + end + y4.each_pair do |x5,y5| + puts spaces*5 + x4 if x4.is_a?(String) + end + end + end + end + end + end + end + end + end + end + end + end + end + class OutputIndex < Mix + require_relative 'i18n' # i18n.rb + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::InfoEnv.new + @rc=SiSU_Env::GetInit.new.sisu_yaml.rc + @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @alph=@alphabet_list.dup + @letter=@alph.shift + @vz=SiSU_Viz::Defaults.new + end + def html_file_open + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by? == :language + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + lng + '/' \ + + 'manifest' + file=harvest_pth + '/' + 'topics.html' + elsif @env.output_dir_structure.by? == :filetype + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + 'manifest' + file=harvest_pth + '/' + 'topics.' + lng + '.html' + elsif @env.output_dir_structure.by? == :filename + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + file=harvest_pth + '/' + 'topics.' + lng + '.html' + end + FileUtils::mkdir_p(harvest_pth) \ + unless FileTest.directory?(harvest_pth) + fileinfo=(@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:urls_selected][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) \ + ? ("file://#{file}") + : '' + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "harvest topics(#{@opt.files.length} files)", + fileinfo + ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on + @output[lng][:html]=File.new(file,'w') + if @opt.act[:maintenance][:set]==:on + @output[lng][:html_mnt]=File.new("#{@env.path.pwd}/topics.html",'w') + end + end + end + def html_file_close + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close if @output[lng][:html_mnt].is_a?(File) + end + end + def html_print + def html_songsheet + #traverse + html_file_open + html_head + html_alph + html_body_traverse + html_tail + html_file_close + end + def html_body_traverse + @the_idx.each_pair do |x0,y0| + lng=x0 + if x0.is_a?(String) + #do_string_name(lng,'lev0',x0) + #puts spaces*0 + x0 + end + if y0.is_a?(Hash) + if y0.has_key?(:md) + y0[:md].each do |x| + #do_hash(lng,attrib,x) #lv==0 ? + #puts spaces*5 + x[:title] + end + end + y0.each_pair do |x1,y1| + if x1.is_a?(String) + do_string_name(lng,'lev0',x1) + #puts spaces*1 + x1 + end + if y1.is_a?(Hash) + if y1.has_key?(:md) + y1[:md].each do |x| + do_hash(lng,0,x) + #puts spaces*5 + x[:title] + end + end + y1.each_pair do |x2,y2| + if x2.is_a?(String) + do_string(lng,'lev1',x2) + #puts spaces*2 + x2 + end + if y2.is_a?(Hash) + if y2.has_key?(:md) + y2[:md].each do |x| + do_hash(lng,1,x) + #puts spaces*5 + x[:title] + end + end + y2.each_pair do |x3,y3| + if x3.is_a?(String) + do_string(lng,'lev2',x3) + #puts spaces*3 + x3 + end + if y3.is_a?(Hash) + if y3.has_key?(:md) + y3[:md].each do |x| + do_hash(lng,2,x) + #puts spaces*5 + x[:title] + end + end + y3.each_pair do |x4,y4| + if x4.is_a?(String) + do_string(lng,'lev3',x4) + #puts spaces*4 + x4 + end + if y4.is_a?(Hash) + if y4.has_key?(:md) + y4[:md].each do |x| + do_hash(lng,3,x) + #puts spaces*5 + x[:title] + end + end + y4.each_pair do |x5,y5| + if x5.is_a?(String) + do_string(lng,'lev4',x5) + #puts spaces*5 + x5 + end + end + end + end + end + end + end + end + end + end + end + end + end + def html_head_adjust(lng,type='') + css_path,authors='','' + if @env.output_dir_structure.by? == :language + css_path=(type !~/maintenance/) \ + ? '../../_sisu/css/harvest.css' + : 'harvest.css' + authors='authors.html' + elsif @env.output_dir_structure.by? == :filetype + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' + : 'harvest.css' + authors="authors.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + css_path=(type !~/maintenance/) \ + ? './_sisu/css/harvest.css' + : 'harvest.css' + authors="authors.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lg| + if @env.output_dir_structure.by? == :language + harvest_pth="../../#{lg}/manifest" + file=harvest_pth + '/' + 'topics.html' + elsif @env.output_dir_structure.by? == :filetype + harvest_pth='.' + file=harvest_pth + '/' + 'topics.' + lg + '.html' + elsif @env.output_dir_structure.by? == :filename + harvest_pth='.' + file=harvest_pth + '/topics.' + lg + '.html' + end + l=ln[lg][:t] + harvest_languages += + %{#{l}   } + end + sv=SiSU_Env::InfoVersion.instance.get_version + if @env.output_dir_structure.by? == :language + home_pth='../..' + output_structure_by='(output organised by language & filetype)' + elsif @env.output_dir_structure.by? == :filetype + home_pth='..' + output_structure_by='(output organised by filetype)' + elsif @env.output_dir_structure.by? == :filename + home_pth='.' + output_structure_by='(output organised by filename)' + else + home_pth='.' + output_structure_by='(output organised by ?)' + end + < + + + +SiSU Metadata Harvest - Topics + + + + + + + + + + + + +

SiSU Metadata Harvest - Topics #{output_structure_by}

+

[ HOME ] also see SiSU Metadata Harvest - Authors

+

#{@env.widget_static.search_form}

+
+

#{harvest_languages}

+
+WOK + end + def html_head + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] \ + << html_head_adjust(lng,'maintenance') \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << html_head_adjust(lng) + end + end + def html_alph + a=[] + a << '

' + @alph.each do |x| + a << ((x =~/[0-9]/) \ + ? '' + : %{#{x}, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << a + end + end + def html_tail + a =< + + + + + + +#{@vz.credits_sisu} + + +WOK + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << a + end + end + def do_html(lng,html) + @output[lng][:html] << html + end + def do_html_maintenance(lng,html) + @output[lng][:html_mnt] << html \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string(lng,attrib,string) + html=%{

#{string}

} + do_html(lng,html) + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string_default(lng,attrib,string) + html=%{

#{string}

} + do_html(lng,html) + end + def do_string_maintenance(lng,attrib,string) + html=%{

#{string}

} + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string_name(lng,attrib,string) + f=/^(\S)/.match(string)[1] + if @lng != lng + @alph=@alphabet_list.dup + @letter=@alph.shift + @lng = lng + end + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] \ + << %{\n

#{@letter}

} + end + @output[lng][:html] \ + << %{\n

#{@letter}

} + else break + end + end + end + name=string.strip.gsub(/\s+/,'_') + html=%{

#{string}

} + do_html(lng,html) + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_array(lng,lv,array) + lv+=1 + array.each do |b| + do_case(lng,lv,b) + end + end + def do_hash_md(lng,attrib,hash) + lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert + manifest_at=if @env.output_dir_structure.by? == :language + hash[:file] + Sfx[:html] + elsif @env.output_dir_structure.by? == :filetype + hash[:file] + lang_code_insert + Sfx[:html] + elsif @env.output_dir_structure.by? == :filename + "./#{hash[:file]}/#{hash[:page]}" + else '' #error + end + html=%{#{hash[:title]} - #{hash[:author]}} + do_string_default(lng,attrib,html) + end + def do_hash_md_maintenance(lng,attrib,hash) + if @output[lng][:html_mnt].is_a?(File) #should not be run for presentation output + html=%{[src]  #{hash[:title]} - #{hash[:author]}} + do_string_maintenance(lng,attrib,html) + end + end + def do_hash(lng,lv,hash) + lv+=1 + key=[] + hash.each_key do |m| + if m == :md + do_case(lng,lv,hash[m]) + elsif m != :title \ + and m != :author \ + and m != :filename \ + and m != :file \ + and m != :rough_idx \ + and m != :page + key << m + elsif m == :title + do_hash_md(lng,'work',hash) + do_hash_md_maintenance(lng,'work',hash) + end + end + if key.length > 0 + key.sort.each do |m| + attrib="lev#{lv}" + lv==0 ? do_string_name(lng,attrib,m) : do_string(lng,attrib,m) + do_case(lng,lv,hash[m]) + end + end + end + def do_case(lng,lv,a) + case a + when String + attrib="lev#{lv}" + if a=~/S/ + lv==0 ? do_string_name(lng,attrib,a) : do_string(lng,attrib,a) + end + when Array + do_array(lng,lv,a) + when Hash + do_hash(lng,lv,a) + end + end + #def html_body + # the_idx=@the_idx + # the_idx.each_pair do |lng,lng_array| + # lng_array.sort.each do |a| + # do_case(lng,-1,a) + # end + # end + #end + self + end + end +end +__END__ +terms -|_ t{tl1} -|_ {fa}[fa]{filenames and other details} + | |_ {tl2} -|_ {fa}[fa]{filenames and other details} + | | |_{tl3} -|_ {fa}[fa]{filenames and other details} + | | | |_{tl4} - {fa}[fa]{filenames and other details} + | | | | + | | | |_{tl4a} - {fa}[fa]{filenames and other details} + | | | | + | | | |_{tl4b} - {fa}[fa]{filenames and other details} + | | | | + | | | |_ ... + | | | + | | |_{tl3a} - {fa}[fa]{filenames and other details} + | | + | |_{tl2a} - {fa}[fa]{filenames and other details} + | + |_ t{tl1a} -|_ {fa}[fa]{filenames and other details} + |_ ... diff --git a/lib/sisu/v5/hub_actions.rb b/lib/sisu/v5/hub_actions.rb index 6c97aa4b..7781ef63 100644 --- a/lib/sisu/v5/hub_actions.rb +++ b/lib/sisu/v5/hub_actions.rb @@ -559,7 +559,7 @@ module SiSU_Hub_Actions def harvest? if @opt.act[:harvest][:set]==:on #% --harvest if @opt.f_pths.length > 0 - require_relative 'harvest' # harvest.rb + require_relative 'html_harvest' # html_harvest.rb SiSU_Harvest::Source.new(@opt).read else msg='harvest request requires sisu markup files' diff --git a/lib/sisu/v6/harvest.rb b/lib/sisu/v6/harvest.rb deleted file mode 100644 index 09c1b883..00000000 --- a/lib/sisu/v6/harvest.rb +++ /dev/null @@ -1,114 +0,0 @@ -# encoding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - harvest metadata from document corpus (suitable for medium sized sites) - (concept example, [to remove size constraint: implement SQL equivalent]) - - * Author: Ralph Amissah - - * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, - All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Git - - - - * Ralph Amissah - - - - ** Description: system environment, resource control and configuration details - -=end -module SiSU_Harvest - @@the_idx_topics,@@the_idx_authors={},{} - class Source - require_relative 'hub_options' # hub_options.rb - require_relative 'harvest_topics' # harvest_topics.rb - require_relative 'harvest_authors' # harvest_authors.rb - require_relative 'se' # se.rb - include SiSU_Env - def initialize(opt) - @opt=opt - @env=SiSU_Env::InfoEnv.new - end - def read - begin - harvest_pth=@env.path.webserv + '/' + @opt.base_stub - FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth) - cases(@opt,@env) - rescue - ensure - SiSU_Env::CreateSite.new(@opt).cp_css - end - end - def help - puts <. - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Git - - - - * Ralph Amissah - - - - ** Description: system environment, resource control and configuration details - -=end -module SiSU_FormatAuthor - class Author - def initialize(author_param) - @author_param=author_param - end - def author_details - @authors,@author_array=[],[] - authors=@author_param.scan(/[^;]+/) - authors.each do |a| - a=a.strip - if a =~/"(.+?)"/ - @authors << { the: $1 } - @author_array << $1.upcase - else #if a =~/,/ - x=a.scan(/[^,]+/) - x[0]=x[0].strip - x[1]=x[1].strip if x[1] - if x.length==1 - @authors << { the: x[0] } - @author_array << x[0].upcase - elsif x.length==2 - @authors << { the: x[0], others: x[1] } - @author_array << "#{x[0].upcase}, #{x[1]}" - else #p x.length - end - end - end - l = @authors.length - authors_string='' - @authors.each_with_index do |a,i| - authors_string += if a[:others] - if (l - i) > 1 - "#{a[:others]} #{a[:the]}, " - else - "#{a[:others]} #{a[:the]}" - end - else - if (l - i) > 2 - "#{a[:the]}, " - else - "#{a[:the]}" - end - end - end - { - last_first_a: authors, - last_first_format_a: @author_array, - authors_h: @authors, - authors_s: authors_string, - authors_param: @author_param - } - end - end -end -__END__ diff --git a/lib/sisu/v6/harvest_authors.rb b/lib/sisu/v6/harvest_authors.rb deleted file mode 100644 index e48c4f78..00000000 --- a/lib/sisu/v6/harvest_authors.rb +++ /dev/null @@ -1,470 +0,0 @@ -# encoding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - metadata harvest, extract authors and their writings from document set - - * Author: Ralph Amissah - - * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, - All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Git - - - - * Ralph Amissah - - - - ** Description: simple xml representation (sax style) - -=end -module SiSU_HarvestAuthors - require_relative 'harvest_author_format' # harvest_author_format.rb - class Songsheet - @@the_idx_authors={} - def initialize(opt,env) - @opt,@env=opt,env - @file_list=opt.files - end - def songsheet - idx_array={} - @opt.f_pths.each do |y| - lang_hash_file_array={} - name=y[:f] - filename=y[:pth] + '/' + y[:f] - File.open(filename,'r') do |file| - file.each_line("\n\n") do |line| - if line =~/^@(?:title|creator|date):(?:\s|$)/m - lang_hash_file_array[y[:lng_is]] ||= [] - lang_hash_file_array[y[:lng_is]] << line - elsif line =~/^@\S+?:(?:\s|$)/m \ - or line =~/^(?:\s*\n|%+ )/ - else break - end - end - end - lang_hash_file_array.each_pair do |lang,a| - idx_array[lang] ||= [] - idx_array=SiSU_HarvestAuthors::Harvest.new( - @opt, - @env, - a, - filename, - name, - idx_array, - lang - ).extract_harvest - end - end - the_idx=SiSU_HarvestAuthors::Index.new( - idx_array, - @@the_idx_authors - ).construct_book_author_index - SiSU_HarvestAuthors::OutputIndex.new( - @opt, - the_idx - ).html_print.html_songsheet - end - end - class Harvest - def initialize(opt,env,data,filename,name,idx_array,lang) - @opt, @env,@data,@filename,@name,@idx_array,@lang= - opt,env, data, filename, name, idx_array, lang - end - def extract_harvest - data, filename, name, idx_array, lang = - @data,@filename,@name,@idx_array,@lang - @title=@subtitle=@fulltitle=@author=@author_format=@date=nil - @authors=[] - rgx={} - rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m - rgx[:title]=/^@title:[ ]+(.+)/ - rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m - rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m - data.each do |para| - if para=~ rgx[:title] - @title=rgx[:title].match(para)[1] - end - if para=~ rgx[:subtitle] - @subtitle=rgx[:subtitle].match(para)[1] - end - if para=~ rgx[:author] - @author_format=rgx[:author].match(para)[1] - end - if para=~ rgx[:date] - @date=rgx[:date].match(para)[1] - end - break if @title && @subtitle && @author && @date - end - @fulltitle=@subtitle \ - ? (@title + ' - ' + @subtitle) - : @title - if @title \ - and @author_format - creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details - @authors,@authorship=creator[:authors],creator[:authorship] - file=if name=~/~[a-z]{2,3}\.ss[mt]$/ - name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') - else - name.sub(/\.ss[mt]$/,'') - end - page=if @env.output_dir_structure.by? == :language - "#{lang}/sisu_manifest.html" - else - "sisu_manifest.#{lang}.html" - end - idx_array[lang] <<= { - filename: filename, - file: file, - date: @date, - title: @fulltitle, - author: creator, - page: page, - lang: lang - } - else - #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" - end - idx_array[lang]=idx_array[lang].flatten - idx_array - end - end - class Index - def initialize(idx_array,the_idx) - @idx_array,@the_idx=idx_array,the_idx - @@the_idx_authors=@the_idx - end - def capital(txt) - txt[0].chr.capitalize + txt[1,txt.length] - end - def construct_book_author_index - idx_array=@idx_array - idx_array.each_pair do |lang,idx_arr| - @@the_idx_authors[lang] ||= {} - idx_arr.each do |idx| - idx[:author][:last_first_format_a].each do |author| - author=author.strip - if @@the_idx_authors[lang][author].is_a?(NilClass) - @@the_idx_authors[lang][author]={ md: [] } - end - @@the_idx_authors[lang][author][:md] << { - filename: idx[:filename], - file: idx[:file], - author: idx[:author], - title: idx[:title], - date: idx[:date], - page: idx[:page], - lang: idx[:lang] - } - end - end - end - @the_idx=@@the_idx_authors - end - end - class OutputIndex - require_relative 'i18n' # i18n.rb - def initialize(opt,the_idx) - @opt,@the_idx=opt,the_idx - @env=SiSU_Env::InfoEnv.new - @rc=SiSU_Env::GetInit.new.sisu_yaml.rc - @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] - @alph=@alphabet_list.dup - @letter=@alph.shift - @vz=SiSU_Viz::Defaults.new - end - def html_file_open - @the_idx.keys.each do |lng| - @output ||={} - @output[lng] ||={} - harvest_pth,file='','' - if @env.output_dir_structure.by? == :language - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub + '/' \ - + lng + '/' \ - + 'manifest' - file="#{harvest_pth}/authors.html" - elsif @env.output_dir_structure.by? == :filetype - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub + '/' \ - + 'manifest' - file="#{harvest_pth}/authors.#{lng}.html" - elsif @env.output_dir_structure.by? == :filename - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub - file="#{harvest_pth}/authors.#{lng}.html" - end - FileUtils::mkdir_p(harvest_pth) \ - unless FileTest.directory?(harvest_pth) - fileinfo=(@opt.act[:verbose][:set]==:on \ - || @opt.act[:verbose_plus][:set]==:on \ - || @opt.act[:urls_selected][:set]==:on \ - || @opt.act[:maintenance][:set]==:on) \ - ? ("file://#{file}") : '' - SiSU_Screen::Ansi.new( - @opt.act[:color_state][:set], - "harvest authors (#{@opt.files.length} files)", - fileinfo - ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on - @output[lng][:html]=File.new(file,'w') - end - end - def html_file_close - @the_idx.keys.each do |lng| - @output[lng][:html].close - @output[lng][:html_mnt].close \ - if @output[lng][:html_mnt].is_a?(File) - end - end - def html_print - def html_songsheet - html_file_open - html_head - html_alph - html_body - html_tail - html_file_close - end - def html_head_adjust(lng,type='') - css_path,topics='','' - if @env.output_dir_structure.by? == :language - css_path=(type !~/maintenance/) \ - ? '../../_sisu/css/harvest.css' - : 'harvest.css' - topics='topics.html' - elsif @env.output_dir_structure.by? == :filetype - css_path=(type !~/maintenance/) \ - ? '../_sisu/css/harvest.css' - : 'harvest.css' - topics="topics.#{lng}.html" - elsif @env.output_dir_structure.by? == :filename - css_path=(type !~/maintenance/) \ - ? './_sisu/css/harvest.css' - : 'harvest.css' - topics="topics.#{lng}.html" - end - ln=SiSU_i18n::Languages.new.language.list - harvest_languages='' - @the_idx.keys.each do |lg| - if @env.output_dir_structure.by? == :language - harvest_pth="../../#{lg}/manifest" - file="#{harvest_pth}/authors.html" - elsif @env.output_dir_structure.by? == :filetype - harvest_pth='.' - file="#{harvest_pth}/authors.#{lg}.html" - elsif @env.output_dir_structure.by? == :filename - harvest_pth='.' - file="#{harvest_pth}/authors.#{lg}.html" - end - l=ln[lg][:t] - harvest_languages += - %{#{l}   } - end - sv=SiSU_Env::InfoVersion.instance.get_version - if @env.output_dir_structure.by? == :language - home_pth='../..' - output_structure_by= - '(output organised by language & filetype)' - elsif @env.output_dir_structure.by? == :filetype - home_pth='..' - output_structure_by= - '(output organised by filetype)' - elsif @env.output_dir_structure.by? == :filename - home_pth='.' - output_structure_by= - '(output organised by filename)' - else - home_pth='.' - output_structure_by='(output organised by ?)' - end - < - - - -SiSU Metadata Harvest - Authors - - - - - - - - - - - - -

SiSU Metadata Harvest - Authors #{output_structure_by}

-

[ HOME ] also see SiSU Metadata Harvest - Topics

-

#{@env.widget_static.search_form}

-
-

#{harvest_languages}

-
-WOK - end - def html_head - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] \ - << html_head_adjust(lng,'maintenance') \ - if @opt.act[:maintenance][:set]==:on - @output[lng][:html] \ - << html_head_adjust(lng) - end - end - def html_alph - a=[] - a << '

' - @alph.each do |x| - a << ((x =~/[0-9]/) \ - ? '' - : %{#{x}, }) - end - a=a.join - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a \ - if @opt.act[:maintenance][:set]==:on - @output[lng][:html] << a - end - end - def html_tail - a =< - - - - - - -#{@vz.credits_sisu} - - -WOK - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a \ - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html] << a - end - end - def do_html(lng,html) - @output[lng][:html_mnt] << html \ - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html] << html - end - def do_string_name(lng,attrib,string) - f=/^(\S)/.match(string[0])[1] - if @lng != lng - @alph=@alphabet_list.dup - @letter=@alph.shift - @lng = lng - end - if @letter < f - while @letter < f - if @alph.length > 0 - @letter=@alph.shift - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html_mnt] \ - << %{\n

#{@letter}

} - end - @output[lng][:html] \ - << %{\n

#{@letter}

} - else break - end - end - end - end - def html_body - the_idx=@the_idx - the_idx.each_pair do |lng,lng_array| - lng_array.sort.each do |a| - do_string_name(lng,'',a) - name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') - x = %{

#{a[0]}

} - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html_mnt] << x - end - @output[lng][:html] << x - lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert - works=[] - a[1][:md].each do |i| - manifest_at=if @env.output_dir_structure.by? == :language - i[:file] + Sfx[:html] - elsif @env.output_dir_structure.by? == :filetype - i[:file] + lang_code_insert + Sfx[:html] - elsif @env.output_dir_structure.by? == :filename - './' + i[:file] + '/' + i[:page] - else '' #error - end - work=[ - "#{i[:date]} #{i[:title]}", - %{

#{i[:date]} #{i[:title]}, #{i[:author][:authors_s]}

} - ] - works<<=(@output[lng][:html_mnt].is_a?(File)) \ - ? (work.concat([%{

[src]  #{i[:date]} #{i[:title]}, #{i[:author][:authors_s]} -- [#{i[:file]}.sst]

}])) - : work - end - works.sort_by {|y| y[0]}.each do |z| - @output[lng][:html] << z[1] - @output[lng][:html_mnt] << z[2] \ - if @output[lng][:html_mnt].is_a?(File) - end - end - end - end - self - end - def screen_print - def cycle - the_idx=@the_idx - the_idx.sort.each do |a| - puts a[0] - a[1][:md].each do |x| - puts "\t" + x[:file] - end - end - end - self - end - end -end -__END__ diff --git a/lib/sisu/v6/harvest_topics.rb b/lib/sisu/v6/harvest_topics.rb deleted file mode 100644 index 2b80565e..00000000 --- a/lib/sisu/v6/harvest_topics.rb +++ /dev/null @@ -1,900 +0,0 @@ -# encoding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - metadata harvest, extract topics and associated writings from document set - (topics use topic_register header) - - * Author: Ralph Amissah - - * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, - All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Git - - - - * Ralph Amissah - - - - ** Description: simple xml representation (sax style) - -=end -module SiSU_HarvestTopics - require_relative 'harvest_author_format' # harvest_author_format.rb - include SiSU_Viz - class Songsheet - @@the_idx_topics={} - def initialize(opt,env) - @opt,@env=opt,env - @file_list=opt.files - end - def songsheet - idx_array={} - @opt.f_pths.each do |y| - lang_hash_file_array={} - name=y[:f] - filename=y[:pth] + '/' + y[:f] - File.open(filename,'r') do |file| - file.each_line("\n\n") do |line| - if line =~/^@(?:title|creator|classify):(?:\s|$)/m - lang_hash_file_array[y[:lng_is]] ||= [] - lang_hash_file_array[y[:lng_is]] << line - elsif line =~/^@\S+?:(?:\s|$)/m \ - or line =~/^(?:\s*\n|\s*$|%+ )/ - else break - end - end - end - lang_hash_file_array.each_pair do |lang,a| - idx_array[lang] ||=[] - idx_array=SiSU_HarvestTopics::Harvest.new( - @opt, - @env, - a, - filename, - name, - idx_array, - lang - ).extract_harvest - end - end - the_hash=SiSU_HarvestTopics::Index.new( - @opt, - @env, - idx_array, - @@the_idx_topics - ).song - SiSU_HarvestTopics::OutputIndex.new( - @opt, - the_hash - ).html_print.html_songsheet - end - end - class Mix - def spaces - Ax[:spaces] - end - end - class Harvest - def initialize(opt,env,data,filename,name,idx_array,lang) - @opt, @env,@data,@filename,@name,@idx_array,@lang= - opt,env, data, filename, name, idx_array, lang - end - def extract_harvest - data, filename, name, idx_array, lang= - @data,@filename,@name,@idx_array,@lang - @idx_lst=@title=@subtitle=@fulltitle=@author=@author_format=nil - rgx={} - rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m - rgx[:title]=/^@title:[ ]+(.+)/ - rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m - rgx[:idx]=/^@classify:.+?:topic_register:[ ]+(.+?)(?:\n\n|\n\s+:\S|\n%)/m - data.each do |para| - if para=~ rgx[:idx] - @idx_list=(rgx[:idx].match(para)[1]).split(/\s*\n\s*/).join - end - if para=~ rgx[:title] - @title=rgx[:title].match(para)[1] - end - if para=~ rgx[:subtitle] - @subtitle=rgx[:subtitle].match(para)[1] - end - if para=~ rgx[:author] - @author_format=rgx[:author].match(para)[1] - end - break if @title && @subtitle && @author && @idx_lst - end - @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title - if @title \ - and @author_format \ - and @idx_list - creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details - @authors,@authorship=creator[:authors],creator[:authorship] - file=if name=~/~[a-z]{2,3}\.ss[mt]$/ - name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') - else - name.sub(/\.ss[mt]$/,'') - end - page=if @env.output_dir_structure.by? == :language - "#{lang}/sisu_manifest.html" - else - "sisu_manifest.#{lang}.html" - end - idx_array[lang] <<=if @idx_list =~/;/ - g=@idx_list.scan(/[^;]+/) - idxl=[] - g.each do |i| - i=i.strip - idxl << { - filename: filename, - file: file, - rough_idx: i, - title: @fulltitle, - author: creator, - page: page, - lang: lang - } - end - idxl - else { - filename: filename, - file: file, - rough_idx: @idx_list, - title: @fulltitle, - author: creator, - page: page, - lang: lang, - } - end - else - if (@opt.act[:verbose_plus][:set]==:on \ - || @opt.act[:maintenance][:set]==:on) - p "missing required field in #{@filename} - [title]: <<#{@title}>>; [author]: <<#{@author_format}>>; [idx]: <<#{@idx_list}>>" - end - end - idx_array[lang]=idx_array[lang].flatten - idx_array - end - end - class Index < Mix - def initialize(opt,env,idx_array,the_idx) - @opt, @env,@idx_array,@the_idx= - opt,env, idx_array, the_idx - @@the_idx_topics=@the_idx - end - def song - the_idx=construct_book_topic_keys - construct_book_topic_hash(the_idx) - end - def capital(txt) - txt_a=txt.scan(/\S+/) - tx='' - txt_a.each do |t| - tx += t[0].chr.capitalize + t[1,txt.length] + ' ' - end - tx.strip - end - def capital_(txt) - txt[0].chr.capitalize + txt[1,txt.length] - end - def contents(idx,lang) - names='' - idx[:author][:last_first_format_a].each do |n| - s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') - names=if @env.output_dir_structure.by? == :language - names += %{#{n}, } - else - names += %{#{n}, } - end - end - { - filename: idx[:filename], - file: idx[:file], - author: names, - title: idx[:title], - page: idx[:page] - } - end - def capital_(txt) - txt[0].chr.capitalize + txt[1,txt.length] - end - def key_create(c,alt) - x=nil - x=if c.length==6 - c[0].to_s + '|' + - capital(c[1][0].to_s) + '|' + - capital(c[2][0].to_s) + '|' + - capital(c[3][0].to_s) + '|' + - capital(alt.to_s) - elsif c.length==5 - c[0].to_s + '|' + - capital(c[1][0].to_s) + '|' + - capital(c[2][0].to_s) + '|' + - capital(alt.to_s) - elsif c.length==4 - c[0].to_s + '|' + - capital(c[1][0].to_s) + '|' + - capital(alt.to_s) - elsif c.length==3 - c[0].to_s + '|' + - capital(alt.to_s) - end - end - def construct_book_topic_keys - idx_array=@idx_array - @idx_a=[] - @the_a=[] - idx_array.each_pair do |lang,idx_arr| - @@the_idx_topics[lang] ||= {} - idx_arr.each do |idx| - if idx[:rough_idx] - idx_lst=idx[:rough_idx].scan(/[^:]+/) - else - puts "no topic register in: << #{idx[:filename]} >>" - next - end - idx_a=[] - idx_lst.each do |c| - idx_a << c.scan(/[^|\n]+/m) - end - idx_a << contents(idx,lang) - @idx_a << [lang] + idx_a - end - end - @idx_a.each do |c| - if c.length > 1 \ - and c.is_a?(Array) - if c[2].is_a?(Hash) - c[1].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[2]] if v - end - end - end - if c.length > 2 \ - and c.is_a?(Array) - if c[3].is_a?(Hash) - c[2].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[3]] if v - end - end - end - if c.length > 3 \ - and c.is_a?(Array) - if c[4].is_a?(Hash) - c[3].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[4]] if v - end - end - end - if c.length > 4 \ - and c.is_a?(Array) - if c[5].is_a?(Hash) - c[4].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[5]] if v - end - end - end - if c.length > 5 \ - and c.is_a?(Array) - if c[6].is_a?(Hash) - c[5].each do |alt| - v=key_create(c,alt) - @the_a << [v, c[6]] if v - end - end - end - end - @the_a.sort_by { |x| x[0] } #; y.each {|z| puts z} - end - def construct_book_topic_hash(t) - @the_h={} - t.each do |z| - x=z[0].scan(/[^|]+/) - depth=x.length - extract=(depth-1) - k=case extract - when 4 - { x[0] => { x[1] => { x[2] => { x[3] => { x[4] => z[1] } } } } } - when 3 - { x[0] => { x[1] => { x[2] => { x[3] => z[1] } } } } - when 2 - { x[0] => { x[1] => { x[2] => z[1] } } } - when 1 - { x[0] => { x[1] => z[1] } } - when 0 - { x[0] => z[1] } - end - if extract >= 0 - k.each_pair do |x0,y0| - if extract == 0 - @the_h[x0] ||={ md: [] } - @the_h[x0][:md] << y0 - else - @the_h[x0] ||={} - end - #puts spaces*0 + x0 - if extract >= 1 - y0.each_pair do |x1,y1| - if extract == 1 - @the_h[x0][x1] ||={ md: [] } - @the_h[x0][x1][:md] << y1 - else - @the_h[x0][x1] ||={} - end - #puts spaces*1 + x1 - if extract >= 2 - y1.each_pair do |x2,y2| - if extract == 2 - @the_h[x0][x1][x2] ||={ md: [] } - @the_h[x0][x1][x2][:md] << y2 - else - @the_h[x0][x1][x2] ||={} - end - #puts spaces*2 + x2 - if extract >= 3 - y2.each_pair do |x3,y3| - if extract == 3 - @the_h[x0][x1][x2][x3] ||={ md: [] } - @the_h[x0][x1][x2][x3][:md] << y3 - else - @the_h[x0][x1][x2][x3] ||={} - end - #puts spaces*3 + x3 - if extract == 4 - y3.each_pair do |x4,y4| - if extract == 4 - @the_h[x0][x1][x2][x3][x4] ||={ md: [] } - @the_h[x0][x1][x2][x3][x4][:md] << y4 - else - @the_h[x0][x1][x2][x3][x4] ||={} - end - #puts spaces*4 + x4 - if extract == 5 - y4.each_pair do |x5,y5| - if extract == 5 - @the_h[x0][x1][x2][x3][x4][x5] ||={ md: [] } - @the_h[x0][x1][x2][x3][x4][x5][:md] << y5 - end - #puts spaces*5 + x5 - end - end - end - end - end - end - end - end - end - end - end - end - end - #@the_h.each_pair { |x,y| p x; p y } - @the_h - end - def traverse_base - @the_h.each_pair do |x0,y0| - puts spaces*0 + x0 if x0.is_a?(String) - if y0.is_a?(Hash) - y0.each_pair do |x1,y1| - puts spaces*1 + x1 if x1.is_a?(String) - if y1.is_a?(Hash) - y1.each_pair do |x2,y2| - puts spaces*2 + x2 if x2.is_a?(String) - if y2.is_a?(Hash) - y2.each_pair do |x3,y3| - puts spaces*3 + x3 if x3.is_a?(String) - if y3.is_a?(Hash) - y3.each_pair do |x4,y4| - puts spaces*4 + x4 if x4.is_a?(String) - if y4.is_a?(Hash) - y4.each_pair do |x5,y5| - puts spaces*5 + x5 if x5.is_a?(String) - end - end - end - end - end - end - end - end - end - end - end - end - def traverse - @the_h.each_pair do |x0,y0| - puts spaces*0 + x0 if x0.is_a?(String) - if y0.is_a?(Hash) - if y0.has_key?(:md) - y0[:md].each { |x| puts spaces*5 + x[:title] } - end - y0.each_pair do |x1,y1| - puts spaces*1 + x1 if x1.is_a?(String) - if y1.is_a?(Hash) - if y1.has_key?(:md) - y1[:md].each { |x| puts spaces*5 + x[:title] } - end - y1.each_pair do |x2,y2| - puts spaces*2 + x2 if x2.is_a?(String) - if y2.is_a?(Hash) - if y2.has_key?(:md) - y2[:md].each { |x| puts spaces*5 + x[:title] } - end - y2.each_pair do |x3,y3| - puts spaces*3 + x3 if x3.is_a?(String) - if y3.is_a?(Hash) - if y3.has_key?(:md) - y3[:md].each { |x| puts spaces*5 + x[:title] } - end - y3.each_pair do |x4,y4| - puts spaces*4 + x4 if x4.is_a?(String) - if y4.is_a?(Hash) - if y4.has_key?(:md) - y4[:md].each { |x| puts spaces*5 + x[:title] } - end - y4.each_pair do |x5,y5| - puts spaces*5 + x4 if x4.is_a?(String) - end - end - end - end - end - end - end - end - end - end - end - end - end - class OutputIndex < Mix - require_relative 'i18n' # i18n.rb - def initialize(opt,the_idx) - @opt,@the_idx=opt,the_idx - @env=SiSU_Env::InfoEnv.new - @rc=SiSU_Env::GetInit.new.sisu_yaml.rc - @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] - @alph=@alphabet_list.dup - @letter=@alph.shift - @vz=SiSU_Viz::Defaults.new - end - def html_file_open - @the_idx.keys.each do |lng| - @output ||={} - @output[lng] ||={} - harvest_pth,file='','' - if @env.output_dir_structure.by? == :language - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub + '/' \ - + lng + '/' \ - + 'manifest' - file=harvest_pth + '/' + 'topics.html' - elsif @env.output_dir_structure.by? == :filetype - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub + '/' \ - + 'manifest' - file=harvest_pth + '/' + 'topics.' + lng + '.html' - elsif @env.output_dir_structure.by? == :filename - harvest_pth=@env.path.webserv + '/' \ - + @opt.base_stub - file=harvest_pth + '/' + 'topics.' + lng + '.html' - end - FileUtils::mkdir_p(harvest_pth) \ - unless FileTest.directory?(harvest_pth) - fileinfo=(@opt.act[:verbose][:set]==:on \ - || @opt.act[:verbose_plus][:set]==:on \ - || @opt.act[:urls_selected][:set]==:on \ - || @opt.act[:maintenance][:set]==:on) \ - ? ("file://#{file}") - : '' - SiSU_Screen::Ansi.new( - @opt.act[:color_state][:set], - "harvest topics(#{@opt.files.length} files)", - fileinfo - ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on - @output[lng][:html]=File.new(file,'w') - if @opt.act[:maintenance][:set]==:on - @output[lng][:html_mnt]=File.new("#{@env.path.pwd}/topics.html",'w') - end - end - end - def html_file_close - @the_idx.keys.each do |lng| - @output[lng][:html].close - @output[lng][:html_mnt].close if @output[lng][:html_mnt].is_a?(File) - end - end - def html_print - def html_songsheet - #traverse - html_file_open - html_head - html_alph - html_body_traverse - html_tail - html_file_close - end - def html_body_traverse - @the_idx.each_pair do |x0,y0| - lng=x0 - if x0.is_a?(String) - #do_string_name(lng,'lev0',x0) - #puts spaces*0 + x0 - end - if y0.is_a?(Hash) - if y0.has_key?(:md) - y0[:md].each do |x| - #do_hash(lng,attrib,x) #lv==0 ? - #puts spaces*5 + x[:title] - end - end - y0.each_pair do |x1,y1| - if x1.is_a?(String) - do_string_name(lng,'lev0',x1) - #puts spaces*1 + x1 - end - if y1.is_a?(Hash) - if y1.has_key?(:md) - y1[:md].each do |x| - do_hash(lng,0,x) - #puts spaces*5 + x[:title] - end - end - y1.each_pair do |x2,y2| - if x2.is_a?(String) - do_string(lng,'lev1',x2) - #puts spaces*2 + x2 - end - if y2.is_a?(Hash) - if y2.has_key?(:md) - y2[:md].each do |x| - do_hash(lng,1,x) - #puts spaces*5 + x[:title] - end - end - y2.each_pair do |x3,y3| - if x3.is_a?(String) - do_string(lng,'lev2',x3) - #puts spaces*3 + x3 - end - if y3.is_a?(Hash) - if y3.has_key?(:md) - y3[:md].each do |x| - do_hash(lng,2,x) - #puts spaces*5 + x[:title] - end - end - y3.each_pair do |x4,y4| - if x4.is_a?(String) - do_string(lng,'lev3',x4) - #puts spaces*4 + x4 - end - if y4.is_a?(Hash) - if y4.has_key?(:md) - y4[:md].each do |x| - do_hash(lng,3,x) - #puts spaces*5 + x[:title] - end - end - y4.each_pair do |x5,y5| - if x5.is_a?(String) - do_string(lng,'lev4',x5) - #puts spaces*5 + x5 - end - end - end - end - end - end - end - end - end - end - end - end - end - def html_head_adjust(lng,type='') - css_path,authors='','' - if @env.output_dir_structure.by? == :language - css_path=(type !~/maintenance/) \ - ? '../../_sisu/css/harvest.css' - : 'harvest.css' - authors='authors.html' - elsif @env.output_dir_structure.by? == :filetype - css_path=(type !~/maintenance/) \ - ? '../_sisu/css/harvest.css' - : 'harvest.css' - authors="authors.#{lng}.html" - elsif @env.output_dir_structure.by? == :filename - css_path=(type !~/maintenance/) \ - ? './_sisu/css/harvest.css' - : 'harvest.css' - authors="authors.#{lng}.html" - end - ln=SiSU_i18n::Languages.new.language.list - harvest_languages='' - @the_idx.keys.each do |lg| - if @env.output_dir_structure.by? == :language - harvest_pth="../../#{lg}/manifest" - file=harvest_pth + '/' + 'topics.html' - elsif @env.output_dir_structure.by? == :filetype - harvest_pth='.' - file=harvest_pth + '/' + 'topics.' + lg + '.html' - elsif @env.output_dir_structure.by? == :filename - harvest_pth='.' - file=harvest_pth + '/topics.' + lg + '.html' - end - l=ln[lg][:t] - harvest_languages += - %{#{l}   } - end - sv=SiSU_Env::InfoVersion.instance.get_version - if @env.output_dir_structure.by? == :language - home_pth='../..' - output_structure_by='(output organised by language & filetype)' - elsif @env.output_dir_structure.by? == :filetype - home_pth='..' - output_structure_by='(output organised by filetype)' - elsif @env.output_dir_structure.by? == :filename - home_pth='.' - output_structure_by='(output organised by filename)' - else - home_pth='.' - output_structure_by='(output organised by ?)' - end - < - - - -SiSU Metadata Harvest - Topics - - - - - - - - - - - - -

SiSU Metadata Harvest - Topics #{output_structure_by}

-

[ HOME ] also see SiSU Metadata Harvest - Authors

-

#{@env.widget_static.search_form}

-
-

#{harvest_languages}

-
-WOK - end - def html_head - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] \ - << html_head_adjust(lng,'maintenance') \ - if @opt.act[:maintenance][:set]==:on - @output[lng][:html] << html_head_adjust(lng) - end - end - def html_alph - a=[] - a << '

' - @alph.each do |x| - a << ((x =~/[0-9]/) \ - ? '' - : %{#{x}, }) - end - a=a.join - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a \ - if @opt.act[:maintenance][:set]==:on - @output[lng][:html] << a - end - end - def html_tail - a =< - - - - - - -#{@vz.credits_sisu} - - -WOK - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a \ - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html] << a - end - end - def do_html(lng,html) - @output[lng][:html] << html - end - def do_html_maintenance(lng,html) - @output[lng][:html_mnt] << html \ - if @output[lng][:html_mnt].is_a?(File) - end - def do_string(lng,attrib,string) - html=%{

#{string}

} - do_html(lng,html) - do_html_maintenance(lng,html) \ - if @output[lng][:html_mnt].is_a?(File) - end - def do_string_default(lng,attrib,string) - html=%{

#{string}

} - do_html(lng,html) - end - def do_string_maintenance(lng,attrib,string) - html=%{

#{string}

} - do_html_maintenance(lng,html) \ - if @output[lng][:html_mnt].is_a?(File) - end - def do_string_name(lng,attrib,string) - f=/^(\S)/.match(string)[1] - if @lng != lng - @alph=@alphabet_list.dup - @letter=@alph.shift - @lng = lng - end - if @letter < f - while @letter < f - if @alph.length > 0 - @letter=@alph.shift - if @output[lng][:html_mnt].is_a?(File) - @output[lng][:html_mnt] \ - << %{\n

#{@letter}

} - end - @output[lng][:html] \ - << %{\n

#{@letter}

} - else break - end - end - end - name=string.strip.gsub(/\s+/,'_') - html=%{

#{string}

} - do_html(lng,html) - do_html_maintenance(lng,html) \ - if @output[lng][:html_mnt].is_a?(File) - end - def do_array(lng,lv,array) - lv+=1 - array.each do |b| - do_case(lng,lv,b) - end - end - def do_hash_md(lng,attrib,hash) - lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert - manifest_at=if @env.output_dir_structure.by? == :language - hash[:file] + Sfx[:html] - elsif @env.output_dir_structure.by? == :filetype - hash[:file] + lang_code_insert + Sfx[:html] - elsif @env.output_dir_structure.by? == :filename - "./#{hash[:file]}/#{hash[:page]}" - else '' #error - end - html=%{#{hash[:title]} - #{hash[:author]}} - do_string_default(lng,attrib,html) - end - def do_hash_md_maintenance(lng,attrib,hash) - if @output[lng][:html_mnt].is_a?(File) #should not be run for presentation output - html=%{[src]  #{hash[:title]} - #{hash[:author]}} - do_string_maintenance(lng,attrib,html) - end - end - def do_hash(lng,lv,hash) - lv+=1 - key=[] - hash.each_key do |m| - if m == :md - do_case(lng,lv,hash[m]) - elsif m != :title \ - and m != :author \ - and m != :filename \ - and m != :file \ - and m != :rough_idx \ - and m != :page - key << m - elsif m == :title - do_hash_md(lng,'work',hash) - do_hash_md_maintenance(lng,'work',hash) - end - end - if key.length > 0 - key.sort.each do |m| - attrib="lev#{lv}" - lv==0 ? do_string_name(lng,attrib,m) : do_string(lng,attrib,m) - do_case(lng,lv,hash[m]) - end - end - end - def do_case(lng,lv,a) - case a - when String - attrib="lev#{lv}" - if a=~/S/ - lv==0 ? do_string_name(lng,attrib,a) : do_string(lng,attrib,a) - end - when Array - do_array(lng,lv,a) - when Hash - do_hash(lng,lv,a) - end - end - #def html_body - # the_idx=@the_idx - # the_idx.each_pair do |lng,lng_array| - # lng_array.sort.each do |a| - # do_case(lng,-1,a) - # end - # end - #end - self - end - end -end -__END__ -terms -|_ t{tl1} -|_ {fa}[fa]{filenames and other details} - | |_ {tl2} -|_ {fa}[fa]{filenames and other details} - | | |_{tl3} -|_ {fa}[fa]{filenames and other details} - | | | |_{tl4} - {fa}[fa]{filenames and other details} - | | | | - | | | |_{tl4a} - {fa}[fa]{filenames and other details} - | | | | - | | | |_{tl4b} - {fa}[fa]{filenames and other details} - | | | | - | | | |_ ... - | | | - | | |_{tl3a} - {fa}[fa]{filenames and other details} - | | - | |_{tl2a} - {fa}[fa]{filenames and other details} - | - |_ t{tl1a} -|_ {fa}[fa]{filenames and other details} - |_ ... diff --git a/lib/sisu/v6/html_harvest.rb b/lib/sisu/v6/html_harvest.rb new file mode 100644 index 00000000..b1bd7ca5 --- /dev/null +++ b/lib/sisu/v6/html_harvest.rb @@ -0,0 +1,114 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + harvest metadata from document corpus (suitable for medium sized sites) + (concept example, [to remove size constraint: implement SQL equivalent]) + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, + All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Git + + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_Harvest + @@the_idx_topics,@@the_idx_authors={},{} + class Source + require_relative 'hub_options' # hub_options.rb + require_relative 'html_harvest_topics' # html_harvest_topics.rb + require_relative 'html_harvest_authors' # html_harvest_authors.rb + require_relative 'se' # se.rb + include SiSU_Env + def initialize(opt) + @opt=opt + @env=SiSU_Env::InfoEnv.new + end + def read + begin + harvest_pth=@env.path.webserv + '/' + @opt.base_stub + FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth) + cases(@opt,@env) + rescue + ensure + SiSU_Env::CreateSite.new(@opt).cp_css + end + end + def help + puts <. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Git + + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_FormatAuthor + class Author + def initialize(author_param) + @author_param=author_param + end + def author_details + @authors,@author_array=[],[] + authors=@author_param.scan(/[^;]+/) + authors.each do |a| + a=a.strip + if a =~/"(.+?)"/ + @authors << { the: $1 } + @author_array << $1.upcase + else #if a =~/,/ + x=a.scan(/[^,]+/) + x[0]=x[0].strip + x[1]=x[1].strip if x[1] + if x.length==1 + @authors << { the: x[0] } + @author_array << x[0].upcase + elsif x.length==2 + @authors << { the: x[0], others: x[1] } + @author_array << "#{x[0].upcase}, #{x[1]}" + else #p x.length + end + end + end + l = @authors.length + authors_string='' + @authors.each_with_index do |a,i| + authors_string += if a[:others] + if (l - i) > 1 + "#{a[:others]} #{a[:the]}, " + else + "#{a[:others]} #{a[:the]}" + end + else + if (l - i) > 2 + "#{a[:the]}, " + else + "#{a[:the]}" + end + end + end + { + last_first_a: authors, + last_first_format_a: @author_array, + authors_h: @authors, + authors_s: authors_string, + authors_param: @author_param + } + end + end +end +__END__ diff --git a/lib/sisu/v6/html_harvest_authors.rb b/lib/sisu/v6/html_harvest_authors.rb new file mode 100644 index 00000000..f748f400 --- /dev/null +++ b/lib/sisu/v6/html_harvest_authors.rb @@ -0,0 +1,470 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + metadata harvest, extract authors and their writings from document set + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, + All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Git + + + + * Ralph Amissah + + + + ** Description: simple xml representation (sax style) + +=end +module SiSU_HarvestAuthors + require_relative 'html_harvest_author_format' # html_harvest_author_format.rb + class Songsheet + @@the_idx_authors={} + def initialize(opt,env) + @opt,@env=opt,env + @file_list=opt.files + end + def songsheet + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|date):(?:\s|$)/m + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|%+ )/ + else break + end + end + end + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||= [] + idx_array=SiSU_HarvestAuthors::Harvest.new( + @opt, + @env, + a, + filename, + name, + idx_array, + lang + ).extract_harvest + end + end + the_idx=SiSU_HarvestAuthors::Index.new( + idx_array, + @@the_idx_authors + ).construct_book_author_index + SiSU_HarvestAuthors::OutputIndex.new( + @opt, + the_idx + ).html_print.html_songsheet + end + end + class Harvest + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt, @env,@data,@filename,@name,@idx_array,@lang= + opt,env, data, filename, name, idx_array, lang + end + def extract_harvest + data, filename, name, idx_array, lang = + @data,@filename,@name,@idx_array,@lang + @title=@subtitle=@fulltitle=@author=@author_format=@date=nil + @authors=[] + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m + data.each do |para| + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + if para=~ rgx[:date] + @date=rgx[:date].match(para)[1] + end + break if @title && @subtitle && @author && @date + end + @fulltitle=@subtitle \ + ? (@title + ' - ' + @subtitle) + : @title + if @title \ + and @author_format + creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + name.sub(/\.ss[mt]$/,'') + end + page=if @env.output_dir_structure.by? == :language + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<= { + filename: filename, + file: file, + date: @date, + title: @fulltitle, + author: creator, + page: page, + lang: lang + } + else + #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" + end + idx_array[lang]=idx_array[lang].flatten + idx_array + end + end + class Index + def initialize(idx_array,the_idx) + @idx_array,@the_idx=idx_array,the_idx + @@the_idx_authors=@the_idx + end + def capital(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def construct_book_author_index + idx_array=@idx_array + idx_array.each_pair do |lang,idx_arr| + @@the_idx_authors[lang] ||= {} + idx_arr.each do |idx| + idx[:author][:last_first_format_a].each do |author| + author=author.strip + if @@the_idx_authors[lang][author].is_a?(NilClass) + @@the_idx_authors[lang][author]={ md: [] } + end + @@the_idx_authors[lang][author][:md] << { + filename: idx[:filename], + file: idx[:file], + author: idx[:author], + title: idx[:title], + date: idx[:date], + page: idx[:page], + lang: idx[:lang] + } + end + end + end + @the_idx=@@the_idx_authors + end + end + class OutputIndex + require_relative 'i18n' # i18n.rb + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::InfoEnv.new + @rc=SiSU_Env::GetInit.new.sisu_yaml.rc + @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @alph=@alphabet_list.dup + @letter=@alph.shift + @vz=SiSU_Viz::Defaults.new + end + def html_file_open + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by? == :language + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + lng + '/' \ + + 'manifest' + file="#{harvest_pth}/authors.html" + elsif @env.output_dir_structure.by? == :filetype + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + 'manifest' + file="#{harvest_pth}/authors.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + file="#{harvest_pth}/authors.#{lng}.html" + end + FileUtils::mkdir_p(harvest_pth) \ + unless FileTest.directory?(harvest_pth) + fileinfo=(@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:urls_selected][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) \ + ? ("file://#{file}") : '' + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "harvest authors (#{@opt.files.length} files)", + fileinfo + ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on + @output[lng][:html]=File.new(file,'w') + end + end + def html_file_close + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close \ + if @output[lng][:html_mnt].is_a?(File) + end + end + def html_print + def html_songsheet + html_file_open + html_head + html_alph + html_body + html_tail + html_file_close + end + def html_head_adjust(lng,type='') + css_path,topics='','' + if @env.output_dir_structure.by? == :language + css_path=(type !~/maintenance/) \ + ? '../../_sisu/css/harvest.css' + : 'harvest.css' + topics='topics.html' + elsif @env.output_dir_structure.by? == :filetype + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + css_path=(type !~/maintenance/) \ + ? './_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lg| + if @env.output_dir_structure.by? == :language + harvest_pth="../../#{lg}/manifest" + file="#{harvest_pth}/authors.html" + elsif @env.output_dir_structure.by? == :filetype + harvest_pth='.' + file="#{harvest_pth}/authors.#{lg}.html" + elsif @env.output_dir_structure.by? == :filename + harvest_pth='.' + file="#{harvest_pth}/authors.#{lg}.html" + end + l=ln[lg][:t] + harvest_languages += + %{#{l}   } + end + sv=SiSU_Env::InfoVersion.instance.get_version + if @env.output_dir_structure.by? == :language + home_pth='../..' + output_structure_by= + '(output organised by language & filetype)' + elsif @env.output_dir_structure.by? == :filetype + home_pth='..' + output_structure_by= + '(output organised by filetype)' + elsif @env.output_dir_structure.by? == :filename + home_pth='.' + output_structure_by= + '(output organised by filename)' + else + home_pth='.' + output_structure_by='(output organised by ?)' + end + < + + + +SiSU Metadata Harvest - Authors + + + + + + + + + + + + +

SiSU Metadata Harvest - Authors #{output_structure_by}

+

[ HOME ] also see SiSU Metadata Harvest - Topics

+

#{@env.widget_static.search_form}

+
+

#{harvest_languages}

+
+WOK + end + def html_head + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] \ + << html_head_adjust(lng,'maintenance') \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] \ + << html_head_adjust(lng) + end + end + def html_alph + a=[] + a << '

' + @alph.each do |x| + a << ((x =~/[0-9]/) \ + ? '' + : %{#{x}, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << a + end + end + def html_tail + a =< + + + + + + +#{@vz.credits_sisu} + + +WOK + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << a + end + end + def do_html(lng,html) + @output[lng][:html_mnt] << html \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << html + end + def do_string_name(lng,attrib,string) + f=/^(\S)/.match(string[0])[1] + if @lng != lng + @alph=@alphabet_list.dup + @letter=@alph.shift + @lng = lng + end + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] \ + << %{\n

#{@letter}

} + end + @output[lng][:html] \ + << %{\n

#{@letter}

} + else break + end + end + end + end + def html_body + the_idx=@the_idx + the_idx.each_pair do |lng,lng_array| + lng_array.sort.each do |a| + do_string_name(lng,'',a) + name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + x = %{

#{a[0]}

} + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] << x + end + @output[lng][:html] << x + lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert + works=[] + a[1][:md].each do |i| + manifest_at=if @env.output_dir_structure.by? == :language + i[:file] + Sfx[:html] + elsif @env.output_dir_structure.by? == :filetype + i[:file] + lang_code_insert + Sfx[:html] + elsif @env.output_dir_structure.by? == :filename + './' + i[:file] + '/' + i[:page] + else '' #error + end + work=[ + "#{i[:date]} #{i[:title]}", + %{

#{i[:date]} #{i[:title]}, #{i[:author][:authors_s]}

} + ] + works<<=(@output[lng][:html_mnt].is_a?(File)) \ + ? (work.concat([%{

[src]  #{i[:date]} #{i[:title]}, #{i[:author][:authors_s]} -- [#{i[:file]}.sst]

}])) + : work + end + works.sort_by {|y| y[0]}.each do |z| + @output[lng][:html] << z[1] + @output[lng][:html_mnt] << z[2] \ + if @output[lng][:html_mnt].is_a?(File) + end + end + end + end + self + end + def screen_print + def cycle + the_idx=@the_idx + the_idx.sort.each do |a| + puts a[0] + a[1][:md].each do |x| + puts "\t" + x[:file] + end + end + end + self + end + end +end +__END__ diff --git a/lib/sisu/v6/html_harvest_topics.rb b/lib/sisu/v6/html_harvest_topics.rb new file mode 100644 index 00000000..87ed931a --- /dev/null +++ b/lib/sisu/v6/html_harvest_topics.rb @@ -0,0 +1,900 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + metadata harvest, extract topics and associated writings from document set + (topics use topic_register header) + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, + All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Git + + + + * Ralph Amissah + + + + ** Description: simple xml representation (sax style) + +=end +module SiSU_HarvestTopics + require_relative 'html_harvest_author_format' # html_harvest_author_format.rb + include SiSU_Viz + class Songsheet + @@the_idx_topics={} + def initialize(opt,env) + @opt,@env=opt,env + @file_list=opt.files + end + def songsheet + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|classify):(?:\s|$)/m + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|\s*$|%+ )/ + else break + end + end + end + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||=[] + idx_array=SiSU_HarvestTopics::Harvest.new( + @opt, + @env, + a, + filename, + name, + idx_array, + lang + ).extract_harvest + end + end + the_hash=SiSU_HarvestTopics::Index.new( + @opt, + @env, + idx_array, + @@the_idx_topics + ).song + SiSU_HarvestTopics::OutputIndex.new( + @opt, + the_hash + ).html_print.html_songsheet + end + end + class Mix + def spaces + Ax[:spaces] + end + end + class Harvest + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt, @env,@data,@filename,@name,@idx_array,@lang= + opt,env, data, filename, name, idx_array, lang + end + def extract_harvest + data, filename, name, idx_array, lang= + @data,@filename,@name,@idx_array,@lang + @idx_lst=@title=@subtitle=@fulltitle=@author=@author_format=nil + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:idx]=/^@classify:.+?:topic_register:[ ]+(.+?)(?:\n\n|\n\s+:\S|\n%)/m + data.each do |para| + if para=~ rgx[:idx] + @idx_list=(rgx[:idx].match(para)[1]).split(/\s*\n\s*/).join + end + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + break if @title && @subtitle && @author && @idx_lst + end + @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title + if @title \ + and @author_format \ + and @idx_list + creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + name.sub(/\.ss[mt]$/,'') + end + page=if @env.output_dir_structure.by? == :language + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<=if @idx_list =~/;/ + g=@idx_list.scan(/[^;]+/) + idxl=[] + g.each do |i| + i=i.strip + idxl << { + filename: filename, + file: file, + rough_idx: i, + title: @fulltitle, + author: creator, + page: page, + lang: lang + } + end + idxl + else { + filename: filename, + file: file, + rough_idx: @idx_list, + title: @fulltitle, + author: creator, + page: page, + lang: lang, + } + end + else + if (@opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) + p "missing required field in #{@filename} - [title]: <<#{@title}>>; [author]: <<#{@author_format}>>; [idx]: <<#{@idx_list}>>" + end + end + idx_array[lang]=idx_array[lang].flatten + idx_array + end + end + class Index < Mix + def initialize(opt,env,idx_array,the_idx) + @opt, @env,@idx_array,@the_idx= + opt,env, idx_array, the_idx + @@the_idx_topics=@the_idx + end + def song + the_idx=construct_book_topic_keys + construct_book_topic_hash(the_idx) + end + def capital(txt) + txt_a=txt.scan(/\S+/) + tx='' + txt_a.each do |t| + tx += t[0].chr.capitalize + t[1,txt.length] + ' ' + end + tx.strip + end + def capital_(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def contents(idx,lang) + names='' + idx[:author][:last_first_format_a].each do |n| + s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + names=if @env.output_dir_structure.by? == :language + names += %{#{n}, } + else + names += %{#{n}, } + end + end + { + filename: idx[:filename], + file: idx[:file], + author: names, + title: idx[:title], + page: idx[:page] + } + end + def capital_(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def key_create(c,alt) + x=nil + x=if c.length==6 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(c[2][0].to_s) + '|' + + capital(c[3][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==5 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(c[2][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==4 + c[0].to_s + '|' + + capital(c[1][0].to_s) + '|' + + capital(alt.to_s) + elsif c.length==3 + c[0].to_s + '|' + + capital(alt.to_s) + end + end + def construct_book_topic_keys + idx_array=@idx_array + @idx_a=[] + @the_a=[] + idx_array.each_pair do |lang,idx_arr| + @@the_idx_topics[lang] ||= {} + idx_arr.each do |idx| + if idx[:rough_idx] + idx_lst=idx[:rough_idx].scan(/[^:]+/) + else + puts "no topic register in: << #{idx[:filename]} >>" + next + end + idx_a=[] + idx_lst.each do |c| + idx_a << c.scan(/[^|\n]+/m) + end + idx_a << contents(idx,lang) + @idx_a << [lang] + idx_a + end + end + @idx_a.each do |c| + if c.length > 1 \ + and c.is_a?(Array) + if c[2].is_a?(Hash) + c[1].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[2]] if v + end + end + end + if c.length > 2 \ + and c.is_a?(Array) + if c[3].is_a?(Hash) + c[2].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[3]] if v + end + end + end + if c.length > 3 \ + and c.is_a?(Array) + if c[4].is_a?(Hash) + c[3].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[4]] if v + end + end + end + if c.length > 4 \ + and c.is_a?(Array) + if c[5].is_a?(Hash) + c[4].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[5]] if v + end + end + end + if c.length > 5 \ + and c.is_a?(Array) + if c[6].is_a?(Hash) + c[5].each do |alt| + v=key_create(c,alt) + @the_a << [v, c[6]] if v + end + end + end + end + @the_a.sort_by { |x| x[0] } #; y.each {|z| puts z} + end + def construct_book_topic_hash(t) + @the_h={} + t.each do |z| + x=z[0].scan(/[^|]+/) + depth=x.length + extract=(depth-1) + k=case extract + when 4 + { x[0] => { x[1] => { x[2] => { x[3] => { x[4] => z[1] } } } } } + when 3 + { x[0] => { x[1] => { x[2] => { x[3] => z[1] } } } } + when 2 + { x[0] => { x[1] => { x[2] => z[1] } } } + when 1 + { x[0] => { x[1] => z[1] } } + when 0 + { x[0] => z[1] } + end + if extract >= 0 + k.each_pair do |x0,y0| + if extract == 0 + @the_h[x0] ||={ md: [] } + @the_h[x0][:md] << y0 + else + @the_h[x0] ||={} + end + #puts spaces*0 + x0 + if extract >= 1 + y0.each_pair do |x1,y1| + if extract == 1 + @the_h[x0][x1] ||={ md: [] } + @the_h[x0][x1][:md] << y1 + else + @the_h[x0][x1] ||={} + end + #puts spaces*1 + x1 + if extract >= 2 + y1.each_pair do |x2,y2| + if extract == 2 + @the_h[x0][x1][x2] ||={ md: [] } + @the_h[x0][x1][x2][:md] << y2 + else + @the_h[x0][x1][x2] ||={} + end + #puts spaces*2 + x2 + if extract >= 3 + y2.each_pair do |x3,y3| + if extract == 3 + @the_h[x0][x1][x2][x3] ||={ md: [] } + @the_h[x0][x1][x2][x3][:md] << y3 + else + @the_h[x0][x1][x2][x3] ||={} + end + #puts spaces*3 + x3 + if extract == 4 + y3.each_pair do |x4,y4| + if extract == 4 + @the_h[x0][x1][x2][x3][x4] ||={ md: [] } + @the_h[x0][x1][x2][x3][x4][:md] << y4 + else + @the_h[x0][x1][x2][x3][x4] ||={} + end + #puts spaces*4 + x4 + if extract == 5 + y4.each_pair do |x5,y5| + if extract == 5 + @the_h[x0][x1][x2][x3][x4][x5] ||={ md: [] } + @the_h[x0][x1][x2][x3][x4][x5][:md] << y5 + end + #puts spaces*5 + x5 + end + end + end + end + end + end + end + end + end + end + end + end + end + #@the_h.each_pair { |x,y| p x; p y } + @the_h + end + def traverse_base + @the_h.each_pair do |x0,y0| + puts spaces*0 + x0 if x0.is_a?(String) + if y0.is_a?(Hash) + y0.each_pair do |x1,y1| + puts spaces*1 + x1 if x1.is_a?(String) + if y1.is_a?(Hash) + y1.each_pair do |x2,y2| + puts spaces*2 + x2 if x2.is_a?(String) + if y2.is_a?(Hash) + y2.each_pair do |x3,y3| + puts spaces*3 + x3 if x3.is_a?(String) + if y3.is_a?(Hash) + y3.each_pair do |x4,y4| + puts spaces*4 + x4 if x4.is_a?(String) + if y4.is_a?(Hash) + y4.each_pair do |x5,y5| + puts spaces*5 + x5 if x5.is_a?(String) + end + end + end + end + end + end + end + end + end + end + end + end + def traverse + @the_h.each_pair do |x0,y0| + puts spaces*0 + x0 if x0.is_a?(String) + if y0.is_a?(Hash) + if y0.has_key?(:md) + y0[:md].each { |x| puts spaces*5 + x[:title] } + end + y0.each_pair do |x1,y1| + puts spaces*1 + x1 if x1.is_a?(String) + if y1.is_a?(Hash) + if y1.has_key?(:md) + y1[:md].each { |x| puts spaces*5 + x[:title] } + end + y1.each_pair do |x2,y2| + puts spaces*2 + x2 if x2.is_a?(String) + if y2.is_a?(Hash) + if y2.has_key?(:md) + y2[:md].each { |x| puts spaces*5 + x[:title] } + end + y2.each_pair do |x3,y3| + puts spaces*3 + x3 if x3.is_a?(String) + if y3.is_a?(Hash) + if y3.has_key?(:md) + y3[:md].each { |x| puts spaces*5 + x[:title] } + end + y3.each_pair do |x4,y4| + puts spaces*4 + x4 if x4.is_a?(String) + if y4.is_a?(Hash) + if y4.has_key?(:md) + y4[:md].each { |x| puts spaces*5 + x[:title] } + end + y4.each_pair do |x5,y5| + puts spaces*5 + x4 if x4.is_a?(String) + end + end + end + end + end + end + end + end + end + end + end + end + end + class OutputIndex < Mix + require_relative 'i18n' # i18n.rb + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::InfoEnv.new + @rc=SiSU_Env::GetInit.new.sisu_yaml.rc + @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @alph=@alphabet_list.dup + @letter=@alph.shift + @vz=SiSU_Viz::Defaults.new + end + def html_file_open + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by? == :language + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + lng + '/' \ + + 'manifest' + file=harvest_pth + '/' + 'topics.html' + elsif @env.output_dir_structure.by? == :filetype + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + 'manifest' + file=harvest_pth + '/' + 'topics.' + lng + '.html' + elsif @env.output_dir_structure.by? == :filename + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + file=harvest_pth + '/' + 'topics.' + lng + '.html' + end + FileUtils::mkdir_p(harvest_pth) \ + unless FileTest.directory?(harvest_pth) + fileinfo=(@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:urls_selected][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) \ + ? ("file://#{file}") + : '' + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "harvest topics(#{@opt.files.length} files)", + fileinfo + ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on + @output[lng][:html]=File.new(file,'w') + if @opt.act[:maintenance][:set]==:on + @output[lng][:html_mnt]=File.new("#{@env.path.pwd}/topics.html",'w') + end + end + end + def html_file_close + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close if @output[lng][:html_mnt].is_a?(File) + end + end + def html_print + def html_songsheet + #traverse + html_file_open + html_head + html_alph + html_body_traverse + html_tail + html_file_close + end + def html_body_traverse + @the_idx.each_pair do |x0,y0| + lng=x0 + if x0.is_a?(String) + #do_string_name(lng,'lev0',x0) + #puts spaces*0 + x0 + end + if y0.is_a?(Hash) + if y0.has_key?(:md) + y0[:md].each do |x| + #do_hash(lng,attrib,x) #lv==0 ? + #puts spaces*5 + x[:title] + end + end + y0.each_pair do |x1,y1| + if x1.is_a?(String) + do_string_name(lng,'lev0',x1) + #puts spaces*1 + x1 + end + if y1.is_a?(Hash) + if y1.has_key?(:md) + y1[:md].each do |x| + do_hash(lng,0,x) + #puts spaces*5 + x[:title] + end + end + y1.each_pair do |x2,y2| + if x2.is_a?(String) + do_string(lng,'lev1',x2) + #puts spaces*2 + x2 + end + if y2.is_a?(Hash) + if y2.has_key?(:md) + y2[:md].each do |x| + do_hash(lng,1,x) + #puts spaces*5 + x[:title] + end + end + y2.each_pair do |x3,y3| + if x3.is_a?(String) + do_string(lng,'lev2',x3) + #puts spaces*3 + x3 + end + if y3.is_a?(Hash) + if y3.has_key?(:md) + y3[:md].each do |x| + do_hash(lng,2,x) + #puts spaces*5 + x[:title] + end + end + y3.each_pair do |x4,y4| + if x4.is_a?(String) + do_string(lng,'lev3',x4) + #puts spaces*4 + x4 + end + if y4.is_a?(Hash) + if y4.has_key?(:md) + y4[:md].each do |x| + do_hash(lng,3,x) + #puts spaces*5 + x[:title] + end + end + y4.each_pair do |x5,y5| + if x5.is_a?(String) + do_string(lng,'lev4',x5) + #puts spaces*5 + x5 + end + end + end + end + end + end + end + end + end + end + end + end + end + def html_head_adjust(lng,type='') + css_path,authors='','' + if @env.output_dir_structure.by? == :language + css_path=(type !~/maintenance/) \ + ? '../../_sisu/css/harvest.css' + : 'harvest.css' + authors='authors.html' + elsif @env.output_dir_structure.by? == :filetype + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' + : 'harvest.css' + authors="authors.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + css_path=(type !~/maintenance/) \ + ? './_sisu/css/harvest.css' + : 'harvest.css' + authors="authors.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lg| + if @env.output_dir_structure.by? == :language + harvest_pth="../../#{lg}/manifest" + file=harvest_pth + '/' + 'topics.html' + elsif @env.output_dir_structure.by? == :filetype + harvest_pth='.' + file=harvest_pth + '/' + 'topics.' + lg + '.html' + elsif @env.output_dir_structure.by? == :filename + harvest_pth='.' + file=harvest_pth + '/topics.' + lg + '.html' + end + l=ln[lg][:t] + harvest_languages += + %{#{l}   } + end + sv=SiSU_Env::InfoVersion.instance.get_version + if @env.output_dir_structure.by? == :language + home_pth='../..' + output_structure_by='(output organised by language & filetype)' + elsif @env.output_dir_structure.by? == :filetype + home_pth='..' + output_structure_by='(output organised by filetype)' + elsif @env.output_dir_structure.by? == :filename + home_pth='.' + output_structure_by='(output organised by filename)' + else + home_pth='.' + output_structure_by='(output organised by ?)' + end + < + + + +SiSU Metadata Harvest - Topics + + + + + + + + + + + + +

SiSU Metadata Harvest - Topics #{output_structure_by}

+

[ HOME ] also see SiSU Metadata Harvest - Authors

+

#{@env.widget_static.search_form}

+
+

#{harvest_languages}

+
+WOK + end + def html_head + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] \ + << html_head_adjust(lng,'maintenance') \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << html_head_adjust(lng) + end + end + def html_alph + a=[] + a << '

' + @alph.each do |x| + a << ((x =~/[0-9]/) \ + ? '' + : %{#{x}, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << a + end + end + def html_tail + a =< + + + + + + +#{@vz.credits_sisu} + + +WOK + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << a + end + end + def do_html(lng,html) + @output[lng][:html] << html + end + def do_html_maintenance(lng,html) + @output[lng][:html_mnt] << html \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string(lng,attrib,string) + html=%{

#{string}

} + do_html(lng,html) + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string_default(lng,attrib,string) + html=%{

#{string}

} + do_html(lng,html) + end + def do_string_maintenance(lng,attrib,string) + html=%{

#{string}

} + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_string_name(lng,attrib,string) + f=/^(\S)/.match(string)[1] + if @lng != lng + @alph=@alphabet_list.dup + @letter=@alph.shift + @lng = lng + end + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] \ + << %{\n

#{@letter}

} + end + @output[lng][:html] \ + << %{\n

#{@letter}

} + else break + end + end + end + name=string.strip.gsub(/\s+/,'_') + html=%{

#{string}

} + do_html(lng,html) + do_html_maintenance(lng,html) \ + if @output[lng][:html_mnt].is_a?(File) + end + def do_array(lng,lv,array) + lv+=1 + array.each do |b| + do_case(lng,lv,b) + end + end + def do_hash_md(lng,attrib,hash) + lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert + manifest_at=if @env.output_dir_structure.by? == :language + hash[:file] + Sfx[:html] + elsif @env.output_dir_structure.by? == :filetype + hash[:file] + lang_code_insert + Sfx[:html] + elsif @env.output_dir_structure.by? == :filename + "./#{hash[:file]}/#{hash[:page]}" + else '' #error + end + html=%{#{hash[:title]} - #{hash[:author]}} + do_string_default(lng,attrib,html) + end + def do_hash_md_maintenance(lng,attrib,hash) + if @output[lng][:html_mnt].is_a?(File) #should not be run for presentation output + html=%{[src]  #{hash[:title]} - #{hash[:author]}} + do_string_maintenance(lng,attrib,html) + end + end + def do_hash(lng,lv,hash) + lv+=1 + key=[] + hash.each_key do |m| + if m == :md + do_case(lng,lv,hash[m]) + elsif m != :title \ + and m != :author \ + and m != :filename \ + and m != :file \ + and m != :rough_idx \ + and m != :page + key << m + elsif m == :title + do_hash_md(lng,'work',hash) + do_hash_md_maintenance(lng,'work',hash) + end + end + if key.length > 0 + key.sort.each do |m| + attrib="lev#{lv}" + lv==0 ? do_string_name(lng,attrib,m) : do_string(lng,attrib,m) + do_case(lng,lv,hash[m]) + end + end + end + def do_case(lng,lv,a) + case a + when String + attrib="lev#{lv}" + if a=~/S/ + lv==0 ? do_string_name(lng,attrib,a) : do_string(lng,attrib,a) + end + when Array + do_array(lng,lv,a) + when Hash + do_hash(lng,lv,a) + end + end + #def html_body + # the_idx=@the_idx + # the_idx.each_pair do |lng,lng_array| + # lng_array.sort.each do |a| + # do_case(lng,-1,a) + # end + # end + #end + self + end + end +end +__END__ +terms -|_ t{tl1} -|_ {fa}[fa]{filenames and other details} + | |_ {tl2} -|_ {fa}[fa]{filenames and other details} + | | |_{tl3} -|_ {fa}[fa]{filenames and other details} + | | | |_{tl4} - {fa}[fa]{filenames and other details} + | | | | + | | | |_{tl4a} - {fa}[fa]{filenames and other details} + | | | | + | | | |_{tl4b} - {fa}[fa]{filenames and other details} + | | | | + | | | |_ ... + | | | + | | |_{tl3a} - {fa}[fa]{filenames and other details} + | | + | |_{tl2a} - {fa}[fa]{filenames and other details} + | + |_ t{tl1a} -|_ {fa}[fa]{filenames and other details} + |_ ... diff --git a/lib/sisu/v6/hub_actions.rb b/lib/sisu/v6/hub_actions.rb index e7e42bd2..076461db 100644 --- a/lib/sisu/v6/hub_actions.rb +++ b/lib/sisu/v6/hub_actions.rb @@ -559,7 +559,7 @@ module SiSU_Hub_Actions def harvest? if @opt.act[:harvest][:set]==:on #% --harvest if @opt.f_pths.length > 0 - require_relative 'harvest' # harvest.rb + require_relative 'html_harvest' # html_harvest.rb SiSU_Harvest::Source.new(@opt).read else msg='harvest request requires sisu markup files' -- cgit v1.2.3