diff options
Diffstat (limited to 'lib/sisu/develop/ao_references.rb')
-rw-r--r-- | lib/sisu/develop/ao_references.rb | 502 |
1 files changed, 502 insertions, 0 deletions
# encoding: utf-8
=begin

* Name: SiSU

** Description: documents, structuring, processing, publishing, search
*** system environment, resource control and configuration details

** Author: Ralph Amissah
  <ralph@amissah.com>
  <ralph.amissah@gmail.com>

** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
  All Rights Reserved.

** License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see <http://www.gnu.org/licenses/>.

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
  <http://www.fsf.org/licensing/licenses/gpl.html>
  <http://www.gnu.org/licenses/gpl.html>

  <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

** SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

** Homepages:
  <http://www.jus.uio.no/sisu>
  <http://www.sisudoc.org>

** Git
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/ao_references.rb;hb=HEAD>

=end
module SiSU_AO_References
  # Pulls bibliography entries out of a document's source objects.
  #
  # +md+ must respond to +flag_auto_biblio+ and +flag_biblio+; +data+ is the
  # array of source text objects that +biblio_extraction+ filters.
  class Bibliography
    # Field labels accepted for every entry type => citation hash key.
    SHARED_FIELD_LABELS = {
      'au' => :author_raw, 'author' => :author_raw,
      'ti' => :title, 'title' => :title,
      'lng' => :language, 'language' => :language,
      'yr' => :year, 'year' => :year,
      'pst' => :where, 'publisher_state' => :where,
      'url' => :url,
      'note' => :note,
      'sn' => :short_name, 'shortname' => :short_name,
      'id' => :id,
    }.freeze
    # Field labels accepted for an article entry.
    ARTICLE_FIELD_LABELS = SHARED_FIELD_LABELS.merge(
      'jo' => :journal, 'journal' => :journal,
      'vo' => :volume, 'volume' => :volume,
      'edr' => :editor, 'editor' => :editor
    ).freeze
    # Field labels accepted for a book entry.
    BOOK_FIELD_LABELS = SHARED_FIELD_LABELS.merge(
      'st' => :subtitle, 'subtitle' => :subtitle,
      'pb' => :publisher, 'publisher' => :publisher,
      'edn' => :edition, 'edition' => :edition
    ).freeze

    def initialize(md,data)
      @md,@data=md,data
    end

    # Sorts citation hashes by first author, then year, then title.
    # Nil entries are dropped; returns nil when +bib+ itself is nil/false.
    def sort_bibliography_array_by_author_year_title(bib)
      return unless bib
      bib.compact.sort_by do |c|
        # to_s keeps the sort total when a field is missing (nil cannot be
        # compared with a String and would raise ArgumentError).
        [Array(c[:author_arr]).first.to_s, c[:year].to_s, c[:title].to_s]
      end
    end

    # Stores the raw text of one prepared bibliography entry and returns self
    # so that +citation_metadata+ can be chained onto the call.
    def citation_in_prepared_bibliography(cite)
      @cite=cite
      self
    end

    # Empty template for an article citation.
    def article
      {
        is: :article,
        author_raw: nil,
        author: nil,
        author_arr: nil,
        title: nil,
        language: nil,
        journal: nil,
        volume: nil,
        editor: nil,
        year: nil,
        where: nil,
        url: nil,
        note: nil,
        short_name: nil,
        id: nil,
      }
    end

    # Empty template for a book citation.
    def book
      {
        is: :book,
        author_raw: nil,
        author: nil,
        author_arr: nil,
        #editor: nil,
        title: nil,
        subtitle: nil,
        fulltitle: nil,
        language: nil,
        publisher: nil,
        edition: nil,
        year: nil,
        where: nil,
        url: nil,
        note: nil,
        short_name: nil,
        id: nil,
      }
    end

    # Parses the entry stored by +citation_in_prepared_bibliography+ (one
    # "label: value" field per line) into a citation hash.
    #
    # Returns nil when the text is not recognised as an entry. An entry with
    # a "jo: " line is an article; otherwise one with an "au: " line is a book.
    # NOTE(review): only the short labels "jo:"/"au:" select the entry type,
    # although the long forms "journal:"/"author:" are accepted as fields;
    # kept as found - confirm whether long forms should also select the type.
    def citation_metadata
      type=if @cite =~/^jo: \S+/m
        :article
      elsif @cite =~/^au: \S+/m
        :book
      end
      return nil unless type
      lines=@cite.split("\n")
      citation=if type == :article
        parse_citation_fields(lines,ARTICLE_FIELD_LABELS,article)
      else
        parse_citation_fields(lines,BOOK_FIELD_LABELS,book)
      end
      if citation[:is]==:book
        citation[:fulltitle]=if citation[:title] && citation[:subtitle]
          citation[:title] + ' - ' + citation[:subtitle]
        else
          # tolerate a missing title instead of raising on nil + String
          citation[:title] || citation[:subtitle]
        end
      end
      # to_s: an article entry need not carry an author line
      citation[:author_arr]=citation[:author_raw].to_s.split(/;\s*/)
      citation[:author]=citation[:author_arr].map do |author|
        # "Surname, Forenames" => "Forenames Surname"
        author.gsub(/(.+?),\s+(.+)/,'\2 \1').strip
      end.join(', ').strip
      unless citation[:short_name]
        # Default short reference, built from the entry's own fields.
        citation[:short_name]=
          %{#{citation[:author]}, "#{citation[:title]}" (#{citation[:year]})}
      end
      citation
    end

    # Separates bibliography entries from the document data.
    #
    # Lines inside code blocks ("code{" ... "}code" or "``` code" ... "```")
    # are passed through untouched. A "1~biblio" / "1~bibliography" heading
    # opens the bibliography section; with +flag_auto_biblio+ each following
    # non-comment object is parsed via +citation_metadata+, with +flag_biblio+
    # it is collected as raw text. Collected objects are removed from the data.
    #
    # Returns [data, bibliography]; also replaces @data.
    def biblio_extraction
      bibliography=[]
      in_biblio=false
      in_code_curly=false
      in_code_tics=false
      @data=@data.select do |t_o|
        if t_o =~/^code\{/
          in_code_curly=true
        elsif t_o =~/^\}code/
          in_code_curly=false
        elsif t_o =~/^``` code/
          in_code_tics=true
        elsif in_code_tics && t_o =~/^```/
          in_code_tics=false
        end
        keep=true
        if in_code_curly || in_code_tics
          # code is never inspected for bibliography markup
        elsif @md.flag_auto_biblio
          if t_o =~/^1~biblio(?:graphy)?/
            in_biblio=true
          elsif t_o =~/^:?[B-D1]~/
            in_biblio=false
          elsif in_biblio && t_o !~/\A%+ /
            bibliography << citation_in_prepared_bibliography(t_o).citation_metadata
            keep=false
          end
        elsif @md.flag_biblio
          if t_o =~/^1~biblio(?:graphy)?/
            in_biblio=true
            keep=false
          elsif t_o =~/^:?[B-D]~/
            keep=false
          elsif t_o =~/^:?[B-D1]~/
            in_biblio=false
          elsif in_biblio && t_o !~/\A%+ /
            bibliography << t_o
            keep=false
          end
        end
        keep && t_o
      end.compact
      if @md.flag_auto_biblio && bibliography.length > 0
        bibliography=sort_bibliography_array_by_author_year_title(bibliography)
        # NOTE(review): the previous code formatted every entry as markup
        # text here but discarded the result; its only effect was removing
        # the bibliography heading from the data. That effect is preserved -
        # confirm whether the formatted entries were meant to be inserted.
        @data=@data.reject { |t_o| t_o =~/^1~biblio(?:graphy)?/ }
      end
      [@data,bibliography]
    end

    private

    # Copies every "label: value" line whose label is a key of +labels+ into
    # +citation+ (a repeated label overwrites); returns +citation+.
    def parse_citation_fields(lines,labels,citation)
      field_line=/^(#{labels.keys.join('|')}):\s+(\S.*)/
      lines.each do |line|
        found=field_line.match(line)
        citation[labels[found[1]]]=found[2] if found
      end
      citation
    end
  end

  # Finds inline citations (text delimited by ".:" and ":.") in document
  # objects and tries to break each one down into author, year, title, etc.
  class Citations
    # Citation layouts tried in order by +citation_detail+:
    # [pattern method, entry type, captures copied besides authors and year].
    CITATION_FORMS = [
      [:authors_year_title_publication_editor_pages, :article, [:title, :publication, :editor]],
      [:authors_title_publication_year_editor_pages, :article, [:title, :publication, :editor]],
      [:authors_title_publication_editor_year_pages, :article, [:title, :publication, :editor]],
      [:authors_title_publication_editor_pages_year, :article, [:title, :publication, :editor]],
      [:authors_year_title_publication_pages,        :article, [:title, :publication]],
      [:authors_title_publication_year_pages,        :article, [:title, :publication]],
      [:authors_year_publication_pages,              :book,    [:publication]],
      [:authors_publication_year_pages,              :book,    [:publication]],
    ].freeze

    def initialize(md='',data='')
      @md,@data=md,data
      #@biblio=[]
    end

    # Scans the data given at construction; returns [tuned_file, citations].
    def songsheet
      citations_scan(@data)
    end

    # Sorts citation hashes by raw author string, then year.
    def sort_bibliography_array_by_author_year(bib)
      bib.sort_by do |c|
        # to_s guards against nil keys, which cannot be compared with Strings
        [c[:author_raw].to_s, c[:year].to_s]
        #[c[:author_arr][0],c[:year],c[:title]]
      end
    end

    # Returns self, so the pattern methods below can be reached as
    # citations_regex.<pattern_name>.
    def citations_regex
      self
    end

    def pages_pattern
      %r{(?:[,.:]?\s+(?:p{1,2}\.?\s+)?(?:\d+--?\d+)[,.]?\s+)?}
    end

    def editor_pattern
      %r{(?<editor>(?:editor|edited by)\s+.+?)}
    end

    def year_pattern
      %r{[(\[]?(?<year>\d{4})[\])]?[.,]?}
    end

    # Regexp source (a String) for a publication name wrapped in either of the
    # italics markers held in Mx (a constant defined outside this file).
    def publication_pattern
      "(?:#{Mx[:fa_italics_o]}|#{Mx[:srcrgx_italics_o]})(?<publication>.+?)(?:#{Mx[:fa_italics_c]}|#{Mx[:srcrgx_italics_c]})"
    end

    # note ed. is usually edition rather than editor
    def authors_year_title_publication_editor_pages
      /(?<authors>.+?)\s+#{year_pattern}\s+"(?<title>.+?)"\s+#{publication_pattern}\s+#{editor_pattern}#{pages_pattern}/m
    end

    # note ed. is usually edition rather than editor
    def authors_title_publication_year_editor_pages
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{publication_pattern}\s+#{year_pattern}\s+#{editor_pattern}#{pages_pattern}/m
    end

    # note ed. is usually edition rather than editor
    # NOTE(review): the "." in "ed." is unescaped and so matches any
    # character; kept as found.
    def authors_title_publication_editor_year_pages ###
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{publication_pattern}\s+ed.\s+#{editor_pattern}#{year_pattern}#{pages_pattern}/m
    end

    # note ed. is usually edition rather than editor
    def authors_title_publication_editor_pages_year ###
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{publication_pattern}\s+#{editor_pattern}#{pages_pattern}#{year_pattern}/m
    end

    def authors_year_title_publication_pages
      /(?<authors>.+?)\s+#{year_pattern}\s+"(?<title>.+?)"\s+#{publication_pattern}[,.;]?#{pages_pattern}/m
    end

    def authors_title_publication_year_pages
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{publication_pattern}\s+#{year_pattern}\s+#{pages_pattern}/m
    end

    # Not among the layouts tried by +citation_detail+.
    def authors_title_publication_pages_year ###
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{publication_pattern}#{pages_pattern}#{year_pattern}/m
    end

    def authors_year_publication_pages
      /(?<authors>.+?)\s+#{year_pattern}\s+#{publication_pattern}#{pages_pattern}/m
    end

    def authors_publication_year_pages
      /(?<authors>.+?)\s+#{publication_pattern}[,.;]?\s+(?<publisher>.+?)?#{year_pattern}#{pages_pattern}[.;]?/m
    end

    # Splits each "citation ::shortref:" string into a hash with :citation,
    # :shortref and a shortref => citation pair; any other string becomes
    # { citation: string }.
    def authors?(citations)
      split_short_refs(citations)
    end

    # Same result as +authors?+ (the two were identical as found).
    def long_and_short_ref?(citations) #could be useful, keep ... extract shortref
      split_short_refs(citations)
    end

    # Breaks citation strings down into hashes, trying each layout of
    # CITATION_FORMS in order and using the first that matches.
    #
    # Returns the hashes of the citations that matched, sorted by author and
    # year. Unmatched strings are left out; in verbose/maintenance mode both
    # matched and missed citations are printed.
    def citation_detail(citations) #could be useful, keep ... extract shortref
      bibahash=[]
      number=0
      missed=0
      citations.each do |b|
        z=parse_citation(b)
        if z
          unless z[:author_raw].to_s.empty?
            z[:author_arr]=z[:author_raw].split(/;\s*/)
            z[:author]=z[:author_arr].map do |author|
              # "Surname, Forenames" => "Forenames Surname"
              author.gsub(/(.+?),\s+(.+)/,'\2 \1').strip
            end.join(', ').strip
          end
          number +=1
          p '[' + number.to_s + '] ' + z.to_s if verbose_reporting?
          bibahash << z
        else
          missed +=1
          p '<' + missed.to_s + '> ' + b.to_s if verbose_reporting?
        end
      end
      sort_bibliography_array_by_author_year(bibahash)
    end

    # Collects the citations found in +data+ and strips their delimiters.
    #
    # Objects whose +is+ is :meta, :comment, :code or :table are skipped. For
    # the rest, every ".: ... :." span in +obj+ is recorded and the ".:" and
    # ":." delimiters are removed from +obj+ (the objects are modified in
    # place). Returns [data without nils, citation hashes].
    def citations_scan(data)
      citations=[]
      #short_ref=[]
      tuned_file=data.compact
      tuned_file.each do |dob|
        next if [:meta,:comment,:code,:table].include?(dob.is)
        next unless dob.obj =~/\.:.+?:\./
        citations.concat(dob.obj.scan(/\.:\s*(.+?)\s*:\./m).flatten.compact)
        #short_ref << dob.obj.scan(/\.:\s+(.+?)\s+::([^:]+)::\./m) #look at later
        ##short_ref << dob.obj.scan(/\.:\s+(.+?)\s+::(.+?)::\./m) #look at later
        #short_ref << dob.obj.scan(/\.:\s*(.+?)\s*(::(.+?):)?:\./m) #look at later
        #remove citations delimiter & helpers from text
        dob.obj=dob.obj.gsub(/\.:|:\./,'')
      end
      #bib=long_and_short_ref?(citations) #could be useful, keep ... extract shortref
      [tuned_file,citation_detail(citations)]
    end

    private

    # Shared implementation of +authors?+ and +long_and_short_ref?+.
    def split_short_refs(citations)
      citations.map do |b|
        c=/^(?<citation>.+?)\s+::(?<shortref>.+?):$/.match(b)
        if c
          {
            citation: c[:citation],
            shortref: c[:shortref],
            c[:shortref].to_s => c[:citation]
          }
        else { citation: b }
        end
      end
    end

    # Returns the detail hash for the first layout matching +text+, or nil.
    def parse_citation(text)
      CITATION_FORMS.each do |form,kind,extras|
        found=citations_regex.public_send(form).match(text)
        next unless found
        detail={ is: kind, author_raw: found[:authors], year: found[:year] }
        extras.each { |key| detail[key]=found[key] }
        return detail
      end
      nil
    end

    # True when @md asks for verbose_plus or maintenance output. A @md without
    # +opt+ (such as the '' default) counts as not verbose.
    def verbose_reporting?
      return false unless @md.respond_to?(:opt)
      act=@md.opt.act
      act[:verbose_plus][:set]==:on || act[:maintenance][:set]==:on
    end
  end
end