From b0481de5c3a378f3e1d6cec774cd7ce36d7fcac8 Mon Sep 17 00:00:00 2001
From: Ralph Amissah <ralph@amissah.com>
Date: Tue, 24 Jul 2007 15:13:50 +0100
Subject: open archive initiative for metadata harvesting, initial
 implementation, decide use later -O

---
 CHANGELOG                        |   3 +
 lib/sisu/v0/hub.rb               |  33 ++++---
 lib/sisu/v0/manifest.rb          |   1 -
 lib/sisu/v0/param.rb             |   3 +-
 lib/sisu/v0/shared_html_lite.rb  |   4 +-
 lib/sisu/v0/sysenv.rb            |   5 +
 lib/sisu/v0/xml_md_oai_pmh_dc.rb | 204 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 234 insertions(+), 19 deletions(-)
 create mode 100644 lib/sisu/v0/xml_md_oai_pmh_dc.rb

diff --git a/CHANGELOG b/CHANGELOG
index bb53e377..8493671c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -14,6 +14,9 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz
 
   * db html, fix related to match of multiple urls within paragraph
 
+  * open archive initiative for metadata harvesting, initial implementation,
+    XML output available (-O), decide use later
+
   * debian vim
     * moved vim install back to addons
     * added recommends vim-addon-manager
diff --git a/lib/sisu/v0/hub.rb b/lib/sisu/v0/hub.rb
index 35160d23..125a0500 100644
--- a/lib/sisu/v0/hub.rb
+++ b/lib/sisu/v0/hub.rb
@@ -150,6 +150,7 @@ module SiSU
                   when /^plaintext$/;       SiSU_Plaintext::Source.new(@opt).read     # -a -A -e -E -f
                   when /^wikispeak$/;       SiSU_Wikispeak::Source.new(@opt).read     # -g
                   when /^odf$/;             SiSU_ODF::Source.new(@opt).read           # -o
+                  when /^xml_md_oai_pmh_dc$/; SiSU_XML_metadata::OAI_PMH.new(@opt).read # -O
                   when /^texpdf$/;          SiSU_TeX::Source.new(@opt).read           # -p
                   when /^texinfo$/;         SiSU_TexInfo::Source.new(@opt).read       # -I
                   #when /^docbook$/;         SiSU_Docbook::Source.new(@opt).read       # -B
@@ -283,7 +284,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/
     def actions
       if @opt.mod.inspect =~/--convert|--to|--from/; require "#{SiSU_lib}/sst_convert_markup"
       end
-      if @opt.cmd =~/([AabCcDdEeFfgGHhIiLMmNnoprRSsTtQqUuVvwWXxYyZ_0-9])/ and
+      if @opt.cmd =~/([AabCcDdEeFfgGHhIiLMmNnOoprRSsTtQqUuVvwWXxYyZ_0-9])/ and
          @opt.cmd =~/^-/ and
          @opt.mod.inspect !~/--(?:sitemaps|query|identify)/ or
          @opt.mod.inspect =~/--(?:(?:sq)?lite|pg(?:sql)?)/ #and
@@ -292,7 +293,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/
         flag=SiSU_Env::Info_processing_flag.new
         extra=''
         if @opt.cmd !~/[mn]/
-          extra+=if @opt.cmd =~/[abeghHhINoptTwXxz]/ and @opt.cmd !~/[mn]/; 'm'           #% add dal
+          extra+=if @opt.cmd =~/[abeghHhINOoptTwXxz]/ and @opt.cmd !~/[mn]/; 'm'           #% add dal
           elsif ((@opt.cmd =~/[Dd]/ or (@opt.mod.inspect =~/--(?:(?:sq)?lite|pg(?:sql)?)/)) \
           and @opt.mod.inspect !~/(?:remove|(?:(?:re)?create(?:all)?|dropall|drop)$)/) \
           and @opt.cmd !~/[mn]/
@@ -447,32 +448,34 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/
         end
         @opt.files=@opt.files.collect {|x| x=x.gsub(/\.ssm$/,'._sst') }
         if @opt.cmd =~/S/
-          op('sisupod_make','sisupod (zip)')                         #% -S make sisupod
+          op('sisupod_make','sisupod (zip)')                   #% -S make sisupod
           if @opt.fns=~/\.kdi._sst/
-            op('share_src_kdissert','kdissert (kdi)')                #% -S share kdissert source
+            op('share_src_kdissert','kdissert (kdi)')          #% -S share kdissert source
           end
         end
-        if @opt.cmd =~/N/; op('digests','digests')                   #% -N digest tree
+        if @opt.cmd =~/N/; op('digests','digests')             #% -N digest tree
         end
-        if @opt.cmd =~/[hHz]/; op('html','html')                     #% -h -H -z html css
+        if @opt.cmd =~/[hHz]/; op('html','html')               #% -h -H -z html css
         end
-        if @opt.cmd =~/[aAfeE]/; op('plaintext','plaintext')         #% -a -A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file
+        if @opt.cmd =~/[aAfeE]/; op('plaintext','plaintext')   #% -a -A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file
         end
-        if @opt.cmd =~/g/; op('wikispeak','wikispeak')             #% -g wiki
+        if @opt.cmd =~/g/; op('wikispeak','wikispeak')         #% -g wiki
         end
-        if @opt.cmd =~/o/; op('odf','OpenDocument')                  #% -o opendocument
+        if @opt.cmd =~/o/; op('odf','OpenDocument')            #% -o opendocument
         end
-        if @opt.cmd =~/x/; op('xml','xml sax')                       #% -x xml sax type
+        if @opt.cmd =~/x/; op('xml','xml sax')                 #% -x xml sax type
         end
-        if @opt.cmd =~/X/; op('xml_dom','xml dom')                   #% -X xml dom type
+        if @opt.cmd =~/X/; op('xml_dom','xml dom')             #% -X xml dom type
         end
-        if @opt.cmd =~/b/; op('xhtml','xhtml sax')                   #% -b xhtml sax type
+        if @opt.cmd =~/b/; op('xhtml','xhtml sax')             #% -b xhtml sax type
         end
-        #if @opt.cmd =~/B/; op('docbook','docbook xml')               #% -B docbook xml
+        #if @opt.cmd =~/B/; op('docbook','docbook xml')         #% -B docbook xml
         #end
-        if @opt.cmd =~/w/; op('concordance','Concordance')           #% -w concordance
+        if @opt.cmd =~/w/; op('concordance','Concordance')     #% -w concordance
         end
-        if @opt.cmd =~/t/                                            #% -t termsheet/standard form
+        if @opt.cmd =~/O/; op('xml_md_oai_pmh_dc','OAI PMH')   #% -O open archive initiative, metadata harvesting
+        end
+        if @opt.cmd =~/t/                                      #% -t termsheet/standard form
           SiSU_Help::Help.new('termsheet').help_request
         	@opt.files.each do |fns|
             if FileTest.file?(fns)
diff --git a/lib/sisu/v0/manifest.rb b/lib/sisu/v0/manifest.rb
index 931ea96b..998d3c59 100644
--- a/lib/sisu/v0/manifest.rb
+++ b/lib/sisu/v0/manifest.rb
@@ -445,7 +445,6 @@ module SiSU_Manifest
         begin
           id,file='',''
           vz=SiSU_Env::Get_init.instance.skin
-          #vz=SiSU_Viz::Skin.new
           banner_table=if vz.banner_home_button_only !~ /http:\/\/www\.jus\.uio\.no\/sisu/ and vz.banner_home_button_only !~  /sisu\.home\.png/
 <<WOK
 <table summary="band" width="100%" border="0" cellpadding="3" cellspacing="0">
diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb
index 2ab37afb..2264c48a 100644
--- a/lib/sisu/v0/param.rb
+++ b/lib/sisu/v0/param.rb
@@ -338,7 +338,7 @@ module SiSU_Param
               end
             when /^(?:0~type|@type:)\s+(.+?)$/m;             @dc_type=$1                            #% metainfo DC
             when /^(?:0~format|@format:)\s+(.+?)$/m;         @dc_format=$1                          #% metainfo DC
-            when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1                      #% metainfo DC
+            #when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1                      #% metainfo DC
             when /^(?:0~source|@source:)\s+(.+?)$/m;         @dc_source=$1                          #% metainfo DC
             when /^(?:0~language(?:\.document)?|@language(?:\.document)?:)\s+(.+?)$/m               #% metainfo DC
               x=$1.strip
@@ -655,6 +655,7 @@ module SiSU_Param
         end if @flv
         @lang.uniq!
         @fn=SiSU_Env::Env_call.new(@fns).lang(fn_set_lang[:c])
+        @dc_identifier="#{@env.url.root}/#@fnb/#{@fn[:toc]}" #DC note constructed dc identifier
         if @en[:note] > 0 and @en[:sum] > 0
           if @en[:sum] > 0
           else tell=SiSU_Screen::Ansi.new(@cmd,'both endnote styles used',"~{ #{@en[:sum]} }~ and ^~ #{@en[:mark]}")
diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb
index 50abb284..2bcea532 100644
--- a/lib/sisu/v0/shared_html_lite.rb
+++ b/lib/sisu/v0/shared_html_lite.rb
@@ -126,8 +126,8 @@ module SiSU_Format_Shared
     end
     def markup(para)
       if para =~/\{.+?\}((?:http|ftp)\S+|image)/
-        @word_mode=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/)
-        word_mode=urls(@word_mode)
+        wm=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/)
+        word_mode=urls(wm)
         words=word_mode.join(' ')
         para.gsub!(/.+/,words)
       end
diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb
index e9e39268..91e03459 100644
--- a/lib/sisu/v0/sysenv.rb
+++ b/lib/sisu/v0/sysenv.rb
@@ -454,6 +454,7 @@ module SiSU_Env
         :digest          => filename(code,'digest','.txt'),
         :metadata        => filename(code,'metadata','.html'), #chk
         :manifest        => filename(code,'sisu_manifest','.html'),
+        :oai_pmh         => filename(code,'oai_pmh','.xml'),
         :sitemap         => filename(code,'sitemap','.xml'),
         :sitemap_touch   => filename(code,"sitemap_#@fnb",'.xml'),
         :sxs             => filename(code,@fnb,'.sxs.xml'),
@@ -1780,6 +1781,8 @@ module SiSU_Env
         end
         if @md.cmd =~ /o/;                         ft << @md.fn[:odf]
         end
+        if @md.cmd =~ /O/;                         ft << @md.fn[:oai_pmh]
+        end
         if @md.cmd =~ /s/;                         ft << @md.fns
         end
         if @md.cmd =~ /S/;                         ft << 'sisupod.zip' << '.kdi'
@@ -1808,6 +1811,8 @@ module SiSU_Env
         end
         if @opt.cmd =~ /o/;                        ft << 'opendocument.odt' << '??.opendocument.odt' << 'opendocument.??.odt'
         end
+        if @opt.cmd =~ /O/;                        ft << 'oai_pmh.xml'
+        end
         if @opt.cmd =~ /s/;                        ft << '.sst' << '.ssi' << '.ssm'
         end
         if @opt.cmd =~ /S/;                        ft << 'sisupod.zip' << '.kdi'
diff --git a/lib/sisu/v0/xml_md_oai_pmh_dc.rb b/lib/sisu/v0/xml_md_oai_pmh_dc.rb
new file mode 100644
index 00000000..7ac7c3a6
--- /dev/null
+++ b/lib/sisu/v0/xml_md_oai_pmh_dc.rb
@@ -0,0 +1,204 @@
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+   2007 Ralph Amissah All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+   SiSU, a framework for document structuring, publishing and search
+
+   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+   2007 Ralph Amissah
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the Free
+   Software Foundation, either version 3 of the License, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   You should have received a copy of the GNU General Public License along with
+   this program. If not, see <http://www.gnu.org/licenses/>.
+
+   If you have Internet connection, the latest version of the GPL should be
+   available at these locations:
+   <http://www.fsf.org/licenses/gpl.html>
+   <http://www.gnu.org/copyleft/gpl.html>
+   <http://www.jus.uio.no/sisu/gpl.fsf>
+
+ * SiSU uses:
+   * Standard SiSU markup syntax,
+   * Standard SiSU meta-markup syntax, and the
+   * Standard SiSU object citation numbering and system
+
+ * Homepages:
+   <http://www.jus.uio.no/sisu>
+   <http://www.sisudoc.org>
+
+ * Download:
+   <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+   <ralph@amissah.com>
+   <ralph.amissah@gmail.com>
+
+ ** Description: Dublin Core document metadata written as OAI-PMH (oai_dc) XML
+
+=end
+module SiSU_XML_metadata
+  require "#{SiSU_lib}/sysenv"
+  include SiSU_Env
+  require "#{SiSU_lib}/param"
+  include SiSU_Param
+  class OAI_PMH
+    def initialize(opt) # opt: the options object hub.rb passes in (OAI_PMH.new(@opt))
+      @md=SiSU_Param::Parameters.new(opt).get # document parameters/metadata read by body and output
+      @oai_pmh=[] # collects the <dc:...> element lines appended by body
+    end
+    def read # entry point called from hub.rb for the xml_md_oai_pmh_dc operation (-O)
+      output # delegates straight to output
+    end
+    def pre # returns the XML declaration and the opening <oai_dc:dc> root tag with its namespace declarations
+<<WOK
+<?xml version="1.0" encoding="UTF-8"?>
+<oai_dc:dc
+  xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+  xmlns:dc="http://purl.org/dc/elements/1.1/"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/
+  http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+WOK
+    end
+    def body # appends one element line per metadata field that is set on @md to @oai_pmh, and returns @oai_pmh
+      if @md.dc_title                                                            # DublinCore 1 - title
+        @oai_pmh << %{  <dc:title xml:lang="en">#{@md.dc_title}</dc:title>\n} # NOTE(review): xml:lang is fixed to "en" and the title is not passed through meta_content_clean
+        #@oai_pmh << %{  <dc:title xml:lang="en">#{seg_name}#{@md.dc_title}</dc:title>\n}
+      end
+      if @md.dc_creator                                                          # DublinCore 2 - creator/author (author)
+        txt=meta_content_clean(@md.dc_creator)
+        @oai_pmh << %{  <dc:creator>#{txt}</dc:creator>\n}
+      end
+      if @md.dc_subject                                                          # DublinCore 3 - subject (us library of congress, eric or udc, or schema???)
+        txt=meta_content_clean(@md.dc_subject)
+        @oai_pmh << %{  <dc:subject>#{txt}</dc:subject>\n}
+      end
+      if @md.dc_description                                                      # DublinCore 4 - description
+        txt=meta_content_clean(@md.dc_description)
+        @oai_pmh << %{  <dc:description>#{txt}</dc:description>\n}
+      end
+      if @md.dc_publisher                                                        # DublinCore 5 - publisher (current copy published by)
+        txt=meta_content_clean(@md.dc_publisher)
+        @oai_pmh << %{  <dc:publisher>#{txt}</dc:publisher>\n}
+      end
+      if @md.dc_contributor                                                      # DublinCore 6 - contributor
+        txt=meta_content_clean(@md.dc_contributor)
+        @oai_pmh << %{  <dc:contributor>#{txt}</dc:contributor>\n}
+      end
+      if @md.dc_date                                                             # DublinCore 7 - date year-mm-dd
+        @oai_pmh << %{  <dc:date>#{@md.dc_date}</dc:date>\n}
+      end
+      if @md.dc_date_created                                                     # DublinCore 7 - date.created
+        @oai_pmh << %{  <dc:date_created>#{@md.dc_date_created}</dc:date_created>\n} # NOTE(review): dc:date_* names (here and below) do not appear to be oai_dc elements - confirm against oai_dc.xsd
+      end
+      if @md.dc_date_issued                                                      # DublinCore 7 - date.issued
+        @oai_pmh << %{  <dc:date_issued>#{@md.dc_date_issued}</dc:date_issued>\n}
+      end
+      if @md.dc_date_available                                                   # DublinCore 7 - date.available
+        @oai_pmh << %{  <dc:date_available>#{@md.dc_date_available}</dc:date_available>\n}
+      end
+      if @md.dc_date_valid                                                       # DublinCore 7 - date.valid
+        @oai_pmh << %{  <dc:date_valid>#{@md.dc_date_valid}</dc:date_valid>\n}
+      end
+      if @md.dc_date_modified                                                    # DublinCore 7 - date.modified
+        @oai_pmh <<  %{  <dc:date_modified>#{@md.dc_date_modified}</dc:date_modified>\n}
+      end
+      if @md.dc_type                                                             # DublinCore 8 - type
+        txt=meta_content_clean(@md.dc_type)
+        @oai_pmh << %{  <dc:type>#{txt}</dc:type>\n}
+      end
+      if @md.dc_format                                                           # DublinCore 9 - format
+        txt=meta_content_clean(@md.dc_format)
+        @oai_pmh << %{  <dc:format>#{txt}</dc:format>\n}
+      end
+      if @md.dc_identifier                                                       # DublinCore 10 - identifier
+        txt=meta_content_clean(@md.dc_identifier)
+        @oai_pmh << %{  <dc:identifier>#{txt}</dc:identifier>\n}
+      end
+      if @md.dc_source                                                           # DublinCore 11 - source
+        txt=meta_content_clean(@md.dc_source)
+        @oai_pmh << %{  <dc:source>#{txt}</dc:source>\n}
+      end
+      if @md.dc_language[:name]                                                  # DublinCore 12 - language (English)
+        @oai_pmh << %{  <dc:language>#{@md.dc_language[:name]}</dc:language>\n}
+      end
+      if @md.language_original[:name]                                            # original language, also emitted as dc:language
+        @oai_pmh << %{  <dc:language>#{@md.language_original[:name]}</dc:language>\n}
+      end
+      if @md.dc_relation                                                         # DublinCore 13 - relation
+        txt=meta_content_clean(@md.dc_relation)
+        @oai_pmh << %{  <dc:relation>#{txt}</dc:relation>\n}
+      end
+      if @md.dc_coverage                                                         # DublinCore 14 - coverage
+        txt=meta_content_clean(@md.dc_coverage)
+        @oai_pmh << %{  <dc:coverage>#{txt}</dc:coverage>\n}
+      end
+      if @md.dc_rights                                                           # DublinCore 15 - rights
+        txt=meta_content_clean(@md.dc_rights)
+        @oai_pmh << %{  <dc:rights>#{txt}</dc:rights>\n}
+      end
+      if @md.keywords                                                            # keywords (not one of the 15 numbered fields above)
+        txt=meta_content_clean(@md.keywords)
+        @oai_pmh << %{  <dc:keywords>#{txt}</dc:keywords>\n} # NOTE(review): dc:keywords does not appear to be an oai_dc element - confirm against oai_dc.xsd
+      end
+      @oai_pmh
+    end
+    def meta_content_clean(content='')
+      unless content.nil?
+        content.tr!('"',"'")
+      end
+      content
+    end
+    def post # returns the closing tag for the <oai_dc:dc> root element opened in pre
+      '</oai_dc:dc>'
+    end
+    def output # writes pre, each line returned by body, then post to the document's oai_pmh file
+      SiSU_Env::SiSU_file.new(@md).mkdir
+      oai_pmh=SiSU_Env::SiSU_file.new(@md,@md.fn[:oai_pmh]).mkfile # :oai_pmh file name key is added in sysenv by this patch; NOTE(review): handle is never closed here - confirm mkfile's contract
+      oai_pmh << pre
+      body.each do |x|
+        oai_pmh << x
+      end
+      oai_pmh << post
+    end
+  end
+end
+__END__
+#http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore
+#sample implementation, e.g. 2
+<?xml version="1.0" encoding="UTF-8"?>
+<oai_dc:dc
+    xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+    xmlns:dc="http://purl.org/dc/elements/1.1/"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/
+    http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+  <dc:title xml:lang="en">Grassmann's space analysis</dc:title>
+  <dc:creator>Hyde, E. W. (Edward Wyllys)</dc:creator>
+  <dc:subject>LCSH:Ausdehnungslehre; LCCN QA205.H99</dc:subject>
+  <dc:publisher>J. Wiley &amp; Sons</dc:publisher>
+  <dc:date>Created: 1906; Available: 1991</dc:date>
+  <dc:type>text</dc:type>
+  <dc:identifier>http://resolver.library.cornell.edu/math/1796949
+     </dc:identifier>
+  <dc:language>english</dc:language>
+  <dc:rights xml:lang="en">Public Domain</dc:rights>
+</oai_dc:dc>
-- 
cgit v1.2.3