# Recovered from a cgit (v1.2.3) patch view:
#   commit  65477054fd798728bf186aa2938727ddddbe86a5
#   From:   Ralph Amissah
#   Date:   Tue, 22 May 2007 02:06:46 +0100
#   Subject: Imported upstream version 0.52.7
#   File:   lib/sisu/0.52/db_create.rb (new file, 530 insertions)
=begin
 * Name: SiSU information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
   * http://www.jus.uio.no/sisu
   * http://www.jus.uio.no/sisu/SiSU/download.html

 * Description: modules shared by the different db types, dbi, postgresql, sqlite

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah

 * License: GPL 2 or later

   Summary of GPL 2

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
     http://www.fsf.org/licenses/gpl.html
     http://www.gnu.org/copyleft/gpl.html
     http://www.jus.uio.no/sisu/gpl2.fsf

   SiSU was first released to the public on January 4th 2005

   SiSU uses:

   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

   © Ralph Amissah 1997, current 2007.
   All Rights Reserved.

 * Ralph Amissah: ralph@amissah.com
                  ralph.amissah@gmail.com
=end
module SiSU_DB_create
  require SiSU_lib + '/db_columns'
  # Issues the CREATE TABLE statements (and, when the sql type matches /pg/,
  # the accompanying COMMENT ON statements) for the SiSU tables: metadata,
  # documents, endnotes, endnotes_asterisk, endnotes_plus and urls.
  #
  # Column widths (lt_* and document_seg) are not defined here; they are
  # expected to come from the superclass SiSU_DB_columns::Column_size.
  class Create < SiSU_DB_columns::Column_size
    require SiSU_lib + '/sysenv'
    # Width used for the CHAR digest columns; set once, on first instantiation,
    # from the length of SiSU_Env::Info_env#digest.
    @@dl=nil
    # opt::      options object; this class reads opt.cmd (flag string) and opt.fns
    # conn::     connection object responding to #execute (default '')
    # sql_type:: string matched against /pg/ to decide whether COMMENT ON SQL is
    #            generated (default 'pg')
    def initialize(opt,conn='',sql_type='pg')
      @opt,@conn,@sql_type=opt,conn,sql_type
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
      @comment=comment
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Prints every DBI driver and, tab-indented below it, each of its data sources.
    def available
      DBI.available_drivers.each do |driver|
        puts "Driver: #{driver}"
        DBI.data_sources(driver).each do |dsn|
          puts "\tDatasource: #{dsn}"
        end
      end
    end
    # Reports (unless opt.cmd contains 'q') and then requests creation of the
    # database named after the environment's path.stub_pwd.
    def create_db
      @env=SiSU_Env::Info_env.new(@opt.fns)
      tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db:',%{"SiSU_#{@env.path.stub_pwd}"})
      tell.colorize unless @opt.cmd =~/q/
      SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) #watch use of path.stub_pwd instead of stub
    end
    # Builds and returns the hash of COMMENT ON SQL, keyed by table name.
    #
    # The hash defaults to '' for unknown keys, so for sql types that do not
    # match /pg/ every lookup yields an empty string and nothing is appended to
    # the CREATE TABLE statements.
    #
    # NOTE(review): the SQL below sits in %{} literals, which process escapes and
    # interpolation like double-quoted strings. The sequences \d and \~ and the
    # #@removetid references (inside the commented-out trigger block) are
    # reproduced exactly as found; confirm whether the literal backslashes and
    # the placeholder text were intended to reach the database.
    def comment
      @comment=Hash.new('')
      case @sql_type
      when /pg/
        @comment['metadata'] =%{
          COMMENT ON Table metadata
            IS 'contains SiSU documents metadata with metadata';
          COMMENT ON COLUMN metadata.tid
            IS 'unique';
          COMMENT ON COLUMN metadata.title
            IS 'metadata title (dublin core element 1)';
          COMMENT ON COLUMN metadata.subtitle
            IS 'document subtitle';
          COMMENT ON COLUMN metadata.creator
            IS 'metadata creator (dublin core element 2)';
          COMMENT ON COLUMN metadata.illustrator
            IS 'metadata illustrator';
          COMMENT ON COLUMN metadata.translator
            IS 'metadata translator';
          COMMENT ON COLUMN metadata.subject
            IS 'metadata subject (dublin core element 3)';
          COMMENT ON COLUMN metadata.date
            IS 'metadata date (dublin core element 7)';
          COMMENT ON COLUMN metadata.date_created
            IS 'metadata date created (dublin core)';
          COMMENT ON COLUMN metadata.date_issued
            IS 'metadata date of issue (dublin core)';
          COMMENT ON COLUMN metadata.date_available
            IS 'metadata date available (dublin core)';
          COMMENT ON COLUMN metadata.date_valid
            IS 'metadata date valid (dublin core)';
          COMMENT ON COLUMN metadata.date_modified
            IS 'metadata date modified (dublin core)';
          COMMENT ON COLUMN metadata.type
            IS 'metadata type (dublin core element 8)';
          COMMENT ON COLUMN metadata.description
            IS 'metadata description (dublin core element 4)';
          COMMENT ON COLUMN metadata.publisher
            IS 'metadata publisher (dublin core element 5)';
          COMMENT ON COLUMN metadata.contributor
            IS 'metadata contributor (dublin core element 6)';
          COMMENT ON COLUMN metadata.prepared_by
            IS 'metadata markup prepared by';
          COMMENT ON COLUMN metadata.digitized_by
            IS 'metadata digitized by';
          COMMENT ON COLUMN metadata.format
            IS 'metadata format (dublin core element 9)';
          COMMENT ON COLUMN metadata.identifier
            IS 'metadata identifier (dublin core element 10)';
          COMMENT ON COLUMN metadata.source
            IS 'metadata source (dublin core element 11)';
          COMMENT ON COLUMN metadata.language
            IS 'metadata language (dublin core element 12)';
          COMMENT ON COLUMN metadata.language_original
            IS 'metadata original language';
          COMMENT ON COLUMN metadata.relation
            IS 'metadata (dublin core element 13)';
          COMMENT ON COLUMN metadata.coverage
            IS 'metadata coverage (dublin core element 14)';
          COMMENT ON COLUMN metadata.rights
            IS 'metadata rights / copyright / license (dublin core element 15)';
          COMMENT ON COLUMN metadata.owner
            IS 'metadata owner';
          COMMENT ON COLUMN metadata.keywords
            IS 'metadata keywords';
          COMMENT ON COLUMN metadata.comment
            IS 'metadata comment';
          COMMENT ON COLUMN metadata.abstract
            IS 'metadata abstract';
          COMMENT ON COLUMN metadata.loc
            IS 'metadata library of congress';
          COMMENT ON COLUMN metadata.dewey
            IS 'metadata dewey';
          COMMENT ON COLUMN metadata.isbn
            IS 'metadata isbn';
          COMMENT ON COLUMN metadata.pg
            IS 'metadata project gutenberg number';
          COMMENT ON COLUMN metadata.prefix_a
            IS 'metadata prefix';
          COMMENT ON COLUMN metadata.prefix_b
            IS 'metadata prefix';
          COMMENT ON COLUMN metadata.skin
            IS 'metadata sisu skin';
          COMMENT ON COLUMN metadata.markup
            IS 'metadata markup source';
          COMMENT ON COLUMN metadata.links
            IS 'metadata links';
          COMMENT ON COLUMN metadata.information
            IS 'metadata information';
          COMMENT ON COLUMN metadata.contact
            IS 'metadata contact';
          COMMENT ON COLUMN metadata.suffix
            IS 'metadata sisu suffix (output related)';
          COMMENT ON COLUMN metadata.filename
            IS 'metadata source filename';
          COMMENT ON COLUMN metadata.types
            IS 'document types scroll 1, seg 2, both 3';
          COMMENT ON COLUMN metadata.subj
            IS 'subject areas - no way to populate at present as not mapped';
/*
          CREATE FUNCTION fileremoval() RETURNS opaque AS '
            BEGIN
              DELETE FROM metadata WHERE tid=#@removetid;
              DELETE FROM documents WHERE documents.metadata_tid=#@removetid;
              DELETE FROM endnotes WHERE endnotes.metadata_tid=#@removetid;
              DELETE FROM endnotes_asterisk WHERE endnotes_asterisk.metadata_tid=#@removetid;
              DELETE FROM endnotes_plus WHERE endnotes_plus.metadata_tid=#@removetid;
              DELETE FROM urls WHERE urls.metadata_tid=#@removetid;
            END;
          ' LANGUAGE 'plpgsql';
          CREATE TRIGGER removefile AFTER INSERT
            PROCEDURE fileremoval();
*/
        }
        @comment['documents'] =%{
          COMMENT ON Table documents
            IS 'contains searchable text of SiSU documents';
          COMMENT ON COLUMN documents.lid
            IS 'unique';
          COMMENT ON COLUMN documents.metadata_tid
            IS 'tie to title in metadata';
          COMMENT ON COLUMN documents.lev
            IS 'doc level 1-6 \d\~';
          COMMENT ON COLUMN documents.seg
            IS 'segment name from level 4';
          COMMENT ON COLUMN documents.ocn
            IS 'object citation number';
          COMMENT ON COLUMN documents.en_a
            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';
          COMMENT ON COLUMN documents.en_z
            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';
          COMMENT ON COLUMN documents.en_a_asterisk
            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';
          COMMENT ON COLUMN documents.en_z_asterisk
            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';
          COMMENT ON COLUMN documents.en_a_plus
            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';
          COMMENT ON COLUMN documents.en_z_plus
            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';
          COMMENT ON COLUMN documents.types
            IS 'document types seg scroll';
          COMMENT ON COLUMN documents.clean
            IS 'text object - substantive text: clean, stripped of markup';
          COMMENT ON COLUMN documents.body
            IS 'text object - substantive text: light html markup';
          COMMENT ON COLUMN documents.lev1
            IS 'document structure, level 1';
          COMMENT ON COLUMN documents.lev2
            IS 'document structure, level 2';
          COMMENT ON COLUMN documents.lev3
            IS 'document structure, level 3';
          COMMENT ON COLUMN documents.lev4
            IS 'document structure, level 4';
          COMMENT ON COLUMN documents.lev5
            IS 'document structure, level 5';
          COMMENT ON COLUMN documents.lev6
            IS 'document structure, level 6';
        }
        @comment['endnotes'] =endnotes_comment('endnotes','contains searchable text of SiSU documents endnotes')
        @comment['endnotes_asterisk'] =endnotes_comment('endnotes_asterisk','contains searchable text of SiSU documents endnotes asterisk')
        @comment['endnotes_plus'] =endnotes_comment('endnotes_plus','contains searchable text of SiSU documents endnotes')
        @comment['urls'] =%{
          COMMENT ON Table urls
            IS 'contains base url links to different SiSU output';
          COMMENT ON COLUMN urls.metadata_tid
            IS 'tie to title in metadata - unique for each document, the mapping of rows is one to one';
          COMMENT ON COLUMN urls.plaintext
            IS 'plaintext utf-8';
          COMMENT ON COLUMN urls.html_toc
            IS 'table of contents for segmented html document';
          COMMENT ON COLUMN urls.html_doc
            IS 'html document (scroll)';
          COMMENT ON COLUMN urls.xhtml
            IS 'xhtml document (scroll)';
          COMMENT ON COLUMN urls.xml_sax
            IS 'xml sax oriented document (scroll)';
          COMMENT ON COLUMN urls.xml_dom
            IS 'xml dom oriented document (scroll)';
          COMMENT ON COLUMN urls.odf
            IS 'opendocument format text';
          COMMENT ON COLUMN urls.pdf_p
            IS 'pdf portrait';
          COMMENT ON COLUMN urls.pdf_l
            IS 'pdf landscape';
          COMMENT ON COLUMN urls.concordance
            IS 'rudimentary document index linked to html';
          COMMENT ON COLUMN urls.latex_p
            IS 'latex portrait';
          COMMENT ON COLUMN urls.latex_l
            IS 'latex_landscape';
          COMMENT ON COLUMN urls.markup
            IS 'markup';
          COMMENT ON COLUMN urls.sisupod
            IS 'SiSU document format .tgz (all SiSU information on document)';
        }
      end
      @comment
    end
    # Calls path.webserv_stub_ensure on a fresh Info_env when opt.cmd contains
    # 'd'; otherwise does nothing and returns nil.
    def output_dir?
      dir=SiSU_Env::Info_env.new('') # always constructed, as before, whether or not it is used
      dir.path.webserv_stub_ensure if @opt.cmd =~/d/
    end
    # Creates the metadata table; column widths come from the lt_* methods.
    def create_table_metadata
      tell_unless_quiet(%{
      currently using sisu dbi module
      to be populated from documents files
      create tables metadata
      data import through ruby transfer
      })
      @conn.execute(%{
        CREATE TABLE metadata (
          tid                INT4 PRIMARY KEY,
          title              VARCHAR(#{lt_title}) NULL,
          subtitle           VARCHAR(#{lt_subtitle}) NULL,
          creator            VARCHAR(#{lt_creator}) NULL,
          illustrator        VARCHAR(#{lt_illustrator}) NULL,
          translator         VARCHAR(#{lt_translator}) NULL,
          subject            VARCHAR(#{lt_subject}) NULL,
          date               VARCHAR(#{lt_date}) NULL,
          date_created       VARCHAR(#{lt_date_created}) NULL,
          date_issued        VARCHAR(#{lt_date_issued}) NULL,
          date_available     VARCHAR(#{lt_date_available}) NULL,
          date_valid         VARCHAR(#{lt_date_valid}) NULL,
          date_modified      VARCHAR(#{lt_date_modified}) NULL,
/*        date               DATE, */
/*        date_created       DATE, */
/*        date_issued        DATE, */
/*        date_available     DATE, */
/*        date_valid         DATE, */
/*        date_modified      DATE, */
          type               VARCHAR(#{lt_type}) NULL,
          description        VARCHAR(#{lt_description}) NULL,
          publisher          VARCHAR(#{lt_publisher}) NULL,
          contributor        VARCHAR(#{lt_contributor}) NULL,
          prepared_by        VARCHAR(#{lt_prepared_by}) NULL,
          digitized_by       VARCHAR(#{lt_digitized_by}) NULL,
          format             VARCHAR(#{lt_format}) NULL,
          identifier         VARCHAR(#{lt_identifier}) NULL,
          source             VARCHAR(#{lt_source}) NULL,
          language           VARCHAR(#{lt_language}) NULL,
          language_original  VARCHAR(#{lt_language_original}) NULL,
          relation           VARCHAR(#{lt_relation}) NULL,
          coverage           VARCHAR(#{lt_coverage}) NULL,
          rights             VARCHAR(#{lt_rights}) NULL,
          copyright          VARCHAR(#{lt_copyright}) NULL,
          owner              VARCHAR(#{lt_owner}) NULL,
          keywords           VARCHAR(#{lt_keywords}) NULL,
          comment            VARCHAR(#{lt_comment}) NULL,
          loc                VARCHAR(#{lt_loc}) NULL,
          dewey              VARCHAR(#{lt_dewey}) NULL,
          isbn               VARCHAR(#{lt_isbn}) NULL,
          pg                 VARCHAR(#{lt_pg}) NULL,
          abstract           VARCHAR(#{lt_abstract}) NULL,
          prefix_a           TEXT NULL,
          prefix_b           TEXT NULL,
          skin               VARCHAR(#{lt_skin}) NULL,
          markup             VARCHAR(#{lt_markup}) NULL,
          links              VARCHAR(#{lt_links}) NULL,
          information        VARCHAR(#{lt_information}) NULL,
          contact            VARCHAR(#{lt_contact}) NULL,
          suffix             VARCHAR(#{lt_suffix}) NULL,
          filename           VARCHAR(#{lt_filename}) NULL UNIQUE,
          types              CHAR(#{lt_types}) NULL,
          subj               VARCHAR(#{lt_subj}) NULL
        );
        #{@comment['metadata']}
      })
    end
    # Creates the documents table (references metadata).
    def create_table # create documents base
      tell_unless_quiet(%{
      to be populated from documents files
      create tables documents document_trade document_env
      data import through ruby transfer
      })
      @conn.execute(%{
        CREATE TABLE documents (
          lid                INT4 PRIMARY KEY,
          metadata_tid       INT4 REFERENCES metadata,
          ocn                SMALLINT,
          ocnd               VARCHAR(6),
          ocns               VARCHAR(6),
          clean              TEXT NULL,
          body               TEXT NULL,
          seg                VARCHAR(#{document_seg}) NULL,
          lev                SMALLINT NULL,
          lev1               SMALLINT,
          lev2               SMALLINT,
          lev3               SMALLINT,
          lev4               SMALLINT,
          lev5               SMALLINT,
          lev6               SMALLINT,
          en_a               SMALLINT NULL,
          en_z               SMALLINT NULL,
          en_a_asterisk      SMALLINT NULL,
          en_z_asterisk      SMALLINT NULL,
          en_a_plus          SMALLINT NULL,
          en_z_plus          SMALLINT NULL,
          digest_clean       CHAR(#{@@dl}),
          digest_all         CHAR(#{@@dl}),
          types              CHAR(1) NULL
        );
        #{@comment['documents']}
      })
    end
    # Creates the endnotes table (references documents and metadata).
    def create_table_endnotes
      create_endnotes_table('endnotes')
    end
    # Creates the endnotes_asterisk table (references documents and metadata).
    def create_table_endnotes_asterisk
      create_endnotes_table('endnotes_asterisk')
    end
    # Creates the endnotes_plus table (references documents and metadata).
    def create_table_endnotes_plus
      create_endnotes_table('endnotes_plus')
    end
    # Creates the urls table (references metadata).
    def create_table_urls # create documents file links mapping
      tell_unless_quiet(%{
      currently using sisu dbi module
      to be populated from documents files
      create tables urls
      data import through ruby transfer
      })
      @conn.execute(%{
        CREATE TABLE urls (
          metadata_tid       INT4 REFERENCES metadata,
          plaintext          varchar(512),
          html_toc           varchar(512),
          html_doc           varchar(512),
          xhtml              varchar(512),
          xml_sax            varchar(512),
          xml_dom            varchar(512),
          odf                varchar(512),
          pdf_p              varchar(512),
          pdf_l              varchar(512),
          concordance        varchar(512),
          latex_p            varchar(512),
          latex_l            varchar(512),
          digest             varchar(512),
          manifest           varchar(512),
          markup             varchar(512),
          sisupod            varchar(512)
        );
        #{@comment['urls']}
      })
    end
    private
    # Prints text unless opt.cmd contains 'q'.
    def tell_unless_quiet(text)
      print text unless @opt.cmd =~/q/
    end
    # Returns the COMMENT ON SQL for one of the three endnote tables, which share
    # the same columns. The metadata_tid comment targets the endnote table
    # itself, so creating it does not overwrite the comment on
    # documents.metadata_tid.
    def endnotes_comment(table,description)
      %{
          COMMENT ON Table #{table}
            IS '#{description}';
          COMMENT ON COLUMN #{table}.nid
            IS 'unique';
          COMMENT ON COLUMN #{table}.document_lid
            IS 'ties to text block from which referenced';
          COMMENT ON COLUMN #{table}.nr
            IS 'endnote number ';
          COMMENT ON COLUMN #{table}.clean
            IS 'endnote substantive content, stripped of markup';
          COMMENT ON COLUMN #{table}.body
            IS 'endnote substantive content';
          COMMENT ON COLUMN #{table}.ocn
            IS 'object citation no# <\~(\d+)> from which endnote is referenced';
          COMMENT ON COLUMN #{table}.metadata_tid
            IS 'tie to title in metadata - unique for each document';
      }
    end
    # Announces and creates one of the three structurally identical endnote
    # tables, appending that table's COMMENT ON SQL (empty for non-pg sql types).
    def create_endnotes_table(table)
      tell_unless_quiet(%{
      to be populated from documents files
      create tables #{table}
      data import through ruby transfer
      })
      @conn.execute(%{
        CREATE TABLE #{table} (
          nid                INT4 PRIMARY KEY,
          document_lid       INT4 REFERENCES documents,
          nr                 SMALLINT,
          clean              TEXT NULL,
          body               TEXT NULL,
          ocn                SMALLINT,
          ocnd               VARCHAR(6),
          ocns               VARCHAR(6),
          digest_clean       CHAR(#{@@dl}),
          metadata_tid       INT4 REFERENCES metadata
        );
        #{@comment[table]}
      })
    end
  end
end
__END__