From 479e71e90146b1067120faf489af1b13a5a0cf3d Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 14 Apr 2018 16:39:52 -0400 Subject: sqlite3 some stuff & rearrange files, dlang, org --- src/sdp/meta/read_config_files.d | 6 +- src/sdp/output/hub.d | 4 +- src/sdp/output/paths_output.d | 42 +- src/sdp/output/source_sisupod.d | 26 +- src/sdp/output/sqlite.d | 806 +++++++++++++++++++++++---------------- src/sdp/output/sqlite_discrete.d | 491 ++++++++++++------------ src/sdp/sdp.d | 4 +- 7 files changed, 754 insertions(+), 625 deletions(-) (limited to 'src/sdp') diff --git a/src/sdp/meta/read_config_files.d b/src/sdp/meta/read_config_files.d index 56a2bf8..22d285f 100644 --- a/src/sdp/meta/read_config_files.d +++ b/src/sdp/meta/read_config_files.d @@ -125,10 +125,8 @@ static template readConfigSite() { config_file_str = conf_file.readText; break; } - } - catch (ErrnoException ex) { - } - catch (FileException ex) { + } catch (ErrnoException ex) { + } catch (FileException ex) { } } if (config_file_str.length > 0) { break; } diff --git a/src/sdp/output/hub.d b/src/sdp/output/hub.d index 830c6d0..373092b 100644 --- a/src/sdp/output/hub.d +++ b/src/sdp/output/hub.d @@ -133,14 +133,14 @@ template outputHubOp() { sdp.output.source_sisupod, sdp.output.create_zip_file, sdp.output.paths_output; - void outputHubOp(O)(O opt_action) { + void outputHubOp(E,O)(E env, O opt_action) { mixin SiSUoutputRgxInit; static auto rgx = Rgx(); if ((opt_action.sqlite_create)) { if ((opt_action.verbose)) { writeln("sqlite create table... "); } - SQLiteTablesCreate!()(); + SQLiteTablesCreate!()(env, opt_action); } if ((opt_action.sqlite_drop)) { if ((opt_action.verbose)) { diff --git a/src/sdp/output/paths_output.d b/src/sdp/output/paths_output.d index d58b990..cd4817b 100644 --- a/src/sdp/output/paths_output.d +++ b/src/sdp/output/paths_output.d @@ -29,12 +29,32 @@ template SiSUoutPaths() { return _PathsStruct(); } } +template SiSUoutPathSQLite() { + auto SiSUoutPathSQLite(Po)( + Po output_pth_root, + ) { + struct _PathsStruct { + string output_root() { + string out_root_; + if (output_pth_root.length > 0) { + out_root_ = output_pth_root; + } else { + out_root_ = "sisugen"; + } + return out_root_; + } + string output_base() { + return asNormalizedPath(output_root).array; + } + } + return _PathsStruct(); + } +} template SiSUoutPathsFnPd() { /+ TODO stuff to work out here +/ auto SiSUoutPathsFnPd(Fn,Pn)( Fn fn_src_pth, Pn pod_name - // Pn pod_name = "", ) { struct _PathsStruct { string base_filename() { @@ -216,14 +236,16 @@ template SiSUpathsSQLiteDiscrete() { Po output_pth_root, Lng lng, ) { - auto out_pth = SiSUoutPaths!()(output_pth_root, lng); - string base_dir = "sqlite"; struct _PathsStruct { string base_filename(string fn_src) { return fn_src.baseName.stripExtension; } string base() { - return asNormalizedPath((out_pth.output_root).chainPath(base_dir)).array; + auto out_pth = SiSUoutPaths!()(output_pth_root, lng); + string base_dir = "sqlite"; + return asNormalizedPath((out_pth.output_base).chainPath(base_dir)).array; + // return asNormalizedPath((out_pth.output_root).chainPath(base_dir)).array; // single file for each file, multiple languages + // return asNormalizedPath((out_pth.output_base).chainPath(base_dir)).array; // separate files for each language } string seg(string fn_src) { return asNormalizedPath(base.chainPath(base_filename(fn_src))).array; @@ -238,23 +260,19 @@ template SiSUpathsSQLiteDiscrete() { template SiSUpathsSQLite() { mixin SiSUrgxInit; static auto rgx = Rgx(); - auto SiSUpathsSQLite(Po,Lng)( + auto SiSUpathsSQLite(Po)( Po output_pth_root, - Lng lng, ) { - auto out_pth = SiSUoutPaths!()(output_pth_root, lng); - string base_dir = "sqlite"; struct _PathsStruct { string base_filename(string fn_src) { return fn_src.baseName.stripExtension; } string base() { + auto out_pth = SiSUoutPathSQLite!()(output_pth_root); // decide whether to have separate files for each language + string base_dir = "sqlite"; return asNormalizedPath((out_pth.output_root).chainPath(base_dir)).array; } - string seg(string fn_src) { - return asNormalizedPath(base.chainPath(base_filename(fn_src))).array; - } - string sqlite_file(string fn_src) { + string sqlite_file(string fn_src = "sdp-niu") { return asNormalizedPath(base.chainPath(base_filename(fn_src) ~ ".sqlite")).array; } } diff --git a/src/sdp/output/source_sisupod.d b/src/sdp/output/source_sisupod.d index 6c50cf2..50b37b0 100644 --- a/src/sdp/output/source_sisupod.d +++ b/src/sdp/output/source_sisupod.d @@ -77,7 +77,7 @@ template SiSUpod() { ~ "/" ~ image; if (exists(fn_src_in)) { debug(io) { - writeln("WARNING (io debug) src out found: ", fn_src_in); + writeln("(io debug) src out found: ", fn_src_in); } if (doc_matters.opt.action.source) { fn_src_in.copy(fn_src_out_filesystem); @@ -108,7 +108,7 @@ template SiSUpod() { ~ "/" ~ "sisu_document_make"; if (exists(fn_src_in)) { debug(io) { - writeln("WARNING (io debug) src out found: ", fn_src_in); + writeln("(io debug) src out found: ", fn_src_in); } if (doc_matters.opt.action.source) { fn_src_in.copy(fn_src_out_filesystem); @@ -139,7 +139,7 @@ template SiSUpod() { string[] filelist_src_zpod_arr; if (exists(fn_src_in)) { debug(io) { - writeln("WARNING (io debug) src out found: ", fn_src_in); + writeln("(io debug) src in found: ", fn_src_in); } filelist_src_out_sisupod_arr ~= fn_src_out_sisupod_zip_base; filelist_src_zpod_arr ~= fn_src_out_inside_pod; @@ -162,7 +162,7 @@ template SiSUpod() { } else { if (doc_matters.opt.action.verbose || doc_matters.opt.action.debug_do) { - writeln("WARNING (io) src out NOT found (markup source): ", fn_src_in); + writeln("WARNING (io) src in NOT found (markup source): ", fn_src_in); } } } { /+ bundle insert files +/ @@ -193,7 +193,7 @@ template SiSUpod() { ).filesystem_open_zpod.to!string; if (exists(fn_src_in)) { debug(io) { - writeln("WARNING (io debug) src out found: ", fn_src_in); + writeln("(io debug) src out found: ", fn_src_in); } if (doc_matters.opt.action.source) { fn_src_in.copy(fn_src_out_filesystem); @@ -217,16 +217,24 @@ template SiSUpod() { } } { auto fn_src_in = doc_matters.src.filename; - if (exists(fn_src_in)) { - if (doc_matters.opt.action.sisupod) { + if (doc_matters.opt.action.sisupod) { + if (exists(doc_matters.src.file_with_absolute_path)) { createZipFile!()(fn_sisupod, zip.build()); + } else { + writeln("WARNING check missing source file(s): ", doc_matters.opt.action.sisupod); + } + if (!(exists(fn_sisupod))) { + writeln("WARNING failed to create sisupod zip archive: ", fn_sisupod); } } } if (exists(fn_sisupod)) { try { - auto data = (cast(byte[]) (fn_sisupod).read); - writefln("%-(%02x%) %s", data.sha256Of, fn_sisupod); + if (doc_matters.opt.action.verbose) { + auto data = (cast(byte[]) (fn_sisupod).read); + writeln(doc_matters.src.filename, " >> "); + writefln("%-(%02x%) %s", data.sha256Of, fn_sisupod); + } debug(sisupod) { try { auto zipped = new ZipArchive((fn_sisupod).read); diff --git a/src/sdp/output/sqlite.d b/src/sdp/output/sqlite.d index 480c317..2409e3b 100644 --- a/src/sdp/output/sqlite.d +++ b/src/sdp/output/sqlite.d @@ -3,18 +3,92 @@ import sdp.output; import std.file, std.conv : to; +long _metadata_tid_lastrowid; +template SQLiteDbRun() { + void SQLiteDbRun(Db,St,O)( + auto ref Db db, + auto ref St db_statement, + auto ref O opt_action, + string note, + ) { + debug(sql_statement) { + writeln(db_statement); + } + try { + db.run( + "\nBEGIN;\n" ~ + db_statement ~ + "\nCOMMIT;\n" + ); + if (!(opt_action.sqlite_create)) { + _metadata_tid_lastrowid = db.lastInsertRowid(); + writeln("last row id: ", _metadata_tid_lastrowid); + } + } catch (ErrnoException ex) { + writeln("ERROR SQLite : ", ex); + } catch (Exception ex) { + writeln("ERROR SQLite : ", ex); + } + if (opt_action.debug_do) { + writeln(note); + if (opt_action.verbose) { + writeln(db_statement); + } + } + } +} template SQLiteBuildTablesAndPopulate() { void SQLiteBuildTablesAndPopulate(D,I)( auto ref const D doc_abstraction, - auto ref I doc_matters, + auto ref I doc_matters, ) { import d2sqlite3; import std.typecons : Nullable; mixin SiSUoutputRgxInit; static auto rgx = Rgx(); + auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.output_path); + pth_sqlite.base.mkdirRecurse; + auto db = Database(pth_sqlite.sqlite_file(doc_matters.env.pwd.baseName)); + template SQLiteDbStatementComposite() { + void SQLiteDbStatementComposite(Db,D,I)( + auto ref Db db, + auto ref const D doc_abstraction, + auto ref I doc_matters, + ) { + { + { + string _db_statement; + { + if ((doc_matters.opt.action.sqlite_create)) { + auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); + pth_sqlite.base.mkdirRecurse; + _db_statement ~= SQLiteTablesReCreate!()(); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "TABLE RE-CREATE"); + } + if ((doc_matters.opt.action.sqlite_update)) { // TODO + _db_statement ~= SQLiteInsertMetadata!()(doc_matters); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table INSERT MetaData"); + /+ get tid (lastrowid or max) for use in doc_objects table +/ + _metadata_tid_lastrowid = db.lastInsertRowid(); + _db_statement ~= SQLiteInsertDocObjectsLoop!()(doc_matters.xml.keys_seq.sql, _metadata_tid_lastrowid); // FIX + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table INSERT DocObjects"); + } + } + db.close; + } + } + } + } + template SQLinsertDelimiter() { + auto SQLinsertDelimiter(string _txt) { + _txt = _txt + .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); + return _txt; + } + } template SQLiteFormatAndLoadObject() { auto SQLiteFormatAndLoadObject(I)( - auto ref I doc_matters, + auto ref I doc_matters, ) { mixin SiSUoutputRgxInit; struct sqlite_format_and_load_objects { @@ -23,7 +97,6 @@ template SQLiteBuildTablesAndPopulate() { ) { string _notes; string _urls; - _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; @@ -239,9 +312,17 @@ template SQLiteBuildTablesAndPopulate() { string _align = ("style=\"text-align:" ~ ((obj.table_column_aligns[col_idx] == "l") ? "left\"" : "right\"")); - _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; + _table ~= "<" + ~ _col_is + ~ " width=\"" + ~ obj.table_column_widths[col_idx].to!string + ~ "%\" " + ~ _align + ~ ">"; _table ~= cell; - _table ~= ""; + _table ~= ""; } } _table ~= ""; @@ -452,31 +533,41 @@ template SQLiteBuildTablesAndPopulate() { return sqlite_format_and_load_objects(); } } - template SQLiteInstruct() { - Statement SQLiteInstruct(I)( - auto ref I doc_matters, - ) { - auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.output_path, doc_matters.src.language); - pth_sqlite.base.mkdirRecurse; - auto db = Database(pth_sqlite.sqlite_file(doc_matters.env.pwd.baseName)); - // auto db = Database(":memory:"); // open database in memory - if (doc_matters.opt.action.sqlite_create) { - if ((doc_matters.opt.action.verbose)) { writeln("sqlite create tables... "); } - db.run(" + template SQLiteTablesReCreate() { + string SQLiteTablesReCreate()() { + string _sql_instruct; + _sql_instruct = format(q"¶ + DROP INDEX IF EXISTS ocn; + DROP INDEX IF EXISTS digest_clean; + DROP INDEX IF EXISTS digest_all; + DROP INDEX IF EXISTS clean; + DROP INDEX IF EXISTS lev0; + DROP INDEX IF EXISTS lev1; + DROP INDEX IF EXISTS lev2; + DROP INDEX IF EXISTS lev3; + DROP INDEX IF EXISTS lev4; + DROP INDEX IF EXISTS lev5; + DROP INDEX IF EXISTS lev6; + DROP INDEX IF EXISTS title; + DROP INDEX IF EXISTS creator_author; + DROP INDEX IF EXISTS src_filename; + DROP INDEX IF EXISTS language_document_char; + DROP INDEX IF EXISTS classify_topic_register; DROP TABLE IF EXISTS metadata_and_text; DROP TABLE IF EXISTS doc_objects; DROP TABLE IF EXISTS urls; CREATE TABLE metadata_and_text ( - tid BIGINT PRIMARY KEY, + tid INTEGER PRIMARY KEY, + /* src_filename_composite VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ title VARCHAR(800) NOT NULL, - title_main VARCHAR(400) NULL, /*NOT*/ + title_main VARCHAR(400) NOT NULL, title_sub VARCHAR(400) NULL, title_short VARCHAR(400) NULL, title_edition VARCHAR(10) NULL, title_note VARCHAR(2500) NULL, title_language VARCHAR(100) NULL, title_language_char VARCHAR(6) NULL, - creator_author VARCHAR(600) NULL, + creator_author VARCHAR(600) NOT NULL, creator_author_email VARCHAR(100) NULL, creator_author_hon VARCHAR(100) NULL, creator_author_nationality VARCHAR(100) NULL, @@ -490,7 +581,7 @@ template SQLiteBuildTablesAndPopulate() { creator_audio VARCHAR(600) NULL, creator_video VARCHAR(600) NULL, language_document VARCHAR(100) NULL, - language_document_char VARCHAR(6) NULL, /*NOT*/ + language_document_char VARCHAR(6) NOT NULL, language_original VARCHAR(100) NULL, language_original_char VARCHAR(6) NULL, date_added_to_site VARCHAR(10) NULL, @@ -539,18 +630,20 @@ template SQLiteBuildTablesAndPopulate() { notes_prefix_a TEXT NULL, notes_prefix_b TEXT NULL, notes_suffix TEXT NULL, - src_filename VARCHAR(256) NULL, + src_filename VARCHAR(256) NOT NULL, src_fingerprint VARCHAR(256) NULL, src_filesize VARCHAR(10) NULL, src_wordcount VARCHAR(10) NULL, + pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */ src_text TEXT NULL, fulltext TEXT NULL, links TEXT NULL ); - /* */ CREATE TABLE doc_objects ( - lid BIGINT PRIMARY KEY, - metadata_tid BIGINT REFERENCES metadata_and_text, + lid INTEGER PRIMARY KEY, + metadata_tid INTEGER REFERENCES metadata_and_text, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -581,37 +674,50 @@ template SQLiteBuildTablesAndPopulate() { digest_clean CHAR(256), digest_all CHAR(256), types CHAR(1) NULL - ) - "); - } - Statement insert_metadata = db.prepare(" + ); + CREATE INDEX idx_ocn ON doc_objects(ocn); + CREATE INDEX idx_digest_clean ON doc_objects(digest_clean); + CREATE INDEX idx_digest_all ON doc_objects(digest_all); + CREATE INDEX idx_clean ON doc_objects(clean); + CREATE INDEX idx_lev0 ON doc_objects(lev0); + CREATE INDEX idx_lev1 ON doc_objects(lev1); + CREATE INDEX idx_lev2 ON doc_objects(lev2); + CREATE INDEX idx_lev3 ON doc_objects(lev3); + CREATE INDEX idx_lev4 ON doc_objects(lev4); + CREATE INDEX idx_lev5 ON doc_objects(lev5); + CREATE INDEX idx_lev6 ON doc_objects(lev6); + CREATE INDEX idx_title ON metadata_and_text(title); + CREATE INDEX idx_author ON metadata_and_text(creator_author); + CREATE INDEX idx_filename ON metadata_and_text(src_filename); + CREATE INDEX idx_language ON metadata_and_text(language_document_char); + CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register); + ¶",); + return _sql_instruct; + } + } + template SQLiteInsertMetadata() { + string SQLiteInsertMetadata(I)( + auto ref I doc_matters, + ) { + string _insert_metadata; + _insert_metadata = format(q"¶ INSERT INTO metadata_and_text ( - tid, + src_filename, title, title_main, title_sub, title_short, title_edition, - title_note, title_language, - title_language_char, + classify_dewey, + classify_keywords, + classify_loc, + classify_subject, + classify_topic_register, creator_author, creator_author_email, - creator_author_hon, - creator_author_nationality, - creator_editor, - creator_contributor, creator_illustrator, - creator_photographer, creator_translator, - creator_prepared_by, - creator_digitized_by, - creator_audio, - creator_video, - language_document, - language_document_char, - language_original, - language_original_char, date_added_to_site, date_available, date_created, @@ -619,17 +725,18 @@ template SQLiteBuildTablesAndPopulate() { date_modified, date_published, date_valid, - date_translated, - date_original_publication, - date_generated, - publisher, + identifier_isbn, + identifier_oclc, + language_document, + language_document_char, + notes_abstract, + notes_description, original_publisher, original_language, original_language_char, original_source, - original_institution, - original_nationality, original_title, + publisher, rights_copyright, rights_copyright_audio, rights_copyright_cover, @@ -638,272 +745,144 @@ template SQLiteBuildTablesAndPopulate() { rights_copyright_text, rights_copyright_translation, rights_copyright_video, - rights_license, - identifier_oclc, - identifier_isbn, - classify_topic_register, - classify_subject, - classify_loc, - classify_dewey, - classify_keywords, - notes_abstract, - notes_description, - notes_comment, - notes_coverage, - notes_relation, - notes_history, - notes_type, - notes_format, - notes_prefix, - notes_prefix_a, - notes_prefix_b, - notes_suffix, - src_filename, - src_fingerprint, - src_filesize, - src_wordcount, - src_text, - fulltext, - links + rights_license ) VALUES ( - :tid, - :title, - :title_main, - :title_sub, - :title_short, - :title_edition, - :title_note, - :title_language, - :title_language_char, - :creator_author, - :creator_author_email, - :creator_author_hon, - :creator_author_nationality, - :creator_editor, - :creator_contributor, - :creator_illustrator, - :creator_photographer, - :creator_translator, - :creator_prepared_by, - :creator_digitized_by, - :creator_audio, - :creator_video, - :language_document, - :language_document_char, - :language_original, - :language_original_char, - :date_added_to_site, - :date_available, - :date_created, - :date_issued, - :date_modified, - :date_published, - :date_valid, - :date_translated, - :date_original_publication, - :date_generated, - :publisher, - :original_publisher, - :original_language, - :original_language_char, - :original_source, - :original_institution, - :original_nationality, - :original_title, - :rights_copyright, - :rights_copyright_audio, - :rights_copyright_cover, - :rights_copyright_illustrations, - :rights_copyright_photographs, - :rights_copyright_text, - :rights_copyright_translation, - :rights_copyright_video, - :rights_license, - :identifier_oclc, - :identifier_isbn, - :classify_topic_register, - :classify_subject, - :classify_loc, - :classify_dewey, - :classify_keywords, - :notes_abstract, - :notes_description, - :notes_comment, - :notes_coverage, - :notes_relation, - :notes_history, - :notes_type, - :notes_format, - :notes_prefix, - :notes_prefix_a, - :notes_prefix_b, - :notes_suffix, - :src_filename, - :src_fingerprint, - :src_filesize, - :src_wordcount, - :src_text, - :fulltext, - :links - ) - "); - // insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? - insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); - insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); - insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); - insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // - insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); - insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); - // insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); - insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); - insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); - insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); - insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); - insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); - insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); - insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email); - // insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); - // insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); - insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); - // insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); - insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); - // insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); - // insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); - insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); - insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); - insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); - insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); - insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); - insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); - insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); - // insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); - // insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); - // insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); - insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); - insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); - insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); - insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); - // insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); - // insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); - insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); - insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); - insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); - insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); - insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); - insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); - insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); - insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); - // insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); - insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); - insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); - insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); - insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); - insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); - // insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); - insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); - insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); - insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); - // insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); - // insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); - // insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); - insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); - // insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); - // insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); - // insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); - // insert_metadata.bind(":src_wordcount", doc_matters.conf_make_meta.meta.src_wordcount); - // insert_metadata.bind(":src_text", doc_matters.conf_make_meta.meta.src_text); - // insert_metadata.bind(":fulltext", doc_matters.conf_make_meta.meta.fulltext); - // insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); - insert_metadata.execute(); insert_metadata.reset(); - /+ watch +/ - writeln(" ", pth_sqlite.sqlite_file(doc_matters.env.pwd.baseName)); - if ((doc_matters.opt.action.verbose)) { - writeln("sql statement executed"); - } - assert(db.totalChanges == 1); - // - Statement insert_doc_objects = db.prepare(" - INSERT INTO doc_objects ( - lid, - metadata_tid, - ocn, - ocnd, - ocns, - clean, - body, - book_idx, - seg, - lev_an, - lev, - lev0, - lev1, - lev2, - lev3, - lev4, - lev5, - lev6, - lev7, - en_a, - en_z, - en_a_asterisk, - en_z_asterisk, - en_a_plus, - en_z_plus, - t_of, - t_is, - node, - parent, - digest_clean, - digest_all, - types - ) - VALUES ( - :lid, - :metadata_tid, - :ocn, - :ocnd, - :ocns, - :clean, - :body, - :book_idx, - :seg, - :lev_an, - :lev, - :lev0, - :lev1, - :lev2, - :lev3, - :lev4, - :lev5, - :lev6, - :lev7, - :en_a, - :en_z, - :en_a_asterisk, - :en_z_asterisk, - :en_a_plus, - :en_z_plus, - :t_of, - :t_is, - :node, - :parent, - :digest_clean, - :digest_all, - :types - ) - "); - return insert_doc_objects; + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s' + ); + ¶", + SQLinsertDelimiter!()(doc_matters.src.filename), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license) + ); + return _insert_metadata; } } - template SQLiteObjectsLoop() { - void SQLiteObjectsLoop(P)( - auto ref P doc_parts, + template SQLiteInsertDocObjectsLoop() { + string SQLiteInsertDocObjectsLoop(P,I)( + auto ref P doc_parts, + auto ref I _metadata_tid, ) { - Statement insert_doc_objects = SQLiteInstruct!()(doc_matters); + string insertDocObjectsRow(O)(O obj) { + string _insert_doc_objects_row; + _insert_doc_objects_row = format(q"¶ + INSERT INTO doc_objects ( + metadata_tid, + ocn, + clean, + body, + lev, + t_of, + t_is + ) + VALUES ( + %d, + %s, + '%s', + '%s', + %s, + '%s', + '%s' + ); + ¶", + _metadata_tid, + obj.ocn, + SQLinsertDelimiter!()(obj_txt["text"]), + SQLinsertDelimiter!()(obj_txt["html"]), + obj.heading_lev_markup, + obj.is_of, + obj.is_a, + ); + return _insert_doc_objects_row; + } auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; + string[] _insert_doc_objects; foreach (part; doc_parts) { foreach (obj; doc_abstraction[part]) { switch (obj.of_part) { @@ -1048,43 +1027,192 @@ template SQLiteBuildTablesAndPopulate() { ); } } - insert_doc_objects.bind(":t_of", obj.is_of); - insert_doc_objects.bind(":t_is", obj.is_a); - insert_doc_objects.bind(":ocn", obj.ocn); - insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search - insert_doc_objects.bind(":body", obj_txt["html"]); - // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search - insert_doc_objects.bind(":lev", obj.heading_lev_markup); - // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete - // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info - // insert_doc_objects.bind(":lev0", ""); - // insert_doc_objects.bind(":lev1", ""); - // insert_doc_objects.bind(":lev2", ""); - // insert_doc_objects.bind(":lev3", ""); - // insert_doc_objects.bind(":lev4", ""); - // insert_doc_objects.bind(":lev5", ""); - // insert_doc_objects.bind(":lev6", ""); - // insert_doc_objects.bind(":lev7", ""); - // insert_doc_objects.bind(":node", ""); - // insert_doc_objects.bind(":type", ""); - // insert_doc_objects.bind(":parent_ocn", ""); - // insert_doc_objects.bind(":ancestors", ""); - // insert_doc_objects.bind(":heading_lev_markup", ""); - // insert_doc_objects.bind(":heading_lev_collapsed", ""); - // insert_doc_objects.bind(":parent_lev_markup", ""); - // insert_doc_objects.bind(":heading_ancestors", ""); - // insert_doc_objects.bind(":node", ""); - insert_doc_objects.execute(); insert_doc_objects.reset(); - } + if (!(obj.is_a == "comment")) { + _insert_doc_objects ~= insertDocObjectsRow(obj); + } + } // loop closes } + return _insert_doc_objects.join.to!(char[]).toUTF8; } } - SQLiteObjectsLoop!()(doc_matters.xml.keys_seq.sql); + SQLiteDbStatementComposite!()(db, doc_abstraction, doc_matters); } } template SQLiteTablesCreate() { - void SQLiteTablesCreate()() { - writeln("table create"); + void SQLiteTablesCreate(E,O)(E env, O opt_action) { + import d2sqlite3; + template SQLiteTablesReCreate() { + string SQLiteTablesReCreate()() { + string _sql_instruct; + _sql_instruct = format(q"¶ + DROP INDEX IF EXISTS ocn; + DROP INDEX IF EXISTS digest_clean; + DROP INDEX IF EXISTS digest_all; + DROP INDEX IF EXISTS clean; + DROP INDEX IF EXISTS lev0; + DROP INDEX IF EXISTS lev1; + DROP INDEX IF EXISTS lev2; + DROP INDEX IF EXISTS lev3; + DROP INDEX IF EXISTS lev4; + DROP INDEX IF EXISTS lev5; + DROP INDEX IF EXISTS lev6; + DROP INDEX IF EXISTS title; + DROP INDEX IF EXISTS creator_author; + DROP INDEX IF EXISTS src_filename; + DROP INDEX IF EXISTS language_document_char; + DROP INDEX IF EXISTS classify_topic_register; + DROP TABLE IF EXISTS metadata_and_text; + DROP TABLE IF EXISTS doc_objects; + DROP TABLE IF EXISTS urls; + CREATE TABLE metadata_and_text ( + tid INTEGER PRIMARY KEY, + /* src_filename_composite VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NOT NULL, + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_note VARCHAR(2500) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NOT NULL, + creator_author_email VARCHAR(100) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NOT NULL, + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + publisher VARCHAR(600) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + original_title VARCHAR(800) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + src_filename VARCHAR(256) NOT NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */ + src_text TEXT NULL, + fulltext TEXT NULL, + links TEXT NULL + ); + CREATE TABLE doc_objects ( + lid INTEGER PRIMARY KEY, + metadata_tid INTEGER REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(256), + digest_all CHAR(256), + types CHAR(1) NULL + ); + CREATE INDEX idx_ocn ON doc_objects(ocn); + CREATE INDEX idx_digest_clean ON doc_objects(digest_clean); + CREATE INDEX idx_digest_all ON doc_objects(digest_all); + CREATE INDEX idx_clean ON doc_objects(clean); + CREATE INDEX idx_lev0 ON doc_objects(lev0); + CREATE INDEX idx_lev1 ON doc_objects(lev1); + CREATE INDEX idx_lev2 ON doc_objects(lev2); + CREATE INDEX idx_lev3 ON doc_objects(lev3); + CREATE INDEX idx_lev4 ON doc_objects(lev4); + CREATE INDEX idx_lev5 ON doc_objects(lev5); + CREATE INDEX idx_lev6 ON doc_objects(lev6); + CREATE INDEX idx_title ON metadata_and_text(title); + CREATE INDEX idx_author ON metadata_and_text(creator_author); + CREATE INDEX idx_filename ON metadata_and_text(src_filename); + CREATE INDEX idx_language ON metadata_and_text(language_document_char); + CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register); + ¶",); + return _sql_instruct; + } + } + if (opt_action.sqlite_create) { + string _db_statement; + auto pth_sqlite = SiSUpathsSQLite!()(opt_action.output_dir_set); // ISSUE + pth_sqlite.base.mkdirRecurse; + auto db = Database(pth_sqlite.sqlite_file); // ISSUE + { + _db_statement ~= SQLiteTablesReCreate!()(); + } + SQLiteDbRun!()(db, _db_statement, opt_action, "TABLE RE-CREATE"); + } } } template SQLiteTablesDrop() { diff --git a/src/sdp/output/sqlite_discrete.d b/src/sdp/output/sqlite_discrete.d index 443fb3b..40e0b15 100644 --- a/src/sdp/output/sqlite_discrete.d +++ b/src/sdp/output/sqlite_discrete.d @@ -3,18 +3,75 @@ import sdp.output; import std.file, std.conv : to; +template SQLiteDiscreteDbRun() { + void SQLiteDiscreteDbRun(Db,St,O)( + auto ref Db db, + auto ref St db_statement, + auto ref O opt_action, + string note, + ) { + debug(sql_statement) { + writeln(db_statement); + } + try { + db.run( + "\nBEGIN;\n" ~ + db_statement ~ + "\nCOMMIT;\n" + ); + } catch (ErrnoException ex) { + writeln("ERROR SQLite : ", ex); + } catch (Exception ex) { + writeln("ERROR SQLite : ", ex); + } + if (opt_action.debug_do) { + writeln(note); + if (opt_action.verbose) { + writeln(db_statement); + } + } + } +} template SQLiteDiscreteBuildTablesAndPopulate() { void SQLiteDiscreteBuildTablesAndPopulate(D,I)( auto ref const D doc_abstraction, - auto ref I doc_matters, + auto ref I doc_matters, ) { import d2sqlite3; import std.typecons : Nullable; mixin SiSUoutputRgxInit; static auto rgx = Rgx(); + auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); + pth_sqlite.base.mkdirRecurse; + auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); + template SQLiteDiscreteDbStatementComposite() { + void SQLiteDiscreteDbStatementComposite(Db,D,I)( + auto ref Db db, + auto ref const D doc_abstraction, + auto ref I doc_matters, + ) { + { + string _db_statement; + { + _db_statement ~= SQLiteDiscreteTablesReCreate!()(); + _db_statement ~= SQLiteDiscreteInsertMetadata!()(doc_matters); + _db_statement ~= SQLiteDiscreteInsertDocObjectsLoop!()(doc_matters.xml.keys_seq.sql, 1); // FIX + SQLiteDiscreteDbRun!()(db, _db_statement, doc_matters.opt.action, "table CREATE Tables, INSERT DocObjects"); + } + db.close; + } + } + } + template SQLinsertDelimiter() { + auto SQLinsertDelimiter(string _txt) { + _txt = _txt + .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); + return _txt; + } + } template SQLiteFormatAndLoadObject() { auto SQLiteFormatAndLoadObject(I)( - auto ref I doc_matters, + auto ref I doc_matters, ) { mixin SiSUoutputRgxInit; struct sqlite_format_and_load_objects { @@ -459,27 +516,41 @@ template SQLiteDiscreteBuildTablesAndPopulate() { return sqlite_format_and_load_objects(); } } - template SQLiteInstruct() { - void SQLiteInstruct(Db,I)( - Db db, - auto ref I doc_matters, - ) { - db.run(" + template SQLiteDiscreteTablesReCreate() { + string SQLiteDiscreteTablesReCreate()() { + string _sql_instruct; + _sql_instruct = format(q"¶ + DROP INDEX IF EXISTS ocn; + DROP INDEX IF EXISTS digest_clean; + DROP INDEX IF EXISTS digest_all; + DROP INDEX IF EXISTS clean; + DROP INDEX IF EXISTS lev0; + DROP INDEX IF EXISTS lev1; + DROP INDEX IF EXISTS lev2; + DROP INDEX IF EXISTS lev3; + DROP INDEX IF EXISTS lev4; + DROP INDEX IF EXISTS lev5; + DROP INDEX IF EXISTS lev6; + DROP INDEX IF EXISTS title; + DROP INDEX IF EXISTS creator_author; + DROP INDEX IF EXISTS src_filename; + DROP INDEX IF EXISTS language_document_char; + DROP INDEX IF EXISTS classify_topic_register; DROP TABLE IF EXISTS metadata_and_text; DROP TABLE IF EXISTS doc_objects; DROP TABLE IF EXISTS urls; - BEGIN; CREATE TABLE metadata_and_text ( - tid BIGINT PRIMARY KEY, + tid INTEGER PRIMARY KEY, + /* src_filename_composite VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ title VARCHAR(800) NOT NULL, - title_main VARCHAR(400) NULL, /*NOT*/ + title_main VARCHAR(400) NOT NULL, title_sub VARCHAR(400) NULL, title_short VARCHAR(400) NULL, title_edition VARCHAR(10) NULL, title_note VARCHAR(2500) NULL, title_language VARCHAR(100) NULL, title_language_char VARCHAR(6) NULL, - creator_author VARCHAR(600) NULL, + creator_author VARCHAR(600) NOT NULL, creator_author_email VARCHAR(100) NULL, creator_author_hon VARCHAR(100) NULL, creator_author_nationality VARCHAR(100) NULL, @@ -493,7 +564,7 @@ template SQLiteDiscreteBuildTablesAndPopulate() { creator_audio VARCHAR(600) NULL, creator_video VARCHAR(600) NULL, language_document VARCHAR(100) NULL, - language_document_char VARCHAR(6) NULL, /*NOT*/ + language_document_char VARCHAR(6) NOT NULL, language_original VARCHAR(100) NULL, language_original_char VARCHAR(6) NULL, date_added_to_site VARCHAR(10) NULL, @@ -542,18 +613,20 @@ template SQLiteDiscreteBuildTablesAndPopulate() { notes_prefix_a TEXT NULL, notes_prefix_b TEXT NULL, notes_suffix TEXT NULL, - src_filename VARCHAR(256) NULL, + src_filename VARCHAR(256) NOT NULL, src_fingerprint VARCHAR(256) NULL, src_filesize VARCHAR(10) NULL, src_wordcount VARCHAR(10) NULL, + pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */ src_text TEXT NULL, fulltext TEXT NULL, links TEXT NULL ); - /* */ CREATE TABLE doc_objects ( - lid BIGINT PRIMARY KEY, - metadata_tid BIGINT REFERENCES metadata_and_text, + lid INTEGER PRIMARY KEY, + metadata_tid INTEGER REFERENCES metadata_and_text, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -585,36 +658,49 @@ template SQLiteDiscreteBuildTablesAndPopulate() { digest_all CHAR(256), types CHAR(1) NULL ); - COMMIT - "); - Statement insert_metadata = db.prepare(" + CREATE INDEX idx_ocn ON doc_objects(ocn); + CREATE INDEX idx_digest_clean ON doc_objects(digest_clean); + CREATE INDEX idx_digest_all ON doc_objects(digest_all); + CREATE INDEX idx_clean ON doc_objects(clean); + CREATE INDEX idx_lev0 ON doc_objects(lev0); + CREATE INDEX idx_lev1 ON doc_objects(lev1); + CREATE INDEX idx_lev2 ON doc_objects(lev2); + CREATE INDEX idx_lev3 ON doc_objects(lev3); + CREATE INDEX idx_lev4 ON doc_objects(lev4); + CREATE INDEX idx_lev5 ON doc_objects(lev5); + CREATE INDEX idx_lev6 ON doc_objects(lev6); + CREATE INDEX idx_title ON metadata_and_text(title); + CREATE INDEX idx_author ON metadata_and_text(creator_author); + CREATE INDEX idx_filename ON metadata_and_text(src_filename); + CREATE INDEX idx_language ON metadata_and_text(language_document_char); + CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register); + ¶",); + return _sql_instruct; + } + } + template SQLiteDiscreteInsertMetadata() { + string SQLiteDiscreteInsertMetadata(I)( + auto ref I doc_matters, + ) { + string _insert_metadata; + _insert_metadata = format(q"¶ INSERT INTO metadata_and_text ( - tid, + src_filename, title, title_main, title_sub, title_short, title_edition, - title_note, title_language, - title_language_char, + classify_dewey, + classify_keywords, + classify_loc, + classify_subject, + classify_topic_register, creator_author, creator_author_email, - creator_author_hon, - creator_author_nationality, - creator_editor, - creator_contributor, creator_illustrator, - creator_photographer, creator_translator, - creator_prepared_by, - creator_digitized_by, - creator_audio, - creator_video, - language_document, - language_document_char, - language_original, - language_original_char, date_added_to_site, date_available, date_created, @@ -622,17 +708,18 @@ template SQLiteDiscreteBuildTablesAndPopulate() { date_modified, date_published, date_valid, - date_translated, - date_original_publication, - date_generated, - publisher, + identifier_isbn, + identifier_oclc, + language_document, + language_document_char, + notes_abstract, + notes_description, original_publisher, original_language, original_language_char, original_source, - original_institution, - original_nationality, original_title, + publisher, rights_copyright, rights_copyright_audio, rights_copyright_cover, @@ -641,205 +728,113 @@ template SQLiteDiscreteBuildTablesAndPopulate() { rights_copyright_text, rights_copyright_translation, rights_copyright_video, - rights_license, - identifier_oclc, - identifier_isbn, - classify_topic_register, - classify_subject, - classify_loc, - classify_dewey, - classify_keywords, - notes_abstract, - notes_description, - notes_comment, - notes_coverage, - notes_relation, - notes_history, - notes_type, - notes_format, - notes_prefix, - notes_prefix_a, - notes_prefix_b, - notes_suffix, - src_filename, - src_fingerprint, - src_filesize, - src_wordcount, - src_text, - fulltext, - links + rights_license ) VALUES ( - :tid, - :title, - :title_main, - :title_sub, - :title_short, - :title_edition, - :title_note, - :title_language, - :title_language_char, - :creator_author, - :creator_author_email, - :creator_author_hon, - :creator_author_nationality, - :creator_editor, - :creator_contributor, - :creator_illustrator, - :creator_photographer, - :creator_translator, - :creator_prepared_by, - :creator_digitized_by, - :creator_audio, - :creator_video, - :language_document, - :language_document_char, - :language_original, - :language_original_char, - :date_added_to_site, - :date_available, - :date_created, - :date_issued, - :date_modified, - :date_published, - :date_valid, - :date_translated, - :date_original_publication, - :date_generated, - :publisher, - :original_publisher, - :original_language, - :original_language_char, - :original_source, - :original_institution, - :original_nationality, - :original_title, - :rights_copyright, - :rights_copyright_audio, - :rights_copyright_cover, - :rights_copyright_illustrations, - :rights_copyright_photographs, - :rights_copyright_text, - :rights_copyright_translation, - :rights_copyright_video, - :rights_license, - :identifier_oclc, - :identifier_isbn, - :classify_topic_register, - :classify_subject, - :classify_loc, - :classify_dewey, - :classify_keywords, - :notes_abstract, - :notes_description, - :notes_comment, - :notes_coverage, - :notes_relation, - :notes_history, - :notes_type, - :notes_format, - :notes_prefix, - :notes_prefix_a, - :notes_prefix_b, - :notes_suffix, - :src_filename, - :src_fingerprint, - :src_filesize, - :src_wordcount, - :src_text, - :fulltext, - :links - ) - "); - // insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? - insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); - insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); - insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); - insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // - insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); - insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); - // insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); - insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); - insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); - insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); - insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); - insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); - insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); - insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email); - // insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); - // insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); - insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); - // insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); - insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); - // insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); - // insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); - insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); - insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); - insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); - insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); - insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); - insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); - insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); - // insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); - // insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); - // insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); - insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); - insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); - insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); - insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); - // insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); - // insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); - insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); - insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); - insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); - insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); - insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); - insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); - insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); - insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); - // insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); - insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); - insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); - insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); - insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); - insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); - // insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); - insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); - insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); - insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); - // insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); - // insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); - // insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); - insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); - // insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); - // insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); - // insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); - // insert_metadata.bind(":src_wordcount", doc_matters.conf_make_meta.meta.src_wordcount); - // insert_metadata.bind(":src_text", doc_matters.conf_make_meta.meta.src_text); - // insert_metadata.bind(":fulltext", doc_matters.conf_make_meta.meta.fulltext); - // insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); - insert_metadata.execute(); insert_metadata.reset(); - /+ watch +/ - if ((doc_matters.opt.action.verbose)) { - writeln("sql statement executed"); - } - assert(db.totalChanges == 1); - // + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s' + ); + ¶", + SQLinsertDelimiter!()(doc_matters.src.filename), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license) + ); + return _insert_metadata; } } - template SQLiteObjectsLoop() { - void SQLiteObjectsLoop(P)( - auto ref P doc_parts, + template SQLiteDiscreteInsertDocObjectsLoop() { + string SQLiteDiscreteInsertDocObjectsLoop(P,I)( + auto ref P doc_parts, + auto ref I tid, ) { string insertDocObjectsRow(O)(O obj) { - auto sql_insert_delimiter(string _txt) { - _txt = _txt - .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); - return _txt; - } string _insert_doc_objects_row; - _insert_doc_objects_row = format(q"¶ + _insert_doc_objects_row = format(q"¶ INSERT INTO doc_objects ( + metadata_tid, ocn, clean, body, @@ -848,6 +843,7 @@ template SQLiteDiscreteBuildTablesAndPopulate() { t_is ) VALUES ( + %d, %s, '%s', '%s', @@ -856,24 +852,20 @@ template SQLiteDiscreteBuildTablesAndPopulate() { '%s' ); ¶", + 1, obj.ocn, - sql_insert_delimiter(obj_txt["text"]), - sql_insert_delimiter(obj_txt["html"]), + SQLinsertDelimiter!()(obj_txt["text"]), + SQLinsertDelimiter!()(obj_txt["html"]), obj.heading_lev_markup, obj.is_of, obj.is_a, ); return _insert_doc_objects_row; } - auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); - pth_sqlite.base.mkdirRecurse; - auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); - SQLiteInstruct!()(db, doc_matters); // consider best location, need to feed individual objects for sqlite table: doc_objects, possibly a separate template? auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; string[] _insert_doc_objects; - _insert_doc_objects ~= "BEGIN;\n"; foreach (part; doc_parts) { foreach (obj; doc_abstraction[part]) { switch (obj.of_part) { @@ -1023,24 +1015,9 @@ template SQLiteDiscreteBuildTablesAndPopulate() { } } // loop closes } - _insert_doc_objects ~= "COMMIT"; - debug(sql_statement) { - writeln("#+BEGIN_SRC sql\n", _insert_doc_objects.join, "\n#+END_SRC"); - } - std.utf.validate(_insert_doc_objects.join); // TODO - db.run(_insert_doc_objects.join.to!(char[]).toUTF8); + return _insert_doc_objects.join.to!(char[]).toUTF8; } } - SQLiteObjectsLoop!()(doc_matters.xml.keys_seq.sql); - } -} -template SQLiteDiscreteTablesCreate() { - void SQLiteDiscreteTablesCreate()() { - writeln("table create"); - } -} -template SQLiteDiscreteTablesDrop() { - void SQLiteDiscreteTablesDrop()() { - writeln("table drop"); + SQLiteDiscreteDbStatementComposite!()(db, doc_abstraction, doc_matters); } } diff --git a/src/sdp/sdp.d b/src/sdp/sdp.d index a50b0e9..e5d73c0 100755 --- a/src/sdp/sdp.d +++ b/src/sdp/sdp.d @@ -103,7 +103,7 @@ void main(string[] args) { std.getopt.config.passThrough, "assert", "--assert set optional assertions on", &opts["assertions"], "concordance", "--concordance file for document", &opts["concordance"], - "debug", "--debug only relevant when debug options compiled in", &opts["debug"], + "debug", "--debug", &opts["debug"], "digest", "--digest hash digest for each object", &opts["digest"], "docbook", "--docbook process docbook output", &opts["docbook"], "epub", "--epub process epub output", &opts["epub"], @@ -341,7 +341,7 @@ void main(string[] args) { debug(steps) { writeln("step0 commence → (without processing files)"); } - outputHubOp!()(_opt_action); + outputHubOp!()(_env, _opt_action); debug(steps) { writeln("- step0 complete"); } -- cgit v1.2.3