From 096d12cb15e191dbd83f3399ba9bfef57bc9d826 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 11 Apr 2018 21:37:45 -0400 Subject: 0.26.0 sqlite single statement insertion of objects - d2sqlite3 db.run, begin commit used with insert statement - can be used after upstream fix that should follow d2sqlite3 0.16.0 --- src/sdp/output/paths_output.d | 38 +++++++++ src/sdp/output/paths_source.d | 14 ++-- src/sdp/output/rgx.d | 3 + src/sdp/output/sqlite.d | 52 ++++++------ src/sdp/output/sqlite_discrete.d | 166 +++++++++++++-------------------------- src/sdp/output/xmls.d | 2 +- 6 files changed, 132 insertions(+), 143 deletions(-) (limited to 'src/sdp/output') diff --git a/src/sdp/output/paths_output.d b/src/sdp/output/paths_output.d index 1db74d9..d58b990 100644 --- a/src/sdp/output/paths_output.d +++ b/src/sdp/output/paths_output.d @@ -29,6 +29,44 @@ template SiSUoutPaths() { return _PathsStruct(); } } +template SiSUoutPathsFnPd() { + /+ TODO stuff to work out here +/ + auto SiSUoutPathsFnPd(Fn,Pn)( + Fn fn_src_pth, + Pn pod_name + // Pn pod_name = "", + ) { + struct _PathsStruct { + string base_filename() { + return fn_src_pth.baseName.stripExtension; + } + string base_pod_and_filename() { // TODO + /+ + - if pod, + - pod_name + - file_name + - if pod_name == file_name + - file_name + - else if pod_name != file_name + - pod_name.file_name + +/ + auto _fn_src = fn_src_pth.baseName.stripExtension; + string _output_base_name; + if (!(pod_name.empty)) { + if (pod_name == _fn_src) { + _output_base_name = _fn_src; + } else { + _output_base_name = pod_name ~ "." ~ _fn_src; + } + } else { + _output_base_name = _fn_src; + } + return _output_base_name; + } + } + return _PathsStruct(); + } +} template SiSUpathsHTML() { mixin SiSUrgxInit; static auto rgx = Rgx(); diff --git a/src/sdp/output/paths_source.d b/src/sdp/output/paths_source.d index 59cb618..c420d7e 100644 --- a/src/sdp/output/paths_source.d +++ b/src/sdp/output/paths_source.d @@ -15,7 +15,7 @@ template PodManifest() { mixin SiSUrgxInit; static auto rgx = Rgx(); auto PodManifest(P)( - P _pth = "", + P _pth ) { struct ManifestFile_ { auto pod_manifest_filename() { @@ -37,8 +37,8 @@ template PodManifest() { _manifest_path = m.captures["podpath"]; } } else { - writeln("WARNING, issue with manifest_path: ", _pth); - _manifest_path = _pth; // _manifest_path = null; + writeln("WARNING, issue with manifest_path: ", _pth); // remove? + _manifest_path = null; // _manifest_path = ""; } return _manifest_path; } @@ -65,7 +65,7 @@ template PathMatters() { auto PathMatters(O,E,P,F)( O _opt_actions, E _env, - P _pth = "", + P _pth, F _fns = "", char[][] _manifest_fn_list = [[]], ) { @@ -115,6 +115,9 @@ template PathMatters() { auto manifest_path() { return _manifest.pod_manifest_path; } + auto pod_name() { // TODO decide what returned if src_is_pod == false + return _manifest.pod_manifest_path.baseName; + } auto manifest_file_with_path() { return _manifest.pod_manifest_file_with_path; } @@ -190,7 +193,8 @@ template PathMatters() { assert(_dir == m.captures["dir"]); } else { _dir = asNormalizedPath(path_and_fn.chainPath("../../../")).array; - assert(_dir == absolute_path_to_src.match(rgx.src_base_parent_dir_name).captures["dir"]); + assert(_dir == absolute_path_to_src + .match(rgx.src_base_parent_dir_name).captures["dir"]); } if ((_opt_actions.debug_do)) { writeln("--> (base_dir) ", _dir); diff --git a/src/sdp/output/rgx.d b/src/sdp/output/rgx.d index adadf9e..d559491 100644 --- a/src/sdp/output/rgx.d +++ b/src/sdp/output/rgx.d @@ -15,6 +15,7 @@ static template SiSUoutputRgxInit() { static nbsp_chars_line_start = ctRegex!(`^░+`, "mg"); static nbsp_and_space = ctRegex!(` [ ]`, "mg"); static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg"); + static special_markup_chars = ctRegex!(`[【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■]`, "mg"); static src_pth_sst_or_ssm = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); static src_pth_contents = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+)/sisupod[.]manifest$`); @@ -69,6 +70,8 @@ static template SiSUoutputRgxInit() { static inline_link_fn_suffix = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg"); static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg"); static mark_internal_site_lnk = ctRegex!(`¤`, "mg"); + static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg"); + static quotation_mark_various = ctRegex!(q"¶['‘’“”"`´¨]¶", "mg"); /+ inline markup font face mod +/ static inline_faces = ctRegex!(`(?P(?P[*!_^,+#-])\{(?P.+?)\}[*!_^,+#-])`, "mg"); static inline_emphasis = ctRegex!(`\*\{(?P.+?)\}\*`, "mg"); diff --git a/src/sdp/output/sqlite.d b/src/sdp/output/sqlite.d index 25940cc..480c317 100644 --- a/src/sdp/output/sqlite.d +++ b/src/sdp/output/sqlite.d @@ -56,7 +56,7 @@ template SQLiteBuildTablesAndPopulate() { auto return ref const O obj, ) { string _html_special_characters(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -66,17 +66,17 @@ template SQLiteBuildTablesAndPopulate() { return _txt; } string _html_font_face(string _txt){ - _txt = (_txt) - .replaceAll(rgx.inline_emphasis, ("$1")) - .replaceAll(rgx.inline_bold, ("$1")) - .replaceAll(rgx.inline_underscore, ("$1")) - .replaceAll(rgx.inline_italics, ("$1")) - .replaceAll(rgx.inline_superscript, ("$1")) - .replaceAll(rgx.inline_subscript, ("$1")) - .replaceAll(rgx.inline_strike, ("$1")) - .replaceAll(rgx.inline_insert, ("$1")) - .replaceAll(rgx.inline_mono, ("$1")) - .replaceAll(rgx.inline_cite, ("$1")); + _txt = _txt + .replaceAll(rgx.inline_emphasis, "$1") + .replaceAll(rgx.inline_bold, "$1") + .replaceAll(rgx.inline_underscore, "$1") + .replaceAll(rgx.inline_italics, "$1") + .replaceAll(rgx.inline_superscript, "$1") + .replaceAll(rgx.inline_subscript, "$1") + .replaceAll(rgx.inline_strike, "$1") + .replaceAll(rgx.inline_insert, "$1") + .replaceAll(rgx.inline_mono, "$1") + .replaceAll(rgx.inline_cite, "$1"); return _txt; } string _notes; @@ -102,7 +102,7 @@ template SQLiteBuildTablesAndPopulate() { return _txt; } string html_special_characters(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -112,7 +112,7 @@ template SQLiteBuildTablesAndPopulate() { return _txt; } string html_special_characters_code(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -121,17 +121,17 @@ template SQLiteBuildTablesAndPopulate() { return _txt; } string html_font_face(string _txt){ - _txt = (_txt) - .replaceAll(rgx.inline_emphasis, ("$1")) - .replaceAll(rgx.inline_bold, ("$1")) - .replaceAll(rgx.inline_underscore, ("$1")) - .replaceAll(rgx.inline_italics, ("$1")) - .replaceAll(rgx.inline_superscript, ("$1")) - .replaceAll(rgx.inline_subscript, ("$1")) - .replaceAll(rgx.inline_strike, ("$1")) - .replaceAll(rgx.inline_insert, ("$1")) - .replaceAll(rgx.inline_mono, ("$1")) - .replaceAll(rgx.inline_cite, ("$1")); + _txt = _txt + .replaceAll(rgx.inline_emphasis, "$1") + .replaceAll(rgx.inline_bold, "$1") + .replaceAll(rgx.inline_underscore, "$1") + .replaceAll(rgx.inline_italics, "$1") + .replaceAll(rgx.inline_superscript, "$1") + .replaceAll(rgx.inline_subscript, "$1") + .replaceAll(rgx.inline_strike, "$1") + .replaceAll(rgx.inline_insert, "$1") + .replaceAll(rgx.inline_mono, "$1") + .replaceAll(rgx.inline_cite, "$1"); return _txt; } auto html_heading(O)( @@ -223,7 +223,7 @@ template SQLiteBuildTablesAndPopulate() { auto return ref const O obj, string _txt, ) { - string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_rows = _txt.split(rgx.table_delimiter_row); string[] _table_cols; string _table; string _tablenote; diff --git a/src/sdp/output/sqlite_discrete.d b/src/sdp/output/sqlite_discrete.d index 541c4d8..443fb3b 100644 --- a/src/sdp/output/sqlite_discrete.d +++ b/src/sdp/output/sqlite_discrete.d @@ -23,7 +23,6 @@ template SQLiteDiscreteBuildTablesAndPopulate() { ) { string _notes; string _urls; - _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; @@ -102,7 +101,7 @@ template SQLiteDiscreteBuildTablesAndPopulate() { return _txt; } string html_special_characters(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -112,7 +111,7 @@ template SQLiteDiscreteBuildTablesAndPopulate() { return _txt; } string html_special_characters_code(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") @@ -121,7 +120,7 @@ template SQLiteDiscreteBuildTablesAndPopulate() { return _txt; } string html_font_face(string _txt){ - _txt = (_txt) + _txt = _txt .replaceAll(rgx.inline_emphasis, "$1") .replaceAll(rgx.inline_bold, "$1") .replaceAll(rgx.inline_underscore, "$1") @@ -461,13 +460,10 @@ template SQLiteDiscreteBuildTablesAndPopulate() { } } template SQLiteInstruct() { - Statement SQLiteInstruct(I)( + void SQLiteInstruct(Db,I)( + Db db, auto ref I doc_matters, ) { - auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); - pth_sqlite.base.mkdirRecurse; - auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); - // auto db = Database(":memory:"); // open database in memory db.run(" DROP TABLE IF EXISTS metadata_and_text; DROP TABLE IF EXISTS doc_objects; @@ -824,93 +820,60 @@ template SQLiteDiscreteBuildTablesAndPopulate() { // insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); insert_metadata.execute(); insert_metadata.reset(); /+ watch +/ - writeln(" ", pth_sqlite.sqlite_file(doc_matters.src.filename)); if ((doc_matters.opt.action.verbose)) { writeln("sql statement executed"); } assert(db.totalChanges == 1); // - Statement _insert_doc_objects = db.prepare(" - INSERT INTO doc_objects ( - lid, - metadata_tid, - ocn, - ocnd, - ocns, - clean, - body, - book_idx, - seg, - lev_an, - lev, - lev0, - lev1, - lev2, - lev3, - lev4, - lev5, - lev6, - lev7, - en_a, - en_z, - en_a_asterisk, - en_z_asterisk, - en_a_plus, - en_z_plus, - t_of, - t_is, - node, - parent, - digest_clean, - digest_all, - types - ) - VALUES ( - :lid, - :metadata_tid, - :ocn, - :ocnd, - :ocns, - :clean, - :body, - :book_idx, - :seg, - :lev_an, - :lev, - :lev0, - :lev1, - :lev2, - :lev3, - :lev4, - :lev5, - :lev6, - :lev7, - :en_a, - :en_z, - :en_a_asterisk, - :en_z_asterisk, - :en_a_plus, - :en_z_plus, - :t_of, - :t_is, - :node, - :parent, - :digest_clean, - :digest_all, - :types - ) - "); - return _insert_doc_objects; } } template SQLiteObjectsLoop() { void SQLiteObjectsLoop(P)( auto ref P doc_parts, ) { - Statement insert_doc_objects = SQLiteInstruct!()(doc_matters); + string insertDocObjectsRow(O)(O obj) { + auto sql_insert_delimiter(string _txt) { + _txt = _txt + .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); + return _txt; + } + string _insert_doc_objects_row; + _insert_doc_objects_row = format(q"¶ + INSERT INTO doc_objects ( + ocn, + clean, + body, + lev, + t_of, + t_is + ) + VALUES ( + %s, + '%s', + '%s', + %s, + '%s', + '%s' + ); + ¶", + obj.ocn, + sql_insert_delimiter(obj_txt["text"]), + sql_insert_delimiter(obj_txt["html"]), + obj.heading_lev_markup, + obj.is_of, + obj.is_a, + ); + return _insert_doc_objects_row; + } + auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); + pth_sqlite.base.mkdirRecurse; + auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); + SQLiteInstruct!()(db, doc_matters); // consider best location, need to feed individual objects for sqlite table: doc_objects, possibly a separate template? auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; + string[] _insert_doc_objects; + _insert_doc_objects ~= "BEGIN;\n"; foreach (part; doc_parts) { foreach (obj; doc_abstraction[part]) { switch (obj.of_part) { @@ -1055,36 +1018,17 @@ template SQLiteDiscreteBuildTablesAndPopulate() { ); } } - insert_doc_objects.bind(":t_of", obj.is_of); - insert_doc_objects.bind(":t_is", obj.is_a); - insert_doc_objects.bind(":ocn", obj.ocn); - insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search - insert_doc_objects.bind(":body", obj_txt["html"]); - // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search - insert_doc_objects.bind(":lev", obj.heading_lev_markup); - // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete - // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info - // insert_doc_objects.bind(":lev0", ""); - // insert_doc_objects.bind(":lev1", ""); - // insert_doc_objects.bind(":lev2", ""); - // insert_doc_objects.bind(":lev3", ""); - // insert_doc_objects.bind(":lev4", ""); - // insert_doc_objects.bind(":lev5", ""); - // insert_doc_objects.bind(":lev6", ""); - // insert_doc_objects.bind(":lev7", ""); - // insert_doc_objects.bind(":node", ""); - // insert_doc_objects.bind(":type", ""); - // insert_doc_objects.bind(":parent_ocn", ""); - // insert_doc_objects.bind(":ancestors", ""); - // insert_doc_objects.bind(":heading_lev_markup", ""); - // insert_doc_objects.bind(":heading_lev_collapsed", ""); - // insert_doc_objects.bind(":parent_lev_markup", ""); - // insert_doc_objects.bind(":heading_ancestors", ""); - // insert_doc_objects.bind(":node", ""); - insert_doc_objects.execute(); insert_doc_objects.reset(); - } + if (!(obj.is_a == "comment")) { + _insert_doc_objects ~= insertDocObjectsRow(obj); + } + } // loop closes + } + _insert_doc_objects ~= "COMMIT"; + debug(sql_statement) { + writeln("#+BEGIN_SRC sql\n", _insert_doc_objects.join, "\n#+END_SRC"); } - insert_doc_objects.finalize(); + std.utf.validate(_insert_doc_objects.join); // TODO + db.run(_insert_doc_objects.join.to!(char[]).toUTF8); } } SQLiteObjectsLoop!()(doc_matters.xml.keys_seq.sql); diff --git a/src/sdp/output/xmls.d b/src/sdp/output/xmls.d index 952c9cd..58ae237 100644 --- a/src/sdp/output/xmls.d +++ b/src/sdp/output/xmls.d @@ -163,7 +163,7 @@ template outputXHTMLs() { .replaceAll( rgx.br_nl, ""); } else { - writeln("WARNING home button text expected"); + _locations = "

SiSU

\n

www.sisudoc.org

\n

sources / git

"; } string o; o = format(q"¶
-- cgit v1.2.3