diff options
| author | Ralph Amissah <ralph.amissah@gmail.com> | 2026-04-22 13:52:21 -0400 |
|---|---|---|
| committer | Ralph Amissah <ralph.amissah@gmail.com> | 2026-04-22 20:42:31 -0400 |
| commit | 51549f11d60cd353564486b3598e69259fb01b66 (patch) | |
| tree | 8dec193314d88ccfd76d80bc1cef951acf2b2204 /org | |
| parent | .ssp document abstraction as PEG parsable text (diff) | |
document abstraction as per document sqlite db
- --show-abstraction-db flag to write a per-document
  SQLite database of the document abstraction
(Claude-Code primary assist)
- Add a new output mode that serializes the in-memory document
abstraction to a per-document SQLite database. This complements
the .ssp text format (--show-abstraction) with a queryable
database representation of the same data.
- Schema:
metadata table - key/value pairs for document metadata
(title, creator, dates, rights, classify, identifiers,
language, notes, make settings, doc_has counts)
objects table - one row per document object with columns:
section, seq (position within section), ocn, is_a,
is_of_part, is_of_type, heading_level, identifier,
parent_ocn, last_descendant_ocn, ancestors,
indent/bullet/lang, has_* flags, segment/anchor tags,
table/code properties, text content
Indexed on: section, ocn, parent_ocn, is_a, heading_level
- Uses prepared statements via d2sqlite3 (existing dependency)
for safe and efficient insertion. Each document produces a
standalone .abstraction.db file in the abstraction/ output
directory.
- New files:
src/sisudoc/io_out/create_abstraction_db.d
Follows the same pattern as create_abstraction_txt.d.
Creates schema, populates metadata via key/value inserts,
then iterates all sections writing objects with prepared
statements within a single transaction.
- Changes to spine.d:
- Add "show-abstraction-db" to opts init, getopt, OptActions
- Add to abstraction(), require_processing_files(), and
meta_processing_general() gates
- Insert call at both spineAbstraction sites
- Tested against all 35 sample documents (including 9-language
live-manual) - zero failures. Works standalone or combined
with --show-abstraction and other output flags.
- Example queries the database supports:
SELECT ocn, heading_level, text FROM objects
WHERE is_a = 'heading' AND section = 'body';
SELECT * FROM objects WHERE parent_ocn = 10;
SELECT key, value FROM metadata WHERE key LIKE 'title.%';
Co-Authored-By: Anthropic Claude Opus 4.6 (1M context)
Diffstat (limited to 'org')
| -rw-r--r-- | org/out_src_abstraction_sqlite_db.org | 365 | ||||
| -rw-r--r-- | org/spine.org | 25 |
2 files changed, 388 insertions, 2 deletions
diff --git a/org/out_src_abstraction_sqlite_db.org b/org/out_src_abstraction_sqlite_db.org
new file mode 100644
index 0000000..a048934
--- /dev/null
+++ b/org/out_src_abstraction_sqlite_db.org
@@ -0,0 +1,365 @@
+-*- mode: org -*-
+#+TITLE: sisudoc spine (doc_reform) output pod source sqlite db
+#+DESCRIPTION: documents - structuring, publishing in multiple formats & search
+#+FILETAGS: :spine:output:source:pod:
+#+AUTHOR: Ralph Amissah
+#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
+#+COPYRIGHT: Copyright (C) 2015 (continuously updated, current 2026) Ralph Amissah
+#+LANGUAGE: en
+#+STARTUP: content hideblocks hidestars noindent entitiespretty
+#+PROPERTY: header-args :exports code
+#+PROPERTY: header-args+ :noweb yes
+#+PROPERTY: header-args+ :results silent
+#+PROPERTY: header-args+ :cache no
+#+PROPERTY: header-args+ :padline no
+#+PROPERTY: header-args+ :mkdirp yes
+#+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t
+
+- [[./doc-reform.org][doc-reform.org]] [[./][org/]]
+
+* (Object-Centric) Document Abstraction SQLite db
+
+- Process markup document, create document abstraction
+
+** _module template_ :module:metadoc_from_src:
+
+rename source_abstraction_peg_txt.d
+
+#+HEADER: :tangle "../src/sisudoc/io_out/create_abstraction_db.d"
+#+HEADER: :noweb yes
+#+BEGIN_SRC d
+<<doc_header_including_copyright_and_license>>
+module sisudoc.io_out.create_abstraction_db;
+
+/+ ↓ write document abstraction as per-document sqlite3 database +/
+template spineAbstractionDb() {
+  import std.conv : to;
+  import std.file;
+  import std.path;
+  import std.stdio;
+  import std.string;
+  import std.array;
+  import std.typecons : Nullable;
+  import d2sqlite3;
+  import sisudoc.io_out.paths_output;
+
+  /+ ↓ write doc.abstraction and the doc.matters metadata to a fresh
+       <output_base>/abstraction/<doc_uid_out>.abstraction.db sqlite3 file;
+       directory creation / stale file removal problems are reported on
+       stderr (best effort), database errors are not caught here
+  +/
+  void spineAbstractionDb(D)(D doc) {
+    auto doc_abstraction = doc.abstraction;
+    auto doc_matters = doc.matters;
+
+    /+ ↓ determine output path +/
+    auto out_pth = spineOutPaths!()(doc_matters.output_path, doc_matters.src.language);
+    string base_dir = "abstraction";
+    string base_pth = ((out_pth.output_base.chainPath(base_dir)).asNormalizedPath).array;
+    try {
+      if (!exists(base_pth)) {
+        base_pth.mkdirRecurse;
+      }
+    } catch (Exception ex) {
+      stderr.writeln("WARNING: unable to create ", base_pth, ": ", ex.msg);
+    }
+    string db_file = ((base_pth.chainPath(
+      doc_matters.src.doc_uid_out ~ ".abstraction.db")).asNormalizedPath).array;
+
+    /+ ↓ remove existing file (and stale journal/WAL sidecars) to start fresh +/
+    foreach (stale; [db_file, db_file ~ "-journal", db_file ~ "-wal", db_file ~ "-shm"]) {
+      try {
+        if (exists(stale)) {
+          remove(stale);
+        }
+      } catch (Exception ex) {
+        stderr.writeln("WARNING: unable to remove ", stale, ": ", ex.msg);
+      }
+    }
+
+    if (doc_matters.opt.action.vox_gt_1) {
+      writeln(" ", db_file);
+    }
+
+    /+ ↓ open database and create schema +/
+    auto db = Database(db_file);
+    /+ ↓ rollback journal (not WAL): one writer, one bulk transaction, and the
+         finished file stays readable from read-only locations +/
+    db.run("PRAGMA journal_mode=DELETE");
+    db.run("PRAGMA synchronous=NORMAL");
+
+    db.run("
+      CREATE TABLE metadata (
+        key TEXT PRIMARY KEY,
+        value TEXT NOT NULL
+      );
+
+      CREATE TABLE objects (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        section TEXT NOT NULL,
+        seq INTEGER NOT NULL,
+        ocn INTEGER DEFAULT 0,
+        is_a TEXT NOT NULL,
+        is_of_part TEXT,
+        is_of_type TEXT,
+        heading_level INTEGER,
+        identifier TEXT,
+        parent_ocn INTEGER DEFAULT 0,
+        last_descendant_ocn INTEGER DEFAULT 0,
+        ancestors TEXT,
+        dummy_heading INTEGER DEFAULT 0,
+        object_number_off INTEGER DEFAULT 0,
+        indent_base INTEGER DEFAULT 0,
+        indent_hang INTEGER DEFAULT 0,
+        bullet INTEGER DEFAULT 0,
+        lang TEXT,
+        has_links INTEGER DEFAULT 0,
+        has_notes_reg INTEGER DEFAULT 0,
+        has_notes_star INTEGER DEFAULT 0,
+        has_images INTEGER DEFAULT 0,
+        segment TEXT,
+        segment_prev TEXT,
+        segment_next TEXT,
+        anchor TEXT,
+        table_cols INTEGER,
+        table_widths TEXT,
+        table_header INTEGER,
+        code_syntax TEXT,
+        code_linenumbers INTEGER DEFAULT 0,
+        text TEXT
+      );
+
+      CREATE INDEX idx_objects_section ON objects(section);
+      CREATE INDEX idx_objects_ocn ON objects(ocn);
+      CREATE INDEX idx_objects_parent ON objects(parent_ocn);
+      CREATE INDEX idx_objects_is_a ON objects(is_a);
+      CREATE INDEX idx_objects_heading ON objects(heading_level)
+        WHERE heading_level IS NOT NULL;
+    ");
+
+    /+ ↓ populate metadata +/
+    db.run("BEGIN TRANSACTION");
+
+    auto meta_stmt = db.prepare(
+      "INSERT INTO metadata (key, value) VALUES (:key, :value)"
+    );
+    auto meta = doc_matters.conf_make_meta.meta;
+
+    void insertMeta(string key, string value) {
+      if (value.length > 0) {
+        meta_stmt.bind(":key", key);
+        meta_stmt.bind(":value", value);
+        meta_stmt.execute();
+        meta_stmt.reset();
+      }
+    }
+
+    insertMeta("title.main", meta.title_main);
+    insertMeta("title.subtitle", meta.title_subtitle);
+    insertMeta("title.full", meta.title_full);
+    insertMeta("title.language", meta.title_language);
+    insertMeta("creator.author", meta.creator_author);
+    insertMeta("creator.author_surname", meta.creator_author_surname);
+    insertMeta("creator.author_surname_fn", meta.creator_author_surname_fn);
+    insertMeta("creator.author_email", meta.creator_author_email);
+    insertMeta("creator.illustrator", meta.creator_illustrator);
+    insertMeta("creator.translator", meta.creator_translator);
+    insertMeta("date.published", meta.date_published);
+    insertMeta("date.created", meta.date_created);
+    insertMeta("date.issued", meta.date_issued);
+    insertMeta("date.available", meta.date_available);
+    insertMeta("date.modified", meta.date_modified);
+    insertMeta("date.valid", meta.date_valid);
+    insertMeta("rights.copyright", meta.rights_copyright);
+    insertMeta("rights.license", meta.rights_license);
+    insertMeta("classify.topic_register", meta.classify_topic_register);
+    insertMeta("classify.subject", meta.classify_subject);
+    insertMeta("classify.keywords", meta.classify_keywords);
+    insertMeta("classify.loc", meta.classify_loc);
+    insertMeta("classify.dewey", meta.classify_dewey);
+    insertMeta("identifier.isbn", meta.identifier_isbn);
+    insertMeta("identifier.oclc", meta.identifier_oclc);
+    insertMeta("language.document", meta.language_document);
+    insertMeta("notes.abstract", meta.notes_abstract);
+    insertMeta("notes.description", meta.notes_description);
+    insertMeta("notes.summary", meta.notes_summary);
+
+    /+ ↓ make settings +/
+    auto make = doc_matters.conf_make_meta.make;
+    insertMeta("make.doc_type", make.doc_type);
+    insertMeta("make.auto_num_top_at_level", make.auto_num_top_at_level);
+    insertMeta("make.auto_num_top_lv", make.auto_num_top_lv.to!string);
+    insertMeta("make.auto_num_depth", make.auto_num_depth.to!string);
+
+    /+ ↓ doc_has counts +/
+    insertMeta("doc_has.inline_links", doc_matters.has.inline_links.to!string);
+    insertMeta("doc_has.inline_notes_reg", doc_matters.has.inline_notes_reg.to!string);
+    insertMeta("doc_has.inline_notes_star", doc_matters.has.inline_notes_star.to!string);
+    insertMeta("doc_has.tables", doc_matters.has.tables.to!string);
+    insertMeta("doc_has.codeblocks", doc_matters.has.codeblocks.to!string);
+    insertMeta("doc_has.images", doc_matters.has.images.to!string);
+    insertMeta("doc_has.poems", doc_matters.has.poems.to!string);
+    insertMeta("doc_has.groups", doc_matters.has.groups.to!string);
+    insertMeta("doc_has.blocks", doc_matters.has.blocks.to!string);
+    insertMeta("doc_has.quotes", doc_matters.has.quotes.to!string);
+
+    meta_stmt.finalize();
+
+    /+ ↓ populate objects +/
+    auto obj_stmt = db.prepare(
+      "INSERT INTO objects ("
+      ~ "section, seq, ocn, is_a, is_of_part, is_of_type,"
+      ~ "heading_level, identifier, parent_ocn, last_descendant_ocn,"
+      ~ "ancestors, dummy_heading, object_number_off,"
+      ~ "indent_base, indent_hang, bullet, lang,"
+      ~ "has_links, has_notes_reg, has_notes_star, has_images,"
+      ~ "segment, segment_prev, segment_next, anchor,"
+      ~ "table_cols, table_widths, table_header,"
+      ~ "code_syntax, code_linenumbers, text"
+      ~ ") VALUES ("
+      ~ ":section, :seq, :ocn, :is_a, :is_of_part, :is_of_type,"
+      ~ ":heading_level, :identifier, :parent_ocn, :last_descendant_ocn,"
+      ~ ":ancestors, :dummy_heading, :object_number_off,"
+      ~ ":indent_base, :indent_hang, :bullet, :lang,"
+      ~ ":has_links, :has_notes_reg, :has_notes_star, :has_images,"
+      ~ ":segment, :segment_prev, :segment_next, :anchor,"
+      ~ ":table_cols, :table_widths, :table_header,"
+      ~ ":code_syntax, :code_linenumbers, :text"
+      ~ ")"
+    );
+
+    /+ ↓ nullable string fields: empty string is stored as NULL +/
+    void bindStr(string param, string val) {
+      if (val.length > 0) {
+        obj_stmt.bind(param, val);
+      } else {
+        obj_stmt.bind(param, Nullable!string());
+      }
+    }
+
+    string[] section_order = ["head", "toc", "body", "endnotes",
+      "glossary", "bibliography", "bookindex", "blurb"];
+
+    foreach (section; section_order) {
+      if (section !in doc_abstraction) continue;
+      auto section_objs = doc_abstraction[section];
+      if (section_objs.length == 0) continue;
+
+      foreach (seq, obj; section_objs) {
+        obj_stmt.bind(":section", section);
+        obj_stmt.bind(":seq", cast(int) seq);
+        obj_stmt.bind(":ocn", obj.metainfo.ocn);
+        obj_stmt.bind(":is_a", obj.metainfo.is_a);
+        bindStr(":is_of_part", obj.metainfo.is_of_part);
+        bindStr(":is_of_type", obj.metainfo.is_of_type);
+
+        /+ ↓ heading level (NULL for anything that is not a heading) +/
+        if (obj.metainfo.is_a == "heading" && obj.metainfo.heading_lev_markup < 9) {
+          obj_stmt.bind(":heading_level", obj.metainfo.heading_lev_markup);
+        } else {
+          obj_stmt.bind(":heading_level", Nullable!int());
+        }
+
+        bindStr(":identifier", obj.metainfo.identifier);
+        obj_stmt.bind(":parent_ocn", obj.metainfo.parent_ocn);
+        obj_stmt.bind(":last_descendant_ocn", obj.metainfo.last_descendant_ocn);
+
+        /+ ↓ ancestors as space-separated integers (NULL when all are 0) +/
+        {
+          bool has_ancestors = false;
+          foreach (a; obj.metainfo.markedup_ancestors) {
+            if (a != 0) { has_ancestors = true; break; }
+          }
+          if (has_ancestors) {
+            string anc;
+            foreach (i, a; obj.metainfo.markedup_ancestors) {
+              if (i > 0) anc ~= " ";
+              anc ~= a.to!string;
+            }
+            obj_stmt.bind(":ancestors", anc);
+          } else {
+            obj_stmt.bind(":ancestors", Nullable!string());
+          }
+        }
+
+        obj_stmt.bind(":dummy_heading", obj.metainfo.dummy_heading ? 1 : 0);
+        obj_stmt.bind(":object_number_off", obj.metainfo.object_number_off ? 1 : 0);
+        obj_stmt.bind(":indent_base", obj.attrib.indent_base);
+        obj_stmt.bind(":indent_hang", obj.attrib.indent_hang);
+        obj_stmt.bind(":bullet", obj.attrib.bullet ? 1 : 0);
+        bindStr(":lang", obj.attrib.language);
+        obj_stmt.bind(":has_links", obj.has.inline_links ? 1 : 0);
+        obj_stmt.bind(":has_notes_reg", obj.has.inline_notes_reg ? 1 : 0);
+        obj_stmt.bind(":has_notes_star", obj.has.inline_notes_star ? 1 : 0);
+        obj_stmt.bind(":has_images", obj.has.images ? 1 : 0);
+        bindStr(":segment", obj.tags.in_segment_html);
+        bindStr(":segment_prev", obj.tags.segname_prev);
+        bindStr(":segment_next", obj.tags.segname_next);
+        bindStr(":anchor", obj.tags.anchor_tag_html);
+
+        /+ ↓ table properties (all NULL unless the object is a table) +/
+        if (obj.metainfo.is_a == "table" && obj.table.number_of_columns > 0) {
+          obj_stmt.bind(":table_cols", obj.table.number_of_columns);
+          if (obj.table.column_widths.length > 0) {
+            string[] ws;
+            foreach (w; obj.table.column_widths) ws ~= w.to!string;
+            obj_stmt.bind(":table_widths", ws.join(" "));
+          } else {
+            obj_stmt.bind(":table_widths", Nullable!string());
+          }
+          obj_stmt.bind(":table_header", obj.table.heading ? 1 : 0);
+        } else {
+          obj_stmt.bind(":table_cols", Nullable!int());
+          obj_stmt.bind(":table_widths", Nullable!string());
+          obj_stmt.bind(":table_header", Nullable!int());
+        }
+
+        /+ ↓ code block properties +/
+        if (obj.metainfo.is_a == "code") {
+          bindStr(":code_syntax", obj.code_block.syntax);
+          obj_stmt.bind(":code_linenumbers", obj.code_block.linenumbers ? 1 : 0);
+        } else {
+          obj_stmt.bind(":code_syntax", Nullable!string());
+          obj_stmt.bind(":code_linenumbers", 0);
+        }
+
+        /+ ↓ text content +/
+        bindStr(":text", obj.text);
+
+        obj_stmt.execute();
+        obj_stmt.reset();
+      }
+    }
+
+    obj_stmt.finalize();
+    db.run("COMMIT TRANSACTION");
+  }
+}
+#+END_SRC
+
+* org includes
+** project version
+
+#+NAME: spine_version
+#+HEADER: :noweb yes
+#+BEGIN_SRC emacs-lisp
+<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_project_version()>>
+#+END_SRC
+
+** year
+
+#+NAME: year
+#+HEADER: :noweb yes
+#+BEGIN_SRC emacs-lisp
+<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:year()>>
+#+END_SRC
+
+** document header including copyright & license
+
+#+NAME: doc_header_including_copyright_and_license
+#+HEADER: :noweb yes
+#+BEGIN_SRC emacs-lisp
+<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>>
+#+END_SRC
+
+* __END__
diff --git a/org/spine.org b/org/spine.org
index 1eef0a3..abc4b07 100644
--- a/org/spine.org
+++ b/org/spine.org
@@ -342,6 +342,7 @@ bool[string] opts = [
   "pod" : false,
   "serial" : false,
   "show-abstraction" : false,
+  "show-abstraction-db" : false,
   "show-config" : false,
   "show-curate" : false,
   "show-curate-authors" : false,
@@ -478,6 +479,7 @@ auto helpInfo = getopt(args,
   "serial", "serial processing", &opts["serial"],
   "skip-output", "skip output", &opts["skip-output"],
   "show-abstraction", "show document abstraction (write .ssp file)", &opts["show-abstraction"],
+  "show-abstraction-db", "show document abstraction (write .db sqlite file)", &opts["show-abstraction-db"],
   "show-config", "show config", &opts["show-config"],
   "show-curate", "show curate", &opts["show-curate"],
   "show-curate-authors", "show curate authors", &opts["show-curate-authors"],
@@ -699,6 +701,9 @@ struct OptActions {
   @trusted bool show_abstraction() {
     return opts["show-abstraction"];
   }
+  @trusted bool show_abstraction_db() {
+    return opts["show-abstraction-db"];
+  }
   @trusted bool show_curate() {
     return opts["show-curate"];
   }
@@ -948,6 +953,7 @@ struct OptActions {
     return (
       opts["abstraction"]
       || show_abstraction
+      || show_abstraction_db
       || concordance
       || source_or_pod
       || curate
@@ -975,6 +981,7 @@ struct OptActions {
       || odt
       || manifest
       || show_abstraction
+      || show_abstraction_db
       || show_make
       || show_metadata
       || show_summary
@@ -990,6 +997,7 @@ struct OptActions {
     return (
       opts["abstraction"]
       || show_abstraction
+      || show_abstraction_db
       || curate
       || html
       || epub
@@ -1543,7 +1551,8 @@ if ((doc.matters.opt.action.debug_do)
 <<spine_each_file_do_debugs_checkdoc_1>>
 <<spine_each_file_do_debugs_checkdoc_2>>
 <<spine_each_file_do_debugs_checkdoc_3>>
-<<spine_each_file_show_abstraction>>
+<<spine_each_file_show_abstraction_peg>>
+<<spine_each_file_show_abstraction_db>>
 <<spine_each_file_do_debugs_checkdoc_4>>
 <<spine_each_file_do_debugs_checkdoc_5>>
 #+END_SRC
@@ -1598,7 +1607,7 @@ if (doc.matters.opt.action.show_config) {
 ***** show abstraction (PEG)
 --show-abstraction
 
-#+NAME: spine_each_file_show_abstraction
+#+NAME: spine_each_file_show_abstraction_peg
 #+BEGIN_SRC d
 /+ ↓ document abstraction text representation +/
 if (doc.matters.opt.action.show_abstraction) {
@@ -1607,6 +1616,18 @@ if (doc.matters.opt.action.show_abstraction) {
 }
 #+END_SRC
 
+***** show abstraction (sqlite db)
+--show-abstraction-db
+
+#+NAME: spine_each_file_show_abstraction_db
+#+BEGIN_SRC d
+/+ ↓ document abstraction sqlite database +/
+if (doc.matters.opt.action.show_abstraction_db) {
+  import sisudoc.io_out.create_abstraction_db;
+  spineAbstractionDb!()(doc);
+}
+#+END_SRC
+
 ***** abstraction curate :abstraction:curate:
 
 - abstraction curate |
