aboutsummaryrefslogtreecommitdiffhomepage
path: root/org
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2026-04-22 13:52:21 -0400
committerRalph Amissah <ralph.amissah@gmail.com>2026-04-22 20:42:31 -0400
commit51549f11d60cd353564486b3598e69259fb01b66 (patch)
tree8dec193314d88ccfd76d80bc1cef951acf2b2204 /org
parent.ssp document abstraction as PEG parsable text (diff)
document abstraction as per document sqlite db
--show-abstraction-db flag to write per-document - SQLite database of document abstraction (Claude-Code primary assist) - Add a new output mode that serializes the in-memory document abstraction to a per-document SQLite database. This complements the .ssp text format (--show-abstraction) with a queryable database representation of the same data. - Schema: metadata table - key/value pairs for document metadata (title, creator, dates, rights, classify, identifiers, language, notes, make settings, doc_has counts) objects table - one row per document object with columns: section, seq (position within section), ocn, is_a, is_of_part, is_of_type, heading_level, identifier, parent_ocn, last_descendant_ocn, ancestors, indent/bullet/lang, has_* flags, segment/anchor tags, table/code properties, text content Indexed on: section, ocn, parent_ocn, is_a, heading_level - Uses prepared statements via d2sqlite3 (existing dependency) for safe and efficient insertion. Each document produces a standalone .abstraction.db file in the abstraction/ output directory. - New files: src/sisudoc/io_out/create_abstraction_db.d Follows the same pattern as create_abstraction_txt.d. Creates schema, populates metadata via key/value inserts, then iterates all sections writing objects with prepared statements within a single transaction. - Changes to spine.d: - Add "show-abstraction-db" to opts init, getopt, OptActions - Add to abstraction(), require_processing_files(), and meta_processing_general() gates - Insert call at both spineAbstraction sites - Tested against all 35 sample documents (including 9-language live-manual) - zero failures. Works standalone or combined with --show-abstraction and other output flags. - Example queries the database supports: SELECT ocn, heading_level, text FROM objects WHERE is_a = 'heading' AND section = 'body'; SELECT * FROM objects WHERE parent_ocn = 10; SELECT key, value FROM metadata WHERE key LIKE 'title.%'; Co-Authored-By: Anthropic Claude Opus 4.6 (1M context)
Diffstat (limited to 'org')
-rw-r--r--org/out_src_abstraction_sqlite_db.org365
-rw-r--r--org/spine.org25
2 files changed, 388 insertions, 2 deletions
diff --git a/org/out_src_abstraction_sqlite_db.org b/org/out_src_abstraction_sqlite_db.org
new file mode 100644
index 0000000..a048934
--- /dev/null
+++ b/org/out_src_abstraction_sqlite_db.org
@@ -0,0 +1,365 @@
+-*- mode: org -*-
+#+TITLE: sisudoc spine (doc_reform) output document abstraction sqlite db
+#+DESCRIPTION: documents - structuring, publishing in multiple formats & search
+#+FILETAGS: :spine:output:source:pod:
+#+AUTHOR: Ralph Amissah
+#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
+#+COPYRIGHT: Copyright (C) 2015 (continuously updated, current 2026) Ralph Amissah
+#+LANGUAGE: en
+#+STARTUP: content hideblocks hidestars noindent entitiespretty
+#+PROPERTY: header-args :exports code
+#+PROPERTY: header-args+ :noweb yes
+#+PROPERTY: header-args+ :results silent
+#+PROPERTY: header-args+ :cache no
+#+PROPERTY: header-args+ :padline no
+#+PROPERTY: header-args+ :mkdirp yes
+#+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t
+
+- [[./doc-reform.org][doc-reform.org]] [[./][org/]]
+
+* (Object-Centric) Document Abstraction SQLite db
+
+- Process markup document, create document abstraction, write it to a per-document SQLite database
+
+** _module template_ :module:metadoc_from_src:
+
+rename source_abstraction_peg_txt.d
+
+#+HEADER: :tangle "../src/sisudoc/io_out/create_abstraction_db.d"
+#+HEADER: :noweb yes
+#+BEGIN_SRC d
+<<doc_header_including_copyright_and_license>>
+module sisudoc.io_out.create_abstraction_db;
+
+/+ ↓ write document abstraction as per-document sqlite3 database
+   - one standalone "<doc_uid_out>.abstraction.db" file per document, placed
+     in the "abstraction" directory under the output base path
+   - table "metadata": key/value pairs (document metadata, make settings,
+     doc_has counts); only non-empty values are stored
+   - table "objects": one row per document object, in section order
++/
+template spineAbstractionDb() {
+  import std.conv : to;
+  import std.file;
+  import std.path;
+  import std.stdio;
+  import std.string;
+  import std.array;
+  import std.typecons : Nullable;
+  import d2sqlite3;
+  import sisudoc.io_out.paths_output;
+
+  /+ ↓ serialize doc.abstraction (and selected doc.matters fields) to sqlite
+     - doc: must provide .abstraction (objects keyed by section name) and
+       .matters (output path, source info, options, metadata)
+     - any existing database file for the document is replaced
+     - failure to create the output directory or to remove an old file is
+       reported on stderr and processing continues (best effort); errors
+       raised by sqlite itself are not caught here
+  +/
+  void spineAbstractionDb(D)(D doc) {
+    auto doc_abstraction = doc.abstraction;
+    auto doc_matters = doc.matters;
+
+    /+ ↓ determine output path +/
+    auto out_pth = spineOutPaths!()(doc_matters.output_path, doc_matters.src.language);
+    string base_dir = "abstraction";
+    string base_pth = ((out_pth.output_base.chainPath(base_dir)).asNormalizedPath).array;
+    try {
+      if (!exists(base_pth)) {
+        base_pth.mkdirRecurse;
+      }
+    } catch (Exception ex) {
+      /+ ↓ keep going (best effort), but do not hide the cause +/
+      stderr.writeln("WARNING: could not create directory ", base_pth, ": ", ex.msg);
+    }
+    string db_file = ((base_pth.chainPath(
+      doc_matters.src.doc_uid_out ~ ".abstraction.db")).asNormalizedPath).array;
+
+    /+ ↓ remove existing file to start fresh; the database is opened in WAL
+       mode below, so a "-wal"/"-shm" pair left behind (e.g. by an
+       interrupted run) belongs to the old database and must not be paired
+       with the newly created file +/
+    foreach (stale; [db_file, db_file ~ "-wal", db_file ~ "-shm"]) {
+      try {
+        if (exists(stale)) {
+          remove(stale);
+        }
+      } catch (Exception ex) {
+        stderr.writeln("WARNING: could not remove ", stale, ": ", ex.msg);
+      }
+    }
+
+    if (doc_matters.opt.action.vox_gt_1) {
+      writeln(" ", db_file);
+    }
+
+    /+ ↓ open database and create schema +/
+    auto db = Database(db_file);
+    db.run("PRAGMA journal_mode=WAL");
+    db.run("PRAGMA synchronous=NORMAL");
+
+    db.run("
+      CREATE TABLE metadata (
+        key TEXT PRIMARY KEY,
+        value TEXT NOT NULL
+      );
+
+      CREATE TABLE objects (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        section TEXT NOT NULL,
+        seq INTEGER NOT NULL,
+        ocn INTEGER DEFAULT 0,
+        is_a TEXT NOT NULL,
+        is_of_part TEXT,
+        is_of_type TEXT,
+        heading_level INTEGER,
+        identifier TEXT,
+        parent_ocn INTEGER DEFAULT 0,
+        last_descendant_ocn INTEGER DEFAULT 0,
+        ancestors TEXT,
+        dummy_heading INTEGER DEFAULT 0,
+        object_number_off INTEGER DEFAULT 0,
+        indent_base INTEGER DEFAULT 0,
+        indent_hang INTEGER DEFAULT 0,
+        bullet INTEGER DEFAULT 0,
+        lang TEXT,
+        has_links INTEGER DEFAULT 0,
+        has_notes_reg INTEGER DEFAULT 0,
+        has_notes_star INTEGER DEFAULT 0,
+        has_images INTEGER DEFAULT 0,
+        segment TEXT,
+        segment_prev TEXT,
+        segment_next TEXT,
+        anchor TEXT,
+        table_cols INTEGER,
+        table_widths TEXT,
+        table_header INTEGER,
+        code_syntax TEXT,
+        code_linenumbers INTEGER DEFAULT 0,
+        text TEXT
+      );
+
+      CREATE INDEX idx_objects_section ON objects(section);
+      CREATE INDEX idx_objects_ocn ON objects(ocn);
+      CREATE INDEX idx_objects_parent ON objects(parent_ocn);
+      CREATE INDEX idx_objects_is_a ON objects(is_a);
+      CREATE INDEX idx_objects_heading ON objects(heading_level)
+        WHERE heading_level IS NOT NULL;
+    ");
+
+    /+ ↓ populate metadata; all inserts (metadata and objects) share one
+       transaction, committed at the end +/
+    db.run("BEGIN TRANSACTION");
+
+    auto meta_stmt = db.prepare(
+      "INSERT INTO metadata (key, value) VALUES (:key, :value)"
+    );
+    auto meta = doc_matters.conf_make_meta.meta;
+
+    /+ ↓ insert one key/value row; empty values are skipped, so a key that
+       is absent from the table means "not set" +/
+    void insertMeta(string key, string value) {
+      if (value.length > 0) {
+        meta_stmt.bind(":key", key);
+        meta_stmt.bind(":value", value);
+        meta_stmt.execute();
+        meta_stmt.reset();
+      }
+    }
+
+    insertMeta("title.main", meta.title_main);
+    insertMeta("title.subtitle", meta.title_subtitle);
+    insertMeta("title.full", meta.title_full);
+    insertMeta("title.language", meta.title_language);
+    insertMeta("creator.author", meta.creator_author);
+    insertMeta("creator.author_surname", meta.creator_author_surname);
+    insertMeta("creator.author_surname_fn", meta.creator_author_surname_fn);
+    insertMeta("creator.author_email", meta.creator_author_email);
+    insertMeta("creator.illustrator", meta.creator_illustrator);
+    insertMeta("creator.translator", meta.creator_translator);
+    insertMeta("date.published", meta.date_published);
+    insertMeta("date.created", meta.date_created);
+    insertMeta("date.issued", meta.date_issued);
+    insertMeta("date.available", meta.date_available);
+    insertMeta("date.modified", meta.date_modified);
+    insertMeta("date.valid", meta.date_valid);
+    insertMeta("rights.copyright", meta.rights_copyright);
+    insertMeta("rights.license", meta.rights_license);
+    insertMeta("classify.topic_register", meta.classify_topic_register);
+    insertMeta("classify.subject", meta.classify_subject);
+    insertMeta("classify.keywords", meta.classify_keywords);
+    insertMeta("classify.loc", meta.classify_loc);
+    insertMeta("classify.dewey", meta.classify_dewey);
+    insertMeta("identifier.isbn", meta.identifier_isbn);
+    insertMeta("identifier.oclc", meta.identifier_oclc);
+    insertMeta("language.document", meta.language_document);
+    insertMeta("notes.abstract", meta.notes_abstract);
+    insertMeta("notes.description", meta.notes_description);
+    insertMeta("notes.summary", meta.notes_summary);
+
+    /+ ↓ make settings +/
+    auto make = doc_matters.conf_make_meta.make;
+    insertMeta("make.doc_type", make.doc_type);
+    insertMeta("make.auto_num_top_at_level", make.auto_num_top_at_level);
+    insertMeta("make.auto_num_top_lv", make.auto_num_top_lv.to!string);
+    insertMeta("make.auto_num_depth", make.auto_num_depth.to!string);
+
+    /+ ↓ doc_has counts +/
+    insertMeta("doc_has.inline_links", doc_matters.has.inline_links.to!string);
+    insertMeta("doc_has.inline_notes_reg", doc_matters.has.inline_notes_reg.to!string);
+    insertMeta("doc_has.inline_notes_star", doc_matters.has.inline_notes_star.to!string);
+    insertMeta("doc_has.tables", doc_matters.has.tables.to!string);
+    insertMeta("doc_has.codeblocks", doc_matters.has.codeblocks.to!string);
+    insertMeta("doc_has.images", doc_matters.has.images.to!string);
+    insertMeta("doc_has.poems", doc_matters.has.poems.to!string);
+    insertMeta("doc_has.groups", doc_matters.has.groups.to!string);
+    insertMeta("doc_has.blocks", doc_matters.has.blocks.to!string);
+    insertMeta("doc_has.quotes", doc_matters.has.quotes.to!string);
+
+    meta_stmt.finalize();
+
+    /+ ↓ populate objects +/
+    auto obj_stmt = db.prepare(
+      "INSERT INTO objects ("
+      ~ "section, seq, ocn, is_a, is_of_part, is_of_type,"
+      ~ "heading_level, identifier, parent_ocn, last_descendant_ocn,"
+      ~ "ancestors, dummy_heading, object_number_off,"
+      ~ "indent_base, indent_hang, bullet, lang,"
+      ~ "has_links, has_notes_reg, has_notes_star, has_images,"
+      ~ "segment, segment_prev, segment_next, anchor,"
+      ~ "table_cols, table_widths, table_header,"
+      ~ "code_syntax, code_linenumbers, text"
+      ~ ") VALUES ("
+      ~ ":section, :seq, :ocn, :is_a, :is_of_part, :is_of_type,"
+      ~ ":heading_level, :identifier, :parent_ocn, :last_descendant_ocn,"
+      ~ ":ancestors, :dummy_heading, :object_number_off,"
+      ~ ":indent_base, :indent_hang, :bullet, :lang,"
+      ~ ":has_links, :has_notes_reg, :has_notes_star, :has_images,"
+      ~ ":segment, :segment_prev, :segment_next, :anchor,"
+      ~ ":table_cols, :table_widths, :table_header,"
+      ~ ":code_syntax, :code_linenumbers, :text"
+      ~ ")"
+    );
+
+    /+ ↓ bind a string parameter of obj_stmt, storing NULL for an empty
+       string; every parameter is (re)bound for every row, so no value can
+       leak from the previous row +/
+    void bindStr(string param, string val) {
+      if (val.length > 0) {
+        obj_stmt.bind(param, val);
+      } else {
+        obj_stmt.bind(param, Nullable!string());
+      }
+    }
+
+    /+ ↓ sections are written in this fixed order; sections missing from
+       the abstraction, or empty, are skipped +/
+    string[] section_order = ["head", "toc", "body", "endnotes",
+      "glossary", "bibliography", "bookindex", "blurb"];
+
+    foreach (section; section_order) {
+      if (section !in doc_abstraction) continue;
+      auto section_objs = doc_abstraction[section];
+      if (section_objs.length == 0) continue;
+
+      foreach (seq, obj; section_objs) {
+        obj_stmt.bind(":section", section);
+        /+ ↓ seq is the object's position within its section +/
+        obj_stmt.bind(":seq", cast(int) seq);
+        obj_stmt.bind(":ocn", obj.metainfo.ocn);
+        obj_stmt.bind(":is_a", obj.metainfo.is_a);
+
+        /+ ↓ nullable string fields +/
+        bindStr(":is_of_part", obj.metainfo.is_of_part);
+        bindStr(":is_of_type", obj.metainfo.is_of_type);
+
+        /+ ↓ heading level, NULL unless the object is a heading with a
+           markup level below 9 +/
+        if (obj.metainfo.is_a == "heading" && obj.metainfo.heading_lev_markup < 9) {
+          obj_stmt.bind(":heading_level", obj.metainfo.heading_lev_markup);
+        } else {
+          obj_stmt.bind(":heading_level", Nullable!int());
+        }
+
+        bindStr(":identifier", obj.metainfo.identifier);
+        obj_stmt.bind(":parent_ocn", obj.metainfo.parent_ocn);
+        obj_stmt.bind(":last_descendant_ocn", obj.metainfo.last_descendant_ocn);
+
+        /+ ↓ ancestors as space-separated integers, NULL when all are 0 +/
+        {
+          bool has_ancestors = false;
+          foreach (a; obj.metainfo.markedup_ancestors) {
+            if (a != 0) { has_ancestors = true; break; }
+          }
+          if (has_ancestors) {
+            string anc;
+            foreach (i, a; obj.metainfo.markedup_ancestors) {
+              if (i > 0) anc ~= " ";
+              anc ~= a.to!string;
+            }
+            obj_stmt.bind(":ancestors", anc);
+          } else {
+            obj_stmt.bind(":ancestors", Nullable!string());
+          }
+        }
+
+        obj_stmt.bind(":dummy_heading", obj.metainfo.dummy_heading ? 1 : 0);
+        obj_stmt.bind(":object_number_off", obj.metainfo.object_number_off ? 1 : 0);
+        obj_stmt.bind(":indent_base", obj.attrib.indent_base);
+        obj_stmt.bind(":indent_hang", obj.attrib.indent_hang);
+        obj_stmt.bind(":bullet", obj.attrib.bullet ? 1 : 0);
+        bindStr(":lang", obj.attrib.language);
+        obj_stmt.bind(":has_links", obj.has.inline_links ? 1 : 0);
+        obj_stmt.bind(":has_notes_reg", obj.has.inline_notes_reg ? 1 : 0);
+        obj_stmt.bind(":has_notes_star", obj.has.inline_notes_star ? 1 : 0);
+        obj_stmt.bind(":has_images", obj.has.images ? 1 : 0);
+        bindStr(":segment", obj.tags.in_segment_html);
+        bindStr(":segment_prev", obj.tags.segname_prev);
+        bindStr(":segment_next", obj.tags.segname_next);
+        bindStr(":anchor", obj.tags.anchor_tag_html);
+
+        /+ ↓ table properties, NULL for anything that is not a table with
+           at least one column +/
+        if (obj.metainfo.is_a == "table" && obj.table.number_of_columns > 0) {
+          obj_stmt.bind(":table_cols", obj.table.number_of_columns);
+          if (obj.table.column_widths.length > 0) {
+            string[] ws;
+            foreach (w; obj.table.column_widths) ws ~= w.to!string;
+            obj_stmt.bind(":table_widths", ws.join(" "));
+          } else {
+            obj_stmt.bind(":table_widths", Nullable!string());
+          }
+          obj_stmt.bind(":table_header", obj.table.heading ? 1 : 0);
+        } else {
+          obj_stmt.bind(":table_cols", Nullable!int());
+          obj_stmt.bind(":table_widths", Nullable!string());
+          obj_stmt.bind(":table_header", Nullable!int());
+        }
+
+        /+ ↓ code block properties +/
+        if (obj.metainfo.is_a == "code") {
+          bindStr(":code_syntax", obj.code_block.syntax);
+          obj_stmt.bind(":code_linenumbers", obj.code_block.linenumbers ? 1 : 0);
+        } else {
+          obj_stmt.bind(":code_syntax", Nullable!string());
+          obj_stmt.bind(":code_linenumbers", 0);
+        }
+
+        /+ ↓ text content +/
+        bindStr(":text", obj.text);
+
+        obj_stmt.execute();
+        obj_stmt.reset();
+      }
+    }
+
+    obj_stmt.finalize();
+    db.run("COMMIT TRANSACTION");
+  }
+}
+#+END_SRC
+
+* org includes
+** project version
+
+#+NAME: spine_version
+#+HEADER: :noweb yes
+#+BEGIN_SRC emacs-lisp
+<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_project_version()>>
+#+END_SRC
+
+** year
+
+#+NAME: year
+#+HEADER: :noweb yes
+#+BEGIN_SRC emacs-lisp
+<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:year()>>
+#+END_SRC
+
+** document header including copyright & license
+
+#+NAME: doc_header_including_copyright_and_license
+#+HEADER: :noweb yes
+#+BEGIN_SRC emacs-lisp
+<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>>
+#+END_SRC
+
+* __END__
diff --git a/org/spine.org b/org/spine.org
index 1eef0a3..abc4b07 100644
--- a/org/spine.org
+++ b/org/spine.org
@@ -342,6 +342,7 @@ bool[string] opts = [
"pod" : false,
"serial" : false,
"show-abstraction" : false,
+ "show-abstraction-db" : false,
"show-config" : false,
"show-curate" : false,
"show-curate-authors" : false,
@@ -478,6 +479,7 @@ auto helpInfo = getopt(args,
"serial", "serial processing", &opts["serial"],
"skip-output", "skip output", &opts["skip-output"],
"show-abstraction", "show document abstraction (write .ssp file)", &opts["show-abstraction"],
+ "show-abstraction-db", "show document abstraction (write .db sqlite file)", &opts["show-abstraction-db"],
"show-config", "show config", &opts["show-config"],
"show-curate", "show curate", &opts["show-curate"],
"show-curate-authors", "show curate authors", &opts["show-curate-authors"],
@@ -699,6 +701,9 @@ struct OptActions {
@trusted bool show_abstraction() {
return opts["show-abstraction"];
}
+ /+ ↓ value of the --show-abstraction-db option (write per-document .db sqlite file) +/
+ @trusted bool show_abstraction_db() {
+ return opts["show-abstraction-db"];
+ }
@trusted bool show_curate() {
return opts["show-curate"];
}
@@ -948,6 +953,7 @@ struct OptActions {
return (
opts["abstraction"]
|| show_abstraction
+ || show_abstraction_db
|| concordance
|| source_or_pod
|| curate
@@ -975,6 +981,7 @@ struct OptActions {
|| odt
|| manifest
|| show_abstraction
+ || show_abstraction_db
|| show_make
|| show_metadata
|| show_summary
@@ -990,6 +997,7 @@ struct OptActions {
return (
opts["abstraction"]
|| show_abstraction
+ || show_abstraction_db
|| curate
|| html
|| epub
@@ -1543,7 +1551,8 @@ if ((doc.matters.opt.action.debug_do)
<<spine_each_file_do_debugs_checkdoc_1>>
<<spine_each_file_do_debugs_checkdoc_2>>
<<spine_each_file_do_debugs_checkdoc_3>>
-<<spine_each_file_show_abstraction>>
+<<spine_each_file_show_abstraction_peg>>
+<<spine_each_file_show_abstraction_db>>
<<spine_each_file_do_debugs_checkdoc_4>>
<<spine_each_file_do_debugs_checkdoc_5>>
#+END_SRC
@@ -1598,7 +1607,7 @@ if (doc.matters.opt.action.show_config) {
***** show abstraction (PEG)
--show-abstraction
-#+NAME: spine_each_file_show_abstraction
+#+NAME: spine_each_file_show_abstraction_peg
#+BEGIN_SRC d
/+ ↓ document abstraction text representation +/
if (doc.matters.opt.action.show_abstraction) {
@@ -1607,6 +1616,18 @@ if (doc.matters.opt.action.show_abstraction) {
}
#+END_SRC
+***** show abstraction (sqlite db)
+--show-abstraction-db
+
+#+NAME: spine_each_file_show_abstraction_db
+#+BEGIN_SRC d
+/+ ↓ --show-abstraction-db: write the document abstraction to a per-document sqlite database +/
+if (doc.matters.opt.action.show_abstraction_db) {
+ import sisudoc.io_out.create_abstraction_db;
+ spineAbstractionDb!()(doc);
+}
+#+END_SRC
+
***** abstraction curate :abstraction:curate:
- abstraction curate