diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2025-10-03 12:15:42 -0400 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2025-10-03 12:27:26 -0400 |
commit | 297410fc013135e992842b8347c2e0bbec042d20 (patch) | |
tree | ff3e3f0b952ac60e7c7404be63e7c7e0dede4d57 /org/out_text.org | |
parent | (editor emacs org-mode includes) (diff) |
- spine --text [--output=output path] [markup source]
Diffstat (limited to 'org/out_text.org')
-rw-r--r-- | org/out_text.org | 572 |
1 files changed, 572 insertions, 0 deletions
diff --git a/org/out_text.org b/org/out_text.org new file mode 100644 index 0000000..df5ccb2 --- /dev/null +++ b/org/out_text.org @@ -0,0 +1,572 @@ +-*- mode: org -*- +#+TITLE: sisudoc spine (doc_reform) output xmls +#+DESCRIPTION: documents - structuring, publishing in multiple formats & search +#+FILETAGS: :spine:output:text: +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+COPYRIGHT: Copyright (C) 2015 - 2025 Ralph Amissah +#+LANGUAGE: en +#+STARTUP: content hideblocks hidestars noindent entitiespretty +#+PROPERTY: header-args :exports code +#+PROPERTY: header-args+ :noweb yes +#+PROPERTY: header-args+ :results output none +#+PROPERTY: header-args+ :cache no +#+PROPERTY: header-args+ :padline no +#+PROPERTY: header-args+ :mkdirp yes +#+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t + +- [[./doc-reform.org][doc-reform.org]] [[./][org/]] +- [[./output_hub.org][output_hub]] + +* Text +** outputText template + +#+HEADER: :tangle "../src/sisudoc/io_out/text.d" +#+HEADER: :noweb yes +#+BEGIN_SRC d +<<doc_header_including_copyright_and_license>> +module sisudoc.io_out.text; +@safe: +template outputText() { + <<munge>> + <<the_document>> + void outputText(D,M) ( + const D doc_abstraction, + M doc_matters, + ) { + import std.stdio; + import sisudoc.io_out; + <<text_out>> + text_out(doc_abstraction, doc_matters); + } +} +#+END_SRC + +** Output + +#+NAME: text_out +#+HEADER: :noweb yes +#+BEGIN_SRC d +void text_out(D,M)( + const D doc_abstraction, + M doc_matters, +) { + struct Text { + string head; + string content; + string tail; + } + auto text = Text(); + // text.head = theDocument!().text_head(doc_matters); + text.content = theDocument!().text_body(doc_abstraction, doc_matters); + text.tail = theDocument!().text_tail(doc_matters); + auto pth_text = spinePathsText(doc_matters); + try { + import std.file; + if (!exists(pth_text.base_pth)) { + (pth_text.base_pth).mkdirRecurse; + } + } catch (ErrnoException ex) { + } + if (doc_matters.opt.action.vox_gt_1) { + writeln(" ", pth_text.text_file); + } + // writeln(pth_text.base_pth); + auto f = File(pth_text.text_file, "w"); + // f.writeln(text.head); + f.writeln(text.content); + f.writeln(text.tail); +} +#+END_SRC + +* The Document +** theDocument template + +#+NAME: the_document +#+HEADER: :noweb yes +#+BEGIN_SRC d +template theDocument() { + import std.stdio; + import sisudoc.io_out; + <<text_head>> + <<text_body_assign_munge>> + <<text_tail>> +} +#+END_SRC + +** the Document (assign munge) +*** Head SKIP + +#+NAME: text_head +#+HEADER: :noweb yes +#+BEGIN_SRC d +string text_head(M)( + M doc_matters, +) { + return "head"; +} +#+END_SRC + +*** Body munge assign + +#+NAME: text_body_assign_munge +#+HEADER: :noweb yes +#+BEGIN_SRC d +string text_body(D,M)( + const D doc_abstraction, + M doc_matters, +) { + string doc_object = ""; + foreach (section; doc_matters.has.keys_seq.scroll) { + foreach (obj; doc_abstraction[section]) { + if (obj.metainfo.is_a == "toc") { doc_object ~= munge!().toc(obj, doc_matters); } + if (obj.metainfo.is_a == "heading") { doc_object ~= munge!().heading(obj, doc_matters); } + if (obj.metainfo.is_a == "para") { doc_object ~= munge!().para(obj, doc_matters); } + if (obj.metainfo.is_a == "group") { doc_object ~= munge!().group(obj, doc_matters); } + if (obj.metainfo.is_a == "block") { doc_object ~= munge!().block(obj, doc_matters); } + if (obj.metainfo.is_a == "poem") { doc_object ~= munge!().poem(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "verse") { doc_object ~= munge!().verse(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "code") { doc_object ~= munge!().code(obj, doc_matters); } + if (obj.metainfo.is_a == "quote") { doc_object ~= munge!().quote(obj, doc_matters); } // LATER + if (obj.metainfo.is_a == "table") { doc_object ~= munge!().table(obj, doc_matters); } + if (obj.metainfo.is_a == "endnote") { doc_object ~= munge!().endnote(obj, doc_matters); } + if (obj.metainfo.is_a == "bookindex") { doc_object ~= munge!().bookindex(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "bibliography") { doc_object ~= munge!().bibliography(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "glossary") { doc_object ~= munge!().glossary(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "blurb") { doc_object ~= munge!().blurb(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "comment") { doc_object ~= munge!().comment(obj, doc_matters); } // LATER + } + } + return doc_object; +} +#+END_SRC + +*** Tail + +#+NAME: text_tail +#+HEADER: :noweb yes +#+BEGIN_SRC d +string text_tail(M)( + M doc_matters, +) { + string metadata_; + if (doc_matters.opt.action.debug_do) { + writeln(doc_matters.src.filename_base); + writeln("Title: ", doc_matters.conf_make_meta.meta.title_full); + writeln(" Author: ", doc_matters.conf_make_meta.meta.creator_author); + writeln(" Published: ", doc_matters.conf_make_meta.meta.date_published); + writeln(" Copyright: ", doc_matters.conf_make_meta.meta.rights_copyright); + writeln(" License: ", doc_matters.conf_make_meta.meta.rights_license); + } + if (!(doc_matters.conf_make_meta.meta.title_full.empty)) { + metadata_ ~= "Title: " ~ doc_matters.conf_make_meta.meta.title_full ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("ERROR no Title information provided in document header ", doc_matters.src.filename_base); + } + if (!(doc_matters.conf_make_meta.meta.creator_author.empty)) { + if (doc_matters.opt.action.html_link_curate) { + metadata_ ~= "Author: " ~ doc_matters.conf_make_meta.meta.creator_author_surname.translate([' ' : "_"]) + ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n"; + } else { + metadata_ ~= "Author: " + ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n"; + } + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("ERROR no Author information provided in document header ", doc_matters.src.filename_base); + } + metadata_ ~= "Published: " ~ doc_matters.conf_make_meta.meta.date_published ~ "\n\n"; + if (!(doc_matters.conf_make_meta.meta.rights_copyright.empty)) { + metadata_ ~= "Copyright: " ~ doc_matters.conf_make_meta.meta.rights_copyright ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("WARNING no Copyright information provided in document header ", doc_matters.src.filename_base); + } + if (!(doc_matters.conf_make_meta.meta.rights_license.empty)) { + metadata_ ~= "License: " ~ doc_matters.conf_make_meta.meta.rights_license ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("WARNING no License information provided in document header ", doc_matters.src.filename_base); + } + metadata_ ~= doc_matters.generator_program.project_name.strip ~ "\n"; + metadata_ ~= doc_matters.generator_program.url_home.strip; + return metadata_; +} +#+END_SRC + +* Munge + +#+NAME: munge +#+HEADER: :noweb yes +#+BEGIN_SRC d +template munge() { + import sisudoc.io_out; + import sisudoc.io_out.rgx; + import std.stdio; + import std.conv; + import std.conv : to; + import std.typecons : Nullable; + mixin spineRgxOut; + static auto rgx = RgxO(); + void puts(string _obj_is) { + writeln(__FILE__, ":", __LINE__, ": ", _obj_is); + } + string newline = "\n"; + string newlines = "\n\n"; + template special_characters_and_font_face() { + string code(string _txt){ + _txt = _txt + .replaceAll(rgx.nbsp_char, " "); + return _txt; + } + string general(string _txt) { + _txt = _txt + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.br_line, "\n") + .replaceAll(rgx.br_line_inline, "\n") + .replaceAll(rgx.br_line_spaced, "\n\n") + .replaceAll(rgx.inline_strike, "-{$1}-") + .replaceAll(rgx.inline_insert, "+{$1}+") + .replaceAll(rgx.inline_cite, "\"{$1}\"") + .replaceAll(rgx.inline_emphasis, "!{$1}!") + .replaceAll(rgx.inline_bold, "*{$1}*") + .replaceAll(rgx.inline_italics, "/{$1}/") + .replaceAll(rgx.inline_underscore, "_{$1}_") + .replaceAll(rgx.inline_superscript, "^{$1}^") + .replaceAll(rgx.inline_subscript, ",{$1},") + .replaceAll(rgx.inline_mono, "#{$1}#"); + return _txt; + } + string links_and_images(string _txt){ + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + if (m.captures[3] == "0") { + _txt = _txt + .replaceFirst(rgx.inline_link, (m.captures[1])); + } else { + _txt = _txt + .replaceFirst(rgx.inline_link, (m.captures[1] ~ " ≫" ~ m.captures[3])); + } + } + } + if (_txt.matchFirst(rgx.inline_image)) { + foreach (m; _txt.matchAll(rgx.inline_image)) { + _txt = _txt + .replaceFirst(rgx.inline_image, (m.captures[3])); + } + } + return _txt; + } + } + string generalMunge(O,M)(O obj, M doc_matters) { + string _txt = obj.text; + string _notes; + string _ocn; + string general_munge; + if (obj.metainfo.ocn == 0 || doc_matters.opt.action.ocn_off) { + _ocn = ""; + } else { + _ocn = "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newline; + } + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_regular_number_note)) { + _notes ~= newlines ~ m["num"] ~ ". " ~ m["note"]; + } + } + _txt = _txt.replaceAll(rgx.inline_notes_al_regular_number_note, "[$1]"); + if (obj.metainfo.is_a == "code") { + _txt = special_characters_and_font_face!().code(_txt); + } else { + _txt = special_characters_and_font_face!().general(_txt); + } + _txt = special_characters_and_font_face!().links_and_images(_txt); + if (obj.metainfo.is_a == "heading") { + general_munge = newline ~ _txt ~ _notes ~ newline ~ _ocn ~ newline; + } else { + general_munge = _txt ~ _notes ~ newline ~ _ocn ~ newline; + } + return general_munge; + } + string toc(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return "toc\n"; + // _txt = _special_characters_and_font_face(obj.text); + string _txt = special_characters_and_font_face!().general(obj.text); + string _spaces; + switch (obj.attrib.indent_hang) { + case 1: _spaces = ""; + break; + case 2: _spaces = ":"; + break; + case 3: _spaces = "∴"; + break; + case 4: _spaces = " "; + break; + case 5: _spaces = " "; + break; + case 6: _spaces = " "; + break; + case 7: _spaces = " "; + break; + case 8: _spaces = " "; + break; + default: + break; + } + _txt = _txt.replaceAll(rgx.inline_link, (_spaces ~ "$1 ≫ $3")); + return _txt ~ newline; + } + string heading(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string para(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string group(O,M)(O obj, M doc_matters) { + /+ + The "group" is different from the "block" mark in that "group" does not + preserve whitespace, the "block" mark does. The text falling within the + block is a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string block(O,M)(O obj, M doc_matters) { + /+ + The "block" is different from the "group" mark in that the "block" mark + (like the "poem" mark) preserves whitespace, the "group" mark does not. + The text falling within the "block" is a single object, which is different + from the "poem" mark where each identified verse is an object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string poem(O,M)(O obj, M doc_matters) { // LATER + /+ + The "poem" mark like the "block" preserves whitespace. Text followed by + two newlines are identified as verse and each verse is an object i.e. a + poem may consist of multiple verse each of which is identified as an + object, unlike a text "block" which is identified as a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string verse(O,M)(O obj, M doc_matters) { + /+ + See description of poem, the poem is demarkated but the verse is the + object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string code(O,M)(O obj, M doc_matters) { + /+ + "Code" blocks are a single text object, in which the original text is + preserved. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string quote(O,M)(O obj, M doc_matters) { // LATER + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string table(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + auto tablarize(O)( + string _txt, + const O obj, + ) { + string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_cols; + string _table = ""; + string _tablenote = ""; + int[] _col_width; + _col_width.length = obj.table.number_of_columns.to!ulong; + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + _table ~= ""; + foreach(col_idx, cell; _table_cols) { + if (!((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2))) { + if (_col_width[col_idx] < (cell.length.to!int)) { + _col_width[col_idx] = cell.length.to!int; + } + } + } + } + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + foreach(col_idx, cell; _table_cols) { + if ((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) + _tablenote ~= cell ~ newline; + } else { + if (obj.table.column_aligns[col_idx] == "l") { + _table ~= format(q"┃%-*s%s┃", + _col_width[col_idx], + cell, + (_table_cols.length > (col_idx + 1)) ? " ┊ " : "" + ); + } else { + _table ~= format(q"┃%*s%s┃", + _col_width[col_idx], + cell, + (_table_cols.length > (col_idx + 1)) ? " ┊ " : "" + ); + } + _table = _table + .replaceAll(regex("\\s*$"), ""); + } + } + _table ~= newline; + } + Tuple!(string, string) t = tuple( + _table, + _tablenote, + ); + return t; + } + // string _txt = obj.text; + // writeln(obj.table.column_widths); + auto _t = tablarize(obj.text, obj); + string _txt = _t[0]; + string _tablenote = _t[1]; + return _txt ~ _tablenote ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string endnote(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _ocn; + _ocn = "「" ~ obj.metainfo.ocn.to!string ~ "」"; + string _txt = obj.text; + _txt = _txt + .replaceFirst(rgx.inline_link, ("$1")) + .replaceFirst(rgx.inline_superscript, ("$1")); + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + } + string bookindex(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = _txt + .replaceAll(rgx.inline_link, ("≫$1")) + .replaceAll(regex("\\s*\\\\"), ""); + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + } + string bibliography(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + // ALT: + // string _general_munge = generalMunge(obj,doc_matters); + // return _general_munge; + } + string glossary(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = special_characters_and_font_face!().general(_txt); + return _txt; + } + string blurb(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string comment(O,M)(O obj, M doc_matters) { // LATER + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } +} +#+END_SRC + +* org includes +** spine project VERSION + +#+NAME: spine_version +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp +<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_project_version()>> +#+END_SRC + +** year + +#+NAME: year +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp +<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:year()>> +#+END_SRC + +** document header including copyright & license + +#+NAME: doc_header_including_copyright_and_license +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp +<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>> +#+END_SRC + +* __END__ +* TODO WORKON + +#+BEGIN_SRC org +TODO + PRIORITY + LATER +- object types + - comment + - quote +- images ? remove?? (currently with inline links) +- anchor tags (for internal links)? + +WISH +- underline headings? +- endnote info on calling object ≫\d+ +- break para text at set width? +- text wrap at text-line-width specified option + +DONE +- line breaks +- font face: bold, italics etc. +- object types + - toc + - inline_link /[┥┝┤├] + - indents + obj.metainfo.heading_lev_markup < 4 + - group + - block + - code + - verse + - table + - endnote section + - CAVEAT: would like to, but do not point back to object number of origin + REDO gathering of endnotes, (get/tie calling ocn) + - bookindex + - bibliography + - glossary + - blurb +- inline_link /[┥┝┤├] +#+END_SRC + |