diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2025-10-03 12:15:42 -0400 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2025-10-03 12:27:26 -0400 |
commit | 297410fc013135e992842b8347c2e0bbec042d20 (patch) | |
tree | ff3e3f0b952ac60e7c7404be63e7c7e0dede4d57 /src/sisudoc/io_out/text.d | |
parent | (editor emacs org-mode includes) (diff) |
- spine --text [--output=output path] [markup source]
Diffstat (limited to 'src/sisudoc/io_out/text.d')
-rw-r--r-- | src/sisudoc/io_out/text.d | 475 |
1 files changed, 475 insertions, 0 deletions
/+
- Name: SisuDoc Spine, Doc Reform [a part of]
  - Description: documents, structuring, processing, publishing, search
    - static content generator

  - Author: Ralph Amissah
    [ralph.amissah@gmail.com]

  - Copyright: (C) 2015 - 2025 Ralph Amissah, All Rights Reserved.

  - License: AGPL 3 or later:

    Spine (SiSU), a framework for document structuring, publishing and
    search

    Copyright (C) Ralph Amissah

    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU Affero General Public License as published by the
    Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.

    You should have received a copy of the GNU General Public License along with
    this program. If not, see [https://www.gnu.org/licenses/].

    If you have Internet connection, the latest version of the AGPL should be
    available at these locations:
    [https://www.fsf.org/licensing/licenses/agpl.html]
    [https://www.gnu.org/licenses/agpl.html]

  - Spine (by Doc Reform, related to SiSU) uses standard:
    - docReform markup syntax
      - standard SiSU markup syntax with modified headers and minor modifications
    - docReform object numbering
      - standard SiSU object citation numbering & system

  - Homepages:
    [https://www.sisudoc.org]
    [https://www.doc-reform.org]

  - Git
    [https://git.sisudoc.org/]

+/
module sisudoc.io_out.text;
@safe:
/+
  Plain text output: "munge" turns each document object into a text
  fragment, "theDocument" assembles the fragments and the metadata tail,
  and the "outputText" function writes the result to the text output file.
+/
template outputText() {
  /+
    One function per object kind (obj.metainfo.is_a); each takes the object
    and doc_matters and returns that object's plain text rendering.
  +/
  template munge() {
    import sisudoc.io_out;
    import sisudoc.io_out.rgx;
    import std.stdio;
    import std.conv;
    import std.conv : to;
    import std.typecons : Nullable;
    mixin spineRgxOut;
    static auto rgx = RgxO();
    // debugging aid: print a label together with this file name and line
    void puts(string _obj_is) {
      writeln(__FILE__, ":", __LINE__, ": ", _obj_is);
    }
    string newline = "\n";
    string newlines = "\n\n";
    /+
      Object number marker shared by every renderer that prints one.
      Returns the object number wrapped in corner brackets, or an empty
      string when the object has no number (ocn == 0) or numbering has been
      switched off (doc_matters.opt.action.ocn_off).
    +/
    string ocnMarker(O,M)(O obj, M doc_matters) {
      if (obj.metainfo.ocn == 0 || doc_matters.opt.action.ocn_off) {
        return "";
      }
      return "「" ~ obj.metainfo.ocn.to!string ~ "」";
    }
    template special_characters_and_font_face() {
      // code objects: only the nbsp placeholder is replaced, the rest of the
      // text is left as it is
      string code(string _txt){
        _txt = _txt
          .replaceAll(rgx.nbsp_char, " ");
        return _txt;
      }
      // general text: replace the nbsp placeholder, turn line break markers
      // into newlines and rewrite each inline font face match into its
      // delimiter-wrapped text form
      string general(string _txt) {
        _txt = _txt
          .replaceAll(rgx.nbsp_char,           " ")
          .replaceAll(rgx.br_line,             "\n")
          .replaceAll(rgx.br_line_inline,      "\n")
          .replaceAll(rgx.br_line_spaced,      "\n\n")
          .replaceAll(rgx.inline_strike,       "-{$1}-")
          .replaceAll(rgx.inline_insert,       "+{$1}+")
          .replaceAll(rgx.inline_cite,         "\"{$1}\"")
          .replaceAll(rgx.inline_emphasis,     "!{$1}!")
          .replaceAll(rgx.inline_bold,         "*{$1}*")
          .replaceAll(rgx.inline_italics,      "/{$1}/")
          .replaceAll(rgx.inline_underscore,   "_{$1}_")
          .replaceAll(rgx.inline_superscript,  "^{$1}^")
          .replaceAll(rgx.inline_subscript,    ",{$1},")
          .replaceAll(rgx.inline_mono,         "#{$1}#");
        return _txt;
      }
      // links become their first capture, followed by " ≫" and the third
      // capture unless that capture is "0"; images become their third capture
      string links_and_images(string _txt){
        if (_txt.matchFirst(rgx.inline_link)) {
          // the matches are taken from the text as it was on entry, while
          // each replaceFirst consumes the first link still present in _txt
          // NOTE(review): the captured text is handed to replaceFirst as a
          // replacement format string, so a "$" inside a capture would be
          // read as a format directive - confirm captures cannot contain "$"
          foreach (m; _txt.matchAll(rgx.inline_link)) {
            if (m.captures[3] == "0") {
              _txt = _txt
                .replaceFirst(rgx.inline_link, (m.captures[1]));
            } else {
              _txt = _txt
                .replaceFirst(rgx.inline_link, (m.captures[1] ~ " ≫" ~ m.captures[3]));
            }
          }
        }
        if (_txt.matchFirst(rgx.inline_image)) {
          foreach (m; _txt.matchAll(rgx.inline_image)) {
            _txt = _txt
              .replaceFirst(rgx.inline_image, (m.captures[3]));
          }
        }
        return _txt;
      }
    }
    /+
      Common rendering used by most object kinds: collects the inline notes
      and lists them after the text, replaces each note in the text by its
      bracketed first capture, applies the character, font face, link and
      image rewrites, then appends the object number marker (if any).
      Headings get an extra leading newline.
    +/
    string generalMunge(O,M)(O obj, M doc_matters) {
      string _txt = obj.text;
      string _notes;
      string general_munge;
      string _ocn = ocnMarker(obj, doc_matters);
      if (_ocn.length > 0) {
        _ocn ~= newline;
      }
      if (_txt.matchFirst(rgx.inline_notes_al_gen)) {
        foreach (m; _txt.matchAll(rgx.inline_notes_al_regular_number_note)) {
          _notes ~= newlines ~ m["num"] ~ ". " ~ m["note"];
        }
      }
      _txt = _txt.replaceAll(rgx.inline_notes_al_regular_number_note, "[$1]");
      if (obj.metainfo.is_a == "code") {
        _txt = special_characters_and_font_face!().code(_txt);
      } else {
        _txt = special_characters_and_font_face!().general(_txt);
      }
      _txt = special_characters_and_font_face!().links_and_images(_txt);
      if (obj.metainfo.is_a == "heading") {
        general_munge = newline ~ _txt ~ _notes ~ newline ~ _ocn ~ newline;
      } else {
        general_munge = _txt ~ _notes ~ newline ~ _ocn ~ newline;
      }
      return general_munge;
    }
    /+
      Table of contents entry: every link is rewritten to a prefix chosen by
      obj.attrib.indent_hang, followed by the first capture, " ≫ " and the
      third capture.
    +/
    string toc(O,M)(O obj, M doc_matters) {
      string _txt = special_characters_and_font_face!().general(obj.text);
      string _spaces;
      // NOTE(review): levels 4 to 8 all yield the same single space prefix
      // here - confirm whether deeper levels were meant to indent further
      switch (obj.attrib.indent_hang) {
      case 1: _spaces = "";
        break;
      case 2: _spaces = ":";
        break;
      case 3: _spaces = "∴";
        break;
      case 4: _spaces = " ";
        break;
      case 5: _spaces = " ";
        break;
      case 6: _spaces = " ";
        break;
      case 7: _spaces = " ";
        break;
      case 8: _spaces = " ";
        break;
      default:
        break;
      }
      _txt = _txt.replaceAll(rgx.inline_link, (_spaces ~ "$1 ≫ $3"));
      return _txt ~ newline;
    }
    // heading: rendered by generalMunge, which adds a leading newline
    string heading(O,M)(O obj, M doc_matters) {
      string _general_munge = generalMunge(obj,doc_matters);
      return _general_munge;
    }
    // para: rendered by generalMunge
    string para(O,M)(O obj, M doc_matters) {
      string _general_munge = generalMunge(obj,doc_matters);
      return _general_munge;
    }
    string group(O,M)(O obj, M doc_matters) {
      /+
        The "group" is different from the "block" mark in that "group" does not
        preserve whitespace, the "block" mark does. The text falling within the
        block is a single object.
      +/
      string _general_munge = generalMunge(obj,doc_matters);
      return _general_munge;
    }
    string block(O,M)(O obj, M doc_matters) {
      /+
        The "block" is different from the "group" mark in that the "block" mark
        (like the "poem" mark) preserves whitespace, the "group" mark does not.
        The text falling within the "block" is a single object, which is different
        from the "poem" mark where each identified verse is an object.
      +/
      string _general_munge = generalMunge(obj,doc_matters);
      return _general_munge;
    }
    string poem(O,M)(O obj, M doc_matters) { // LATER
      /+
        The "poem" mark like the "block" preserves whitespace. Text followed by
        two newlines are identified as verse and each verse is an object i.e. a
        poem may consist of multiple verse each of which is identified as an
        object, unlike a text "block" which is identified as a single object.
      +/
      return obj.text ~ newlines;
    }
    string verse(O,M)(O obj, M doc_matters) {
      /+
        See description of poem, the poem is demarcated but the verse is the
        object.
      +/
      string _general_munge = generalMunge(obj,doc_matters);
      return _general_munge;
    }
    string code(O,M)(O obj, M doc_matters) {
      /+
        "Code" blocks are a single text object, in which the original text is
        preserved.
      +/
      string _general_munge = generalMunge(obj,doc_matters);
      return _general_munge;
    }
    // quote: the unmodified text followed by the object number marker; the
    // marker line is left out when ocnMarker returns nothing, in line with
    // generalMunge
    string quote(O,M)(O obj, M doc_matters) { // LATER
      string _ocn = ocnMarker(obj, doc_matters);
      if (_ocn.length == 0) {
        return obj.text ~ newlines;
      }
      return obj.text ~ newline ~ _ocn ~ newlines;
    }
    /+
      Table: cells are padded to the widest cell of their column and joined
      with " ┊ "; a trailing single-cell row is treated as a table note and
      printed after the table, followed by the object number marker (if any).
    +/
    string table(O,M)(O obj, M doc_matters) {
      // returns tuple(table text, table note text)
      auto tablarize(O)(
        string     _txt,
        const    O obj,
      ) {
        string[] _table_rows = (_txt).split(rgx.table_delimiter_row);
        string[] _table_cols;
        string _table = "";
        string _tablenote = "";
        int[] _col_width;
        _col_width.length = obj.table.number_of_columns.to!ulong;
        // compiled once here rather than once per cell
        auto _trailing_space = regex("\\s*$");
        // first pass: find the widest cell of each column (cell.length is
        // the length in UTF-8 code units), table note rows are not measured
        foreach(row_idx, row; _table_rows) {
          _table_cols = row.split(rgx.table_delimiter_col);
          foreach(col_idx, cell; _table_cols) {
            if (!((_table_cols.length == 1)
              && (_table_rows.length <= row_idx+2))) {
              // a row holding more cells than number_of_columns would
              // otherwise index past the end of _col_width
              if (col_idx >= _col_width.length) {
                _col_width.length = col_idx + 1;
              }
              if (_col_width[col_idx] < (cell.length.to!int)) {
                _col_width[col_idx] = cell.length.to!int;
              }
            }
          }
        }
        // second pass: emit the padded cells, or collect the table note
        foreach(row_idx, row; _table_rows) {
          _table_cols = row.split(rgx.table_delimiter_col);
          foreach(col_idx, cell; _table_cols) {
            if ((_table_cols.length == 1)
              && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx)
              _tablenote ~= cell ~ newline;
            } else {
              // columns without an alignment entry are right aligned, as
              // are all columns whose entry is not "l"
              bool _align_left = (col_idx < obj.table.column_aligns.length)
                && (obj.table.column_aligns[col_idx] == "l");
              if (_align_left) {
                _table ~= format(q"┃%-*s%s┃",
                  _col_width[col_idx],
                  cell,
                  (_table_cols.length > (col_idx + 1)) ? " ┊ " : ""
                );
              } else {
                _table ~= format(q"┃%*s%s┃",
                  _col_width[col_idx],
                  cell,
                  (_table_cols.length > (col_idx + 1)) ? " ┊ " : ""
                );
              }
              // drop whitespace at the very end of the table built so far
              _table = _table
                .replaceAll(_trailing_space, "");
            }
          }
          _table ~= newline;
        }
        Tuple!(string, string) t = tuple(
          _table,
          _tablenote,
        );
        return t;
      }
      auto _t = tablarize(obj.text, obj);
      string _txt = _t[0];
      string _tablenote = _t[1];
      return _txt ~ _tablenote ~ ocnMarker(obj, doc_matters) ~ newlines;
    }
    // endnote: the first link and the first superscript are reduced to their
    // first capture before the general rewrites; no object number is printed
    string endnote(O,M)(O obj, M doc_matters) {
      string _txt = obj.text;
      _txt = _txt
        .replaceFirst(rgx.inline_link, ("$1"))
        .replaceFirst(rgx.inline_superscript, ("$1"));
      _txt = special_characters_and_font_face!().general(_txt);
      return _txt ~ newlines;
    }
    // bookindex: links become "≫" followed by their first capture, and each
    // backslash together with the whitespace before it is removed
    string bookindex(O,M)(O obj, M doc_matters) {
      string _txt = obj.text;
      _txt = _txt
        .replaceAll(rgx.inline_link, ("≫$1"))
        .replaceAll(regex("\\s*\\\\"), "");
      _txt = special_characters_and_font_face!().general(_txt);
      return _txt ~ newlines;
    }
    // bibliography: general rewrites only, followed by a blank line
    string bibliography(O,M)(O obj, M doc_matters) {
      string _txt = obj.text;
      _txt = special_characters_and_font_face!().general(_txt);
      return _txt ~ newlines;
      // ALT:
      // string _general_munge = generalMunge(obj,doc_matters);
      // return _general_munge;
    }
    // glossary: general rewrites only, nothing is appended
    string glossary(O,M)(O obj, M doc_matters) {
      string _txt = obj.text;
      _txt = special_characters_and_font_face!().general(_txt);
      return _txt;
    }
    // blurb: rendered by generalMunge
    string blurb(O,M)(O obj, M doc_matters) {
      string _general_munge = generalMunge(obj,doc_matters);
      return _general_munge;
    }
    // comment: the unmodified text followed by a blank line
    string comment(O,M)(O obj, M doc_matters) { // LATER
      return obj.text ~ newlines;
    }
  }
  /+
    Assembles the parts of the text document: head (currently a
    placeholder), body and metadata tail.
  +/
  template theDocument() {
    import std.stdio;
    import sisudoc.io_out;
    // placeholder, the call in outputText is commented out
    string text_head(M)(
      M  doc_matters,
    ) {
      return "head";
    }
    /+
      Walks the sections listed in doc_matters.has.keys_seq.scroll and
      concatenates the rendering of every object, chosen by
      obj.metainfo.is_a; objects of any other kind contribute nothing.
    +/
    string text_body(D,M)(
      const D  doc_abstraction,
            M  doc_matters,
    ) {
      string doc_object = "";
      foreach (section; doc_matters.has.keys_seq.scroll) {
        foreach (obj; doc_abstraction[section]) {
          switch (obj.metainfo.is_a) {
          case "toc":
            doc_object ~= munge!().toc(obj, doc_matters);
            break;
          case "heading":
            doc_object ~= munge!().heading(obj, doc_matters);
            break;
          case "para":
            doc_object ~= munge!().para(obj, doc_matters);
            break;
          case "group":
            doc_object ~= munge!().group(obj, doc_matters);
            break;
          case "block":
            doc_object ~= munge!().block(obj, doc_matters);
            break;
          case "poem": // CHECK
            doc_object ~= munge!().poem(obj, doc_matters);
            break;
          case "verse": // CHECK
            doc_object ~= munge!().verse(obj, doc_matters);
            break;
          case "code":
            doc_object ~= munge!().code(obj, doc_matters);
            break;
          case "quote": // LATER
            doc_object ~= munge!().quote(obj, doc_matters);
            break;
          case "table":
            doc_object ~= munge!().table(obj, doc_matters);
            break;
          case "endnote":
            doc_object ~= munge!().endnote(obj, doc_matters);
            break;
          case "bookindex": // CHECK
            doc_object ~= munge!().bookindex(obj, doc_matters);
            break;
          case "bibliography": // CHECK
            doc_object ~= munge!().bibliography(obj, doc_matters);
            break;
          case "glossary": // CHECK
            doc_object ~= munge!().glossary(obj, doc_matters);
            break;
          case "blurb": // CHECK
            doc_object ~= munge!().blurb(obj, doc_matters);
            break;
          case "comment": // LATER
            doc_object ~= munge!().comment(obj, doc_matters);
            break;
          default:
            break;
          }
        }
      }
      return doc_object;
    }
    /+
      Builds the metadata tail (title, author, published, copyright,
      license, generator name and home url). Missing title or author is
      reported as ERROR, missing copyright or license as WARNING, in both
      cases only when debug_do or vox_gt_3 is set.
    +/
    string text_tail(M)(
      M  doc_matters,
    ) {
      string metadata_;
      if (doc_matters.opt.action.debug_do) {
        writeln(doc_matters.src.filename_base);
        writeln("Title: ", doc_matters.conf_make_meta.meta.title_full);
        writeln(" Author: ", doc_matters.conf_make_meta.meta.creator_author);
        writeln(" Published: ", doc_matters.conf_make_meta.meta.date_published);
        writeln(" Copyright: ", doc_matters.conf_make_meta.meta.rights_copyright);
        writeln(" License: ", doc_matters.conf_make_meta.meta.rights_license);
      }
      if (!(doc_matters.conf_make_meta.meta.title_full.empty)) {
        metadata_ ~= "Title: " ~ doc_matters.conf_make_meta.meta.title_full ~ "\n\n";
      } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) {
        writeln("ERROR no Title information provided in document header ", doc_matters.src.filename_base);
      }
      if (!(doc_matters.conf_make_meta.meta.creator_author.empty)) {
        if (doc_matters.opt.action.html_link_curate) {
          // NOTE(review): the underscore-joined surname is placed directly
          // in front of the author name with no separator; this looks
          // carried over from an HTML link target - confirm it is wanted in
          // plain text output
          metadata_ ~= "Author: " ~ doc_matters.conf_make_meta.meta.creator_author_surname.translate([' ' : "_"])
            ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n";
        } else {
          metadata_ ~= "Author: "
            ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n";
        }
      } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) {
        writeln("ERROR no Author information provided in document header ", doc_matters.src.filename_base);
      }
      // NOTE(review): unlike the other fields, "Published: " is written even
      // when date_published is empty - confirm this is intended
      metadata_ ~= "Published: " ~ doc_matters.conf_make_meta.meta.date_published ~ "\n\n";
      if (!(doc_matters.conf_make_meta.meta.rights_copyright.empty)) {
        metadata_ ~= "Copyright: " ~ doc_matters.conf_make_meta.meta.rights_copyright ~ "\n\n";
      } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) {
        writeln("WARNING no Copyright information provided in document header ", doc_matters.src.filename_base);
      }
      if (!(doc_matters.conf_make_meta.meta.rights_license.empty)) {
        metadata_ ~= "License: " ~ doc_matters.conf_make_meta.meta.rights_license ~ "\n\n";
      } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) {
        writeln("WARNING no License information provided in document header ", doc_matters.src.filename_base);
      }
      metadata_ ~= doc_matters.generator_program.project_name.strip ~ "\n";
      metadata_ ~= doc_matters.generator_program.url_home.strip;
      return metadata_;
    }
  }
  /+
    Entry point: renders the document body and metadata tail and writes
    them to the text file given by spinePathsText(doc_matters), creating
    the output directory first when it does not exist.
  +/
  void outputText(D,M) (
    const D   doc_abstraction,
          M   doc_matters,
  ) {
    import std.stdio;
    import sisudoc.io_out;
    void text_out(D,M)(
      const D   doc_abstraction,
            M   doc_matters,
    ) {
      import std.file : exists, mkdirRecurse, FileException;
      struct Text {
        string head;
        string content;
        string tail;
      }
      auto text = Text();
      // text.head = theDocument!().text_head(doc_matters);
      text.content = theDocument!().text_body(doc_abstraction, doc_matters);
      text.tail = theDocument!().text_tail(doc_matters);
      auto pth_text = spinePathsText(doc_matters);
      try {
        if (!exists(pth_text.base_pth)) {
          (pth_text.base_pth).mkdirRecurse;
        }
      } catch (FileException ex) {
        // mkdirRecurse signals failure with FileException; without the
        // directory the file cannot be opened, so report and skip the write
        writeln("ERROR unable to create text output directory ", pth_text.base_pth, " (", ex.msg, ")");
        return;
      }
      if (doc_matters.opt.action.vox_gt_1) {
        writeln(" ", pth_text.text_file);
      }
      // writeln(pth_text.base_pth);
      auto f = File(pth_text.text_file, "w");
      // f.writeln(text.head);
      f.writeln(text.content);
      f.writeln(text.tail);
    }
    text_out(doc_abstraction, doc_matters);
  }
}