diff options
-rw-r--r-- | data/pod/sisu-spine-markup/media/text/en/sisu-spine-markup.sst | 2 | ||||
-rw-r--r-- | flake.lock | 6 | ||||
-rw-r--r-- | org/default_paths.org | 64 | ||||
-rw-r--r-- | org/default_regex.org | 34 | ||||
-rw-r--r-- | org/ocda.org | 10 | ||||
-rw-r--r-- | org/ocda_functions.org | 17 | ||||
-rw-r--r-- | org/out_metadata.org | 5 | ||||
-rw-r--r-- | org/out_skel.org | 324 | ||||
-rw-r--r-- | org/out_text.org | 572 | ||||
-rw-r--r-- | org/output_hub.org | 28 | ||||
-rw-r--r-- | org/spine.org | 30 | ||||
-rw-r--r-- | org/spine_markup_sample.org | 2 | ||||
-rw-r--r-- | src/sisudoc/io_out/hub.d | 14 | ||||
-rw-r--r-- | src/sisudoc/io_out/metadata.d | 5 | ||||
-rw-r--r-- | src/sisudoc/io_out/paths_output.d | 51 | ||||
-rw-r--r-- | src/sisudoc/io_out/rgx.d | 23 | ||||
-rw-r--r-- | src/sisudoc/io_out/skel.d | 268 | ||||
-rw-r--r-- | src/sisudoc/io_out/text.d | 475 | ||||
-rw-r--r-- | src/sisudoc/meta/metadoc_from_src.d | 10 | ||||
-rw-r--r-- | src/sisudoc/meta/metadoc_from_src_functions.d | 17 | ||||
-rw-r--r-- | src/sisudoc/meta/rgx.d | 33 | ||||
-rwxr-xr-x | src/sisudoc/spine.d | 30 |
22 files changed, 1942 insertions, 78 deletions
diff --git a/data/pod/sisu-spine-markup/media/text/en/sisu-spine-markup.sst b/data/pod/sisu-spine-markup/media/text/en/sisu-spine-markup.sst index 6ae09d1..a2339b9 100644 --- a/data/pod/sisu-spine-markup/media/text/en/sisu-spine-markup.sst +++ b/data/pod/sisu-spine-markup/media/text/en/sisu-spine-markup.sst @@ -1153,7 +1153,7 @@ code(number){ 3~ Tables ={ SiSU markup:tables;tables } -Tables may be prepared in two either of two forms +Tables may be prepared in either of two forms !_ resulting output: @@ -2,11 +2,11 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1757746433, - "narHash": "sha256-fEvTiU4s9lWgW7mYEU/1QUPirgkn+odUBTaindgiziY=", + "lastModified": 1759417375, + "narHash": "sha256-O7eHcgkQXJNygY6AypkF9tFhsoDQjpNEojw3eFs73Ow=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "6d7ec06d6868ac6d94c371458fc2391ded9ff13d", + "rev": "dc704e6102e76aad573f63b74c742cd96f8f1e6c", "type": "github" }, "original": { diff --git a/org/default_paths.org b/org/default_paths.org index 2aaab14..f73a9ef 100644 --- a/org/default_paths.org +++ b/org/default_paths.org @@ -993,6 +993,8 @@ import sisudoc.meta.rgx_files; <<template_paths_sqlite_0>> <<template_paths_sqlite_1>> <<template_paths_sqlite_2>> +<<template_paths_text>> +<<template_paths_skel>> #+END_SRC ** shared out path, base directory :out: @@ -1457,7 +1459,7 @@ template spinePathsODT() { auto spinePathsODT(M)( M doc_matters, ) { - auto out_pth = spineOutPaths!()( doc_matters.output_path, doc_matters.src.language); + auto out_pth = spineOutPaths!()(doc_matters.output_path, doc_matters.src.language); string base_dir = "odf"; struct _PathsStruct { string base_pth() { // dir will contain odt document file (also debug file tree) @@ -1688,6 +1690,66 @@ template spinePathsSQLite() { } #+END_SRC +** _text_ :text: + +#+NAME: template_paths_text +#+BEGIN_SRC d +template spinePathsText() { + import std.conv; + auto spinePathsText(M)( + M doc_matters, + ) { + auto out_pth = spineOutPaths!()(doc_matters.output_path, 
doc_matters.src.language); + string base_dir = "text"; + struct _PathsStruct { + string base_pth() { + return (((out_pth.output_base).chainPath(base_dir)).asNormalizedPath).array; + } + string base_filename(string fn_src) { + return fn_src.baseName.stripExtension; + } + string text_file() { + return ((base_pth.chainPath(doc_matters.src.doc_uid_out ~ ".txt")).asNormalizedPath).array; + } + string dirtop() { + return "".chainPath("").array; + } + } + return _PathsStruct(); + } +} +#+END_SRC + +** _skel_ :skel: + +#+NAME: template_paths_skel +#+BEGIN_SRC d +template spinePathsSkel() { + import std.conv; + auto spinePathsSkel(M)( + M doc_matters, + ) { + auto out_pth = spineOutPaths!()(doc_matters.output_path, doc_matters.src.language); + string base_dir = "skel"; + struct _PathsStruct { + string base_pth() { + return (((out_pth.output_base).chainPath(base_dir)).asNormalizedPath).array; + } + string base_filename(string fn_src) { + return fn_src.baseName.stripExtension; + } + string skel_file() { + return ((base_pth.chainPath(doc_matters.src.doc_uid_out ~ ".skel")).asNormalizedPath).array; + } + string dirtop() { + return "".chainPath("").array; + } + } + return _PathsStruct(); + } +} +#+END_SRC + * org includes ** project version diff --git a/org/default_regex.org b/org/default_regex.org index 9a0ded7..fb2b993 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -308,9 +308,9 @@ static table_col_separator_nl = ctRegex!(`[┊]$`, "mg"); #+BEGIN_SRC d /+ inline markup footnotes endnotes +/ static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); -static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); -static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); -static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); +static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\s*\}~`, "mg"); +static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\s*\}~`, "m"); +static inline_notes_curly_sp_plus = 
ctRegex!(`~\{[+]+\s+(.+?)\s*\}~`, "m"); static note_ref = ctRegex!(`^\S+?noteref_(?P<ref>[0-9]+)`, "mg"); // {^{73.}^}#noteref_73 #+END_SRC @@ -331,8 +331,8 @@ static smid_inline_url_generic = ctRegex!(`(?:^|[}(\[ ])( static smid_inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg"); static smid_inline_link_naked_url = ctRegex!(`(?P<pre>^|[ (\[])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?=[.,;:?!'"]?([ )\]]|$))`, "mg"); static smid_inline_link_markup_regular = ctRegex!(`(?P<pre>^|[ (\[])\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); -static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[.,;:?!]?([ ]|$))`, "mg"); -static smid_inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); +static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[.,;:?!]?([ ]|$))`, "mg"); +static smid_inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); #+END_SRC #+BEGIN_SRC d @@ -507,7 +507,6 @@ static make_breakcolumn = ctRegex!(`break=(?P<breakcolum #+END_SRC * 2. 
ctRegex defaults shared by meta & output (generic) - ** meta #+NAME: prgmkup_rgx_meta @@ -545,9 +544,9 @@ static middle_dot = ctRegex!(`·`, "mg"); static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); static br_linebreaks_newlines = ctRegex!(`[\n┘┙]`, "mg"); static br_linebreaks = ctRegex!(`[┘┙]`, "mg"); -static br_line = ctRegex!(`┘`, "mg"); -static br_line_inline = ctRegex!(`┙`, "mg"); -static br_line_spaced = ctRegex!(`┚`, "mg"); +static br_line = ctRegex!(`\s*┘\s*`, "mg"); +static br_line_inline = ctRegex!(`\s*┙\s*`, "mg"); +static br_line_spaced = ctRegex!(`\s*┚\s*`, "mg"); #+END_SRC #+BEGIN_SRC d @@ -604,6 +603,8 @@ static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m"); static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg"); static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg"); +// static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section +// static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section static inline_notes_al_special_char_note = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg"); static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); @@ -625,13 +626,14 @@ static inline_image = ctRegex!(`(?P<pre>┥)☼(?P<i static inline_image_without_dimensions = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg"); static inline_image_info = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg"); static inline_link_anchor = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor -static 
inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg"); -static inline_link_empty = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg"); -static inline_link_number = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used -static inline_link_number_only = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg"); -static inline_link_stow_uri = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links -static inline_link_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg"); -static inline_link_seg_and_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg"); +// space cleaning should not be necessary +static inline_link = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#?(\S+?))├`, "mg"); +static inline_link_empty = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤├`, "mg"); +static inline_link_number = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<num>[0-9]+)├`, "mg"); // not used +static inline_link_number_only = ctRegex!(`\s*(?P<linked_text>\s*┥.+?┝)┤(?P<num>[0-9]+)├`, "mg"); +static inline_link_stow_uri = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links +static inline_link_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#(?P<hash>\S+?))├`, "mg"); +static inline_link_seg_and_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg"); static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg"); static inline_link_toc_to_backmatter = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg"); static url = ctRegex!(`https?://`, "mg"); diff --git a/org/ocda.org b/org/ocda.org index d03d1cc..96fe139 100644 --- a/org/ocda.org +++ b/org/ocda.org @@ -972,7 +972,7 @@ ST_endnotes en_st = note_section.backmatter_endnote_objects(obj_cite_digits, opt } { // document segnames ST_segnames get_segnames; - get_segnames = 
the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); // + get_segnames = the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); segnames = get_segnames.segnames; html_segnames_ptr_cntr = get_segnames.html_segnames_ptr_cntr; html_segnames_ptr = get_segnames.html_segnames_ptr; @@ -1409,36 +1409,42 @@ string[][string] document_section_keys_sequenced = [ "scroll": ["head", "toc", "body",], "seg": ["head", "toc", "body",], "sql": ["head", "body",], - "latex": ["head", "toc", "body",] + "latex": ["head", "toc", "body",], + "text": ["head", "toc", "body",], ]; if (document_the["endnotes"].length > 1) { document_section_keys_sequenced["scroll"] ~= "endnotes"; document_section_keys_sequenced["seg"] ~= "endnotes"; document_section_keys_sequenced["latex"] ~= "endnotes"; + document_section_keys_sequenced["text"] ~= "endnotes"; } if (document_the["glossary"].length > 1) { document_section_keys_sequenced["scroll"] ~= "glossary"; document_section_keys_sequenced["seg"] ~= "glossary"; document_section_keys_sequenced["sql"] ~= "glossary"; document_section_keys_sequenced["latex"] ~= "glossary"; + document_section_keys_sequenced["text"] ~= "glossary"; } if (document_the["bibliography"].length > 1) { document_section_keys_sequenced["scroll"] ~= "bibliography"; document_section_keys_sequenced["seg"] ~= "bibliography"; document_section_keys_sequenced["sql"] ~= "bibliography"; document_section_keys_sequenced["latex"] ~= "bibliography"; + document_section_keys_sequenced["text"] ~= "bibliography"; } if (document_the["bookindex"].length > 1) { 
document_section_keys_sequenced["scroll"] ~= "bookindex"; document_section_keys_sequenced["seg"] ~= "bookindex"; document_section_keys_sequenced["sql"] ~= "bookindex"; document_section_keys_sequenced["latex"] ~= "bookindex"; + document_section_keys_sequenced["text"] ~= "bookindex"; } if (document_the["blurb"].length > 1) { document_section_keys_sequenced["scroll"] ~= "blurb"; document_section_keys_sequenced["seg"] ~= "blurb"; document_section_keys_sequenced["sql"] ~= "blurb"; document_section_keys_sequenced["latex"] ~= "blurb"; + document_section_keys_sequenced["text"] ~= "blurb"; } if ((opt_action.html) || (opt_action.html_scroll) diff --git a/org/ocda_functions.org b/org/ocda_functions.org index 90574f4..cba37b9 100644 --- a/org/ocda_functions.org +++ b/org/ocda_functions.org @@ -2747,10 +2747,8 @@ static struct ObjInlineMarkup { CMM conf_make_meta, Flag!"_new_doc" _new_doc ) { - obj_txt["munge"] = obj_[obj_key_].dup; - obj_txt["munge"] = (obj_["is"].match(ctRegex!(`verse|code`))) - ? obj_txt["munge"] - : obj_txt["munge"].strip; + obj_txt["munge"] = obj_[obj_key_].dup; + obj_txt["munge"] = (obj_["is"].match(ctRegex!(`verse|code`))) ? 
obj_txt["munge"] : obj_txt["munge"].strip; if (_new_doc) { anchor_tag = ""; } @@ -2769,8 +2767,8 @@ static struct ObjInlineMarkup { || (obj_["is"] == "group") || (obj_["is"] == "block") || (obj_["is"] == "verse")) { - obj_txt["munge"] = (obj_txt["munge"]).inline_markup_faces; - obj_txt["munge"] = (obj_txt["munge"]).links_and_images; + obj_txt["munge"] = (obj_txt["munge"]).inline_markup_faces; + obj_txt["munge"] = (obj_txt["munge"]).links_and_images; } switch (obj_["is"]) { case "heading": @@ -3553,8 +3551,8 @@ ubyte[32] obj_digest()( // ↓ - endnotes struct NotesSection { string[string] object_notes; - int previous_count; - int mkn; + int previous_count; + int mkn; static auto rgx = RgxI(); private auto gather_notes_for_endnote_section( ObjGenericComposite[] contents_am, @@ -5652,6 +5650,9 @@ auto docSectKeysSeq(string[][string] document_section_keys_sequenced) { string[] latex() { return document_section_keys_sequenced["latex"]; } + string[] text() { + return document_section_keys_sequenced["text"]; + } } return doc_sect_keys_seq(); } diff --git a/org/out_metadata.org b/org/out_metadata.org index 5a7e579..d92ff92 100644 --- a/org/out_metadata.org +++ b/org/out_metadata.org @@ -83,6 +83,7 @@ if (doc_matters.opt.action.debug_do) { } auto pth_html = spinePathsHTML!()(doc_matters.output_path, doc_matters.src.language); auto pth_epub = spinePathsEPUB!()(doc_matters.output_path, doc_matters.src.language); +auto pth_text = spinePathsText!()(doc_matters); auto pth_pdf = spinePathsPDF!()(doc_matters); auto pth_pod = spinePathsPods!()(doc_matters); metadata_ ~= format(q"┃<body lang="en" xml:lang="en"> @@ -164,6 +165,10 @@ if ((doc_matters.opt.action.html_link_pdf) || (doc_matters.opt.action.html_link_ ~ "." ~ doc_matters.src.language ~ ".letter.portrait.pdf\" class=\"lnkicon\">" ~ " □ pdf (U.S. letter) </a>] "; } +if (doc_matters.opt.action.html_link_text) { + metadata_ ~= " [<a href=\"../" ~ "text/" ~ doc_matters.src.filename_base ~ "." 
~ doc_matters.src.language ~ ".txt\" class=\"lnkicon\">" + ~ " □ txt </a>] "; +} metadata_ ~= "</p>"; if (doc_matters.opt.action.html_link_markup_source) { metadata_ ~= "<hr /><p class=\"lev1\">source: " ~ doc_matters.src.filename_base ~ "</p>"; diff --git a/org/out_skel.org b/org/out_skel.org new file mode 100644 index 0000000..43be371 --- /dev/null +++ b/org/out_skel.org @@ -0,0 +1,324 @@ +-*- mode: org -*- +#+TITLE: sisudoc spine (doc_reform) output xmls +#+DESCRIPTION: documents - structuring, publishing in multiple formats & search +#+FILETAGS: :spine:output:skel: +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+COPYRIGHT: Copyright (C) 2015 - 2025 Ralph Amissah +#+LANGUAGE: en +#+STARTUP: content hideblocks hidestars noindent entitiespretty +#+PROPERTY: header-args :exports code +#+PROPERTY: header-args+ :noweb yes +#+PROPERTY: header-args+ :results output none +#+PROPERTY: header-args+ :cache no +#+PROPERTY: header-args+ :padline no +#+PROPERTY: header-args+ :mkdirp yes +#+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t + +- [[./doc-reform.org][doc-reform.org]] [[./][org/]] +- [[./output_hub.org][output_hub]] + +* Skel +** outputSkel template + +#+HEADER: :tangle "../src/sisudoc/io_out/skel.d" +#+HEADER: :noweb yes +#+BEGIN_SRC d +<<doc_header_including_copyright_and_license>> +module sisudoc.io_out.skel; +@safe: +template outputSkel() { + <<munge>> + <<the_document>> + void outputSkel(D,M) ( + const D doc_abstraction, + M doc_matters, + ) { + import std.stdio; + import sisudoc.io_out; + <<skel_out>> + skel_out(doc_abstraction, doc_matters); + } +} +#+END_SRC + +** Output + +#+NAME: skel_out +#+HEADER: :noweb yes +#+BEGIN_SRC d +void skel_out(D,M)( + const D doc_abstraction, + M doc_matters, +) { + struct Skel { + string head; + string content; + string tail; + } + auto skel = Skel(); + skel.head = theDocument!().skel_head(doc_matters); + skel.content = theDocument!().skel_body(doc_abstraction, 
doc_matters); + skel.tail = theDocument!().skel_tail(doc_matters); + auto pth_skel = spinePathsSkel(doc_matters); + try { + import std.file; + if (!exists(pth_skel.base_pth)) { + (pth_skel.base_pth).mkdirRecurse; + } + } catch (ErrnoException ex) { + } + if (doc_matters.opt.action.vox_gt_1) { + writeln(" ", pth_skel.skel_file); + } + // writeln(pth_skel.base_pth); + auto f = File(pth_skel.skel_file, "w"); + f.writeln(skel.head); + f.writeln(skel.content); + f.writeln(skel.tail); +} +#+END_SRC + +* The Document +** theDocument template + +#+NAME: the_document +#+HEADER: :noweb yes +#+BEGIN_SRC d +template theDocument() { + import std.stdio; + import sisudoc.io_out; + <<skel_head>> + <<skel_body_assign_munge>> + <<skel_tail>> +} +#+END_SRC + +** the Document (assign munge) +*** Head + +#+NAME: skel_head +#+HEADER: :noweb yes +#+BEGIN_SRC d +// static auto rgx = RgxO(); +string skel_head(M)( + M doc_matters, +) { + return "head"; +} +#+END_SRC + +*** Body munge assign + +#+NAME: skel_body_assign_munge +#+HEADER: :noweb yes +#+BEGIN_SRC d +string skel_body(D,M)( + const D doc_abstraction, + M doc_matters, +) { + string doc_object = ""; + foreach (section; doc_matters.has.keys_seq.scroll) { + foreach (obj; doc_abstraction[section]) { + if (obj.metainfo.is_a == "toc") { doc_object ~= munge!().toc(obj); } + if (obj.metainfo.is_a == "heading") { doc_object ~= munge!().heading(obj); } + if (obj.metainfo.is_a == "para") { doc_object ~= munge!().para(obj); } + if (obj.metainfo.is_a == "group") { doc_object ~= munge!().group(obj); } + if (obj.metainfo.is_a == "block") { doc_object ~= munge!().block(obj); } + if (obj.metainfo.is_a == "poem") { doc_object ~= munge!().poem(obj); } + if (obj.metainfo.is_a == "verse") { doc_object ~= munge!().verse(obj); } + if (obj.metainfo.is_a == "code") { doc_object ~= munge!().code(obj); } + if (obj.metainfo.is_a == "quote") { doc_object ~= munge!().quote(obj); } + if (obj.metainfo.is_a == "table") { doc_object ~= munge!().table(obj); } + if 
(obj.metainfo.is_a == "endnote") { doc_object ~= munge!().endnote(obj); } + if (obj.metainfo.is_a == "bookindex") { doc_object ~= munge!().bookindex(obj); } + if (obj.metainfo.is_a == "bibliography") { doc_object ~= munge!().bibliography(obj); } + if (obj.metainfo.is_a == "glossary") { doc_object ~= munge!().glossary(obj); } + if (obj.metainfo.is_a == "blurb") { doc_object ~= munge!().blurb(obj); } + if (obj.metainfo.is_a == "comment") { doc_object ~= munge!().comment(obj); } + } + } + return doc_object; +} +#+END_SRC + +*** Tail + +#+NAME: skel_tail +#+HEADER: :noweb yes +#+BEGIN_SRC d +string skel_tail(M)( + M doc_matters, +) { + return "tail"; +} +#+END_SRC + +* Munge + +#+NAME: munge +#+HEADER: :noweb yes +#+BEGIN_SRC d +template munge() { + import std.stdio; + import std.conv; + void puts(string _obj_is) { + writeln(__FILE__, ":", __LINE__, ": ", _obj_is); + } + string newline = "\n"; + string newlines = "\n\n"; + string toc(O)(O obj) { + // puts(obj.metainfo.is_a); + // return "toc\n"; + return obj.text ~ newline; + } + string heading(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string para(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string group(O)(O obj) { + /+ + The "group" is different from the "block" mark in that "group" does not + preserve whitespace, the "block" mark does. The text falling within the + block is a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string block(O)(O obj) { + /+ + The "block" is different from the "group" mark in that the "block" mark + (like the "poem" mark) preserves whitespace, the "group" mark does not. 
+ The text falling within the "block" is a single object, which is different + from the "poem" mark where each identified verse is an object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string poem(O)(O obj) { + /+ + The "poem" mark like the "block" preserves whitespace. Text followed by + two newlines are identified as verse and each verse is an object i.e. a + poem may consist of multiple verse each of which is identified as an + object, unlike a text "block" which is identified as a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + // return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + return obj.text ~ newlines; + } + string verse(O)(O obj) { + /+ + See description of poem, the poem is demarkated but the verse is the + object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string code(O)(O obj) { + /+ + "Code" blocks are a single text object, in which the original text is + preserved. 
+ +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string quote(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string table(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string endnote(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string bookindex(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string bibliography(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string glossary(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string blurb(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string comment(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } +} +#+END_SRC + +* org includes +** spine project VERSION + +#+NAME: spine_version +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp +<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_project_version()>> +#+END_SRC + +** year + +#+NAME: year +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp +<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:year()>> +#+END_SRC + +** document header including copyright & license + +#+NAME: doc_header_including_copyright_and_license +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp 
+<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>> +#+END_SRC + +* __END__ diff --git a/org/out_text.org b/org/out_text.org new file mode 100644 index 0000000..df5ccb2 --- /dev/null +++ b/org/out_text.org @@ -0,0 +1,572 @@ +-*- mode: org -*- +#+TITLE: sisudoc spine (doc_reform) output xmls +#+DESCRIPTION: documents - structuring, publishing in multiple formats & search +#+FILETAGS: :spine:output:text: +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+COPYRIGHT: Copyright (C) 2015 - 2025 Ralph Amissah +#+LANGUAGE: en +#+STARTUP: content hideblocks hidestars noindent entitiespretty +#+PROPERTY: header-args :exports code +#+PROPERTY: header-args+ :noweb yes +#+PROPERTY: header-args+ :results output none +#+PROPERTY: header-args+ :cache no +#+PROPERTY: header-args+ :padline no +#+PROPERTY: header-args+ :mkdirp yes +#+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t + +- [[./doc-reform.org][doc-reform.org]] [[./][org/]] +- [[./output_hub.org][output_hub]] + +* Text +** outputText template + +#+HEADER: :tangle "../src/sisudoc/io_out/text.d" +#+HEADER: :noweb yes +#+BEGIN_SRC d +<<doc_header_including_copyright_and_license>> +module sisudoc.io_out.text; +@safe: +template outputText() { + <<munge>> + <<the_document>> + void outputText(D,M) ( + const D doc_abstraction, + M doc_matters, + ) { + import std.stdio; + import sisudoc.io_out; + <<text_out>> + text_out(doc_abstraction, doc_matters); + } +} +#+END_SRC + +** Output + +#+NAME: text_out +#+HEADER: :noweb yes +#+BEGIN_SRC d +void text_out(D,M)( + const D doc_abstraction, + M doc_matters, +) { + struct Text { + string head; + string content; + string tail; + } + auto text = Text(); + // text.head = theDocument!().text_head(doc_matters); + text.content = theDocument!().text_body(doc_abstraction, doc_matters); + text.tail = theDocument!().text_tail(doc_matters); + auto pth_text = 
spinePathsText(doc_matters); + try { + import std.file; + if (!exists(pth_text.base_pth)) { + (pth_text.base_pth).mkdirRecurse; + } + } catch (ErrnoException ex) { + } + if (doc_matters.opt.action.vox_gt_1) { + writeln(" ", pth_text.text_file); + } + // writeln(pth_text.base_pth); + auto f = File(pth_text.text_file, "w"); + // f.writeln(text.head); + f.writeln(text.content); + f.writeln(text.tail); +} +#+END_SRC + +* The Document +** theDocument template + +#+NAME: the_document +#+HEADER: :noweb yes +#+BEGIN_SRC d +template theDocument() { + import std.stdio; + import sisudoc.io_out; + <<text_head>> + <<text_body_assign_munge>> + <<text_tail>> +} +#+END_SRC + +** the Document (assign munge) +*** Head SKIP + +#+NAME: text_head +#+HEADER: :noweb yes +#+BEGIN_SRC d +string text_head(M)( + M doc_matters, +) { + return "head"; +} +#+END_SRC + +*** Body munge assign + +#+NAME: text_body_assign_munge +#+HEADER: :noweb yes +#+BEGIN_SRC d +string text_body(D,M)( + const D doc_abstraction, + M doc_matters, +) { + string doc_object = ""; + foreach (section; doc_matters.has.keys_seq.scroll) { + foreach (obj; doc_abstraction[section]) { + if (obj.metainfo.is_a == "toc") { doc_object ~= munge!().toc(obj, doc_matters); } + if (obj.metainfo.is_a == "heading") { doc_object ~= munge!().heading(obj, doc_matters); } + if (obj.metainfo.is_a == "para") { doc_object ~= munge!().para(obj, doc_matters); } + if (obj.metainfo.is_a == "group") { doc_object ~= munge!().group(obj, doc_matters); } + if (obj.metainfo.is_a == "block") { doc_object ~= munge!().block(obj, doc_matters); } + if (obj.metainfo.is_a == "poem") { doc_object ~= munge!().poem(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "verse") { doc_object ~= munge!().verse(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "code") { doc_object ~= munge!().code(obj, doc_matters); } + if (obj.metainfo.is_a == "quote") { doc_object ~= munge!().quote(obj, doc_matters); } // LATER + if (obj.metainfo.is_a == "table") { 
doc_object ~= munge!().table(obj, doc_matters); } + if (obj.metainfo.is_a == "endnote") { doc_object ~= munge!().endnote(obj, doc_matters); } + if (obj.metainfo.is_a == "bookindex") { doc_object ~= munge!().bookindex(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "bibliography") { doc_object ~= munge!().bibliography(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "glossary") { doc_object ~= munge!().glossary(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "blurb") { doc_object ~= munge!().blurb(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "comment") { doc_object ~= munge!().comment(obj, doc_matters); } // LATER + } + } + return doc_object; +} +#+END_SRC + +*** Tail + +#+NAME: text_tail +#+HEADER: :noweb yes +#+BEGIN_SRC d +string text_tail(M)( + M doc_matters, +) { + string metadata_; + if (doc_matters.opt.action.debug_do) { + writeln(doc_matters.src.filename_base); + writeln("Title: ", doc_matters.conf_make_meta.meta.title_full); + writeln(" Author: ", doc_matters.conf_make_meta.meta.creator_author); + writeln(" Published: ", doc_matters.conf_make_meta.meta.date_published); + writeln(" Copyright: ", doc_matters.conf_make_meta.meta.rights_copyright); + writeln(" License: ", doc_matters.conf_make_meta.meta.rights_license); + } + if (!(doc_matters.conf_make_meta.meta.title_full.empty)) { + metadata_ ~= "Title: " ~ doc_matters.conf_make_meta.meta.title_full ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("ERROR no Title information provided in document header ", doc_matters.src.filename_base); + } + if (!(doc_matters.conf_make_meta.meta.creator_author.empty)) { + if (doc_matters.opt.action.html_link_curate) { + metadata_ ~= "Author: " ~ doc_matters.conf_make_meta.meta.creator_author_surname.translate([' ' : "_"]) + ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n"; + } else { + metadata_ ~= "Author: " + ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n"; + } + 
} else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("ERROR no Author information provided in document header ", doc_matters.src.filename_base); + } + metadata_ ~= "Published: " ~ doc_matters.conf_make_meta.meta.date_published ~ "\n\n"; + if (!(doc_matters.conf_make_meta.meta.rights_copyright.empty)) { + metadata_ ~= "Copyright: " ~ doc_matters.conf_make_meta.meta.rights_copyright ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("WARNING no Copyright information provided in document header ", doc_matters.src.filename_base); + } + if (!(doc_matters.conf_make_meta.meta.rights_license.empty)) { + metadata_ ~= "License: " ~ doc_matters.conf_make_meta.meta.rights_license ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("WARNING no License information provided in document header ", doc_matters.src.filename_base); + } + metadata_ ~= doc_matters.generator_program.project_name.strip ~ "\n"; + metadata_ ~= doc_matters.generator_program.url_home.strip; + return metadata_; +} +#+END_SRC + +* Munge + +#+NAME: munge +#+HEADER: :noweb yes +#+BEGIN_SRC d +template munge() { + import sisudoc.io_out; + import sisudoc.io_out.rgx; + import std.stdio; + import std.conv; + import std.conv : to; + import std.typecons : Nullable; + mixin spineRgxOut; + static auto rgx = RgxO(); + void puts(string _obj_is) { + writeln(__FILE__, ":", __LINE__, ": ", _obj_is); + } + string newline = "\n"; + string newlines = "\n\n"; + template special_characters_and_font_face() { + string code(string _txt){ + _txt = _txt + .replaceAll(rgx.nbsp_char, " "); + return _txt; + } + string general(string _txt) { + _txt = _txt + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.br_line, "\n") + .replaceAll(rgx.br_line_inline, "\n") + .replaceAll(rgx.br_line_spaced, "\n\n") + .replaceAll(rgx.inline_strike, "-{$1}-") + .replaceAll(rgx.inline_insert, "+{$1}+") + 
.replaceAll(rgx.inline_cite, "\"{$1}\"") + .replaceAll(rgx.inline_emphasis, "!{$1}!") + .replaceAll(rgx.inline_bold, "*{$1}*") + .replaceAll(rgx.inline_italics, "/{$1}/") + .replaceAll(rgx.inline_underscore, "_{$1}_") + .replaceAll(rgx.inline_superscript, "^{$1}^") + .replaceAll(rgx.inline_subscript, ",{$1},") + .replaceAll(rgx.inline_mono, "#{$1}#"); + return _txt; + } + string links_and_images(string _txt){ + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + if (m.captures[3] == "0") { + _txt = _txt + .replaceFirst(rgx.inline_link, (m.captures[1])); + } else { + _txt = _txt + .replaceFirst(rgx.inline_link, (m.captures[1] ~ " ≫" ~ m.captures[3])); + } + } + } + if (_txt.matchFirst(rgx.inline_image)) { + foreach (m; _txt.matchAll(rgx.inline_image)) { + _txt = _txt + .replaceFirst(rgx.inline_image, (m.captures[3])); + } + } + return _txt; + } + } + string generalMunge(O,M)(O obj, M doc_matters) { + string _txt = obj.text; + string _notes; + string _ocn; + string general_munge; + if (obj.metainfo.ocn == 0 || doc_matters.opt.action.ocn_off) { + _ocn = ""; + } else { + _ocn = "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newline; + } + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_regular_number_note)) { + _notes ~= newlines ~ m["num"] ~ ". 
" ~ m["note"]; + } + } + _txt = _txt.replaceAll(rgx.inline_notes_al_regular_number_note, "[$1]"); + if (obj.metainfo.is_a == "code") { + _txt = special_characters_and_font_face!().code(_txt); + } else { + _txt = special_characters_and_font_face!().general(_txt); + } + _txt = special_characters_and_font_face!().links_and_images(_txt); + if (obj.metainfo.is_a == "heading") { + general_munge = newline ~ _txt ~ _notes ~ newline ~ _ocn ~ newline; + } else { + general_munge = _txt ~ _notes ~ newline ~ _ocn ~ newline; + } + return general_munge; + } + string toc(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return "toc\n"; + // _txt = _special_characters_and_font_face(obj.text); + string _txt = special_characters_and_font_face!().general(obj.text); + string _spaces; + switch (obj.attrib.indent_hang) { + case 1: _spaces = ""; + break; + case 2: _spaces = ":"; + break; + case 3: _spaces = "∴"; + break; + case 4: _spaces = " "; + break; + case 5: _spaces = " "; + break; + case 6: _spaces = " "; + break; + case 7: _spaces = " "; + break; + case 8: _spaces = " "; + break; + default: + break; + } + _txt = _txt.replaceAll(rgx.inline_link, (_spaces ~ "$1 ≫ $3")); + return _txt ~ newline; + } + string heading(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string para(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string group(O,M)(O obj, M doc_matters) { + /+ + The "group" is different from the "block" mark in that "group" does not + preserve whitespace, the "block" mark does. The text falling within the + block is a single object. 
+ +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string block(O,M)(O obj, M doc_matters) { + /+ + The "block" is different from the "group" mark in that the "block" mark + (like the "poem" mark) preserves whitespace, the "group" mark does not. + The text falling within the "block" is a single object, which is different + from the "poem" mark where each identified verse is an object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string poem(O,M)(O obj, M doc_matters) { // LATER + /+ + The "poem" mark like the "block" preserves whitespace. Text followed by + two newlines is identified as a verse and each verse is an object, i.e. a + poem may consist of multiple verses each of which is identified as an + object, unlike a text "block" which is identified as a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string verse(O,M)(O obj, M doc_matters) { + /+ + See description of poem, the poem is demarcated but the verse is the + object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string code(O,M)(O obj, M doc_matters) { + /+ + "Code" blocks are a single text object, in which the original text is + preserved. 
+ +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string quote(O,M)(O obj, M doc_matters) { // LATER + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string table(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + auto tablarize(O)( + string _txt, + const O obj, + ) { + string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_cols; + string _table = ""; + string _tablenote = ""; + int[] _col_width; + _col_width.length = obj.table.number_of_columns.to!ulong; + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + _table ~= ""; + foreach(col_idx, cell; _table_cols) { + if (!((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2))) { + if (_col_width[col_idx] < (cell.length.to!int)) { + _col_width[col_idx] = cell.length.to!int; + } + } + } + } + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + foreach(col_idx, cell; _table_cols) { + if ((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) + _tablenote ~= cell ~ newline; + } else { + if (obj.table.column_aligns[col_idx] == "l") { + _table ~= format(q"┃%-*s%s┃", + _col_width[col_idx], + cell, + (_table_cols.length > (col_idx + 1)) ? " ┊ " : "" + ); + } else { + _table ~= format(q"┃%*s%s┃", + _col_width[col_idx], + cell, + (_table_cols.length > (col_idx + 1)) ? 
" ┊ " : "" + ); + } + _table = _table + .replaceAll(regex("\\s*$"), ""); + } + } + _table ~= newline; + } + Tuple!(string, string) t = tuple( + _table, + _tablenote, + ); + return t; + } + // string _txt = obj.text; + // writeln(obj.table.column_widths); + auto _t = tablarize(obj.text, obj); + string _txt = _t[0]; + string _tablenote = _t[1]; + return _txt ~ _tablenote ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string endnote(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _ocn; + _ocn = "「" ~ obj.metainfo.ocn.to!string ~ "」"; + string _txt = obj.text; + _txt = _txt + .replaceFirst(rgx.inline_link, ("$1")) + .replaceFirst(rgx.inline_superscript, ("$1")); + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + } + string bookindex(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = _txt + .replaceAll(rgx.inline_link, ("≫$1")) + .replaceAll(regex("\\s*\\\\"), ""); + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + } + string bibliography(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + // ALT: + // string _general_munge = generalMunge(obj,doc_matters); + // return _general_munge; + } + string glossary(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = special_characters_and_font_face!().general(_txt); + return _txt; + } + string blurb(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string comment(O,M)(O obj, M doc_matters) { // LATER + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + 
return obj.text ~ newlines; + } +} +#+END_SRC + +* org includes +** spine project VERSION + +#+NAME: spine_version +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp +<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_project_version()>> +#+END_SRC + +** year + +#+NAME: year +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp +<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:year()>> +#+END_SRC + +** document header including copyright & license + +#+NAME: doc_header_including_copyright_and_license +#+HEADER: :noweb yes +#+BEGIN_SRC emacs-lisp +<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>> +#+END_SRC + +* __END__ +* TODO WORKON + +#+BEGIN_SRC org +TODO + PRIORITY + LATER +- object types + - comment + - quote +- images ? remove?? (currently with inline links) +- anchor tags (for internal links)? + +WISH +- underline headings? +- endnote info on calling object ≫\d+ +- break para text at set width? +- text wrap at text-line-width specified option + +DONE +- line breaks +- font face: bold, italics etc. 
+- object types + - toc + - inline_link /[┥┝┤├] + - indents + obj.metainfo.heading_lev_markup < 4 + - group + - block + - code + - verse + - table + - endnote section + - CAVEAT: would like to, but do not point back to object number of origin + REDO gathering of endnotes, (get/tie calling ocn) + - bookindex + - bibliography + - glossary + - blurb +- inline_link /[┥┝┤├] +#+END_SRC + diff --git a/org/output_hub.org b/org/output_hub.org index e7216f1..47123b2 100644 --- a/org/output_hub.org +++ b/org/output_hub.org @@ -35,7 +35,7 @@ template outputHub() { @system void outputHub(D)(D doc) { mixin Msg; auto msg = Msg!()(doc.matters); - enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff } + enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff, text, skel } void Scheduled(D)(int sched, D doc) { auto msg = Msg!()(doc.matters); <<output_scheduled_task_source_or_pod>> @@ -45,8 +45,10 @@ template outputHub() { <<output_scheduled_task_html_seg>> <<output_scheduled_task_html_out>> <<output_scheduled_task_latex>> + <<output_scheduled_task_text>> <<output_scheduled_task_odt>> <<output_scheduled_task_sqlite>> + <<output_scheduled_task_skel>> } if (doc.matters.opt.action.vox_gt_1) { writeln(doc.matters.src.filename_base); } if (!(doc.matters.opt.action.parallelise_subprocesses)) { @@ -230,6 +232,18 @@ if (sched == outTask.latex) { } #+END_SRC +**** text :text:txt: + +#+NAME: output_scheduled_task_text +#+BEGIN_SRC d +if (sched == outTask.text) { + msg.v("text processing... "); + import sisudoc.io_out.text; + outputText!()(doc.abstraction, doc.matters); + msg.vv("text done"); +} +#+END_SRC + **** odf / odt :odf:odt: #+NAME: output_scheduled_task_odt @@ -242,6 +256,18 @@ if (sched == outTask.odt) { } #+END_SRC +**** skel :skel:text: + +#+NAME: output_scheduled_task_skel +#+BEGIN_SRC d +if (sched == outTask.skel) { + msg.v("skel processing... 
"); + import sisudoc.io_out.skel; + outputSkel!()(doc.abstraction, doc.matters); + msg.vv("skel done"); +} +#+END_SRC + **** sqlite discrete :sqlite: #+NAME: output_scheduled_task_sqlite diff --git a/org/spine.org b/org/spine.org index 6acc62f..8f0af14 100644 --- a/org/spine.org +++ b/org/spine.org @@ -311,6 +311,7 @@ bool[string] opts = [ "html-link-pdf-a4" : false, "html-link-pdf-letter" : false, "html-link-search" : false, + "html-link-text" : false, "html-seg" : false, "html-scroll" : false, "latex" : false, @@ -343,6 +344,7 @@ bool[string] opts = [ "show-pod" : false, "show-sqlite" : false, "show-summary" : false, + "skel" : false, "source" : false, "sqlite-discrete" : false, "sqlite-db-create" : false, @@ -431,6 +433,7 @@ auto helpInfo = getopt(args, "html-link-pdf", "provide html link to pdf a4 & letter output", &opts["html-link-pdf"], "html-link-pdf-a4", "provide html link to pdf a4 output", &opts["html-link-pdf-a4"], "html-link-pdf-letter", "provide html link to pdf letter size output", &opts["html-link-pdf-letter"], + "html-link-text", "provide html link to text output", &opts["html-link-text"], "html-link-search", "html embedded search submission", &opts["html-link-search"], "html-seg", "process html output", &opts["html-seg"], "html-scroll", "process html output", &opts["html-scroll"], @@ -480,6 +483,7 @@ auto helpInfo = getopt(args, "set-digest", "default hash digest type (e.g. sha256)", &settings["set-digest"], "set-papersize", "default papersize (latex pdf eg. a4 or a5 or b4 or letter)", &settings["set-papersize"], "set-textwrap", "default textwrap (e.g. 
80 (characters)", &settings["set-textwrap"], + "skel", "skel (dummy outline)", &opts["skel"], "sqlite-discrete", "process discrete sqlite output", &opts["sqlite-discrete"], "sqlite-db-create", "create db, create tables", &opts["sqlite-db-create"], "sqlite-db-drop", "drop tables & db", &opts["sqlite-db-drop"], @@ -527,7 +531,7 @@ if (helpInfo.helpWanted) { #+NAME: spine_args_get_options_aa2str #+BEGIN_SRC d -enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff } +enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff, text, skel } struct OptActions { @trusted bool allow_downloads() { return opts["allow-downloads"]; @@ -631,6 +635,12 @@ struct OptActions { @trusted bool html_link_pdf_letter() { return (opts["html-link-pdf-letter"]) ? true : false; } + @trusted bool html_link_text() { + return (opts["html-link-text"]) ? true : false; + } + @trusted bool text_link_curate() { + return (opts["text-link-curate"]) ? true : false; + } @trusted bool html_link_search() { return (opts["html-link-search"]) ? true : false; } @@ -739,6 +749,12 @@ struct OptActions { || opts["sqlite-update"] ) ? 
true : false; } + @trusted bool skel() { + return opts["skel"]; + } + @trusted bool text() { + return opts["text"]; + } @trusted bool vox_0() { // --silent return opts["vox_is0"]; } @@ -775,9 +791,6 @@ struct OptActions { @trusted bool vox_default() { return vox_gt_1; } // defalt, & above @trusted bool vox_verbose() { return vox_gt_2; } // --verbose -v & above @trusted bool vox_very_verbose() { return vox_gt_3; } // --very-verbose - @trusted bool text() { - return opts["text"]; - } @trusted bool xhtml() { return opts["xhtml"]; } @@ -891,6 +904,8 @@ struct OptActions { || latex || manifest || sqlite_discrete + || text + || skel ) { _is = true; } else { _is = false; } @@ -909,6 +924,8 @@ struct OptActions { if (html_stuff) { schedule ~= outTask.html_stuff; } if (odt) { schedule ~= outTask.odt; } if (latex) { schedule ~= outTask.latex; } + if (text) { schedule ~= outTask.text; } + if (skel) { schedule ~= outTask.skel; } return schedule.sort().uniq; } @trusted bool abstraction() { @@ -925,6 +942,8 @@ struct OptActions { || sqlite_discrete || sqlite_delete || sqlite_update + || text + || skel ) ? true : false; } @trusted bool require_processing_files() { @@ -946,6 +965,7 @@ struct OptActions { || sqlite_update || text || xhtml + || skel ) ? true : false; } @trusted bool meta_processing_general() { @@ -958,6 +978,8 @@ struct OptActions { || latex || sqlite_discrete || sqlite_update + || text + || skel ) ? 
true :false; } @trusted bool meta_processing_xml_dom() { diff --git a/org/spine_markup_sample.org b/org/spine_markup_sample.org index 2965fe5..60c6272 100644 --- a/org/spine_markup_sample.org +++ b/org/spine_markup_sample.org @@ -1300,7 +1300,7 @@ code(number){ 3~ Tables ={ SiSU markup:tables;tables } -Tables may be prepared in two either of two forms +Tables may be prepared in either of two forms !_ resulting output: diff --git a/src/sisudoc/io_out/hub.d b/src/sisudoc/io_out/hub.d index 0e25811..f98be01 100644 --- a/src/sisudoc/io_out/hub.d +++ b/src/sisudoc/io_out/hub.d @@ -62,7 +62,7 @@ template outputHub() { @system void outputHub(D)(D doc) { mixin Msg; auto msg = Msg!()(doc.matters); - enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff } + enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff, text, skel } void Scheduled(D)(int sched, D doc) { auto msg = Msg!()(doc.matters); if (sched == outTask.source_or_pod) { @@ -118,6 +118,12 @@ template outputHub() { outputLaTeX!()(doc.abstraction, doc.matters); msg.vv("latex done"); } + if (sched == outTask.text) { + msg.v("text processing... "); + import sisudoc.io_out.text; + outputText!()(doc.abstraction, doc.matters); + msg.vv("text done"); + } if (sched == outTask.odt) { msg.v("odf:odt processing... "); import sisudoc.io_out.odt; @@ -130,6 +136,12 @@ template outputHub() { doc.SQLiteHubDiscreteBuildTablesAndPopulate!(); msg.vv("sqlite done"); } + if (sched == outTask.skel) { + msg.v("skel processing... 
"); + import sisudoc.io_out.skel; + outputSkel!()(doc.abstraction, doc.matters); + msg.vv("skel done"); + } } if (doc.matters.opt.action.vox_gt_1) { writeln(doc.matters.src.filename_base); } if (!(doc.matters.opt.action.parallelise_subprocesses)) { diff --git a/src/sisudoc/io_out/metadata.d b/src/sisudoc/io_out/metadata.d index 6e6183b..a89b31a 100644 --- a/src/sisudoc/io_out/metadata.d +++ b/src/sisudoc/io_out/metadata.d @@ -417,6 +417,7 @@ string theme_light_1 = format(q"┃ } auto pth_html = spinePathsHTML!()(doc_matters.output_path, doc_matters.src.language); auto pth_epub = spinePathsEPUB!()(doc_matters.output_path, doc_matters.src.language); + auto pth_text = spinePathsText!()(doc_matters); auto pth_pdf = spinePathsPDF!()(doc_matters); auto pth_pod = spinePathsPods!()(doc_matters); metadata_ ~= format(q"┃<body lang="en" xml:lang="en"> @@ -498,6 +499,10 @@ string theme_light_1 = format(q"┃ ~ "." ~ doc_matters.src.language ~ ".letter.portrait.pdf\" class=\"lnkicon\">" ~ " □ pdf (U.S. letter) </a>] "; } + if (doc_matters.opt.action.html_link_text) { + metadata_ ~= " [<a href=\"../" ~ "text/" ~ doc_matters.src.filename_base ~ "." 
~ doc_matters.src.language ~ ".txt\" class=\"lnkicon\">" + ~ " □ txt </a>] "; + } metadata_ ~= "</p>"; if (doc_matters.opt.action.html_link_markup_source) { metadata_ ~= "<hr /><p class=\"lev1\">source: " ~ doc_matters.src.filename_base ~ "</p>"; diff --git a/src/sisudoc/io_out/paths_output.d b/src/sisudoc/io_out/paths_output.d index a5b73a0..c3e677d 100644 --- a/src/sisudoc/io_out/paths_output.d +++ b/src/sisudoc/io_out/paths_output.d @@ -471,7 +471,7 @@ template spinePathsODT() { auto spinePathsODT(M)( M doc_matters, ) { - auto out_pth = spineOutPaths!()( doc_matters.output_path, doc_matters.src.language); + auto out_pth = spineOutPaths!()(doc_matters.output_path, doc_matters.src.language); string base_dir = "odf"; struct _PathsStruct { string base_pth() { // dir will contain odt document file (also debug file tree) @@ -668,3 +668,52 @@ template spinePathsSQLite() { return _PathsStruct(); } } + +template spinePathsText() { + import std.conv; + auto spinePathsText(M)( + M doc_matters, + ) { + auto out_pth = spineOutPaths!()(doc_matters.output_path, doc_matters.src.language); + string base_dir = "text"; + struct _PathsStruct { + string base_pth() { + return (((out_pth.output_base).chainPath(base_dir)).asNormalizedPath).array; + } + string base_filename(string fn_src) { + return fn_src.baseName.stripExtension; + } + string text_file() { + return ((base_pth.chainPath(doc_matters.src.doc_uid_out ~ ".txt")).asNormalizedPath).array; + } + string dirtop() { + return "".chainPath("").array; + } + } + return _PathsStruct(); + } +} +template spinePathsSkel() { + import std.conv; + auto spinePathsSkel(M)( + M doc_matters, + ) { + auto out_pth = spineOutPaths!()(doc_matters.output_path, doc_matters.src.language); + string base_dir = "skel"; + struct _PathsStruct { + string base_pth() { + return (((out_pth.output_base).chainPath(base_dir)).asNormalizedPath).array; + } + string base_filename(string fn_src) { + return fn_src.baseName.stripExtension; + } + string skel_file() { + 
return ((base_pth.chainPath(doc_matters.src.doc_uid_out ~ ".skel")).asNormalizedPath).array; + } + string dirtop() { + return "".chainPath("").array; + } + } + return _PathsStruct(); + } +} diff --git a/src/sisudoc/io_out/rgx.d b/src/sisudoc/io_out/rgx.d index 9c70c1e..666e71f 100644 --- a/src/sisudoc/io_out/rgx.d +++ b/src/sisudoc/io_out/rgx.d @@ -78,9 +78,9 @@ static template spineRgxOut() { static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); static br_linebreaks_newlines = ctRegex!(`[\n┘┙]`, "mg"); static br_linebreaks = ctRegex!(`[┘┙]`, "mg"); - static br_line = ctRegex!(`┘`, "mg"); - static br_line_inline = ctRegex!(`┙`, "mg"); - static br_line_spaced = ctRegex!(`┚`, "mg"); + static br_line = ctRegex!(`\s*┘\s*`, "mg"); + static br_line_inline = ctRegex!(`\s*┙\s*`, "mg"); + static br_line_spaced = ctRegex!(`\s*┚\s*`, "mg"); /+ quotation marks +/ static quotes_open_and_close = ctRegex!(`[“”]`, "mg"); /+ inline markup footnotes endnotes +/ @@ -90,6 +90,8 @@ static template spineRgxOut() { static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m"); static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg"); static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg"); + // static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section + // static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section static inline_notes_al_special_char_note = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg"); static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); @@ -100,13 +102,14 @@ static template spineRgxOut() { static inline_image_without_dimensions = 
ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg"); static inline_image_info = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg"); static inline_link_anchor = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor - static inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg"); - static inline_link_empty = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg"); - static inline_link_number = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used - static inline_link_number_only = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg"); - static inline_link_stow_uri = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links - static inline_link_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg"); - static inline_link_seg_and_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg"); + // space cleaning should not be necessary + static inline_link = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#?(\S+?))├`, "mg"); + static inline_link_empty = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤├`, "mg"); + static inline_link_number = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<num>[0-9]+)├`, "mg"); // not used + static inline_link_number_only = ctRegex!(`\s*(?P<linked_text>\s*┥.+?┝)┤(?P<num>[0-9]+)├`, "mg"); + static inline_link_stow_uri = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links + static inline_link_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#(?P<hash>\S+?))├`, "mg"); + static inline_link_seg_and_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg"); static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg"); static inline_link_toc_to_backmatter = 
ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg"); static url = ctRegex!(`https?://`, "mg"); diff --git a/src/sisudoc/io_out/skel.d b/src/sisudoc/io_out/skel.d new file mode 100644 index 0000000..b616695 --- /dev/null +++ b/src/sisudoc/io_out/skel.d @@ -0,0 +1,268 @@ +/+ +- Name: SisuDoc Spine, Doc Reform [a part of] + - Description: documents, structuring, processing, publishing, search + - static content generator + + - Author: Ralph Amissah + [ralph.amissah@gmail.com] + + - Copyright: (C) 2015 - 2025 Ralph Amissah, All Rights Reserved. + + - License: AGPL 3 or later: + + Spine (SiSU), a framework for document structuring, publishing and + search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU AFFERO General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [https://www.gnu.org/licenses/]. 
+ + If you have Internet connection, the latest version of the AGPL should be + available at these locations: + [https://www.fsf.org/licensing/licenses/agpl.html] + [https://www.gnu.org/licenses/agpl.html] + + - Spine (by Doc Reform, related to SiSU) uses standard: + - docReform markup syntax + - standard SiSU markup syntax with modified headers and minor modifications + - docReform object numbering + - standard SiSU object citation numbering & system + + - Homepages: + [https://www.sisudoc.org] + [https://www.doc-reform.org] + + - Git + [https://git.sisudoc.org/] + ++/ +module sisudoc.io_out.skel; +@safe: +template outputSkel() { + template munge() { + import std.stdio; + import std.conv; + void puts(string _obj_is) { + writeln(__FILE__, ":", __LINE__, ": ", _obj_is); + } + string newline = "\n"; + string newlines = "\n\n"; + string toc(O)(O obj) { + // puts(obj.metainfo.is_a); + // return "toc\n"; + return obj.text ~ newline; + } + string heading(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string para(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string group(O)(O obj) { + /+ + The "group" is different from the "block" mark in that "group" does not + preserve whitespace, the "block" mark does. The text falling within the + block is a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string block(O)(O obj) { + /+ + The "block" is different from the "group" mark in that the "block" mark + (like the "poem" mark) preserves whitespace, the "group" mark does not. + The text falling within the "block" is a single object, which is different + from the "poem" mark where each identified verse is an object. 
+ +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string poem(O)(O obj) { + /+ + The "poem" mark like the "block" preserves whitespace. Text followed by + two newlines is identified as a verse and each verse is an object, i.e. a + poem may consist of multiple verses each of which is identified as an + object, unlike a text "block" which is identified as a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + // return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + return obj.text ~ newlines; + } + string verse(O)(O obj) { + /+ + See description of poem, the poem is demarcated but the verse is the + object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string code(O)(O obj) { + /+ + "Code" blocks are a single text object, in which the original text is + preserved. 
+ +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string quote(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string table(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string endnote(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string bookindex(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string bibliography(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string glossary(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string blurb(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string comment(O)(O obj) { + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + } + template theDocument() { + import std.stdio; + import sisudoc.io_out; + // static auto rgx = RgxO(); + string skel_head(M)( + M doc_matters, + ) { + return "head"; + } + string skel_body(D,M)( + const D doc_abstraction, + M doc_matters, + ) { + string doc_object = ""; + foreach (section; doc_matters.has.keys_seq.scroll) { + foreach (obj; doc_abstraction[section]) { + if (obj.metainfo.is_a == "toc") { doc_object ~= munge!().toc(obj); } + if (obj.metainfo.is_a == "heading") { doc_object ~= munge!().heading(obj); } + if (obj.metainfo.is_a == "para") { doc_object ~= munge!().para(obj); } + if (obj.metainfo.is_a == "group") { doc_object ~= munge!().group(obj); } 
+ if (obj.metainfo.is_a == "block") { doc_object ~= munge!().block(obj); } + if (obj.metainfo.is_a == "poem") { doc_object ~= munge!().poem(obj); } + if (obj.metainfo.is_a == "verse") { doc_object ~= munge!().verse(obj); } + if (obj.metainfo.is_a == "code") { doc_object ~= munge!().code(obj); } + if (obj.metainfo.is_a == "quote") { doc_object ~= munge!().quote(obj); } + if (obj.metainfo.is_a == "table") { doc_object ~= munge!().table(obj); } + if (obj.metainfo.is_a == "endnote") { doc_object ~= munge!().endnote(obj); } + if (obj.metainfo.is_a == "bookindex") { doc_object ~= munge!().bookindex(obj); } + if (obj.metainfo.is_a == "bibliography") { doc_object ~= munge!().bibliography(obj); } + if (obj.metainfo.is_a == "glossary") { doc_object ~= munge!().glossary(obj); } + if (obj.metainfo.is_a == "blurb") { doc_object ~= munge!().blurb(obj); } + if (obj.metainfo.is_a == "comment") { doc_object ~= munge!().comment(obj); } + } + } + return doc_object; + } + string skel_tail(M)( + M doc_matters, + ) { + return "tail"; + } + } + // outputSkel: builds the head, body and tail strings via theDocument and writes them, + // one after the other, to the file named by spinePathsSkel(doc_matters).skel_file, + // creating the skel output directory first when it does not yet exist + void outputSkel(D,M) ( + const D doc_abstraction, + M doc_matters, + ) { + import std.stdio; + import sisudoc.io_out; + void skel_out(D,M)( + const D doc_abstraction, + M doc_matters, + ) { + struct Skel { + string head; + string content; + string tail; + } + auto skel = Skel(); + skel.head = theDocument!().skel_head(doc_matters); + skel.content = theDocument!().skel_body(doc_abstraction, doc_matters); + skel.tail = theDocument!().skel_tail(doc_matters); + auto pth_skel = spinePathsSkel(doc_matters); + import std.file; + try { + if (!exists(pth_skel.base_pth)) { + (pth_skel.base_pth).mkdirRecurse; + } + } catch (FileException ex) { + // mkdirRecurse signals failure with FileException (not ErrnoException); report it and + // skip this output rather than go on to open a file in a directory that is not there + writeln("ERROR skel output directory could not be created ", pth_skel.base_pth, " ", ex.msg); + return; + } + if (doc_matters.opt.action.vox_gt_1) { + writeln(" ", pth_skel.skel_file); + } + // writeln(pth_skel.base_pth); + auto f = File(pth_skel.skel_file, "w"); + f.writeln(skel.head); + f.writeln(skel.content); + f.writeln(skel.tail); + } + skel_out(doc_abstraction, doc_matters); + } +} diff --git 
a/src/sisudoc/io_out/text.d b/src/sisudoc/io_out/text.d new file mode 100644 index 0000000..da0e2b6 --- /dev/null +++ b/src/sisudoc/io_out/text.d @@ -0,0 +1,475 @@ +/+ +- Name: SisuDoc Spine, Doc Reform [a part of] + - Description: documents, structuring, processing, publishing, search + - static content generator + + - Author: Ralph Amissah + [ralph.amissah@gmail.com] + + - Copyright: (C) 2015 - 2025 Ralph Amissah, All Rights Reserved. + + - License: AGPL 3 or later: + + Spine (SiSU), a framework for document structuring, publishing and + search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU AFERO General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [https://www.gnu.org/licenses/]. 
+ + If you have Internet connection, the latest version of the AGPL should be + available at these locations: + [https://www.fsf.org/licensing/licenses/agpl.html] + [https://www.gnu.org/licenses/agpl.html] + + - Spine (by Doc Reform, related to SiSU) uses standard: + - docReform markup syntax + - standard SiSU markup syntax with modified headers and minor modifications + - docReform object numbering + - standard SiSU object citation numbering & system + + - Homepages: + [https://www.sisudoc.org] + [https://www.doc-reform.org] + + - Git + [https://git.sisudoc.org/] + ++/ +module sisudoc.io_out.text; +@safe: +template outputText() { + template munge() { + import sisudoc.io_out; + import sisudoc.io_out.rgx; + import std.stdio; + import std.conv; + import std.conv : to; + import std.typecons : Nullable; + mixin spineRgxOut; + static auto rgx = RgxO(); + void puts(string _obj_is) { + writeln(__FILE__, ":", __LINE__, ": ", _obj_is); + } + string newline = "\n"; + string newlines = "\n\n"; + template special_characters_and_font_face() { + string code(string _txt){ + _txt = _txt + .replaceAll(rgx.nbsp_char, " "); + return _txt; + } + string general(string _txt) { + _txt = _txt + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.br_line, "\n") + .replaceAll(rgx.br_line_inline, "\n") + .replaceAll(rgx.br_line_spaced, "\n\n") + .replaceAll(rgx.inline_strike, "-{$1}-") + .replaceAll(rgx.inline_insert, "+{$1}+") + .replaceAll(rgx.inline_cite, "\"{$1}\"") + .replaceAll(rgx.inline_emphasis, "!{$1}!") + .replaceAll(rgx.inline_bold, "*{$1}*") + .replaceAll(rgx.inline_italics, "/{$1}/") + .replaceAll(rgx.inline_underscore, "_{$1}_") + .replaceAll(rgx.inline_superscript, "^{$1}^") + .replaceAll(rgx.inline_subscript, ",{$1},") + .replaceAll(rgx.inline_mono, "#{$1}#"); + return _txt; + } + string links_and_images(string _txt){ + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + if (m.captures[3] == "0") { + _txt = _txt + 
.replaceFirst(rgx.inline_link, (m.captures[1])); + } else { + _txt = _txt + .replaceFirst(rgx.inline_link, (m.captures[1] ~ " ≫" ~ m.captures[3])); + } + } + } + if (_txt.matchFirst(rgx.inline_image)) { + foreach (m; _txt.matchAll(rgx.inline_image)) { + _txt = _txt + .replaceFirst(rgx.inline_image, (m.captures[3])); + } + } + return _txt; + } + } + string generalMunge(O,M)(O obj, M doc_matters) { + string _txt = obj.text; + string _notes; + string _ocn; + string general_munge; + if (obj.metainfo.ocn == 0 || doc_matters.opt.action.ocn_off) { + _ocn = ""; + } else { + _ocn = "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newline; + } + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_regular_number_note)) { + _notes ~= newlines ~ m["num"] ~ ". " ~ m["note"]; + } + } + _txt = _txt.replaceAll(rgx.inline_notes_al_regular_number_note, "[$1]"); + if (obj.metainfo.is_a == "code") { + _txt = special_characters_and_font_face!().code(_txt); + } else { + _txt = special_characters_and_font_face!().general(_txt); + } + _txt = special_characters_and_font_face!().links_and_images(_txt); + if (obj.metainfo.is_a == "heading") { + general_munge = newline ~ _txt ~ _notes ~ newline ~ _ocn ~ newline; + } else { + general_munge = _txt ~ _notes ~ newline ~ _ocn ~ newline; + } + return general_munge; + } + string toc(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return "toc\n"; + // _txt = _special_characters_and_font_face(obj.text); + string _txt = special_characters_and_font_face!().general(obj.text); + string _spaces; + switch (obj.attrib.indent_hang) { + case 1: _spaces = ""; + break; + case 2: _spaces = ":"; + break; + case 3: _spaces = "∴"; + break; + case 4: _spaces = " "; + break; + case 5: _spaces = " "; + break; + case 6: _spaces = " "; + break; + case 7: _spaces = " "; + break; + case 8: _spaces = " "; + break; + default: + break; + } + _txt = _txt.replaceAll(rgx.inline_link, (_spaces ~ "$1 ≫ $3")); + return _txt ~ 
newline; + } + string heading(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string para(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string group(O,M)(O obj, M doc_matters) { + /+ + The "group" is different from the "block" mark in that "group" does not + preserve whitespace, the "block" mark does. The text falling within the + block is a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string block(O,M)(O obj, M doc_matters) { + /+ + The "block" is different from the "group" mark in that the "block" mark + (like the "poem" mark) preserves whitespace, the "group" mark does not. + The text falling within the "block" is a single object, which is different + from the "poem" mark where each identified verse is an object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string poem(O,M)(O obj, M doc_matters) { // LATER + /+ + The "poem" mark like the "block" preserves whitespace. Text followed by + two newlines are identified as verse and each verse is an object i.e. a + poem may consist of multiple verse each of which is identified as an + object, unlike a text "block" which is identified as a single object. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newlines; + } + string verse(O,M)(O obj, M doc_matters) { + /+ + See description of poem, the poem is demarkated but the verse is the + object. 
+ +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string code(O,M)(O obj, M doc_matters) { + /+ + "Code" blocks are a single text object, in which the original text is + preserved. + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string quote(O,M)(O obj, M doc_matters) { // LATER + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + return obj.text ~ newline ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string table(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + auto tablarize(O)( + string _txt, + const O obj, + ) { + string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_cols; + string _table = ""; + string _tablenote = ""; + int[] _col_width; + _col_width.length = obj.table.number_of_columns.to!ulong; + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + _table ~= ""; + foreach(col_idx, cell; _table_cols) { + if (!((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2))) { + if (_col_width[col_idx] < (cell.length.to!int)) { + _col_width[col_idx] = cell.length.to!int; + } + } + } + } + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + foreach(col_idx, cell; _table_cols) { + if ((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) + _tablenote ~= cell ~ newline; + } else { + if (obj.table.column_aligns[col_idx] == "l") { + _table ~= format(q"┃%-*s%s┃", + _col_width[col_idx], + cell, + (_table_cols.length > (col_idx + 1)) ? " ┊ " : "" + ); + } else { + _table ~= format(q"┃%*s%s┃", + _col_width[col_idx], + cell, + (_table_cols.length > (col_idx + 1)) ? 
" ┊ " : "" + ); + } + _table = _table + .replaceAll(regex("\\s*$"), ""); + } + } + _table ~= newline; + } + Tuple!(string, string) t = tuple( + _table, + _tablenote, + ); + return t; + } + // string _txt = obj.text; + // writeln(obj.table.column_widths); + auto _t = tablarize(obj.text, obj); + string _txt = _t[0]; + string _tablenote = _t[1]; + return _txt ~ _tablenote ~ "「" ~ obj.metainfo.ocn.to!string ~ "」" ~ newlines; + } + string endnote(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _ocn; + _ocn = "「" ~ obj.metainfo.ocn.to!string ~ "」"; + string _txt = obj.text; + _txt = _txt + .replaceFirst(rgx.inline_link, ("$1")) + .replaceFirst(rgx.inline_superscript, ("$1")); + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + } + string bookindex(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = _txt + .replaceAll(rgx.inline_link, ("≫$1")) + .replaceAll(regex("\\s*\\\\"), ""); + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + } + string bibliography(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = special_characters_and_font_face!().general(_txt); + return _txt ~ newlines; + // ALT: + // string _general_munge = generalMunge(obj,doc_matters); + // return _general_munge; + } + string glossary(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _txt = obj.text; + _txt = special_characters_and_font_face!().general(_txt); + return _txt; + } + string blurb(O,M)(O obj, M doc_matters) { + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + string _general_munge = generalMunge(obj,doc_matters); + return _general_munge; + } + string comment(O,M)(O obj, M doc_matters) { // LATER + /+ + +/ + // puts(obj.metainfo.is_a); + // return obj.metainfo.is_a; + 
return obj.text ~ newlines; + } + } + template theDocument() { + import std.stdio; + import sisudoc.io_out; + string text_head(M)( + M doc_matters, + ) { + return "head"; + } + string text_body(D,M)( + const D doc_abstraction, + M doc_matters, + ) { + string doc_object = ""; + foreach (section; doc_matters.has.keys_seq.scroll) { + foreach (obj; doc_abstraction[section]) { + if (obj.metainfo.is_a == "toc") { doc_object ~= munge!().toc(obj, doc_matters); } + if (obj.metainfo.is_a == "heading") { doc_object ~= munge!().heading(obj, doc_matters); } + if (obj.metainfo.is_a == "para") { doc_object ~= munge!().para(obj, doc_matters); } + if (obj.metainfo.is_a == "group") { doc_object ~= munge!().group(obj, doc_matters); } + if (obj.metainfo.is_a == "block") { doc_object ~= munge!().block(obj, doc_matters); } + if (obj.metainfo.is_a == "poem") { doc_object ~= munge!().poem(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "verse") { doc_object ~= munge!().verse(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "code") { doc_object ~= munge!().code(obj, doc_matters); } + if (obj.metainfo.is_a == "quote") { doc_object ~= munge!().quote(obj, doc_matters); } // LATER + if (obj.metainfo.is_a == "table") { doc_object ~= munge!().table(obj, doc_matters); } + if (obj.metainfo.is_a == "endnote") { doc_object ~= munge!().endnote(obj, doc_matters); } + if (obj.metainfo.is_a == "bookindex") { doc_object ~= munge!().bookindex(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "bibliography") { doc_object ~= munge!().bibliography(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "glossary") { doc_object ~= munge!().glossary(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "blurb") { doc_object ~= munge!().blurb(obj, doc_matters); } // CHECK + if (obj.metainfo.is_a == "comment") { doc_object ~= munge!().comment(obj, doc_matters); } // LATER + } + } + return doc_object; + } + string text_tail(M)( + M doc_matters, + ) { + string metadata_; + if 
(doc_matters.opt.action.debug_do) { + writeln(doc_matters.src.filename_base); + writeln("Title: ", doc_matters.conf_make_meta.meta.title_full); + writeln(" Author: ", doc_matters.conf_make_meta.meta.creator_author); + writeln(" Published: ", doc_matters.conf_make_meta.meta.date_published); + writeln(" Copyright: ", doc_matters.conf_make_meta.meta.rights_copyright); + writeln(" License: ", doc_matters.conf_make_meta.meta.rights_license); + } + if (!(doc_matters.conf_make_meta.meta.title_full.empty)) { + metadata_ ~= "Title: " ~ doc_matters.conf_make_meta.meta.title_full ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("ERROR no Title information provided in document header ", doc_matters.src.filename_base); + } + if (!(doc_matters.conf_make_meta.meta.creator_author.empty)) { + if (doc_matters.opt.action.html_link_curate) { + metadata_ ~= "Author: " ~ doc_matters.conf_make_meta.meta.creator_author_surname.translate([' ' : "_"]) + ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n"; + } else { + metadata_ ~= "Author: " + ~ doc_matters.conf_make_meta.meta.creator_author ~ "\n\n"; + } + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("ERROR no Author information provided in document header ", doc_matters.src.filename_base); + } + metadata_ ~= "Published: " ~ doc_matters.conf_make_meta.meta.date_published ~ "\n\n"; + if (!(doc_matters.conf_make_meta.meta.rights_copyright.empty)) { + metadata_ ~= "Copyright: " ~ doc_matters.conf_make_meta.meta.rights_copyright ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || doc_matters.opt.action.vox_gt_3) { + writeln("WARNING no Copyright information provided in document header ", doc_matters.src.filename_base); + } + if (!(doc_matters.conf_make_meta.meta.rights_license.empty)) { + metadata_ ~= "License: " ~ doc_matters.conf_make_meta.meta.rights_license ~ "\n\n"; + } else if (doc_matters.opt.action.debug_do || 
doc_matters.opt.action.vox_gt_3) { + writeln("WARNING no License information provided in document header ", doc_matters.src.filename_base); + } + metadata_ ~= doc_matters.generator_program.project_name.strip ~ "\n"; + metadata_ ~= doc_matters.generator_program.url_home.strip; + return metadata_; + } + } + void outputText(D,M) ( + const D doc_abstraction, + M doc_matters, + ) { + import std.stdio; + import sisudoc.io_out; + void text_out(D,M)( + const D doc_abstraction, + M doc_matters, + ) { + struct Text { + string head; + string content; + string tail; + } + auto text = Text(); + // text.head = theDocument!().text_head(doc_matters); + text.content = theDocument!().text_body(doc_abstraction, doc_matters); + text.tail = theDocument!().text_tail(doc_matters); + auto pth_text = spinePathsText(doc_matters); + try { + import std.file; + if (!exists(pth_text.base_pth)) { + (pth_text.base_pth).mkdirRecurse; + } + } catch (ErrnoException ex) { + } + if (doc_matters.opt.action.vox_gt_1) { + writeln(" ", pth_text.text_file); + } + // writeln(pth_text.base_pth); + auto f = File(pth_text.text_file, "w"); + // f.writeln(text.head); + f.writeln(text.content); + f.writeln(text.tail); + } + text_out(doc_abstraction, doc_matters); + } +} diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d index 24ae935..904444a 100644 --- a/src/sisudoc/meta/metadoc_from_src.d +++ b/src/sisudoc/meta/metadoc_from_src.d @@ -939,7 +939,7 @@ template docAbstraction() { } { // document segnames ST_segnames get_segnames; - get_segnames = the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); // + get_segnames = the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, 
the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); segnames = get_segnames.segnames; html_segnames_ptr_cntr = get_segnames.html_segnames_ptr_cntr; html_segnames_ptr = get_segnames.html_segnames_ptr; @@ -1376,36 +1376,42 @@ template docAbstraction() { "scroll": ["head", "toc", "body",], "seg": ["head", "toc", "body",], "sql": ["head", "body",], - "latex": ["head", "toc", "body",] + "latex": ["head", "toc", "body",], + "text": ["head", "toc", "body",], ]; if (document_the["endnotes"].length > 1) { document_section_keys_sequenced["scroll"] ~= "endnotes"; document_section_keys_sequenced["seg"] ~= "endnotes"; document_section_keys_sequenced["latex"] ~= "endnotes"; + document_section_keys_sequenced["text"] ~= "endnotes"; } if (document_the["glossary"].length > 1) { document_section_keys_sequenced["scroll"] ~= "glossary"; document_section_keys_sequenced["seg"] ~= "glossary"; document_section_keys_sequenced["sql"] ~= "glossary"; document_section_keys_sequenced["latex"] ~= "glossary"; + document_section_keys_sequenced["text"] ~= "glossary"; } if (document_the["bibliography"].length > 1) { document_section_keys_sequenced["scroll"] ~= "bibliography"; document_section_keys_sequenced["seg"] ~= "bibliography"; document_section_keys_sequenced["sql"] ~= "bibliography"; document_section_keys_sequenced["latex"] ~= "bibliography"; + document_section_keys_sequenced["text"] ~= "bibliography"; } if (document_the["bookindex"].length > 1) { document_section_keys_sequenced["scroll"] ~= "bookindex"; document_section_keys_sequenced["seg"] ~= "bookindex"; document_section_keys_sequenced["sql"] ~= "bookindex"; document_section_keys_sequenced["latex"] ~= "bookindex"; + document_section_keys_sequenced["text"] ~= "bookindex"; } if (document_the["blurb"].length > 1) { document_section_keys_sequenced["scroll"] ~= "blurb"; document_section_keys_sequenced["seg"] ~= "blurb"; document_section_keys_sequenced["sql"] ~= "blurb"; 
document_section_keys_sequenced["latex"] ~= "blurb"; + document_section_keys_sequenced["text"] ~= "blurb"; } if ((opt_action.html) || (opt_action.html_scroll) diff --git a/src/sisudoc/meta/metadoc_from_src_functions.d b/src/sisudoc/meta/metadoc_from_src_functions.d index 3ae10d1..6718e82 100644 --- a/src/sisudoc/meta/metadoc_from_src_functions.d +++ b/src/sisudoc/meta/metadoc_from_src_functions.d @@ -2557,10 +2557,8 @@ template docAbstractionFunctions() { CMM conf_make_meta, Flag!"_new_doc" _new_doc ) { - obj_txt["munge"] = obj_[obj_key_].dup; - obj_txt["munge"] = (obj_["is"].match(ctRegex!(`verse|code`))) - ? obj_txt["munge"] - : obj_txt["munge"].strip; + obj_txt["munge"] = obj_[obj_key_].dup; + obj_txt["munge"] = (obj_["is"].match(ctRegex!(`verse|code`))) ? obj_txt["munge"] : obj_txt["munge"].strip; if (_new_doc) { anchor_tag = ""; } @@ -2579,8 +2577,8 @@ template docAbstractionFunctions() { || (obj_["is"] == "group") || (obj_["is"] == "block") || (obj_["is"] == "verse")) { - obj_txt["munge"] = (obj_txt["munge"]).inline_markup_faces; - obj_txt["munge"] = (obj_txt["munge"]).links_and_images; + obj_txt["munge"] = (obj_txt["munge"]).inline_markup_faces; + obj_txt["munge"] = (obj_txt["munge"]).links_and_images; } switch (obj_["is"]) { case "heading": @@ -3299,8 +3297,8 @@ template docAbstractionFunctions() { // ↓ - endnotes struct NotesSection { string[string] object_notes; - int previous_count; - int mkn; + int previous_count; + int mkn; static auto rgx = RgxI(); private auto gather_notes_for_endnote_section( ObjGenericComposite[] contents_am, @@ -5406,6 +5404,9 @@ template docSectKeysSeq() { string[] latex() { return document_section_keys_sequenced["latex"]; } + string[] text() { + return document_section_keys_sequenced["text"]; + } } return doc_sect_keys_seq(); } diff --git a/src/sisudoc/meta/rgx.d b/src/sisudoc/meta/rgx.d index 259ab82..1a26f73 100644 --- a/src/sisudoc/meta/rgx.d +++ b/src/sisudoc/meta/rgx.d @@ -148,16 +148,16 @@ static template spineRgxIn() { 
static table_col_separator_nl = ctRegex!(`[┊]$`, "mg"); /+ inline markup footnotes endnotes +/ static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); - static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); - static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); - static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); + static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\s*\}~`, "mg"); + static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\s*\}~`, "m"); + static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\s*\}~`, "m"); static note_ref = ctRegex!(`^\S+?noteref_(?P<ref>[0-9]+)`, "mg"); // {^{73.}^}#noteref_73 static smid_inline_url_generic = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg"); static smid_inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg"); static smid_inline_link_naked_url = ctRegex!(`(?P<pre>^|[ (\[])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?=[.,;:?!'"]?([ )\]]|$))`, "mg"); static smid_inline_link_markup_regular = ctRegex!(`(?P<pre>^|[ (\[])\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); - static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[.,;:?!]?([ ]|$))`, "mg"); - static smid_inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); + static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[.,;:?!]?([ ]|$))`, "mg"); + static smid_inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); static image = ctRegex!(`([a-zA-Z0-9._-]+?\.(?:png|gif|jpg))`, "mg"); static 
smid_image = ctRegex!(`(?P<pre>(?:^|[ ])[{┥](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); static smid_image_generic = ctRegex!(`(?:^|[ ])[{┥](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg"); @@ -221,9 +221,9 @@ static template spineRgxIn() { static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); static br_linebreaks_newlines = ctRegex!(`[\n┘┙]`, "mg"); static br_linebreaks = ctRegex!(`[┘┙]`, "mg"); - static br_line = ctRegex!(`┘`, "mg"); - static br_line_inline = ctRegex!(`┙`, "mg"); - static br_line_spaced = ctRegex!(`┚`, "mg"); + static br_line = ctRegex!(`\s*┘\s*`, "mg"); + static br_line_inline = ctRegex!(`\s*┙\s*`, "mg"); + static br_line_spaced = ctRegex!(`\s*┚\s*`, "mg"); /+ inline markup footnotes endnotes +/ static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented @@ -231,6 +231,8 @@ static template spineRgxIn() { static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m"); static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg"); static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg"); + // static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section + // static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*(≫\s\d+)?\s*】`, "mg"); // ocn of origin would be useful in endnote section static inline_notes_al_special_char_note = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg"); static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); static 
inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); @@ -241,13 +243,14 @@ static template spineRgxIn() { static inline_image_without_dimensions = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg"); static inline_image_info = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg"); static inline_link_anchor = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor - static inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg"); - static inline_link_empty = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg"); - static inline_link_number = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used - static inline_link_number_only = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg"); - static inline_link_stow_uri = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links - static inline_link_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg"); - static inline_link_seg_and_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg"); + // space cleaning should not be necessary + static inline_link = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#?(\S+?))├`, "mg"); + static inline_link_empty = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤├`, "mg"); + static inline_link_number = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<num>[0-9]+)├`, "mg"); // not used + static inline_link_number_only = ctRegex!(`\s*(?P<linked_text>\s*┥.+?┝)┤(?P<num>[0-9]+)├`, "mg"); + static inline_link_stow_uri = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links + static inline_link_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>#(?P<hash>\S+?))├`, "mg"); + static inline_link_seg_and_hash = ctRegex!(`┥\s*(?P<text>.+?)\s*┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg"); 
static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg"); static inline_link_toc_to_backmatter = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg"); static url = ctRegex!(`https?://`, "mg"); diff --git a/src/sisudoc/spine.d b/src/sisudoc/spine.d index eceaf51..82138f7 100755 --- a/src/sisudoc/spine.d +++ b/src/sisudoc/spine.d @@ -144,6 +144,7 @@ string program_name = "spine"; "html-link-pdf-a4" : false, "html-link-pdf-letter" : false, "html-link-search" : false, + "html-link-text" : false, "html-seg" : false, "html-scroll" : false, "latex" : false, @@ -176,6 +177,7 @@ string program_name = "spine"; "show-pod" : false, "show-sqlite" : false, "show-summary" : false, + "skel" : false, "source" : false, "sqlite-discrete" : false, "sqlite-db-create" : false, @@ -250,6 +252,7 @@ string program_name = "spine"; "html-link-pdf", "provide html link to pdf a4 & letter output", &opts["html-link-pdf"], "html-link-pdf-a4", "provide html link to pdf a4 output", &opts["html-link-pdf-a4"], "html-link-pdf-letter", "provide html link to pdf letter size output", &opts["html-link-pdf-letter"], + "html-link-text", "provide html link to text output", &opts["html-link-text"], "html-link-search", "html embedded search submission", &opts["html-link-search"], "html-seg", "process html output", &opts["html-seg"], "html-scroll", "process html output", &opts["html-scroll"], @@ -299,6 +302,7 @@ string program_name = "spine"; "set-digest", "default hash digest type (e.g. sha256)", &settings["set-digest"], "set-papersize", "default papersize (latex pdf eg. a4 or a5 or b4 or letter)", &settings["set-papersize"], "set-textwrap", "default textwrap (e.g. 
80 (characters)", &settings["set-textwrap"], + "skel", "skel (dummy outline)", &opts["skel"], "sqlite-discrete", "process discrete sqlite output", &opts["sqlite-discrete"], "sqlite-db-create", "create db, create tables", &opts["sqlite-db-create"], "sqlite-db-drop", "drop tables & db", &opts["sqlite-db-drop"], @@ -339,7 +343,7 @@ string program_name = "spine"; if (helpInfo.helpWanted) { defaultGetoptPrinter("Some information about the program.", helpInfo.options); } - enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff } + enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, html_stuff, text, skel } struct OptActions { @trusted bool allow_downloads() { return opts["allow-downloads"]; @@ -443,6 +447,12 @@ string program_name = "spine"; @trusted bool html_link_pdf_letter() { return (opts["html-link-pdf-letter"]) ? true : false; } + @trusted bool html_link_text() { + return (opts["html-link-text"]) ? true : false; + } + @trusted bool text_link_curate() { + return (opts["text-link-curate"]) ? true : false; + } @trusted bool html_link_search() { return (opts["html-link-search"]) ? true : false; } @@ -551,6 +561,12 @@ string program_name = "spine"; || opts["sqlite-update"] ) ? 
true : false; } + @trusted bool skel() { + return opts["skel"]; + } + @trusted bool text() { + return opts["text"]; + } @trusted bool vox_0() { // --silent return opts["vox_is0"]; } @@ -587,9 +603,6 @@ string program_name = "spine"; @trusted bool vox_default() { return vox_gt_1; } // defalt, & above @trusted bool vox_verbose() { return vox_gt_2; } // --verbose -v & above @trusted bool vox_very_verbose() { return vox_gt_3; } // --very-verbose - @trusted bool text() { - return opts["text"]; - } @trusted bool xhtml() { return opts["xhtml"]; } @@ -703,6 +716,8 @@ string program_name = "spine"; || latex || manifest || sqlite_discrete + || text + || skel ) { _is = true; } else { _is = false; } @@ -721,6 +736,8 @@ string program_name = "spine"; if (html_stuff) { schedule ~= outTask.html_stuff; } if (odt) { schedule ~= outTask.odt; } if (latex) { schedule ~= outTask.latex; } + if (text) { schedule ~= outTask.text; } + if (skel) { schedule ~= outTask.skel; } return schedule.sort().uniq; } @trusted bool abstraction() { @@ -737,6 +754,8 @@ string program_name = "spine"; || sqlite_discrete || sqlite_delete || sqlite_update + || text + || skel ) ? true : false; } @trusted bool require_processing_files() { @@ -758,6 +777,7 @@ string program_name = "spine"; || sqlite_update || text || xhtml + || skel ) ? true : false; } @trusted bool meta_processing_general() { @@ -770,6 +790,8 @@ string program_name = "spine"; || latex || sqlite_discrete || sqlite_update + || text + || skel ) ? true :false; } @trusted bool meta_processing_xml_dom() { |