From 845f6440b8d2a672769f553a45aa1406c17d4c4f Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Thu, 20 Jan 2022 15:20:01 -0500 Subject: xmls, html group text output (bullets & indents) --- org/default_regex.org | 44 ++++++++++++++++++++++++++--- org/out_sqlite.org | 63 ++++++++++++++++++++++++++++++++++++++++++ org/out_xmls.org | 55 +++++++++++++++++++++++++++++++----- src/doc_reform/io_out/epub3.d | 2 +- src/doc_reform/io_out/html.d | 4 +-- src/doc_reform/io_out/rgx.d | 23 +++++++++++++++ src/doc_reform/io_out/sqlite.d | 56 +++++++++++++++++++++++++++++++++++++ src/doc_reform/io_out/xmls.d | 43 +++++++++++++++++++++++++++- src/doc_reform/meta/rgx.d | 3 +- 9 files changed, 277 insertions(+), 16 deletions(-) diff --git a/org/default_regex.org b/org/default_regex.org index b0a47ca..9ea040e 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -45,7 +45,8 @@ static template spineRgxIn() { <> <> <> - <> + <> + <> <> <> <> @@ -144,11 +145,11 @@ static yaml_header_meta_title = ctRegex!(`^\s*title\s*:\ static yaml_config = ctRegex!(`^[a-z]+\s*:\s*(?:"?\w|$)`, "m"); #+END_SRC -** heading & paragraph operators :paragraph:operator: +** heading operators :heading:operator: -#+NAME: meta_rgx_heading_and_paragraph_marks +#+NAME: meta_rgx_heading_marks #+BEGIN_SRC d -/+ heading & paragraph operators +/ +/+ heading operators +/ static heading_a = ctRegex!(`^:?[A][~] `, "m"); static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); static headings = ctRegex!(`^:?(?P[A-D1-4])[~](?:[a-z0-9_.-]*[?]?|[!](?:glossary|bibliogrphy|biblio|references?|blurb))(?:\s|$)`,"i"); @@ -164,6 +165,13 @@ static heading_marker_tag_has_colon = ctRegex!(`([:])`); static heading_biblio = ctRegex!(`^1[~][!](biblio(?:graphy)?|references?)`); static heading_glossary = ctRegex!(`^1[~][!](glossary)`); static heading_blurb = ctRegex!(`^1[~][!](blurb)`); +#+END_SRC + +** paragraph operators :paragraph:operator: + +#+NAME: meta_rgx_paragraph_marks +#+BEGIN_SRC d +/+ paragraph operators +/ static para_bullet = ctRegex!(`^_[*] `); static para_bullet_indent = ctRegex!(`^_(?P[1-9])[*] `); static para_indent = ctRegex!(`^_(?P[1-9])[ ]`); @@ -172,6 +180,33 @@ static para_attribs = ctRegex!(`^_(?:(?:[0-9]) static para_inline_link_anchor = ctRegex!(`\*[~](?P[a-z0-9_.-]+)(?= |$)`,"i"); #+END_SRC +#+NAME: grouped_text_rgx_paragraph_marks +#+BEGIN_SRC d +/+ paragraph operators +/ +static grouped_para_indent_1 = ctRegex!(`^_1[ ]`, "m"); +static grouped_para_indent_2 = ctRegex!(`^_2[ ]`, "m"); +static grouped_para_indent_3 = ctRegex!(`^_3[ ]`, "m"); +static grouped_para_indent_4 = ctRegex!(`^_4[ ]`, "m"); +static grouped_para_indent_5 = ctRegex!(`^_5[ ]`, "m"); +static grouped_para_indent_6 = ctRegex!(`^_6[ ]`, "m"); +static grouped_para_indent_7 = ctRegex!(`^_7[ ]`, "m"); +static grouped_para_indent_8 = ctRegex!(`^_8[ ]`, "m"); +static grouped_para_indent_9 = ctRegex!(`^_9[ ]`, "m"); +static grouped_para_bullet = ctRegex!(`^_[*] `, "m"); +static grouped_para_bullet_indent_1 = ctRegex!(`^_1[*] `, "m"); +static grouped_para_bullet_indent_2 = ctRegex!(`^_2[*] `, "m"); +static grouped_para_bullet_indent_3 = ctRegex!(`^_3[*] `, "m"); +static grouped_para_bullet_indent_4 = ctRegex!(`^_4[*] `, "m"); +static grouped_para_bullet_indent_5 = ctRegex!(`^_5[*] `, "m"); +static grouped_para_bullet_indent_6 = ctRegex!(`^_6[*] `, "m"); +static grouped_para_bullet_indent_7 = ctRegex!(`^_7[*] `, "m"); +static grouped_para_bullet_indent_8 = ctRegex!(`^_8[*] `, "m"); +static grouped_para_bullet_indent_9 = ctRegex!(`^_9[*] `, "m"); +static grouped_para_bullet_indent = ctRegex!(`^_(?P[1-9])[*] `, "m"); +static grouped_para_indent = ctRegex!(`^_(?P[1-9])[ ]`, "m"); +static grouped_para_indent_hang = ctRegex!(`^_(?P[0-9])_(?P[0-9])[ ]`, "m"); +#+END_SRC + ** blocked markup *** blocked markup curly & tic :block: @@ -414,6 +449,7 @@ static template spineRgxOut() { <> <> <> + <> } } #+END_SRC diff --git a/org/out_sqlite.org b/org/out_sqlite.org index 21e1cea..ecbfbe9 100644 --- a/org/out_sqlite.org +++ b/org/out_sqlite.org @@ -191,6 +191,7 @@ template SQLiteFormatAndLoadObject() { <> <> <> + <> <> <> <> @@ -611,6 +612,63 @@ string html_font_face(string _txt){ #+END_SRC ****** inline markup +******* grouped text + +#+NAME: sanitize_and_munge_inline_html_grouped_text_bullets_indents +#+BEGIN_SRC d +string inline_grouped_text_bullets_indents(M,O)( + M doc_matters, + const O obj, + string _txt, + string _suffix = ".html", + string _xml_type = "seg", +) { + static auto rgx = RgxO(); + if (obj.metainfo.is_a == "group") { + _txt = (_txt) + .replaceAll(rgx.grouped_para_indent_1, + "  ") + .replaceAll(rgx.grouped_para_indent_2, + "    ") + .replaceAll(rgx.grouped_para_indent_3, + "      ") + .replaceAll(rgx.grouped_para_indent_4, + "        ") + .replaceAll(rgx.grouped_para_indent_5, + "          ") + .replaceAll(rgx.grouped_para_indent_6, + "            ") + .replaceAll(rgx.grouped_para_indent_7, + "              ") + .replaceAll(rgx.grouped_para_indent_8, + "                ") + .replaceAll(rgx.grouped_para_indent_9, + "                  ") + .replaceAll(rgx.grouped_para_indent_hang, "  ") + .replaceAll(rgx.grouped_para_bullet, "●  ") + .replaceAll(rgx.grouped_para_bullet_indent_1, + "  ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_2, + "    ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_3, + "      ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_4, + "        ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_5, + "          ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_6, + "            ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_7, + "              ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_8, + "                ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_9, + "                  ●  "); + } + return _txt; +} +#+END_SRC + ******* images #+NAME: sanitize_and_munge_inline_html_images @@ -798,6 +856,9 @@ string inline_markup(M,O)( const O obj, string _txt, ) { + if (obj.metainfo.is_a == "group") { + _txt = inline_grouped_text_bullets_indents(doc_matters, obj, _txt, xml_type); + } _txt = inline_images(doc_matters, obj, _txt, xml_type); _txt = inline_links(doc_matters, obj, _txt, xml_type); _txt = inline_notes_scroll(doc_matters, obj, _txt); @@ -915,6 +976,7 @@ string html_group(M,O)( assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "group"); string _txt = munge_html(doc_matters, obj); + _txt = inline_markup(doc_matters, obj, _txt); string o = format(q"┃

%s

┃", @@ -938,6 +1000,7 @@ string html_block(M,O)( assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "block"); string _txt = munge_html(doc_matters, obj); + _txt = inline_markup(doc_matters, obj, _txt); string o = format(q"┃

%s

┃", obj.metainfo.is_a, diff --git a/org/out_xmls.org b/org/out_xmls.org index 79a81c7..ca202c4 100644 --- a/org/out_xmls.org +++ b/org/out_xmls.org @@ -34,7 +34,7 @@ template outputXHTMLs() { static auto rgx = RgxO(); <> <> - <> + <> <> <> <> @@ -163,14 +163,55 @@ import } #+END_SRC -**** special characters +**** breaks indents bullets -#+NAME: xhtml_format_objects_special_characters +#+NAME: xhtml_format_objects_breaks_indents_bullets #+BEGIN_SRC d -@safe string special_characters(O)( +@safe string special_characters_breaks_indents_bullets(O)( const O obj, ) { string _txt = special_characters_text(obj.text); + if (obj.metainfo.is_a == "group") { + _txt = (_txt) + .replaceAll(rgx.grouped_para_indent_1, + "  ") + .replaceAll(rgx.grouped_para_indent_2, + "    ") + .replaceAll(rgx.grouped_para_indent_3, + "      ") + .replaceAll(rgx.grouped_para_indent_4, + "        ") + .replaceAll(rgx.grouped_para_indent_5, + "          ") + .replaceAll(rgx.grouped_para_indent_6, + "            ") + .replaceAll(rgx.grouped_para_indent_7, + "              ") + .replaceAll(rgx.grouped_para_indent_8, + "                ") + .replaceAll(rgx.grouped_para_indent_9, + "                  ") + .replaceAll(rgx.grouped_para_indent_hang, "  ") + .replaceAll(rgx.grouped_para_bullet, "●  ") + .replaceAll(rgx.grouped_para_bullet_indent_1, + "  ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_2, + "    ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_3, + "      ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_4, + "        ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_5, + "          ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_6, + "            ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_7, + "              ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_8, + "                ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_9, + "                  ●  "); + } if (!(obj.metainfo.is_a == "code")) { _txt = (_txt) .replaceAll(rgx.xhtml_line_break, "
"); @@ -1636,7 +1677,7 @@ template outputHTML() { foreach (part; doc_matters.has.keys_seq.scroll) { foreach (obj; doc_abstraction[part]) { delimit = xhtml_format.div_delimit(part, previous_part); - string _txt = xhtml_format.special_characters(obj); + string _txt = xhtml_format.special_characters_breaks_indents_bullets(obj); switch (obj.metainfo.is_of_part) { #+END_SRC @@ -1882,7 +1923,7 @@ default: foreach (part; doc_matters.has.keys_seq.seg) { foreach (obj; doc_abstraction[part]) { delimit = xhtml_format.div_delimit(part, previous_part); - string _txt = xhtml_format.special_characters(obj); + string _txt = xhtml_format.special_characters_breaks_indents_bullets(obj); #+END_SRC ***** all headings @@ -2708,7 +2749,7 @@ xmlns="urn:oasis:names:tc:opendocument:xmlns:container"> #+BEGIN_SRC d foreach (part; doc_matters.has.keys_seq.seg) { foreach (obj; doc_abstraction[part]) { - string _txt = xhtml_format.special_characters(obj); + string _txt = xhtml_format.special_characters_breaks_indents_bullets(obj); #+END_SRC ***** all headings diff --git a/src/doc_reform/io_out/epub3.d b/src/doc_reform/io_out/epub3.d index 2881b69..029a2c2 100644 --- a/src/doc_reform/io_out/epub3.d +++ b/src/doc_reform/io_out/epub3.d @@ -415,7 +415,7 @@ template outputEPub3() { auto epubWrite = writeOut(); foreach (part; doc_matters.has.keys_seq.seg) { foreach (obj; doc_abstraction[part]) { - string _txt = xhtml_format.special_characters(obj); + string _txt = xhtml_format.special_characters_breaks_indents_bullets(obj); if (obj.metainfo.is_a == "heading") { assert(part == "head" || "toc" || "body" || "endnotes" || "glossary" || "bibliography" || "bookindex" || "blurb" || "tail"); switch (obj.metainfo.heading_lev_markup) { diff --git a/src/doc_reform/io_out/html.d b/src/doc_reform/io_out/html.d index dd77ab8..136229b 100644 --- a/src/doc_reform/io_out/html.d +++ b/src/doc_reform/io_out/html.d @@ -76,7 +76,7 @@ template outputHTML() { foreach (part; doc_matters.has.keys_seq.scroll) { foreach (obj; doc_abstraction[part]) { delimit = xhtml_format.div_delimit(part, previous_part); - string _txt = xhtml_format.special_characters(obj); + string _txt = xhtml_format.special_characters_breaks_indents_bullets(obj); switch (obj.metainfo.is_of_part) { case "frontmatter": assert(part == "head" || "toc"); switch (obj.metainfo.is_of_type) { @@ -279,7 +279,7 @@ template outputHTML() { foreach (part; doc_matters.has.keys_seq.seg) { foreach (obj; doc_abstraction[part]) { delimit = xhtml_format.div_delimit(part, previous_part); - string _txt = xhtml_format.special_characters(obj); + string _txt = xhtml_format.special_characters_breaks_indents_bullets(obj); if (obj.metainfo.is_a == "heading") { assert(part == "head" || "toc" || "body" || "endnotes" || "glossary" || "bibliography" || "bookindex" || "blurb" || "tail"); switch (obj.metainfo.heading_lev_markup) { diff --git a/src/doc_reform/io_out/rgx.d b/src/doc_reform/io_out/rgx.d index 12392f9..7f3b9da 100644 --- a/src/doc_reform/io_out/rgx.d +++ b/src/doc_reform/io_out/rgx.d @@ -160,5 +160,28 @@ static template spineRgxOut() { static latex_clean_internal_link = ctRegex!(`^(?:#|¤\S+?#)`, "m"); static latex_identify_inline_fontface = ctRegex!(`\\([_#$]┨.+?┣)\\([_#$])`, "mg"); static latex_clean_bookindex_linebreak = ctRegex!(`\s*\\\\\\\\\s*`, "m"); + /+ paragraph operators +/ + static grouped_para_indent_1 = ctRegex!(`^_1[ ]`, "m"); + static grouped_para_indent_2 = ctRegex!(`^_2[ ]`, "m"); + static grouped_para_indent_3 = ctRegex!(`^_3[ ]`, "m"); + static grouped_para_indent_4 = ctRegex!(`^_4[ ]`, "m"); + static grouped_para_indent_5 = ctRegex!(`^_5[ ]`, "m"); + static grouped_para_indent_6 = ctRegex!(`^_6[ ]`, "m"); + static grouped_para_indent_7 = ctRegex!(`^_7[ ]`, "m"); + static grouped_para_indent_8 = ctRegex!(`^_8[ ]`, "m"); + static grouped_para_indent_9 = ctRegex!(`^_9[ ]`, "m"); + static grouped_para_bullet = ctRegex!(`^_[*] `, "m"); + static grouped_para_bullet_indent_1 = ctRegex!(`^_1[*] `, "m"); + static grouped_para_bullet_indent_2 = ctRegex!(`^_2[*] `, "m"); + static grouped_para_bullet_indent_3 = ctRegex!(`^_3[*] `, "m"); + static grouped_para_bullet_indent_4 = ctRegex!(`^_4[*] `, "m"); + static grouped_para_bullet_indent_5 = ctRegex!(`^_5[*] `, "m"); + static grouped_para_bullet_indent_6 = ctRegex!(`^_6[*] `, "m"); + static grouped_para_bullet_indent_7 = ctRegex!(`^_7[*] `, "m"); + static grouped_para_bullet_indent_8 = ctRegex!(`^_8[*] `, "m"); + static grouped_para_bullet_indent_9 = ctRegex!(`^_9[*] `, "m"); + static grouped_para_bullet_indent = ctRegex!(`^_(?P[1-9])[*] `, "m"); + static grouped_para_indent = ctRegex!(`^_(?P[1-9])[ ]`, "m"); + static grouped_para_indent_hang = ctRegex!(`^_(?P[0-9])_(?P[0-9])[ ]`, "m"); } } diff --git a/src/doc_reform/io_out/sqlite.d b/src/doc_reform/io_out/sqlite.d index 58f8dd9..c1b55de 100644 --- a/src/doc_reform/io_out/sqlite.d +++ b/src/doc_reform/io_out/sqlite.d @@ -294,6 +294,57 @@ template SQLiteFormatAndLoadObject() { .replaceAll(rgx.inline_cite, "$1"); return _txt; } + string inline_grouped_text_bullets_indents(M,O)( + M doc_matters, + const O obj, + string _txt, + string _suffix = ".html", + string _xml_type = "seg", + ) { + static auto rgx = RgxO(); + if (obj.metainfo.is_a == "group") { + _txt = (_txt) + .replaceAll(rgx.grouped_para_indent_1, + "  ") + .replaceAll(rgx.grouped_para_indent_2, + "    ") + .replaceAll(rgx.grouped_para_indent_3, + "      ") + .replaceAll(rgx.grouped_para_indent_4, + "        ") + .replaceAll(rgx.grouped_para_indent_5, + "          ") + .replaceAll(rgx.grouped_para_indent_6, + "            ") + .replaceAll(rgx.grouped_para_indent_7, + "              ") + .replaceAll(rgx.grouped_para_indent_8, + "                ") + .replaceAll(rgx.grouped_para_indent_9, + "                  ") + .replaceAll(rgx.grouped_para_indent_hang, "  ") + .replaceAll(rgx.grouped_para_bullet, "●  ") + .replaceAll(rgx.grouped_para_bullet_indent_1, + "  ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_2, + "    ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_3, + "      ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_4, + "        ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_5, + "          ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_6, + "            ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_7, + "              ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_8, + "                ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_9, + "                  ●  "); + } + return _txt; + } string inline_images(M,O)( M doc_matters, const O obj, @@ -457,6 +508,9 @@ template SQLiteFormatAndLoadObject() { const O obj, string _txt, ) { + if (obj.metainfo.is_a == "group") { + _txt = inline_grouped_text_bullets_indents(doc_matters, obj, _txt, xml_type); + } _txt = inline_images(doc_matters, obj, _txt, xml_type); _txt = inline_links(doc_matters, obj, _txt, xml_type); _txt = inline_notes_scroll(doc_matters, obj, _txt); @@ -527,6 +581,7 @@ template SQLiteFormatAndLoadObject() { assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "group"); string _txt = munge_html(doc_matters, obj); + _txt = inline_markup(doc_matters, obj, _txt); string o = format(q"┃

%s

┃", @@ -544,6 +599,7 @@ template SQLiteFormatAndLoadObject() { assert(obj.metainfo.is_of_type == "block"); assert(obj.metainfo.is_a == "block"); string _txt = munge_html(doc_matters, obj); + _txt = inline_markup(doc_matters, obj, _txt); string o = format(q"┃

%s

┃", obj.metainfo.is_a, diff --git a/src/doc_reform/io_out/xmls.d b/src/doc_reform/io_out/xmls.d index 0112df8..f0c7185 100644 --- a/src/doc_reform/io_out/xmls.d +++ b/src/doc_reform/io_out/xmls.d @@ -102,10 +102,51 @@ template outputXHTMLs() { .replaceAll(rgx.nbsp_char, " "); return _txt; } - @safe string special_characters(O)( + @safe string special_characters_breaks_indents_bullets(O)( const O obj, ) { string _txt = special_characters_text(obj.text); + if (obj.metainfo.is_a == "group") { + _txt = (_txt) + .replaceAll(rgx.grouped_para_indent_1, + "  ") + .replaceAll(rgx.grouped_para_indent_2, + "    ") + .replaceAll(rgx.grouped_para_indent_3, + "      ") + .replaceAll(rgx.grouped_para_indent_4, + "        ") + .replaceAll(rgx.grouped_para_indent_5, + "          ") + .replaceAll(rgx.grouped_para_indent_6, + "            ") + .replaceAll(rgx.grouped_para_indent_7, + "              ") + .replaceAll(rgx.grouped_para_indent_8, + "                ") + .replaceAll(rgx.grouped_para_indent_9, + "                  ") + .replaceAll(rgx.grouped_para_indent_hang, "  ") + .replaceAll(rgx.grouped_para_bullet, "●  ") + .replaceAll(rgx.grouped_para_bullet_indent_1, + "  ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_2, + "    ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_3, + "      ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_4, + "        ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_5, + "          ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_6, + "            ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_7, + "              ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_8, + "                ●  ") + .replaceAll(rgx.grouped_para_bullet_indent_9, + "                  ●  "); + } if (!(obj.metainfo.is_a == "code")) { _txt = (_txt) .replaceAll(rgx.xhtml_line_break, "
"); diff --git a/src/doc_reform/meta/rgx.d b/src/doc_reform/meta/rgx.d index 78b86a4..5df1e9c 100644 --- a/src/doc_reform/meta/rgx.d +++ b/src/doc_reform/meta/rgx.d @@ -96,7 +96,7 @@ static template spineRgxIn() { static raw_author_munge = ctRegex!(`(?P\S.+?),\s+(?P.+)`,"i"); static yaml_header_meta_title = ctRegex!(`^\s*title\s*:\s*(?:"?\w|$)`, "m"); static yaml_config = ctRegex!(`^[a-z]+\s*:\s*(?:"?\w|$)`, "m"); - /+ heading & paragraph operators +/ + /+ heading operators +/ static heading_a = ctRegex!(`^:?[A][~] `, "m"); static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); static headings = ctRegex!(`^:?(?P[A-D1-4])[~](?:[a-z0-9_.-]*[?]?|[!](?:glossary|bibliogrphy|biblio|references?|blurb))(?:\s|$)`,"i"); @@ -112,6 +112,7 @@ static template spineRgxIn() { static heading_biblio = ctRegex!(`^1[~][!](biblio(?:graphy)?|references?)`); static heading_glossary = ctRegex!(`^1[~][!](glossary)`); static heading_blurb = ctRegex!(`^1[~][!](blurb)`); + /+ paragraph operators +/ static para_bullet = ctRegex!(`^_[*] `); static para_bullet_indent = ctRegex!(`^_(?P[1-9])[*] `); static para_indent = ctRegex!(`^_(?P[1-9])[ ]`); -- cgit v1.2.3