From 90873fabd7451e1dd8c4b39303906e19bdc481f7 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 10 Apr 2024 22:24:34 -0400 Subject: 0.16.0 sisudoc (src/sisudoc sisudoc spine) - src/sisudoc (replaces src/doc_reform) - sisudoc spine (used more) --- src/sisudoc/meta/metadoc_from_src.d | 1509 +++++++++++++++++++++++++++++++++++ 1 file changed, 1509 insertions(+) create mode 100644 src/sisudoc/meta/metadoc_from_src.d (limited to 'src/sisudoc/meta/metadoc_from_src.d') diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d new file mode 100644 index 0000000..32954f1 --- /dev/null +++ b/src/sisudoc/meta/metadoc_from_src.d @@ -0,0 +1,1509 @@ +/+ +- Name: SisuDoc Spine, Doc Reform [a part of] + - Description: documents, structuring, processing, publishing, search + - static content generator + + - Author: Ralph Amissah + [ralph.amissah@gmail.com] + + - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved. + + - License: AGPL 3 or later: + + Spine (SiSU), a framework for document structuring, publishing and + search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU AFERO General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [https://www.gnu.org/licenses/]. + + If you have Internet connection, the latest version of the AGPL should be + available at these locations: + [https://www.fsf.org/licensing/licenses/agpl.html] + [https://www.gnu.org/licenses/agpl.html] + + - Spine (by Doc Reform, related to SiSU) uses standard: + - docReform markup syntax + - standard SiSU markup syntax with modified headers and minor modifications + - docReform object numbering + - standard SiSU object citation numbering & system + + - Homepages: + [https://www.sisudoc.org] + [https://www.doc-reform.org] + + - Git + [https://git.sisudoc.org/] + ++/ +// document abstraction: +// abstraction of sisu markup for downstream processing +// metadoc_from_src.d +module sisudoc.meta.metadoc_from_src; +@safe: +template docAbstraction() { + // ↓ abstraction imports + import + std.algorithm, + std.container, + std.file, + std.json, + std.path; + import + sisudoc.meta, + sisudoc.meta.defaults, + sisudoc.meta.rgx, + sisudoc.meta.metadoc_object_setter, + sisudoc.meta.rgx; + public import sisudoc.meta.metadoc_from_src_functions; + mixin docAbstractionFunctions; + @system auto docAbstraction(CMM,Opt,Mf) ( + char[][] markup_sourcefile_content, + CMM conf_make_meta, + Opt opt_action, + Mf manifested, + bool _new_doc + ) { + static auto rgx = RgxI(); + // ↓ abstraction init + scope(success) { + } + scope(failure) { + } + scope(exit) { + destroy(the_document_toc_section); + destroy(the_document_head_section); + destroy(the_document_body_section); + destroy(the_document_bibliography_section); + destroy(the_document_glossary_section); + destroy(the_document_blurb_section); + destroy(the_document_xml_dom_tail_section); + destroy(an_object); + destroy(processing); + destroy(biblio_arr_json); + previous_length = 0; + reset_note_numbers = true; + lev_anchor_tag = ""; + anchor_tag = ""; + } + mixin spineNode; + auto node_para_int_ = node_metadata_para_int; + auto node_para_str_ = node_metadata_para_str; + ObjGenericComposite comp_obj_; + line_occur = [ + "heading" : 0, + "para" : 0, + "glossary" : 0, + "blurb" : 0, + ]; + uint[string] dochas = [ + "inline_links" : 0, + "inline_notes" : 0, + "inline_notes_star" : 0, + "codeblock" : 0, + "table" : 0, + "block" : 0, + "group" : 0, + "poem" : 0, + "quote" : 0, + "images" : 0, + ]; + uint[string] pith = [ + "ocn" : 1, + "section" : 0, + "txt_is" : 0, + "block_is" : 0, + "block_state" : 0, + "block_delim" : 0, + "make_headings" : 0, + "dummy_heading_status" : 0, + "dummy_heading_multiple_objects" : 0, + "no_ocn_multiple_objects" : 0, + "verse_new" : 0, + ]; + string[string] object_number_poem = [ + "start" : "", + "end" : "" + ]; + string[] lv_ancestors_txt = [ "", "", "", "", "", "", "", "", ]; + int[string] lv = [ + "lv" : eN.bi.off, + "h0" : eN.bi.off, + "h1" : eN.bi.off, + "h2" : eN.bi.off, + "h3" : eN.bi.off, + "h4" : eN.bi.off, + "h5" : eN.bi.off, + "h6" : eN.bi.off, + "h7" : eN.bi.off, + "lev_int_collapsed" : 0, + ]; + int[string] collapsed_lev = [ + "h0" : eN.bi.off, + "h1" : eN.bi.off, + "h2" : eN.bi.off, + "h3" : eN.bi.off, + "h4" : eN.bi.off, + "h5" : eN.bi.off, + "h6" : eN.bi.off, + "h7" : eN.bi.off + ]; + string[string] heading_match_str = [ + "h_A": "^(none)", + "h_B": "^(none)", + "h_C": "^(none)", + "h_D": "^(none)", + "h_1": "^(none)", + "h_2": "^(none)", + "h_3": "^(none)", + "h_4": "^(none)" + ]; + Regex!char[string] heading_match_rgx = [ + "h_A": regex(r"^(none)"), + "h_B": regex(r"^(none)"), + "h_C": regex(r"^(none)"), + "h_D": regex(r"^(none)"), + "h_1": regex(r"^(none)"), + "h_2": regex(r"^(none)"), + "h_3": regex(r"^(none)"), + "h_4": regex(r"^(none)") + ]; + string _anchor_tag; + string toc_txt_; + an_object["glossary_nugget"] = ""; + an_object["blurb_nugget"] = ""; + comp_obj_ = set_object_heading("lev4", "frontmatter", "toc", "Table of Contents"); + comp_obj_.metainfo.identifier = ""; + comp_obj_.metainfo.dummy_heading = false; + comp_obj_.metainfo.object_number_off = true; + comp_obj_.metainfo.object_number_type = 0; + comp_obj_.tags.segment_anchor_tag_epub = "toc"; + comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub; + comp_obj_.tags.in_segment_html = comp_obj_.tags.anchor_tag_html; + comp_obj_.ptr.html_segnames = html_segnames_ptr; + comp_obj_.tags.anchor_tags = ["toc"]; + tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html; + tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub; + auto toc_head = comp_obj_; + html_segnames_ptr_cntr++; + the_document_toc_section = [toc_head]; + static auto mkup = InlineMarkup(); + static auto munge = ObjInlineMarkupMunge(); + auto note_section = NotesSection(); + auto bookindex_extract_hash = BookIndexNuggetHash(); + string[][string] lev4_subtoc; + string[][string] segnames = ["html": ["toc"], "epub": ["toc"]]; + int cnt1 = 1; int cnt2 = 1; int cnt3 = 1; + // abstraction init ↑ + debug (substitutions) { + writeln(__LINE__, ":", __FILE__, ": DEBUG substitutions:"); + if (!(conf_make_meta.make.headings.empty)) { + writeln(conf_make_meta.make.headings); + } + if (conf_make_meta.make.substitute) { + foreach(substitution_pair; conf_make_meta.make.substitute) { + writeln("regex to match: ", substitution_pair[Substitute.match]); + writeln("substitution to make: ", substitution_pair[Substitute.markup]); + } + } + if (conf_make_meta.make.bold) { + writeln("regex to match: ", conf_make_meta.make.bold[Substitute.match]); + writeln("substitution to make: ", conf_make_meta.make.bold[Substitute.markup]); + } + if (conf_make_meta.make.emphasis) { + writeln("regex to match: ", conf_make_meta.make.emphasis[Substitute.match]); + writeln("substitution to make: ", conf_make_meta.make.emphasis[Substitute.markup]); + } + if (conf_make_meta.make.italics) { + writeln("regex to match: ", conf_make_meta.make.italics[Substitute.match]); + writeln("substitution to make: ", conf_make_meta.make.italics[Substitute.markup]); + } + } + auto loopMarkupSrcByLine( + char[][] markup_sourcefile_content, + string[string] an_object, + uint[string] pith, + ) { + _loopMarkupSrcByLineStruct ret; + srcDocLoopLineByLine_: + foreach (line; markup_sourcefile_content) { + // ↓ markup document/text line by line + // "line" variable can be empty but should never be null + // scope + scope(exit) { } + scope(failure) { + stderr.writefln( + "\n%s\n%s\n\n%s:%s\nFAILED while processing the file: ❮❮ %s ❯❯ on line with text:\n%s\n", + __MODULE__, __FUNCTION__, + __FILE__, __LINE__, + manifested.src.filename, line, + ); + } + debug(source) { writeln(line); } + debug(srclines) { if (!line.empty) { writefln("* %s", line); } } + if (!line.empty) { pith = line._check_ocn_status_(pith); } + if ( pith["block_is"] == eN.blk_is.code + && pith["block_state"] == eN.blk_state.on + ) { + // block object: code + { + ST_txt_by_line_block_generic _get = line.txt_by_line_block_code(an_object, pith); + { + an_object = _get.this_object; + pith = _get.pith; + } + } + continue; + } else if (!matchFirst(line, rgx.skip_from_regular_parse)) { + // object other than "code block" object + // (includes regular text paragraph, headings & blocks other than code) + // heading, glossary, blurb, poem, group, block, quote, table + line = line.inline_markup_faces; // by text line (rather than by text object), linebreaks in para problematic + if (line.matchFirst(rgx.heading_biblio) + || (pith["section"] == eN.sect.bibliography + && ((!(line.matchFirst(rgx.heading_glossary))) + && (!(line.matchFirst(rgx.heading_blurb))) + && (!(line.matchFirst(rgx.heading))) + && (!(line.matchFirst(rgx.comment))))) + ) { + pith["section"] = eN.sect.bibliography; + if (opt_action.backmatter && opt_action.section_biblio) { + { + ST_txt_by_line_block_biblio _get = line.txt_by_line_block_biblio(pith, bib_entry, biblio_entry_str_json, biblio_arr_json); + { + pith = _get.pith; + bib_entry = _get.bib_entry; + biblio_entry_str_json = _get.biblio_entry_str_json; + biblio_arr_json = _get.biblio_arr_json; + } + } + debug(bibliobuild) { + writeln("- ", biblio_entry_str_json); + writeln("-> ", biblio_arr_json.length); + } + } + continue; + } else if (line.matchFirst(rgx.heading_glossary) + || (pith["section"] == eN.sect.glossary + && ((!(line.matchFirst(rgx.heading_biblio))) + && (!(line.matchFirst(rgx.heading_blurb))) + && (!(line.matchFirst(rgx.heading))) + && (!(line.matchFirst(rgx.comment))))) + ) { + // within section (block object): glossary + debug(glossary) { writeln(__LINE__); writeln(line); } + pith["section"] = eN.sect.glossary; + if (opt_action.backmatter && opt_action.section_glossary) { + ST_the_section add_to_glossary_sect = line.build_the_glossary_section(pith, tag_assoc); // double check, should not be necessary to pass pith + the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[0]; + if (add_to_glossary_sect.comp_section_obj.length > 1) { // heading + the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[1]; + } + pith = add_to_glossary_sect.pith; + tag_assoc = add_to_glossary_sect.tag_assoc; + } + continue; + } else if (line.matchFirst(rgx.heading_blurb) + || (pith["section"] == eN.sect.blurb + && ((!(line.matchFirst(rgx.heading_glossary))) + && (!(line.matchFirst(rgx.heading_biblio))) + && (!(line.matchFirst(rgx.heading))) + && (!(line.matchFirst(rgx.comment))))) + ) { + pith["section"] = eN.sect.blurb; + debug(blurb) { writeln(__LINE__); writeln(line); } + if ((opt_action.backmatter && opt_action.section_blurb) && !(line.empty)) { + ST_the_section add_to_blurb_sect = line.build_the_blurb_section(pith, tag_assoc, opt_action); // double check, should not be necessary to pass pith + the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[0]; + if (add_to_blurb_sect.comp_section_obj.length > 1) { // heading + the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[1]; + } + pith = add_to_blurb_sect.pith; + tag_assoc = add_to_blurb_sect.tag_assoc; + } + continue; + } else if (pith["block_state"] == eN.blk_state.on) { + if (pith["block_is"] == eN.blk_is.quote) { + line = line + ._doc_header_and_make_substitutions_(conf_make_meta) + ._doc_header_and_make_substitutions_fontface_(conf_make_meta); + { + auto _get = line.txt_by_line_block_quote(an_object, pith); + { + an_object = _get.this_object; + pith = _get.pith; + } + } + continue; + } else if (pith["block_is"] == eN.blk_is.group) { + line = line + ._doc_header_and_make_substitutions_(conf_make_meta) + ._doc_header_and_make_substitutions_fontface_(conf_make_meta) + .replaceAll(rgx.para_delimiter, mkup.br_line_spaced ~ "$1"); + { + auto _get = line.txt_by_line_block_group(an_object, pith); + { + an_object = _get.this_object; + pith = _get.pith; + } + } + continue; + } else if (pith["block_is"] == eN.blk_is.block) { + line = line + ._doc_header_and_make_substitutions_(conf_make_meta) + ._doc_header_and_make_substitutions_fontface_(conf_make_meta); + if (auto m = line.match(rgx.spaces_keep)) { + line = line + .replaceAll(rgx.spaces_keep, (m.captures[1]).translate([ ' ' : mkup.nbsp ])); + } + { + auto _get = line.txt_by_line_block_block(an_object, pith); + { + an_object = _get.this_object; + pith = _get.pith; + } + } + continue; + } else if (pith["block_is"] == eN.blk_is.poem) { + { + auto _get = line.txt_by_line_block_poem(an_object, pith, cntr, object_number_poem, conf_make_meta, tag_in_seg); + { + an_object = _get.this_object; + pith = _get.pith; + cntr = _get.cntr; + } + } + continue; + } else if (pith["block_is"] == eN.blk_is.table) { + { + auto _get = line.txt_by_line_block_table(an_object, pith, conf_make_meta); + { + an_object = _get.this_object; + pith = _get.pith; + conf_make_meta = _get.conf_make_meta; + } + } + continue; + } + } else { + // not within a block group + assert( + (pith["block_state"] == eN.blk_state.off) + || (pith["block_state"] == eN.blk_state.closing), + "block status: none or closed" + ); + if (line.matchFirst(rgx.block_open)) { + if (line.matchFirst(rgx.block_poem_open)) { + // poem to verse exceptions! + object_reset(an_object); + processing.remove("verse"); + object_number_poem["start"] = obj_cite_digits.object_number.to!string; + } + { + auto _get = line.txt_by_line_block_start(pith, dochas, object_number_poem); + { + pith = _get.pith; + dochas = _get.dochas; + object_number_poem = _get.object_number_poem; + } + } + continue; + } else if (!line.empty) { + // line not empty - non blocks (headings, paragraphs) & closed blocks + assert(!line.empty, "line tested, line not empty surely:\n \"" ~ line ~ "\""); + assert( + (pith["block_state"] == eN.blk_state.off) + || (pith["block_state"] == eN.blk_state.closing), + "code block status: none or closed" + ); + if (pith["block_state"] == eN.blk_state.closing) { + debug(check) { writeln(__LINE__); writeln(line); } + assert( + line.matchFirst(rgx.book_index_item) + || line.matchFirst(rgx.book_index_item_open) + || pith["section"] == eN.sect.book_index, + "\nblocks closed, unless followed by book index, non-matching line:\n \"" + ~ line ~ "\"" + ); + } + if (line.matchFirst(rgx.book_index_item) + || line.matchFirst(rgx.book_index_item_open) + || pith["section"] == eN.sect.book_index) { + { // book_index + auto _get = line.flow_book_index_(an_object, book_idx_tmp, pith, opt_action); + { + an_object = _get.this_object; + pith = _get.pith; + book_idx_tmp = _get.book_idx_tmp; + } + } + } else { + // not book_index + an_object_key = "body_nugget"; + if (auto m = line.matchFirst(rgx.comment)) { + // matched comment + debug(comment) { writeln(line); } + an_object[an_object_key] ~= line ~= "\n"; + comp_obj_comment = comp_obj_comment.init; + comp_obj_comment.metainfo.is_of_part = "comment"; // breaks flow + comp_obj_comment.metainfo.is_of_section = "comment"; // breaks flow + comp_obj_comment.metainfo.is_of_type = "comment"; + comp_obj_comment.metainfo.is_a = "comment"; + comp_obj_comment.text = an_object[an_object_key].strip; + the_document_body_section ~= comp_obj_comment; + { + auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); + { + line_occur = _get.line_occur; + an_object = _get.this_object; + pith = _get.pith; + } + } + processing.remove("verse"); + ++cntr; + } else if ((line_occur["para"] == eN.bi.off + && line_occur["heading"] == eN.bi.off) + && pith["txt_is"] == eN.txt_is.off + ) { // heading or para but neither flag nor line exists + if ((conf_make_meta.make.headings.length > 2) + && (pith["make_headings"] == eN.bi.off)) { + // heading found + { + auto _get = line.flow_heading_found_(heading_match_str, conf_make_meta.make.headings, heading_match_rgx, pith); + { + heading_match_str = _get.heading_match_str; + heading_match_rgx = _get.heading_match_rgx; + pith = _get.pith; + } + } + } + if (pith["make_headings"] == eN.bi.on + && (line_occur["para"] == eN.bi.off + && line_occur["heading"] == eN.bi.off) + && pith["txt_is"] == eN.txt_is.off + ) { + // heading make set + { + auto _get = line.flow_heading_make_set_(line_occur, heading_match_rgx, pith); + { + line = _get.line; + an_object = _get.this_object; + pith = _get.pith; + } + } + } + // TODO node info: all headings identified at this point, + // - extract node info here?? + // - how long can it wait? + // - should be incorporated in composite objects + // - should happen before endnote links set (they need to be moved down?) + if (line.matchFirst(rgx.headings)) { + // heading match + line = line._doc_header_and_make_substitutions_(conf_make_meta); + { + auto _get = line.flow_heading_matched_( + an_object, + line_occur, + an_object_key, + lv, + collapsed_lev, + pith, + conf_make_meta, + ); + { + an_object = _get.this_object; + pith = _get.pith; + } + } + } else if (line_occur["para"] == eN.bi.off) { + // para match + an_object_key = "body_nugget"; + line = line + ._doc_header_and_make_substitutions_(conf_make_meta) + ._doc_header_and_make_substitutions_fontface_(conf_make_meta); + { + auto _get = line.flow_para_match_(an_object, an_object_key, indent, bullet, pith, line_occur); + { + an_object = _get.this_object; + an_object_key = _get.this_object_key; + pith = _get.pith; + indent = _get.indent; + bullet = _get.bullet; + line_occur = _get.line_occur; + } + } + } + } else if (line_occur["heading"] > eN.bi.off) { + // heading + debug(heading) { writeln(line); } + an_object[an_object_key] ~= line ~= "\n"; + ++line_occur["heading"]; + } else if (line_occur["para"] > eN.bi.off) { + // paragraph + debug(para) { writeln(an_object_key, "-> ", line); } + line = line + ._doc_header_and_make_substitutions_(conf_make_meta) + ._doc_header_and_make_substitutions_fontface_(conf_make_meta); + an_object[an_object_key] ~= " " ~ line; + ++line_occur["para"]; + } + } + } else if (pith["block_state"] == eN.blk_state.closing) { + // line empty, with blocks flag + { + auto _get = line.flow_block_flag_line_empty_( + an_object, + bookindex_extract_hash, + the_document_body_section, + bookindex_unordered_hashes, + obj_cite_digits, + comp_obj_, + cntr, + pith, + object_number_poem, + conf_make_meta, + tag_in_seg, + ); + { + an_object = _get.this_object; + the_document_body_section = _get.the_document_body_section; + bookindex_unordered_hashes = _get.bookindex_unordered_hashes; + obj_cite_digits = _get.obj_cite_digits; + comp_obj_ = _get.comp_obj_; + cntr = _get.cntr; + pith = _get.pith; + } + } + } else { + // line.empty, post contents, empty variables: + assert( + line.empty, + "\nline should be empty:\n \"" + ~ line ~ "\"" + ); + assert( + (pith["block_state"] == eN.blk_state.off), + "code block status: none" + ); + if (_new_doc) { + tag_assoc = tag_assoc.init; + lv0to3_tags = lv0to3_tags.init; + tag_in_seg = tag_in_seg.init; + } + if (pith["txt_is"] == eN.txt_is.heading + && line_occur["heading"] > eN.bi.off + ) { + // heading object (current line empty) + obj_cite_digits = (an_object["lev_markup_number"].to!int == 0) + ? ocn_emit(eN.ocn.reset) + : ocn_emit(pith["ocn"]); + an_object["is"] = "heading"; + an_object_key = "body_nugget"; + ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_object_and_anchor_tags_struct + = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, ((_new_doc) ? Yes._new_doc : No._new_doc)); + an_object["substantive"] = substantive_object_and_anchor_tags_struct.obj_txt; + anchor_tag = substantive_object_and_anchor_tags_struct.anchor_tag; + if (_new_doc) { + cnt1 = 1; + cnt2 = 1; + cnt3 = 1; + _new_doc = false; + } + if ( + an_object["lev_markup_number"].to!int == 4 + && (!(anchor_tag.empty) + || (lv0to3_tags.length > 0)) + ) { + tag_in_seg["seg_lv4"] = anchor_tag; + tag_in_seg["seg_lv1to4"] = anchor_tag; + lev_anchor_tag = anchor_tag; + tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"]; + tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"]; + if (lv0to3_tags.length > 0) { + // names used for html markup segments 1 to 4 (rather than epub which has separate segments for A to D) + foreach (lv0_to_lv3_html_tag; lv0to3_tags) { + tag_assoc[lv0_to_lv3_html_tag]["seg_lv4"] = anchor_tag; + } + } + anchor_tag_ = anchor_tag; + lv0to3_tags = lv0to3_tags.init; + } else if (an_object["lev_markup_number"].to!int > 4) { + tag_in_seg["seg_lv4"] = anchor_tag_; + tag_in_seg["seg_lv1to4"] = anchor_tag_; + lev_anchor_tag = anchor_tag; + tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"]; + tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"]; + } else if (an_object["lev_markup_number"].to!int < 4) { + string segn; + switch (an_object["lev_markup_number"].to!int) { + // names used for epub markup segments A to D + case 0: + segn = "_the_title"; + goto default; + case 1: + segn = "_part_" ~ cnt1.to!string; + ++cnt1; + goto default; + case 2: + segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string; + ++cnt2; + goto default; + case 3: + segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string ~ "_" ~ cnt3.to!string; + ++cnt3; + goto default; + default: + lv0to3_tags ~= obj_cite_digits.object_number.to!string; + lv0to3_tags ~= segn; + tag_in_seg["seg_lv4"] = segn; // for html segname need following lv4 not yet known + tag_in_seg["seg_lv1to4"] = segn; + break; + } + } + an_object["bookindex_nugget"] + = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : ""; + bookindex_unordered_hashes + = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg); + _anchor_tag = obj_cite_digits.identifier; + // (incrementally build toc) table of contents here! + { + auto _get = obj_im.flow_table_of_contents_gather_headings( + an_object, + conf_make_meta, + tag_in_seg, + _anchor_tag, + lev4_subtoc, + the_document_toc_section, + ); + { + the_document_toc_section = _get.the_document_toc_section; + lev4_subtoc = _get.lev4_subtoc; + } + } + if (an_object["lev_markup_number"] == "4") { + segnames["html"] ~= tag_in_seg["seg_lv4"]; + html_segnames_ptr = html_segnames_ptr_cntr; + html_segnames_ptr_cntr++; + } + if (an_object["lev_markup_number"].to!int <= 4) { + segnames["epub"] ~= tag_in_seg["seg_lv1to4"]; + } + auto comp_obj_ = node_construct.node_emitter_heading( + an_object, + tag_in_seg, + lev_anchor_tag, + tag_assoc, + obj_cite_digits, // OCNset + cntr, // int + heading_ptr, // int + lv_ancestors_txt, // string[] + html_segnames_ptr, // int + substantive_object_and_anchor_tags_struct, + ); + ++heading_ptr; + debug(segments) { + writeln(an_object["lev_markup_number"]); + writeln(tag_in_seg["seg_lv4"]); + writeln(tag_in_seg["seg_lv1to4"]); + } + the_document_body_section ~= comp_obj_; + debug(objectrelated1) { writeln(line); } // check + { + auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); + { + line_occur = _get.line_occur; + an_object = _get.this_object; + pith = _get.pith; + } + } + an_object.remove("lev"); + an_object.remove("lev_markup_number"); + processing.remove("verse"); + ++cntr; + } else if (pith["txt_is"] == eN.txt_is.para + && line_occur["para"] > eN.bi.off + ) { // paragraph object (current line empty) - repeated character paragraph separator + if ((an_object[an_object_key].to!string).matchFirst(rgx.repeated_character_line_separator)) { + pith["ocn"] = eN.ocn.off; + } + obj_cite_digits = ocn_emit(pith["ocn"]); + an_object["bookindex_nugget"] = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : ""; + bookindex_unordered_hashes = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg); + an_object["is"] = "para"; + auto comp_obj_ = node_construct.node_location_emitter( + content_non_header, + tag_in_seg, + lev_anchor_tag, + tag_assoc, + obj_cite_digits, + cntr, + heading_ptr-1, + an_object["is"], + ); + ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_obj_misc_struct + = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, No._new_doc); + an_object["substantive"] = substantive_obj_misc_struct.obj_txt; + anchor_tag = substantive_obj_misc_struct.anchor_tag; + comp_obj_ = set_object_generic("body", "body", "para", "para", an_object["substantive"].to!string.strip, obj_cite_digits.object_number); + comp_obj_.tags.html_segment_anchor_tag_is = tag_in_seg["seg_lv4"]; + comp_obj_.tags.epub_segment_anchor_tag_is = tag_in_seg["seg_lv1to4"]; + comp_obj_.metainfo.identifier = obj_cite_digits.identifier; + comp_obj_.metainfo.object_number_off = (obj_cite_digits.off == 0) ? true : false; // TODO + comp_obj_.metainfo.o_n_book_index = obj_cite_digits.bkidx; + comp_obj_.metainfo.object_number_type = obj_cite_digits.type; + comp_obj_.attrib.indent_hang = indent["hang_position"]; + comp_obj_.attrib.indent_base = indent["base_position"]; + comp_obj_.attrib.bullet = bullet; + comp_obj_.tags.anchor_tags = [anchor_tag]; anchor_tag=""; + comp_obj_.has.inline_notes_reg = substantive_obj_misc_struct.has_notes_reg; + comp_obj_.has.inline_notes_star = substantive_obj_misc_struct.has_notes_star; + comp_obj_.has.inline_links = substantive_obj_misc_struct.has_links; + comp_obj_.has.image_without_dimensions = substantive_obj_misc_struct.has_images_without_dimensions; + the_document_body_section ~= comp_obj_; + tag_assoc = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc); + { + auto _get = txt_by_line_common_reset_(line_occur, an_object, pith); + { + line_occur = _get.line_occur; + an_object = _get.this_object; + pith = _get.pith; + } + } + indent = [ + "hang_position" : 0, + "base_position" : 0, + ]; + bullet = false; + processing.remove("verse"); + ++cntr; + // } else { // could be useful to test line variable should be empty and never null + } + } // close else for line empty + } // close else for not the above + } // close after non code, other blocks or regular text + // unless (the_document_body_section.length == 0) ? + if (the_document_body_section.length > 0) { + if (((the_document_body_section[$-1].metainfo.is_a == "para") + || (the_document_body_section[$-1].metainfo.is_a == "heading") + || (the_document_body_section[$-1].metainfo.is_a == "quote") + || (the_document_body_section[$-1].metainfo.is_a == "group") + || (the_document_body_section[$-1].metainfo.is_a == "block") + || (the_document_body_section[$-1].metainfo.is_a == "verse")) + && (the_document_body_section.length > previous_length)) { + if ((the_document_body_section[$-1].metainfo.is_a == "heading") + && (the_document_body_section[$-1].metainfo.heading_lev_markup < 5)) { + pith["section"] = eN.sect.unset; + } + if (the_document_body_section[$-1].metainfo.is_a == "verse") { + // scan for endnotes for whole poem (each verse in poem) + foreach (i; previous_length .. the_document_body_section.length) { + if (the_document_body_section[i].metainfo.is_a == "verse") { + if ((the_document_body_section[i].text).match( + rgx.inline_notes_al_all_note + )) { + object_notes = note_section.gather_notes_for_endnote_section( + the_document_body_section, + tag_in_seg, + (i).to!int, + ); + } + } + } + } else { + // scan object for endnotes + previous_length = the_document_body_section.length.to!int; + if ((the_document_body_section[$-1].text).match( + rgx.inline_notes_al_all_note + )) { + previous_count = (the_document_body_section.length -1).to!int; + object_notes = note_section.gather_notes_for_endnote_section( + the_document_body_section, + tag_in_seg, + (the_document_body_section.length-1).to!int, + ); + } + } + previous_length = the_document_body_section.length.to!int; + } + } + } + ret.toc = the_document_toc_section; + ret.body = the_document_body_section; + ret.glossary = the_document_glossary_section; + ret.blurb = the_document_blurb_section; + ret.object_notes = object_notes; + ret.segnames = segnames; + return ret; + } + { // loopMarkupSrcByLine + auto _doc_by_line = loopMarkupSrcByLine(markup_sourcefile_content, an_object, pith); + the_document_toc_section = _doc_by_line.toc; + the_document_body_section = _doc_by_line.body; + the_document_glossary_section = _doc_by_line.glossary; + the_document_blurb_section = _doc_by_line.blurb; + segnames = _doc_by_line.segnames; + object_notes = _doc_by_line.object_notes; // endnotes, compare, not sure is used + destroy(_doc_by_line); + } + { // EOF backMatter + comp_obj_ = set_object_heading("lev1", "backmatter", "tail", ""); + comp_obj_.metainfo.identifier = ""; + comp_obj_.metainfo.dummy_heading = false; + comp_obj_.metainfo.object_number_off = false; + comp_obj_.metainfo.object_number_type = 0; + comp_obj_.tags.segment_anchor_tag_epub = "_part_eof"; + comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub; + comp_obj_.tags.in_segment_html = "tail"; + comp_obj_.tags.anchor_tags = ["section_eof"]; + comp_obj_.metainfo.dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0]; + comp_obj_.metainfo.dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0]; + the_document_xml_dom_tail_section ~= comp_obj_; + tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html; + tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub; + } + // endNotes + ST_endnotes en_st = note_section.backmatter_endnote_objects(obj_cite_digits, opt_action); + { // endnotes + the_document_endnotes_section = en_st.endnotes; + obj_cite_digits = en_st.ocn; + debug(endnotes) { + writefln("%s %s", __LINE__, the_document_endnotes_section.length); + foreach (o; the_document_endnotes_section) { writeln(o); } + } + } + { // glossary + if (an_object["glossary_nugget"].length == 0) { + comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Glossary section"); + comp_obj_.metainfo.identifier = ""; + comp_obj_.metainfo.dummy_heading = true; + comp_obj_.metainfo.object_number_off = true; + comp_obj_.metainfo.object_number_type = 0; + the_document_glossary_section ~= comp_obj_; + } + debug(glossary) { foreach (gloss; the_document_glossary_section) { writeln(gloss.text); } } + } + { // bibliography + string[] biblio_unsorted_incomplete = biblio_arr_json.dup; + ST_biblio_section biblio_section = backmatter_make_the_bibliography_section(biblio_unsorted_incomplete, bib_arr_json); + the_document_bibliography_section = biblio_section.bibliography_section; + tag_assoc = biblio_section.tag_assoc; + } + { // bookindex + BookIndexReportSection bi = BookIndexReportSection(); + ST_bookindex bi_st + = bi.backmatter_bookindex_build_abstraction_section(bookindex_unordered_hashes, obj_cite_digits, opt_action); + destroy(bookindex_unordered_hashes); + the_document_bookindex_section = bi_st.bookindex; + obj_cite_digits = bi_st.ocn; + debug(bookindex) { foreach (bi_entry; the_document_bookindex_section) { writeln(bi_entry); } } + } + { // blurb + if (an_object["blurb_nugget"].length == 0) { + comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Blurb section"); + comp_obj_.metainfo.identifier = ""; + comp_obj_.metainfo.object_number_off = true; + comp_obj_.metainfo.object_number_type = 0; + comp_obj_.tags.segment_anchor_tag_epub = ""; + comp_obj_.tags.anchor_tag_html = ""; + comp_obj_.tags.in_segment_html = ""; + the_document_blurb_section ~= comp_obj_; + } + debug(blurb) { foreach (blurb; the_document_blurb_section) { writeln(blurb.text); } } + } + { // toc gather backmatter + the_document_toc_section ~= backmatter_gather_table_of_contents(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section); // + } + { // document head and body + the_document_head_section ~= the_document_body_section[0]; + the_document_body_section = the_document_body_section[1..$]; + } + { // document ancestors + ST_ancestors get_ancestors; + get_ancestors = the_document_body_section.after_doc_determine_ancestors(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section); + the_document_body_section = get_ancestors.the_document_body_section; + the_document_endnotes_section = get_ancestors.the_document_endnotes_section; + the_document_glossary_section = get_ancestors.the_document_glossary_section; + the_document_bibliography_section = get_ancestors.the_document_bibliography_section; + the_document_bookindex_section = get_ancestors.the_document_bookindex_section; + the_document_blurb_section = get_ancestors.the_document_blurb_section; + } + { // document segnames + ST_segnames get_segnames; + get_segnames = the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); // + segnames = get_segnames.segnames; + html_segnames_ptr_cntr = get_segnames.html_segnames_ptr_cntr; + html_segnames_ptr = get_segnames.html_segnames_ptr; + } + // document head + string[] segnames_0_to_4; + foreach (ref obj; the_document_head_section) { + if (obj.metainfo.is_a == "heading") { + debug(dom) { writeln(obj.text); } + if (obj.metainfo.heading_lev_markup <= 4) { + segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; + } + if (obj.metainfo.heading_lev_markup == 0) { + // TODO second hit (of two) with same assertion failure, check, fix and reinstate + // assert( obj.metainfo.ocn == 1, + // "Title OCN should be 1 not: " ~ obj.metainfo.ocn.to!string); // bug introduced 0.18.1 + obj.metainfo.ocn = 1; + obj.metainfo.identifier = "1"; + obj.metainfo.object_number_type = OCNtype.ocn; + } + // dom structure (marked up & collapsed) + if (opt_action.meta_processing_xml_dom) { + obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); + obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); + } + obj = obj.obj_heading_ancestors(lv_ancestors_txt); + } + obj = _links(obj); + } + if (the_document_toc_section.length > 1) { + // scroll + dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; + dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; + foreach (ref obj; the_document_toc_section) { + if (obj.metainfo.is_a == "heading") { + if (obj.metainfo.heading_lev_markup <= 4) { + segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; + if (obj.metainfo.heading_lev_markup == 4) { + obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; + assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], + obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); + } + } + // dom structure (marked up & collapsed) + if (opt_action.meta_processing_xml_dom) { + obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); + obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); + } + obj = obj.obj_heading_ancestors(lv_ancestors_txt); + } + obj = _links(obj); + } + } + // images + string[] _images; + // multiple 1~ levels, loop through document body + if (the_document_body_section.length > 1) { + foreach (ref obj; the_document_body_section) { + if (!(obj.metainfo.identifier.empty)) { + if (!(((obj.metainfo.identifier) in tag_assoc) + && ("seg_lv4" in tag_assoc[(obj.metainfo.identifier)])) + ) { + tag_assoc[(obj.metainfo.identifier)]["seg_lv4"] + = obj.tags.html_segment_anchor_tag_is; + } + tag_assoc[(obj.metainfo.identifier)]["seg_lv1to4"] + = obj.tags.epub_segment_anchor_tag_is; + } + if (obj.metainfo.is_a == "heading") { + debug(dom) { writeln(obj.text); } + if (obj.metainfo.heading_lev_markup <= 4) { + segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; + if (obj.metainfo.heading_lev_markup == 4) { + obj.tags.lev4_subtoc = lev4_subtoc[obj.tags.anchor_tag_html]; + obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; + if (segnames["html"].length > obj.ptr.html_segnames + 1) { + obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; + } + assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], + obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); + } + } + // dom structure (marked up & collapsed) + if (opt_action.meta_processing_xml_dom) { + obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); + obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); + } + obj = obj.obj_heading_ancestors(lv_ancestors_txt); + } else if (obj.metainfo.is_a == "para") { + _images ~= extract_images(obj.text); + obj = _image_dimensions(obj, manifested); + } + obj = _links(obj); + } + } + auto image_list = (_images.sort()).uniq; + // endnotes optional only one 1~ level + if (the_document_endnotes_section.length > 1) { + dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; + dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; + dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup; + dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status_buffer.dup; + foreach (ref obj; the_document_endnotes_section) { + if (obj.metainfo.is_a == "heading") { + debug(dom) { writeln(obj.text); } + if (obj.metainfo.heading_lev_markup == 1) { + obj_cite_digits = ocn_emit(eN.ocn.on); + obj.metainfo.ocn = obj_cite_digits.object_number; + obj.metainfo.identifier = obj_cite_digits.identifier; + } + if (obj.metainfo.heading_lev_markup <= 4) { + segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; + if (obj.metainfo.heading_lev_markup == 4) { + obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; + if (segnames["html"].length > obj.ptr.html_segnames + 1) { + obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; + } + assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], + obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); + } + } + // dom structure (marked up & collapsed) + if (opt_action.meta_processing_xml_dom) { + obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); + obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); + } + obj = obj.obj_heading_ancestors(lv_ancestors_txt); + } + obj = _links(obj); + } + } + // glossary optional only one 1~ level + if (the_document_glossary_section.length > 1) { + foreach (ref obj; the_document_glossary_section) { + if (obj.metainfo.is_a == "heading") { + debug(dom) { writeln(obj.text); } + if (obj.metainfo.heading_lev_markup == 1) { + obj_cite_digits = ocn_emit(eN.ocn.on); + obj.metainfo.ocn = obj_cite_digits.object_number; + obj.metainfo.identifier = obj_cite_digits.identifier; + } + if (obj.metainfo.heading_lev_markup <= 4) { + segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; + if (obj.metainfo.heading_lev_markup == 4) { + obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; + if (segnames["html"].length > obj.ptr.html_segnames + 1) { + obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; + } + assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], + obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); + } + } + // dom structure (marked up & collapsed) + if (opt_action.meta_processing_xml_dom) { + obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); + obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); + } + obj = obj.obj_heading_ancestors(lv_ancestors_txt); + } else if (obj.metainfo.is_a == "glossary" && !(obj.text.empty)) { + obj_cite_digits = ocn_emit(eN.ocn.on); + obj.metainfo.ocn = obj_cite_digits.object_number; + obj.metainfo.identifier = obj_cite_digits.identifier; + } + obj = _links(obj); + } + } + // bibliography optional only one 1~ level + if (the_document_bibliography_section.length > 1) { + foreach (ref obj; the_document_bibliography_section) { + if (obj.metainfo.is_a == "heading") { + debug(dom) { writeln(obj.text); } + if (obj.metainfo.heading_lev_markup == 1) { + obj_cite_digits = ocn_emit(eN.ocn.on); + obj.metainfo.ocn = obj_cite_digits.object_number; + obj.metainfo.identifier = obj_cite_digits.identifier; + } + if (obj.metainfo.heading_lev_markup <= 4) { + segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; + if (obj.metainfo.heading_lev_markup == 4) { + obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; + if (segnames["html"].length > obj.ptr.html_segnames + 1) { + obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; + } + assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], + obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); + } + } + // dom structure (marked up & collapsed) + if (opt_action.meta_processing_xml_dom) { + obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); + obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); + } + obj = obj.obj_heading_ancestors(lv_ancestors_txt); + } else if (obj.metainfo.is_a == "bibliography") { + obj_cite_digits = ocn_emit(eN.ocn.on); + obj.metainfo.ocn = obj_cite_digits.object_number; + obj.metainfo.identifier = obj_cite_digits.identifier; + } + obj = _links(obj); + } + } + // book index, optional only one 1~ level + int ocn_ = obj_cite_digits.object_number; + int ocn_bkidx_ = 0; + int ocn_bidx_; + if (the_document_bookindex_section.length > 1) { // scroll + dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; + dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; + foreach (ref obj; the_document_bookindex_section) { + if (obj.metainfo.is_a == "heading") { + // debug(dom) { } + if (obj.metainfo.heading_lev_markup <= 4) { + segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; + } + if (obj.metainfo.heading_lev_markup == 1) { + obj_cite_digits = ocn_emit(eN.ocn.on); + obj.metainfo.ocn = obj_cite_digits.object_number; + obj.metainfo.identifier = obj_cite_digits.identifier; + } + if (obj.metainfo.heading_lev_markup <= 4) { + if (obj.metainfo.heading_lev_markup == 4) { + obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; + if (segnames["html"].length > obj.ptr.html_segnames + 1) { + obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; + } + assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], + obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); + } + } + // dom structure (marked up & collapsed) + if (opt_action.meta_processing_xml_dom) { + obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); + obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); + } + obj = obj.obj_heading_ancestors(lv_ancestors_txt); + } else if (obj.metainfo.is_a == "bookindex") { + obj_cite_digits = ocn_emit(eN.ocn.bkidx); + obj.metainfo.ocn = obj_cite_digits.object_number; + obj.metainfo.identifier = obj_cite_digits.identifier; + obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; + obj.metainfo.object_number_type = OCNtype.bkidx; + } + obj = _links(obj); + } + // TODO assert failure, reinstate + // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?"); + } + // blurb optional only one 1~ level + if (the_document_blurb_section.length > 1) { + foreach (ref obj; the_document_blurb_section) { + if (obj.metainfo.is_a == "heading") { + debug(dom) { writeln(obj.text); } + if (obj.metainfo.heading_lev_markup == 1) { + obj_cite_digits = ocn_emit(eN.ocn.on); + obj.metainfo.ocn = obj_cite_digits.object_number; + obj.metainfo.identifier = obj_cite_digits.identifier; + } + if (obj.metainfo.heading_lev_markup <= 4) { + segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub; + if (obj.metainfo.heading_lev_markup == 4) { + obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1]; + if (segnames["html"].length > obj.ptr.html_segnames + 1) { + obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1]; + } + assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames], + obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]); + } + } + // dom structure (marked up & collapsed) + if (opt_action.meta_processing_xml_dom) { + obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup); + obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed); + } + obj = obj.obj_heading_ancestors(lv_ancestors_txt); + } else if (obj.metainfo.is_a == "blurb") { + obj_cite_digits = ocn_emit(eN.ocn.off); + obj.metainfo.object_number_off = obj_cite_digits.off; + obj.metainfo.object_number_type = OCNtype.non; + } + obj = _links(obj); + } + } + // get descendants + if (the_document_body_section.length > 1) { + auto pairs = after_doc_get_descendants( + the_document_head_section ~ + the_document_body_section ~ + the_document_endnotes_section ~ + the_document_glossary_section ~ + the_document_bibliography_section ~ + the_document_bookindex_section ~ + the_document_blurb_section ~ + the_document_xml_dom_tail_section + ); + debug(descendants_tuple) { + pairs = pairs.sort(); + foreach (pair; pairs) { // (pair; pairs.sort()) + writeln(pair[0], "..", pair[1]); + } + } + foreach (ref obj; the_document_head_section) { + if (obj.metainfo.is_a == "heading") { + foreach (pair; pairs) { + if (obj.metainfo.ocn == pair[0]) { + obj.metainfo.last_descendant_ocn = pair[1]; + } + } + } + } + if (the_document_body_section.length > 1) { + foreach (ref obj; the_document_body_section) { + if (obj.metainfo.is_a == "heading") { + foreach (pair; pairs) { + if (obj.metainfo.ocn == pair[0]) { + obj.metainfo.last_descendant_ocn = pair[1]; + } + } + } + } + } + if (the_document_endnotes_section.length > 1) { + foreach (ref obj; the_document_endnotes_section) { + if (obj.metainfo.is_a == "heading") { + foreach (pair; pairs) { + if (obj.metainfo.ocn == pair[0]) { + obj.metainfo.last_descendant_ocn = pair[1]; + } + } + } + } + } + if (the_document_glossary_section.length > 1) { + foreach (ref obj; the_document_glossary_section) { + if (obj.metainfo.is_a == "heading") { + foreach (pair; pairs) { + if (obj.metainfo.ocn == pair[0]) { + obj.metainfo.last_descendant_ocn = pair[1]; + } + } + } + } + } + if (the_document_bibliography_section.length > 1) { + foreach (ref obj; the_document_bibliography_section) { + if (obj.metainfo.is_a == "heading") { + foreach (pair; pairs) { + if (obj.metainfo.ocn == pair[0]) { + obj.metainfo.last_descendant_ocn = pair[1]; + } + } + } + } + } + if (the_document_bookindex_section.length > 1) { + foreach (ref obj; the_document_bookindex_section) { + if (obj.metainfo.is_a == "heading") { + foreach (pair; pairs) { + if (obj.metainfo.ocn == pair[0]) { + obj.metainfo.last_descendant_ocn = pair[1]; + } + } + } + } + } + if (the_document_blurb_section.length > 1) { + foreach (ref obj; the_document_blurb_section) { + if (obj.metainfo.is_a == "heading") { + foreach (pair; pairs) { + if (obj.metainfo.ocn == pair[0]) { + obj.metainfo.last_descendant_ocn = pair[1]; + } + } + } + } + } + if (the_document_xml_dom_tail_section.length > 1) { + foreach (ref obj; the_document_xml_dom_tail_section) { + if (obj.metainfo.is_a == "heading") { + foreach (pair; pairs) { + if (obj.metainfo.ocn == pair[0]) { + obj.metainfo.last_descendant_ocn = pair[1]; + } + } + } + } + } + } + // TODO + // - note create/insert heading object sole purpose eof close all open tags + // sort out: + // - obj.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status; + // - obj.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status; + comp_obj_ = set_object_heading("lev1", "empty", "empty", ""); + comp_obj_.metainfo.identifier = ""; + comp_obj_.metainfo.dummy_heading = true; + comp_obj_.metainfo.object_number_off = true; + comp_obj_.metainfo.object_number_type = 0; + comp_obj_.tags.segment_anchor_tag_epub = ""; + comp_obj_.tags.anchor_tag_html = ""; + comp_obj_.tags.in_segment_html = ""; + comp_obj_.tags.html_segment_anchor_tag_is = ""; + comp_obj_.tags.epub_segment_anchor_tag_is = ""; + comp_obj_.metainfo.heading_lev_markup = 9; + comp_obj_.metainfo.heading_lev_collapsed = 9; + comp_obj_.metainfo.parent_ocn = 0; + comp_obj_.metainfo.parent_lev_markup = 0; + comp_obj_.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status.dup; + comp_obj_.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status.dup; + comp_obj_ = comp_obj_.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, 0); + comp_obj_ = comp_obj_.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, 0); + comp_obj_ = comp_obj_.obj_heading_ancestors(lv_ancestors_txt); + // the_dom_tail_section ~= comp_obj_; // remove tail for now, decide on later + // the doc + ObjGenericComposite[][string] document_the = [ + "head": the_document_head_section, + "toc": the_document_toc_section, + // substantive/body: + "body": the_document_body_section, + // backmatter: + "endnotes": the_document_endnotes_section, + "glossary": the_document_glossary_section, + "bibliography": the_document_bibliography_section, + "bookindex": the_document_bookindex_section, + "blurb": the_document_blurb_section, + // dom tail only + "tail": the_document_xml_dom_tail_section, + ]; + // document parts keys as needed + string[][string] document_section_keys_sequenced = [ + "scroll": ["head", "toc", "body",], + "seg": ["head", "toc", "body",], + "sql": ["head", "body",], + "latex": ["head", "toc", "body",] + ]; + if (document_the["endnotes"].length > 1) { + document_section_keys_sequenced["scroll"] ~= "endnotes"; + document_section_keys_sequenced["seg"] ~= "endnotes"; + document_section_keys_sequenced["latex"] ~= "endnotes"; + } + if (document_the["glossary"].length > 1) { + document_section_keys_sequenced["scroll"] ~= "glossary"; + document_section_keys_sequenced["seg"] ~= "glossary"; + document_section_keys_sequenced["sql"] ~= "glossary"; + document_section_keys_sequenced["latex"] ~= "glossary"; + } + if (document_the["bibliography"].length > 1) { + document_section_keys_sequenced["scroll"] ~= "bibliography"; + document_section_keys_sequenced["seg"] ~= "bibliography"; + document_section_keys_sequenced["sql"] ~= "bibliography"; + document_section_keys_sequenced["latex"] ~= "bibliography"; + } + if (document_the["bookindex"].length > 1) { + document_section_keys_sequenced["scroll"] ~= "bookindex"; + document_section_keys_sequenced["seg"] ~= "bookindex"; + document_section_keys_sequenced["sql"] ~= "bookindex"; + document_section_keys_sequenced["latex"] ~= "bookindex"; + } + if (document_the["blurb"].length > 1) { + document_section_keys_sequenced["scroll"] ~= "blurb"; + document_section_keys_sequenced["seg"] ~= "blurb"; + document_section_keys_sequenced["sql"] ~= "blurb"; + document_section_keys_sequenced["latex"] ~= "blurb"; + } + if ((opt_action.html) + || (opt_action.html_scroll) + || (opt_action.html_seg) + || (opt_action.epub)) { + document_section_keys_sequenced["scroll"] ~= "tail"; + document_section_keys_sequenced["seg"] ~= "tail"; + } + // segnames + string[] segnames_4 = segnames["html"].dup; + string[] segnames_lv1to4 = segnames["epub"].dup; + debug(segnames) { + writeln("segnames_lv4: ", segnames_4); + writeln("segnames_lv1to4: ", segnames_lv1to4); + } + // restart + destroy(the_document_head_section); + destroy(the_document_toc_section); + destroy(the_document_body_section); + destroy(the_document_endnotes_section); + destroy(the_document_glossary_section); + destroy(the_document_bibliography_section); + destroy(the_document_bookindex_section); + destroy(the_document_blurb_section); + destroy(the_document_xml_dom_tail_section); + destroy(segnames); + destroy(bookindex_unordered_hashes); + destroy(an_object); + obj_cite_digits = ocn_emit(eN.ocn.reset); + biblio_arr_json = []; + obj_cite_digit_ = 0; + html_segnames_ptr = 0; + html_segnames_ptr_cntr = 0; + content_non_header = "8"; + dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0,]; + dom_structure_markedup_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,]; + dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0,]; + dom_structure_collapsed_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,]; + lev_anchor_tag = ""; + anchor_tag = ""; + // identify parts + struct DocHas_ { + uint inline_links() { + return dochas["inline_links"]; + } + uint inline_notes_reg() { + return dochas["inline_notes"]; + } + uint inline_notes_star() { + return dochas["inline_notes_star"]; + } + uint codeblocks() { + return dochas["codeblock"]; + } + uint tables() { + return dochas["table"]; + } + uint blocks() { + return dochas["block"]; + } + uint groups() { + return dochas["group"]; + } + uint poems() { + return dochas["poem"]; + } + uint quotes() { + return dochas["quote"]; + } + ulong images() { // TODO not ideal rethink + return (image_list.to!string.strip("[","]").split(",").length); + } + auto imagelist() { + return image_list; + } + auto keys_seq() { + return docSectKeysSeq!()(document_section_keys_sequenced); + } + string[] segnames_lv4() { + return segnames_4; + } + string[] segnames_lv_0_to_4() { + return segnames_0_to_4; + } + string[string][string] tag_associations() { + return tag_assoc; + } + } + auto doc_has() { + return DocHas_(); + } + // the doc to be returned + struct ST_docAbstraction { + ObjGenericComposite[][string] document_the; + DocHas_ doc_has; + } + ST_docAbstraction ret; + { + ret.document_the = document_the; + ret.doc_has = doc_has; + } + return ret; + } // ← closed: abstract doc source +} -- cgit v1.2.3