From 860b894a10f3526e6bd73d53850764c0ad95ab99 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 29 Jun 2024 13:54:28 -0400 Subject: document digests and reduction in use of tuples --- src/sisudoc/io_in/read_source_files.d | 54 +++++++++++++++++++-------- src/sisudoc/io_out/latex.d | 1 - src/sisudoc/io_out/odt.d | 2 - src/sisudoc/meta/metadoc.d | 10 ++--- src/sisudoc/meta/metadoc_from_src.d | 36 +++++++++++++----- src/sisudoc/meta/metadoc_from_src_functions.d | 25 +++++++------ src/sisudoc/meta/metadoc_object_setter.d | 1 + 7 files changed, 83 insertions(+), 46 deletions(-) (limited to 'src/sisudoc') diff --git a/src/sisudoc/io_in/read_source_files.d b/src/sisudoc/io_in/read_source_files.d index 4ba0b4f..8d814fc 100644 --- a/src/sisudoc/io_in/read_source_files.d +++ b/src/sisudoc/io_in/read_source_files.d @@ -56,6 +56,7 @@ module sisudoc.io_in.read_source_files; @safe: template spineRawMarkupContent() { import + std.digest.sha, std.file, std.path; import @@ -67,6 +68,14 @@ template spineRawMarkupContent() { static auto rgx = RgxI(); mixin spineRgxFiles; static auto rgx_files = RgxFiles(); + struct ST_doc_parts { + char[] header_raw; + char[][] sourcefile_body_content; + string[] insert_file_list; + string[] images_list; + ubyte[32] header_raw_digest; + ubyte[32] src_txt_digest; + } string[] _images=[]; string[] _extract_images(S)(S content_block) { string[] images_; @@ -86,7 +95,9 @@ template spineRawMarkupContent() { char[], "header", char[][], "src_txt", string[], "insert_files", - string[], "images" + string[], "images", + ubyte[32], "header_digest", + ubyte[32], "src_txt_digest" ); auto spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) { auto _0_header_1_body_content_2_insert_filelist_tuple @@ -100,43 +111,50 @@ template spineRawMarkupContent() { = raw.markupSourceReadIn(fn_src); return source_txt_str; } - final auto sourceContentSplitIntoHeaderAndBody(O)( + final ST_doc_parts sourceContentSplitIntoHeaderAndBody(O)( O _opt_action, in string source_txt_str, in string fn_src="" ) { auto raw = MarkupRawUnit(); - string[] insert_file_list; - string[] images_list; + string[] insert_file_list_get; + string[] images_list_get; HeaderContentInsertsImages t = raw.markupSourceHeaderContentRawLineTupleArray(source_txt_str); char[] header_raw = t.header; + ubyte[32] header_raw_digest = t.header.sha256Of; char[][] sourcefile_body_content = t.src_txt; if (fn_src.match(rgx_files.src_fn_master)) { // filename with path needed if master file (.ssm) not otherwise auto ins = Inserts(); ContentsInsertsImages tu = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src); sourcefile_body_content = tu.contents; - insert_file_list = tu.insert_files.dup; - images_list = tu.images.dup; + insert_file_list_get = tu.insert_files.dup; + images_list_get = tu.images.dup; } else if (_opt_action.source || _opt_action.pod) { auto ins = Inserts(); ContentsInsertsImages tu = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src); - images_list = tu.images.dup; + images_list_get = tu.images.dup; } + ubyte[32] src_txt_digest = sourcefile_body_content.sha256Of; string header_type = ""; - t = tuple( - header_raw, - sourcefile_body_content, - insert_file_list, - images_list - ); - return t; + ST_doc_parts ret; + { + ret.header_raw = t.header; + ret.sourcefile_body_content = sourcefile_body_content; + ret.insert_file_list = insert_file_list_get; + ret.images_list = images_list_get; + ret.header_raw_digest = t.header.sha256Of; + ret.src_txt_digest = sourcefile_body_content.sha256Of; + } + return ret; } } struct MarkupRawUnit { - import std.file; + import + std.digest.sha, + std.file; final private string readInMarkupSource(in char[] fn_src) { enforce( exists(fn_src) != 0, @@ -195,11 +213,15 @@ template spineRawMarkupContent() { char[] header = hc[0]; char[] source_txt = hc[1]; char[][] source_line_arr = markupSourceLineArray(source_txt); + ubyte[32] header_digest; + ubyte[32] src_txt_digest; HeaderContentInsertsImages t = tuple( header, source_line_arr, file_insert_list, - images_list + images_list, + header_digest, + src_txt_digest ); return t; } diff --git a/src/sisudoc/io_out/latex.d b/src/sisudoc/io_out/latex.d index 771bc57..e1b5731 100644 --- a/src/sisudoc/io_out/latex.d +++ b/src/sisudoc/io_out/latex.d @@ -320,7 +320,6 @@ template paperLaTeX() { } template outputLaTeX() { import - std.digest.sha, std.file, std.outbuffer, std.uri, diff --git a/src/sisudoc/io_out/odt.d b/src/sisudoc/io_out/odt.d index d6ac27d..0450509 100644 --- a/src/sisudoc/io_out/odt.d +++ b/src/sisudoc/io_out/odt.d @@ -55,7 +55,6 @@ template formatODT() { sisudoc.io_out.rgx, sisudoc.io_out.rgx_xhtml; import - std.digest.sha, std.file, std.outbuffer, std.uri, @@ -651,7 +650,6 @@ template outputODT() { sisudoc.io_out.rgx, sisudoc.io_out.rgx_xhtml; import - std.digest.sha, std.file, std.outbuffer, std.uri, diff --git a/src/sisudoc/meta/metadoc.d b/src/sisudoc/meta/metadoc.d index a1899da..ed9a5b1 100644 --- a/src/sisudoc/meta/metadoc.d +++ b/src/sisudoc/meta/metadoc.d @@ -64,7 +64,6 @@ template spineAbstraction() { sisudoc.io_out.hub; mixin spineBiblio; mixin outputHub; - enum headBody { header, body_content, insert_file_list, image_list } enum makeMeta { make, meta } enum docAbst { doc_abstract_obj, doc_has } @system auto spineAbstraction(E,P,O,Cfg,M,S)( @@ -89,7 +88,6 @@ template spineAbstraction() { } auto _header_body_insertfilelist_imagelist = spineRawMarkupContent!()(_opt_action, _manifest.src.path_and_fn); - static assert(_header_body_insertfilelist_imagelist.length==4); if ((_opt_action.debug_do) || (_opt_action.debug_do_stages) ) { @@ -98,7 +96,7 @@ template spineAbstraction() { debug(header_and_body) { writeln(header); writeln(_header_body_insertfilelist_imagelist.length); - writeln(_header_body_insertfilelist_imagelist.length[headBody.body_content][0]); + // writeln(_header_body_insertfilelist_imagelist.length.body_content[0]); } /+ ↓ split header into make and meta +/ if ((_opt_action.debug_do) @@ -109,7 +107,7 @@ template spineAbstraction() { import sisudoc.meta.conf_make_meta_yaml; _make_and_meta_struct = docHeaderMakeAndMetaTupYamlExtractAndConvertToStruct!()( - _header_body_insertfilelist_imagelist[headBody.header], + _header_body_insertfilelist_imagelist.header_raw, _make_and_meta_struct, _manifest, _opt_action, @@ -127,7 +125,7 @@ template spineAbstraction() { writeln("step3 commence → (document abstraction (da); da keys; segnames; doc_matters) [", _manifest.src.filename, "]"); } auto da = docAbstraction!()( - _header_body_insertfilelist_imagelist[headBody.body_content], + _header_body_insertfilelist_imagelist.sourcefile_body_content, _make_and_meta_struct, _opt_action, _manifest, @@ -275,7 +273,7 @@ template spineAbstraction() { auto srcs() { struct SRC_ { auto file_insert_list() { - return _header_body_insertfilelist_imagelist[headBody.insert_file_list]; + return _header_body_insertfilelist_imagelist.insert_file_list; } auto image_list() { return _doc_has_struct.imagelist; diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d index 32954f1..4bd747d 100644 --- a/src/sisudoc/meta/metadoc_from_src.d +++ b/src/sisudoc/meta/metadoc_from_src.d @@ -57,6 +57,7 @@ template docAbstraction() { import std.algorithm, std.container, + std.digest.sha, std.file, std.json, std.path; @@ -970,8 +971,7 @@ template docAbstraction() { } obj = _links(obj); } - if (the_document_toc_section.length > 1) { - // scroll + if (the_document_toc_section.length > 1) { // writeln("toc"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_toc_section) { @@ -997,7 +997,7 @@ template docAbstraction() { // images string[] _images; // multiple 1~ levels, loop through document body - if (the_document_body_section.length > 1) { + if (the_document_body_section.length > 1) { // writeln("body"); foreach (ref obj; the_document_body_section) { if (!(obj.metainfo.identifier.empty)) { if (!(((obj.metainfo.identifier) in tag_assoc) @@ -1033,12 +1033,26 @@ template docAbstraction() { _images ~= extract_images(obj.text); obj = _image_dimensions(obj, manifested); } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } - auto image_list = (_images.sort()).uniq; + auto image_list = (_images.sort()).uniq; // also get digest on each image here? // workon + if (_images.length > 0) { + foreach (img; image_list) { + try { // also get sha digest on image file + // read_image + auto data = (cast(byte[]) (manifested.src.image_dir_path ~ "/" ~ img).read); + // calculate, digest, hash + writefln("%s\n%-(%02x%)::%s ⋅ %s", img, data.sha256Of, data.length, img); + writefln("%-(%02x%) ⋅ %s ⋅ %s", data.sha256Of, img, data.length); + } catch (Exception ex) { + writeln("WARNING, image not found: ", img, "\n ", manifested.src.image_dir_path ~ "/" ~ img); + } + } + } // endnotes optional only one 1~ level - if (the_document_endnotes_section.length > 1) { + if (the_document_endnotes_section.length > 1) { // writeln("endnotes"); dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup; @@ -1073,7 +1087,7 @@ template docAbstraction() { } } // glossary optional only one 1~ level - if (the_document_glossary_section.length > 1) { + if (the_document_glossary_section.length > 1) { // writeln("glossary"); foreach (ref obj; the_document_glossary_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1104,11 +1118,12 @@ template docAbstraction() { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } // bibliography optional only one 1~ level - if (the_document_bibliography_section.length > 1) { + if (the_document_bibliography_section.length > 1) { // writeln("bibliography"); foreach (ref obj; the_document_bibliography_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1139,6 +1154,7 @@ template docAbstraction() { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } @@ -1146,7 +1162,7 @@ template docAbstraction() { int ocn_ = obj_cite_digits.object_number; int ocn_bkidx_ = 0; int ocn_bidx_; - if (the_document_bookindex_section.length > 1) { // scroll + if (the_document_bookindex_section.length > 1) { // writeln("book index"); // scroll dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup; dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup; foreach (ref obj; the_document_bookindex_section) { @@ -1183,13 +1199,14 @@ template docAbstraction() { obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; obj.metainfo.object_number_type = OCNtype.bkidx; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } // TODO assert failure, reinstate // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?"); } // blurb optional only one 1~ level - if (the_document_blurb_section.length > 1) { + if (the_document_blurb_section.length > 1) { // writeln("blurb"); foreach (ref obj; the_document_blurb_section) { if (obj.metainfo.is_a == "heading") { debug(dom) { writeln(obj.text); } @@ -1220,6 +1237,7 @@ template docAbstraction() { obj.metainfo.object_number_off = obj_cite_digits.off; obj.metainfo.object_number_type = OCNtype.non; } + obj.metainfo.sha256 = obj.obj_digest; obj = _links(obj); } } diff --git a/src/sisudoc/meta/metadoc_from_src_functions.d b/src/sisudoc/meta/metadoc_from_src_functions.d index 29e675c..b5956c0 100644 --- a/src/sisudoc/meta/metadoc_from_src_functions.d +++ b/src/sisudoc/meta/metadoc_from_src_functions.d @@ -54,18 +54,6 @@ module sisudoc.meta.metadoc_from_src_functions; @safe: template docAbstractionFunctions() { // ↓ abstraction imports - import - std.algorithm, - std.container, - std.file, - std.json, - std.path; - import - sisudoc.meta, - sisudoc.meta.defaults, - sisudoc.meta.rgx, - sisudoc.meta.metadoc_object_setter, - sisudoc.meta.rgx; // ↓ abstraction mixins mixin ObjectSetter; mixin InternalMarkup; @@ -2998,6 +2986,19 @@ template docAbstractionFunctions() { return obj; } // ↑ - object tags + // ↓ - object digest + pure ubyte[32] obj_digest()( + ObjGenericComposite obj, + ) { + obj.metainfo.sha256 = obj.text.sha256Of; + // if (obj.metainfo.is_a == "heading") { + // writeln(obj.metainfo.sha256.toHexString, " ", obj.metainfo.ocn, " ", obj.metainfo.is_a, " ", obj.metainfo.heading_lev_markup); + // } else { + // writeln(obj.metainfo.sha256.toHexString, " ", obj.metainfo.ocn, " ", obj.metainfo.is_a); + // } + return obj.metainfo.sha256; + } + // ↑ - object digest // ↓ - table of contents @system ObjGenericComposite[] backmatter_gather_table_of_contents( ObjGenericComposite[] the_document_endnotes_section, diff --git a/src/sisudoc/meta/metadoc_object_setter.d b/src/sisudoc/meta/metadoc_object_setter.d index a2ceff6..8b2daf0 100644 --- a/src/sisudoc/meta/metadoc_object_setter.d +++ b/src/sisudoc/meta/metadoc_object_setter.d @@ -173,6 +173,7 @@ template ObjectSetter() { int parent_lev_markup = 0; int parent_ocn = 0; int last_descendant_ocn = 0; + ubyte[32] sha256; } struct ObjGenericComposite { string text = ""; -- cgit v1.2.3