aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sisudoc/io_in/read_source_files.d
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2024-04-10 22:24:34 -0400
committerRalph Amissah <ralph.amissah@gmail.com>2024-04-10 23:08:18 -0400
commit90873fabd7451e1dd8c4b39303906e19bdc481f7 (patch)
tree2dbb0e41f3e9c761645c8b37dafe979a01d38d32 /src/sisudoc/io_in/read_source_files.d
parent0.15.0 (diff)
0.16.0 sisudoc (src/sisudoc sisudoc spine)
- src/sisudoc (replaces src/doc_reform) - sisudoc spine (used more)
Diffstat (limited to 'src/sisudoc/io_in/read_source_files.d')
-rw-r--r--src/sisudoc/io_in/read_source_files.d396
1 files changed, 396 insertions, 0 deletions
diff --git a/src/sisudoc/io_in/read_source_files.d b/src/sisudoc/io_in/read_source_files.d
new file mode 100644
index 0000000..4ba0b4f
--- /dev/null
+++ b/src/sisudoc/io_in/read_source_files.d
@@ -0,0 +1,396 @@
+/+
+- Name: SisuDoc Spine, Doc Reform [a part of]
+ - Description: documents, structuring, processing, publishing, search
+ - static content generator
+
+ - Author: Ralph Amissah
+ [ralph.amissah@gmail.com]
+
+ - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved.
+
+ - License: AGPL 3 or later:
+
+ Spine (SiSU), a framework for document structuring, publishing and
+ search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU AFFERO General Public License as published by the
+ Free Software Foundation, either version 3 of the License, or (at your
+ option) any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see [https://www.gnu.org/licenses/].
+
+ If you have Internet connection, the latest version of the AGPL should be
+ available at these locations:
+ [https://www.fsf.org/licensing/licenses/agpl.html]
+ [https://www.gnu.org/licenses/agpl.html]
+
+ - Spine (by Doc Reform, related to SiSU) uses standard:
+ - docReform markup syntax
+ - standard SiSU markup syntax with modified headers and minor modifications
+ - docReform object numbering
+ - standard SiSU object citation numbering & system
+
+ - Homepages:
+ [https://www.sisudoc.org]
+ [https://www.doc-reform.org]
+
+ - Git
+ [https://git.sisudoc.org/]
+
++/
+/++
+ module sisudoc.io_in.read_source_files;<BR>
+ - open markup files<BR>
+ - if master file scan for additional files to import/insert
++/
+module sisudoc.io_in.read_source_files;
+@safe:
+template spineRawMarkupContent() {
+ import
+ std.file,
+ std.path;
+ import
+ sisudoc.meta,
+ sisudoc.io_in.paths_source,
+ sisudoc.meta.rgx_files,
+ sisudoc.meta.rgx;
+ mixin spineRgxIn;
+ static auto rgx = RgxI();
+ mixin spineRgxFiles;
+ static auto rgx_files = RgxFiles();
+ string[] _images=[];
+ /++
+   Collect every image reference found in a piece of markup.
+
+   Params:
+     content_block = markup to search; it is converted with to!string first,
+                     so a single line as well as an array of lines is accepted
+   Returns: capture group 1 of each match of rgx.image, in order of
+            appearance (duplicates are kept); empty if nothing matches
+ +/
+ string[] _extract_images(S)(S content_block) {
+   string[] images_;
+   string _content_block = content_block.to!string;
+   // matchAll yields one match per image reference; walk all of them instead
+   // of reading only the captures of the first match, so that a line (or a
+   // whole file) holding several images contributes every one of them
+   foreach (m; _content_block.matchAll(rgx.image)) {
+     images_ ~= m[1].to!string;
+   }
+   return images_;
+ }
+ // RawMarkupContent instance through which spineRawMarkupContent() reads and splits the source
+ auto rawsrc = RawMarkupContent();
+ // named tuple returned by Inserts.scan_master_src_for_insert_files_and_import_content():
+ // content lines, names of the inserted files, image names
+ alias ContentsInsertsImages = Tuple!(
+ char[][], "contents",
+ string[], "insert_files",
+ string[], "images"
+ );
+ // named tuple returned by sourceContentSplitIntoHeaderAndBody():
+ // raw header text, body lines, names of the inserted files, image names
+ alias HeaderContentInsertsImages = Tuple!(
+ char[], "header",
+ char[][], "src_txt",
+ string[], "insert_files",
+ string[], "images"
+ );
+ /++
+   Entry point: read the markup file fn_src and hand back the
+   header / body-lines / insert-file-list / image-list tuple produced by
+   rawsrc.sourceContentSplitIntoHeaderAndBody().
+ +/
+ auto spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) {
+   string source_text = rawsrc.sourceContent(fn_src);
+   return rawsrc.sourceContentSplitIntoHeaderAndBody(_opt_action, source_text, fn_src);
+ }
+ /++
+   Reads a markup source file and splits it into its header, its body lines,
+   the list of files inserted into it and the list of images it refers to.
+ +/
+ struct RawMarkupContent {
+   /++
+     Returns the text of the markup file fn_src as read by
+     MarkupRawUnit.markupSourceReadIn() (which rejects file names that do
+     not match rgx_files.src_pth_sst_or_ssm).
+   +/
+   final sourceContent(in string fn_src) {
+     auto raw = MarkupRawUnit();
+     string source_txt_str
+       = raw.markupSourceReadIn(fn_src);
+     return source_txt_str;
+   }
+   /++
+     Split source text into header and body and, where required, resolve
+     insert files and collect image names.
+
+     Params:
+       _opt_action    = options; only .source and .pod are read here
+       source_txt_str = complete text of the markup document
+       fn_src         = file name with path; needed when the document is a
+                        master file (matches rgx_files.src_fn_master)
+     Returns: HeaderContentInsertsImages (header, src_txt, insert_files, images)
+
+     - master file: the body is replaced by the body with inserts imported,
+       and the insert file and image lists are filled
+     - other file with source or pod requested: only the image list is filled,
+       the body is left as read
+     - otherwise: both lists are empty
+   +/
+   final auto sourceContentSplitIntoHeaderAndBody(O)(
+     O _opt_action,
+     in string source_txt_str,
+     in string fn_src=""
+   ) {
+     auto raw = MarkupRawUnit();
+     HeaderContentInsertsImages t
+       = raw.markupSourceHeaderContentRawLineTupleArray(source_txt_str);
+     char[]   header_raw              = t.header;
+     char[][] sourcefile_body_content = t.src_txt;
+     string[] insert_file_list;
+     string[] images_list;
+     // filename with path needed if master file (.ssm) not otherwise
+     const bool is_master = !fn_src.match(rgx_files.src_fn_master).empty;
+     if (is_master || _opt_action.source || _opt_action.pod) {
+       auto ins = Inserts();
+       ContentsInsertsImages tu
+         = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
+       images_list = tu.images.dup;
+       if (is_master) { // only a master file takes over the imported content
+         sourcefile_body_content = tu.contents;
+         insert_file_list        = tu.insert_files.dup;
+       }
+     }
+     t = tuple(
+       header_raw,
+       sourcefile_body_content,
+       insert_file_list,
+       images_list
+     );
+     return t;
+   }
+ }
+ /++
+   Low level reading of markup source: file to text, text to header and
+   body, text to array of lines.
+ +/
+ struct MarkupRawUnit {
+   import std.file;
+   /++
+     Read the whole of file fn_src as text.
+
+     Returns: the file's text; an empty string when fn_src exists but
+              getLinkAttributes does not report it as a regular file
+     Throws: Exception if fn_src does not exist or its content is not valid
+             UTF; FileException (from readText) if it cannot be read
+   +/
+   final private string readInMarkupSource(in char[] fn_src) {
+     enforce(
+       exists(fn_src),
+       "file not found: «" ~
+       fn_src ~ "»"
+     );
+     string source_txt_str;
+     // NOTE(review): getLinkAttributes does not follow symbolic links, so a
+     // symlinked source presumably ends up in the "empty text" case - confirm
+     if (fn_src.getLinkAttributes.attrIsFile) {
+       try {
+         // readText validates the text it returns; read errors (FileException)
+         // are left to propagate rather than being swallowed and turned into
+         // an empty document
+         source_txt_str = fn_src.readText;
+       } catch (UTFException ex) {
+         // UTFException does not say which file was at fault, add it
+         throw new Exception(
+           ("invalid UTF in markup source file: «" ~ fn_src ~ "»: " ~ ex.msg).idup,
+           ex
+         );
+       }
+     }
+     return source_txt_str;
+   }
+   /++
+     Split src_text on the _first_ match of rgx.heading_a into
+     [header, content]: header is the text before the match, content is the
+     match together with everything after it.
+
+     Throws: Exception if src_text has no match for rgx.heading_a
+   +/
+   @trusted final private char[][] header0Content1(in string src_text) { // cast(char[])
+     auto m = (cast(char[]) src_text).matchFirst(rgx.heading_a);
+     // without a match there is nothing to split on (and hit is not available
+     // on an empty match), so report the broken markup before using m
+     enforce(
+       !m.empty,
+       "document markup is broken, header body split == 1"
+       ~ "; (header / body array split should == 2 (split is on level A~))"
+     );
+     char[][] header_and_content;
+     header_and_content ~= m.pre;
+     header_and_content ~= m.hit ~ m.post;
+     return header_and_content;
+   }
+   /++ Split src_text into an array of lines on rgx.newline_eol_strip_preceding. +/
+   @trusted final private char[][] markupSourceLineArray(in char[] src_text) { // cast(char[])
+     char[][] source_line_arr
+       = (cast(char[]) src_text).split(rgx.newline_eol_strip_preceding);
+     return source_line_arr;
+   }
+   /++
+     Read the markup file fn_src.
+
+     Throws: Exception if fn_src does not match rgx_files.src_pth_sst_or_ssm,
+             and whatever readInMarkupSource() throws
+   +/
+   string markupSourceReadIn(in string fn_src) {
+     static auto rgx_files = RgxFiles();
+     enforce(
+       fn_src.match(rgx_files.src_pth_sst_or_ssm),
+       "not a dr markup filename: «" ~
+       fn_src ~ "»"
+     );
+     string source_txt_str = readInMarkupSource(fn_src);
+     return source_txt_str;
+   }
+   /++
+     Split document text into header and array of body lines.
+
+     Returns: HeaderContentInsertsImages with header and src_txt set; the
+              insert_files and images lists are returned empty
+   +/
+   HeaderContentInsertsImages markupSourceHeaderContentRawLineTupleArray(in string source_txt_str) {
+     string[] file_insert_list = [];
+     string[] images_list = [];
+     char[][] hc = header0Content1(source_txt_str);
+     char[] header = hc[0];
+     char[] source_txt = hc[1];
+     char[][] source_line_arr = markupSourceLineArray(source_txt);
+     HeaderContentInsertsImages t = tuple(
+       header,
+       source_line_arr,
+       file_insert_list,
+       images_list
+     );
+     return t;
+   }
+   /++
+     Read the insert file fn_src_insert and return its text as an array of
+     lines (the text is not split into header and body).
+
+     Throws: Exception if fn_src_insert does not match rgx_file, and whatever
+             readInMarkupSource() throws
+   +/
+   final char[][] getInsertMarkupSourceContentRawLineArray(
+     in char[] fn_src_insert,
+     Regex!(char) rgx_file
+   ) {
+     enforce(
+       fn_src_insert.match(rgx_file),
+       "not a dr markup filename: «" ~
+       fn_src_insert ~ "»"
+     );
+     string source_txt_str = readInMarkupSource(fn_src_insert);
+     char[][] source_line_arr = markupSourceLineArray(source_txt_str);
+     return source_line_arr;
+   }
+ }
+ /++
+   Scans markup lines for insert-file lines (rgx_files.insert_src_fn_ssi_or_sst),
+   imports the content of the files named, and (when source or pod output is
+   requested) collects the names of images referred to. Lines inside code
+   blocks (curly or tic) are passed through untouched.
+ +/
+ struct Inserts {
+   alias ContentsAndImages = Tuple!(
+     char[][], "insert_contents",
+     string[], "images"
+   );
+   /++
+     Scan the lines of an inserted file.
+
+     Params:
+       _opt_action = options; only .source and .pod are read here
+       markup_sourcefile_insert_content = the lines of the inserted file
+       fn_src      = name with path of the inserted file; must match
+                     rgx_files.src_pth_sst_or_ssm, capture 1 is taken as the
+                     directory that nested insert names are relative to
+     Returns: ContentsAndImages - the lines kept, and the images found in this
+              file (images only if source or pod is requested)
+   +/
+   ContentsAndImages scan_subdoc_source(O)(
+     O _opt_action,
+     char[][] markup_sourcefile_insert_content,
+     string fn_src
+   ) {
+     char[][] contents_insert;
+     // images are gathered in a list local to this call; the template level
+     // _images is never reset, using it made every call return the images of
+     // all earlier calls (and documents) as well
+     string[] images_found;
+     int code_block_status = 0;
+     enum codeBlock { off, curly, tic, }
+     auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
+     auto markup_src_file_path = fn_pth_full.captures[1];
+     foreach (line; markup_sourcefile_insert_content) {
+       if (code_block_status == codeBlock.curly) {
+         if (line.matchFirst(rgx.block_curly_code_close)) {
+           code_block_status = codeBlock.off;
+         }
+         contents_insert ~= line;
+       } else if (line.matchFirst(rgx.block_curly_code_open)) {
+         code_block_status = codeBlock.curly;
+         contents_insert ~= line;
+       } else if (code_block_status == codeBlock.tic) {
+         if (line.matchFirst(rgx.block_tic_close)) {
+           code_block_status = codeBlock.off;
+         }
+         contents_insert ~= line;
+       } else if (line.matchFirst(rgx.block_tic_code_open)) {
+         code_block_status = codeBlock.tic;
+         contents_insert ~= line;
+       } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
+         auto insert_fn = m.captures[2];
+         auto insert_sub_pth = m.captures[1];
+         auto fn_src_insert
+           = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
+         auto raw = MarkupRawUnit();
+         auto markup_sourcesubfile_insert_content
+           = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
+         debug(insert_file) {
+           writeln(line);
+           writeln(fn_src_insert);
+           writeln(
+             " length contents insert array: ",
+             markup_sourcesubfile_insert_content.length
+           );
+         }
+         if (_opt_action.source || _opt_action.pod) {
+           images_found ~= _extract_images(markup_sourcesubfile_insert_content);
+         }
+         // NOTE(review): the nested file is read (and scanned for images) but
+         // its lines are not added to contents_insert, and the insert line
+         // itself is dropped - confirm whether nested inserts are meant to be
+         // expanded here (see plan below)
+         /+
+           - 1. load file
+           - 2. read lines
+           - 3. scan lines
+             - a. if filename insert, and insert filename
+               - repeat 1
+             - b. else
+               - add line to new array;
+               - build image list, search for any image files to add to image list
+         +/
+       } else {
+         contents_insert ~= line; // images to extract for image list?
+         if (_opt_action.source || _opt_action.pod) {
+           images_found ~= _extract_images(line);
+         }
+       }
+     } // end src subdoc (inserts) loop
+     ContentsAndImages t = tuple(
+       contents_insert,
+       images_found
+     );
+     return t;
+   }
+   /++
+     Scan the body lines of a document, replacing each insert-file line by the
+     (scanned) content of the file it names.
+
+     Params:
+       _opt_action = options; only .source and .pod are read here
+       sourcefile_body_content = the body lines of the document
+       fn_src      = name with path of the document; must match
+                     rgx_files.src_pth_sst_or_ssm, capture 1 is taken as the
+                     directory that insert names are relative to
+     Returns: ContentsInsertsImages - the resulting lines, the insert files in
+              the order met, and the images found, sorted with duplicates
+              removed (images only if source or pod is requested)
+   +/
+   ContentsInsertsImages scan_master_src_for_insert_files_and_import_content(O)(
+     O _opt_action,
+     char[][] sourcefile_body_content,
+     string fn_src
+   ) {
+     import std.algorithm;
+     char[][] contents;
+     int code_block_status = 0;
+     enum codeBlock { off, curly, tic, }
+     auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
+     auto markup_src_file_path = fn_pth_full.captures[1];
+     string[] _images =[];
+     string[] insert_file_list =[];
+     foreach (line; sourcefile_body_content) {
+       if (code_block_status == codeBlock.curly) {
+         if (line.matchFirst(rgx.block_curly_code_close)) {
+           code_block_status = codeBlock.off;
+         }
+         contents ~= line;
+       } else if (line.matchFirst(rgx.block_curly_code_open)) {
+         code_block_status = codeBlock.curly;
+         contents ~= line;
+       } else if (code_block_status == codeBlock.tic) {
+         if (line.matchFirst(rgx.block_tic_close)) {
+           code_block_status = codeBlock.off;
+         }
+         contents ~= line;
+       } else if (line.matchFirst(rgx.block_tic_code_open)) {
+         code_block_status = codeBlock.tic;
+         contents ~= line;
+       } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
+         auto insert_fn = m.captures[2];
+         auto insert_sub_pth = m.captures[1];
+         auto fn_src_insert
+           = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
+         insert_file_list ~= fn_src_insert.to!string;
+         auto raw = MarkupRawUnit();
+         /+ TODO +/
+         auto markup_sourcefile_insert_content
+           = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
+         debug(insert_file) {
+           writeln(line);
+           writeln(fn_src_insert);
+           writeln(
+             " length contents insert array: ",
+             markup_sourcefile_insert_content.length
+           );
+         }
+         auto ins = Inserts();
+         ContentsAndImages contents_insert_tu = ins.scan_subdoc_source(
+           _opt_action,
+           markup_sourcefile_insert_content,
+           fn_src_insert.to!string
+         );
+         contents ~= contents_insert_tu.insert_contents;
+         if (_opt_action.source || _opt_action.pod) {
+           // .images already is a list of extracted image names: append it as
+           // it is; running _extract_images over the stringified list again
+           // is redundant and loses entries
+           _images ~= contents_insert_tu.images;
+         }
+         /+
+           - 1. load file
+           - 2. read lines
+           - 3. scan lines
+             - a. if filename insert, and insert filename
+               - repeat 1
+             - b. else
+               - add line to new array;
+               - build image list, search for any image files to add to image list
+         +/
+       } else {
+         contents ~= line;
+         if (_opt_action.source || _opt_action.pod) {
+           _images ~= _extract_images(line);
+         }
+       }
+     } // end src doc loop
+     // sorted, each image listed once
+     string[] images = [];
+     foreach(i; uniq(_images.sort())) {
+       images ~= i;
+     }
+     debug(insert_file) {
+       writeln(__LINE__);
+       writeln(contents.length);
+     }
+     ContentsInsertsImages t = tuple(
+       contents,
+       insert_file_list,
+       images
+     );
+     return t;
+   }
+ }
+}