From 4b6e06a59f07e3785a94efc10c6a7893f8f6f49d Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 25 Apr 2020 20:13:54 -0400 Subject: endnotes, convert to inline from binary - the d version of sisu requires inline endnotes - rudimentary tool, the original (ruby) version of sisu permitted binary endnotes (markup for an endnote and the reference to it within the body of text), it was easier to convert some existing html to this form, however it is much less reliable, and more difficult to keep track of individual endnotes, so this is a tool to help in the conversion of any existing sisu markup documents with binary endnotes to inline endnotes, testing required --- misc/util/d/tools/endnotes_inline_from_binary.d | 127 ++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100755 misc/util/d/tools/endnotes_inline_from_binary.d diff --git a/misc/util/d/tools/endnotes_inline_from_binary.d b/misc/util/d/tools/endnotes_inline_from_binary.d new file mode 100755 index 0000000..4d9ceb0 --- /dev/null +++ b/misc/util/d/tools/endnotes_inline_from_binary.d @@ -0,0 +1,127 @@ +#!/usr/bin/env rdmd +/+ + - read in file .sst .ssi .ssm + - loop twice + - first + - check for and skip code blocks + - use unique code marker for endnote markers in text and give an endnote + number ★1, increment + - extract all endnotes in array + - second + - check that the footnote marker number count matches the number of notes + in the array + - if they match either: + - substitute each endnote marker with the array footnote[number-1] + - substitute each endnote marker with footnote + as inlined footnote markup (footnote number not needed) + - if they do not match exit + - check whether changes have been made + - if so write file with inline footnotes in sub-directory converted_output_/ + using the same name as the original file + - else, exit ++/ +import std.stdio; +import std.file; +import std.array : split; +import std.exception; +import core.stdc.errno; +import std.regex; 
import std.format;
import std.conv;

/+
  Outcome of one conversion attempt on a single document.
  - text        document with the endnotes inlined; only filled when matched
  - note_count  number of binary endnote paragraphs ("^~ ...") found outside
                code blocks
  - ref_count   number of endnote references ("~^") found outside code blocks
  - matched     true when both counts agree, i.e. the conversion was done
+/
private struct EndnoteConversion {
  string text;
  size_t note_count;
  size_t ref_count;
  bool   matched;
}

/+
  Convert binary endnotes to inline endnotes in the markup held in text.

  The text is split into paragraphs on blank lines ("\n\n") and walked twice:
  - first pass: paragraphs starting with "^~ " (outside code blocks) are
    removed from the body and collected as endnotes; "~^" references in the
    remaining non-code paragraphs are counted
  - second pass (only when the two counts agree): each "~^" in a non-code
    paragraph is replaced, in document order, by "~{ endnote }~"

  Code blocks (tic "``` code" ... "```" and curly "code{" ... "}code") are
  copied through untouched in both passes.

  Returns: an EndnoteConversion; when matched is false, text is left empty
  and only the counts are meaningful.
+/
private EndnoteConversion inlineBinaryEndnotes(string text) {
  import std.array : appender;
  import std.string : strip;

  // All block patterns use "m": they are applied to whole paragraphs, so the
  // opening/closing marker may sit on any line of the paragraph.
  // NOTE(review): the group names syntax/attrib are not referenced anywhere in
  // this tool; only "tail" is read back by name.
  auto block_tic_code_open    = ctRegex!("^`{3} code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?", "m");
  auto block_tic_close        = ctRegex!("^(`{3})$", "m");
  auto block_curly_code_open  = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`, "m");
  auto block_curly_code_close = ctRegex!(`^([}]code)`, "m");
  auto rgx_endnote_ref        = ctRegex!(`([~]\^)(?P<tail>[)\]]? |$)`, "gm");
  auto rgx_endnote            = ctRegex!(`^\^~\s+(.+|\n)`, "gm");

  string[] paragraphs;  // body paragraphs kept for output, in document order
  bool[]   is_code;     // parallel to paragraphs: true => copy through verbatim
  string[] endnotes;    // endnote texts, in document order
  size_t   ref_count    = 0;
  bool     in_curly_code = false;
  bool     in_tic_code   = false;

  foreach (paragraph; text.split("\n\n")) {  /+ loop to gather binary endnotes +/
    immutable inside_code = in_curly_code || in_tic_code;
    if (!inside_code && !paragraph.matchFirst(rgx_endnote).empty) {
      // drop the "^~ " marker; strip so a trailing newline (e.g. the last
      // paragraph of the file) does not end up inside the inline note
      endnotes ~= replaceAll!(m => m[1])(paragraph, rgx_endnote).strip;
      continue;  // endnote paragraphs are not part of the output body
    }
    bool code_paragraph = inside_code;
    if (in_curly_code) {
      if (!paragraph.matchFirst(block_curly_code_close).empty) in_curly_code = false;
    } else if (in_tic_code) {
      if (!paragraph.matchFirst(block_tic_close).empty) in_tic_code = false;
    } else if (!paragraph.matchFirst(block_curly_code_open).empty) {
      code_paragraph = true;
      // a block that also closes within this paragraph must not leave the
      // "in code" state set for the paragraphs that follow
      in_curly_code = paragraph.matchFirst(block_curly_code_close).empty;
    } else if (!paragraph.matchFirst(block_tic_code_open).empty) {
      code_paragraph = true;
      in_tic_code = paragraph.matchFirst(block_tic_close).empty;
    } else {
      foreach (_; paragraph.matchAll(rgx_endnote_ref)) {
        ref_count++;
      }
    }
    paragraphs ~= paragraph;
    is_code    ~= code_paragraph;
  }

  EndnoteConversion result;
  result.note_count = endnotes.length;
  result.ref_count  = ref_count;
  result.matched    = (endnotes.length == ref_count);
  if (!result.matched) {
    return result;
  }

  size_t next_note = 0;
  auto document = appender!string();
  foreach (i, paragraph; paragraphs) {       /+ loop to inline endnotes +/
    // only paragraphs whose references were counted may consume endnotes,
    // otherwise a "~^" inside a code block would shift or overrun the notes
    document.put(
      is_code[i]
        ? paragraph
        : replaceAll!(m => "~{ " ~ endnotes[next_note++] ~ " }~" ~ m["tail"])
            (paragraph, rgx_endnote_ref)
    );
    document.put("\n\n");
  }
  result.text = document.data;
  return result;
}

/+
  Entry point: for every argument that names a sisu markup file (.sst .ssi
  .ssm) and is not a "--option", convert binary endnotes to inline endnotes
  and write the result to converted_output_/<argument as given>.

  Per file:
  - unreadable or non-UTF-8 input is reported on stderr and skipped
  - a mismatch between endnotes and endnote references is reported and
    nothing is written
  - a file without any binary endnotes is left alone (nothing is written)
+/
void main(string[] args) {
  import std.path : dirName;
  import std.utf : UTFException;

  auto rgx_option    = ctRegex!(`^--\w+`);
  auto rgx_sisu_file = ctRegex!(`\w+?\.ss[itm]$`);

  foreach (arg; args[1..$]) {
    if (!arg.matchFirst(rgx_option).empty || arg.matchFirst(rgx_sisu_file).empty) {
      continue;
    }
    writeln(arg);
    string filename = arg;

    string text;
    try {
      text = filename.readText;  // throws FileException / UTFException
    } catch (FileException ex) {
      stderr.writeln("ERROR could not read ", filename, ": ", ex.msg);
      continue;
    } catch (UTFException ex) {
      stderr.writeln("ERROR ", filename, " is not valid UTF-8: ", ex.msg);
      continue;
    }

    auto converted = inlineBinaryEndnotes(text);
    if (!converted.matched) {
      writeln("ERROR binary endnote mismatch, check markup,\nmismatch in the number of endnotes & endnote references!");
      writeln(" number of endnotes: ", converted.note_count);
      writeln(" number of endnote refs: ", converted.ref_count);
      continue;
    }
    writeln("endnote ref count: ", converted.ref_count);
    writeln("number of binary endnotes: ", converted.note_count);
    if (converted.note_count == 0) {
      writeln("no binary endnotes found, nothing to convert");
      continue;
    }

    string dir_out = "converted_output_";
    string path_and_file_out = dir_out ~ "/" ~ filename;
    try {
      // filename may carry directory components, so create the full parent
      // path of the output file, not just dir_out
      path_and_file_out.dirName.mkdirRecurse;
      auto f = File(path_and_file_out, "w");  // throws ErrnoException
      f.write(converted.text);
      writeln("wrote: ", path_and_file_out);
    } catch (FileException ex) {
      writeln("did not write file");
      stderr.writeln("ERROR ", path_and_file_out, ": ", ex.msg);
    } catch (ErrnoException ex) {
      writeln("did not write file");
      stderr.writeln("ERROR ", path_and_file_out, ": ", ex.msg);
    }
  }
}