about | summary | refs | log | tree | commit | diff | homepage
path: root/src/sisudoc/spine.d
diff options
context:
space:
mode:
author: Ralph Amissah <ralph.amissah@gmail.com> 2026-04-13 15:33:07 -0400
committer: Ralph Amissah <ralph.amissah@gmail.com> 2026-04-13 16:25:56 -0400
commit: d0ac448e6425c9e4246cd529aeb11643dce8093f (patch)
tree: c12356fbc55cffd495cc37b81ca6fb125e3be195 /src/sisudoc/spine.d
parent: package.nix cosmetic line-breaks for build command (diff)
spine may be run against a document-markup zip pod
- claude contributed src
- Opens the zip with std.zip.ZipArchive (reads the whole file into memory)
- Locates pod.manifest inside the archive to discover document paths and languages
- Extracts markup files (.sst/.ssm/.ssi) as in-memory strings
- Extracts images as in-memory byte arrays
- Extracts conf/dr_document_make if present
- Presents these to the existing pipeline as if they were read from the filesystem
- Some security mitigations:
  - Zip Slip / Path Traversal: Reject entries containing `..` or starting with `/`; canonicalize resolved paths and verify they fall within the extraction root
  - Zip Bomb: Check `ArchiveMember.size` before extracting; enforce per-file (50MB) and total (500MB) size limits
  - Entry Count: Limit the number of entries (a pod should have at most ~100 files)
  - Path Depth: Limit path depth (maximum 10 path components)
  - Symlinks: Verify no symlinks in extracted content before processing (post-extraction recursive scan)
  - Filename Validation: Only allow expected characters; reject null bytes
  - Malformed Zips: Catch `ZipException` from the `std.zip.ZipArchive` constructor
  - Cleanup on error
Diffstat (limited to 'src/sisudoc/spine.d')
-rwxr-xr-x src/sisudoc/spine.d 180
1 files changed, 178 insertions, 2 deletions
diff --git a/src/sisudoc/spine.d b/src/sisudoc/spine.d
index 5d3b228..ee3bcef 100755
--- a/src/sisudoc/spine.d
+++ b/src/sisudoc/spine.d
@@ -77,6 +77,7 @@ import sisudoc.meta.rgx_files;
import sisudoc.io_in.paths_source;
import sisudoc.io_in.read_config_files;
import sisudoc.io_in.read_source_files;
+import sisudoc.io_in.read_zip_pod;
import sisudoc.io_out.hub;
mixin(import("version.txt"));
mixin(import("configuration.txt"));
@@ -856,6 +857,9 @@ string program_name = "spine";
auto _manifested = PathMatters!()(_opt_action, _env, "");
auto _manifests = [ _manifested ];
auto _conf_file_details = configFilePaths!()(_manifested, _env, _opt_action.config_path_set);
+ /+ ↓ track extracted zip pod temp directories for cleanup +/
+ mixin spineExtractZipPod;
+ ZipPodResult[] _zip_pod_extractions;
ConfComposite _siteConfig;
if (
_opt_action.require_processing_files
@@ -863,7 +867,16 @@ string program_name = "spine";
) {
foreach(arg; args[1..$]) {
if (!(arg.match(rgx.flag_action))) { /+ cli markup source path +/ // get first input markup source file names for processing
- _manifested = PathMatters!()(_opt_action, _env, arg);
+ string _config_arg = arg;
+ /+ ↓ if first non-flag arg is a zip, extract for config discovery +/
+ if (arg.match(rgx_files.src_pth_zip)) {
+ auto _zpr = extractZipPod(arg);
+ if (_zpr.ok) {
+ _zip_pod_extractions ~= _zpr;
+ _config_arg = _zpr.pod_dir;
+ }
+ }
+ _manifested = PathMatters!()(_opt_action, _env, _config_arg);
{ /+ local site config +/
_conf_file_details = configFilePaths!()(_manifested, _env, _opt_action.config_path_set);
auto _config_local_site_struct = readConfigSite!()(_conf_file_details, _opt_action, _cfg);
@@ -1047,7 +1060,166 @@ string program_name = "spine";
_manifests ~= _manifested;
}
} else if (arg.match(rgx_files.src_pth_zip)) {
- // fns_src ~= arg; // gather input markup source file names for processing
+ /+ ↓ zip pod archive: extract to temp dir, process as pod +/
+ /+ check if this zip was already extracted during config discovery +/
+ string _zip_pod_dir;
+ foreach (ref _zpr; _zip_pod_extractions) {
+ if (_zpr.ok && _zpr.pod_dir.length > 0
+ && _zpr.pod_dir.baseName == arg.baseName.stripExtension)
+ {
+ _zip_pod_dir = _zpr.pod_dir;
+ break;
+ }
+ }
+ if (_zip_pod_dir.length == 0) {
+ auto _zpr = extractZipPod(arg);
+ if (!_zpr.ok) {
+ writeln("ERROR >> Processing Skipped! Zip extraction failed: ", arg, " - ", _zpr.error_msg);
+ } else {
+ _zip_pod_extractions ~= _zpr;
+ _zip_pod_dir = _zpr.pod_dir;
+ }
+ }
+ if (_zip_pod_dir.length > 0) {
+ /+ process extracted pod directory same as regular pod +/
+ auto _zip_manifest = PodManifest!()(_opt_action, _zip_pod_dir);
+ if (_zip_manifest.pod_manifest_file_with_path
+ && _opt_action.abstraction
+ ) {
+ string pod_manifest_root_content_paths_to_markup_location_raw_;
+ string markup_contents_location_;
+ string sisudoc_txt_ = _zip_manifest.pod_manifest_file_with_path;
+ enforce(
+ exists(sisudoc_txt_)!=0,
+ "file not found: <<" ~
+ sisudoc_txt_ ~ ">>"
+ );
+ if (exists(sisudoc_txt_)) {
+ try {
+ import dyaml;
+ Node pod_manifest_yaml;
+ try {
+ pod_manifest_yaml = Loader.fromFile(sisudoc_txt_).load();
+ } catch (ErrnoException ex) {
+ } catch (FileException ex) {
+ writeln("ERROR failed to read config file");
+ } catch (Throwable) {
+ writeln("ERROR failed to read config file content, not parsed as yaml");
+ }
+ if ("doc" in pod_manifest_yaml) {
+ if (pod_manifest_yaml["doc"].type.mapping
+ && pod_manifest_yaml["doc"].tag.match(rgx_y.yaml_tag_is_map)
+ ) {
+ if ("path" in pod_manifest_yaml["doc"]) {
+ if (pod_manifest_yaml["doc"]["path"].tag.match(rgx_y.yaml_tag_is_seq)) {
+ foreach (string _path; pod_manifest_yaml["doc"]["path"]) {
+ markup_contents_location_ ~= _path ~ "\n";
+ pod_manifest_root_content_paths_to_markup_location_raw_ ~=
+ _path ~ "\n";
+ }
+ } else if (
+ pod_manifest_yaml["doc"]["path"].type.string
+ && pod_manifest_yaml["doc"]["path"].tag.match(rgx_y.yaml_tag_is_str)
+ ) {
+ markup_contents_location_ = pod_manifest_yaml["doc"]["path"].get!string;
+ pod_manifest_root_content_paths_to_markup_location_raw_ =
+ pod_manifest_yaml["doc"]["path"].get!string;
+ }
+ }
+ if ("filename" in pod_manifest_yaml["doc"]) {
+ if (pod_manifest_yaml["doc"]["filename"].tag.match(rgx_y.yaml_tag_is_seq)) {
+ foreach (string _filename; pod_manifest_yaml["doc"]["filename"]) {
+ if ("language" in pod_manifest_yaml["doc"]) {
+ if (pod_manifest_yaml["doc"]["language"].tag.match(rgx_y.yaml_tag_is_seq)) {
+ foreach (string _lang; pod_manifest_yaml["doc"]["language"]) {
+ markup_contents_location_ ~=
+ "media/text/"
+ ~ _lang ~ "/"
+ ~ _filename ~ "\n";
+ }
+ } else if (pod_manifest_yaml["doc"]["language"].tag.match(rgx_y.yaml_tag_is_str)
+ ) {
+ markup_contents_location_ =
+ "media/text/"
+ ~ pod_manifest_yaml["doc"]["language"].get!string
+ ~ "/" ~ _filename ~ "\n";
+ } else {
+ string _lang_default = "en";
+ markup_contents_location_ ~=
+ "media/text/"
+ ~ _lang_default ~ "/"
+ ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n";
+ }
+ } else {
+ string _lang_default = "en";
+ markup_contents_location_ ~=
+ "media/text/"
+ ~ _lang_default ~ "/"
+ ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n";
+ }
+ }
+ } else if (
+ pod_manifest_yaml["doc"]["filename"].type.string
+ && pod_manifest_yaml["doc"]["filename"].tag.match(rgx_y.yaml_tag_is_str)
+ ) {
+ if ("language" in pod_manifest_yaml["doc"]) {
+ if (pod_manifest_yaml["doc"]["language"].tag.match(rgx_y.yaml_tag_is_seq)) {
+ foreach (string _lang; pod_manifest_yaml["doc"]["language"]) {
+ markup_contents_location_ ~=
+ "media/text/"
+ ~ _lang ~ "/"
+ ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n";
+ }
+ } else if (pod_manifest_yaml["doc"]["language"].tag.match(rgx_y.yaml_tag_is_str)) {
+ markup_contents_location_ =
+ "media/text/"
+ ~ pod_manifest_yaml["doc"]["language"].get!string
+ ~ "/" ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n";
+ } else {
+ string _lang_default = "en";
+ markup_contents_location_ ~=
+ "media/text/"
+ ~ _lang_default ~ "/"
+ ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n";
+ }
+ } else {
+ string _lang_default = "en";
+ markup_contents_location_ ~=
+ "media/text/"
+ ~ _lang_default ~ "/"
+ ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n";
+ }
+ }
+ }
+ }
+ }
+ } catch (ErrnoException ex) {
+ } catch (FileException ex) {
+ // Handle errors
+ }
+ } else {
+ writeln("manifest not found: ", sisudoc_txt_);
+ }
+ auto markup_contents_locations_arr
+ = (cast(char[]) markup_contents_location_).split;
+ auto tmp_dir_ = (sisudoc_txt_).dirName.array;
+ foreach (markup_contents_location; markup_contents_locations_arr) {
+ assert(markup_contents_location.match(rgx_files.src_pth_sst_or_ssm),
+ "not a recognised file: <<" ~
+ markup_contents_location ~ ">>"
+ );
+ auto markup_contents_location_pth_ = (markup_contents_location).to!string;
+ Regex!(char) lang_rgx_ = regex(r"/(" ~ _opt_action.languages_set.join("|") ~ ")/");
+ if (_opt_action.languages_set[0] == "all"
+ || (markup_contents_location_pth_).match(lang_rgx_)
+ ) {
+ auto _fns = (((tmp_dir_).chainPath(markup_contents_location_pth_)).array).to!string;
+ _manifested = PathMatters!()(_opt_action, _env, _zip_pod_dir, _fns, markup_contents_locations_arr);
+ _manifests ~= _manifested;
+ }
+ }
+ }
+ }
} else { // anything remaining, unused
arg_unrecognized ~= " " ~ arg;
}
@@ -1277,4 +1449,8 @@ string program_name = "spine";
}
}
} // else { writeln("NO METADATA CURATED"); }
+ /+ ↓ clean up any extracted zip pod temp directories +/
+ foreach (ref _zpr; _zip_pod_extractions) {
+ cleanupZipPod(_zpr);
+ }
}