diff options
| author | Ralph Amissah <ralph.amissah@gmail.com> | 2026-04-13 15:33:07 -0400 |
|---|---|---|
| committer | Ralph Amissah <ralph.amissah@gmail.com> | 2026-04-13 16:25:56 -0400 |
| commit | d0ac448e6425c9e4246cd529aeb11643dce8093f (patch) | |
| tree | c12356fbc55cffd495cc37b81ca6fb125e3be195 /src/sisudoc/spine.d | |
| parent | package.nix cosmetic line-breaks for build command (diff) | |
spine may be run against a document-markup zip pod
- Source contributed by Claude
- Opens the zip with std.zip.ZipArchive (reads the whole file into
memory)
- Locates pod.manifest inside the archive to discover document paths
and languages
- Extracts markup files (.sst/.ssm/.ssi) as in-memory strings
- Extracts images as in-memory byte arrays
- Extracts conf/dr_document_make if present
- Presents these to the existing pipeline as if they were read from
the filesystem
- Some security mitigations:
- Zip Slip / Path Traversal: Reject entries containing `..` or
starting with `/`; canonicalize resolved paths and verify they
fall within extraction root
- Zip Bomb: Check `ArchiveMember.size` before extracting; enforce
per-file (50MB) and total (500MB) size limits
- Entry Count: Limit number of entries (a pod should have at most
~100 files)
- Path Depth: Limit nesting to a maximum of 10 path components
- Symlinks: Verify no symlinks in extracted content before
processing (post-extraction recursive scan)
- Filename Validation: Only allow expected characters; reject null
bytes
- Malformed Zips: Catch `ZipException` from `std.zip.ZipArchive`
constructor
- Cleanup on error
Diffstat (limited to 'src/sisudoc/spine.d')
| -rwxr-xr-x | src/sisudoc/spine.d | 180 |
1 files changed, 178 insertions, 2 deletions
diff --git a/src/sisudoc/spine.d b/src/sisudoc/spine.d index 5d3b228..ee3bcef 100755 --- a/src/sisudoc/spine.d +++ b/src/sisudoc/spine.d @@ -77,6 +77,7 @@ import sisudoc.meta.rgx_files; import sisudoc.io_in.paths_source; import sisudoc.io_in.read_config_files; import sisudoc.io_in.read_source_files; +import sisudoc.io_in.read_zip_pod; import sisudoc.io_out.hub; mixin(import("version.txt")); mixin(import("configuration.txt")); @@ -856,6 +857,9 @@ string program_name = "spine"; auto _manifested = PathMatters!()(_opt_action, _env, ""); auto _manifests = [ _manifested ]; auto _conf_file_details = configFilePaths!()(_manifested, _env, _opt_action.config_path_set); + /+ ↓ track extracted zip pod temp directories for cleanup +/ + mixin spineExtractZipPod; + ZipPodResult[] _zip_pod_extractions; ConfComposite _siteConfig; if ( _opt_action.require_processing_files @@ -863,7 +867,16 @@ string program_name = "spine"; ) { foreach(arg; args[1..$]) { if (!(arg.match(rgx.flag_action))) { /+ cli markup source path +/ // get first input markup source file names for processing - _manifested = PathMatters!()(_opt_action, _env, arg); + string _config_arg = arg; + /+ ↓ if first non-flag arg is a zip, extract for config discovery +/ + if (arg.match(rgx_files.src_pth_zip)) { + auto _zpr = extractZipPod(arg); + if (_zpr.ok) { + _zip_pod_extractions ~= _zpr; + _config_arg = _zpr.pod_dir; + } + } + _manifested = PathMatters!()(_opt_action, _env, _config_arg); { /+ local site config +/ _conf_file_details = configFilePaths!()(_manifested, _env, _opt_action.config_path_set); auto _config_local_site_struct = readConfigSite!()(_conf_file_details, _opt_action, _cfg); @@ -1047,7 +1060,166 @@ string program_name = "spine"; _manifests ~= _manifested; } } else if (arg.match(rgx_files.src_pth_zip)) { - // fns_src ~= arg; // gather input markup source file names for processing + /+ ↓ zip pod archive: extract to temp dir, process as pod +/ + /+ check if this zip was already extracted during config 
discovery +/ + string _zip_pod_dir; + foreach (ref _zpr; _zip_pod_extractions) { + if (_zpr.ok && _zpr.pod_dir.length > 0 + && _zpr.pod_dir.baseName == arg.baseName.stripExtension) + { + _zip_pod_dir = _zpr.pod_dir; + break; + } + } + if (_zip_pod_dir.length == 0) { + auto _zpr = extractZipPod(arg); + if (!_zpr.ok) { + writeln("ERROR >> Processing Skipped! Zip extraction failed: ", arg, " - ", _zpr.error_msg); + } else { + _zip_pod_extractions ~= _zpr; + _zip_pod_dir = _zpr.pod_dir; + } + } + if (_zip_pod_dir.length > 0) { + /+ process extracted pod directory same as regular pod +/ + auto _zip_manifest = PodManifest!()(_opt_action, _zip_pod_dir); + if (_zip_manifest.pod_manifest_file_with_path + && _opt_action.abstraction + ) { + string pod_manifest_root_content_paths_to_markup_location_raw_; + string markup_contents_location_; + string sisudoc_txt_ = _zip_manifest.pod_manifest_file_with_path; + enforce( + exists(sisudoc_txt_)!=0, + "file not found: <<" ~ + sisudoc_txt_ ~ ">>" + ); + if (exists(sisudoc_txt_)) { + try { + import dyaml; + Node pod_manifest_yaml; + try { + pod_manifest_yaml = Loader.fromFile(sisudoc_txt_).load(); + } catch (ErrnoException ex) { + } catch (FileException ex) { + writeln("ERROR failed to read config file"); + } catch (Throwable) { + writeln("ERROR failed to read config file content, not parsed as yaml"); + } + if ("doc" in pod_manifest_yaml) { + if (pod_manifest_yaml["doc"].type.mapping + && pod_manifest_yaml["doc"].tag.match(rgx_y.yaml_tag_is_map) + ) { + if ("path" in pod_manifest_yaml["doc"]) { + if (pod_manifest_yaml["doc"]["path"].tag.match(rgx_y.yaml_tag_is_seq)) { + foreach (string _path; pod_manifest_yaml["doc"]["path"]) { + markup_contents_location_ ~= _path ~ "\n"; + pod_manifest_root_content_paths_to_markup_location_raw_ ~= + _path ~ "\n"; + } + } else if ( + pod_manifest_yaml["doc"]["path"].type.string + && pod_manifest_yaml["doc"]["path"].tag.match(rgx_y.yaml_tag_is_str) + ) { + markup_contents_location_ = 
pod_manifest_yaml["doc"]["path"].get!string; + pod_manifest_root_content_paths_to_markup_location_raw_ = + pod_manifest_yaml["doc"]["path"].get!string; + } + } + if ("filename" in pod_manifest_yaml["doc"]) { + if (pod_manifest_yaml["doc"]["filename"].tag.match(rgx_y.yaml_tag_is_seq)) { + foreach (string _filename; pod_manifest_yaml["doc"]["filename"]) { + if ("language" in pod_manifest_yaml["doc"]) { + if (pod_manifest_yaml["doc"]["language"].tag.match(rgx_y.yaml_tag_is_seq)) { + foreach (string _lang; pod_manifest_yaml["doc"]["language"]) { + markup_contents_location_ ~= + "media/text/" + ~ _lang ~ "/" + ~ _filename ~ "\n"; + } + } else if (pod_manifest_yaml["doc"]["language"].tag.match(rgx_y.yaml_tag_is_str) + ) { + markup_contents_location_ = + "media/text/" + ~ pod_manifest_yaml["doc"]["language"].get!string + ~ "/" ~ _filename ~ "\n"; + } else { + string _lang_default = "en"; + markup_contents_location_ ~= + "media/text/" + ~ _lang_default ~ "/" + ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n"; + } + } else { + string _lang_default = "en"; + markup_contents_location_ ~= + "media/text/" + ~ _lang_default ~ "/" + ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n"; + } + } + } else if ( + pod_manifest_yaml["doc"]["filename"].type.string + && pod_manifest_yaml["doc"]["filename"].tag.match(rgx_y.yaml_tag_is_str) + ) { + if ("language" in pod_manifest_yaml["doc"]) { + if (pod_manifest_yaml["doc"]["language"].tag.match(rgx_y.yaml_tag_is_seq)) { + foreach (string _lang; pod_manifest_yaml["doc"]["language"]) { + markup_contents_location_ ~= + "media/text/" + ~ _lang ~ "/" + ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n"; + } + } else if (pod_manifest_yaml["doc"]["language"].tag.match(rgx_y.yaml_tag_is_str)) { + markup_contents_location_ = + "media/text/" + ~ pod_manifest_yaml["doc"]["language"].get!string + ~ "/" ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n"; + } else { + string _lang_default = "en"; + markup_contents_location_ ~= + 
"media/text/" + ~ _lang_default ~ "/" + ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n"; + } + } else { + string _lang_default = "en"; + markup_contents_location_ ~= + "media/text/" + ~ _lang_default ~ "/" + ~ pod_manifest_yaml["doc"]["filename"].get!string ~ "\n"; + } + } + } + } + } + } catch (ErrnoException ex) { + } catch (FileException ex) { + // Handle errors + } + } else { + writeln("manifest not found: ", sisudoc_txt_); + } + auto markup_contents_locations_arr + = (cast(char[]) markup_contents_location_).split; + auto tmp_dir_ = (sisudoc_txt_).dirName.array; + foreach (markup_contents_location; markup_contents_locations_arr) { + assert(markup_contents_location.match(rgx_files.src_pth_sst_or_ssm), + "not a recognised file: <<" ~ + markup_contents_location ~ ">>" + ); + auto markup_contents_location_pth_ = (markup_contents_location).to!string; + Regex!(char) lang_rgx_ = regex(r"/(" ~ _opt_action.languages_set.join("|") ~ ")/"); + if (_opt_action.languages_set[0] == "all" + || (markup_contents_location_pth_).match(lang_rgx_) + ) { + auto _fns = (((tmp_dir_).chainPath(markup_contents_location_pth_)).array).to!string; + _manifested = PathMatters!()(_opt_action, _env, _zip_pod_dir, _fns, markup_contents_locations_arr); + _manifests ~= _manifested; + } + } + } + } } else { // anything remaining, unused arg_unrecognized ~= " " ~ arg; } @@ -1277,4 +1449,8 @@ string program_name = "spine"; } } } // else { writeln("NO METADATA CURATED"); } + /+ ↓ clean up any extracted zip pod temp directories +/ + foreach (ref _zpr; _zip_pod_extractions) { + cleanupZipPod(_zpr); + } } |
