From 5fa79cb72d3cbc9bb6c6e85edafeca9eaa02a1d0 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 12 Jun 2019 18:31:06 -0400 Subject: harvest, document reporting, sorted topic register --- org/doc_reform.org | 84 ++++++++++++++++++++++++++++++++++++++++----- src/doc_reform/doc_reform.d | 84 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 152 insertions(+), 16 deletions(-) diff --git a/org/doc_reform.org b/org/doc_reform.org index b13d086..b4bce52 100644 --- a/org/doc_reform.org +++ b/org/doc_reform.org @@ -124,17 +124,50 @@ void main(string[] args) { string[] _document_topic_register; string[] _topic_register; string[] _sub_topic_register; + string[][string][string][string][string] subject_trees; foreach(k, doc_harvest; harvests) { _topic_register = []; foreach(topic; doc_harvest.topic_register_arr.sort) { _sub_topic_register = []; string _spaces; - foreach (i, _top; topic.split(mkup.sep)) { - _sub_topic_register ~= format( - " %s- %s", - " ".repeat(i).join, - _top, - ); + string[] subject_tree = topic.split(mkup.sep); + switch (subject_tree.length) { + case 1: + if (subject_tree[0] in subject_trees) { + subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] ~= doc_harvest.uid; + } else { + subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] = [doc_harvest.uid]; + } + break; + case 2: + if (subject_tree[0] in subject_trees + && subject_tree[1] in subject_trees[subject_tree[0]]) { + subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] ~= doc_harvest.uid; + } else { + subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] = [doc_harvest.uid]; + } + break; + case 3: + if (subject_tree[0] in subject_trees + && subject_tree[1] in subject_trees[subject_tree[0]] + && subject_tree[2] in subject_trees[subject_tree[0]][subject_tree[1]]) { + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] ~= doc_harvest.uid; + } else { + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] = [doc_harvest.uid]; + } + break; + case 4: + if (subject_tree[0] in subject_trees + && subject_tree[1] in subject_trees[subject_tree[0]] + && subject_tree[2] in subject_trees[subject_tree[0]][subject_tree[1]] + && subject_tree[3] in subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]) { + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] ~= doc_harvest.uid; + } else { + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] = [doc_harvest.uid]; + } + break; + default: + break; } _topic_register ~= _sub_topic_register.join("\n"); } @@ -150,8 +183,43 @@ void main(string[] args) { (doc_harvest.date_published.length > 0) ? " (" ~ doc_harvest.date_published ~ ")" : "", _topic_register.sort!("toUpper(a) < toUpper(b)", SwapStrategy.stable).release.join("\n"), ); - foreach(_dtr; _document_topic_register.sort) { - writeln(_dtr); + } + foreach(k0; subject_trees.keys.sort) { + if (k0 != "_a") { + writeln("", k0); + if ("_a" in subject_trees[k0]) { + foreach (uid; subject_trees[k0]["_a"]["_a"]["_a"].sort) { + writeln("- ", uid); + } + } + } + foreach(k1; subject_trees[k0].keys.sort) { + if (k1 != "_a") { + writeln(" ", k1); + if ("_a" in subject_trees[k0][k1]) { + foreach (uid; subject_trees[k0][k1]["_a"]["_a"].sort) { + writeln(" - ", uid); + } + } + } + foreach(k2; subject_trees[k0][k1].keys.sort) { + if (k2 != "_a") { + writeln(" ", k2); + if ("_a" in subject_trees[k0][k1][k2]) { + foreach (uid; subject_trees[k0][k1][k2]["_a"].sort) { + writeln(" - ", uid); + } + } + } + foreach(k3; subject_trees[k0][k1][k2].keys.sort) { + if (k3 != "_a") { + writeln(" ", k3); + foreach (uid; subject_trees[k0][k1][k2][k3]) { + writeln(" - ", uid); + } + } + } + } } } } diff --git a/src/doc_reform/doc_reform.d b/src/doc_reform/doc_reform.d index 663fb4d..9a17f2e 100755 --- a/src/doc_reform/doc_reform.d +++ b/src/doc_reform/doc_reform.d @@ -829,17 +829,50 @@ void main(string[] args) { string[] _document_topic_register; string[] _topic_register; string[] _sub_topic_register; + string[][string][string][string][string] subject_trees; foreach(k, doc_harvest; harvests) { _topic_register = []; foreach(topic; doc_harvest.topic_register_arr.sort) { _sub_topic_register = []; string _spaces; - foreach (i, _top; topic.split(mkup.sep)) { - _sub_topic_register ~= format( - " %s- %s", - " ".repeat(i).join, - _top, - ); + string[] subject_tree = topic.split(mkup.sep); + switch (subject_tree.length) { + case 1: + if (subject_tree[0] in subject_trees) { + subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] ~= doc_harvest.uid; + } else { + subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] = [doc_harvest.uid]; + } + break; + case 2: + if (subject_tree[0] in subject_trees + && subject_tree[1] in subject_trees[subject_tree[0]]) { + subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] ~= doc_harvest.uid; + } else { + subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] = [doc_harvest.uid]; + } + break; + case 3: + if (subject_tree[0] in subject_trees + && subject_tree[1] in subject_trees[subject_tree[0]] + && subject_tree[2] in subject_trees[subject_tree[0]][subject_tree[1]]) { + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] ~= doc_harvest.uid; + } else { + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] = [doc_harvest.uid]; + } + break; + case 4: + if (subject_tree[0] in subject_trees + && subject_tree[1] in subject_trees[subject_tree[0]] + && subject_tree[2] in subject_trees[subject_tree[0]][subject_tree[1]] + && subject_tree[3] in subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]) { + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] ~= doc_harvest.uid; + } else { + subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] = [doc_harvest.uid]; + } + break; + default: + break; } _topic_register ~= _sub_topic_register.join("\n"); } @@ -855,8 +888,43 @@ void main(string[] args) { (doc_harvest.date_published.length > 0) ? " (" ~ doc_harvest.date_published ~ ")" : "", _topic_register.sort!("toUpper(a) < toUpper(b)", SwapStrategy.stable).release.join("\n"), ); - foreach(_dtr; _document_topic_register.sort) { - writeln(_dtr); + } + foreach(k0; subject_trees.keys.sort) { + if (k0 != "_a") { + writeln("", k0); + if ("_a" in subject_trees[k0]) { + foreach (uid; subject_trees[k0]["_a"]["_a"]["_a"].sort) { + writeln("- ", uid); + } + } + } + foreach(k1; subject_trees[k0].keys.sort) { + if (k1 != "_a") { + writeln(" ", k1); + if ("_a" in subject_trees[k0][k1]) { + foreach (uid; subject_trees[k0][k1]["_a"]["_a"].sort) { + writeln(" - ", uid); + } + } + } + foreach(k2; subject_trees[k0][k1].keys.sort) { + if (k2 != "_a") { + writeln(" ", k2); + if ("_a" in subject_trees[k0][k1][k2]) { + foreach (uid; subject_trees[k0][k1][k2]["_a"].sort) { + writeln(" - ", uid); + } + } + } + foreach(k3; subject_trees[k0][k1][k2].keys.sort) { + if (k3 != "_a") { + writeln(" ", k3); + foreach (uid; subject_trees[k0][k1][k2][k3]) { + writeln(" - ", uid); + } + } + } + } } } } -- cgit v1.2.3