1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
/+
- Name: Spine, Doc Reform [a part of]
- Description: documents, structuring, processing, publishing, search
- static content generator
- Author: Ralph Amissah
[ralph.amissah@gmail.com]
- Copyright: (C) 2015 - 2022 Ralph Amissah, All Rights
Reserved.
- License: AGPL 3 or later:
Spine (SiSU), a framework for document structuring, publishing and
search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU AFERO General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see [https://www.gnu.org/licenses/].
If you have Internet connection, the latest version of the AGPL should be
available at these locations:
[https://www.fsf.org/licensing/licenses/agpl.html]
[https://www.gnu.org/licenses/agpl.html]
- Spine (by Doc Reform, related to SiSU) uses standard:
- docReform markup syntax
- standard SiSU markup syntax with modified headers and minor modifications
- docReform object numbering
- standard SiSU object citation numbering & system
- Homepages:
[https://www.doc_reform.org]
[https://www.sisudoc.org]
- Git
[https://git.sisudoc.org/projects/?p=software/spine.git;a=summary]
+/
/++
regex: regular expressions used in sisu document parser
+/
module doc_reform.io_out.rgx;
static template spineRgxOut() {
static struct RgxO {
static make_breakpage = ctRegex!(`new=(?P<breakpage>.+?)(?:;|$)`);
static make_breakcolumn = ctRegex!(`break=(?P<breakcolumn>.+?)(?:;|$)`,);
static newline = ctRegex!("\n", "mg");
static space = ctRegex!(`[ ]`, "mg");
static spaces_keep = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block
static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
static nbsp_char = ctRegex!(`░`, "mg");
static nbsp_chars = ctRegex!(`[░]+`, "mg");
static middle_dot = ctRegex!(`·`, "mg");
static src_pth_sst_or_ssm = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`);
static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`);
static src_pth_contents = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`);
static src_pth_zip = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`);
static src_pth_types =
ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`);
static src_fn =
ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
static src_fn_master = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
static src_fn_find_inserts = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
static src_base_parent_dir_name = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
static src_formalised_file_path_parts = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
/+ line breaks +/
static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg");
static br_linebreaks_newlines = ctRegex!(`[\n┘┙]`, "mg");
static br_linebreaks = ctRegex!(`[┘┙]`, "mg");
static br_line = ctRegex!(`┘`, "mg");
static br_line_inline = ctRegex!(`┙`, "mg");
static br_line_spaced = ctRegex!(`┚`, "mg");
static brln = ctRegex!(`(?:\\\\)+`, "mg");
/+ quotation marks +/
static quotes_open_and_close = ctRegex!(`[“”]`, "mg");
static quote_open = ctRegex!(`[“]`, "mg");
static quote_close = ctRegex!(`[”]`, "mg");
/+ inline markup footnotes endnotes +/
static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
static inline_notes_al_gen = ctRegex!(`【.+?】`, "m");
static inline_notes_al_regular = ctRegex!(`【(.+?)】`, "mg");
static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m");
static inline_notes_al_gen_ref = ctRegex!(`【(?P<ref>[*+]\s+)\s*(?P<text>.+?)】`, "mg");
static inline_notes_al_all_note = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg");
static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
static inline_notes_al_special_char_note = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg");
static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m");
static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m");
static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m");
static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg");
/+ inline markup links +/
static inline_image = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_without_dimensions = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_info = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg");
static inline_link_anchor = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor
static inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg");
static inline_link_empty = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
static inline_link_number = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
static inline_link_number_only = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
static inline_link_stow_uri = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
static inline_link_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg");
static inline_link_seg_and_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg");
static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
static inline_link_toc_to_backmatter = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
static url = ctRegex!(`https?://`, "mg");
static uri = ctRegex!(`(?:https?|git)://`, "mg");
static uri_identify_components = ctRegex!(`(?P<type>(?:https?|git)://)(?P<path>\S+?/)(?P<file>[^/]+)$`, "mg");
static inline_link_subtoc = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
static inline_link_fn_suffix = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
static mark_internal_site_lnk = ctRegex!(`¤`, "mg");
static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg");
/+ inline markup font face mod +/
static inline_mark_emphasis = ctRegex!(`(?P<mark>[*])\{(?P<text>.+?)\}[*]`, "mg");
static inline_mark_bold = ctRegex!(`(?P<mark>[!])\{(?P<text>.+?)\}[!]`, "mg");
static inline_mark_underscore = ctRegex!(`(?P<mark>[_])\{(?P<text>.+?)\}[_]`, "mg");
static inline_mark_italics = ctRegex!(`(?P<mark>[/])\{(?P<text>.+?)\}[/]`, "mg");
static inline_mark_superscript = ctRegex!(`(?P<mark>\^)\{(?P<text>.+?)\}\^`, "mg");
static inline_mark_subscript = ctRegex!(`(?P<mark>[,])\{(?P<text>.+?)\}[,]`, "mg");
static inline_mark_strike = ctRegex!(`(?P<mark>[-])\{(?P<text>.+?)\}[-]`, "mg");
static inline_mark_insert = ctRegex!(`(?P<mark>[+])\{(?P<text>.+?)\}[+]`, "mg");
static inline_mark_mono = ctRegex!(`(?P<mark>[#])\{(?P<text>.+?)\}[#]`, "mg");
static inline_mark_cite = ctRegex!(`(?P<mark>["])\{(?P<text>.+?)\}["]`, "mg");
static inline_faces_line = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
static inline_emphasis_line = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_bold_line = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_italics_line = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_underscore_line = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
/+ inline markup font face mod +/
static inline_emphasis = ctRegex!(`[*]┨(?P<text>.+?)┣[*]`, "mg");
static inline_bold = ctRegex!(`[!]┨(?P<text>.+?)┣[!]`, "mg");
static inline_underscore = ctRegex!(`[_]┨(?P<text>.+?)┣[_]`, "mg");
static inline_italics = ctRegex!(`[/]┨(?P<text>.+?)┣[/]`, "mg");
static inline_superscript = ctRegex!(`\^┨(?P<text>.+?)┣\^`, "mg");
// static inline_superscript = ctRegex!(`[\^]┨(?P<text>.+?)┣[\^]`, "mg");
static inline_subscript = ctRegex!(`[,]┨(?P<text>.+?)┣[,]`, "mg");
static inline_strike = ctRegex!(`[-]┨(?P<text>.+?)┣[-]`, "mg");
static inline_insert = ctRegex!(`[+]┨(?P<text>.+?)┣[+]`, "mg");
static inline_mono = ctRegex!(`[■]┨(?P<text>.+?)┣[■]`, "mg");
static inline_cite = ctRegex!(`[‖]┨(?P<text>.+?)┣[‖]`, "mg");
static inline_fontface_clean = ctRegex!(`[*!_/^,+■‖-]┨|┣[*!_/^,+■‖-]`, "mg");
/+ table delimiters +/
static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg");
static table_delimiter_row = ctRegex!("[ ]*\n", "mg");
static xhtml_ampersand = ctRegex!(`[&]`, "m"); // &
static xhtml_quotation = ctRegex!(`["]`, "m"); // "
static xhtml_less_than = ctRegex!(`[<]`, "m"); // <
static xhtml_greater_than = ctRegex!(`[>]`, "m"); // >
static xhtml_line_break = ctRegex!(` [\\]{2}`, "m"); // <br />
static latex_special_char = ctRegex!(`([%${}_#&\\])`);
static latex_special_char_for_escape = ctRegex!(`([%${}_#\\])`);
static latex_special_char_for_escape_and_braces = ctRegex!(`([&])`);
static latex_special_char_for_escape_url = ctRegex!(`([%])`);
static latex_special_char_escaped = ctRegex!(`\\([%${}_#\\])`);
static latex_special_char_escaped_braced = ctRegex!(`[{]\\([&])[}]`);
static latex_identify_inline_link = ctRegex!(`┥.+?┝┤\S+?├`, "mg");
static latex_clean_internal_link = ctRegex!(`^(?:#|¤\S+?#)`, "m");
static latex_identify_inline_fontface = ctRegex!(`\\([_#$]┨.+?┣)\\([_#$])`, "mg");
static latex_clean_bookindex_linebreak = ctRegex!(`\s*\\\\\\\\\s*`, "m");
/+ paragraph operators +/
static grouped_para_indent_1 = ctRegex!(`^_1[ ]`, "m");
static grouped_para_indent_2 = ctRegex!(`^_2[ ]`, "m");
static grouped_para_indent_3 = ctRegex!(`^_3[ ]`, "m");
static grouped_para_indent_4 = ctRegex!(`^_4[ ]`, "m");
static grouped_para_indent_5 = ctRegex!(`^_5[ ]`, "m");
static grouped_para_indent_6 = ctRegex!(`^_6[ ]`, "m");
static grouped_para_indent_7 = ctRegex!(`^_7[ ]`, "m");
static grouped_para_indent_8 = ctRegex!(`^_8[ ]`, "m");
static grouped_para_indent_9 = ctRegex!(`^_9[ ]`, "m");
static grouped_para_bullet = ctRegex!(`^_[*] `, "m");
static grouped_para_bullet_indent_1 = ctRegex!(`^_1[*] `, "m");
static grouped_para_bullet_indent_2 = ctRegex!(`^_2[*] `, "m");
static grouped_para_bullet_indent_3 = ctRegex!(`^_3[*] `, "m");
static grouped_para_bullet_indent_4 = ctRegex!(`^_4[*] `, "m");
static grouped_para_bullet_indent_5 = ctRegex!(`^_5[*] `, "m");
static grouped_para_bullet_indent_6 = ctRegex!(`^_6[*] `, "m");
static grouped_para_bullet_indent_7 = ctRegex!(`^_7[*] `, "m");
static grouped_para_bullet_indent_8 = ctRegex!(`^_8[*] `, "m");
static grouped_para_bullet_indent_9 = ctRegex!(`^_9[*] `, "m");
static grouped_para_bullet_indent = ctRegex!(`^_(?P<indent>[1-9])[*] `, "m");
static grouped_para_indent = ctRegex!(`^_(?P<indent>[1-9])[ ]`, "m");
static grouped_para_indent_hang = ctRegex!(`^_(?P<hang>[0-9])_(?P<indent>[0-9])[ ]`, "m");
}
}
|