aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sdp/ao_rgx.d
blob: f3208670fea6dcd985ad3ffcf3bcb249b1b634c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/++
  regex: regular expressions used in sisu document parser

  RgxInit wraps a single struct, Rgx, whose static members are the patterns
  used to recognise sisu markup. Every pattern is built with ctRegex
  (presumably std.regex.ctRegex; the import is outside this view).

  The optional second ctRegex argument is a flag string following the
  std.regex conventions: "g" global, "i" case insensitive, "m" multi-line
  (^ and $ match at line boundaries).

  Patterns in backticks are WYSIWYG strings: backslashes reach the regex
  engine unchanged. Patterns in double quotes have D escape sequences
  processed first, so a literal backslash for the regex engine needs four
  backslashes in the source.
+/
template RgxInit() {
  struct Rgx {
    /+ misc +/
    // a literal dollar sign
    static true_dollar                                    = ctRegex!(`\$`, "gm");
    // a double-dash option: as the whole string, or preceded by a space within a string
    static flag_action                                    = ctRegex!(`^(--[a-z][a-z0-9-]+)$`);
    static flag_action_str                                = ctRegex!(` (--[a-z][a-z0-9-]+)`);
    // shortest non-empty text between a pair of double quotes
    static within_quotes                                  = ctRegex!(`"(.+?)"`);
    // semicolon separated lists; arr_delimiter also swallows spaces before the semicolon
    static make_heading_delimiter                         = ctRegex!(`[;][ ]*`);
    static arr_delimiter                                  = ctRegex!(`[ ]*[;][ ]*`);
    // "first part, second part": [1] text before the first comma, [2] text after it
    static name_delimiter                                 = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`);
    // a number, optionally the start of a range such as 12-15; [1] is the first number
    static book_index_go                                  = ctRegex!("([0-9]+)(?:-[0-9]+)?");
    static trailing_comma                                 = ctRegex!(",[ ]*$");
    // comma, one or two spaces, two literal backslashes, newline, then four spaces at line end
    static trailing_linebreak                             = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m");
    // newline handling: bare newline, newline with preceding / surrounding spaces stripped
    static newline_eol_delimiter                          = ctRegex!("\n");
    static newline_eol_strip_preceeding                   = ctRegex!("[ ]*\n");
    static newline_eol_delimiter_only                     = ctRegex!("^\n");
    static line_delimiter_ws_strip                        = ctRegex!("[ ]*\n[ ]*");
    // a newline followed by optional spaces and at least one further newline
    static para_delimiter                                 = ctRegex!("\n[ ]*\n+");
    // single character level identifiers
    static levels_markup                                  = ctRegex!(`^[A-D1-4]$`);
    static levels_numbered                                = ctRegex!(`^[0-9]$`);
    static levels_numbered_headings                       = ctRegex!(`^[0-7]$`);
    // source file names: optional directory components followed by a file name
    // with one of the suffixes .sst .ssm .ssi (which ones varies per pattern)
    static src_pth                                        = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])$`);
    static src_fn                                         =
      ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
    static src_fn_master                                  = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
    static src_fn_text                                    = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`);
    static src_fn_insert                                  = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`);
    static src_fn_find_inserts                            = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
    // an insert line: "<<" followed by the path of an .sst or .ssi file
    static insert_src_fn_ssi_or_sst                       = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
    /+ comments +/
    // one or more % at line start followed by a space; comments also accepts a line of only %
    static comment                                        = ctRegex!(`^%+ `);
    static comments                                       = ctRegex!(`^%+ |^%+$`);
    /+ header +/
    // the recognised main header names
    static main_headers                                   =
      ctRegex!(`^(?:creator|title|rights|date|original|classify|identifier|notes|publisher|make|links)$`, "m");
    // "@name:" at line start followed by whitespace or end
    static native_header                                  = ctRegex!(`^@([a-z_]+):(?:\s|$)`);
    static native_header_make                             = ctRegex!(`^@(make):(?:\s|$)`);
    // as main_headers but in "@name:" form and without make
    static native_header_meta                             =
      ctRegex!(`^@(?:creator|title|rights|date|original|classify|identifier|notes|publisher|links):(?:\s|$)`);
    // "@header: content" and the indented sub form ":subheader: content"
    static native_header_main                             = ctRegex!(`^@(?P<header>[a-z_]+):\s*(?P<content>.*)`, "m");
    static native_header_sub                              = ctRegex!(`^[ ]*:(?P<subheader>[a-z_]+):\s+(?P<content>.+)`, "m");
    static native_header_meta_title                       = ctRegex!(`^@title:\s`, "m");
    // the literal tokens @title, @author and @creator anywhere in a string
    static variable_doc_title                             = ctRegex!(`@title`);
    static variable_doc_author                            = ctRegex!(`@author|@creator`);
    // "text, text": [1] before the first comma-plus-whitespace, [2] the remainder
    static raw_author_munge                               = ctRegex!(`(\S.+?),\s+(.+)`,"i");
    /+ head +/
    // the sub-header names accepted under each main header
    static native_subhead_creator                         = ctRegex!(`^(?:author|translator|illustrator)$`, "m");
    static native_subhead_title                           = ctRegex!(`^(?:main|sub(?:title)?|full|language|edition|note)$`, "m");
    static native_subhead_rights                          = ctRegex!(`^(?:copyright|illustrations|license|cover)$`, "m");
    static native_subhead_date                            = ctRegex!(`^(?:published|created|issued|available|valid|modified|added_to_site)$`, "m");
    static native_subhead_original                        = ctRegex!(`^(?:title|language|source)$`, "m");
    static native_subhead_classify                        = ctRegex!(`^(?:topic_register|subject|keywords|loc|dewey)$`, "m");
    static native_subhead_identifier                      = ctRegex!(`^(?:oclc|pg|isbn)$`, "m");
    static native_subhead_notes                           = ctRegex!(`^(?:abstract|description)$`, "m");
    static native_subhead_publisher                       = ctRegex!(`^(?:name)$`, "m");
    static native_subhead_make                            = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|num_depth|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m");
    /+ heading & paragraph operators +/
    // heading lines start with an optional colon, a level character (A-D or 1-4)
    // and a tilde, optionally followed directly by a name tag
    static heading_a                                      = ctRegex!(`^:?[A][~] `, "m");
    // [1] level, [2] name tag (may be empty, may end in ?)
    static heading                                        = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i");
    // as heading, restricted to levels A-D and 1
    static heading_seg_and_above                          = ctRegex!(`^:?([A-D1])[~]([a-z0-9_.-]*[?]?)\s+`,"i");
    static heading_marker                                 = ctRegex!(`^:?([A-D1-4])[~]`);
    // [1] a non-empty name tag given directly after the tilde
    static heading_anchor_tag                             = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i");
    // an untagged heading whose text begins "chapter|article|section|clause <number>" or with a bare number
    static heading_identify_anchor_tag                    = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i");
                                                            // unless dob.obj =~/^:?[A-D1-4]~\s+(?:|(?:chapter|article|section|clause)\s+)([0-9.]+)/i
    // [1] the keyword, [2] a dotted number such as 3 or 3.1.2; the separator is a
    // literal dot ([.]) so that text like 3x1 is not taken for a section number
    static heading_extract_named_anchor_tag               = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+[.])*[0-9]+)(?:[.:;, ]|$)`,"i");
    // [1] a dotted number at the start of the heading text (literal dot separator, as above)
    static heading_extract_unnamed_anchor_tag             = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+[.])*[0-9]+)(?:[.:;, ]|$)`);
    // a heading marker followed directly by a space, i.e. with no name tag
    static heading_marker_missing_tag                     = ctRegex!(`^:?([A-D1-4])[~] `);
    // [1] the heading text
    static heading_title                                  = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`);
    // [1] level, [2] name tag, [3] heading text
    // TODO test: group [2] (name/hashtag) may be empty; check this does not affect the title in group [3]
    static heading_all                                    = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`);
    // level 1 headings tagged with "!" and a backmatter section name
    static heading_backmatter                             = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i");
    static heading_biblio                                 = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`);
    static heading_glossary                               = ctRegex!(`^:?(1)[~][!](glossary)`);
    static heading_blurb                                  = ctRegex!(`^:?(1)[~][!](blurb)`);
    // paragraph prefixes: underscore, optional digit(s) and/or asterisk, then a space
    static para_bullet                                    = ctRegex!(`^_[*] `);
    static para_bullet_indent                             = ctRegex!(`^_([1-9])[*] `);
    static para_indent                                    = ctRegex!(`^_([1-9]) `);
    static para_indent_hang                               = ctRegex!(`^_([0-9])_([0-9]) `);
    // NOTE(review): the second alternative needs a doubled underscore (__* or __1*),
    // unlike para_bullet and para_bullet_indent which use a single one - confirm intended
    static para_attribs                                   = ctRegex!(`^_(([0-9])(_([0-9]))?|_([1-9])?[*]) `);
    /+ blocked markup +/
    // a block opener in either form: "name{" (curly) or three backticks, a space and the name (tic);
    // name is one of code (optionally "code.syntax"), poem, group, block, quote, table
    static block_open                                     = ctRegex!("^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)|^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)");
    static block_poem_open                                = ctRegex!("^((poem[{].*?$)|`{3} poem)");
    /+ blocked markup tics +/
    static block_tic_open                                 = ctRegex!("^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)");
    static block_tic_code_open                            = ctRegex!("^`{3} (code)([.][a-z][0-9a-z_]+)?");
    static block_tic_poem_open                            = ctRegex!("^`{3} (poem)");
    static block_tic_group_open                           = ctRegex!("^`{3} (group)");
    static block_tic_block_open                           = ctRegex!("^`{3} (block)");
    static block_tic_quote_open                           = ctRegex!("^`{3} (quote)");
    static block_tic_table_open                           = ctRegex!("^`{3} (table)");
    // a line consisting of exactly three backticks closes any tic block
    static block_tic_close                                = ctRegex!("^(`{3})$","m");
    /+ blocked markup curly +/
    // curly blocks open with "name{" and close with "}name"
    static block_curly_open                               = ctRegex!(`^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)`);
    static block_curly_code_open                          = ctRegex!(`^(code([.][a-z][0-9a-z_]+)?[{].*?$)`);
    static block_curly_code_close                         = ctRegex!(`^([}]code)`);
    static block_curly_poem_open                          = ctRegex!(`^(poem[{].*?$)`);
    static block_curly_poem_close                         = ctRegex!(`^([}]poem)`);
    static block_curly_group_open                         = ctRegex!(`^(group[{].*?$)`);
    static block_curly_group_close                        = ctRegex!(`^([}]group)`);
    static block_curly_block_open                         = ctRegex!(`^(block[{].*?$)`);
    static block_curly_block_close                        = ctRegex!(`^([}]block)`);
    static block_curly_quote_open                         = ctRegex!(`^(quote[{].*?$)`);
    static block_curly_quote_close                        = ctRegex!(`^([}]quote)`);
    static block_curly_table_open                         = ctRegex!(`^(table[{].*?$)`);
    static block_curly_table_close                        = ctRegex!(`^([}]table)`);
    /+ inline markup font face mod +/
    // each is a marker character, text in curly braces, then the same marker; named group "text"
    static inline_emphasis                                = ctRegex!(`\*\{(?P<text>.+?)\}\*`);
    static inline_bold                                    = ctRegex!(`!\{(?P<text>.+?)\}!`);
    static inline_italics                                 = ctRegex!(`/\{(?P<text>.+?)\}/`);
    static inline_superscript                             = ctRegex!(`\^\{(?P<text>.+?)\}\^`);
    static inline_subscript                               = ctRegex!(`,\{(?P<text>.+?)\},`);
    static inline_strike                                  = ctRegex!(`-\{(?P<text>.+?)\}-`);
    static inline_insert                                  = ctRegex!(`\+\{(?P<text>.+?)\}\+`);
    static inline_mono                                    = ctRegex!(`#\{(?P<text>.+?)\}#`);
    /+ inline markup footnotes endnotes +/
    // notes delimited by tilde-curly or tilde-square pairs; the "sp" variants
    // require one or more * or + directly after the opening delimiter
    static inline_notes_curly_gen                         = ctRegex!(`~\{.+?\}~`, "m");
    static inline_notes_curly                             = ctRegex!(`~\{\s*(.+?)\}~`, "mg");
    static inline_curly_delimiter_open_and_close_regular  = ctRegex!(`~\{\s*|\s*\}~`, "m");
    static inline_notes_delimiter_curly_regular           = ctRegex!(`~\{[ ]*(.+?)\}~`, "m");
    static inline_notes_curly_sp                          = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m");
    static inline_notes_curly_sp_asterisk                 = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m");
    static inline_notes_curly_sp_plus                     = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m");
    // [1] opening delimiter, [2] note text, [3] closing delimiter
    static inline_note_curly_delimiters                   = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg");
    static inline_notes_square                            = ctRegex!(`~\[\s*(.+?)\]~`, "mg");
    // [1] text preceding a note, [2] the note text
    static inline_text_and_note_square_sp                 = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg");
    static inline_text_and_note_square                    = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg");
    static inline_note_square_delimiters                  = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg");
    static inline_curly_delimiter_open_regular            = ctRegex!(`~\{\s*`, "m");
    static inline_curly_delimiter_close_regular           = ctRegex!(`\s*\}~`, "m");
    static inline_text_and_note_curly                     = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg");
    // [1] the note number, e.g. {^{73.}^}#noteref_73
    static note_ref                                       = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg");
    /+ inline markup footnotes endnotes (【 】 delimiters) +/
    // the same family of note patterns using 【 and 】 as delimiters
    static inline_notes_al                                = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
    static inline_notes_al_gen                            = ctRegex!(`【.+?】`, "m");
    static inline_al_delimiter_open_regular               = ctRegex!(`【\s`, "m");
    static inline_al_delimiter_open_symbol_star           = ctRegex!(`【[*]\s`, "m");
    static inline_al_delimiter_open_symbol_plus           = ctRegex!(`【[+]\s`, "m");
    static inline_al_delimiter_close_regular              = ctRegex!(`】`, "m");
    static inline_al_delimiter_open_and_close_regular     = ctRegex!(`【|】`, "m");
    static inline_notes_delimiter_al_regular              = ctRegex!(`【(.+?)】`, "mg");
    // [1] a leading note number, [2] the note text
    static inline_notes_delimiter_al_regular_number_note  = ctRegex!(`【(\d+)\s+(.+?)】`, "mg");
    static inline_al_delimiter_open_asterisk              = ctRegex!(`【\*`, "m");
    static inline_al_delimiter_open_plus                  = ctRegex!(`【\+`, "m");
    static inline_text_and_note_al                        = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg");
    // a chunk of text running up to and including the next note, or to end of line
    static inline_text_and_note_al_                       = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg");
    /+ inline markup links +/
    // a link target starts with http://, https://, git://, ../, ./ or #
    // and runs to the next whitespace
    static inline_url                                      = ctRegex!(`((?:(?:https?|git):\/\/|\.\.\/|\.\/|#)\S+)`, "mg");
    // [1] line start or space, [2] the target, [3] optional trailing punctuation then space or line end
    static inline_link_naked_url                           = ctRegex!(`(^|[ ])((?:(?:https?|git):\/\/|\.\.\/|\.\/|#)\S+?)([.,;:?!]?(?:[ ]|$))`, "mg");
    // "{ text }target": [1] line start or space, [2] link text, [3] the target, [4] trailing punctuation/space
    static inline_link_markup_regular                      = ctRegex!(`(^|[ ])\{\s*(.+?)\s*\}((?:(?:https?|git):\/\/|\.\.\/|\.\/|#)\S+?)([.,;:?!]?(?:[ ]|$))`, "mg");
    // "{~^ text}target": as above with the ~^ marker inside the braces
    static inline_link_endnote_url_helper_punctuated       = ctRegex!(`\{~\^\s+(.+?)\}((?:(?:https?|git):\/\/|\.\.\/|\.\/|#)\S+?)([.,;:?!]?(?:[ ]|$))`, "mg");
    static inline_link_endnote_url_helper                  = ctRegex!(`\{~\^\s+(.+?)\}((?:(?:https?|git):\/\/|\.\.\/|\.\/|#)\S+)`, "mg");
    // non-whitespace text enclosed between ┤ and ├
    static inline_a_url                                    = ctRegex!(`(┤)(\S+?)(├)`, "mg");
    /+ inline markup book index +/
    // "={ ... }" on one line, or opened on one line and closed by a later line ending in }
    static book_index                                     = ctRegex!(`^=\{\s*(.+?)\}$`, "m");
    static book_index_open                                = ctRegex!(`^=\{\s*([^}]+?)$`);
    static book_index_close                               = ctRegex!(`^(.*?)\}$`, "m"); // strip
    /+ no obj_cite_number object +/
    // markers at line end: ~# or -#
    static obj_cite_number_off                            = ctRegex!(`~#$`, "m");
    static obj_cite_number_off_dh                         = ctRegex!(`-#$`, "m");
    static obj_cite_number_off_all                        = ctRegex!(`[~-]#$`, "m");
    /+ no obj_cite_number block +/
    // whole-line markers: --~# ---# --+#
    static obj_cite_number_off_block                      = ctRegex!(`^--~#$`);
    static obj_cite_number_off_block_dh                   = ctRegex!(`^---#$`);
    static obj_cite_number_off_block_close                = ctRegex!(`^--\+#$`);
    static obj_cite_number_block_marks                    = ctRegex!(`^--[+~-]#$`);
    /+ ignore outside code blocks +/
    // the block marks above plus the break markers below
    // NOTE(review): [.\\]{2} accepts any two characters drawn from dot and backslash,
    // while break_page_line_across tests only for two dots - confirm intended
    static skip_from_regular_parse    = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info
    /+ line & page breaks +/
    // two backslashes followed by a space or end of text
    static break_line_within_object                       = ctRegex!(`[\\]{2}( |$)`);
    // whole-line break markers built from two backslashes or two dots
    static break_page                                     = ctRegex!(`^-[\\]{2}-$`);
    static break_page_new                                 = ctRegex!(`^=[\\]{2}=$`);
    static break_page_line_across                         = ctRegex!(`^=[.]{2}=$`);
    static break_string                                   = ctRegex!(`』`);
    // "digit:number" with the digit in 0-7; [1] and [2] are the two parts
    static parent                                         = ctRegex!(`([0-7]):([0-9]+)`);
    /+ json +/
    static tailing_comma                                  = ctRegex!(`,$`, "m");
    /+ biblio tags +/
    // "tag: value" lines; [1] the tag, [2] the value
    static biblio_tags                                    = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`);
    static biblio_abbreviations                           = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`);
    /+ bookindex split +/
    // separators ; : | with surrounding whitespace
    static bi_main_terms_split                            = ctRegex!(`\s*;\s*`);
    static bi_main_term_plus_rest_split                   = ctRegex!(`\s*:\s*`);
    static bi_sub_terms_plus_obj_cite_number_offset_split = ctRegex!(`\s*\|\s*`);
    // "term+number": [1] the term, [2] the number
    static bi_term_and_obj_cite_numbers_match             = ctRegex!(`^(.+?)\+(\d+)`);
  }
}