org/default_regex.org


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791

-*- mode: org -*-
#+TITLE:       sisudoc spine (doc_reform) regex defaults
#+DESCRIPTION: documents - structuring, publishing in multiple formats & search
#+FILETAGS:    :spine:regex:
#+AUTHOR:      Ralph Amissah
#+EMAIL:       [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+COPYRIGHT:   Copyright (C) 2015 - 2024 Ralph Amissah
#+LANGUAGE:    en
#+STARTUP:     content hideblocks hidestars noindent entitiespretty
#+PROPERTY:    header-args  :exports code
#+PROPERTY:    header-args+ :noweb yes
#+PROPERTY:    header-args+ :results no
#+PROPERTY:    header-args+ :cache no
#+PROPERTY:    header-args+ :padline no
#+PROPERTY:    header-args+ :mkdirp yes
#+OPTIONS:     H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t

- [[./doc-reform.org][doc-reform.org]]  [[./][org/]]

* meta ctRegex

- [[./doc-reform.org][doc-reform.org]]  [[./][org/]]

https://dlang.org/phobos/std_regex.html
A pattern can be given to std.regex in one of three forms:
- Plain string, in which case it's compiled to bytecode before matching.
- Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode.
- StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code.

22 special characters used:

#+BEGIN_SRC txt
【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■☼
#+END_SRC

** _module template_ :module:

#+HEADER: :tangle "../src/sisudoc/meta/rgx.d"
#+HEADER: :noweb yes
#+BEGIN_SRC d
<<doc_header_including_copyright_and_license>>
/++
  regex: regular expressions used in sisu document parser
+/
module sisudoc.meta.rgx;
@safe:
// spineRgxIn wraps the RgxI struct. Every double-angle-bracket line inside the
// struct is an org-babel noweb reference (this block is tangled with
// ":noweb yes"): at tangle time it is replaced by the body of the source block
// carrying that #+NAME, so RgxI ends up holding those static declarations.
static template spineRgxIn() {
  static struct RgxI {
    <<meta_rgx_misc>>
    <<meta_rgx_comments>>
    <<meta_rgx_config>>
    <<meta_rgx_headers>>
    <<meta_rgx_heading_marks>>
    <<meta_rgx_paragraph_marks>>
    <<meta_rgx_blocks>>
    <<meta_rgx_block_tic>>
    <<meta_rgx_block_curly>>
    <<meta_rgx_sub_match_code>>
    <<meta_rgx_table>>
    <<meta_rgx_footnote_endnote>>
    <<meta_rgx_url>>
    <<meta_rgx_images>>
    <<meta_rgx_book_index>>
    <<meta_rgx_heading_number>>
    <<meta_rgx_object_number_off_object>>
    <<meta_rgx_object_number_off_block>>
    <<meta_rgx_code_block>>
    <<meta_rgx_line_and_page_breaks>>
    <<meta_rgx_bibliography>>
    <<meta_rgx_book_index_split>>
    <<meta_rgx_topic_register_split>>
    <<prgmkup_rgx_spaces>>
    <<prgmkup_rgx_filename_and_path>>
    <<prgmkup_rgx_inline_breaks>>
    <<prgmkup_rgx_internal_footnotes_and_endnotes>>
    <<prgmkup_rgx_inline_links>>
    <<prgmkup_rgx_font_face>>
    <<prgmkup_rgx_font_face_line>>
  }
}
#+END_SRC

** misc :misc:

#+NAME: meta_rgx_misc
#+BEGIN_SRC d
/+ misc +/
// Backtick patterns are D raw strings (backslashes reach the regex engine
// unchanged); double-quoted patterns have D string escapes processed first.
// command-line style flag: "--" then a lowercase letter then [a-z0-9-]+, whole string
static flag_action                                    = ctRegex!(`^(--[a-z][a-z0-9-]+)$`);
// shortest run of text between two double quotes (captured)
static within_quotes                                  = ctRegex!(`"(.+?)"`, "m");
// semicolon delimiters, with optional surrounding spaces
static make_heading_delimiter                         = ctRegex!(`[;][ ]*`);
static arr_delimiter                                  = ctRegex!(`[ ]*[;][ ]*`);
// "part one, part two": text before the first comma and text after it
static name_delimiter                                 = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`);
// number ("ocn") optionally followed by "-number"; the whole is "link"
static book_index_go                                  = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
static trailing_comma                                 = ctRegex!(",[ ]*$");
// comma, 1-2 spaces, two literal backslashes, newline, then four spaces at line end
static trailing_linebreak                             = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m");
static newline_eol_strip_preceding                    = ctRegex!("[ ]*\n");
static newline_eol_delimiter_only                     = ctRegex!("^\n");
// two literal backslashes together with any whitespace on either side;
// the trailing part must be `\s*` (whitespace), not `s*` (literal "s" characters)
static markup_inline_linebreak                        = ctRegex!(`\s*\\\\\s*`, "m");
// paragraph / table delimiters built from newlines and optional spaces
static para_delimiter                                 = ctRegex!("\n[ ]*\n+");
static table_col_delimiter                            = ctRegex!("[ ]*\n+", "mg");
static table_row_delimiter                            = ctRegex!("\n[ ]*\n+", "mg");
static table_row_delimiter_special                    = ctRegex!("[ ]*\n", "mg");
static table_col_delimiter_special                    = ctRegex!("[ ]*[|][ ]*", "mg");
// a single digit: 0-9, or 0-7 for the headings variant
static levels_numbered                                = ctRegex!(`^[0-9]$`);
static levels_numbered_headings                       = ctRegex!(`^[0-7]$`);
// whole string made only of digits, spaces, separators, %, currency signs, parentheses and hyphen
static numeric_col                                    = ctRegex!(`^[ 0-9,.%$£₤Є€€¥()-]+$`);
#+END_SRC

#+BEGIN_SRC d
// static true_dollar                                    = ctRegex!(`\$`, "gm");
// static sep                                            = ctRegex!(`␣`, "gm");
// static uid_sep                                        = ctRegex!(`:`, "gm"); // ctRegex!(`␣`, "gm");
// static book_index_go_scroll                           = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
// static book_index_go_seg                              = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?):(?P<seg>[a-z0-9_-]+)");
// static book_index_go_seg_                             = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)(:(?P<seg>[a-z0-9_-]+))?");
// static book_index_go_seg_anchorless                   = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
// static numeric                                        = ctRegex!(`[ 0-9,.-]+`);
#+END_SRC

** comments :comment:

#+NAME: meta_rgx_comments
#+BEGIN_SRC d
/+ comments +/
// start of input: one or more '%' characters followed by a space
static comment                                        = ctRegex!(`^%+ `);
#+END_SRC

** config

#+NAME: meta_rgx_config
#+BEGIN_SRC d
/+ config: no patterns are defined in this block +/
#+END_SRC

** native headers
*** native header :native:header:

#+NAME: meta_rgx_headers
#+BEGIN_SRC d
/+ header +/
// literal "@..." tokens; note that the shorter tokens (@title, @title-author)
// are substrings of the longer ones, so each has its own pattern
static variable_doc_title_author_date           = ctRegex!(`@title-author-date`);
static variable_doc_title_author                = ctRegex!(`@title-author`);
static variable_doc_title                       = ctRegex!(`@title`);
static variable_doc_author                      = ctRegex!(`@author|@creator`);
static variable_doc_date                        = ctRegex!(`@date`);
// "last, first": text up to a comma ("last"), whitespace, then the rest ("first"); case-insensitive
static raw_author_munge                         = ctRegex!(`(?P<last>\S.+?),\s+(?P<first>.+)`,"i");
// line beginning with a lowercase key and a colon, followed by an optionally
// quoted word character or by end of line
static yaml_config                              = ctRegex!(`^[a-z]+\s*:\s*(?:"?\w|$)`, "m");
#+END_SRC

** heading operators :heading:operator:

#+NAME: meta_rgx_heading_marks
#+BEGIN_SRC d
/+ heading operators +/
// A heading marker is an optional ':', a level character (A-D or 1-4) and '~',
// optionally followed directly by a tag made of [a-z0-9_.-].
static heading_a                                = ctRegex!(`^:?[A][~] `, "m");
// captures level and tag (tag may end in '?'); requires following whitespace
static heading                                  = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i");
// like heading, but also accepts the special "!name" tags (glossary,
// bibliography, biblio, reference(s), blurb) and end of input after the marker
static headings                                 = ctRegex!(`^:?(?P<level>[A-D1-4])[~](?:[a-z0-9_.-]*[?]?|[!](?:glossary|bibliography|biblio|references?|blurb))(?:\s|$)`,"i");
// levels A-D and 1 only
static heading_seg_and_above                    = ctRegex!(`^:?([A-D1])[~]([a-z0-9_.-]*[?]?)\s+`,"i");
// marker with a non-empty tag, captured as "anchor"
static heading_anchor_tag                       = ctRegex!(`^:?[A-D1-4][~](?P<anchor>[a-z0-9_.-]+) `,"i");
// untagged marker whose heading text starts with "chapter|article|section|clause N" or a bare number
static heading_identify_anchor_tag              = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i");
// captures the keyword and a number whose parts are separated by '.' or ':'
static heading_extract_named_anchor_tag         = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+[.:])*[0-9]+)(?=[.:;, ]|$)`,"i");
// captures a leading number whose parts are separated by '.' or ':' (same
// separators as the named variant; a space does not join two numbers)
static heading_extract_unnamed_anchor_tag       = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+[.:])*[0-9]+)(?=[.:;, ]|$)`);
// marker immediately followed by a space, i.e. no tag
static heading_marker_missing_tag               = ctRegex!(`^:?([A-D1-4])[~] `);
// marker and tag captured separately; here the tag may contain ':'
static heading_anchor_tag_plus_colon            = ctRegex!(`^:?([A-D1-4][~])([a-z0-9_.:-]+) `,"i");
static heading_marker_tag_has_colon             = ctRegex!(`([:])`);
// level-1 special headings: 1~!biblio, 1~!bibliography, 1~!reference(s), 1~!glossary, 1~!blurb
static heading_biblio                           = ctRegex!(`^1[~][!](biblio(?:graphy)?|references?)`);
static heading_glossary                         = ctRegex!(`^1[~][!](glossary)`);
static heading_blurb                            = ctRegex!(`^1[~][!](blurb)`);
#+END_SRC

#+BEGIN_SRC d
// static heading_marker                                 = ctRegex!(`^:?([A-D1-4])[~]`);
#+END_SRC

** paragraph operators :paragraph:operator:

#+NAME: meta_rgx_paragraph_marks
#+BEGIN_SRC d
/+ paragraph operators +/
// all anchored at start of input and followed by a single space
// "_* "
static para_bullet                              = ctRegex!(`^_[*] `);
// "_N* " with N in 1-9, captured as "indent"
static para_bullet_indent                       = ctRegex!(`^_(?P<indent>[1-9])[*] `);
// "_N " with N in 1-9, captured as "indent"
static para_indent                              = ctRegex!(`^_(?P<indent>[1-9])[ ]`);
// "_H_I " with single digits captured as "hang" and "indent"
static para_indent_hang                         = ctRegex!(`^_(?P<hang>[0-9])_(?P<indent>[0-9])[ ]`);
// any of the forms "_N ", "_N_M ", "_* " or "_N* "
static para_attribs                             = ctRegex!(`^_(?:(?:[0-9])(?:_([0-9]))?|(?:[1-9])?[*]) `);
// "*~name" followed by a space or end of input; name captured as "anchor", case-insensitive
static para_inline_link_anchor                  = ctRegex!(`\*[~](?P<anchor>[a-z0-9_.-]+)(?= |$)`,"i");
#+END_SRC

#+NAME: grouped_text_rgx_paragraph_marks
#+BEGIN_SRC d
/+ paragraph operators +/
// Multi-line ("m") variants of the paragraph markers: '^' matches at the start
// of every line, so the markers are found on each line of a multi-line text.
// one pattern per fixed indent level 1-9: "_N "
static grouped_para_indent_1                    = ctRegex!(`^_1[ ]`, "m");
static grouped_para_indent_2                    = ctRegex!(`^_2[ ]`, "m");
static grouped_para_indent_3                    = ctRegex!(`^_3[ ]`, "m");
static grouped_para_indent_4                    = ctRegex!(`^_4[ ]`, "m");
static grouped_para_indent_5                    = ctRegex!(`^_5[ ]`, "m");
static grouped_para_indent_6                    = ctRegex!(`^_6[ ]`, "m");
static grouped_para_indent_7                    = ctRegex!(`^_7[ ]`, "m");
static grouped_para_indent_8                    = ctRegex!(`^_8[ ]`, "m");
static grouped_para_indent_9                    = ctRegex!(`^_9[ ]`, "m");
// bullet without indent: "_* "
static grouped_para_bullet                      = ctRegex!(`^_[*] `, "m");
// one pattern per fixed bullet indent level 1-9: "_N* "
static grouped_para_bullet_indent_1             = ctRegex!(`^_1[*] `, "m");
static grouped_para_bullet_indent_2             = ctRegex!(`^_2[*] `, "m");
static grouped_para_bullet_indent_3             = ctRegex!(`^_3[*] `, "m");
static grouped_para_bullet_indent_4             = ctRegex!(`^_4[*] `, "m");
static grouped_para_bullet_indent_5             = ctRegex!(`^_5[*] `, "m");
static grouped_para_bullet_indent_6             = ctRegex!(`^_6[*] `, "m");
static grouped_para_bullet_indent_7             = ctRegex!(`^_7[*] `, "m");
static grouped_para_bullet_indent_8             = ctRegex!(`^_8[*] `, "m");
static grouped_para_bullet_indent_9             = ctRegex!(`^_9[*] `, "m");
// generic forms capturing the level(s) as named groups
static grouped_para_bullet_indent               = ctRegex!(`^_(?P<indent>[1-9])[*] `, "m");
static grouped_para_indent_hang                 = ctRegex!(`^_(?P<hang>[0-9])_(?P<indent>[0-9])[ ]`, "m");
#+END_SRC

#+BEGIN_SRC d
// static grouped_para_indent                                    = ctRegex!(`^_(?P<indent>[1-9])[ ]`, "m");
#+END_SRC

** blocked markup
*** blocked markup curly & tic :block:

#+NAME: meta_rgx_blocks
#+BEGIN_SRC d
/+ blocked markup +/
// block_open has three alternatives:
//  1. curly opener: block name (code with optional ".syntax"; poem, group,
//     block or quote with optional ".lang"; or table), optional parenthesised
//     attributes, then '{' and optional spaces up to the end
//  2. tic opener: three backticks, a space, then the same names/attributes
//  3. special table form "{table(...)}" with optional "h;" and the list of
//     numbers captured as "columns"
static block_open                               = ctRegex!("^((code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|^`{3} (code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?|^[{]table[(](?:h;)?(?P<columns>(?:[ ,]+[0-9]+)+)[)][}]");
// poem opener in curly or tic form, with optional parenthesised attributes
// (no ".lang" suffix is accepted by this pattern)
static block_poem_open                          = ctRegex!("^((poem(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|`{3} poem(?:[(][ a-zA-Z0-9;:,]*[)])?)");
#+END_SRC

*** blocked markup tic :block:tic:

#+NAME: meta_rgx_block_tic
#+BEGIN_SRC d
/+ blocked markup tics +/
// Openers: three backticks, a space, then the block name. code captures an
// optional ".syntax" suffix as "syntax"; poem, group, block and quote capture
// an optional ".lang" suffix as "lang"; all capture optional parenthesised
// attributes as "attrib".
static block_tic_code_open                      = ctRegex!("^`{3} code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_poem_open                      = ctRegex!("^`{3} poem(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_group_open                     = ctRegex!("^`{3} group(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_block_open                     = ctRegex!("^`{3} block(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_quote_open                     = ctRegex!("^`{3} quote(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_table_open                     = ctRegex!("^`{3} table(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); // ctRegex!("^`{3} table(?:\(.*?\))?");
// closer: a line consisting solely of three backticks
static block_tic_close                          = ctRegex!("^(`{3})$","m");
#+END_SRC

*** blocked markup curly :block:curly:

#+NAME: meta_rgx_block_curly
#+BEGIN_SRC d
/+ blocked markup curly +/
// Openers: block name, optional ".suffix", optional parenthesised attributes
// (captured as "attrib"), then '{' and optional spaces up to the end.
// Closers: '}' followed by the block name, at the start.
// code: the ".syntax" suffix accepts '#' and '+' (e.g. c++), the same
// character set as the generic block opener and the tic code opener
static block_curly_code_open                    = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_code_close                   = ctRegex!(`^([}]code)`);
// poem, group, block, quote: optional ".lang" suffix captured as "lang"
static block_curly_poem_open                    = ctRegex!(`^(poem(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_poem_close                   = ctRegex!(`^([}]poem)`);
static block_curly_group_open                   = ctRegex!(`^(group(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_group_close                  = ctRegex!(`^([}]group)`);
static block_curly_block_open                   = ctRegex!(`^(block(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_block_close                  = ctRegex!(`^([}]block)`);
static block_curly_quote_open                   = ctRegex!(`^(quote(?:[.](?P<lang>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_quote_close                  = ctRegex!(`^([}]quote)`);
// table: no suffix, attributes only
static block_curly_table_open                   = ctRegex!(`^table(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$`);
static block_curly_table_close                  = ctRegex!(`^([}]table)`);
// "{table(...)}" form: optional "h;" then a list of numbers captured as "columns"
static block_curly_table_special_markup         = ctRegex!(`^[{]table[(](?P<attrib>(?:(h);)?(?P<columns>(?:[, ]+[0-9]+)+))[)][}]`, "mg");
#+END_SRC

*** block sub-matches :block:
**** code

#+NAME: meta_rgx_sub_match_code
#+BEGIN_SRC d
// one of the whole words "linenumber", "number" or "lnr", captured as "number"
static code_numbering                           = ctRegex!(`(?P<number>\blinenumber\b|\bnumber\b|\blnr\b)`);
#+END_SRC

**** table

#+NAME: meta_rgx_table
#+BEGIN_SRC d
// optional "h;" (c_heading), optional " cN:" with a single digit (c_num), then
// one or more comma/space separated numbers, each optionally suffixed with
// 'l' or 'r' (c_widths)
static table_head_instructions                  = ctRegex!(`(?:(?P<c_heading>h);)?(?:[ ]+c(?P<c_num>[0-9]):)?(?P<c_widths>(?:[, ]+[0-9]+[lr]?)+)`);
// a number ("width") with an optional 'l' or 'r' suffix ("align")
static table_col_widths_and_alignment           = ctRegex!(`(?P<width>[0-9]+)(?P<align>[lr]?)`);
static table_col_widths                         = ctRegex!(`(?P<widths>[0-9]+)`);
static table_col_align_match                    = ctRegex!(`(?P<align>[lr])`);
// the '┊' character at the end of a line
static table_col_separator_nl                   = ctRegex!(`[┊]$`, "mg");
#+END_SRC

#+BEGIN_SRC d
// static table_col_align                                = ctRegex!(`(?P<align>[lr]?)`);
// static table_col_separator                            = ctRegex!(`┊`);
#+END_SRC

** inline markup :inline:footnote:
*** footnotes & endnotes

#+NAME: meta_rgx_footnote_endnote
#+BEGIN_SRC d
/+ inline markup footnotes endnotes +/
// notes delimited by "~{" and "}~" (shortest match)
static inline_notes_curly_gen                   = ctRegex!(`~\{.+?\}~`, "m");
// as above, capturing the note text after optional leading whitespace
static inline_notes_curly                       = ctRegex!(`~\{\s*(.+?)\}~`, "mg");
// notes opening with one or more '*' (resp. '+') followed by whitespace
static inline_notes_curly_sp_asterisk           = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m");
static inline_notes_curly_sp_plus               = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m");
// line-initial non-space run followed by "noteref_" and a number, captured as "ref"
static note_ref                                 = ctRegex!(`^\S+?noteref_(?P<ref>[0-9]+)`, "mg");     // {^{73.}^}#noteref_73
#+END_SRC

#+BEGIN_SRC d
// static inline_notes_curly_sp                          = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m");
// static inline_note_curly_delimiters                   = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg");
// static inline_notes_square                            = ctRegex!(`~\[\s*(.+?)\]~`, "mg");
// static inline_text_and_note_square_sp                 = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg");
// static inline_text_and_note_square                    = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg");
// static inline_note_square_delimiters                  = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg");
#+END_SRC

*** links/ urls :inline:footnote:

#+NAME: meta_rgx_url
#+BEGIN_SRC d
// Link targets recognised throughout this block start with one of: "http://",
// "https://", "git://", "../" or "./" (each optionally preceded by '¤'), a
// bare '¤', or '#'.
// generic detection: a target start preceded by line start or one of } ( [ space
static smid_inline_url_generic                        = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg");
// captures the whole target up to the next whitespace
static smid_inline_url                                = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg");
// bare target ("link", '#' form excluded) after line start, space, '(' or '[' ("pre");
// trailing punctuation before a space, ')' , ']' or line end is left outside the match
static smid_inline_link_naked_url                     = ctRegex!(`(?P<pre>^|[ (\[])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?=[.,;:?!'"]?([ )\]]|$))`, "mg");
// "{ text }target": text captured as "content", target as "link"
static smid_inline_link_markup_regular                = ctRegex!(`(?P<pre>^|[ (\[])\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
// "{~^ text }target": the punctuated variant stops the link before optional
// trailing punctuation followed by a space or line end
static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[.,;:?!]?([ ]|$))`, "mg");
static smid_inline_link_endnote_url_helper            = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg");
#+END_SRC

#+BEGIN_SRC d
// static webserv_url_doc_root                           = ctRegex!(`(?P<url>(?P<domain>https?:\/\/[^ /]+)\/(?P<path>\S*))`, "mg");
#+END_SRC

*** images :images:

#+NAME: meta_rgx_images
#+BEGIN_SRC d
// image file name: [a-zA-Z0-9._-] characters ending in .png, .gif or .jpg
static image                                    = ctRegex!(`([a-zA-Z0-9._-]+?\.(?:png|gif|jpg))`, "mg");
// image inside link markup: opener '{' or '┥' (optionally followed by "~^"),
// file name ("image"), then the remainder ("post") up to a closer '}' or '┝'
// followed by "image", a "┤...├" target, or a link target
static smid_image                               = ctRegex!(`(?P<pre>(?:^|[ ])[{┥](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
// same shape without named groups, any non-space file name
static smid_image_generic                       = ctRegex!(`(?:^|[ ])[{┥](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
// as smid_image, with "WxH" dimensions after the file name ("width", "height")
static smid_image_with_dimensions               = ctRegex!(`(?P<pre>(?:^|[ ])[{┥](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
// image prefixed with '☼' and carrying the literal size marker ",w0h0"
static smid_mod_image_without_dimensions        = ctRegex!(`[{┥](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0.*[}┝](?:image|┤.*?├|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
// "{ text }image": text captured as "text"
static smid_image_delimit                       = ctRegex!(`(?P<pre>^|[ ])\{\s*(?P<text>.+?)\s*\}(?:image)(?=[;:!,?.]?([ )\]]|$))`, "mg");
#+END_SRC

#+BEGIN_SRC d
// static smid_a_image                                    = ctRegex!(`(?P<pre>(?:^|[ ])[{](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
#+END_SRC

*** inline markup book index :inline:bookindex:

#+NAME: meta_rgx_book_index
#+BEGIN_SRC d
/+ inline markup book index +/
// complete item on one line: "={ ... }", content captured as "bookindex"
static book_index_item                          = ctRegex!(`^=\{\s*(?P<bookindex>.+?)\}$`, "m");
// item opened but not closed: "={" followed by text containing no '}'
static book_index_item_open                     = ctRegex!(`^=\{\s*([^}]*?)$`);
// line ending in '}', capturing the text before it
static book_index_item_close                    = ctRegex!(`^(.*?)\}$`, "m");
#+END_SRC

** switch
*** switch off auto-heading number

#+NAME: meta_rgx_heading_number
#+BEGIN_SRC d
// line starting with "1~"
static auto_heading_numbering_lv1               = ctRegex!(`^1~`, "m");
// line starting with "N~", then a non-space run ending in '-' followed by
// whitespace (one pattern per level 1-4)
static auto_heading_numbering_off_lv1           = ctRegex!(`^1~\S*?-\s`, "m");
static auto_heading_numbering_off_lv2           = ctRegex!(`^2~\S*?-\s`, "m");
static auto_heading_numbering_off_lv3           = ctRegex!(`^3~\S*?-\s`, "m");
static auto_heading_numbering_off_lv4           = ctRegex!(`^4~\S*?-\s`, "m");
#+END_SRC

#+BEGIN_SRC d
// static auto_heading_numbering_lv2                    = ctRegex!(`^2~`, "m"); // bug?
// static auto_heading_numbering_lv3                    = ctRegex!(`^3~`, "m"); // bug?
// static auto_heading_numbering_lv4                    = ctRegex!(`^4~`, "m"); // bug?
// static auto_heading_numbering_off                    = ctRegex!(`^[A-D1-4]~\S*?-\s`, "m");
#+END_SRC

** no object_number object :ocn:off:object:

#+NAME: meta_rgx_object_number_off_object
#+BEGIN_SRC d
/+ no object_number object +/
// "~#" at the end of a line, optionally followed by spaces
static object_number_off                        = ctRegex!(`~#[ ]*$`, "m");
// "-#" at the end of a line
static object_number_off_dummy_heading          = ctRegex!(`-#$`, "m");
// either "~#" or "-#" at the end of a line
static object_number_off_all                    = ctRegex!(`[~-]#$`, "m");
// whole string made of repeated separator characters: runs of four or more
// dots, or of two or more of - ~ * $ # \ / (each optionally followed by spaces)
static repeated_character_line_separator        = ctRegex!(`^(?:[ ]*(?:(?:[.][ ]*){4,}|(?:[-][ ]*|[~][ ]*|[*][ ]*|[$][ ]*|[#][ ]*|[\\][ ]*|[/][ ]*){2,})\s*?)+$`);
#+END_SRC

** no object_number block :ocn:off:block:

#+NAME: meta_rgx_object_number_off_block
#+BEGIN_SRC d
/+ no object_number block +/
// each pattern matches a whole string consisting of one marker
static object_number_off_block                  = ctRegex!(`^--~#$`); // "--~#"
static object_number_off_block_dummy_heading    = ctRegex!(`^---#$`); // "---#"
static object_number_off_block_close            = ctRegex!(`^--\+#$`); // "--+#"
// any of the three markers above
static object_number_block_marks                = ctRegex!(`^--[+~-]#$`);
#+END_SRC

** ignore outside code blocks :block:code:

#+NAME: meta_rgx_code_block
#+BEGIN_SRC d
/+ ignore outside code blocks +/
// whole string is one of: "--+#", "--~#", "---#"; '-' two backslashes '-';
// or '=' followed by two characters from {'.', backslash} and '='
static skip_from_regular_parse                  = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`);
#+END_SRC

** line & page breaks :break:

#+NAME: meta_rgx_line_and_page_breaks
#+BEGIN_SRC d
/+ line & page breaks +/
// the single character '』'
static break_string                             = ctRegex!(`』`);
#+END_SRC

** biblio tags :biblio:tags:

#+NAME: meta_rgx_bibliography
#+BEGIN_SRC d
/+ biblio tags +/
// "tag: value" at start of input: the tag is one of the listed names (captured),
// followed by ':' and whitespace; the rest is captured as the value
static biblio_tags                              = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`);
// whole string is one of the listed short tag forms
static biblio_abbreviations                     = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`);
#+END_SRC

** bookindex split :bookindex:split:

#+NAME: meta_rgx_book_index_split
#+BEGIN_SRC d
/+ bookindex split +/
// separators with optional surrounding whitespace: ';', ':' and '|'
static bi_main_terms_split                            = ctRegex!(`\s*;\s*`);
static bi_main_term_plus_rest_split                   = ctRegex!(`\s*:\s*`);
static bi_sub_terms_plus_object_number_offset_split   = ctRegex!(`\s*\|\s*`);
// "term+N": captures the text before the first '+' that is followed by digits, and the digits
static bi_term_and_object_numbers_match               = ctRegex!(`^(.+?)\+(\d+)`);
#+END_SRC

** topic register split (document classify)

#+NAME: meta_rgx_topic_register_split
#+BEGIN_SRC d
// separators with optional surrounding whitespace: ';', ':' and '|'
static topic_register_main_terms_split          = ctRegex!(`\s*;\s*`);
static topic_register_main_term_plus_rest_split = ctRegex!(`\s*:\s*`);
static topic_register_sub_terms_split           = ctRegex!(`\s*\|\s*`);
// '␣' followed by two or more '|'-separated items containing neither '|' nor '␣' (captured)
static topic_register_multiple_sub_terms_split  = ctRegex!(`␣([^|␣]+(?:\|[^|␣]+)+)`);
#+END_SRC

** language codes :language:codes:

#+NAME: meta_rgx_language_codes
#+BEGIN_SRC d
/+ language codes +/
// path ending in "<code>/<filename>.sst" or ".ssm", where <code> is one of the
// listed language codes (captured) and is at the start or preceded by '/'
auto language_code_and_filename                                    =
   ctRegex!("(?:^|[/])(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)/[A-Za-z0-9._-].+?[.](?:sst|ssm)$");
#+END_SRC

* 1. output ctRegex

- [[./doc-reform.org][doc-reform.org]]  [[./][org/]]

https://dlang.org/phobos/std_regex.html
A pattern can be given to std.regex in one of three forms:
- Plain string, in which case it's compiled to bytecode before matching.
- Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode.
- StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code.

** _module template_ :module:output:

#+HEADER: :tangle "../src/sisudoc/io_out/rgx.d"
#+HEADER: :noweb yes
#+BEGIN_SRC d
<<doc_header_including_copyright_and_license>>
/++
  regex: regular expressions for the sisudoc output side (module sisudoc.io_out.rgx)
+/
module sisudoc.io_out.rgx;
@safe:
// spineRgxOut wraps the RgxO struct. Every double-angle-bracket line inside the
// struct is an org-babel noweb reference (this block is tangled with
// ":noweb yes"): at tangle time it is replaced by the body of the source block
// carrying that #+NAME, so RgxO ends up holding those static declarations.
static template spineRgxOut() {
  static struct RgxO {
    <<makes>>
    <<prgmkup_rgx_spaces>>
    <<prgmkup_rgx_filename_and_path>>
    <<prgmkup_rgx_inline_breaks>>
    <<prgmkup_rgx_inline_quotes>>
    <<prgmkup_rgx_internal_footnotes_and_endnotes>>
    <<prgmkup_rgx_inline_links>>
    <<prgmkup_rgx_inline_font_face>>
    <<prgmkup_rgx_table>>
    <<grouped_text_rgx_paragraph_marks>>
  }
}
#+END_SRC

** make
*** various

#+NAME: makes
#+BEGIN_SRC d
// "new=value": value captured as "breakpage", ending at the next ';' or end of input
static make_breakpage                           = ctRegex!(`new=(?P<breakpage>.+?)(?:;|$)`);
// "break=value": value captured as "breakcolumn", ending at the next ';' or end of input
static make_breakcolumn                         = ctRegex!(`break=(?P<breakcolumn>.+?)(?:;|$)`);
#+END_SRC

* 2. ctRegex defaults shared by meta & output (generic)

** meta

#+NAME: prgmkup_rgx_meta
#+BEGIN_SRC d
// a single space character
static space                                    = ctRegex!(`[ ]`, "mg");
// spaces at the start of a line, or any run of two or more spaces ("keep_spaces")
static spaces_keep                              = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block
#+END_SRC

** spine & source_in

#+NAME: prgmkup_rgx_in
#+BEGIN_SRC d
#+END_SRC

** misc generic

#+NAME: prgmkup_rgx_spaces
#+BEGIN_SRC d
// a newline character
static newline                                  = ctRegex!("\n", "mg");
// a single space character
static space                                    = ctRegex!(`[ ]`, "mg");
// spaces at the start of a line, or any run of two or more spaces ("keep_spaces")
static spaces_keep                              = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block
// spaces at the start of a line only ("opening_spaces")
static spaces_line_start                        = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
// the '░' character: one occurrence, and a run of one or more
static nbsp_char                                = ctRegex!(`░`, "mg");
static nbsp_chars                               = ctRegex!(`[░]+`, "mg");
// the '·' character
static middle_dot                               = ctRegex!(`·`, "mg");
#+END_SRC

** inline markup

*** inline breaks

#+NAME: prgmkup_rgx_inline_breaks
#+BEGIN_SRC d
/+ line breaks +/
// two newlines separated only by optional spaces
static br_empty_line                            = ctRegex!(`\n[ ]*\n`, "mg");
// a newline or one of the marker characters '┘' '┙'
static br_linebreaks_newlines                   = ctRegex!(`[\n┘┙]`, "mg");
// one of the marker characters '┘' '┙'
static br_linebreaks                            = ctRegex!(`[┘┙]`, "mg");
// the individual marker characters
static br_line                                  = ctRegex!(`┘`, "mg");
static br_line_inline                           = ctRegex!(`┙`, "mg");
static br_line_spaced                           = ctRegex!(`┚`, "mg");
#+END_SRC

#+BEGIN_SRC d
// static brln                                           = ctRegex!(`(?:\\\\)+`, "mg");
#+END_SRC

*** quote marks

#+NAME: prgmkup_rgx_inline_quotes
#+BEGIN_SRC d
/+ quotation marks +/
// either curly double quotation mark: “ or ”
static quotes_open_and_close                    = ctRegex!(`[“”]`, "mg");
#+END_SRC

#+BEGIN_SRC d
// static quote_open                                    = ctRegex!(`[“]`, "mg");
// static quote_close                                   = ctRegex!(`[”]`, "mg");
#+END_SRC

*** inline markup font face mod :inline:font:face:

#+NAME: prgmkup_rgx_font_face
#+BEGIN_SRC d
/+ inline markup font face mod +/
// Each pattern matches "X{text}X" for one mark character X, capturing the
// opening mark as "mark" and the shortest enclosed text as "text".
static inline_mark_emphasis                     = ctRegex!(`(?P<mark>[*])\{(?P<text>.+?)\}[*]`, "mg");
static inline_mark_bold                         = ctRegex!(`(?P<mark>[!])\{(?P<text>.+?)\}[!]`, "mg");
static inline_mark_underscore                   = ctRegex!(`(?P<mark>[_])\{(?P<text>.+?)\}[_]`, "mg");
static inline_mark_italics                      = ctRegex!(`(?P<mark>[/])\{(?P<text>.+?)\}[/]`, "mg");
static inline_mark_superscript                  = ctRegex!(`(?P<mark>\^)\{(?P<text>.+?)\}\^`, "mg");
static inline_mark_subscript                    = ctRegex!(`(?P<mark>[,])\{(?P<text>.+?)\}[,]`, "mg");
static inline_mark_strike                       = ctRegex!(`(?P<mark>[-])\{(?P<text>.+?)\}[-]`, "mg");
static inline_mark_insert                       = ctRegex!(`(?P<mark>[+])\{(?P<text>.+?)\}[+]`, "mg");
static inline_mark_mono                         = ctRegex!(`(?P<mark>[#])\{(?P<text>.+?)\}[#]`, "mg");
static inline_mark_cite                         = ctRegex!(`(?P<mark>["])\{(?P<text>.+?)\}["]`, "mg");
#+END_SRC

#+NAME: prgmkup_rgx_font_face_line
#+BEGIN_SRC d
// Whole-line font face markers: a mark character, '_' and a space at the
// start, then the text ("text"), then an optional tail of up to two
// occurrences of " \\" (space, two backslashes) or "~#" at the end.
// any of the marks * ! / _ (tail captured as an unnamed group)
static inline_faces_line                        = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
// one pattern per mark, tail captured as "tail"
static inline_emphasis_line                     = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_bold_line                         = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_italics_line                      = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_underscore_line                   = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
#+END_SRC

*** inline (internal program) markup footnotes endnotes :inline:footnote:

#+NAME: prgmkup_rgx_internal_footnotes_and_endnotes
#+BEGIN_SRC d
/+ inline markup footnotes endnotes +/
// Notes in this block are delimited by '【' and '】' (shortest match).
// note text after an optional leading '*' or '+' marker and whitespace
static inline_notes_al                          = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
// only notes that carry the '*' or '+' marker
static inline_notes_al_special                  = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
// any delimited note, without and with the content captured ("text")
static inline_notes_al_gen                      = ctRegex!(`【.+?】`, "m");
static inline_notes_al_gen_text                 = ctRegex!(`【(?P<text>.+?)】`, "m");
// note starting with a number or a run of '*' / '+' ("num"), whitespace, then the text ("note")
static inline_notes_al_all_note                 = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg");
// numbered notes only
static inline_notes_al_regular_number_note      = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
// notes starting with a run of '*' / '+' only ("char")
static inline_notes_al_special_char_note        = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg");
// opening delimiter followed by whitespace, by "*" + whitespace, or by "+" + whitespace
static inline_al_delimiter_open_regular         = ctRegex!(`【\s`, "m");
static inline_al_delimiter_open_symbol_star     = ctRegex!(`【[*]\s`, "m");
static inline_al_delimiter_open_symbol_plus     = ctRegex!(`【[+]\s`, "m");
// a stretch of text ending either in a delimited note or running on to the end of the line
static inline_text_and_note_al_                 = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg");
#+END_SRC

#+BEGIN_SRC d
// Disabled note patterns: both declarations below are commented out and
// define nothing.
// static inline_notes_al_regular                        = ctRegex!(`【(.+?)】`, "mg");
// static inline_notes_al_gen_ref                        = ctRegex!(`【(?P<ref>[*+]\s+)\s*(?P<text>.+?)】`, "mg");
#+END_SRC

*** inline links

#+NAME: prgmkup_rgx_inline_links
#+BEGIN_SRC d
/+ inline markup links +/
// Internal link form used throughout: ┥text┝┤target├
// Flags: "m" multi-line anchors, "g" global (std.regex flag letters).
//
// Images: ┥☼ then a file name ending in .jpg/.gif/.png, then ",w<digits>h<digits>".
//   imginf - file name plus dimensions; img, width, height - its parts
//   post   - everything from after the dimensions through the closing ├
// inline_image_without_dimensions matches only width 0 and height 0.
// inline_image_info matches the file name and dimensions alone (☼ optional).
static inline_image                             = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_without_dimensions          = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_info                        = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg");
// Anchor: non-whitespace text between two ┃ characters.
static inline_link_anchor                       = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor
// Link variants, differing only in what the target part may contain:
//   inline_link             - optional # then non-whitespace characters
//   inline_link_empty       - empty target
//   inline_link_number      - digits only
//   inline_link_number_only - digits only; linked_text keeps the ┥ ┝ delimiters
//   inline_link_stow_uri    - at least two characters, none of which is a
//                             space, a digit or a link delimiter, and the
//                             first of which is also not #
//   inline_link_hash        - # followed by the hash
//   inline_link_seg_and_hash - seg (no / # or ├), then #, then the hash
// NOTE(review): inline_link_stow_uri rejects any target that contains a digit
// anywhere, not just numeric targets — confirm that is intended for URIs.
static inline_link                              = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg");
static inline_link_empty                        = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
static inline_link_number                       = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
static inline_link_number_only                  = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
static inline_link_stow_uri                     = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
static inline_link_hash                         = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg");
static inline_link_seg_and_hash                 = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg");
// Matches a whole ┤target├ part, or a single ┥ or ┝ delimiter.
static inline_link_clean                        = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
// Target part that is # plus one of the listed backmatter section names.
static inline_link_toc_to_backmatter            = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
// Scheme prefixes: url accepts http and https; uri also accepts git.
static url                                      = ctRegex!(`https?://`, "mg");
static uri                                      = ctRegex!(`(?:https?|git)://`, "mg");
// URI split into type (scheme with ://), path (ending in a slash) and file
// (the slash-free remainder up to the end anchor).
static uri_identify_components                  = ctRegex!(`(?P<type>(?:https?|git)://)(?P<path>\S+?/)(?P<file>[^/]+)$`, "mg");
// Line starting with a level digit 5-7, "~ ", then a link.
static inline_link_subtoc                       = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
// ¤ followed by text and the literal ".fnSuffix": the first pattern captures
// the text (group 1) and the suffix (group 2), the second captures only the ¤.
static inline_link_fn_suffix                    = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
static inline_seg_link                          = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
// The ¤ marker on its own.
static mark_internal_site_lnk                   = ctRegex!(`¤`, "mg");
// A single-quote character.
static quotation_mark_sql_insert_delimiter      = ctRegex!("[']", "mg");
#+END_SRC

*** inline markup font face mod :inline:font:face:

#+NAME: prgmkup_rgx_inline_font_face
#+BEGIN_SRC d
/+ inline markup font face mod +/
// Inline font face spans have the form  ⑆<mark>┨text┣<mark>  where the same
// mark character appears after ⑆ and after ┣. Each pattern captures the
// enclosed content as "text" (lazy match).
// Marks: * emphasis, ! bold, _ underscore, / italics, ^ superscript,
//        , subscript, - strike, + insert, ■ mono, ‖ cite
// Flags: "m" multi-line anchors, "g" global (std.regex flag letters).
static inline_emphasis                          = ctRegex!(`⑆[*]┨(?P<text>.+?)┣[*]`, "mg");
static inline_bold                              = ctRegex!(`⑆[!]┨(?P<text>.+?)┣[!]`, "mg");
static inline_underscore                        = ctRegex!(`⑆[_]┨(?P<text>.+?)┣[_]`, "mg");
static inline_italics                           = ctRegex!(`⑆[/]┨(?P<text>.+?)┣[/]`, "mg");
static inline_superscript                       = ctRegex!(`⑆\^┨(?P<text>.+?)┣\^`, "mg");
static inline_subscript                         = ctRegex!(`⑆[,]┨(?P<text>.+?)┣[,]`, "mg");
static inline_strike                            = ctRegex!(`⑆[-]┨(?P<text>.+?)┣[-]`, "mg");
static inline_insert                            = ctRegex!(`⑆[+]┨(?P<text>.+?)┣[+]`, "mg");
static inline_mono                              = ctRegex!(`⑆[■]┨(?P<text>.+?)┣[■]`, "mg");
static inline_cite                              = ctRegex!(`⑆[‖]┨(?P<text>.+?)┣[‖]`, "mg");
#+END_SRC

#+BEGIN_SRC d
// Disabled font face patterns: both declarations below are commented out and
// define nothing. The first is a character-class spelling of the superscript
// pattern; the second would match any opening or closing font face marker.
// static inline_superscript                             = ctRegex!(`⑆[\^]┨(?P<text>.+?)┣[\^]`, "mg");
// static inline_fontface_clean                          = ctRegex!(`⑆[*!_/^,+■‖-]┨|┣[*!_/^,+■‖-]`, "mg");
#+END_SRC

*** table related

#+NAME: prgmkup_rgx_table
#+BEGIN_SRC d
/+ table delimiters +/
// Column delimiter: the ┊ character together with any spaces around it.
static table_delimiter_col                      = ctRegex!("[ ]*[┊][ ]*", "mg");
// Row delimiter: a newline together with any spaces before it. The pattern is
// a double-quoted D string, so \n is a literal newline character in the regex.
static table_delimiter_row                      = ctRegex!("[ ]*\n", "mg");
#+END_SRC

** files filename (& path) (including insert file) :insert:file:path:filename:

#+HEADER: :tangle "../src/sisudoc/meta/rgx_files.d"
#+HEADER: :noweb yes
#+BEGIN_SRC d
<<doc_header_including_copyright_and_license>>
/++
  regex: regular expressions for source file names and paths
  (including inserted files), plus the language code patterns
+/
module sisudoc.meta.rgx_files;
// @safe: applies to every declaration that follows in this module.
@safe:
// Parameterless template wrapping the struct that holds the patterns; the
// struct body is filled in by the two noweb references below.
static template spineRgxFiles() {
  static struct RgxFiles {
    <<prgmkup_rgx_filename_and_path>>
    <<meta_rgx_language_codes>>
  }
}
#+END_SRC

#+NAME: prgmkup_rgx_filename_and_path
#+BEGIN_SRC d
// Source file name and path patterns. Path components and file names are
// limited to ASCII letters, digits, dot, underscore and hyphen. Patterns that
// start with ^ and end with $ must match the whole input (no flags are passed).
//
// Markup source file (.sst or .ssm): path (optional leading slash plus
// directories), filename, extension.
static src_pth_sst_or_ssm                       = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`);
// Markup source file located under media/text/<two lowercase letters>/;
// podpath is everything before "media/".
static src_pth_pod_sst_or_ssm                   = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`);
// Path ending in <name>/pod.manifest; "filename" is the <name> component.
static src_pth_contents                         = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`);
// Path ending in a .zip file.
static src_pth_zip                              = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`);
// Any of the three accepted inputs; gotfile holds the match and exactly one of
// filename (.sst/.ssm), filelist (<name>/pod.manifest) or filezip (.zip) is set.
// NOTE(review): unlike the patterns above, the named group "path" sits inside
// the repetition, so it presumably holds only the last directory component
// after a match — confirm callers do not expect the full path here.
static src_pth_types                            = ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`);
// Markup source file split into fn_src (full name), fn_base (name without
// extension) and fn_src_suffix (sst or ssm); the path is unnamed group 1.
static src_fn                                   = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
// .ssm files only.
static src_fn_master                            = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
// .ssi or .ssm files.
static src_fn_find_inserts                      = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
// Insert directive: two less-than signs at the start, optional whitespace,
// then a relative path to a .sst or .ssi file.
// NOTE(review): "path" is again inside the repetition (see src_pth_types).
static insert_src_fn_ssi_or_sst                 = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
// Input ending in /media/text/<two lowercase letters>: dir is the directory
// component immediately before "/media".
static src_base_parent_dir_name                 = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
// Same ending: pth is the text matched before "/media", and dir is the
// trailing part of pth matched by the nested group.
static src_formalised_file_path_parts           = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
#+END_SRC

** _module template yaml tags

#+HEADER: :tangle "../src/sisudoc/meta/rgx_yaml_tags.d"
#+HEADER: :noweb yes
#+BEGIN_SRC d
<<doc_header_including_copyright_and_license>>
/++
  regex: regular expressions matching yaml type-tag suffixes
+/
// NOTE(review): this block is tangled to rgx_yaml_tags.d while the module is
// declared as sisudoc.meta.rgx_yaml — confirm the file/module name mismatch is
// intended (D resolves imports to files by module name).
module sisudoc.meta.rgx_yaml;
// @safe: applies to every declaration that follows in this module.
@safe:
// Parameterless template wrapping the struct that holds the patterns; the
// struct body is filled in by the noweb reference below.
static template spineRgxYamlTags() {
  static struct RgxYaml {
    <<meta_rgx_yaml>>
  }
}
#+END_SRC

#+NAME: meta_rgx_yaml
#+BEGIN_SRC d
// Each pattern matches a type suffix (":str", ":int", ":map", ":seq") directly
// before the end anchor; no flags are passed.
static yaml_tag_is_str                          = ctRegex!(`:str$`);
static yaml_tag_is_int                          = ctRegex!(`:int$`);
static yaml_tag_is_map                          = ctRegex!(`:map$`);
static yaml_tag_is_seq                          = ctRegex!(`:seq$`);
#+END_SRC

** special characters
*** xhtml special characters template

#+HEADER: :tangle "../src/sisudoc/io_out/rgx_xhtml.d"
#+HEADER: :noweb yes
#+BEGIN_SRC d
<<doc_header_including_copyright_and_license>>
/++
  regex: regular expressions matching characters and markup that need
  special treatment in xhtml output
+/
module sisudoc.io_out.rgx_xhtml;
// @safe: applies to every declaration that follows in this module.
@safe:
// Parameterless template wrapping the struct that holds the patterns; the
// struct body is filled in by the noweb reference below.
static template spineRgxXHTML() {
  static struct RgxXHTML {
    <<sp_ch_xhtml_rgx>>
  }
}
#+END_SRC

*** xhtml special characters

#+NAME: sp_ch_xhtml_rgx
#+BEGIN_SRC d
// Characters with an xhtml replacement; the trailing comment on each line
// names the replacement text. The patterns only find the character — the
// substitution itself is done by whatever code uses them.
static ampersand                                = ctRegex!(`[&]`, "m");      // &amp;
static quotation                                = ctRegex!(`["]`, "m");      // &quot;
static less_than                                = ctRegex!(`[<]`, "m");      // &lt;
static greater_than                             = ctRegex!(`[>]`, "m");      // &gt;
// Line break markup: a space followed by two backslashes.
static line_break                               = ctRegex!(` [\\]{2}`, "m"); // <br />
#+END_SRC

*** LaTeX special characters template

#+HEADER: :tangle "../src/sisudoc/io_out/rgx_latex.d"
#+HEADER: :noweb yes
#+BEGIN_SRC d
<<doc_header_including_copyright_and_license>>
/++
  regex: regular expressions matching LaTeX special characters and related
  markup, for LaTeX output
+/
module sisudoc.io_out.rgx_latex;
// @safe: applies to every declaration that follows in this module.
@safe:
// Parameterless template wrapping the struct that holds the patterns; the
// struct body is filled in by the noweb reference below.
static template spineRgxLSC() {
  static struct RgxLSC {
    <<sp_ch_latex_rgx>>
  }
}
#+END_SRC

*** latex special characters

#+NAME: sp_ch_latex_rgx
#+BEGIN_SRC d
// Single special characters, captured in group 1:
//   latex_special_char                       - % $ { } _ # & and backslash
//   latex_special_char_for_escape            - the same set without &
//   latex_special_char_for_escape_and_braces - & only
//   latex_special_char_for_escape_url        - % only
static latex_special_char                       = ctRegex!(`([%${}_#&\\])`);
static latex_special_char_for_escape            = ctRegex!(`([%${}_#\\])`);
static latex_special_char_for_escape_and_braces = ctRegex!(`([&])`);
static latex_special_char_for_escape_url        = ctRegex!(`([%])`);
// Already-escaped forms, capturing the bare character in group 1:
// a backslash before one of % $ { } _ # backslash, and & written as {\&}.
static latex_special_char_escaped               = ctRegex!(`\\([%${}_#\\])`);
static latex_special_char_escaped_braced        = ctRegex!(`[{]\\([&])[}]`);
// A complete internal link: ┥text┝┤target├ with a whitespace-free target.
static latex_identify_inline_link               = ctRegex!(`┥.+?┝┤\S+?├`, "mg");
// A font face span whose opening and closing mark (_ # or $) are each
// preceded by a backslash; group 1 is the opening mark through ┣, group 2 the
// closing mark.
static latex_identify_inline_fontface           = ctRegex!(`\\([_#$]┨.+?┣)\\([_#$])`, "mg");
// Leading # , or leading ¤ followed by non-whitespace text and a #.
static latex_clean_internal_link                = ctRegex!(`^(?:#|¤\S+?#)`, "m");
// Four consecutive backslashes together with any surrounding whitespace.
static latex_clean_bookindex_linebreak          = ctRegex!(`\s*\\\\\\\\\s*`, "m");
#+END_SRC

* document header including copyright & license

#+NAME: doc_header_including_copyright_and_license
#+HEADER: :noweb yes
#+BEGIN_SRC emacs-lisp
<<./sisudoc_spine_version_info_and_doc_header_including_copyright_and_license.org:spine_doc_header_including_copyright_and_license()>>
#+END_SRC

* __END__