diff options
author | Ralph Amissah <ralph@amissah.com> | 2017-04-20 13:57:03 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2019-04-10 15:14:14 -0400 |
commit | 9cf470e69b2d579701ee607f796de612dc600565 (patch) | |
tree | ff16c8b547767e4b441655df39511c1757f819c3 /org/default_regex.org | |
parent | section keys sequence (diff) |
0.14.0 reorganized files, and paths
Diffstat (limited to 'org/default_regex.org')
-rw-r--r-- | org/default_regex.org | 475 |
1 files changed, 475 insertions, 0 deletions
diff --git a/org/default_regex.org b/org/default_regex.org new file mode 100644 index 0000000..92810ea --- /dev/null +++ b/org/default_regex.org @@ -0,0 +1,475 @@ +#+TITLE: sdp regex defaults +#+AUTHOR: Ralph Amissah +#+EMAIL: ralph.amissah@gmail.com +#+STARTUP: indent +#+LANGUAGE: en +#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t +#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc +#+OPTIONS: author:nil email:nil creator:nil timestamp:nil +#+PROPERTY: header-args :padline no :exports code :noweb yes +#+EXPORT_SELECT_TAGS: export +#+EXPORT_EXCLUDE_TAGS: noexport +#+FILETAGS: :sdp:rel:ao: +#+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n) + +[[./sdp.org][sdp]] [[./][org/]] +* 0. ao ctRegex :regex: +[[./sdp.org][sdp]] [[./][org/]] +http://dlang.org/phobos/std_regex.html +- Plain string, in which case it's compiled to bytecode before matching. +- Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode. +- StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code. + +** 0. 
rgx code template: :ao_rgx: + +#+name: tangle_ao_rgx +#+BEGIN_SRC d :tangle ../src/sdp/ao_rgx.d +/++ + regex: regular expressions used in sisu document parser (struct Rgx is filled in from the ao_rgx and prgmkup_rgx noweb blocks) ++/ +template SiSUrgxInit() { + private import ao_defaults; + struct Rgx { + <<ao_rgx>> + <<prgmkup_rgx>> + } +} +#+END_SRC + +** misc :misc: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ misc: flags, delimiters, book index targets, newline & table delimiters, level and numeric checks +/ +static true_dollar = ctRegex!(`\$`, "gm"); +static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); +static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); +static within_quotes = ctRegex!(`"(.+?)"`); +static make_heading_delimiter = ctRegex!(`[;][ ]*`); +static arr_delimiter = ctRegex!(`[ ]*[;][ ]*`); +static name_delimiter = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`); +static book_index_go = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)"); +static book_index_go_scroll = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)"); +static book_index_go_seg = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?):(?P<seg>[a-z0-9_-]+)"); +static book_index_go_seg_ = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)(:(?P<seg>[a-z0-9_-]+))?"); +static book_index_go_seg_anchorless = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)"); +static trailing_comma = ctRegex!(",[ ]*$"); +static trailing_linebreak = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m"); +static newline_eol_delimiter = ctRegex!("\n"); +static newline_eol_strip_preceding = ctRegex!("[ ]*\n"); +static newline_eol_delimiter_only = ctRegex!("^\n"); +static line_delimiter_ws_strip = ctRegex!("[ ]*\n[ ]*"); +static para_delimiter = ctRegex!("\n[ ]*\n+"); +static table_col_delimiter = ctRegex!("[ ]*\n+", "mg"); +static table_row_delimiter = ctRegex!("\n[ ]*\n+", "mg"); +static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg"); // +static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg"); // +static levels_markup = ctRegex!(`^[A-D1-4]$`); +static levels_numbered = ctRegex!(`^[0-9]$`); +static levels_numbered_headings = ctRegex!(`^[0-7]$`); +static numeric = ctRegex!(`[ 0-9,.-]+`); 
+static numeric_col = ctRegex!(`^[ 0-9,.$£₤Є€€¥-]+$`); +#+END_SRC + +** comments :comment: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ comments +/ +static comment = ctRegex!(`^%+ `); +static comments = ctRegex!(`^%+ |^%+$`); +#+END_SRC + +** native headers +*** native header :native:header: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ header +/ +static main_headers = + ctRegex!(`^(?:creator|title|rights|date|original|classify|identifier|notes|publisher|make|links)$`, "m"); +static native_header = ctRegex!(`^@([a-z_]+):(?:\s|$)`); +static native_header_make = ctRegex!(`^@(make):(?:\s|$)`); +static native_header_meta = + ctRegex!(`^@(?:creator|title|rights|date|original|classify|identifier|notes|publisher|links):(?:\s|$)`); +static native_header_main = ctRegex!(`^@(?P<header>[a-z_]+):\s*(?P<content>.*)`, "m"); +static native_header_sub = ctRegex!(`^[ ]*:(?P<subheader>[a-z_]+):\s+(?P<content>.+)`, "m"); +static native_header_meta_title = ctRegex!(`^@title:\s`, "m"); +static variable_doc_title = ctRegex!(`@title`); +static variable_doc_author = ctRegex!(`@author|@creator`); +static raw_author_munge = ctRegex!(`(\S.+?),\s+(.+)`,"i"); +#+END_SRC + +*** subheader :native:subheader: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ subheader names, grouped by native header +/ +static native_subhead_creator = ctRegex!(`^(?:author|translator|illustrator)$`, "m"); +static native_subhead_title = ctRegex!(`^(?:main|sub(?:title)?|full|language|edition|note)$`, "m"); +static native_subhead_rights = ctRegex!(`^(?:copyright|illustrations|license|cover)$`, "m"); +static native_subhead_date = ctRegex!(`^(?:published|created|issued|available|valid|modified|added_to_site)$`, "m"); +static native_subhead_original = ctRegex!(`^(?:title|language|source)$`, "m"); +static native_subhead_classify = ctRegex!(`^(?:topic_register|subject|keywords|loc|dewey)$`, "m"); +static native_subhead_identifier = ctRegex!(`^(?:oclc|pg|isbn)$`, "m"); +static native_subhead_notes = ctRegex!(`^(?:abstract|description)$`, "m"); +static native_subhead_publisher = 
ctRegex!(`^(?:name)$`, "m"); +static native_subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|num_depth|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m"); +#+END_SRC + +** heading & paragraph operators :paragraph:operator: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ heading & paragraph operators +/ +static heading_a = ctRegex!(`^:?[A][~] `, "m"); +static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); +static heading_seg_and_above = ctRegex!(`^:?([A-D1])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); +static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); +static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i"); +static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i"); + // unless dob.obj =~/^:?[A-D1-4]~\s+(?:|(?:chapter|article|section|clause)\s+)([0-9.]+)/i +static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+[.])*[0-9]+)(?:[.:;, ]|$)`,"i"); // [.] literal dot between number parts (e.g. 1.2.3); a bare . would accept any character +static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+[.])*[0-9]+)(?:[.:;, ]|$)`); // [.] literal dot between number parts, as above +static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); +static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); +static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); // test, particularly [2] name/hashtag which may or may not be, does this affect title [3] +static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i"); +static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); +static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`); +static heading_blurb = ctRegex!(`^:?(1)[~][!](blurb)`); +static heading_biblio_glossary = ctRegex!(`^:?(?:(1)[~][!](?:(?:biblio(?:graphy)?|references?)|glossary)|[A-D1][~])`); +static heading_biblio_blurb = 
ctRegex!(`^:?(?:(1)[~][!](?:(?:biblio(?:graphy)?|references?)|blurb)|[A-D1][~])`); +static heading_blurb_glossary = ctRegex!(`^:?(?:(1)[~][!](?:blurb|glossary)|[A-D1][~])`); +static para_bullet = ctRegex!(`^_[*] `); +static para_bullet_indent = ctRegex!(`^_([1-9])[*] `); +static para_indent = ctRegex!(`^_([1-9]) `); +static para_indent_hang = ctRegex!(`^_([0-9])_([0-9]) `); +static para_attribs = ctRegex!(`^_(?:(?:[0-9])(?:_([0-9]))?|(?:[1-9])?[*]) `); +#+END_SRC + +** blocked markup +*** blocked markup :block:tic: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ blocked markup +/ +static block_open = ctRegex!("^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)|^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)|^[{]table(~h)?(?P<columns>(?:[ ]+[0-9]+;)+)[}]"); +static block_poem_open = ctRegex!("^((poem[{].*?$)|`{3} poem)"); +#+END_SRC + +** blocked markup tics :block:tic: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ blocked markup tics +/ +static block_tic_open = ctRegex!("^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)"); // what of numbered code? 
+static block_tic_code_open = ctRegex!("^`{3} (?:code)(?:[.]([a-z][0-9a-z_]+))?(?:[ ]+([#]))?"); // extract additional info +static block_tic_poem_open = ctRegex!("^`{3} (poem)"); +static block_tic_group_open = ctRegex!("^`{3} (group)"); +static block_tic_block_open = ctRegex!("^`{3} (block)"); +static block_tic_quote_open = ctRegex!("^`{3} (quote)"); +static block_tic_table_open = ctRegex!("^`{3} table(.*)"); +static block_tic_close = ctRegex!("^(`{3})$","m"); +#+END_SRC + +*** blocked markup curly :block:curly: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ blocked markup curly +/ +static block_curly_open = ctRegex!(`^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)`); +static block_curly_code_open = ctRegex!(`^(?:code(?:[.]([a-z][0-9a-z_]+))?[{]([#]?)\s*$)`); // extract additional info +static block_curly_code_close = ctRegex!(`^([}]code)`); +static block_curly_poem_open = ctRegex!(`^(poem[{].*?$)`); +static block_curly_poem_close = ctRegex!(`^([}]poem)`); +static block_curly_group_open = ctRegex!(`^(group[{].*?$)`); +static block_curly_group_close = ctRegex!(`^([}]group)`); +static block_curly_block_open = ctRegex!(`^(block[{].*?$)`); +static block_curly_block_close = ctRegex!(`^([}]block)`); +static block_curly_quote_open = ctRegex!(`^(quote[{].*?$)`); +static block_curly_quote_close = ctRegex!(`^([}]quote)`); +static block_curly_table_open = ctRegex!(`^table[{](.*)`); +static block_curly_table_close = ctRegex!(`^([}]table)`); +static block_curly_table_special_markup = ctRegex!(`^[{]table((~h)?(?P<columns>(?:[ ]+[0-9]+;)+))[}]`, "mg"); // special table block markup +#+END_SRC + +*** block sub-matches :block:curly: + +#+name: ao_rgx +#+BEGIN_SRC d +static table_head_instructions = ctRegex!(`(?P<c_heading>h)?(?:[ ]+c(?P<c_num>[0-9]);)?(?P<c_widths>(?:[ ]+[0-9]+[lr]?;)+)`); // optional h flag, optional c<digit>; then one or more widths, each with an optional l/r suffix +static table_col_widths_and_alignment = ctRegex!(`(?P<width>[0-9]+)(?P<align>[lr]?)`); +static table_col_widths = ctRegex!(`(?P<widths>[0-9]+)`); +static table_col_align = 
ctRegex!(`(?P<align>[lr]?)`); +static table_col_align_match = ctRegex!(`(?P<align>[lr])`); +static table_col_separator = ctRegex!(`┊`); +static table_col_separator_nl = ctRegex!(`[┊]$`, "mg"); +#+END_SRC + +** inline markup footnotes endnotes :inline:footnote: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ inline markup footnotes endnotes +/ +static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); +static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); +static inline_curly_delimiter_open_and_close_regular = ctRegex!(`~\{\s*|\s*\}~`, "m"); +static inline_notes_delimiter_curly_regular = ctRegex!(`~\{[ ]*(.+?)\}~`, "m"); +static inline_notes_curly_sp = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m"); +static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); +static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); +static inline_note_curly_delimiters = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg"); +static inline_notes_square = ctRegex!(`~\[\s*(.+?)\]~`, "mg"); +static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg"); +static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg"); +static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg"); +static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m"); +static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m"); // +static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m"); // +static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m"); // +static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); +static inline_text_and_note_curly = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg"); +static note_ref = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg"); // {^{73.}^}#noteref_73 +#+END_SRC + +*** links/ urls :inline:footnote: + +#+name: ao_rgx +#+BEGIN_SRC d +static inline_url_generic = ctRegex!(`(?:^|[}(\[ 
])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg"); +static inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg"); +static inline_link_naked_url = ctRegex!(`(?P<before>^|[ ])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>[.,;:?!'"]?(?:[ ]|$))`, "mg"); +static inline_link_markup_regular = ctRegex!(`(?P<before>^|[ ])\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>[.,;:?!]?(?:[ ]|$))`, "mg"); +static inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>[.,;:?!]?(?:[ ]|$))`, "mg"); +static inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg"); +#+END_SRC + +*** images :images: + +#+name: ao_rgx +#+BEGIN_SRC d +static image = ctRegex!(`([a-zA-Z0-9._-]+?\.(?:png|gif|jpg))`, "mg"); +#+END_SRC + +*** inline markup book index :inline:bookindex: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ inline markup book index +/ +static book_index = ctRegex!(`^=\{\s*(.+?)\}$`, "m"); +static book_index_open = ctRegex!(`^=\{\s*([^}]+?)$`); +static book_index_close = ctRegex!(`^(.*?)\}$`, "m"); // strip +#+END_SRC + +** no obj_cite_number object :ocn:off:object: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ no obj_cite_number object +/ +static obj_cite_number_off = ctRegex!(`~#$`, "m"); +static obj_cite_number_off_dh = ctRegex!(`-#$`, "m"); +static obj_cite_number_off_all = ctRegex!(`[~-]#$`, "m"); +#+END_SRC + +** no obj_cite_number block :ocn:off:block: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ no obj_cite_number block +/ +static obj_cite_number_off_block = ctRegex!(`^--~#$`); +static obj_cite_number_off_block_dh = ctRegex!(`^---#$`); +static obj_cite_number_off_block_close = ctRegex!(`^--\+#$`); +static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`); +#+END_SRC + +** ignore outside code blocks :block:code: + 
+#+name: ao_rgx +#+BEGIN_SRC d +/+ ignore outside code blocks +/ +static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info +#+END_SRC + +** line & page breaks :break: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ line & page breaks +/ +static break_line_within_object = ctRegex!(`[\\]{2}( |$)`); +static break_page = ctRegex!(`^-[\\]{2}-$`); +static break_page_new = ctRegex!(`^=[\\]{2}=$`); +static break_page_line_across = ctRegex!(`^=[.]{2}=$`); +static break_string = ctRegex!(`』`); +static parent = ctRegex!(`([0-7]):([0-9]+)`); +#+END_SRC + +** json :json: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ json +/ +static tailing_comma = ctRegex!(`,$`, "m"); +#+END_SRC + +** biblio tags :biblio:tags: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ biblio tags +/ +static biblio_tags = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`); +static biblio_abbreviations = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`); +#+END_SRC + +** bookindex split :bookindex:split: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ bookindex split +/ +static bi_main_terms_split = ctRegex!(`\s*;\s*`); +static bi_main_term_plus_rest_split = ctRegex!(`\s*:\s*`); +static bi_sub_terms_plus_obj_cite_number_offset_split = ctRegex!(`\s*\|\s*`); +static bi_term_and_obj_cite_numbers_match = ctRegex!(`^(.+?)\+(\d+)`); +#+END_SRC + +** language codes :language:codes: + +#+name: ao_rgx +#+BEGIN_SRC d +/+ language codes: static like the other Rgx members; pt_BR is listed before pt so the longer code is tried first +/ +static language_codes = + ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt_BR|pt|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)"); +static language_code_and_filename = + 
ctRegex!("(?:^|[/])(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt_BR|pt|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)/[A-Za-z0-9._-].+?[.](?:sst|ssm)$"); +#+END_SRC + +* 1. output ctRegex :regex: +[[./sdp.org][sdp]] [[./][org/]] +http://dlang.org/phobos/std_regex.html +- Plain string, in which case it's compiled to bytecode before matching. +- Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode. +- StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code. + +** 0. code template: :rgx: + +#+name: tangle_ao_rgx +#+BEGIN_SRC d :tangle ../src/sdp/output_rgx.d +/++ + regex: regular expressions used in sisu document parser ++/ +template SiSUoutputRgxInit() { + private import defaults; + struct Rgx { + <<prgmkup_rgx>> + <<sp_ch_xhtml_rgx>> + } +} +#+END_SRC + +** special characters +*** xhtml special characters + +#+name: sp_ch_xhtml_rgx +#+BEGIN_SRC d +static xhtml_ampersand = ctRegex!(`[&]`); // &amp; +static xhtml_less_than = ctRegex!(`[<]`); // &lt; +static xhtml_greater_than = ctRegex!(`[>]`); // &gt; +static xhtml_line_break = ctRegex!(` [\\]{2}`); // <br /> +#+END_SRC + +* 2. 
ctRegex defaults shared by ao & output (generic) +** misc generic + +#+name: prgmkup_rgx +#+BEGIN_SRC d +static newline = ctRegex!("\n", "mg"); +static strip_br = ctRegex!("^<br>\n|<br>\n*$"); +static space = ctRegex!(`[ ]`, "mg"); +static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg"); +static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg"); // could be issues for endnotes +static two_spaces = ctRegex!(`[ ]{2}`, "mg"); +static nbsp_char = ctRegex!(`░`, "mg"); +static nbsp_chars_line_start = ctRegex!(`^░+`, "mg"); +static nbsp_and_space = ctRegex!(`&nbsp;[ ]`, "mg"); // the literal &nbsp; entity followed by one space +static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg"); // the ░ character followed by one space +#+END_SRC + +** filename (and path) matching (including markup insert file) :insert:file:path:filename: + +#+name: prgmkup_rgx +#+BEGIN_SRC d +static src_pth = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])$`); +static src_fn = + ctRegex!(`^([a-zA-Z0-9._-]+/)*(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`); +static src_fn_master = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`); +static src_fn_text = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`); +static src_fn_insert = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`); +static src_fn_find_inserts = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); +static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`); +#+END_SRC + +** inline markup +*** inline (internal program) markup footnotes endnotes :inline:footnote: + +#+name: prgmkup_rgx +#+BEGIN_SRC d +/+ inline markup footnotes endnotes +/ +static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); +static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); +static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); +static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); 
+static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m"); +static inline_al_delimiter_close_regular = ctRegex!(`】`, "m"); +static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m"); +static inline_notes_delimiter_al_regular = ctRegex!(`【(.+?)】`, "mg"); +static inline_notes_delimiter_al_regular_number_note = ctRegex!(`【(\d+)\s+(.+?)】`, "mg"); +static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m"); +static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m"); +static inline_text_and_note_al = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg"); +static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg"); +#+END_SRC + +*** inline links + +#+name: prgmkup_rgx +#+BEGIN_SRC d +/+ inline links +/ +static inline_link = ctRegex!(`┥(.+?)┝┤(.+?)├`, "mg"); +static inline_a_url = ctRegex!(`(┤)(\S+?)(├)`, "mg"); +static fn_suffix = ctRegex!(`\.fnSuffix`, "mg"); +static inline_link_fn_suffix = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg"); +static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg"); +static mark_internal_site_lnk = ctRegex!(`¤`, "mg"); +#+END_SRC + +*** inline markup font face mod :inline:font:face: + +#+name: prgmkup_rgx +#+BEGIN_SRC d +/+ inline markup font face mod +/ +static inline_faces = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+#-])\{(?P<text>.+?)\}[*!_^,+#-])`, "mg"); +static inline_emphasis = ctRegex!(`\*\{(?P<text>.+?)\}\*`, "mg"); +static inline_bold = ctRegex!(`!\{(?P<text>.+?)\}!`, "mg"); +static inline_underscore = ctRegex!(`_\{(?P<text>.+?)\}_`, "mg"); +static inline_italics = ctRegex!(`/\{(?P<text>.+?)\}/`, "mg"); +static inline_superscript = ctRegex!(`\^\{(?P<text>.+?)\}\^`, "mg"); +static inline_subscript = ctRegex!(`,\{(?P<text>.+?)\},`, "mg"); +static inline_strike = ctRegex!(`-\{(?P<text>.+?)\}-`, "mg"); +static inline_insert = ctRegex!(`\+\{(?P<text>.+?)\}\+`, "mg"); +static inline_mono = ctRegex!(`#\{(?P<text>.+?)\}#`, "mg"); +static inline_cite = 
ctRegex!(`"\{(?P<text>.+?)\}"`, "mg"); +static inline_faces_line = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`); +static inline_emphasis_line = ctRegex!(`^\*_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`); +static inline_bold_line = ctRegex!(`^!_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`); +static inline_italics_line = ctRegex!(`^/_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`); +static inline_underscore_line = ctRegex!(`^__ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`); +#+END_SRC + +*** table related + +#+name: prgmkup_rgx +#+BEGIN_SRC d +/+ table delimiters +/ +static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg"); // +static table_delimiter_row = ctRegex!("[ ]*\n", "mg"); // +#+END_SRC |