diff options
| author | Ralph Amissah <ralph.amissah@gmail.com> | 2026-05-10 00:02:55 -0400 |
|---|---|---|
| committer | Ralph Amissah <ralph.amissah@gmail.com> | 2026-05-10 00:02:55 -0400 |
| commit | daf5457585e55a7a76391b90d79c47d021163325 (patch) | |
| tree | 31cf911f21e281d7f8f7a8c945f191ed88ab9fe8 | |
| parent | grammar: allow multi-line footnote and editor-note bodies (diff) | |
grammar: allow digit-leading segment names

Segment ids such as `2~1` and `1~12` appear in real samples (most
visibly /Free Culture/'s `2~1 1. More Formalities`). The previous
segment_name regex required a leading [a-zA-Z_!]; relax to also
accept digits at the first position. The internal-hyphen and
no-trailing-hyphen constraints (the latter to avoid colliding with
the suppress marker) are unchanged.
(assisted by Claude-Code)
| -rw-r--r-- | grammar.js | 6 | ||||
| -rw-r--r-- | src/grammar.json | 2 | ||||
| -rw-r--r-- | src/parser.c | 1 |
3 files changed, 6 insertions, 3 deletions
```diff
@@ -124,8 +124,10 @@ module.exports = grammar({
  ),
  segment_marker: $ => token(prec(20, choice('1~', '2~', '3~'))),

- // Allow internal hyphens but not trailing hyphen (which is the suppress marker)
- segment_name: $ => /[a-zA-Z_!][a-zA-Z0-9_]*(-[a-zA-Z0-9_]+)*/,
+ // Allow internal hyphens but not trailing hyphen (which is the suppress
+ // marker). Names may begin with a digit to support numeric section ids
+ // such as `2~1 ...` seen in real-world markup (e.g. Free Culture).
+ segment_name: $ => /[a-zA-Z0-9_!][a-zA-Z0-9_]*(-[a-zA-Z0-9_]+)*/,

  // =================================================================
  // Code blocks (raw content - no inline parsing)
diff --git a/src/grammar.json b/src/grammar.json
index e8ce79a..3974246 100644
--- a/src/grammar.json
+++ b/src/grammar.json
@@ -425,7 +425,7 @@
  },
  "segment_name": {
  "type": "PATTERN",
- "value": "[a-zA-Z_!][a-zA-Z0-9_]*(-[a-zA-Z0-9_]+)*"
+ "value": "[a-zA-Z0-9_!][a-zA-Z0-9_]*(-[a-zA-Z0-9_]+)*"
  },
  "code_block_curly": {
  "type": "PREC",
diff --git a/src/parser.c b/src/parser.c
index e194b48..12d39c9 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -2093,6 +2093,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
  if (lookahead == '-') ADVANCE(214);
  if (lookahead == '}') ADVANCE(499);
  if (lookahead == '!' ||
+ ('0' <= lookahead && lookahead <= '9') ||
  ('A' <= lookahead && lookahead <= 'Z') ||
  lookahead == '_' ||
  ('a' <= lookahead && lookahead <= 'z')) ADVANCE(217);
```
