summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Ralph Amissah <ralph.amissah@gmail.com> 2026-05-10 00:02:55 -0400
committer: Ralph Amissah <ralph.amissah@gmail.com> 2026-05-10 00:02:55 -0400
commit: daf5457585e55a7a76391b90d79c47d021163325 (patch)
tree: 31cf911f21e281d7f8f7a8c945f191ed88ab9fe8
parent: grammar: allow multi-line footnote and editor-note bodies (diff)
grammar: allow digit-leading segment names
Segment ids such as `2~1` and `1~12` appear in real samples (most visibly /Free Culture/'s `2~1 1. More Formalities`). The previous segment_name regex required a leading [a-zA-Z_!]; relax to also accept digits at the first position. The internal-hyphen and no-trailing-hyphen constraints (the latter to avoid colliding with the suppress marker) are unchanged. (assisted by Claude-Code)
-rw-r--r-- grammar.js 6
-rw-r--r-- src/grammar.json 2
-rw-r--r-- src/parser.c 1
3 files changed, 6 insertions, 3 deletions
diff --git a/grammar.js b/grammar.js
index 34582fa..a49da94 100644
--- a/grammar.js
+++ b/grammar.js
@@ -124,8 +124,10 @@ module.exports = grammar({
),
segment_marker: $ => token(prec(20, choice('1~', '2~', '3~'))),
- // Allow internal hyphens but not trailing hyphen (which is the suppress marker)
- segment_name: $ => /[a-zA-Z_!][a-zA-Z0-9_]*(-[a-zA-Z0-9_]+)*/,
+ // Allow internal hyphens but not trailing hyphen (which is the suppress
+ // marker). Names may begin with a digit to support numeric section ids
+ // such as `2~1 ...` seen in real-world markup (e.g. Free Culture).
+ segment_name: $ => /[a-zA-Z0-9_!][a-zA-Z0-9_]*(-[a-zA-Z0-9_]+)*/,
// =================================================================
// Code blocks (raw content - no inline parsing)
diff --git a/src/grammar.json b/src/grammar.json
index e8ce79a..3974246 100644
--- a/src/grammar.json
+++ b/src/grammar.json
@@ -425,7 +425,7 @@
},
"segment_name": {
"type": "PATTERN",
- "value": "[a-zA-Z_!][a-zA-Z0-9_]*(-[a-zA-Z0-9_]+)*"
+ "value": "[a-zA-Z0-9_!][a-zA-Z0-9_]*(-[a-zA-Z0-9_]+)*"
},
"code_block_curly": {
"type": "PREC",
diff --git a/src/parser.c b/src/parser.c
index e194b48..12d39c9 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -2093,6 +2093,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
if (lookahead == '-') ADVANCE(214);
if (lookahead == '}') ADVANCE(499);
if (lookahead == '!' ||
+ ('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
lookahead == '_' ||
('a' <= lookahead && lookahead <= 'z')) ADVANCE(217);