Skip to content

Commit e017af7

Browse files
committed
Fix backreferences \\k<+1-2>
1 parent 3306772 commit e017af7

File tree

2 files changed

+61
-76
lines changed

2 files changed

+61
-76
lines changed

src/tree-sitter/tree-sitter-regex/grammar.js

Lines changed: 30 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -733,89 +733,59 @@ module.exports = grammar({
733733
'\'',
734734
),
735735
),
736-
backreference: $ => choice(
736+
backreference: $ => choice( // \\k<name+1>
737737
seq(
738738
'\\\\k<',
739-
optional(
740-
alias(
741-
repeat1(/[^\w>"()\\\[]/),
742-
$.error
743-
),
739+
alias(
740+
repeat(/[^A-Za-z_1-9>)]/),
741+
$.error,
744742
),
745-
choice(
746-
seq(
743+
optional(
744+
choice(
747745
alias(
748746
token(
749-
seq(
750-
optional(
751-
choice(
752-
'+',
753-
'-',
754-
),
755-
),
756-
repeat1(/\d/),
747+
prec(1,
748+
/[+-]?0*[1-9]\d*/, // ref number `0` is not allowed
757749
),
758750
),
759-
$.number
751+
$.number,
760752
),
761-
optional(
762-
alias(
763-
repeat1(/[^>"()\\\[]/),
764-
$.error
765-
),
753+
alias(
754+
/[A-Za-z_]\w*/, // Should allow all unicode numbers and letters
755+
$.name,
766756
),
767757
),
758+
),
759+
optional(
768760
alias(
769-
token(
770-
seq(
771-
/[A-Za-z_]/,
772-
repeat(/[^>]/),
773-
),
774-
),
775-
$.name
761+
/[+-]\d+/,
762+
$.level,
776763
),
777764
),
778765
'>',
779766
),
780767
seq(
781768
'\\\\k\'',
782-
optional(
783-
alias(
784-
repeat1(/[^\w>"()\\\[]/),
785-
$.error
786-
),
769+
alias(
770+
repeat(/[^A-Za-z_1-9')]/),
771+
$.error,
787772
),
788-
choice(
789-
seq(
773+
optional(
774+
choice(
790775
alias(
791-
token(
792-
seq(
793-
optional(
794-
choice(
795-
'+',
796-
'-',
797-
),
798-
),
799-
repeat1(/\d/),
800-
),
801-
),
802-
$.number
776+
/[+-]?0*[1-9]\d*/, // ref number `0` is not allowed
777+
$.number,
803778
),
804-
optional(
805-
alias(
806-
repeat1(/[^>"()\\\[]/),
807-
$.error
808-
),
779+
alias(
780+
/[A-Za-z_]\w*/, // Should allow all unicode numbers and letters
781+
$.name,
809782
),
810783
),
784+
),
785+
optional(
811786
alias(
812-
token(
813-
seq(
814-
/[A-Za-z_]/,
815-
repeat(/[^']/),
816-
),
817-
),
818-
$.name
787+
/[+-]\d+/,
788+
$.level,
819789
),
820790
),
821791
'\'',

syntaxes/regex.tmLanguage.json

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,23 +1432,38 @@
14321432
}
14331433
},
14341434
{
1435-
"comment": "\\k<1> \\k'-2' \\k'+003' \\k<name> \\k'name_123+5' \\k<9-2>",
1436-
"match": "(?D)(\\\\{2}k)(?>((?<quote>')|<)(?>((?~|(?>(?(<quote>)'|>)|\\)|(?<!\\\\)\\\\{2}*\")|.*?))(?>(?>(?>((0*)[1-9]\\d{,2}(\\d{,2}))|([\\w&&[^0-9]]\\w*))([-+](0*)[12]?\\d{1,9})?)|([-+](0*)[1-9]\\d*)))?((?~(?('quote')'|>)|\\)|(?<!\\\\)\\\\{2}*\"))((?('quote')'|>))?)",
1437-
"captures": {
1435+
"comment": "\\k<1> \\k'-2' \\k<name> \\k'name_123+5' \\k<+000999-2147483647>",
1436+
"begin": "(\\\\{2}k)('|<)",
1437+
"end": "(?((?=\\2))'|>)|(?=[\")])",
1438+
"beginCaptures": {
14381439
"1": { "name": "string.regexp.tm" },
1439-
"2": { "name": "constant.character.set.regexp.tm" },
1440-
"4": { "name": "invalid.illegal.tm markup.underline regex" },
1441-
"5": { "name": "constant.numeric.tm" },
1442-
"6": { "name": "punctuation.definition.tag.tm" },
1443-
"7": { "name": "invalid.illegal.tm markup.italic" },
1444-
"8": { "name": "markup.bold" },
1445-
"9": { "name": "constant.numeric.tm" },
1446-
"10": { "name": "punctuation.definition.tag.tm" },
1447-
"11": { "name": "constant.numeric.tm" },
1448-
"12": { "name": "punctuation.definition.tag.tm" },
1449-
"13": { "name": "invalid.illegal.tm markup.underline regex" },
1450-
"14": { "name": "constant.character.set.regexp.tm" }
1451-
}
1440+
"2": { "name": "constant.character.set.regexp.tm" }
1441+
},
1442+
"endCaptures": { "0": { "name": "constant.character.set.regexp.tm" } },
1443+
"patterns": [
1444+
{
1445+
"comment": "Capture group nesting level",
1446+
"match": "(?<=\\w)[+-]0*+(?>2(?>0[0-9]{8}|1(?>[0-3][0-9]{7}|4(?>[0-6][0-9]{6}|7(?>[0-3][0-9]{5}|4(?>[0-7][0-9]{4}|8(?>[0-2][0-9]{3}|3(?>[0-5][0-9]{2}|6(?>[0-3][0-9]|4[0-7]))))))))|1?+[0-9]{9,0})(\\w*+)",
1447+
"captures": { "1": { "name": "invalid.illegal.tm" } },
1448+
"name": "constant.numeric.tm"
1449+
},
1450+
{
1451+
"comment": "Capture group reference number",
1452+
"match": "[+-]?+0*+[1-9][0-9]{2,0}(\\w*+)",
1453+
"captures": { "1": { "name": "invalid.illegal.tm" } },
1454+
"name": "constant.numeric.tm"
1455+
},
1456+
{
1457+
"comment": "Named capture group reference name",
1458+
"match": "(?!0)\\w++",
1459+
"name": "markup.bold.tm"
1460+
},
1461+
{ "include": "source.json.textmate#escape-characters" },
1462+
{
1463+
"match": ".[^\\\\\"'>\\w+-]*+",
1464+
"name": "invalid.illegal.tm"
1465+
}
1466+
]
14521467
}
14531468
]
14541469
},

0 commit comments

Comments
 (0)