Skip to content

Commit fd6ab05

Browse files
authored
More regex error recovery in reScanSlashToken (#174)
1 parent b6322c4 commit fd6ab05

File tree

1 file changed

+57
-6
lines changed

1 file changed

+57
-6
lines changed

internal/scanner/scanner.go

Lines changed: 57 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -805,6 +805,7 @@ func (s *Scanner) ReScanTemplateToken(isTaggedTemplate bool) ast.Kind {
805805
func (s *Scanner) ReScanSlashToken() ast.Kind {
806806
if s.token == ast.KindSlashToken || s.token == ast.KindSlashEqualsToken {
807807
s.pos = s.tokenStart + 1
808+
startOfRegExpBody := s.pos
808809
inEscape := false
809810
inCharacterClass := false
810811
loop:
@@ -824,7 +825,6 @@ func (s *Scanner) ReScanSlashToken() ast.Kind {
824825
case ch == '/' && !inCharacterClass:
825826
// A slash within a character class is permissible,
826827
// but in general it signals the end of the regexp literal.
827-
s.pos++
828828
break loop
829829
case ch == '[':
830830
inCharacterClass = true
@@ -835,12 +835,63 @@ func (s *Scanner) ReScanSlashToken() ast.Kind {
835835
}
836836
s.pos += size
837837
}
838-
for {
839-
ch, size := s.charAndSize()
840-
if size == 0 || !isIdentifierPart(ch, s.languageVersion) {
841-
break
838+
if s.tokenFlags&ast.TokenFlagsUnterminated != 0 {
839+
// Search for the nearest unbalanced bracket for better recovery. Since the expression is
840+
// invalid anyway, we take nested square brackets into consideration for the best guess.
841+
endOfRegExpBody := s.pos
842+
s.pos = startOfRegExpBody
843+
inEscape = false
844+
characterClassDepth := 0
845+
inDecimalQuantifier := false
846+
groupDepth := 0
847+
for s.pos < endOfRegExpBody {
848+
ch, size := s.charAndSize()
849+
if inEscape {
850+
inEscape = false
851+
} else if ch == '\\' {
852+
inEscape = true
853+
} else if ch == '[' {
854+
characterClassDepth++
855+
} else if ch == ']' && characterClassDepth != 0 {
856+
characterClassDepth--
857+
} else if characterClassDepth == 0 {
858+
if ch == '{' {
859+
inDecimalQuantifier = true
860+
} else if ch == '}' && inDecimalQuantifier {
861+
inDecimalQuantifier = false
862+
} else if !inDecimalQuantifier {
863+
if ch == '(' {
864+
groupDepth++
865+
} else if ch == ')' && groupDepth != 0 {
866+
groupDepth--
867+
} else if ch == ')' || ch == ']' || ch == '}' {
868+
// We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex.
869+
break
870+
}
871+
}
872+
}
873+
s.pos += size
874+
}
875+
// Whitespace and semicolons at the end are unlikely to be part of the regex
876+
for {
877+
ch, size := utf8.DecodeLastRuneInString(s.text[:s.pos])
878+
if stringutil.IsWhiteSpaceLike(ch) || ch == ';' {
879+
s.pos -= size
880+
} else {
881+
break
882+
}
883+
}
884+
s.errorAt(diagnostics.Unterminated_regular_expression_literal, s.tokenStart, s.pos-s.tokenStart)
885+
} else {
886+
// Consume the slash character
887+
s.pos++
888+
for {
889+
ch, size := s.charAndSize()
890+
if size == 0 || !isIdentifierPart(ch, s.languageVersion) {
891+
break
892+
}
893+
s.pos += size
842894
}
843-
s.pos += size
844895
}
845896
s.tokenValue = s.text[s.tokenStart:s.pos]
846897
s.token = ast.KindRegularExpressionLiteral

0 commit comments

Comments
 (0)