Merge pull request #1303 from ahoppen/ahoppen/5.8-lexing-error

ahoppen · web-flow · commit dc1a32fe2d05 · 2023-02-02T08:08:42.000+01:00
[5.8] Parse `0.2` after `...` as float literal, not member access
diff --git a/Sources/SwiftParser/Lexer.swift b/Sources/SwiftParser/Lexer.swift
@@ -214,6 +214,8 @@ extension Lexer {
   public struct Cursor: Equatable {
     var input: UnsafeBufferPointer<UInt8>
     var previous: UInt8
+    /// The kind of the previously lexed token, or `nil` if no token has been lexed yet.
+    var previousTokenKind: RawTokenKind?
 
     @_spi(LexerDiagnostics)
     public init(input: UnsafeBufferPointer<UInt8>, previous: UInt8) {
@@ -338,6 +340,10 @@ extension Lexer.Cursor {
 }
 
 extension Lexer.Cursor {
+  /// Revert the lexer by `offset` bytes. This should only be used by `resetForSplit`.
+  /// This must not back up by more bytes than the last token because that would
+  /// require us to also update `previousTokenKind`, which we don't do in this
+  /// function.
   fileprivate mutating func backUp(by offset: Int) {
     assert(!self.isAtStartOfFile)
     self.previous = self.input.baseAddress!.advanced(by: -(offset + 1)).pointee
@@ -796,6 +802,9 @@ extension Lexer.Cursor {
     if newlineInLeadingTrivia == .present {
       flags.insert(.isAtStartOfLine)
     }
+
+    self.previousTokenKind = kind
+
     return .init(
       tokenKind: kind,
       flags: flags,
@@ -1393,7 +1402,15 @@ extension Lexer.Cursor {
     if !self.isAtEndOfFile, self.peek() == UInt8(ascii: ".") {
       // NextToken is the soon to be previous token
       // Therefore: x.0.1 is sub-tuple access, not x.float_literal
-      if self.input.count > 1, !Unicode.Scalar(self.peek(at: 1)).isDigit || TokStart.previous == UInt8(ascii: ".") {
+      if self.input.count <= 1 {
+        // If there are no more digits following the '.', we don't have a float
+        // literal.
+        return (.integerLiteral, [])
+      } else if !Unicode.Scalar(self.peek(at: 1)).isDigit {
+        // ".a" is a member access and certainly not a float literal
+        return (.integerLiteral, [])
+      } else if self.previousTokenKind == .period {
+        // Lex x.0.1 as sub-tuple access, not x.float_literal.
         return (.integerLiteral, [])
       }
     } else {
diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift
@@ -935,6 +935,55 @@ public class LexerTests: XCTestCase {
       ]
     )
   }
+
+  func testMultiDigitTupleAccess() {
+    var data = "x.13.1"
+    data.withUTF8 { buf in
+      let lexemes = Lexer.lex(buf)
+      AssertEqualTokens(
+        lexemes,
+        [
+          lexeme(.identifier, "x"),
+          lexeme(.period, "."),
+          lexeme(.integerLiteral, "13"),
+          lexeme(.period, "."),
+          lexeme(.integerLiteral, "1"),
+          lexeme(.eof, ""),
+        ]
+      )
+    }
+  }
+
+  func testFloatingPointNumberAfterRangeOperator() {
+    var data = "0.1...0.2"
+    data.withUTF8 { buf in
+      let lexemes = Lexer.lex(buf)
+      AssertEqualTokens(
+        lexemes,
+        [
+          lexeme(.floatingLiteral, "0.1"),
+          lexeme(.unspacedBinaryOperator, "..."),
+          lexeme(.floatingLiteral, "0.2"),
+          lexeme(.eof, ""),
+        ]
+      )
+    }
+  }
+
+  func testUnterminatedFloatLiteral() {
+    var data = "0."
+    data.withUTF8 { buf in
+      let lexemes = Lexer.lex(buf)
+      AssertEqualTokens(
+        lexemes,
+        [
+          lexeme(.integerLiteral, "0"),
+          lexeme(.unknown, "."),
+          lexeme(.eof, ""),
+        ]
+      )
+    }
+  }
 }
 
 extension Lexer {