Skip to content

Commit be58d53

Browse files
committed
Parse `0.2` after `...` as float literal, not member access
Deciding whether `0.2` should be lexed as a float literal or a member access is a little more difficult than just looking at the previous character, because `0.2` might be preceded by an operator like `...` or `.^.`, in which case it should be lexed as a float literal and not a member access. We might be able to do some disambiguation magic on whether the character before the period is also an operator continuation point, but that seems fairly brittle to me. The sanest way of doing this is to store the previously lexed token’s kind in the cursor and check that. I measured and did not see a performance regression when parsing MovieSwiftUI. rdar://103273988
1 parent edd2d0c commit be58d53

File tree

2 files changed

+67
-1
lines changed

2 files changed

+67
-1
lines changed

Sources/SwiftParser/Lexer.swift

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ extension Lexer {
214214
public struct Cursor: Equatable {
215215
var input: UnsafeBufferPointer<UInt8>
216216
var previous: UInt8
217+
/// If we have already lexed a token, the kind of the previously lexed token
218+
var previousTokenKind: RawTokenKind?
217219

218220
@_spi(LexerDiagnostics)
219221
public init(input: UnsafeBufferPointer<UInt8>, previous: UInt8) {
@@ -338,6 +340,10 @@ extension Lexer.Cursor {
338340
}
339341

340342
extension Lexer.Cursor {
343+
/// Revert the lexer by `offset` bytes. This should only be used by `resetForSplit`.
344+
/// This must not back up by more bytes than the last token because that would
345+
/// require us to also update `previousTokenKind`, which we don't do in this
346+
/// function
341347
fileprivate mutating func backUp(by offset: Int) {
342348
assert(!self.isAtStartOfFile)
343349
self.previous = self.input.baseAddress!.advanced(by: -(offset + 1)).pointee
@@ -796,6 +802,9 @@ extension Lexer.Cursor {
796802
if newlineInLeadingTrivia == .present {
797803
flags.insert(.isAtStartOfLine)
798804
}
805+
806+
self.previousTokenKind = kind
807+
799808
return .init(
800809
tokenKind: kind,
801810
flags: flags,
@@ -1393,7 +1402,15 @@ extension Lexer.Cursor {
13931402
if !self.isAtEndOfFile, self.peek() == UInt8(ascii: ".") {
13941403
// NextToken is the soon to be previous token
13951404
// Therefore: x.0.1 is sub-tuple access, not x.float_literal
1396-
if self.input.count > 1, !Unicode.Scalar(self.peek(at: 1)).isDigit || TokStart.previous == UInt8(ascii: ".") {
1405+
if self.input.count <= 1 {
1406+
// If there are no more digits following the '.', we don't have a float
1407+
// literal.
1408+
return (.integerLiteral, [])
1409+
} else if !Unicode.Scalar(self.peek(at: 1)).isDigit {
1410+
// ".a" is a member access and certainly not a float literal
1411+
return (.integerLiteral, [])
1412+
} else if self.previousTokenKind == .period {
1413+
// Lex x.0.1 as sub-tuple access, not x.float_literal.
13971414
return (.integerLiteral, [])
13981415
}
13991416
} else {

Tests/SwiftParserTest/LexerTests.swift

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,55 @@ public class LexerTests: XCTestCase {
935935
]
936936
)
937937
}
938+
939+
func testMultiDigitTupleAccess() {
940+
var data = "x.13.1"
941+
data.withUTF8 { buf in
942+
let lexemes = Lexer.lex(buf)
943+
AssertEqualTokens(
944+
lexemes,
945+
[
946+
lexeme(.identifier, "x"),
947+
lexeme(.period, "."),
948+
lexeme(.integerLiteral, "13"),
949+
lexeme(.period, "."),
950+
lexeme(.integerLiteral, "1"),
951+
lexeme(.eof, ""),
952+
]
953+
)
954+
}
955+
}
956+
957+
func testFloatingPointNumberAfterRangeOperator() {
958+
var data = "0.1...0.2"
959+
data.withUTF8 { buf in
960+
let lexemes = Lexer.lex(buf)
961+
AssertEqualTokens(
962+
lexemes,
963+
[
964+
lexeme(.floatingLiteral, "0.1"),
965+
lexeme(.unspacedBinaryOperator, "..."),
966+
lexeme(.floatingLiteral, "0.2"),
967+
lexeme(.eof, ""),
968+
]
969+
)
970+
}
971+
}
972+
973+
func testUnterminatedFloatLiteral() {
974+
var data = "0."
975+
data.withUTF8 { buf in
976+
let lexemes = Lexer.lex(buf)
977+
AssertEqualTokens(
978+
lexemes,
979+
[
980+
lexeme(.integerLiteral, "0"),
981+
lexeme(.unknown, "."),
982+
lexeme(.eof, ""),
983+
]
984+
)
985+
}
986+
}
938987
}
939988

940989
extension Lexer {

0 commit comments

Comments
 (0)