
Commit 7cf739e

Lexer, helpers and comments cleanup (#5063)
* Move the building of the comments hash into its own function, to make clearer that it only happens once
* Abstract token comments dictionary
* Format comments
* Allow attaching a “data” property to tokens, to allow extra info to pass through the parser into the node classes
* Add data to StringLiteral tokens such that we should be able to deindent them in the nodes class
* Update output
* Another comment fix
* Add data to every token, with common properties
* Be more defensive, even though we apparently don't need to be
* Improve comments
* Remove the token data property, leaving all unrelated improvements
* Code review improvements; update output
1 parent 41185ca commit 7cf739e

File tree

6 files changed: +82 -54 lines


lib/coffeescript/coffeescript.js

Lines changed: 1 addition & 1 deletion (generated file; diff not rendered by default)

lib/coffeescript/helpers.js

Lines changed: 38 additions & 21 deletions (generated file; diff not rendered by default)

lib/coffeescript/lexer.js

Lines changed: 10 additions & 9 deletions (generated file; diff not rendered by default)

src/coffeescript.coffee

Lines changed: 1 addition & 1 deletion
@@ -193,7 +193,7 @@ lexer = new Lexer
 
 # The real Lexer produces a generic stream of tokens. This object provides a
 # thin wrapper around it, compatible with the Jison API. We can then pass it
-# directly as a "Jison lexer".
+# directly as a “Jison lexer.”
 parser.lexer =
   lex: ->
     token = parser.tokens[@pos++]
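
For context, a “Jison lexer” is simply an object exposing the small interface Jison’s generated parser expects, chiefly `setInput` and `lex`. A minimal hypothetical sketch of such a wrapper over an already-built token stream (simplified; the real wrapper in this file also maintains `yytext`, `yylloc` and other bookkeeping):

# Hypothetical sketch only: a Jison-compatible lexer that replays a
# pre-tokenized stream instead of scanning raw source.
parser.lexer =
  pos: 0
  setInput: (tokens) ->
    # Jison hands us the “input”; here that input is the token stream.
    parser.tokens = tokens
    @pos = 0
  lex: ->
    # Return the next token’s tag, or '' when the stream is exhausted.
    token = parser.tokens[@pos++]
    if token then token[0] else ''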

src/helpers.coffee

Lines changed: 24 additions & 14 deletions
@@ -111,30 +111,40 @@ buildLocationData = (first, last) ->
 buildLocationHash = (loc) ->
   "#{loc.first_line}x#{loc.first_column}-#{loc.last_line}x#{loc.last_column}"
 
+# Build a dictionary of extra token properties organized by tokens’ locations
+# used as lookup hashes.
+buildTokenDataDictionary = (parserState) ->
+  tokenData = {}
+  for token in parserState.parser.tokens when token.comments
+    tokenHash = buildLocationHash token[2]
+    # Multiple tokens might have the same location hash, such as the generated
+    # `JS` tokens added at the start or end of the token stream to hold
+    # comments that start or end a file.
+    tokenData[tokenHash] ?= {}
+    if token.comments # `comments` is always an array.
+      # For “overlapping” tokens, that is tokens with the same location data
+      # and therefore matching `tokenHash`es, merge the comments from both/all
+      # tokens together into one array, even if there are duplicate comments;
+      # they will get sorted out later.
+      (tokenData[tokenHash].comments ?= []).push token.comments...
+  tokenData
+
 # This returns a function which takes an object as a parameter, and if that
 # object is an AST node, updates that object's locationData.
 # The object is returned either way.
 exports.addDataToNode = (parserState, first, last) ->
   (obj) ->
-    # Add location data
+    # Add location data.
     if obj?.updateLocationDataIfMissing? and first?
      obj.updateLocationDataIfMissing buildLocationData(first, last)
 
-    # Add comments data
-    unless parserState.tokenComments
-      parserState.tokenComments = {}
-      for token in parserState.parser.tokens when token.comments
-        tokenHash = buildLocationHash token[2]
-        unless parserState.tokenComments[tokenHash]?
-          parserState.tokenComments[tokenHash] = token.comments
-        else
-          parserState.tokenComments[tokenHash].push token.comments...
-
+    # Add comments, building the dictionary of token data if it hasn’t been
+    # built yet.
+    parserState.tokenData ?= buildTokenDataDictionary parserState
     if obj.locationData?
       objHash = buildLocationHash obj.locationData
-      if parserState.tokenComments[objHash]?
-        attachCommentsToNode parserState.tokenComments[objHash], obj
-
+      if parserState.tokenData[objHash]?.comments?
+        attachCommentsToNode parserState.tokenData[objHash].comments, obj
     obj
 
 exports.attachCommentsToNode = attachCommentsToNode = (comments, node) ->
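
To illustrate the lookup-hash idea, here is a small hypothetical walkthrough (token shapes simplified; real tokens carry more fields). Two tokens that share the same location data collapse to one hash, and their comments are merged into a single array:

# Hypothetical data only, to show what buildTokenDataDictionary returns.
loc = first_line: 0, first_column: 0, last_line: 0, last_column: 2
tokenA = ['JS', '', loc]
tokenA.comments = [content: ' Opening comment']
tokenB = ['TERMINATOR', '\n', loc]
tokenB.comments = [content: ' Another comment at the same location']

tokenData = buildTokenDataDictionary parser: tokens: [tokenA, tokenB]
# buildLocationHash loc            # => '0x0-0x2'
# tokenData['0x0-0x2'].comments    # => both comment objects, merged in order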

src/lexer.coffee

Lines changed: 8 additions & 8 deletions
@@ -305,6 +305,7 @@ exports.Lexer = class Lexer
     else
       @mergeInterpolationTokens tokens, {delimiter}, (value, i) =>
         value = @formatString value, delimiter: quote
+        # Remove indentation from multiline single-quoted strings.
         value = value.replace SIMPLE_STRING_OMIT, (match, offset) ->
           if (i is 0 and offset is 0) or
               (i is $ and offset + match.length is value.length)
@@ -780,7 +781,7 @@ exports.Lexer = class Lexer
       rest = str[interpolationOffset..]
       {tokens: nested, index} =
         new Lexer().tokenize rest, line: line, column: column, untilBalanced: on
-      # Account for the `#` in `#{`
+      # Account for the `#` in `#{`.
       index += interpolationOffset
 
       braceInterpolator = str[index - 1] is '}'
@@ -877,7 +878,7 @@ exports.Lexer = class Lexer
         locationToken = token
         tokensToPush = [token]
       if @tokens.length > firstIndex
-        # Create a 0-length "+" token.
+        # Create a 0-length `+` token.
         plusToken = @token '+', '+'
         plusToken[2] =
           first_line: locationToken[2].first_line
@@ -946,19 +947,19 @@ exports.Lexer = class Lexer
 
   # Same as `token`, except this just returns the token without adding it
   # to the results.
-  makeToken: (tag, value, offsetInChunk = 0, length = value.length) ->
+  makeToken: (tag, value, offsetInChunk = 0, length = value.length, origin) ->
     locationData = {}
     [locationData.first_line, locationData.first_column] =
       @getLineAndColumnFromChunk offsetInChunk
 
-    # Use length - 1 for the final offset - we're supplying the last_line and the last_column,
-    # so if last_column == first_column, then we're looking at a character of length 1.
+    # Use length - 1 for the final offset - we’re supplying the last_line and the last_column,
+    # so if last_column == first_column, then we’re looking at a character of length 1.
     lastCharacter = if length > 0 then (length - 1) else 0
     [locationData.last_line, locationData.last_column] =
       @getLineAndColumnFromChunk offsetInChunk + lastCharacter
 
     token = [tag, value, locationData]
-
+    token.origin = origin if origin
     token
 
   # Add a token to the results.
@@ -968,8 +969,7 @@ exports.Lexer = class Lexer
   #
   # Returns the new token.
   token: (tag, value, offsetInChunk, length, origin) ->
-    token = @makeToken tag, value, offsetInChunk, length
-    token.origin = origin if origin
+    token = @makeToken tag, value, offsetInChunk, length, origin
     @tokens.push token
     token
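
For orientation, a token is a plain three-element array, with any extra properties attached directly to the array object; a hypothetical example of the shape `makeToken` now produces (values and extra properties invented for illustration):

# Hypothetical token shape; not actual lexer output.
locationData =
  first_line: 0
  first_column: 6
  last_line: 0
  last_column: 10   # final offset uses length - 1, so a 5-character value spans columns 6–10
token = ['STRING', "'abc'", locationData]
token.origin = ['STRING', 'abc', locationData]        # assumed: the token this one was derived from
token.comments = [content: ' A traveling comment']    # optional; always an array when present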
