Skip to content

Commit 8c8c14a

Browse files
Ignore heredoc end delimiter unless on a new line (#28)
1 parent a11a686 commit 8c8c14a

File tree

3 files changed

+86
-16
lines changed

3 files changed

+86
-16
lines changed

grammar.js

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -888,11 +888,11 @@ function defineQuoted(start, end, name) {
888888
[`_quoted_i_${name}`]: ($) =>
889889
seq(
890890
field("quoted_start", start),
891+
optional(alias($[`_quoted_content_i_${name}`], $.quoted_content)),
891892
repeat(
892-
choice(
893-
alias($[`_quoted_content_i_${name}`], $.quoted_content),
894-
$.interpolation,
895-
$.escape_sequence
893+
seq(
894+
choice($.interpolation, $.escape_sequence),
895+
optional(alias($[`_quoted_content_i_${name}`], $.quoted_content))
896896
)
897897
),
898898
field("quoted_end", end)
@@ -901,11 +901,12 @@ function defineQuoted(start, end, name) {
901901
[`_quoted_${name}`]: ($) =>
902902
seq(
903903
field("quoted_start", start),
904+
optional(alias($[`_quoted_content_${name}`], $.quoted_content)),
904905
repeat(
905-
choice(
906-
alias($[`_quoted_content_${name}`], $.quoted_content),
907-
// The end delimiter may always be escaped
908-
$.escape_sequence
906+
seq(
907+
// The end delimiter may be escaped in non-interpolating strings too
908+
$.escape_sequence,
909+
optional(alias($[`_quoted_content_${name}`], $.quoted_content))
909910
)
910911
),
911912
field("quoted_end", end)

src/scanner.cc

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,22 @@ int8_t find_quoted_token_info(const bool* valid_symbols) {
174174
bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
175175
lexer->result_symbol = info.token_type;
176176

177+
bool is_heredoc = (info.delimiter_length == 3);
178+
177179
for (bool has_content = false; true; has_content = true) {
180+
bool newline = false;
181+
182+
if (is_newline(lexer->lookahead)) {
183+
advance(lexer);
184+
185+
has_content = true;
186+
newline = true;
187+
188+
while (is_whitespace(lexer->lookahead)) {
189+
advance(lexer);
190+
}
191+
}
192+
178193
lexer->mark_end(lexer);
179194

180195
if (lexer->lookahead == info.end_delimiter) {
@@ -189,7 +204,7 @@ bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
189204
}
190205
}
191206

192-
if (length == info.delimiter_length) {
207+
if (length == info.delimiter_length && (!is_heredoc || newline)) {
193208
return has_content;
194209
}
195210
} else {
@@ -199,16 +214,18 @@ bool scan_quoted_content(TSLexer* lexer, const QuotedContentInfo& info) {
199214
return has_content;
200215
}
201216
} else if (lexer->lookahead == '\\') {
202-
if (info.supports_interpol) {
217+
advance(lexer);
218+
if (is_heredoc && lexer->lookahead == '\n') {
219+
// We need to know about the newline to correctly recognise
220+
// heredoc end delimiter, so we intentionally ignore escaping
221+
} else if (info.supports_interpol || lexer->lookahead == info.end_delimiter) {
203222
return has_content;
204-
} else {
205-
advance(lexer);
206-
if (lexer->lookahead == info.end_delimiter) {
207-
return has_content;
208-
}
209223
}
210224
} else if (lexer->lookahead == '\0') {
211-
return false;
225+
// If we reached the end of the file, this means there is no
226+
// end delimiter, so the syntax is invalid. In that case we
227+
// want to treat all the scanned content as quoted content.
228+
return has_content;
212229
} else {
213230
advance(lexer);
214231
}

test/corpus/term/string.txt

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
=====================================
2+
empty
3+
=====================================
4+
5+
""
6+
7+
---
8+
9+
(source
10+
(string))
11+
112
=====================================
213
single line
314
=====================================
@@ -171,6 +182,47 @@ this is #{
171182
(quoted_content)))
172183
(quoted_content)))
173184

185+
=====================================
186+
heredoc / delimiter in the middle
187+
=====================================
188+
189+
"""
190+
hey """
191+
"""
192+
193+
---
194+
195+
(source
196+
(string
197+
(quoted_content)))
198+
199+
=====================================
200+
heredoc / escaped newline (ignored)
201+
=====================================
202+
203+
"""
204+
hey \
205+
"""
206+
207+
"""
208+
hey \
209+
"""
210+
211+
"""
212+
hey \
213+
there
214+
"""
215+
216+
---
217+
218+
(source
219+
(string
220+
(quoted_content))
221+
(string
222+
(quoted_content))
223+
(string
224+
(quoted_content)))
225+
174226
=====================================
175227
heredoc / escaped delimiter
176228
=====================================

0 commit comments

Comments
 (0)