Skip to content

Commit c891016

Browse files
committed
fix(parser): emit lexer error on escaped surrogate to avoid panic
Fixes #608
1 parent 9fd66c8 commit c891016

File tree

7 files changed

+62
-19
lines changed

7 files changed

+62
-19
lines changed

crates/apollo-parser/src/ast/node_ext.rs

+2
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ fn unescape_string(input: &str) -> String {
167167
break;
168168
};
169169

170+
// TODO: https://github.com/apollographql/apollo-rs/issues/657 needs
171+
// changes both here and in `lexer/mod.rs`
170172
let mut unicode = || {
171173
// 1. Let value be the 16-bit hexadecimal value represented
172174
// by the sequence of hexadecimal digits within EscapedUnicode.

crates/apollo-parser/src/lexer/cursor.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ use crate::Error;
66
#[derive(Debug, Clone)]
77
pub(crate) struct Cursor<'a> {
88
index: usize,
9-
offset: usize,
10-
source: &'a str,
9+
pub(super) offset: usize,
10+
pub(super) source: &'a str,
1111
chars: CharIndices<'a>,
1212
pending: Option<char>,
1313
pub(crate) err: Option<Error>,

crates/apollo-parser/src/lexer/mod.rs

+18-1
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,24 @@ impl<'a> Cursor<'a> {
327327
_ => {
328328
if remaining <= 1 {
329329
state = State::StringLiteral;
330-
330+
let hex_end = self.offset + 1;
331+
let hex_start = hex_end - 4;
332+
let hex = &self.source[hex_start..hex_end];
333+
// `is_ascii_hexdigit()` checks in previous iterations ensure
334+
// this `unwrap()` does not panic:
335+
let code_point = u32::from_str_radix(hex, 16).unwrap();
336+
if char::from_u32(code_point).is_none() {
337+
// TODO: https://github.com/apollographql/apollo-rs/issues/657 needs
338+
// changes both here and in `ast/node_ext.rs`
339+
let escape_sequence_start = hex_start - 2; // include "\u"
340+
let escape_sequence = &self.source[escape_sequence_start..hex_end];
341+
self.add_err(Error::new(
342+
"surrogate code point is invalid in unicode escape sequence \
343+
(paired surrogate not supported yet: \
344+
https://github.com/apollographql/apollo-rs/issues/657)",
345+
escape_sequence.to_owned(),
346+
));
347+
}
331348
continue;
332349
}
333350

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# TODO: move these cases back to ok/0004_string_value.graphql when
2+
# https://github.com/apollographql/apollo-rs/issues/657 is implemented
3+
"string with unicode surrogate pair escape \uD83D\uDE00"
4+
"string with minimal surrogate pair escape \uD800\uDC00"
5+
"string with maximal surrogate pair escape \uDBFF\uDFFF"
6+
7+
# TODO: emit two errors: https://github.com/apollographql/apollo-rs/issues/319
8+
"split pair \uD83D \uDE00"
9+
10+
"Backwards pair \uDE00\uD83D"
11+
"Lone lead surrogate \uD83E"
12+
"Lone trail surrogate \uDD80"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
COMMENT@0:66 "# TODO: move these cases back to ok/0004_string_value.graphql when"
2+
WHITESPACE@66:67 "\n"
3+
COMMENT@67:137 "# https://github.com/apollographql/apollo-rs/issues/657 is implemented"
4+
WHITESPACE@137:138 "\n"
5+
ERROR@138:194 "surrogate code point is invalid in unicode escape sequence (paired surrogate not supported yet: https://github.com/apollographql/apollo-rs/issues/657)" "string with unicode surrogate pair escape \uD83D\uDE00"
6+
WHITESPACE@194:195 "\n"
7+
ERROR@195:251 "surrogate code point is invalid in unicode escape sequence (paired surrogate not supported yet: https://github.com/apollographql/apollo-rs/issues/657)" "string with minimal surrogate pair escape \uD800\uDC00"
8+
WHITESPACE@251:252 "\n"
9+
ERROR@252:308 "surrogate code point is invalid in unicode escape sequence (paired surrogate not supported yet: https://github.com/apollographql/apollo-rs/issues/657)" "string with maximal surrogate pair escape \uDBFF\uDFFF"
10+
WHITESPACE@308:310 "\n\n"
11+
COMMENT@310:388 "# TODO: emit two errors: https://github.com/apollographql/apollo-rs/issues/319"
12+
WHITESPACE@388:389 "\n"
13+
ERROR@389:415 "surrogate code point is invalid in unicode escape sequence (paired surrogate not supported yet: https://github.com/apollographql/apollo-rs/issues/657)" "split pair \uD83D \uDE00"
14+
WHITESPACE@415:417 "\n\n"
15+
ERROR@417:446 "surrogate code point is invalid in unicode escape sequence (paired surrogate not supported yet: https://github.com/apollographql/apollo-rs/issues/657)" "Backwards pair \uDE00\uD83D"
16+
WHITESPACE@446:447 "\n"
17+
ERROR@447:475 "surrogate code point is invalid in unicode escape sequence (paired surrogate not supported yet: https://github.com/apollographql/apollo-rs/issues/657)" "Lone lead surrogate \uD83E"
18+
WHITESPACE@475:476 "\n"
19+
ERROR@476:505 "surrogate code point is invalid in unicode escape sequence (paired surrogate not supported yet: https://github.com/apollographql/apollo-rs/issues/657)" "Lone trail surrogate \uDD80"
20+
WHITESPACE@505:506 "\n"
21+
EOF@506:506

crates/apollo-parser/test_data/lexer/ok/0004_string_value.graphql

-3
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
"simple"
33
" white space "
44
"unicode \u1234\u5678\u90AB\uCDEF"
5-
"string with unicode surrogate pair escape \uD83D\uDE00"
6-
"string with minimal surrogate pair escape \uD800\uDC00"
7-
"string with maximal surrogate pair escape \uDBFF\uDFFF"
85
"string with \"escaped\" characters"
96
"string with multiple languages котя, 猫, ねこ, قطة"
107
"""

crates/apollo-parser/test_data/lexer/ok/0004_string_value.txt

+7-13
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,10 @@ STRING_VALUE@12:27 "\" white space \""
66
WHITESPACE@27:28 "\n"
77
STRING_VALUE@28:62 "\"unicode \\u1234\\u5678\\u90AB\\uCDEF\""
88
WHITESPACE@62:63 "\n"
9-
STRING_VALUE@63:119 "\"string with unicode surrogate pair escape \\uD83D\\uDE00\""
10-
WHITESPACE@119:120 "\n"
11-
STRING_VALUE@120:176 "\"string with minimal surrogate pair escape \\uD800\\uDC00\""
12-
WHITESPACE@176:177 "\n"
13-
STRING_VALUE@177:233 "\"string with maximal surrogate pair escape \\uDBFF\\uDFFF\""
14-
WHITESPACE@233:234 "\n"
15-
STRING_VALUE@234:270 "\"string with \\\"escaped\\\" characters\""
16-
WHITESPACE@270:271 "\n"
17-
STRING_VALUE@271:333 "\"string with multiple languages котя, 猫, ねこ, قطة\""
18-
WHITESPACE@333:334 "\n"
19-
STRING_VALUE@334:421 "\"\"\"\nblock string with unusual whitespaces\na b c\nd\n\ne\tf\ng\u{2028}\u{2029}h\ni\u{b}j\u{c}k\u{feff}l\u{85}\u{200e}\u{200f}m\n\"\"\""
20-
WHITESPACE@421:422 "\n"
21-
EOF@422:422
9+
STRING_VALUE@63:99 "\"string with \\\"escaped\\\" characters\""
10+
WHITESPACE@99:100 "\n"
11+
STRING_VALUE@100:162 "\"string with multiple languages котя, 猫, ねこ, قطة\""
12+
WHITESPACE@162:163 "\n"
13+
STRING_VALUE@163:250 "\"\"\"\nblock string with unusual whitespaces\na b c\nd\n\ne\tf\ng\u{2028}\u{2029}h\ni\u{b}j\u{c}k\u{feff}l\u{85}\u{200e}\u{200f}m\n\"\"\""
14+
WHITESPACE@250:251 "\n"
15+
EOF@251:251

0 commit comments

Comments
 (0)