Skip to content

Commit cc1daba

Browse files
committed
Further improve parsing errors
Report EOF when applicable instead of an empty fragment. Also stop fragment extraction on first whitespace.
1 parent f3dde3c commit cc1daba

File tree

3 files changed

+65
-38
lines changed

3 files changed

+65
-38
lines changed

ext/json/ext/parser/parser.c

+46-34
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ RBIMPL_ATTR_NORETURN()
393393
#endif
394394
static void raise_parse_error(const char *format, JSON_ParserState *state)
395395
{
396-
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
396+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
397397

398398
const char *cursor = state->cursor;
399399
long column = 0;
@@ -412,22 +412,34 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
412412
}
413413
}
414414

415-
const char *ptr = state->cursor;
416-
size_t len = ptr ? strnlen(ptr, PARSE_ERROR_FRAGMENT_LEN) : 0;
415+
const char *ptr = "EOF";
416+
if (state->cursor && state->cursor < state->end) {
417+
ptr = state->cursor;
418+
size_t len = 0;
419+
while (len < PARSE_ERROR_FRAGMENT_LEN) {
420+
char ch = ptr[len];
421+
if (!ch || ch == '\n' || ch == ' ' || ch == '\t' || ch == '\r') {
422+
break;
423+
}
424+
len++;
425+
}
417426

418-
if (len == PARSE_ERROR_FRAGMENT_LEN) {
419-
MEMCPY(buffer, ptr, char, PARSE_ERROR_FRAGMENT_LEN);
427+
if (len) {
428+
buffer[0] = '\'';
429+
MEMCPY(buffer + 1, ptr, char, len);
420430

421-
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
422-
len--;
423-
}
431+
while (buffer[len] >= 0x80 && buffer[len] < 0xC0) { // Is continuation byte
432+
len--;
433+
}
424434

425-
if (buffer[len - 1] >= 0xC0) { // multibyte character start
426-
len--;
427-
}
435+
if (buffer[len] >= 0xC0) { // multibyte character start
436+
len--;
437+
}
428438

429-
buffer[len] = '\0';
430-
ptr = (const char *)buffer;
439+
buffer[len + 1] = '\'';
440+
buffer[len + 2] = '\0';
441+
ptr = (const char *)buffer;
442+
}
431443
}
432444

433445
VALUE msg = rb_sprintf(format, ptr);
@@ -473,16 +485,16 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
473485
signed char b;
474486
uint32_t result = 0;
475487
b = digit_values[p[0]];
476-
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
488+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
477489
result = (result << 4) | (unsigned char)b;
478490
b = digit_values[p[1]];
479-
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
491+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
480492
result = (result << 4) | (unsigned char)b;
481493
b = digit_values[p[2]];
482-
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
494+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
483495
result = (result << 4) | (unsigned char)b;
484496
b = digit_values[p[3]];
485-
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, (char *)p - 2);
497+
if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
486498
result = (result << 4) | (unsigned char)b;
487499
return result;
488500
}
@@ -532,11 +544,11 @@ json_eat_comments(JSON_ParserState *state)
532544
break;
533545
}
534546
default:
535-
raise_parse_error("unexpected token '%s'", state);
547+
raise_parse_error("unexpected token %s", state);
536548
break;
537549
}
538550
} else {
539-
raise_parse_error("unexpected token '%s'", state);
551+
raise_parse_error("unexpected token %s", state);
540552
}
541553
}
542554

@@ -655,7 +667,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
655667
break;
656668
case 'u':
657669
if (pe > stringEnd - 5) {
658-
raise_parse_error_at("incomplete unicode character escape sequence at '%s'", state, p);
670+
raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
659671
} else {
660672
uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
661673
pe += 3;
@@ -672,7 +684,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
672684
if ((ch & 0xFC00) == 0xD800) {
673685
pe++;
674686
if (pe > stringEnd - 6) {
675-
raise_parse_error_at("incomplete surrogate pair at '%s'", state, p);
687+
raise_parse_error_at("incomplete surrogate pair at %s", state, p);
676688
}
677689
if (pe[0] == '\\' && pe[1] == 'u') {
678690
uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
@@ -894,15 +906,15 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
894906
return json_push_value(state, config, Qnil);
895907
}
896908

897-
raise_parse_error("unexpected token '%s'", state);
909+
raise_parse_error("unexpected token %s", state);
898910
break;
899911
case 't':
900912
if ((state->end - state->cursor >= 4) && (memcmp(state->cursor, "true", 4) == 0)) {
901913
state->cursor += 4;
902914
return json_push_value(state, config, Qtrue);
903915
}
904916

905-
raise_parse_error("unexpected token '%s'", state);
917+
raise_parse_error("unexpected token %s", state);
906918
break;
907919
case 'f':
908920
// Note: memcmp with a small power of two compile to an integer comparison
@@ -911,7 +923,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
911923
return json_push_value(state, config, Qfalse);
912924
}
913925

914-
raise_parse_error("unexpected token '%s'", state);
926+
raise_parse_error("unexpected token %s", state);
915927
break;
916928
case 'N':
917929
// Note: memcmp with a small power of two compile to an integer comparison
@@ -920,15 +932,15 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
920932
return json_push_value(state, config, CNaN);
921933
}
922934

923-
raise_parse_error("unexpected token '%s'", state);
935+
raise_parse_error("unexpected token %s", state);
924936
break;
925937
case 'I':
926938
if (config->allow_nan && (state->end - state->cursor >= 8) && (memcmp(state->cursor, "Infinity", 8) == 0)) {
927939
state->cursor += 8;
928940
return json_push_value(state, config, CInfinity);
929941
}
930942

931-
raise_parse_error("unexpected token '%s'", state);
943+
raise_parse_error("unexpected token %s", state);
932944
break;
933945
case '-':
934946
// Note: memcmp with a small power of two compile to an integer comparison
@@ -937,7 +949,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
937949
state->cursor += 9;
938950
return json_push_value(state, config, CMinusInfinity);
939951
} else {
940-
raise_parse_error("unexpected token '%s'", state);
952+
raise_parse_error("unexpected token %s", state);
941953
}
942954
}
943955
// Fallthrough
@@ -1062,7 +1074,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10621074
}
10631075

10641076
if (*state->cursor != '"') {
1065-
raise_parse_error("expected object key, got '%s'", state);
1077+
raise_parse_error("expected object key, got %s", state);
10661078
}
10671079
json_parse_string(state, config, true);
10681080

@@ -1097,13 +1109,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10971109
}
10981110

10991111
if (*state->cursor != '"') {
1100-
raise_parse_error("expected object key, got: '%s'", state);
1112+
raise_parse_error("expected object key, got: %s", state);
11011113
}
11021114
json_parse_string(state, config, true);
11031115

11041116
json_eat_whitespace(state);
11051117
if ((state->cursor >= state->end) || (*state->cursor != ':')) {
1106-
raise_parse_error("expected ':' after object key, got: '%s'", state);
1118+
raise_parse_error("expected ':' after object key, got: %s", state);
11071119
}
11081120
state->cursor++;
11091121

@@ -1113,24 +1125,24 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
11131125
}
11141126
}
11151127

1116-
raise_parse_error("expected ',' or '}' after object value, got: '%s'", state);
1128+
raise_parse_error("expected ',' or '}' after object value, got: %s", state);
11171129
}
11181130
break;
11191131
}
11201132

11211133
default:
1122-
raise_parse_error("unexpected character: '%s'", state);
1134+
raise_parse_error("unexpected character: %s", state);
11231135
break;
11241136
}
11251137

1126-
raise_parse_error("unreacheable: '%s'", state);
1138+
raise_parse_error("unreacheable: %s", state);
11271139
}
11281140

11291141
static void json_ensure_eof(JSON_ParserState *state)
11301142
{
11311143
json_eat_whitespace(state);
11321144
if (state->cursor != state->end) {
1133-
raise_parse_error("unexpected token at end of stream '%s'", state);
1145+
raise_parse_error("unexpected token at end of stream %s", state);
11341146
}
11351147
}
11361148

test/json/json_ext_parser_test.rb

+18-3
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,35 @@ def test_allocate
1414
end
1515

1616
def test_error_messages
17-
ex = assert_raise(ParserError) { parse('Infinity') }
17+
ex = assert_raise(ParserError) { parse('Infinity something') }
1818
unless RUBY_PLATFORM =~ /java/
1919
assert_equal "unexpected token 'Infinity' at line 1 column 1", ex.message
2020
end
2121

22-
ex = assert_raise(ParserError) { parse('-Infinity') }
22+
ex = assert_raise(ParserError) { parse('foo bar') }
23+
unless RUBY_PLATFORM =~ /java/
24+
assert_equal "unexpected token 'foo' at line 1 column 1", ex.message
25+
end
26+
27+
ex = assert_raise(ParserError) { parse('-Infinity something') }
2328
unless RUBY_PLATFORM =~ /java/
2429
assert_equal "unexpected token '-Infinity' at line 1 column 1", ex.message
2530
end
2631

27-
ex = assert_raise(ParserError) { parse('NaN') }
32+
ex = assert_raise(ParserError) { parse('NaN something') }
2833
unless RUBY_PLATFORM =~ /java/
2934
assert_equal "unexpected token 'NaN' at line 1 column 1", ex.message
3035
end
36+
37+
ex = assert_raise(ParserError) { parse(' ') }
38+
unless RUBY_PLATFORM =~ /java/
39+
assert_equal "unexpected end of input at line 1 column 4", ex.message
40+
end
41+
42+
ex = assert_raise(ParserError) { parse('{ ') }
43+
unless RUBY_PLATFORM =~ /java/
44+
assert_equal "expected object key, got EOF at line 1 column 5", ex.message
45+
end
3146
end
3247

3348
if GC.respond_to?(:stress=)

test/json/json_parser_test.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,7 @@ def test_parse_error_incomplete_hash
646646
JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"[email protected]"}')
647647
end
648648
if RUBY_ENGINE == "ruby"
649-
assert_equal %(expected ',' or '}' after object value, got: '' at line 1 column 72), error.message
649+
assert_equal %(expected ',' or '}' after object value, got: EOF at line 1 column 72), error.message
650650
end
651651
end
652652

0 commit comments

Comments
 (0)