Skip to content

Commit dc19b2f

Browse files
authored
Merge pull request #671 from casperisfine/unescape-string-rstring
json_string_unescape: Use the returned RString as buffer
2 parents 35cf2b8 + 5e1ec4a commit dc19b2f

File tree

2 files changed

+33
-67
lines changed

2 files changed

+33
-67
lines changed

ext/json/ext/parser/parser.c

+25-42
Original file line number | Diff line number | Diff line change
@@ -1476,10 +1476,8 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
14761476
return result;
14771477
}
14781478

1479-
static const size_t MAX_STACK_BUFFER_SIZE = 128;
14801479
static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize)
14811480
{
1482-
VALUE result = Qnil;
14831481
size_t bufferSize = stringEnd - string;
14841482
char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
14851483
int unescape_len;
@@ -1490,19 +1488,9 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
14901488
return build_string(string, stringEnd, intern, symbolize);
14911489
}
14921490

1493-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1494-
# ifdef HAVE_RB_ENC_INTERNED_STR
1495-
bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
1496-
# else
1497-
bufferStart = buffer = ALLOC_N(char, bufferSize);
1498-
# endif
1499-
} else {
1500-
# ifdef HAVE_RB_ENC_INTERNED_STR
1501-
bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
1502-
# else
1503-
bufferStart = buffer = ALLOCA_N(char, bufferSize);
1504-
# endif
1505-
}
1491+
VALUE result = rb_str_buf_new(bufferSize);
1492+
rb_enc_associate_index(result, utf8_encindex);
1493+
buffer = bufferStart = RSTRING_PTR(result);
15061494

15071495
while (pe < stringEnd) {
15081496
if (*pe == '\\') {
@@ -1536,9 +1524,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
15361524
break;
15371525
case 'u':
15381526
if (pe > stringEnd - 4) {
1539-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1540-
ruby_xfree(bufferStart);
1541-
}
15421527
raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
15431528
} else {
15441529
uint32_t ch = unescape_unicode((unsigned char *) ++pe);
@@ -1556,9 +1541,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
15561541
if ((ch & 0xFC00) == 0xD800) {
15571542
pe++;
15581543
if (pe > stringEnd - 6) {
1559-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1560-
ruby_xfree(bufferStart);
1561-
}
15621544
raise_parse_error("incomplete surrogate pair at '%s'", p);
15631545
}
15641546
if (pe[0] == '\\' && pe[1] == 'u') {
@@ -1591,26 +1573,27 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
15911573
MEMCPY(buffer, p, char, pe - p);
15921574
buffer += pe - p;
15931575
}
1576+
rb_str_set_len(result, buffer - bufferStart);
15941577

1595-
result = build_string(bufferStart, buffer, intern, symbolize);
1596-
1597-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
1598-
ruby_xfree(bufferStart);
1578+
if (symbolize) {
1579+
result = rb_str_intern(result);
1580+
} else if (intern) {
1581+
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
15991582
}
16001583

16011584
return result;
16021585
}
16031586

16041587

1605-
#line 1606 "parser.c"
1588+
#line 1589 "parser.c"
16061589
enum {JSON_string_start = 1};
16071590
enum {JSON_string_first_final = 8};
16081591
enum {JSON_string_error = 0};
16091592

16101593
enum {JSON_string_en_main = 1};
16111594

16121595

1613-
#line 634 "parser.rl"
1596+
#line 617 "parser.rl"
16141597

16151598

16161599
static int
@@ -1631,15 +1614,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
16311614
VALUE match_string;
16321615

16331616

1634-
#line 1635 "parser.c"
1617+
#line 1618 "parser.c"
16351618
{
16361619
cs = JSON_string_start;
16371620
}
16381621

1639-
#line 654 "parser.rl"
1622+
#line 637 "parser.rl"
16401623
json->memo = p;
16411624

1642-
#line 1643 "parser.c"
1625+
#line 1626 "parser.c"
16431626
{
16441627
if ( p == pe )
16451628
goto _test_eof;
@@ -1664,7 +1647,7 @@ case 2:
16641647
goto st0;
16651648
goto st2;
16661649
tr2:
1667-
#line 621 "parser.rl"
1650+
#line 604 "parser.rl"
16681651
{
16691652
*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
16701653
if (NIL_P(*result)) {
@@ -1674,14 +1657,14 @@ case 2:
16741657
{p = (( p + 1))-1;}
16751658
}
16761659
}
1677-
#line 631 "parser.rl"
1660+
#line 614 "parser.rl"
16781661
{ p--; {p++; cs = 8; goto _out;} }
16791662
goto st8;
16801663
st8:
16811664
if ( ++p == pe )
16821665
goto _test_eof8;
16831666
case 8:
1684-
#line 1685 "parser.c"
1667+
#line 1668 "parser.c"
16851668
goto st0;
16861669
st3:
16871670
if ( ++p == pe )
@@ -1757,7 +1740,7 @@ case 7:
17571740
_out: {}
17581741
}
17591742

1760-
#line 656 "parser.rl"
1743+
#line 639 "parser.rl"
17611744

17621745
if (json->create_additions && RTEST(match_string = json->match_string)) {
17631746
VALUE klass;
@@ -1954,15 +1937,15 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
19541937
}
19551938

19561939

1957-
#line 1958 "parser.c"
1940+
#line 1941 "parser.c"
19581941
enum {JSON_start = 1};
19591942
enum {JSON_first_final = 10};
19601943
enum {JSON_error = 0};
19611944

19621945
enum {JSON_en_main = 1};
19631946

19641947

1965-
#line 866 "parser.rl"
1948+
#line 849 "parser.rl"
19661949

19671950

19681951
/*
@@ -1980,16 +1963,16 @@ static VALUE cParser_parse(VALUE self)
19801963
GET_PARSER;
19811964

19821965

1983-
#line 1984 "parser.c"
1966+
#line 1967 "parser.c"
19841967
{
19851968
cs = JSON_start;
19861969
}
19871970

1988-
#line 883 "parser.rl"
1971+
#line 866 "parser.rl"
19891972
p = json->source;
19901973
pe = p + json->len;
19911974

1992-
#line 1993 "parser.c"
1975+
#line 1976 "parser.c"
19931976
{
19941977
if ( p == pe )
19951978
goto _test_eof;
@@ -2023,7 +2006,7 @@ case 1:
20232006
cs = 0;
20242007
goto _out;
20252008
tr2:
2026-
#line 858 "parser.rl"
2009+
#line 841 "parser.rl"
20272010
{
20282011
char *np = JSON_parse_value(json, p, pe, &result, 0);
20292012
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -2033,7 +2016,7 @@ cs = 0;
20332016
if ( ++p == pe )
20342017
goto _test_eof10;
20352018
case 10:
2036-
#line 2037 "parser.c"
2019+
#line 2020 "parser.c"
20372020
switch( (*p) ) {
20382021
case 13: goto st10;
20392022
case 32: goto st10;
@@ -2122,7 +2105,7 @@ case 9:
21222105
_out: {}
21232106
}
21242107

2125-
#line 886 "parser.rl"
2108+
#line 869 "parser.rl"
21262109

21272110
if (cs >= JSON_first_final && p == pe) {
21282111
return result;

ext/json/ext/parser/parser.rl

+8-25
Original file line number | Diff line number | Diff line change
@@ -487,10 +487,8 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
487487
return result;
488488
}
489489

490-
static const size_t MAX_STACK_BUFFER_SIZE = 128;
491490
static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize)
492491
{
493-
VALUE result = Qnil;
494492
size_t bufferSize = stringEnd - string;
495493
char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
496494
int unescape_len;
@@ -501,19 +499,9 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
501499
return build_string(string, stringEnd, intern, symbolize);
502500
}
503501

504-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
505-
# ifdef HAVE_RB_ENC_INTERNED_STR
506-
bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
507-
# else
508-
bufferStart = buffer = ALLOC_N(char, bufferSize);
509-
# endif
510-
} else {
511-
# ifdef HAVE_RB_ENC_INTERNED_STR
512-
bufferStart = buffer = ALLOCA_N(char, bufferSize ? bufferSize : 1);
513-
# else
514-
bufferStart = buffer = ALLOCA_N(char, bufferSize);
515-
# endif
516-
}
502+
VALUE result = rb_str_buf_new(bufferSize);
503+
rb_enc_associate_index(result, utf8_encindex);
504+
buffer = bufferStart = RSTRING_PTR(result);
517505

518506
while (pe < stringEnd) {
519507
if (*pe == '\\') {
@@ -547,9 +535,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
547535
break;
548536
case 'u':
549537
if (pe > stringEnd - 4) {
550-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
551-
ruby_xfree(bufferStart);
552-
}
553538
raise_parse_error("incomplete unicode character escape sequence at '%s'", p);
554539
} else {
555540
uint32_t ch = unescape_unicode((unsigned char *) ++pe);
@@ -567,9 +552,6 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
567552
if ((ch & 0xFC00) == 0xD800) {
568553
pe++;
569554
if (pe > stringEnd - 6) {
570-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
571-
ruby_xfree(bufferStart);
572-
}
573555
raise_parse_error("incomplete surrogate pair at '%s'", p);
574556
}
575557
if (pe[0] == '\\' && pe[1] == 'u') {
@@ -602,11 +584,12 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
602584
MEMCPY(buffer, p, char, pe - p);
603585
buffer += pe - p;
604586
}
587+
rb_str_set_len(result, buffer - bufferStart);
605588

606-
result = build_string(bufferStart, buffer, intern, symbolize);
607-
608-
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
609-
ruby_xfree(bufferStart);
589+
if (symbolize) {
590+
result = rb_str_intern(result);
591+
} else if (intern) {
592+
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
610593
}
611594

612595
return result;

0 commit comments

Comments
 (0)