Skip to content

Commit 7e0f665

Browse files
committed
json_string_unescape: assume the string doesn't need unescaping
If that assumption holds true, then we don't need to copy the string into a buffer to unescape it. For small strings it just saves copying, but for large ones it also saves a malloc/free combo. Before: ``` == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 52.000 i/100ms oj 61.000 i/100ms oj strict 70.000 i/100ms Oj::Parser 71.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 510.111 (± 2.9%) i/s (1.96 ms/i) - 2.548k in 5.000029s oj 610.232 (± 3.1%) i/s (1.64 ms/i) - 3.050k in 5.003725s oj strict 713.231 (± 3.2%) i/s (1.40 ms/i) - 3.570k in 5.010902s Oj::Parser 762.598 (± 3.0%) i/s (1.31 ms/i) - 3.834k in 5.033130s rapidjson 553.029 (± 7.4%) i/s (1.81 ms/i) - 2.750k in 5.022630s Comparison: json: 510.1 i/s Oj::Parser: 762.6 i/s - 1.49x faster oj strict: 713.2 i/s - 1.40x faster oj: 610.2 i/s - 1.20x faster rapidjson: 553.0 i/s - same-ish: difference falls within error == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 28.000 i/100ms oj 33.000 i/100ms oj strict 37.000 i/100ms Oj::Parser 43.000 i/100ms rapidjson 38.000 i/100ms Calculating ------------------------------------- json 303.853 (± 3.6%) i/s (3.29 ms/i) - 1.540k in 5.076079s oj 348.009 (± 2.0%) i/s (2.87 ms/i) - 1.749k in 5.027738s oj strict 396.679 (± 3.3%) i/s (2.52 ms/i) - 1.998k in 5.042271s Oj::Parser 406.699 (± 2.2%) i/s (2.46 ms/i) - 2.064k in 5.077587s rapidjson 393.463 (± 3.3%) i/s (2.54 ms/i) - 1.976k in 5.028501s Comparison: json: 303.9 i/s Oj::Parser: 406.7 i/s - 1.34x faster oj strict: 396.7 i/s - 1.31x faster rapidjson: 393.5 i/s - 1.29x faster oj: 348.0 i/s - 1.15x faster ``` After: ``` == Parsing twitter.json (567916 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up 
-------------------------------------- json 56.000 i/100ms oj 62.000 i/100ms oj strict 72.000 i/100ms Oj::Parser 77.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 568.025 (± 2.1%) i/s (1.76 ms/i) - 2.856k in 5.030272s oj 630.936 (± 1.4%) i/s (1.58 ms/i) - 3.162k in 5.012630s oj strict 705.784 (±11.2%) i/s (1.42 ms/i) - 3.456k in 5.006706s Oj::Parser 783.989 (± 1.7%) i/s (1.28 ms/i) - 3.927k in 5.010343s rapidjson 557.630 (± 2.0%) i/s (1.79 ms/i) - 2.805k in 5.032388s Comparison: json: 568.0 i/s Oj::Parser: 784.0 i/s - 1.38x faster oj strict: 705.8 i/s - 1.24x faster oj: 630.9 i/s - 1.11x faster rapidjson: 557.6 i/s - same-ish: difference falls within error == Parsing citm_catalog.json (1727030 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json 29.000 i/100ms oj 33.000 i/100ms oj strict 38.000 i/100ms Oj::Parser 43.000 i/100ms rapidjson 37.000 i/100ms Calculating ------------------------------------- json 319.271 (± 3.1%) i/s (3.13 ms/i) - 1.595k in 5.001128s oj 347.946 (± 1.7%) i/s (2.87 ms/i) - 1.749k in 5.028395s oj strict 396.914 (± 3.0%) i/s (2.52 ms/i) - 2.014k in 5.079645s Oj::Parser 409.311 (± 2.7%) i/s (2.44 ms/i) - 2.064k in 5.046626s rapidjson 394.752 (± 1.5%) i/s (2.53 ms/i) - 1.998k in 5.062776s Comparison: json: 319.3 i/s Oj::Parser: 409.3 i/s - 1.28x faster oj strict: 396.9 i/s - 1.24x faster rapidjson: 394.8 i/s - 1.24x faster oj: 347.9 i/s - 1.09x faster ```
1 parent 7e557ee commit 7e0f665

File tree

3 files changed

+57
-30
lines changed

3 files changed

+57
-30
lines changed

ext/json/ext/parser/parser.c

+28-23
Original file line numberDiff line numberDiff line change
@@ -1450,20 +1450,20 @@ case 16:
14501450
}
14511451
}
14521452

1453-
static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize)
1453+
static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
14541454
{
14551455
if (symbolize) {
14561456
intern = true;
14571457
}
14581458
VALUE result;
14591459
# ifdef HAVE_RB_ENC_INTERNED_STR
14601460
if (intern) {
1461-
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
1461+
result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding());
14621462
} else {
1463-
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
1463+
result = rb_utf8_str_new(start, (long)(end - start));
14641464
}
14651465
# else
1466-
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
1466+
result = rb_utf8_str_new(start, (long)(end - start));
14671467
if (intern) {
14681468
# if STR_UMINUS_DEDUPE_FROZEN
14691469
// Starting from MRI 3.0 it is preferable to freeze the string
@@ -1488,14 +1488,19 @@ static inline VALUE build_string(const char *buffer, const char *bufferStart, bo
14881488
}
14891489

14901490
static const size_t MAX_STACK_BUFFER_SIZE = 128;
1491-
static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
1491+
static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize)
14921492
{
14931493
VALUE result = Qnil;
14941494
size_t bufferSize = stringEnd - string;
14951495
char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
14961496
int unescape_len;
14971497
char buf[4];
14981498

1499+
pe = memchr(p, '\\', bufferSize);
1500+
if (RB_LIKELY(pe == NULL)) {
1501+
return build_string(string, stringEnd, intern, symbolize);
1502+
}
1503+
14991504
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
15001505
# ifdef HAVE_RB_ENC_INTERNED_STR
15011506
bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
@@ -1598,7 +1603,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
15981603
buffer += pe - p;
15991604
}
16001605

1601-
result = build_string(buffer, bufferStart, intern, symbolize);
1606+
result = build_string(bufferStart, buffer, intern, symbolize);
16021607

16031608
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
16041609
ruby_xfree(bufferStart);
@@ -1608,15 +1613,15 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
16081613
}
16091614

16101615

1611-
#line 1612 "parser.c"
1616+
#line 1617 "parser.c"
16121617
enum {JSON_string_start = 1};
16131618
enum {JSON_string_first_final = 8};
16141619
enum {JSON_string_error = 0};
16151620

16161621
enum {JSON_string_en_main = 1};
16171622

16181623

1619-
#line 640 "parser.rl"
1624+
#line 645 "parser.rl"
16201625

16211626

16221627
static int
@@ -1637,15 +1642,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
16371642
VALUE match_string;
16381643

16391644

1640-
#line 1641 "parser.c"
1645+
#line 1646 "parser.c"
16411646
{
16421647
cs = JSON_string_start;
16431648
}
16441649

1645-
#line 660 "parser.rl"
1650+
#line 665 "parser.rl"
16461651
json->memo = p;
16471652

1648-
#line 1649 "parser.c"
1653+
#line 1654 "parser.c"
16491654
{
16501655
if ( p == pe )
16511656
goto _test_eof;
@@ -1670,7 +1675,7 @@ case 2:
16701675
goto st0;
16711676
goto st2;
16721677
tr2:
1673-
#line 627 "parser.rl"
1678+
#line 632 "parser.rl"
16741679
{
16751680
*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
16761681
if (NIL_P(*result)) {
@@ -1680,14 +1685,14 @@ case 2:
16801685
{p = (( p + 1))-1;}
16811686
}
16821687
}
1683-
#line 637 "parser.rl"
1688+
#line 642 "parser.rl"
16841689
{ p--; {p++; cs = 8; goto _out;} }
16851690
goto st8;
16861691
st8:
16871692
if ( ++p == pe )
16881693
goto _test_eof8;
16891694
case 8:
1690-
#line 1691 "parser.c"
1695+
#line 1696 "parser.c"
16911696
goto st0;
16921697
st3:
16931698
if ( ++p == pe )
@@ -1763,7 +1768,7 @@ case 7:
17631768
_out: {}
17641769
}
17651770

1766-
#line 662 "parser.rl"
1771+
#line 667 "parser.rl"
17671772

17681773
if (json->create_additions && RTEST(match_string = json->match_string)) {
17691774
VALUE klass;
@@ -1960,15 +1965,15 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
19601965
}
19611966

19621967

1963-
#line 1964 "parser.c"
1968+
#line 1969 "parser.c"
19641969
enum {JSON_start = 1};
19651970
enum {JSON_first_final = 10};
19661971
enum {JSON_error = 0};
19671972

19681973
enum {JSON_en_main = 1};
19691974

19701975

1971-
#line 872 "parser.rl"
1976+
#line 877 "parser.rl"
19721977

19731978

19741979
/*
@@ -1986,16 +1991,16 @@ static VALUE cParser_parse(VALUE self)
19861991
GET_PARSER;
19871992

19881993

1989-
#line 1990 "parser.c"
1994+
#line 1995 "parser.c"
19901995
{
19911996
cs = JSON_start;
19921997
}
19931998

1994-
#line 889 "parser.rl"
1999+
#line 894 "parser.rl"
19952000
p = json->source;
19962001
pe = p + json->len;
19972002

1998-
#line 1999 "parser.c"
2003+
#line 2004 "parser.c"
19992004
{
20002005
if ( p == pe )
20012006
goto _test_eof;
@@ -2029,7 +2034,7 @@ case 1:
20292034
cs = 0;
20302035
goto _out;
20312036
tr2:
2032-
#line 864 "parser.rl"
2037+
#line 869 "parser.rl"
20332038
{
20342039
char *np = JSON_parse_value(json, p, pe, &result, 0);
20352040
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -2039,7 +2044,7 @@ cs = 0;
20392044
if ( ++p == pe )
20402045
goto _test_eof10;
20412046
case 10:
2042-
#line 2043 "parser.c"
2047+
#line 2048 "parser.c"
20432048
switch( (*p) ) {
20442049
case 13: goto st10;
20452050
case 32: goto st10;
@@ -2128,7 +2133,7 @@ case 9:
21282133
_out: {}
21292134
}
21302135

2131-
#line 892 "parser.rl"
2136+
#line 897 "parser.rl"
21322137

21332138
if (cs >= JSON_first_final && p == pe) {
21342139
return result;

ext/json/ext/parser/parser.h

+18-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,23 @@
33

44
#include "ruby.h"
55

6+
/* This is the fallback definition from Ruby 3.4 */
7+
#ifndef RBIMPL_STDBOOL_H
8+
#if defined(__cplusplus)
9+
# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
10+
# include <cstdbool>
11+
# endif
12+
#elif defined(HAVE_STDBOOL_H)
13+
# include <stdbool.h>
14+
#elif !defined(HAVE__BOOL)
15+
typedef unsigned char _Bool;
16+
# define bool _Bool
17+
# define true ((_Bool)+1)
18+
# define false ((_Bool)+0)
19+
# define __bool_true_false_are_defined
20+
#endif
21+
#endif
22+
623
#ifndef MAYBE_UNUSED
724
# define MAYBE_UNUSED(x) x
825
#endif
@@ -46,7 +63,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
4663
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
4764
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
4865
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
49-
static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize);
66+
static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize);
5067
static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
5168
static VALUE convert_encoding(VALUE source);
5269
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self);

ext/json/ext/parser/parser.rl

+11-6
Original file line numberDiff line numberDiff line change
@@ -461,20 +461,20 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
461461
}
462462
}
463463

464-
static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize)
464+
static inline VALUE build_string(const char *start, const char *end, bool intern, bool symbolize)
465465
{
466466
if (symbolize) {
467467
intern = true;
468468
}
469469
VALUE result;
470470
# ifdef HAVE_RB_ENC_INTERNED_STR
471471
if (intern) {
472-
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
472+
result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding());
473473
} else {
474-
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
474+
result = rb_utf8_str_new(start, (long)(end - start));
475475
}
476476
# else
477-
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
477+
result = rb_utf8_str_new(start, (long)(end - start));
478478
if (intern) {
479479
# if STR_UMINUS_DEDUPE_FROZEN
480480
// Starting from MRI 3.0 it is preferable to freeze the string
@@ -499,14 +499,19 @@ static inline VALUE build_string(const char *buffer, const char *bufferStart, bo
499499
}
500500

501501
static const size_t MAX_STACK_BUFFER_SIZE = 128;
502-
static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
502+
static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize)
503503
{
504504
VALUE result = Qnil;
505505
size_t bufferSize = stringEnd - string;
506506
char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
507507
int unescape_len;
508508
char buf[4];
509509

510+
pe = memchr(p, '\\', bufferSize);
511+
if (RB_LIKELY(pe == NULL)) {
512+
return build_string(string, stringEnd, intern, symbolize);
513+
}
514+
510515
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
511516
# ifdef HAVE_RB_ENC_INTERNED_STR
512517
bufferStart = buffer = ALLOC_N(char, bufferSize ? bufferSize : 1);
@@ -609,7 +614,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
609614
buffer += pe - p;
610615
}
611616

612-
result = build_string(buffer, bufferStart, intern, symbolize);
617+
result = build_string(bufferStart, buffer, intern, symbolize);
613618

614619
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
615620
ruby_xfree(bufferStart);

0 commit comments

Comments
 (0)