@@ -55,22 +55,31 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
55
55
puts
56
56
end
57
57
58
- # On the first two micro benchmarks, the limitting factor is that we have to create a Generator::State object for every
59
- # call to `JSON.dump`, so we cause 2 allocations per call where alternatives only do one allocation.
60
- # The performance difference is mostly more time spent in GC because of this extra pressure.
61
- # If we re-use the same `JSON::State` instance, we're faster than Oj on the array benchmark, and much closer
62
- # on the Hash one .
58
+ # NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
59
+
60
+ # On the first two micro benchmarks, the limiting factor is the fixed cost of initializing the
61
+ # generator state. Since `JSON.generate` now lazily allocates the `State` object, we're ~10% faster
62
+ # than `Oj.dump`.
63
63
benchmark_encoding "small mixed" , [ 1 , "string" , { a : 1 , b : 2 } , [ 3 , 4 , 5 ] ]
64
64
benchmark_encoding "small nested array" , [ [ 1 , 2 , 3 , 4 , 5 ] ] *10
65
+
66
+ # On small hash specifically, we're just on par with `Oj.dump`. Would be worth investigating why
67
+ # Hash serialization doesn't perform as well as other types.
65
68
benchmark_encoding "small hash" , { "username" => "jhawthorn" , "id" => 123 , "event" => "wrote json serializer" }
66
69
67
- # On these benchmarks we perform well. Either on par or very closely faster/ slower
68
- benchmark_encoding "integers" , ( 1_000_000 .. 1_001_000 ) . to_a , except : %i( json_state )
70
+ # On string encoding we're ~20% faster when dealing with mostly ASCII, but ~10% slower when dealing
71
+ # with mostly multi-byte characters. This is a tradeoff.
69
72
benchmark_encoding "mixed utf8" , ( [ ( "a" * 5000 ) + "€" + ( "a" * 5000 ) ] * 500 ) , except : %i( json_state )
70
73
benchmark_encoding "mostly utf8" , ( [ ( "€" * 3333 ) ] * 500 ) , except : %i( json_state )
71
- benchmark_encoding "twitter.json" , JSON . load_file ( "#{ __dir__ } /data/twitter.json" ) , except : %i( json_state )
74
+
75
+ # On these benchmarks we perform well: we're on par or better.
76
+ benchmark_encoding "integers" , ( 1_000_000 ..1_001_000 ) . to_a , except : %i( json_state )
77
+ benchmark_encoding "activitypub.json" , JSON . load_file ( "#{ __dir__ } /data/activitypub.json" ) , except : %i( json_state )
72
78
benchmark_encoding "citm_catalog.json" , JSON . load_file ( "#{ __dir__ } /data/citm_catalog.json" ) , except : %i( json_state )
73
79
80
+ # On twitter.json we're still about 10% slower; this is worth investigating.
81
+ benchmark_encoding "twitter.json" , JSON . load_file ( "#{ __dir__ } /data/twitter.json" ) , except : %i( json_state )
82
+
74
83
# This benchmark spent the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation
75
84
# which uses a relatively old version of dtoa.c from David M. Gay.
76
85
# Oj in `compat` mode is ~10% slower than `json`, but in its default mode is noticeably faster here because
@@ -82,4 +91,6 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
82
91
# Oj speed without losing precision.
83
92
benchmark_encoding "canada.json" , JSON . load_file ( "#{ __dir__ } /data/canada.json" ) , check_expected : false , except : %i( json_state )
84
93
94
+ # We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized, so there might be
95
+ # opportunities here.
85
96
benchmark_encoding "many #to_json calls" , [ { object : Object . new , int : 12 , float : 54.3 , class : Float , time : Time . now , date : Date . today } ] * 20 , except : %i( json_state )
0 commit comments