Update benchmark notes

byroot · byroot · commit 217017e83d12 · 2024-11-02T09:16:24.000+01:00
And add a new activitypub (mastodon) benchmark.
diff --git a/benchmark/data/activitypub.json b/benchmark/data/activitypub.json
diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb
@@ -55,22 +55,31 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
   puts
 end
 
-# On the first two micro benchmarks, the limitting factor is that we have to create a Generator::State object for every
-# call to `JSON.dump`, so we cause 2 allocations per call where alternatives only do one allocation.
-# The performance difference is mostly more time spent in GC because of this extra pressure.
-# If we re-use the same `JSON::State` instance, we're faster than Oj on the array benchmark, and much closer
-# on the Hash one.
+# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
+
+# On the first two micro benchmarks, the limiting factor is the fixed cost of initializing the
+# generator state. Since `JSON.generate` now lazily allocates the `State` object, we're ~10% faster
+# than `Oj.dump`.
 benchmark_encoding "small mixed", [1, "string", { a: 1, b: 2 }, [3, 4, 5]]
 benchmark_encoding "small nested array", [[1,2,3,4,5]]*10
+
+# On small hash specifically, we're just on par with `Oj.dump`. Would be worth investigating why
+# Hash serialization doesn't perform as well as other types.
 benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }
 
-# On these benchmarks we perform well. Either on par or very closely faster/slower
-benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state)
+# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~10% slower when dealing
+# with mostly multi-byte characters. This is a tradeoff.
 benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state)
 benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state)
-benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state)
+
+# On these benchmarks we perform well, we're on par or better.
+benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state)
+benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json"), except: %i(json_state)
 benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state)
 
+# On twitter.json we're still about 10% slower; this is worth investigating.
+benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state)
+
 # This benchmark spent the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation
 # which uses a relatively old version of dtoa.c from David M. Gay.
 # Oj in `compat` mode is ~10% slower than `json`, but in its default mode is noticeably faster here because
@@ -82,4 +91,6 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
 # Oj speed without losing precision.
 benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false, except: %i(json_state)
 
+# We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized, so there
+# might be opportunities here.
 benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20, except: %i(json_state)
diff --git a/benchmark/parser.rb b/benchmark/parser.rb
@@ -26,24 +26,32 @@ def benchmark_parsing(name, json_output)
   puts
 end
 
+# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
+
 # Oj::Parser is very significanly faster (2.70x) on the nested array benchmark
 # thanks to its stack implementation that saves resizing arrays.
+# But we're on par with `Oj.load`.
 benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10)
 
-# Oj::Parser is significanly faster (~1.5x) on the next 4 benchmarks
-# in large part thanks to its string caching.
+# Oj::Parser is significantly faster (~1.5x) on the next 4 benchmarks in large part thanks to its string caching.
+
 # Other than that we're either a bit slower or a bit faster than regular `Oj.load`.
 benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" })
 
 benchmark_parsing "test from oj", <<JSON
 {"a":"Alpha","b":true,"c":12345,"d":[true,[false,[-123456789,null],3.9676,["Something else.",false],null]],"e":{"zero":null,"one":1,"two":2,"three":[3],"four":[0,1,2,3,4]},"f":null,"h":{"a":{"b":{"c":{"d":{"e":{"f":{"g":null}}}}}}},"i":[[[[[[[null]]]]]]]}
 JSON
 
+# On these more realistic benchmarks, we're still significantly slower than alternatives.
+# Caching of keys is likely required to be able to match performance.
+# On the twitter and activitypub payloads the difference isn't that big (~10%)
+# but on citm_catalog it's up to a 50% difference.
+benchmark_parsing "activitypub.json", File.read("#{__dir__}/data/activitypub.json")
 benchmark_parsing "twitter.json", File.read("#{__dir__}/data/twitter.json")
 benchmark_parsing "citm_catalog.json", File.read("#{__dir__}/data/citm_catalog.json")
 
 # rapidjson is 8x faster thanks to it's much more performant float parser.
 # Unfortunately, there isn't a lot of existing fast float parsers in pure C,
 # and including C++ is problematic.
-# Aside from that, we're faster than other alternatives here.
+# Aside from that, we're much faster than other alternatives here.
 benchmark_parsing "float parsing", File.read("#{__dir__}/data/canada.json")