Skip to content

Commit 5f2a563

Browse files
committed
📉 Add SequenceSet benchmarks
This adds a simple ruby script for measuring `ObjectSpace.memsize_of`, and several benchmark-driver scripts for: * `SequenceSet.new` (indirectly via `::[]`) * `SequenceSet#slice` (aka `#[]`) * Various set ops: `&`, `|`, `-`, `^`, `~` * Various set predicates: `#intersect?`, `#disjoint?`, `#cover?` * Several alternate implementations of: * AND — `#&` and `#intersect!` * NOT — `#~` and `#complement!` * XOR — `#^` and `#xor!`
1 parent 2bc753f commit 5f2a563

8 files changed

+486
-0
lines changed

benchmarks/seqset-memsize.rb

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# frozen_string_literal: true
2+
3+
$LOAD_PATH.unshift "./lib"
4+
require "net/imap"
5+
require "objspace"
6+
7+
# Builds a Net::IMAP::SequenceSet from +n+ random integers, each drawn from
# +min+..+max+ (+max+ defaults to 1.25 * n, truncated to an Integer).
def seqset(n, min: 1, max: (n * 1.25).to_i)
  bounds = min..max
  Net::IMAP::SequenceSet[Array.new(n) { rand(bounds) }]
end
11+
12+
# Returns +obj+ followed by every object reachable from it, each listed
# exactly once, so that summing a per-object size never double-counts.
#
# obj::  root of the traversal.
# seen:: Set of already-visited objects; shared across the recursion. The
#        default compares by identity, so two distinct objects that merely
#        happen to be +eql?+ (e.g. equal strings) are both kept.
#
# Returns an Array starting with +obj+, or an empty Array when +obj+ is
# already in +seen+. Reachable modules and classes are not followed.
def obj_tree(obj, seen: Set.new.compare_by_identity)
  # Mark at visit time (not when filtering the parent's children): an object
  # shared by two siblings would otherwise be emitted once per sibling.
  return [] unless seen.add?(obj)

  # reachable_objects_from returns nil for objects that are not managed by
  # the GC heap (e.g. small Integers), so fall back to "no children".
  children = (ObjectSpace.reachable_objects_from(obj) || [])
    .reject { _1 in Module }
    .flat_map { obj_tree(_1, seen:) }
  [obj, *children]
end
19+
20+
# Sums ObjectSpace.memsize_of over every object returned by obj_tree(obj).
def memsize(obj)
  obj_tree(obj).sum { |node| ObjectSpace.memsize_of(node) }
end
21+
22+
# Arithmetic mean of the numbers in +ary+; the division is done in floating
# point (an empty +ary+ therefore yields NaN).
def avg(ary)
  total = ary.sum
  total / ary.count.to_f
end
23+
24+
# Prints the average memsize of +count+ freshly built random SequenceSets of
# +n+ inputs each. One "." is printed per sample as a progress indicator.
# Any extra keyword arguments are forwarded to seqset.
def print_avg(n, count: 10, **)
  print format("Average memsize of SequenceSet with %6d inputs: ", n)
  sizes = Array.new(count) do
    print "."
    memsize(seqset(n, **))
  end
  puts format("%9.1f", avg(sizes))
end
32+
33+
# pp obj_tree(seqset(200, min: 1_000_000, max: 1_000_999)).to_h { [_1, memsize(_1)] }
34+
# Report the average memsize for input counts from 1 up to 100,000.
[1, 10, 100, 1_000, 10_000, 100_000].each { |n| print_avg n }

benchmarks/sequence_set-and.yml

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
---
2+
prelude: |
3+
require "yaml"
4+
require "net/imap"
5+
6+
INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
7+
MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
8+
WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
9+
10+
SETS = Array.new(1000) {
11+
Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
12+
}
13+
14+
# Samples two entries from SETS and returns them as [lhs, rhs]. The left
# operand is a dup, so the benchmarked bang methods work on a copy.
def sets
  left, right = SETS.sample(2)
  [left.dup, right]
end
18+
19+
# Alternate intersection (AND) implementations, added to SequenceSet so the
# benchmark can compare them with the built-in #&.
class Net::IMAP
  class SequenceSet
    # Non-mutating variants: run the matching bang method on a dup.
    def and0(other)
      remain_frozen dup.and0!(other)
    end

    def and1(other)
      remain_frozen dup.and1!(other)
    end

    def and2(other)
      remain_frozen dup.and2!(other)
    end

    # L - ~R
    def and0!(other)
      modifying!
      not_other = SequenceSet.new(other).complement!
      subtract not_other
    end

    # L - (L - R)
    def and1!(other)
      modifying!
      left_only = dup.subtract(SequenceSet.new(other))
      subtract left_only
    end

    # TODO: add this as a public method
    #
    # (L | R) - (R - ~L): the receiver is merged with +other+ first, and only
    # then is the copy of +other+ reduced to the part shared with the
    # original receiver, which is finally removed.
    def xor!(other) # :nodoc:
      modifying!
      copy = dup
      other = SequenceSet.new(other)
      merge(other).subtract(other.subtract(copy.complement!))
    end

    # L - (L ^ R)
    def and2!(other)
      modifying!
      symmetric_difference = SequenceSet.new(other).xor!(self)
      subtract symmetric_difference
    end
  end
end
52+
53+
# warmup (esp. for JIT)
54+
WARMUP_RUNS.times do
55+
lhs, rhs = sets
56+
lhs | rhs
57+
lhs & rhs
58+
lhs - rhs
59+
lhs ^ rhs
60+
~lhs
61+
lhs.and0 rhs
62+
lhs.and1 rhs
63+
lhs.and2 rhs
64+
end
65+
66+
benchmark:
67+
" L & R": l, r = sets; l & r
68+
" L - ~R": l, r = sets; l - ~r
69+
"and0 L - ~R": l, r = sets; l.and0 r
70+
"and0! L - ~R": l, r = sets; l.and0! r
71+
" L - (L - R)": l, r = sets; l - (l - r)
72+
"and1 L - (L - R)": l, r = sets; l.and1 r
73+
"and1! L - (L - R)": l, r = sets; l.and1! r
74+
" L - (L ^ R)": l, r = sets; l - (l ^ r)
75+
"and2 L - (L ^ R)": l, r = sets; l.and2 r
76+
"and2! L - (L ^ R)": l, r = sets; l.and2! r

benchmarks/sequence_set-new.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
---
2+
prelude: |
3+
$LOAD_PATH.unshift "./lib"
4+
require "net/imap"
5+
SeqSet = Net::IMAP::SequenceSet
6+
7+
N_RAND = 100
8+
9+
# Returns an Array of +n+ random integers, each drawn from +min+..+max+
# (+max+ defaults to 1.25 * n, truncated to an Integer).
def rand_nums(n, min: 1, max: (n * 1.25).to_i) = Array.new(n) { rand(min..max) }
10+
# Builds a SeqSet from rand_nums(...) and returns its elements in shuffled
# order.
def rand_entries(...)
  SeqSet[rand_nums(...)].elements.shuffle
end
11+
# Builds a SeqSet from rand_nums(...) and returns its string form with the
# comma-separated entries shuffled.
def rand_string(...)
  entries = SeqSet[rand_nums(...)].string.split(",")
  entries.shuffle.join(",")
end
12+
13+
# Returns +n_rand+ independent rand_string(n, **) results; extra keyword
# arguments are forwarded unchanged.
def build_string_inputs(n, n_rand, **)
  Array.new(n_rand) do
    rand_string(n, **)
  end
end
16+
17+
# Returns +n_rand+ independent rand_entries(n, **) results; extra keyword
# arguments are forwarded unchanged.
def build_int_inputs(n, n_rand, **)
  Array.new(n_rand) do
    rand_entries(n, **)
  end
end
20+
21+
inputs = nil
22+
i = 0
23+
24+
# warm up, especially for YJIT
25+
300.times do
26+
ints = rand_nums(1000)
27+
seqset = SeqSet[ints]
28+
string = seqset.string.split(?,).shuffle.join(?,)
29+
SeqSet[string]
30+
end
31+
32+
benchmark:
33+
34+
- name: n=10 ints
35+
prelude: inputs = build_int_inputs 10, N_RAND
36+
script: 10_000.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
37+
38+
- name: n=10 string
39+
prelude: inputs = build_string_inputs 10, N_RAND
40+
script: 10_000.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
41+
42+
- name: n=100 ints
43+
prelude: inputs = build_int_inputs 100, N_RAND
44+
script: 1_000.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
45+
46+
- name: n=100 string
47+
prelude: inputs = build_string_inputs 100, N_RAND
48+
script: 1_000.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
49+
50+
- name: n=1000 ints
51+
prelude: inputs = build_int_inputs 1000, N_RAND
52+
script: 100.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
53+
54+
- name: n=1000 string
55+
prelude: inputs = build_string_inputs 1000, N_RAND
56+
script: 100.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
57+
58+
- name: n=10,000 ints
59+
prelude: inputs = build_int_inputs 10_000, N_RAND
60+
script: 10.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
61+
62+
- name: n=10,000 string
63+
prelude: inputs = build_string_inputs 10_000, N_RAND
64+
script: 10.times do SeqSet[inputs[i = (i+1) % N_RAND]] end
65+
66+
- name: n=100,000 ints
  prelude: inputs = build_int_inputs 100_000, N_RAND / 2
  # inputs holds only N_RAND / 2 entries here, so the index must wrap at that
  # length; wrapping at N_RAND would index past the end and pass nil.
  script: SeqSet[inputs[i = (i+1) % (N_RAND / 2)]]
69+
70+
- name: n=100,000 string
71+
prelude: inputs = build_string_inputs 100_000, N_RAND / 2
72+
script: SeqSet[inputs[i = (i+1) % (N_RAND / 2)]]
73+
74+
# - name: n=1,000,000 ints
75+
# prelude: inputs = build_int_inputs 1_000_000
76+
# script: SeqSet[inputs[i = (i+1) % N_RAND]]
77+
78+
# - name: n=10,000,000 ints
79+
# prelude: inputs = build_int_inputs 10_000_000
80+
# script: SeqSet[inputs[i = (i+1) % N_RAND]]

benchmarks/sequence_set-not.yml

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
---
2+
prelude: |
3+
require "yaml"
4+
require "net/imap"
5+
6+
INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
7+
MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
8+
WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
9+
10+
SETS = Array.new(1000) {
11+
Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
12+
}
13+
14+
# Alternate complement (NOT) implementations, added to SequenceSet so the
# benchmark can compare them with the built-in #~.
class Net::IMAP
  class SequenceSet
    # Non-mutating form of #orig_not!: complements a dup.
    def orig_not
      remain_frozen dup.orig_not!
    end

    # 0.5.8 implementation
    def orig_not!
      return replace(self.class.full) if empty?
      return clear if full?
      # Flatten [[a, b], [c, d]] to [a-1, b+1, c-1, d+1]; after fixing up both
      # ends, consecutive pairs are the gaps between the original tuples.
      flat = @tuples.flat_map { [_1 - 1, _2 + 1] }
      if flat.first < 1 then flat.shift else flat.unshift 1 end
      if STAR_INT < flat.last then flat.pop else flat.push STAR_INT end
      @tuples = flat.each_slice(2).to_a
      normalize!
    end

    # enumerator based
    def enum_not
      result = SequenceSet.new
      each_tuple_complement do |min, max| result.tuples << [min, max] end
      remain_frozen result
    end

    # enumerator based
    def enum_not_2
      remain_frozen dup.enum_not!
    end

    # In-place complement built from #each_tuple_complement. Returns self.
    def enum_not!
      # Collect the whole complement before replacing @tuples.
      # each_tuple_complement iterates +tuples+ (presumably the same array as
      # @tuples) and reads +max+ after it has started yielding, so storing
      # results into @tuples during the iteration could overwrite entries
      # that have not been read yet (e.g. slot 0 when the set does not start
      # at 1).
      @tuples = each_tuple_complement.to_a
      self
    end

    private

    # Yields +min, max+ for every gap between (and around) the set's tuples,
    # i.e. the tuples of the complement. Returns an Enumerator without a
    # block, otherwise nil.
    def each_tuple_complement
      return to_enum(__method__) unless block_given?
      if full? then # no yield
      elsif empty? then yield 1, STAR_INT
      else
        yield 1, min - 1 unless min <= 1
        tuples.each_cons(2) do |(_, a), (b,_)| yield a+1, b-1 end
        yield max + 1, STAR_INT unless max == STAR_INT
      end
      nil
    end

  end
end
69+
70+
# warmup (esp. for JIT)
71+
WARMUP_RUNS.times do
72+
~SETS.sample
73+
SETS.sample.orig_not
74+
SETS.sample.enum_not
75+
SETS.sample.dup.orig_not!
76+
SETS.sample.dup.enum_not!
77+
end
78+
79+
benchmark:
80+
" ~set": ~SETS.sample
81+
"0.5.8 ~set": SETS.sample.orig_not
82+
"enum ~set": SETS.sample.enum_not
83+
"enum2 ~set": SETS.sample.enum_not_2
84+
"0.5.8 ~dup": SETS.sample.dup.orig_not!
85+
"enum ~dup": SETS.sample.dup.enum_not!

benchmarks/sequence_set-ops.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
---
2+
prelude: |
3+
require "yaml"
4+
require "net/imap"
5+
6+
INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
7+
MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
8+
WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
9+
10+
SETS = Array.new(1000) {
11+
Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
12+
}
13+
14+
# Samples two entries from SETS and returns them as [lhs, rhs]; the left
# operand is handed out as a dup.
def sets
  left, right = SETS.sample(2)
  [left.dup, right]
end
18+
19+
# warmup (esp. for JIT)
20+
# warmup (esp. for JIT). The run count comes from WARMUP_RUNS
# (PROFILE_WARMUP_RUNS, default 200) rather than a hard-coded literal, as in
# the other SequenceSet benchmark preludes.
WARMUP_RUNS.times do
  lhs, rhs = sets
  lhs | rhs
  lhs & rhs
  lhs - rhs
  lhs ^ rhs
  ~lhs
end
28+
29+
benchmark:
30+
union: l, r = sets; l | r
31+
intersection: l, r = sets; l & r
32+
difference: l, r = sets; l - r
33+
xor: l, r = sets; l ^ r
34+
complement: l, _ = sets; ~l
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
---
2+
prelude: |
3+
require "yaml"
4+
require "net/imap"
5+
6+
INPUT_COUNT = Integer ENV.fetch("PROFILE_INPUT_COUNT", 1000)
7+
MAX_INPUT = Integer ENV.fetch("PROFILE_MAX_INPUT", 1400)
8+
WARMUP_RUNS = Integer ENV.fetch("PROFILE_WARMUP_RUNS", 200)
9+
10+
SETS = Array.new(1000) {
11+
Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
12+
}
13+
14+
# Samples two entries from SETS and returns them as [lhs, rhs]; the left
# operand is handed out as a dup.
def sets
  left, right = SETS.sample(2)
  [left.dup, right]
end
18+
19+
# warmup (esp. for JIT)
20+
# warmup (esp. for JIT)
WARMUP_RUNS.times do
  lhs, rhs = sets
  lhs | rhs
  lhs & rhs
  lhs - rhs
  lhs ^ rhs
  ~lhs
  # Also exercise the predicates this file actually benchmarks, so they are
  # warmed up before measurement starts.
  lhs.intersect? rhs
  lhs.disjoint? rhs
  lhs.cover? rhs
end
28+
29+
benchmark:
30+
intersect?: l, r = sets; l.intersect? r
31+
disjoint?: l, r = sets; l.disjoint? r
32+
cover?: l, r = sets; l.cover? r

0 commit comments

Comments
 (0)