-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathprocess.rb
executable file
·177 lines (149 loc) · 5.91 KB
/
process.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#!/usr/bin/env ruby

# Process rails_ruby_bench output JSON files into per-cohort summary
# statistics. Samples are partitioned into cohorts by the configuration
# variables named in --cohorts-by.
require "json"
require "optparse"

# Defaults; both can be overridden on the command line below.
cohorts_by = "RUBY_VERSION,warmup_iterations,discourse_revision,random_seed"
input_glob = "rails_ruby_bench_*.json"

OptionParser.new do |opts|
  opts.banner = "Usage: ruby process.rb [options]"
  opts.on("-c", "--cohorts-by COHORTS", "Variables to partition data by, incl. RUBY_VERSION,warmup_iterations,etc.") do |c|
    cohorts_by = c
  end
  # Help text previously claimed "(default *.json)", which did not match the
  # actual default glob above.
  opts.on("-i", "--input-glob GLOB", "File pattern to match on (default rails_ruby_bench_*.json)") do |s|
    input_glob = s
  end
end.parse!
# Summary statistics are serialized to this file as JSON at the end of the run.
OUTPUT_FILE = "process_output.json"
# Each cohort is identified by the comma-joined values of these variables.
cohort_indices = cohorts_by.strip.split(",")
# Per-cohort accumulators, filled in while reading the input files below.
req_time_by_cohort = {}
run_by_cohort = {}
throughput_by_cohort = {}
startup_by_cohort = {}
INPUT_FILES = Dir[input_glob]
# Top-level structure written to OUTPUT_FILE; the accumulator hashes above are
# referenced here, so they appear in the report as they fill up.
process_output = {
  cohort_indices: cohort_indices,
  input_files: INPUT_FILES,
  req_time_by_cohort: req_time_by_cohort,
  run_by_cohort: run_by_cohort,
  throughput_by_cohort: throughput_by_cohort,
  startup_by_cohort: startup_by_cohort,
  processed: {
    :cohort => {},
  },
}
# Read every input file, assign its samples to a cohort, and accumulate
# request times, per-thread run times, throughput and startup times.
INPUT_FILES.each do |f|
  begin
    d = JSON.load File.read(f)
  rescue JSON::ParserError
    raise "Error parsing JSON in file: #{f.inspect}"
  end

  # Each cohort variable may live in either the "settings" or the
  # "environment" object of the benchmark JSON.
  cohort_parts = cohort_indices.map do |cohort_elt|
    raise "Unexpected file format for file #{f.inspect}!" unless d && d["settings"] && d["environment"]
    if d["settings"].has_key?(cohort_elt)
      d["settings"][cohort_elt]
    elsif d["environment"].has_key?(cohort_elt)
      d["environment"][cohort_elt]
    else
      raise "Can't find setting or environment object #{cohort_elt}!"
    end
  end
  cohort = cohort_parts.join(",")

  # Update data format to latest version
  if d["version"].nil?
    # Version-1 files store cumulative timestamps per thread; convert to
    # per-request durations by differencing consecutive timestamps.
    times = d["requests"]["times"].flat_map do |items|
      out_items = []
      cur_time = 0.0
      items.each do |i|
        out_items.push(i - cur_time)
        cur_time = i
      end
      out_items
    end
    # The final cumulative timestamp is each thread's total run time.
    runs = d["requests"]["times"].map { |thread_times| thread_times[-1] }
    # BUG FIX: was `runs.any?(:nil?)`, which is the pattern form of #any?
    # (it tests `:nil? === element`) and never detects nil entries.
    # `&:nil?` actually checks each element for nil.
    raise "Error with request times! #{d["requests"]["times"].inspect}" if runs.nil? || runs.any?(&:nil?)
  elsif [2, 3].include?(d["version"])
    # Versions 2 and 3 already store per-request durations per thread.
    times = d["requests"]["times"].flatten(1)
    runs = d["requests"]["times"].map { |thread_times| thread_times.inject(0.0, &:+) }
  else
    raise "Unrecognized data version #{d["version"].inspect} in JSON file #{f.inspect}!"
  end

  startup_by_cohort[cohort] ||= []
  startup_by_cohort[cohort].concat d["startup"]["times"]
  req_time_by_cohort[cohort] ||= []
  req_time_by_cohort[cohort].concat times
  run_by_cohort[cohort] ||= []
  run_by_cohort[cohort].push runs
  throughput_by_cohort[cohort] ||= []
  # Requests/sec for the whole run: total request count / slowest thread time.
  throughput_by_cohort[cohort].push (d["requests"]["times"].flatten.size / runs.max) unless runs.empty?
end
# Linear-interpolation percentile of a pre-sorted list.
#
# list - Array of numbers, sorted ascending (callers sort before calling)
# pct  - percentile to compute, 0..100
#
# Returns the interpolated value, clamping to the ends of the list at the
# extremes; returns nil for an empty list.
def percentile(list, pct)
  rank = pct * 0.01 * (list.length - 1)
  lower = rank.to_i
  # Clamp rather than indexing past either end of the list.
  return list[lower] if lower >= list.length - 1
  return list[0] if lower < 0
  frac = rank - lower
  below = list[lower]
  below + (list[lower + 1] - below) * frac
end
# Arithmetic mean of arr as a Float, or nil when arr is empty.
def array_mean(arr)
  return nil if arr.empty?
  total = arr.inject(0.0) { |sum, x| sum + x }
  total / arr.size
end
# Calculate sample variance based on the Wikipedia article of algorithms
# for variance (shifted-data algorithm; shifting by arr[0] keeps the
# intermediate sums small for better floating-point accuracy).
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
# Includes Bessel's correction.
#
# Returns nil when there are fewer than two samples (variance undefined).
def array_variance(arr)
  n = arr.size
  return nil if n < 2  # also covers the empty array
  # BUG FIX: accumulate in Floats. With `ex = ex2 = 0`, Integer input made
  # `(ex * ex) / n` an integer division and silently truncated the result.
  ex = ex2 = 0.0
  shift = arr[0]
  arr.each do |x|
    diff = x - shift
    ex += diff
    ex2 += diff * diff
  end
  (ex2 - (ex * ex) / n) / (n - 1)
end
# Print a human-readable summary for each cohort and record the same numbers
# in process_output for the JSON report. Cohorts are processed in sorted key
# order so output is deterministic.
req_time_by_cohort.keys.sort.each do |cohort|
  data = req_time_by_cohort[cohort]
  data.sort! # Sort request times lowest-to-highest for use with percentile()
  runs = run_by_cohort[cohort]
  # Thread completion times across all runs, sorted for percentile().
  flat_runs = runs.flatten.sort
  # NOTE(review): run_longest is computed but never used below.
  run_longest = runs.map { |worker_times| worker_times.max }
  throughputs = throughput_by_cohort[cohort].sort
  startup_times = startup_by_cohort[cohort].sort
  # Human-readable "variable: value" pairs identifying this cohort.
  cohort_printable = cohort_indices.zip(cohort.split(",")).map { |a, b| "#{a}: #{b}" }.join(", ")
  print "=====\nCohort: #{cohort_printable}, # of data points: #{data.size} http / #{startup_times.size} startup, full runs: #{runs.size}\n"
  process_output[:processed][:cohort][cohort] = {
    data_points: data.size,
    full_runs: runs.size,
    request_percentiles: {},
    run_percentiles: {},
    throughputs: throughputs,
  }
  # Per-request latency percentiles (keys are stringified for JSON output).
  [0, 1, 5, 10, 50, 90, 95, 99, 100].each do |p|
    process_output[:processed][:cohort][cohort][:request_percentiles][p.to_s] = percentile(data, p)
    print " #{"%2d" % p}%ile: #{percentile(data, p)}\n"
  end
  # Percentiles over individual thread completion times.
  print "--\n Overall thread completion times:\n"
  [0, 10, 50, 90, 100].each do |p|
    process_output[:processed][:cohort][cohort][:run_percentiles][p.to_s] = percentile(flat_runs, p)
    print " #{"%2d" % p}%ile: #{percentile(flat_runs, p)}\n"
  end
  # Throughput summary statistics for this cohort's full runs.
  print "--\n Throughput in reqs/sec for each full run:\n"
  print " Mean: #{array_mean(throughputs).inspect} Median: #{percentile(throughputs, 50).inspect} Variance: #{array_variance(throughputs).inspect}\n"
  process_output[:processed][:cohort][cohort][:throughput_mean] = array_mean(throughputs)
  process_output[:processed][:cohort][cohort][:throughput_median] = percentile(throughputs, 50)
  process_output[:processed][:cohort][cohort][:throughput_variance] = array_variance(throughputs)
  print " #{throughputs.inspect}\n\n"
  # Startup-time summary statistics for this cohort.
  process_output[:processed][:cohort][cohort][:startup_mean] = array_mean(startup_times)
  process_output[:processed][:cohort][cohort][:startup_median] = percentile(startup_times, 50)
  process_output[:processed][:cohort][cohort][:startup_variance] = array_variance(startup_times)
  print "--\n Startup times for this cohort:\n"
  print " Mean: #{array_mean(startup_times).inspect} Median: #{percentile(startup_times, 50).inspect} Variance: #{array_variance(startup_times).inspect}\n"
end
# Mark the end of console output, then dump the accumulated results as
# pretty-printed JSON for downstream tooling.
print "******************\n"
File.write(OUTPUT_FILE, JSON.pretty_generate(process_output))