-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathplots.py
executable file
·127 lines (123 loc) · 5.63 KB
/
plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
"""
This script provides tools for parsing and plotting benchmark results. See
the `plots.ipynb` notebook for usage.
"""
import os
import re
import numpy as np
import proplot as plot
#------------------------------------------------------------------------------#
# Helper functions
#------------------------------------------------------------------------------#
def sanitize(cell):
    """
    Normalizes the contents of a single table cell.

    Surrounding spaces and bold/italic asterisks are stripped. A cell made of
    a number with an optional one-letter size suffix ('k', 'M' or 'G', case
    insensitive) is converted to a float expressed in megabytes: 'k' scales
    the number by 1e-3, 'G' scales it by 1e3, and 'M' or no suffix leaves it
    unchanged. Any other cell is returned as the stripped string.

    Parameters
    ----------
    cell : str
        Raw cell text.

    Returns
    -------
    float, str or None
        The parsed number, the stripped text, or None for an empty cell.
    """
    cell = cell.strip(' *')
    if not cell:
        # Only *empty* cells become None. Testing the parsed value instead
        # would also turn a legitimate numeric zero into None.
        return None
    match = re.match(r'^([0-9.]+)([kKmMgG])?$', cell)  # size suffix is optional
    if match is None:
        return cell
    num, size = match.groups()
    try:
        value = float(num)
    except ValueError:
        # The character class also admits non-numbers such as '1.2.3' or '.';
        # treat those as plain text instead of crashing.
        return cell
    mult = {'k': 1e-3, 'g': 1e3}.get(size.lower(), 1) if size else 1
    return value * mult
def runtimes(name, dir_, server='uriah'):
    """
    Parses the Markdown-style tables generated by the benchmark scripts.

    Reads `results/{name}_{dir_}_{server}.log`, which holds one table per
    blank-line-separated chunk. In every table the first two rows are headers
    and are skipped. Each remaining row is split on '|' and every cell is
    passed through `sanitize`. The first three cells of a row are used here
    as (nlats, size, category); the 4th cell is read by the plotting
    functions as the 'real' time.

    Parameters
    ----------
    name, dir_, server : str
        Components of the log file name.

    Returns
    -------
    cats : list
        Category (3rd cell) of each row of the first table.
    nlats : list of int
        First cell of each table, converted to int.
    sizes : list
        Second cell of each table.
    tables : list
        The parsed tables, as a list of rows, each row a list of cells.
    fname : str
        Path of the figure file matching the log file (`.png` suffix).

    Raises
    ------
    ValueError
        If no table is found, if a row has fewer than three cells, or if the
        tables disagree on categories or contain varying nlats/size values.
    """
    logfile = f'results/{name}_{dir_}_{server}.log'
    fname = f'results/{name}_{dir_}_{server}.png'
    with open(logfile, 'r') as f:
        data = f.read()
    # Tables are formatted with leading and trailing column dividers '|',
    # ignore these; also ignore the 2 header rows. Spaces and italics or bold
    # asterisks in each cell are removed by sanitize().
    tables = []
    for chunk in data.split('\n\n'):
        rows = [[sanitize(cell) for cell in line.split('|')[1:-1]]
                for line in chunk.strip().split('\n')[2:]]
        # Drop *every* empty table (extra newlines, whitespace-only or
        # header-only chunks), otherwise indexing the first row below fails.
        if rows:
            tables.append(rows)
    if not tables:
        raise ValueError(f'No tables found for file "{logfile}".')
    if any(len(row) < 3 for table in tables for row in table):
        raise ValueError(f'Malformed table row in file "{logfile}".')
    # Check tables
    # Change this as table format changes!
    cats = [row[2] for row in tables[0]]
    nlats = [int(table[0][0]) for table in tables]
    sizes = [table[0][1] for table in tables]
    for table in tables:  # make sure each table has same categories as first one
        if [row[2] for row in table] != cats:
            raise ValueError('Inconsistent tables.')
    # Make sure nlats (cell 0) and size (cell 1) are constant within a table
    for col, expected in enumerate((nlats, sizes)):
        for value, table in zip(expected, tables):
            if any(row[col] != value for row in table):
                raise ValueError('Inconsistent tables.')
    return cats, nlats, sizes, tables, fname
#------------------------------------------------------------------------------#
# Plotting functions
#------------------------------------------------------------------------------#
def benchmark(name, dir_, ncols=3, ymin=0.1, ymax=50, server='uriah'):
    """
    Draws a two-panel summary of the benchmark timings, one panel with linear
    axes and one with logarithmic axes, saves it to the figure path returned
    by `runtimes`, and returns the figure.

    `name`, `dir_` and `server` select the results file, `ncols` is the number
    of legend columns, `ymax` is the upper y-limit of both panels and `ymin`
    is the lower y-limit of the log panel (the linear panel starts at 0).
    """
    cats, nlats, sizes, tables, fname = runtimes(name, dir_, server=server)
    plot.rc.cycle = 'colorblind10'
    # Use a friendlier label than the hostname in the figure title
    aliases = {'uriah':'macbook', 'cheyenne4':'supercomputer', 'monde':'server'}
    server = aliases.get(server, server)
    fig, axs = plot.subplots(axwidth=2.5, ncols=2, aspect=(2,3), legend='b', span=False, share=1)
    # The xarray categories get shades of grey, all the others get the
    # colorblind10 colors with entries 5 and 7 left out
    is_xarray = ['xarray' in cat.lower() for cat in cats]
    greys = plot.colors('greys', sum(is_xarray), left=0.3)
    palette = [color for j, color in enumerate(plot.colors('colorblind10')) if j not in (5, 7)]
    order = np.argsort(cats)  # categories are drawn in alphabetical order
    for ax, scale in zip(axs, ('linear', 'log')):
        handles = []
        ngrey = nother = 0  # position within each palette
        for idx in order:
            label = cats[idx]
            if is_xarray[idx]:
                color = greys[ngrey]
                ngrey += 1
            else:
                color = palette[nother]
                nother += 1
            # The 4th cell of each row contains the 'real' time
            times = [table[idx][3] for table in tables]
            handles.extend(
                ax.plot(sizes, times, color=color, marker='o', markersize=6, label=label, lw=1)
            )
        if scale == 'linear':
            ylim = (0, ymax)
        else:
            ylim = (ymin, ymax)
        ax.format(
            xlabel='file size (MB)', ylabel='time (seconds)', gridminor=True,
            ylim=ylim, yscale=scale, yformatter='scalar',
            xlim=(min(sizes), max(sizes)), xscale=scale, xformatter='scalar',
            title=f'{scale.title()} scale', suptitle=f'{name.title()} benchmark on {server}',
        )
    # The legend uses the handles of the last panel drawn
    fig.bpanel.legend(handles, ncols=ncols, order='F')
    fig.save(fname)
    return fig
def relative(name, dir_):
    """
    Plots the runtime of every category divided by the NCO runtime.

    The results are read with `runtimes` (default server). The category whose
    name is 'nco' (case insensitive) is the reference and is not drawn
    itself. Returns the figure; nothing is saved to disk.

    Raises ValueError if the tables contain no 'nco' category.
    """
    # runtimes() also returns the figure file name after the tables; only the
    # first four values are needed here.
    cats, _nlats, sizes, tables, *_ = runtimes(name, dir_)
    lower = [cat.lower() for cat in cats]
    if 'nco' not in lower:
        raise ValueError('No "nco" category found to use as reference.')
    inco = lower.index('nco')
    # Same color scheme and ordering as benchmark(): greys for the xarray
    # categories, colorblind10 (minus entries 5 and 7) for the rest,
    # categories in alphabetical order.
    nxarray = len([cat for cat in lower if 'xarray' in cat])
    xcolors = plot.colors('greys', nxarray, left=0.3)
    ocolors = [color for i, color in enumerate(plot.colors('colorblind10')) if i not in (5, 7)]
    idxs = np.argsort(cats)
    f, ax = plot.subplots(axwidth=2.5, aspect=(2,3), legend='b', span=False, share=1)
    hs = []
    ic, xc = 0, 0  # make xarray lines different shades of same color
    ref = np.array([table[inco][3] for table in tables])
    for i in idxs:
        if i == inco:
            continue  # the reference would just be a flat line at 1
        cat = cats[i]
        if 'xarray' in cat.lower():
            color = xcolors[xc]
            xc += 1
        else:
            color = ocolors[ic]
            ic += 1
        times = [table[i][3] for table in tables]  # 4th cell contains 'real' time
        hs += ax.plot(sizes, np.array(times)/ref, color=color, marker='o', markersize=6, label=cat, lw=1)
    ax.format(xlabel='file size (MB)', ylabel='ratio', gridminor=True,
              ylim=(0.1, 3), yscale='log', yformatter='scalar',
              xlim=(min(sizes), max(sizes)), xscale='log', xformatter='scalar',
              suptitle=f'{name.title()} benchmark relative to NCO')
    f.bpanel.legend(hs, ncols=2, order='F')
    return f