Skip to content

Commit f02c2df

Browse files
committed
Adapt presentation plots for new algo names and results
1 parent 8b49658 commit f02c2df

File tree

1 file changed

+16
-270
lines changed

1 file changed

+16
-270
lines changed

bio_plot_presentation.py

Lines changed: 16 additions & 270 deletions
Original file line numberDiff line numberDiff line change
@@ -3,310 +3,56 @@
33
import pandas as pd
44
import matplotlib.pyplot as plt
55
import numpy as np
6-
from matplotlib import gridspec, cm, ticker
76
import seaborn as sns
87
import argparse
98
from collections import defaultdict
109

11-
import matplotlib.scale as mscale
12-
import matplotlib.transforms as mtransforms
13-
14-
class CubeRootScale(mscale.ScaleBase):
    """Custom matplotlib axis scale registered under the name ``'cuberoot'``.

    NOTE(review): despite the name, the forward transform raises to the
    power 1/2 and the inverse squares, i.e. the code implements a signed
    *square*-root scale.  The exponents are left untouched here; confirm
    which of the two was actually intended.
    """

    name = 'cuberoot'

    def __init__(self, axis, **kwargs):
        # ``axis`` and ``kwargs`` are accepted but not forwarded.
        mscale.ScaleBase.__init__(self)

    def set_default_locators_and_formatters(self, axis):
        """Install automatic major ticks and disable minor ticks on *axis*."""
        axis.set_major_locator(ticker.AutoLocator())
        axis.set_major_formatter(ticker.ScalarFormatter())
        axis.set_minor_locator(ticker.NullLocator())
        axis.set_minor_formatter(ticker.NullFormatter())

    def limit_range_for_scale(self, vmin, vmax, minpos):
        """Clamp the lower view limit to zero; the upper limit is unchanged."""
        return max(0., vmin), vmax

    class CubeRootTransform(mtransforms.Transform):
        """Forward transform: ``sign(x) * |x| ** (1/2)``."""

        input_dims = 1
        output_dims = 1
        is_separable = True

        def transform_non_affine(self, a):
            x = np.array(a)
            return np.sign(x) * (np.abs(x)**(1.0/2.0))

        def inverted(self):
            return CubeRootScale.InvertedCubeRootTransform()

    class InvertedCubeRootTransform(mtransforms.Transform):
        """Inverse transform: ``sign(x) * |x| ** 2``."""

        input_dims = 1
        output_dims = 1
        is_separable = True

        # Override the non-affine hook, exactly like the forward transform
        # does, instead of replacing ``transform`` wholesale.  That way every
        # matplotlib code path that calls ``transform_non_affine`` directly
        # sees the inverse mapping too.
        def transform_non_affine(self, a):
            x = np.array(a)
            return np.sign(x) * (np.abs(x)**2)

        def inverted(self):
            return CubeRootScale.CubeRootTransform()

    def get_transform(self):
        """Return a fresh forward transform for this scale."""
        return self.CubeRootTransform()
56-
57-
# Make the custom scale selectable by name in set_xscale / set_yscale.
mscale.register_scale(CubeRootScale)

sns.set(style="whitegrid")

# Algorithm names in the order in which their curves are drawn.
algo_order = [
    "No Optimization",
    "No Undo",
    "No Redundancy",
    "Skip Conversion",
    "GreedyLB-First",
    "GreedyLB-Most",
    "GreedyLB-Most Pruned",
    "LocalSearchLB-First",
    "LocalSearchLB-Most",
    "LocalSearchLB-Most Pruned",
    "LocalSearchLB-Most Pruned-MT",
]

# Gurobi variants that are plotted; the "-Lazy" variants are not included.
gurobi_algos = [
    "Gurobi",
    "Gurobi-Sparse",
    "Gurobi-Single",
    "Gurobi-Sparse-MT",
]
gurobi_palette = sns.color_palette('Set1', n_colors=len(gurobi_algos))
# One palette entry per Gurobi variant, in list order.
gurobi_colors = [gurobi_palette[idx] for idx in range(len(gurobi_algos))]

color_palette = sns.color_palette('bright', len(algo_order))
# One palette entry per algorithm, in list order.
algo_colors = [color_palette[idx] for idx in range(len(algo_order))]

thread_order = [1, 2, 4, 8, 16]
# Sample the plasma colormap at evenly spaced positions in [0, 1).
thread_colors = [
    cm.plasma(idx / len(thread_order)) for idx, _ in enumerate(thread_order)
]
72-
73-
def my_performanceplot(data, measure, logy=True):
    """Show one plot of sorted relative-slowdown ratios per summary statistic.

    For each summary (min, median, max of ``measure`` within a
    (graph, k, algorithm) group) and each graph, the largest ``k`` at which
    every algorithm has enough measurements is determined.  The best
    algorithm is the one with the largest such ``k``; ties are broken by the
    smaller summarized measure.  Every algorithm then contributes the ratio
    ``1 - reference / own`` for that graph, where ``reference`` is the best
    algorithm's summarized measure at the algorithm's own ``k``.  Graphs
    whose best ``k`` is below 10 are skipped.  The per-algorithm ratios are
    sorted and plotted, and the figure is shown, once per summary.

    Args:
        data: DataFrame with at least the columns ``Graph``, ``k``,
            ``Algorithm`` and the column named by ``measure``.
        measure: Name of the column that is summarized.
        logy: Accepted for interface compatibility; not used.
    """
    # (graph, k, algorithm) -> rows with the individual measurements
    data_grouped = data.groupby(["Graph", "k", "Algorithm"])
    num_permutations = 16

    summaries = [
        ('min', 1, lambda x: x.min()),
        # Quantile position chosen so that, for a group with fewer than
        # num_permutations entries, the missing entries count as "slowest".
        ('median', num_permutations / 2 + 1,
         lambda x: x.quantile(0.5 * (num_permutations - 1) / (len(x) - 1))),
        ('max', num_permutations, lambda x: x.max()),
    ]

    for _name, required_measures, summarize_function in summaries:
        ratios = defaultdict(list)

        # Group by the plain column name (not a one-element list) so that
        # ``graph`` is a scalar label usable in the get_group() tuples below.
        for graph, graph_data in data.groupby("Graph"):
            max_k = graph_data.groupby("Algorithm")["k"].max()

            times = dict()  # algorithm -> {"k": k, measure: summarized value}

            best_algo = None
            best_k = -1
            best_measure = np.inf

            # Series.items() works on old and new pandas alike;
            # Series.iteritems() no longer exists in pandas 2.
            for algorithm, k in max_k.items():
                k_data = data_grouped.get_group((graph, k, algorithm))

                # Step down in k until enough measurements are available.
                while len(k_data) < required_measures:
                    k = k - 1
                    k_data = data_grouped.get_group((graph, k, algorithm))

                v = summarize_function(k_data[measure])

                times[algorithm] = {measure: v, "k": k}

                if k > best_k or (k == best_k and v < best_measure):
                    best_k = k
                    best_algo = algorithm
                    best_measure = v

            assert best_algo is not None

            if best_k < 10:
                continue

            for algo, values in times.items():
                reference_measure = best_measure
                if values['k'] < best_k:
                    # Compare at the k this algorithm actually reached.
                    reference_measure = summarize_function(data_grouped.get_group((graph, values['k'], best_algo))[measure])
                ratios[algo].append(1.0 - reference_measure / values[measure])

                if ('Most' in algo or algo == 'Single') and reference_measure * 2 < values[measure]:
                    print('Graph: {}, algo: {}, {}s, best algo: {}, {}s at best k: {}'.format(graph, algo, values[measure], best_algo, best_measure, best_k))

        sorted_ratios = {algo: sorted(v) for algo, v in ratios.items()}

        pd.DataFrame(sorted_ratios).plot()
        plt.show()
127-
128-
def my_runtime_plot(data, measure):
    """Plot the per-graph median of ``measure`` at each graph's largest k.

    For every graph only the rows with the graph's maximum ``k`` are used.
    Each algorithm is padded up to ``num_permutations`` values per graph
    with ``above_limit``, so that missing runs pull the median up.  The
    medians are arranged as a graph-by-algorithm table, sorted by the
    ``'Most Pruned'`` column, drawn with a logarithmic y axis and shown.

    Args:
        data: DataFrame with at least the columns ``Graph``, ``k``,
            ``Algorithm`` and the column named by ``measure``.
        measure: Name of the column to plot.
    """
    num_permutations = 16
    above_limit = 10000  # stand-in value for runs without a measurement

    algorithms = data.Algorithm.unique()
    plot_data = []  # one dict per (graph, algorithm, measurement)

    # Group by the plain column name so ``graph`` is a scalar label.
    for graph, graph_data in data.groupby("Graph"):
        max_k_data = graph_data[graph_data.k == graph_data.k.max()]

        for algo in algorithms:
            measurements = max_k_data[max_k_data.Algorithm == algo][measure]
            plot_data.extend(
                {'Graph': graph, 'Algorithm': algo, measure: v}
                for v in measurements
            )
            # Pad with the stand-in value; a non-positive count adds nothing.
            missing = num_permutations - len(measurements)
            plot_data.extend(
                {'Graph': graph, 'Algorithm': algo, measure: above_limit}
                for _ in range(missing)
            )

    medians = pd.DataFrame(plot_data).groupby(['Graph', 'Algorithm'])[measure].median()
    # NOTE(review): requires an algorithm literally named 'Most Pruned' in
    # the data -- confirm against the algorithm names actually present.
    medians.unstack().sort_values(by='Most Pruned').plot(logy=True)
    plt.show()
153-
154-
def solved_instances_over_measure_plot(data_dfs, measure, ax):
    """Draw, per algorithm, the number of solved rows as a step curve.

    Every solved row contributes two points at its ``measure`` value (the
    count before and after the row), which produces the vertical part of a
    step.  Multi-threaded rows are tracked under ``<algorithm>-MT``.  A
    horizontal line marks the largest number of (Graph, Permutation) groups
    found in any of the frames.

    Args:
        data_dfs: Iterable of DataFrames with the columns ``Graph``,
            ``Permutation``, ``Solved``, ``Algorithm``, ``MT`` and the
            column named by ``measure``.
        measure: Column used for the x axis.
        ax: Matplotlib axes to draw on.
    """
    x_plot_data = defaultdict(list)
    y_plot_data = defaultdict(list)
    num_solved = defaultdict(int)

    num_graphs = 0

    for frame in data_dfs:
        instance_count = len(frame.groupby(['Graph', 'Permutation']))
        num_graphs = max(num_graphs, instance_count)

        solved_rows = frame[frame.Solved == True].sort_values(by=measure)

        for algo, val, mt in zip(solved_rows.Algorithm, solved_rows[measure], solved_rows.MT):
            key = algo + "-MT" if mt else algo
            count_before = num_solved[key]
            num_solved[key] = count_before + 1
            # Two points at the same x: the step from count to count + 1.
            x_plot_data[key].extend((val, val))
            y_plot_data[key].extend((count_before, count_before + 1))

    min_val = np.inf
    max_val = 0

    curve_groups = ((algo_order, algo_colors), (gurobi_algos, gurobi_colors))
    for names, palette in curve_groups:
        for algo, color in zip(names, palette):
            if algo in x_plot_data:
                xs = x_plot_data[algo]
                min_val = min(xs[0], min_val)
                max_val = max(xs[-1], max_val)
                ax.plot(xs, y_plot_data[algo], label=algo, color=color)

    ax.axhline(num_graphs, color='k')

    ax.set_xlabel(measure)
    ax.set_xscale('log')
    ax.set_xlim(min_val, 1000)
191-
192-
def plot_speedup_per_instance_for_one_algorithm(data, ax):
    """Scatter speedup against total calls, one colour per thread count.

    Only multi-threaded rows with a non-missing ``Speedup`` are used.  The
    ``Calls`` of each (Graph, Algorithm, Permutation, Threads) group are
    summed into ``Total Calls``, and only the rows at the group's largest
    ``k`` are plotted.

    Args:
        data: DataFrame for exactly one algorithm with the columns
            ``Algorithm``, ``MT``, ``Speedup``, ``Graph``, ``Permutation``,
            ``Threads``, ``Calls`` and ``k``.
        ax: Matplotlib axes to draw on.

    Raises:
        AssertionError: If ``data`` contains more than one algorithm.
    """
    assert(len(data.Algorithm.unique()) == 1)

    mt_data = data[(data.MT == True) & data.Speedup.notna()].copy()
    grouped_data = mt_data.groupby(['Graph', 'Algorithm', 'Permutation', 'Threads'])
    mt_data['Total Calls'] = grouped_data['Calls'].transform('sum')
    data_for_max_k = mt_data[grouped_data['k'].transform('max') == mt_data.k]

    # sort_values() returns a new frame; keep it, otherwise the sort is lost.
    data_for_max_k = data_for_max_k.sort_values(by='Total Calls')

    for thread, color in zip(thread_order, thread_colors):
        thread_data = data_for_max_k[data_for_max_k.Threads == thread]
        ax.scatter(thread_data['Total Calls'], thread_data.Speedup, label=thread, color=color, s=2)

    ax.set_xlabel('Calls')
    ax.set_xscale('log')
    ax.set_ylim(0, 16)
209-
210-
def plot_max_k_for_all_algorithms(data, qtm_data):
    """Compare the per-graph maximum k with the QTM value in two panels.

    The left panel holds graphs whose per-graph maximum has ``Solved`` set
    and ``k > 1``; the right panel holds graphs that are not solved.  In
    both panels the k values and the QTM values are drawn as scatter
    points, with the QTM points coloured by whether they match k (solved)
    or k + 1 (unsolved).  Some statistics are printed along the way.

    Args:
        data: DataFrame with the columns ``Graph``, ``k`` and ``Solved``.
        qtm_data: DataFrame with a ``Graph`` column; after indexing by
            ``Graph`` it is stored as the ``QTM`` column.

    Returns:
        The created matplotlib figure.
    """
    per_graph = data.groupby(['Graph']).max()
    per_graph['QTM'] = qtm_data.set_index('Graph')

    trivial = per_graph[per_graph.k == 1]
    print("hiding {} graphs with for k == 1".format(len(trivial)))
    for qtm_k in trivial[trivial.QTM > 1].QTM:
        print("Found a graph with exact k = 1 and QTM k = {}".format(qtm_k))

    data_solved = per_graph[per_graph.Solved & (per_graph.k > 1)]

    qtm_matches = data_solved[data_solved.k == data_solved.QTM]
    print("Out of {} solved graphs, QTM has {} graphs correct".format(len(data_solved), len(qtm_matches)))

    data_unsolved = per_graph[per_graph.Solved == False]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4), sharey=True, gridspec_kw={'width_ratios':[3, 1]})

    match_color = cm.Set1(1)
    mismatch_color = cm.Set1(0)

    panels = (
        ('Solved', data_solved, ax1, 'Exact'),
        ('Unsolved', data_unsolved, ax2, 'Lower Bound'),
    )
    for panel_title, panel_data, ax, k_label in panels:
        # NOTE(review): ``basey`` is the keyword used by the original code;
        # check that the installed matplotlib still accepts it.
        ax.set_yscale('log', basey=2)
        ax.set_title(panel_title)
        ordered = panel_data.sort_values(by=['k', 'QTM'])

        # Solved: QTM should equal k.  Unsolved: k is a lower bound, and a
        # QTM value of exactly k + 1 is the one marked as matching.
        if panel_title == 'Solved':
            expected_qtm = ordered.k
        else:
            expected_qtm = ordered.k + 1
        qtm_precise = [
            match_color if qtm_k == expected else mismatch_color
            for expected, qtm_k in zip(expected_qtm, ordered.QTM)
        ]

        positions = range(len(ordered.index))
        ax.scatter(positions, ordered.k, label=k_label, s=2, color=cm.Set1(2))
        ax.scatter(positions, ordered.QTM, label='QTM', s=2, color=qtm_precise)
        ax.set_xlabel('Graphs')

    ax1.legend()
    ax1.set_ylabel('k')

    return fig
250-
251-
def plot_solutions(data):
    """Scatter the number of solutions against k on log-log axes.

    Only rows with ``Solved`` set and ``k > 0`` are considered; per graph
    the column-wise maximum of those rows is plotted.  The largest plotted
    k is printed.

    Args:
        data: DataFrame with the columns ``Graph``, ``Solved``, ``k`` and
            ``Solutions``.

    Returns:
        The created matplotlib figure.
    """
    solved_rows = data[data.Solved & (data.k > 0)]
    per_graph = solved_rows.groupby(['Graph']).max()
    fig, ax = plt.subplots(1, figsize=(10, 4))

    print("Max k in solutions plot: {}".format(per_graph.k.max()))

    # The y scale is set both before and after plotting, as in the original.
    ax.set_yscale('log')

    ax.scatter(per_graph.k, per_graph.Solutions, s=2)
    ax.set_xlabel('k')
    ax.set_ylabel('Number of Solutions')
    ax.set_yscale('log')
    ax.set_xscale('log')

    return fig
10+
from plot_functions import solved_instances_over_measure_plot, plot_speedup_per_instance_for_one_algorithm, plot_solutions
26611

26712
if __name__ == "__main__":
26813
parser = argparse.ArgumentParser(description="Create plots out of the result data.")
26914
parser.add_argument("csv", help="The CSV input file")
15+
parser.add_argument("fpt_gurobi_csv", help="The FPT Gurobi comparison CSV input file")
27016
#parser.add_argument("qtm_csv", help="The QTM CSV input file")
27117
parser.add_argument("gurobi_csv", help="The Gurobi CSV input file")
27218
parser.add_argument("output_dir", help="The output directory where plots shall be written")
27319
parser.add_argument('--min-k', type=int, help="The minimum value of k to use, default: 10", default=10)
274-
parser.add_argument('--solved-only', action="store_true", help="If only solved graphs shall be considered")
20+
parser.add_argument('--time-limit', type=int, help="The maximum running time to use in seconds, default: 1000", default=1000)
27521

27622
args = parser.parse_args()
27723

27824
df = pd.read_csv(args.csv)
25+
fpt_gurobi_df = pd.read_csv(args.fpt_gurobi_csv)
27926
gurobi_df = pd.read_csv(args.gurobi_csv)
28027

28128
max_ks = df.groupby('Graph').max().k
28229
larger_k_names = max_ks[max_ks >= args.min_k].index
28330

284-
if args.solved_only:
285-
solved_graphs = df[df.Solved == True].groupby('Graph').first().index
286-
filtered_df = df[df.Graph.isin(larger_k_names) & df.Graph.isin(solved_graphs)]
287-
else:
288-
filtered_df = df[(df.Permutation < 4) & df.Graph.isin(larger_k_names)]
31+
filtered_df = df[(df.Permutation < 4) & df.Graph.isin(larger_k_names) & (df['Total Time [s]'] <= args.time_limit)]
32+
33+
filtered_gurobi_fpt_df = fpt_gurobi_df[(fpt_gurobi_df.Permutation < 4) & fpt_gurobi_df.Graph.isin(larger_k_names) & (fpt_gurobi_df['Total Time [s]'] <= args.time_limit)]
28934

29035
gurobi_filtered_df = gurobi_df[(gurobi_df.Permutation < 4) & gurobi_df.Graph.isin(larger_k_names)]
29136

29237
#my_runtime_plot(filtered_df, 'Time [s]')
29338
#my_performanceplot(filtered_df, 'Time [s]', False)
294-
df_st_4 = filtered_df[(filtered_df.MT == False) & (filtered_df.l == 4) & (filtered_df.Algorithm.str.contains('GreedyLB') == False)]
295-
df_st_best = df_st_4[(df_st_4.Algorithm == "LocalSearchLB-Most Pruned")]
39+
df_st_4 = filtered_df[(filtered_df.MT == False) & (filtered_df.l == 4) & filtered_df.Algorithm.str.contains('-LS-')]
40+
41+
df_st_best_first = filtered_gurobi_fpt_df[~filtered_gurobi_fpt_df.MT & (filtered_gurobi_fpt_df.Algorithm == "FPT-LS-MP") & (~filtered_gurobi_fpt_df['All Solutions'])]
29642

29743
gurobi_st = gurobi_filtered_df[gurobi_filtered_df.MT == False]
298-
gurobi_st_best = gurobi_st[gurobi_st.Algorithm == 'Gurobi-Sparse']
44+
gurobi_st_best = gurobi_st[gurobi_st.Algorithm == 'ILP-S-R-C4']
29945

300-
df_mt = filtered_df[(filtered_df.l == 4) & (filtered_df.Threads == 16) & (filtered_df.Algorithm == 'LocalSearchLB-Most Pruned')]
301-
gurobi_mt = gurobi_filtered_df[(gurobi_filtered_df.Threads == 16) & (gurobi_filtered_df.Algorithm == 'Gurobi-Sparse')]
46+
df_mt = filtered_gurobi_fpt_df[(filtered_gurobi_fpt_df.l == 4) & (filtered_gurobi_fpt_df.Threads == 16) & (filtered_gurobi_fpt_df.Algorithm == 'FPT-LS-MP') & (~filtered_gurobi_fpt_df['All Solutions'])]
47+
gurobi_mt = gurobi_filtered_df[(gurobi_filtered_df.Threads == 16) & (gurobi_filtered_df.Algorithm == 'ILP-S-R-C4')]
30248

30349
for dfs, path in [
30450
# Plot 1: ST FPT variants
305-
([df_st_4], '{}/bio_times_st_min_k_{}.pdf'.format(args.output_dir, args.min_k)),
51+
(df_st_4, '{}/bio_times_st_min_k_{}.pdf'.format(args.output_dir, args.min_k)),
30652
# Plot 2: ST Gurobi vs. best FPT
307-
([df_st_best, gurobi_st], '{}/bio_times_gurobi_st_min_k_{}.pdf'.format(args.output_dir, args.min_k)),
53+
(pd.concat([df_st_best_first, gurobi_st_best]), '{}/bio_times_gurobi_st_min_k_{}.pdf'.format(args.output_dir, args.min_k)),
30854
# Plot 3: MT vs. ST best Gurobi/FPT
309-
([df_st_best, gurobi_st_best, df_mt, gurobi_mt], '{}/bio_times_gurobi_mt_min_k_{}.pdf'.format(args.output_dir, args.min_k)),
55+
(pd.concat([df_st_best_first, gurobi_st_best, df_mt, gurobi_mt]), '{}/bio_times_gurobi_mt_min_k_{}.pdf'.format(args.output_dir, args.min_k)),
31056
]:
31157

31258
#fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 4))

0 commit comments

Comments
 (0)