-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcdf.py
43 lines (32 loc) · 972 Bytes
/
cdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
import os
import numpy as np
import argparse
def build_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser with a positional ``file`` argument
        (path of the pickled DataFrame) and an optional ``--xvar`` argument
        naming the column to sort by.
    """
    # The summary has to be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would otherwise replace
    # the program name in the usage line.
    parser = argparse.ArgumentParser(
        description='Plot the cumulative distribution of the probability '
                    'correct given a variable')
    parser.add_argument('file', type=str,
                        metavar='DF',
                        help='Location where pkl file saved')
    parser.add_argument('--xvar', type=str, default='gradx_modelsq_2norm')
    return parser


def conditional_cdf(df, xvar):
    """Tabulate running classification rates over samples sorted by ``xvar``.

    Rows are ordered by ascending ``df[xvar]``. For the k-th sorted row
    (1-based), each ``p(...|x<X)`` column holds the fraction of the first k
    sorted samples that fall in that category, and ``p(x<X)`` holds k divided
    by the total number of samples.

    Args:
        df: DataFrame with a ``'type'`` column and a column named ``xvar``.
            Rows whose ``'type'`` equals ``'mis-classified'`` count as wrong,
            rows whose ``'type'`` equals ``'top1'`` count as top-1, and every
            row that is not wrong counts as top-5.
        xvar: Name of the column to sort by.

    Returns:
        pandas.DataFrame with columns ``'X'``, ``'p(top5|x<X)'``,
        ``'p(wrong|x<X)'``, ``'p(top1|x<X)'`` and ``'p(x<X)'``. The index
        carries the row labels of ``df`` in sorted order.

    Raises:
        KeyError: if ``'type'`` or ``xvar`` is not a column of ``df``.
    """
    n_samples = len(df)
    wrong = (df['type'] == 'mis-classified').to_numpy()
    top1 = (df['type'] == 'top1').to_numpy()
    top5 = np.logical_not(wrong)

    x = df[xvar]
    # Sort by position, not by label: argsort on the raw values combined with
    # positional indexing stays correct when the index of ``df`` is not the
    # default 0..n-1 range (e.g. after filtering rows).
    order = np.argsort(x.to_numpy())
    counts = np.arange(1, n_samples + 1)

    def running_rate(mask):
        # Fraction of the first k sorted samples for which ``mask`` is True.
        return np.cumsum(mask[order]) / counts

    # ``X`` stays a Series so the result keeps the original row labels; the
    # remaining columns are plain arrays and are attached positionally.
    return pd.DataFrame({'X': x.iloc[order],
                         'p(top5|x<X)': running_rate(top5),
                         'p(wrong|x<X)': running_rate(wrong),
                         'p(top1|x<X)': running_rate(top1),
                         'p(x<X)': counts / n_samples})


if __name__ == '__main__':
    parser = build_parser()
    args = parser.parse_args()
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    df = pd.read_pickle(args.file)
    # NOTE(review): the table is computed but neither plotted nor saved in the
    # visible code; it is left in ``cdf`` for interactive use.
    cdf = conditional_cdf(df, args.xvar)