Skip to content

Commit 93aac0e

Browse files
committed
Add example
1 parent 43e4902 commit 93aac0e

File tree

7 files changed

+3263
-2
lines changed

7 files changed

+3263
-2
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,8 @@ dmypy.json
130130

131131
# Custom
132132
datasets/**/**.csv
133-
results/**/**.npy
133+
results/**/**.npy
134+
reports/**/**.html
135+
!datasets/example/voice.csv
136+
!reports/example/voice.html
137+
!results/example/voice.npy

datasets/example/voice.csv

Lines changed: 3169 additions & 0 deletions
Large diffs are not rendered by default.

get_max_score.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
for key, value in amount.items():
2020
print(str(key) + ' : ' + str(value) + ' , ' + str(round(value/total, 4)))
2121

22-
array = np.load('results/brazil_dataset_french_clean.npy', allow_pickle=True)
2322

2423
max_score = 0
2524
max_score_method = None

get_max_scores.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import numpy as np
2+
import pandas as pd
3+
import sys
4+
import os
5+
6+
7+
8+
def getMaxScoreMethod(label, arr):
    """Return the experiment in ``arr`` with the highest ``label`` score.

    Args:
        label: Key of the metric to maximise in each experiment mapping.
        arr: Iterable of experiment mappings, each indexable by ``label``.

    Returns:
        The experiment whose ``experiment[label]`` is largest, or ``None``
        when ``arr`` is empty or every score is NaN. When several
        experiments share the maximum, the last one wins.
    """
    best_score = None
    best_experiment = None
    for experiment in arr:
        score = experiment[label]
        if score != score:
            # NaN is the only value unequal to itself; it can never be a
            # meaningful maximum, so skip it.
            continue
        # Seed from the first valid experiment instead of a literal 0 so
        # that all-negative scores still yield a winner. ``>=`` keeps the
        # "last tie wins" behaviour.
        if best_experiment is None or score >= best_score:
            best_score = score
            best_experiment = experiment
    return best_experiment
16+
17+
def renderMaxScoreMethod(label, arr):
    """Render the best experiment for ``label`` as an HTML fragment.

    The fragment is an ``<h3>`` heading followed by a bullet list with one
    ``<li>`` per key/value pair of the experiment selected by
    ``getMaxScoreMethod``. Values stored under ``'oversampler'`` and
    ``'classifier'`` are passed through ``removeRandomStateParam`` before
    being stringified.

    Args:
        label: Metric key to maximise.
        arr: Iterable of experiment mappings.

    Returns:
        The HTML fragment as a string. When no experiment can be selected,
        the heading is followed by a short notice instead of a list.
    """
    # Local import keeps this function self-contained; the local name
    # ``escape`` avoids clashing with any variable called ``html``.
    from html import escape

    heading = (
        "<h3>Method for: maximum of "
        + escape(str(label), quote=False)
        + "</h3>"
    )

    best = getMaxScoreMethod(label, arr)
    if best is None:
        # Previously this path raised AttributeError on ``None.items()``.
        return heading + "<p>No experiment found.</p>"

    items = []
    for key, value in best.items():
        if key in ('oversampler', 'classifier'):
            value = removeRandomStateParam(value)
        # Escape <, > and & so that object reprs cannot break the markup.
        # Quotes are left alone because the text is never placed inside an
        # attribute.
        items.append(
            "<li>"
            + escape(str(key), quote=False)
            + " "
            + escape(str(value), quote=False)
            + "</li>"
        )
    return heading + "<p><ul>" + "".join(items) + "</ul></p>"
27+
28+
def removeRandomStateParam(model):
    """Reset ``random_state`` on ``model`` to ``None`` if it has one.

    The model is modified in place through its ``get_params`` /
    ``set_params`` methods and the same object is returned.

    Args:
        model: An object exposing ``get_params()`` and ``set_params(**kw)``,
            or ``None``.

    Returns:
        ``model`` itself (``None`` is passed through unchanged).
    """
    if model is None:
        return model
    # Only touch the parameter when the model actually exposes it: blindly
    # injecting ``random_state`` makes ``set_params`` reject models that do
    # not accept that parameter.
    if 'random_state' in model.get_params():
        model.set_params(random_state=None)
    return model
34+
35+
def renderHTML(name, arr, dataset):
    """Build the full HTML report for one dataset.

    The page contains the dataset's display name, the class distribution of
    the dataset's last column, and one section per metric key of the first
    experiment (every key except ``classifier``, ``oversampler`` and ``cm``)
    produced by ``renderMaxScoreMethod``.

    Args:
        name: Dataset path or name; only its final path component is shown.
        arr: Sequence of experiment mappings; ``arr[0]`` supplies the keys.
        dataset: Object supporting ``.iloc[:, -1]`` (the script entry point
            passes the result of ``pd.read_csv``).

    Returns:
        The HTML document as a string, or ``"Cannot render"`` when ``arr``
        is empty.
    """
    from html import escape

    # Use len() rather than truthiness: the entry point passes the result
    # of np.load, and the truth value of a NumPy array is ambiguous.
    if len(arr) == 0:
        return "Cannot render"

    # Escape the name so characters such as < or & cannot break the page.
    pretty_name = escape(
        os.path.basename(os.path.normpath(name)), quote=False
    )
    # Adjacent literals instead of a backslash continuation, so that source
    # indentation does not leak into the emitted markup.
    parts = [
        "<html><head><meta name='viewport' content='width'> "
        "<link rel='stylesheet' "
        "href='https://unpkg.com/marx-css/css/marx.min.css'><title>",
        pretty_name,
        " dataset</title></head><body><h1>",
        pretty_name,
        " dataset</h1>",
    ]

    # Class distribution of the last column.
    labels, counts = np.unique(dataset.iloc[:, -1], return_counts=True)
    total = sum(counts)
    parts.append("<p>Total: " + str(total))
    for class_label, count in zip(labels, counts):
        parts.append(
            "<br>"
            + escape(str(class_label), quote=False)
            + ": "
            + str(count)
            + " , "
            + str(round(count / total, 4))
        )
    parts.append("</p>")

    # One section per metric; these keys hold non-metric entries.
    non_metric_keys = ("classifier", "oversampler", "cm")
    for key in arr[0]:
        if key not in non_metric_keys:
            parts.append(renderMaxScoreMethod(key, arr))

    parts.append("</body></html>")
    return "".join(parts)
61+
62+
63+
def writeHTML(name, arr, dataset):
    """Render the report for ``name`` and write it to ``reports/<name>.html``.

    Args:
        name: Dataset name; may contain sub-directories (e.g.
            ``example/voice``), which are created under ``reports/`` if
            they do not exist.
        arr: Sequence of experiment mappings, forwarded to ``renderHTML``.
        dataset: Dataset object, forwarded to ``renderHTML``.
    """
    # Render before opening the file so that a rendering failure does not
    # leave a truncated or empty report behind.
    content = renderHTML(name, arr, dataset)

    path = "reports/" + name + ".html"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # The context manager guarantees the handle is closed even if the
    # write fails.
    with open(path, "w", encoding="utf-8") as report:
        report.write(content)
67+
68+
69+
70+
if __name__ == "__main__":
    # Usage: python get_max_scores.py <dataset>
    # <dataset> is the path, relative to datasets/ and results/, without
    # file extension (e.g. "example/voice").
    default_path = sys.argv[1] if len(sys.argv) > 1 else ''

    if default_path == '':
        # sys.exit rather than the bare exit(), which is injected by the
        # ``site`` module and is not guaranteed to exist.
        sys.exit('No dataset specified.')
    dataset = pd.read_csv('datasets/' + default_path + '.csv')
    # NOTE(security): allow_pickle=True unpickles arbitrary Python objects.
    # Only load result files that come from a trusted source.
    array = np.load('results/' + default_path + '.npy', allow_pickle=True)
    writeHTML(default_path, array, dataset)

reports/.gitkeep

Whitespace-only changes.

reports/example/voice.html

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<html><head><meta name='viewport' content='width'> <link rel='stylesheet' href='https://unpkg.com/marx-css/css/marx.min.css'><title>voice dataset</title></head><body><h1>voice dataset</h1><p>Total: 3168<br>0: 1584 , 0.5<br>1: 1584 , 0.5</p><h3>Method for: maximum of train_score</h3><p><ul><li>train_score 1.0</li><li>test_score 0.9772870662460568</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2',
2+
n_estimators=200)</li><li>oversampler BorderlineSMOTE(k_neighbors=20, kind='borderline-2')</li><li>cm [[305. 6.9]
3+
[ 7.5 314.6]]</li><li>recall_score 0.9767642400937866</li><li>roc_auc_score 0.977244181476375</li><li>precision_score 0.9786422056161875</li><li>f1_score 0.9776969602541465</li><li>f1_score_macro 0.977250275353066</li><li>f1_score_weighted 0.9772867960003525</li></ul></p><h3>Method for: maximum of test_score</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6 4.7]
4+
[ 4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of recall_score</h3><p><ul><li>train_score 1.0</li><li>test_score 0.9813880126182966</li><li>classifier GradientBoostingClassifier(n_estimators=200)</li><li>oversampler None</li><li>cm [[312.2 8.3]
5+
[ 3.5 310. ]]</li><li>recall_score 0.9888748276576491</li><li>roc_auc_score 0.9815830301137695</li><li>precision_score 0.9739028313866314</li><li>f1_score 0.9812525514595002</li><li>f1_score_macro 0.9813704251179102</li><li>f1_score_weighted 0.9813899553288561</li></ul></p><h3>Method for: maximum of roc_auc_score</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6 4.7]
6+
[ 4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of precision_score</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6 4.7]
7+
[ 4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of f1_score</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6 4.7]
8+
[ 4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of f1_score_macro</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6 4.7]
9+
[ 4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of f1_score_weighted</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6 4.7]
10+
[ 4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p></body></html>

results/example/voice.npy

71.1 MB
Binary file not shown.

0 commit comments

Comments
 (0)