cedced19
diff --git a/‎.gitignore
Lines changed: 5 additions & 1 deletion b/‎.gitignore
Lines changed: 5 additions & 1 deletion
diff --git a/‎datasets/example/voice.csv
Lines changed: 3169 additions & 0 deletions b/‎datasets/example/voice.csv
Lines changed: 3169 additions & 0 deletions
diff --git a/‎get_max_score.py
Lines changed: 0 additions & 1 deletion b/‎get_max_score.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎get_max_scores.py
Lines changed: 79 additions & 0 deletions b/‎get_max_scores.py
Lines changed: 79 additions & 0 deletions
diff --git a/‎reports/.gitkeep b/‎reports/.gitkeep
diff --git a/‎reports/example/voice.html
Lines changed: 10 additions & 0 deletions b/‎reports/example/voice.html
Lines changed: 10 additions & 0 deletions
diff --git a/‎results/example/voice.npy
71.1 MB b/‎results/example/voice.npy
71.1 MB
@@ -130,4 +130,8 @@ dmypy.json
 
 # Custom
 datasets/**/**.csv
-results/**/**.npy
+results/**/**.npy
+reports/**/**.html
+!datasets/example/voice.csv
+!reports/example/voice.html
+!results/example/voice.npy
@@ -19,7 +19,6 @@
 for key, value in amount.items():
     print(str(key) + ' : ' + str(value) + ' , ' + str(round(value/total, 4)))
 
-array = np.load('results/brazil_dataset_french_clean.npy', allow_pickle=True)
 
 max_score = 0
 max_score_method = None
 
@@ -0,0 +1,79 @@
+import numpy as np
+import pandas as pd
+import sys
+import os
+
+
+
+def getMaxScoreMethod(label, arr):
+    max_score = 0
+    max_score_method = None
+    for experiment in arr:
+        if (experiment[label] >= max_score):
+            max_score = experiment[label]
+            max_score_method = experiment
+    return max_score_method
+
+def renderMaxScoreMethod(label, arr):
+    d = getMaxScoreMethod(label, arr)
+    html = "<h3>Method for: maximum of " + label + "</h3>"
+    html += "<p><ul>"
+    for key, value in d.items():
+        if (key == 'oversampler' or key == 'classifier'):
+            value = removeRandomStateParam(value)
+        html += "<li>" + key + " "+str(value)+"</li>"
+    html+= "</ul></p>"
+    return html
+
+def removeRandomStateParam(model):
+    if (model != None):
+        d=model.get_params()
+        d['random_state'] = None
+        model.set_params(**d)
+    return model
+
+def renderHTML(name,arr,dataset):
+    if (len(arr) == 0): 
+        return "Cannot render"
+    prettyName =  os.path.basename(os.path.normpath(name))
+    html = "<html><head><meta name='viewport' content='width'>\
+            <link rel='stylesheet' href='https://unpkg.com/marx-css/css/marx.min.css'><title>"
+    html += prettyName
+    html += " dataset</title></head><body><h1>"
+    html += prettyName
+    html += " dataset</h1>"
+
+
+    unique, counts = np.unique(dataset.iloc[:, -1], return_counts=True)
+    amount = dict(zip(unique, counts))
+    total = sum(counts)
+    html += "<p>Total: " + str(total)
+    for key, value in amount.items():
+        html += "<br>" + str(key) + ": " + str(value) + " , " + str(round(value/total, 4))
+    html += "</p>"
+
+
+    for key, value in arr[0].items():
+        if (key != "classifier" and key != "oversampler" and key != "cm"):
+            html += renderMaxScoreMethod(key, arr)
+    html += "</body></html>"
+    return html
+
+
+def writeHTML(name, arr, dataset):
+    f = open("reports/" + name + ".html", "w")
+    f.write(renderHTML(name, arr, dataset))
+    f.close()
+
+
+
+if __name__ == "__main__":
+    default_path = ''
+    if (len(sys.argv) != 1):
+        default_path = sys.argv[1]
+
+    if (default_path == ''):
+        exit('No dataset specified.')
+    dataset = pd.read_csv('datasets/' + default_path + '.csv')
+    array = np.load('results/' + default_path + '.npy', allow_pickle=True)
+    writeHTML(default_path, array, dataset)
@@ -0,0 +1,10 @@
+<html><head><meta name='viewport' content='width'>            <link rel='stylesheet' href='https://unpkg.com/marx-css/css/marx.min.css'><title>voice dataset</title></head><body><h1>voice dataset</h1><p>Total: 3168<br>0: 1584 , 0.5<br>1: 1584 , 0.5</p><h3>Method for: maximum of train_score</h3><p><ul><li>train_score 1.0</li><li>test_score 0.9772870662460568</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2',
+                       n_estimators=200)</li><li>oversampler BorderlineSMOTE(k_neighbors=20, kind='borderline-2')</li><li>cm [[305.    6.9]
+ [  7.5 314.6]]</li><li>recall_score 0.9767642400937866</li><li>roc_auc_score 0.977244181476375</li><li>precision_score 0.9786422056161875</li><li>f1_score 0.9776969602541465</li><li>f1_score_macro 0.977250275353066</li><li>f1_score_weighted 0.9772867960003525</li></ul></p><h3>Method for: maximum of test_score</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6   4.7]
+ [  4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of recall_score</h3><p><ul><li>train_score 1.0</li><li>test_score 0.9813880126182966</li><li>classifier GradientBoostingClassifier(n_estimators=200)</li><li>oversampler None</li><li>cm [[312.2   8.3]
+ [  3.5 310. ]]</li><li>recall_score 0.9888748276576491</li><li>roc_auc_score 0.9815830301137695</li><li>precision_score 0.9739028313866314</li><li>f1_score 0.9812525514595002</li><li>f1_score_macro 0.9813704251179102</li><li>f1_score_weighted 0.9813899553288561</li></ul></p><h3>Method for: maximum of roc_auc_score</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6   4.7]
+ [  4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of precision_score</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6   4.7]
+ [  4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of f1_score</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6   4.7]
+ [  4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of f1_score_macro</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6   4.7]
+ [  4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p><h3>Method for: maximum of f1_score_weighted</h3><p><ul><li>train_score 0.999960722702278</li><li>test_score 0.9853312302839117</li><li>classifier RandomForestClassifier(criterion='entropy', max_features='log2')</li><li>oversampler SMOTE(k_neighbors=3)</li><li>cm [[304.6   4.7]
+ [  4.6 320.1]]</li><li>recall_score 0.9857921852691428</li><li>roc_auc_score 0.9852988467694319</li><li>precision_score 0.9855472263856319</li><li>f1_score 0.9856502927885332</li><li>f1_score_macro 0.9853132217519034</li><li>f1_score_weighted 0.9853309441448725</li></ul></p></body></html>