evaluation_metrics_handler.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
The main goal of class EvaluationMetricsHandler is to compute metrics for each class
from expected and predicted results, and a list of classes.
'''


class EvaluationMetricsHandler:
    """
    :method __init__
    :argument expected_results: the real labels of each value x in X, the training set
    :argument predicted_results: the labels produced by the classifier for each x in X
    :argument classes: the label list
    """

    def __init__(self, expected_results, predicted_results, classes):
        self.expected_results = expected_results
        self.predicted_results = predicted_results
        self.classes = classes
"""
:method init_values initialize all values to compute metrics
"""
def init_values(self):
self.precision = 0.0 # initialize all global metrics
self.recall = 0.0
self.agree_rate = 0.0
self.fp_rate = 0.0
self.f1_measure = 0.0
self.mse = 0.0
self.precision_by_class = {} # create all per class metrics
self.recall_by_class = {}
self.agree_rate_by_class = {}
self.fp_rate_by_class = {}
self.f1_measure_by_class = {}
self.mse_by_class = {}
self.class_no = {}
for i, c in enumerate(self.classes): # initialize every metrics per class
self.class_no.update({c: i+1})
self.precision_by_class.update({c: 0})
self.recall_by_class.update({c: 0})
self.agree_rate_by_class.update({c: 0})
self.fp_rate_by_class.update({c: 0})
self.f1_measure_by_class.update({c: 0})
self.mse_by_class.update({c: 0})
"""
:method: compute_metrics_per_label
We use True Positives (TP), True Negatives (TN), False Positives (FP), False Negatives (FN) per each class where:
for each label l \in classes do
TP: predicted_list(i) iff predicted_list(i) == real_list(i) and predicted_list(i) == l
FP: predicted_list(i) iff predicted_list(i) != real_list(i) and predicted_list(i) == l
TN: predicted_list(i) iff predicted_list(i) == real_list(i) and predicted_list(i) != l
FN: predicted_list(i) iff predicted_list(i) != real_list(i) and predicted_list(i) != l
precision_by_class = TP / (TP + FP)
recall_by_class = TP / (TP + FN)
agree_rate_by_class = (TP + TN) / (TP + TN + FP + FN)
fp_rate_by_class = FP / (TP + FN)
f1_measure_by_class = 2 * ((precision_by_class * recall_by_class) / (precision_by_class + recall_by_class))
mse_by_class = values_per_label(predicted_list(i)) - values_per_label(real_list(i)) ^ 2
"""
def compute_metrics_per_label(self):
        true_p = {}  # first we initialize the base counts TP, FP, FN, TN
        false_p = {}
        false_neg = {}
        true_neg = {}
        n_by_class = {}  # number of samples predicted as each class
        self.init_values()  # initialize all metric values
        class_no = self.class_no
        n = 0  # total number of samples
        for i, c in enumerate(self.classes):
            true_p[c] = 0  # initialize the base counts for this class
            false_p[c] = 0
            true_neg[c] = 0
            false_neg[c] = 0
            n_by_class[c] = 0
            for idx, value in enumerate(self.predicted_results):  # for every predicted result
                # the error is the distance between the predicted and the expected class index
                error = (class_no[self.predicted_results[idx]]
                         - class_no[self.expected_results[idx]])
                if value == c:  # positive cases
                    if value == self.expected_results[idx]:
                        true_p[c] += 1
                    else:
                        false_p[c] += 1
                    self.mse_by_class[c] += error * error
                    n_by_class[c] += 1
                else:  # negative cases
                    if value == self.expected_results[idx]:
                        true_neg[c] += 1
                    else:
                        false_neg[c] += 1
            div = true_p[c] + true_neg[c] + false_p[c] + false_neg[c]
            div = 1 if div == 0 else div
            self.agree_rate_by_class[c] = (true_p[c] + true_neg[c]) / div
            div = true_p[c] + false_p[c]
            div = 1 if div == 0 else div
            self.precision_by_class[c] = true_p[c] / div
            div = true_p[c] + false_neg[c]
            div = 1 if div == 0 else div
            self.recall_by_class[c] = true_p[c] / div
            div = true_p[c] + false_neg[c]
            div = 1 if div == 0 else div
            self.fp_rate_by_class[c] = false_p[c] / div
            div = self.precision_by_class[c] + self.recall_by_class[c]
            if div == 0.0:
                div = 1
            self.f1_measure_by_class[c] = 2 * (self.precision_by_class[c] * self.recall_by_class[c] / div)
            n += n_by_class[c]
        return n
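
    # Illustrative example (hypothetical data) of the counts defined in the docstring of
    # compute_metrics_per_label, for classes ["a", "b"] with class_no == {"a": 1, "b": 2}:
    #   expected  = ["a", "a", "b", "b"]
    #   predicted = ["a", "b", "b", "a"]
    # For label l == "a": TP = 1 (sample 0), FN = 1 (sample 1), TN = 1 (sample 2), FP = 1 (sample 3),
    # so precision = recall = agree_rate = fp_rate = f1_measure = 0.5, and
    # mse_by_class["a"] = (1 - 1) ** 2 + (1 - 2) ** 2 = 1 over the two samples predicted as "a".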
"""
:method compute_metrics this compute all gobal metrics taking in count all per-label metrics
Global metrics are computed by the average of each per-label metrics
"""
def compute_metrics(self):
n = self.compute_metrics_per_label() # we compute every metric per label and returns total number of samples
for c in self.classes:
self.agree_rate += self.agree_rate_by_class[c]
self.precision += self.precision_by_class[c]
self.recall += self.recall_by_class[c]
self.fp_rate += self.fp_rate_by_class[c]
self.mse += self.mse_by_class[c]
self.f1_measure += self.f1_measure_by_class[c]
num_classes = len(self.classes)
if not num_classes:
num_classes = 1
self.agree_rate /= num_classes
self.precision /= num_classes
self.recall /= num_classes
self.fp_rate /= num_classes
self.f1_measure /= num_classes
self.mse /= n
for l in self.classes:
print ("\n\n**--*-*-*-*-*-*-*-*-*-*-*\n agree_rate: {0}\nprecision: {1}\nrecall: {2}\nfp_rate: {3}\nmse: {4}\nf1_measure: {5}".format(self.agree_rate, self.precision, self.recall, self.fp_rate, self.mse, self.f1_measure))
print("\n\n**{6}: --*-*-*-*-*-*-*-*-*-*-*\n agree_rate: {0}\nprecision: {1}\nrecall: {2}\nfp_rate: {3}\nmse: {4}\nf1_measure: {5}".format(
self.agree_rate_by_class[l],
self.precision_by_class[l],
self.recall_by_class[l],
self.fp_rate_by_class[l],
self.mse_by_class[l],
self.f1_measure_by_class[l], l))
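
    # Continuing the hypothetical two-class example above, both classes end up with all
    # per-class rates equal to 0.5 and a squared-error sum of 1, so the global metrics are
    # all 0.5 and the global mse is (1 + 1) / 4 = 0.5.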
    def save_to_file(self, fmetrics):
        """Write the global and per-class metrics to the given open file object."""
        fmetrics.write("\n\n**--*-*-*-*-*-*-*-*-*-*-*\n agree_rate: {0}\nprecision: {1}\nrecall: {2}\nfp_rate: {3}\nmse: {4}\nf1_measure: {5}".format(
            self.agree_rate, self.precision, self.recall, self.fp_rate, self.mse, self.f1_measure))
        for l in self.classes:
            fmetrics.write("\n\n**{6}: --*-*-*-*-*-*-*-*-*-*-*\n agree_rate: {0}\nprecision: {1}\nrecall: {2}\nfp_rate: {3}\nmse: {4}\nf1_measure: {5}".format(
                self.agree_rate_by_class[l],
                self.precision_by_class[l],
                self.recall_by_class[l],
                self.fp_rate_by_class[l],
                self.mse_by_class[l],
                self.f1_measure_by_class[l], l))
        fmetrics.flush()
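

# A minimal usage sketch with hypothetical labels and an arbitrary output path; the real
# pipeline that produces the expected/predicted label lists is not part of this file.
if __name__ == "__main__":
    classes = ["cat", "dog", "bird"]
    expected = ["cat", "dog", "dog", "bird", "cat", "bird"]
    predicted = ["cat", "dog", "bird", "bird", "dog", "bird"]
    handler = EvaluationMetricsHandler(expected, predicted, classes)
    handler.compute_metrics()  # prints the global metrics and then one block per class
    with open("metrics.txt", "w") as fmetrics:  # hypothetical output file
        handler.save_to_file(fmetrics)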