evaluation.py
import numpy as np


def RMSE(pred, truth):
    '''
    Calculate Root Mean Square Error (RMSE).
    Inputs:
        pred (1D numpy array): numpy array containing predicted values.
        truth (1D numpy array): numpy array containing the ground truth values.
    Returns:
        rmse (float): The Root Mean Square Error.
    '''
    return np.sqrt(np.mean(np.square(pred - truth)))
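

# A minimal usage sketch (illustrative, not part of the original module):
# the hand-made arrays below are hypothetical. Squared errors are [0, 1, 1],
# so the expected RMSE is sqrt(2/3) ~= 0.816.
def _demo_rmse():
    pred = np.array([3.0, 4.0, 5.0])
    truth = np.array([3.0, 5.0, 4.0])
    print(RMSE(pred, truth))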


def RMSE_mat(matA, matB):
    '''
    Calculate Root Mean Square Error (RMSE) between two matrices. Mainly used
    to find the error between the original and reconstructed matrices when
    working with matrix decompositions.
    Inputs:
        matA (2D numpy array): Matrix A
        matB (2D numpy array): Matrix B
    Returns:
        rmse (float): Root Mean Square Error.
    '''
    return np.sqrt(np.sum(np.square(matA - matB)) / matA.size)
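

# A usage sketch (an assumed workflow, not from the original file): comparing
# a random matrix against its rank-k truncated-SVD reconstruction, the kind
# of decomposition error RMSE_mat is meant to measure.
def _demo_rmse_mat(k=10):
    mat = np.random.rand(50, 40)
    U, s, Vt = np.linalg.svd(mat, full_matrices=False)
    # rank-k reconstruction: keep the k largest singular values
    mat_k = (U[:, :k] * s[:k]) @ Vt[:k, :]
    print(RMSE_mat(mat, mat_k))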


def top_k_precision(pred, test, means_, map_, k=5, user_=True):
    '''
    Calculate Precision@top k.
    Inputs:
        pred (1D numpy array): numpy array containing predicted values.
        test (pandas DataFrame): ground truth ratings with 'userId',
            'movieId' and 'rating' columns.
        means_ (1D numpy array): per-user (or per-item) mean ratings, used
            as relevance thresholds.
        map_ (python dictionary): maps raw user/movie ids to indices
            into means_.
        k (int): value of k.
        user_ (bool): if True, compute precision per user; otherwise
            per item.
    Returns:
        (float): average Precision@top k.
    '''
    precision_list = []
    print('test shape', test.shape, 'pred shape', pred.shape)
    # note: this adds a 'prediction' column to the caller's DataFrame
    test['prediction'] = pred
    if user_:
        unique_values = test['userId'].unique()
    else:
        unique_values = test['movieId'].unique()
    for val in unique_values:
        # each user/item is thresholded at its own mean rating
        THRESHOLD = means_[map_[val]]
        if user_:
            temp_df = test[test['userId'] == val].copy(deep=True)
        else:
            temp_df = test[test['movieId'] == val].copy(deep=True)
        # keep the k rows with the highest predicted ratings
        temp_df.sort_values('prediction', inplace=True, ascending=False)
        temp_df = temp_df.head(k)
        # binarize ratings and predictions at the threshold, then count
        # the rows where the two binarized values agree
        temp_df['rating'] = temp_df['rating'] >= THRESHOLD
        temp_df['prediction'] = temp_df['prediction'] >= THRESHOLD
        no_equals = temp_df[temp_df['rating'] == temp_df['prediction']].shape[0]
        precision_list.append(no_equals / float(temp_df.shape[0]))
    return np.mean(np.array(precision_list))
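

# A usage sketch with hypothetical toy data: `test` mimics a MovieLens-style
# ratings frame, `means_` holds per-user mean ratings, and `map_` maps raw
# user ids to rows of `means_`. None of these values come from the original
# file; pandas is imported locally so the module's own imports are unchanged.
def _demo_top_k_precision():
    import pandas as pd
    test = pd.DataFrame({
        'userId':  [1, 1, 1, 2, 2, 2],
        'movieId': [10, 20, 30, 10, 20, 30],
        'rating':  [4.0, 2.0, 5.0, 3.0, 4.0, 1.0],
    })
    pred = np.array([3.9, 2.5, 4.6, 2.8, 4.2, 1.5])
    means_ = np.array([3.67, 2.67])   # per-user mean ratings
    map_ = {1: 0, 2: 1}               # raw userId -> index into means_
    print(top_k_precision(pred, test, means_, map_, k=2, user_=True))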


def spearman_rank_correlation(pred, truth):
    '''
    Calculate Spearman Rank Correlation. The closed-form formula assumes
    pred and truth are rank vectors without ties.
    Inputs:
        pred (1D numpy array): numpy array containing predicted ranks.
        truth (1D numpy array): numpy array containing the ground truth ranks.
    Returns:
        rho (float): Spearman Rank Correlation
    '''
    d = np.sum(np.square(pred - truth))  # sum of squared rank differences
    n = len(pred)
    rho = 1 - 6.0 * d / (n * (n * n - 1))
    return rho
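

# A usage sketch (illustrative values): since the closed-form formula expects
# ranks, raw scores are first converted to ranks via the double-argsort trick
# (valid when there are no ties). scipy.stats.spearmanr, if installed, serves
# as an assumed cross-check and is not required by this module.
def _demo_spearman():
    pred = np.array([2.0, 1.0, 4.0, 3.0])
    truth = np.array([1.0, 2.0, 3.0, 4.0])
    pred_ranks = pred.argsort().argsort() + 1    # ranks 1..n
    truth_ranks = truth.argsort().argsort() + 1
    print(spearman_rank_correlation(pred_ranks, truth_ranks))  # 0.6
    try:
        from scipy.stats import spearmanr
        rho, _ = spearmanr(pred, truth)
        print(rho)
    except ImportError:
        pass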


if __name__ == '__main__':
    shp = [100, 100]
    a = np.random.randint(1, 6, shp)
    b = np.random.randint(1, 6, shp)
    print(RMSE_mat(a, b))
    # flatten to 1D so n in the rank formula matches the number of elements
    print(spearman_rank_correlation(a.ravel(), b.ravel()))