"""
A simple implementation of Fisher linear classifier/Linear discriminator analysis
@data: 2019.12.18
@author: Tingyu Mo
"""
import pandas as pd
import numpy as np
import os
import math
import csv
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score,accuracy_score
class LDA():
    '''
    A simple implementation of the Fisher linear classifier / linear discriminant analysis
    '''
    def __init__(self):
        self.w0 = 0
        self.w_opt = 0

    def cal_means(self, x, axis=0):
        '''Per-class sample mean m_i.'''
        return x.mean(axis=axis)

    def cal_inter_variance_matrix(self, x, mean, axis=0):
        '''
        Within-class scatter matrix: S_i = sum((x - m_i)(x - m_i)^T)
        '''
        return (x - mean).T.dot(x - mean)

    def cal_inv_matrix(self, x):
        return np.linalg.inv(x)

    def cal_optim_weight(self, Sw_inv, m1, m2):
        '''
        Optimal projection direction: w* = S_W^(-1) (m_1 - m_2)
        '''
        w_opt = Sw_inv.dot((m1 - m2).T)
        self.w_opt = w_opt
        return w_opt

    def cal_mapping_means(self, w_opt, mean):
        '''
        Projected class mean: m_i* = w_opt^T * m_i
        '''
        return mean.dot(w_opt)

    def cal_decision_threshold(self, m1, m2):
        '''
        Decision threshold: w0 = -(m1 + m2) / 2
        '''
        w0 = -(m1 + m2) * 1.0 / 2
        self.w0 = w0
        return w0
    def predict(self, x_test):
        '''Classify each sample by the sign of its projection: y = x . w_opt + w0.'''
        w_opt = self.w_opt
        w0 = self.w0
        y_pred = list()
        for x in x_test:
            y = x.dot(w_opt) + w0
            if y >= 0:
                y_pred.append(1)
            else:
                y_pred.append(-1)
        return np.array(y_pred)
    def plot_decision_boundary(self, X, y):
        # Set the min/max values and add a small margin of padding
        x1_min, x1_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        x2_min, x2_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        h = 0.01
        x1_grid, x2_grid = np.meshgrid(np.arange(x1_min, x1_max, h), np.arange(x2_min, x2_max, h))
        # Predict over the whole grid
        Z = self.predict(np.c_[x1_grid.ravel(), x2_grid.ravel()])
        Z = Z.reshape(x1_grid.shape)
        # Plot the decision regions and the samples
        plt.title("Fisher linear classifier")
        plt.contourf(x1_grid, x2_grid, Z, cmap=plt.cm.Spectral)
        plt.scatter(X[:, 0], X[:, 1], c=y, marker='x', cmap=plt.cm.Spectral)
    def viz(self, data_one, data_two):
        plt.title("Sample distribution in feature space")
        plt.scatter(data_one[:, 0], data_one[:, 1], marker='+', color='b', label='w1', s=47)
        plt.scatter(data_two[:, 0], data_two[:, 1], marker='o', color='r', label='w2', s=47)
        plt.legend(loc='upper right')
        plt.show()
        # self.plot_decision_boundary(X, y)
        # plt.show()
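
# A minimal cross-check, assuming scikit-learn is installed (it is not required
# by the class above): for two classes, sklearn's LinearDiscriminantAnalysis
# fits a direction proportional to S_W^(-1)(m_1 - m_2), so its coefficient
# vector should be parallel to w_opt (cosine similarity close to +/-1).
def check_against_sklearn(x1, x2, w_opt):
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    X = np.vstack([x1, x2])
    y = np.concatenate([np.ones(len(x1)), -np.ones(len(x2))])
    w_sk = LinearDiscriminantAnalysis().fit(X, y).coef_.ravel()
    cos = w_sk.dot(w_opt) / (np.linalg.norm(w_sk) * np.linalg.norm(w_opt))
    print("cosine similarity with sklearn's LDA direction:", cos)
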
if __name__ == "__main__":
    w1_features = np.array([[0.2331, 2.3385], [1.5207, 2.1946], [0.6499, 1.6730], [0.7757, 1.6365],
                            [1.0524, 1.7844], [1.1974, 2.0155], [0.2908, 2.0681], [0.2518, 2.1213],
                            [0.6682, 2.4797], [0.5622, 1.5118], [0.9023, 1.9692], [0.1333, 1.8340],
                            [-0.5431, 1.8704], [0.9407, 2.2948], [-0.2126, 1.7714], [0.0507, 2.3939],
                            [-0.0810, 1.5648], [0.7315, 1.9329], [0.3345, 2.2027], [1.0650, 2.4568],
                            [-0.0247, 1.7523], [0.1043, 1.6991], [0.3122, 2.4883], [0.6655, 1.7259],
                            [0.5838, 2.0466], [1.1653, 2.0226], [1.2653, 2.3757], [0.8137, 1.7987],
                            [-0.3399, 2.0828], [0.5152, 2.0798], [0.7226, 1.9449], [-0.2015, 2.3801],
                            [0.4070, 2.2373], [-0.1717, 2.1614], [-1.0573, 1.9235], [-0.2099, 2.2604]])
    w2_features = np.array([[1.4010, 1.0298], [1.2301, 0.9611], [2.0814, 0.9154], [1.1655, 1.4901],
                            [1.3740, 0.8200], [1.1829, 0.9399], [1.7632, 1.1405], [1.9739, 1.0678],
                            [2.4152, 0.8050], [2.5890, 1.2889], [2.8472, 1.4601], [1.9539, 1.4334],
                            [1.2500, 0.7091], [1.2864, 1.2942], [1.2614, 1.3744], [2.0071, 0.9387],
                            [2.1831, 1.2266], [1.7909, 1.1833], [1.3322, 0.8798], [1.1466, 0.5592],
                            [1.7087, 0.5150], [1.5920, 0.9983], [2.9353, 0.9120], [1.4664, 0.7126],
                            [2.9313, 1.2833], [1.8349, 1.1029], [1.8340, 1.2680], [2.5096, 0.7140],
                            [2.7198, 1.2446], [2.3148, 1.3392], [2.0353, 1.1808], [2.6030, 0.5503],
                            [1.2327, 1.4708], [2.1465, 1.1435], [1.5673, 0.7679], [2.9414, 1.1288]])
    w1_labels = np.ones(w1_features.shape[0])       # label 1 for class w1
    w2_labels = -1 * np.ones(w2_features.shape[0])  # label -1 for class w2
    Fisher = LDA()
    # Compute the per-class sample means m_i
    w1_mean = Fisher.cal_means(w1_features)  # shape = (n,)
    w2_mean = Fisher.cal_means(w2_features)
    # Compute the within-class scatter matrices S_i
    S1 = Fisher.cal_inter_variance_matrix(w1_features, w1_mean)
    S2 = Fisher.cal_inter_variance_matrix(w2_features, w2_mean)
    # Total within-class scatter matrix S_W
    Sw = S1 + S2
    # Inverse of the total within-class scatter matrix, S_W^(-1)
    Sw_inv = Fisher.cal_inv_matrix(Sw)
    # Optimal projection direction w*
    w_opt = Fisher.cal_optim_weight(Sw_inv, w1_mean, w2_mean)
    # Project the class means onto w* to get m_i*
    w1_mean_map = Fisher.cal_mapping_means(w_opt, w1_mean)
    w2_mean_map = Fisher.cal_mapping_means(w_opt, w2_mean)
    # Decision threshold w0
    w0 = Fisher.cal_decision_threshold(w1_mean_map, w2_mean_map)
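    # Sanity check (an added aside): since w0 = -(m1* + m2*)/2 and S_W^(-1) is
    # positive definite, the projected class means should land on opposite
    # sides of the threshold, i.e. one margin positive and one negative.
    print("margins of projected means:", w1_mean_map + w0, w2_mean_map + w0)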
    # Classify the samples to be labelled
    x_test = np.array([[1, 1.5], [1.2, 1.0], [2.0, 0.9], [1.2, 1.5], [0.23, 2.33]])
    # x_test = np.array([[1.0524, 1.7844], [1.1974, 2.0155], [0.2908, 2.0681], [0.2518, 2.1213]])  # positive class
    # x_test = np.array([[1.3740, 0.8200], [1.1829, 0.9399], [1.7632, 1.1405], [1.9739, 1.0678]])  # negative class
    y_pred = Fisher.predict(x_test)
    print(y_pred)
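    # A quick sanity check, assuming the two classes are close to linearly
    # separable: the classifier should score highly on its own training data.
    X_train = np.vstack([w1_features, w2_features])
    y_train = np.concatenate([w1_labels, w2_labels])
    print("training accuracy: {:.2%}".format(np.mean(Fisher.predict(X_train) == y_train)))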
    Fisher.viz(np.c_[w1_features, w1_labels], np.c_[w2_features, w2_labels])
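    # Optionally, visualize the learned decision boundary over the training
    # set with the plot_decision_boundary helper defined (but unused) above.
    Fisher.plot_decision_boundary(X_train, y_train)
    plt.show()
    # Optional cross-check against scikit-learn (see check_against_sklearn
    # above); uncomment if sklearn is installed.
    # check_against_sklearn(w1_features, w2_features, w_opt)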