Skip to content

Commit 20e3a2f

Browse files
committed
add knn weights and distances
1 parent dcb0222 commit 20e3a2f

File tree

1 file changed

+34
-31
lines changed

1 file changed

+34
-31
lines changed

knn.py

+34-31
Original file line numberDiff line numberDiff line change
@@ -6,45 +6,48 @@
66

77

88
"""K-nearest neighbors"""
9-
109
class KNNClassifier():
    """K-nearest-neighbours classifier with a selectable metric and vote weighting.

    Prediction is brute force: every query sample is compared against every
    stored training sample with the chosen distance function, and the labels
    of the ``n_neighbors`` closest samples are combined by a (possibly
    distance-weighted) majority vote.

    Labels are cast to ``int`` before voting because ``np.bincount`` only
    accepts non-negative integers; float-coded class ids such as ``1.0`` are
    therefore accepted.

    Attributes set by ``__init__``: ``k``, ``metric``, ``weights``,
    ``distance`` (the callable selected by ``metric``).
    Attributes set by ``fit``: ``X``, ``y``.
    """

    # Guard added to denominators so that a zero denominator does not
    # produce ``nan``/``inf``.
    _EPS = 1e-15

    # Pairwise distance functions, each taking two 1-D vectors.
    _METRICS = {
        'euclidean': lambda x, y: np.linalg.norm(x - y),
        'manhattan': lambda x, y: np.sum(np.abs(x - y)),
        'chebyshev': lambda x, y: np.max(np.abs(x - y)),
        # The epsilon keeps the distance finite when either vector is all zeros.
        'cosine': lambda x, y: 1 - np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y) + 1e-15),
        'canberra': lambda x, y: np.sum(np.abs(x - y) / (np.abs(x) + np.abs(y) + 1e-15)),
        'braycurtis': lambda x, y: np.sum(np.abs(x - y) / (np.sum(np.abs(x)) + np.sum(np.abs(y)) + 1e-15)),
        'hamming': lambda x, y: np.average(np.atleast_1d(x) != np.atleast_1d(y)),
    }

    _WEIGHTS = ('uniform', 'distance')

    def __init__(self, n_neighbors=3, metric='euclidean', weights='uniform'):
        """Configure the classifier.

        Args:
            n_neighbors: Number of nearest neighbours that take part in the vote.
            metric: Key of the distance function; one of the keys of ``_METRICS``.
            weights: ``'uniform'`` for one vote per neighbour, or ``'distance'``
                for votes weighted by inverse distance.

        Raises:
            ValueError: If ``n_neighbors`` is smaller than 1 or ``weights`` is
                not a supported scheme.
            KeyError: If ``metric`` is not a supported metric name.
        """
        if n_neighbors < 1:
            raise ValueError(f"n_neighbors must be at least 1, got {n_neighbors!r}")
        # Reject unknown schemes up front; otherwise predict() would have no
        # branch to take and could only fail later.
        if weights not in self._WEIGHTS:
            raise ValueError(
                f"unknown weights {weights!r}; expected one of {self._WEIGHTS}"
            )
        if metric not in self._METRICS:
            raise KeyError(
                f"unknown metric {metric!r}; expected one of {sorted(self._METRICS)}"
            )

        self.k = n_neighbors
        self.metric = metric
        self.weights = weights
        self.distance = self._METRICS[metric]

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Training samples, one sample per row.
            y: Class label of each row of ``X``.
        """
        # Converted to arrays so that plain lists work with the vector
        # arithmetic of the metrics and with fancy indexing in predict().
        self.X = np.asarray(X)
        self.y = np.asarray(y)

    def predict(self, X):
        """Return the predicted class id for every sample in ``X``.

        Args:
            X: Query samples, one sample per row.

        Returns:
            A NumPy array holding one integer class id per query sample.
        """
        return np.array([self._vote(sample) for sample in np.asarray(X)])

    def _vote(self, sample):
        """Return the winning class id among the k nearest neighbours of ``sample``."""
        distances = np.array([self.distance(sample, known) for known in self.X])
        # Sort once and reuse the indices for both the labels and the weights.
        nearest = distances.argsort()[:self.k]
        labels = self.y[nearest].astype(int)

        if self.weights == 'distance':
            # Inverse-distance weights; normalising them is unnecessary
            # because argmax is unaffected by a positive scale factor.
            votes = np.bincount(labels, weights=1 / (distances[nearest] + self._EPS))
        elif self.weights == 'uniform':
            votes = np.bincount(labels)
        else:
            # Only reachable if the attribute was changed after construction.
            raise ValueError(
                f"unknown weights {self.weights!r}; expected one of {self._WEIGHTS}"
            )
        return np.argmax(votes)
4144

4245

4346

4447
if __name__ == "__main__":
4548
X_train, y_test = generate_clusterization_data(n_clusters = 3, n_samples = 30)
4649

47-
knn = KNNClassifier(k = 5)
50+
knn = KNNClassifier(n_neighbors = 5)
4851
knn.fit(X_train, y_test)
4952

5053

0 commit comments

Comments
 (0)