 """K-nearest neighbors"""
 
 class KNNClassifier():
 
-    def __init__(self, k = 5) -> None:
-        self.k = k
-        self.data = None
-        self.labels = None
-
-    def euclidean_distance(self, vector1, vector2):
-        return np.linalg.norm(vector1 - vector2)
-
-    def find_nearest_neighbour(self, this_sample):
-        distances = np.asfarray([self.euclidean_distance(this_sample, sample) for sample in self.data])
-        indexes = distances.argsort()
-        neighbours = self.labels[indexes]
-        k_neighbours = neighbours[:self.k]
+    def __init__(self, n_neighbors=3, metric='euclidean', weights='uniform'):
+        self.k = n_neighbors
+        self.metric = metric
+        self.weights = weights
+        self.distance = {
+            'euclidean': lambda x, y: np.linalg.norm(x - y),
+            'manhattan': lambda x, y: np.sum(np.abs(x - y)),
+            'chebyshev': lambda x, y: np.max(np.abs(x - y)),
+            'cosine': lambda x, y: 1 - np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y)),
+            'canberra': lambda x, y: np.sum(np.abs(x - y) / (np.abs(x) + np.abs(y) + 1e-15)),
+            'braycurtis': lambda x, y: np.sum(np.abs(x - y) / (np.sum(np.abs(x)) + np.sum(np.abs(y)) + 1e-15)),
+            'hamming': lambda x, y: np.average(np.atleast_1d(x) != np.atleast_1d(y)),
+        }[metric]
+
+    def fit(self, X, y):
+        self.X = X
+        self.y = y
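A rough way to sanity-check the hand-rolled metrics above, assuming scipy is installed and the KNNClassifier from this diff is in scope; scipy.spatial.distance calls the manhattan metric 'cityblock', and the small epsilon terms in canberra/braycurtis stay within np.isclose tolerance. The test vectors are made up for illustration:

import numpy as np
from scipy.spatial import distance as sd

u, v = np.array([1.0, 2.0, 3.0]), np.array([2.0, 0.0, 3.0])

# scipy reference implementation for each metric name used in the dict above
reference = {
    'euclidean': sd.euclidean, 'manhattan': sd.cityblock, 'chebyshev': sd.chebyshev,
    'cosine': sd.cosine, 'canberra': sd.canberra, 'braycurtis': sd.braycurtis,
    'hamming': sd.hamming,
}
for name, ref in reference.items():
    ours = KNNClassifier(metric=name).distance(u, v)
    assert np.isclose(ours, ref(u, v)), name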
 
-        return np.argmax(np.bincount(k_neighbours.astype(int)))
-
-    def fit(self, data, labels) -> None:
-        self.data = data
-        self.labels = labels
-
-    def predict(self, new_data):
-        return np.asfarray([self.find_nearest_neighbour(sample) for sample in new_data])
+    def predict(self, X):
+        predictions = []
+        for x in X:
+            distances = np.array([self.distance(x, y) for y in self.X])
+            k_nearest_neighbors = self.y[distances.argsort()[:self.k]]
+
+            if self.weights == 'distance':
+                k_weights = np.array([1 / (distance + 1e-15) for distance in np.sort(distances)[:self.k]])
+                k_weights = k_weights / np.sum(k_weights)
+                predictions.append(np.argmax(np.bincount(k_nearest_neighbors, weights=k_weights)))
+            elif self.weights == 'uniform':
+                predictions.append(np.argmax(np.bincount(k_nearest_neighbors)))
+
+        return np.array(predictions)
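For context on the 'distance' branch above: each of the k nearest labels is counted with weight 1/(d + eps), so a single close neighbour can outvote several distant ones. A minimal sketch of just that voting step, with made-up labels and distances:

import numpy as np

k_labels = np.array([0, 1, 1])         # labels of the k = 3 nearest neighbours (made up)
k_dists = np.array([0.1, 0.4, 0.5])    # their distances to the query point (made up)

w = 1.0 / (k_dists + 1e-15)            # inverse-distance weights
w = w / np.sum(w)                      # normalise to sum to 1

# bincount sums the weights per label; argmax picks the heaviest class
print(np.argmax(np.bincount(k_labels, weights=w)))   # 0 -- the one close neighbour outweighs the two far ones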
 
 
 if __name__ == "__main__":
     X_train, y_test = generate_clusterization_data(n_clusters = 3, n_samples = 30)
 
-    knn = KNNClassifier(k = 5)
+    knn = KNNClassifier(n_neighbors = 5)
     knn.fit(X_train, y_test)
 
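A rough end-to-end usage sketch of the new interface; generate_clusterization_data is not shown in this diff, so the blobs below are a stand-in, and integer class labels are assumed (np.bincount requires them). Assumes the KNNClassifier from this diff is in scope:

import numpy as np

# Stand-in training data: three Gaussian blobs with integer class labels 0, 1, 2.
rng = np.random.default_rng(0)
X_train = np.vstack([rng.normal(loc=c, scale=0.5, size=(10, 2)) for c in (0.0, 3.0, 6.0)])
y_train = np.repeat(np.arange(3), 10)

knn = KNNClassifier(n_neighbors=5, metric='manhattan', weights='distance')
knn.fit(X_train, y_train)

# Query points near each blob centre should be assigned that blob's label.
print(knn.predict(np.array([[0.0, 0.0], [3.0, 3.0], [6.0, 6.0]])))   # expected: [0 1 2]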