Skip to content

Commit d97c3c8

Browse files
committed
Added KNN
1 parent c9a57aa commit d97c3c8

File tree

1 file changed

+87
-0
lines changed
  • K-Nearest-Neighbor

1 file changed

+87
-0
lines changed

K-Nearest-Neighbor/KNN

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Sun Sep 23 23:32:17 2018
5+
6+
@author: sahith
7+
"""
8+
9+
# KNN: k-nearest-neighbour classification on the park_* data files
10+
11+
import pandas as pd
12+
import numpy as np
13+
# Load the three data splits. The files carry no header row, so pandas
# assigns integer column labels.
Training_data = pd.read_csv('park_train.data', header=None)
Test_data = pd.read_csv('park_test.data', header=None)
Validation_data = pd.read_csv('park_validation.data', header=None)

# Row count and column count of the training frame.
trainData_len, n = Training_data.shape

# Column 0 is used as the target; columns 1..22 are used as the features.
Train_y = np.array(Training_data.iloc[:, 0])
Train_x = np.array(Training_data.iloc[:, 1:23])
22+
23+
24+
def computeAccuracy(k, data, train_x=None, train_y=None):
    """Return the k-nearest-neighbour accuracy on *data*, as a percentage.

    Each row of *data* is classified by a majority vote among its ``k``
    closest training rows (Euclidean distance). The row is predicted as
    class 1 when strictly more than ``k / 2`` of those neighbours have
    label 1, and as class 0 otherwise.

    Args:
        k: Number of neighbours that vote; must satisfy
            ``1 <= k <= len(train_x)``.
        data: pandas DataFrame whose column 0 holds the label and whose
            following columns hold the features.
        train_x: Optional 2-D array of training features. Defaults to the
            module-level ``Train_x``.
        train_y: Optional 1-D array of training labels aligned with
            ``train_x``. Defaults to the module-level ``Train_y``.

    Returns:
        Percentage (0-100, float) of rows in *data* whose predicted label
        equals the label stored in column 0.

    Raises:
        ValueError: If ``k`` is outside ``1..len(train_x)`` or *data* has
            no rows.
    """
    # Fall back to the module-level training set so that existing
    # two-argument calls keep working unchanged.
    if train_x is None:
        train_x = Train_x
    if train_y is None:
        train_y = Train_y
    train_x = np.asarray(train_x)
    train_y = np.asarray(train_y)

    n_train = len(train_x)
    if not 1 <= k <= n_train:
        raise ValueError(
            "k must be between 1 and the number of training rows "
            "(%d), got %r" % (n_train, k)
        )
    n_samples = len(data)
    if n_samples == 0:
        raise ValueError("data must contain at least one row")

    # Take exactly as many feature columns as the training set has, so the
    # row-wise subtraction below always lines up.
    n_features = train_x.shape[1]
    data_x = np.array(data.iloc[:, 1:1 + n_features])
    data_y = np.array(data.iloc[:, 0])

    correct = 0
    for row, label in zip(data_x, data_y):
        # Distance from this row to every training row in one call.
        distances = np.linalg.norm(train_x - row, axis=1)
        # A stable sort keeps equidistant neighbours in training order, so
        # ties are resolved in favour of the lower training index.
        nearest = np.argsort(distances, kind="stable")[:k]
        positives = int((train_y[nearest] == 1).sum())
        predicted = 1 if positives > k / 2 else 0
        if label == predicted:
            correct += 1

    return correct / n_samples * 100
64+
65+
66+
# Print the third feature value of the first training row.
print(Train_x[0][2])

# Candidate neighbour counts to evaluate on the validation split.
KNN = [1, 5, 11, 15, 21]
bestAccuracy = 0
bestK = []

# Keep every k that reaches the highest validation accuracy seen so far.
for k in KNN:
    acc = computeAccuracy(k, Validation_data)
    print('Accuracy on Validation data set is',acc,' for value of K =',k)
    if not acc >= bestAccuracy:
        continue
    if acc > bestAccuracy:
        # Strictly better: discard the k values recorded for the old best.
        bestK = []
    bestK.append(k)
    bestAccuracy = acc

print(bestK)

# Evaluate on the test split with the first of the best-scoring k values.
acc = computeAccuracy(bestK[0], Test_data)
print('The best value K can take is', bestK)
print('Accuracy on test data set is ',acc)
85+
86+
87+

0 commit comments

Comments
 (0)