|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +""" |
| 4 | +Created on Sun Sep 23 23:32:17 2018 |
| 5 | +
|
| 6 | +@author: sahith |
| 7 | +""" |
| 8 | + |
| 9 | +#KNN |
| 10 | + |
| 11 | +import pandas as pd |
| 12 | +import numpy as np |
# Read the train / test / validation splits. The files have no header row,
# so pandas labels the columns 0, 1, 2, ...
Training_data = pd.read_csv('park_train.data', header=None)
Validation_data = pd.read_csv('park_validation.data', header=None)
Test_data = pd.read_csv('park_test.data', header=None)

# Training-set dimensions: number of rows and number of columns.
trainData_len, n = Training_data.shape

# Column 0 holds the target value; columns 1..22 hold the features.
Train_y = np.array(Training_data.iloc[:, 0])
Train_x = np.array(Training_data.iloc[:, 1:23])
| 22 | + |
| 23 | + |
def computeAccuracy(k, data, train_x=None, train_y=None):
    """Return the k-nearest-neighbour classification accuracy on *data*.

    Each row of *data* is classified by a majority vote among the ``k``
    training points closest to it in Euclidean distance.  A row is predicted
    as 1 only when strictly more than half of its neighbours have target 1;
    otherwise (including an exact tie) it is predicted as 0.

    Args:
        k: Number of neighbours that vote. Must satisfy
            ``1 <= k <= len(train_x)``.
        data: pandas DataFrame whose column 0 is the target value and whose
            following columns are the features, laid out like the training
            data.
        train_x: Optional 2-D array of training features (one row per
            training point). Defaults to the module-level ``Train_x``.
        train_y: Optional 1-D array of training targets aligned with
            ``train_x``. Defaults to the module-level ``Train_y``.

    Returns:
        Percentage (0-100, as a float) of rows in *data* whose predicted
        target equals the value in column 0.  Returns ``0.0`` when *data*
        has no rows.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            training points.
    """
    # Fall back to the module-level training set when none is supplied, so
    # existing two-argument calls behave as before.
    if train_x is None:
        train_x = Train_x
    if train_y is None:
        train_y = Train_y
    train_x = np.asarray(train_x)
    train_y = np.asarray(train_y)

    n_train = len(train_x)
    if not 1 <= k <= n_train:
        raise ValueError(
            f"k must be between 1 and the number of training points "
            f"({n_train}); got {k}"
        )

    n_rows = len(data)
    if n_rows == 0:
        # Avoid dividing by zero; no rows means no correct predictions.
        return 0.0

    # Take as many feature columns as the training set has, starting right
    # after the target column.
    n_features = train_x.shape[1]
    data_x = np.array(data.iloc[:, 1:1 + n_features])
    data_y = np.array(data.iloc[:, 0])

    correct = 0
    for features, target in zip(data_x, data_y):
        # Distance from this row to every training point in one call.
        distances = np.linalg.norm(train_x - features, axis=1)
        # A stable sort keeps equally distant points in training-index
        # order, so ties are resolved in favour of the lower index.
        nearest = np.argsort(distances, kind="stable")[:k]
        positives = np.count_nonzero(train_y[nearest] == 1)
        predicted = 1 if positives > k / 2 else 0
        if target == predicted:
            correct += 1

    return correct / n_rows * 100
| 64 | + |
| 65 | + |
print(Train_x[0][2])

# Candidate neighbourhood sizes to evaluate on the validation set.
KNN = [1, 5, 11, 15, 21]

# Highest validation accuracy seen so far and every k that reached it.
bestAccuracy = 0
bestK = []
for k in KNN:
    acc = computeAccuracy(k, Validation_data)
    print('Accuracy on Validation data set is', acc, ' for value of K =', k)
    if acc > bestAccuracy:
        # Strictly better: discard earlier candidates and start a new list.
        bestK = [k]
        bestAccuracy = acc
    elif acc == bestAccuracy:
        # Equal to the current best: keep this k alongside the others.
        bestK.append(k)

print(bestK)

# Score the test set with the first (smallest listed) of the best k values.
acc = computeAccuracy(bestK[0], Test_data)
print('The best value K can take is', bestK)
print('Accuracy on test data set is ', acc)
| 85 | + |
| 86 | + |
| 87 | + |
0 commit comments