Skip to content

Commit 0881e21

Browse files
committed
Unsupervised and Supervised Learning Techniques
1 parent 2893f8a commit 0881e21

36 files changed

+13575
-0
lines changed
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Wed Oct 17 20:59:28 2018
5+
6+
@author: sahith
7+
"""
8+
9+
import pandas as pd
import numpy as np
import math
import time

# Load the heart datasets; header=None so columns are integer-indexed.
# Column 0 is the class label, the remaining columns are binary features.
Train_data = pd.read_csv('heart_train.data',header = None)
Test_data = pd.read_csv('heart_test.data',header = None)

# Remap the 0/1 class label in column 0 to -1/+1, as AdaBoost expects.
Train_data.loc[Train_data[0] == 0, 0] = -1
Test_data.loc[Test_data[0] == 0, 0] = -1


attrs = len(Train_data.columns) #No of Attributes

rows = len(Train_data) #No of Training Data Points

# Convert to NumPy arrays for positional indexing (data[i, :]) below.
Train_data = np.array(Train_data.iloc[:,:])
Test_data = np.array(Test_data.iloc[:,:])
def output(tree, lst):
    """Evaluate a decision tree on one sample and return its +1/-1 leaf.

    The tree is a nested dict: each internal node is {attr: {0: left, 1: right}}
    where `attr` indexes into `lst` (the sample's feature vector) and a child is
    either another such node or a leaf value of 1 / -1.
    """
    node = tree
    while True:
        # Each node dict holds exactly one attribute key.
        attr = next(iter(node))
        child = node[attr]
        if child == 1 or child == -1:
            # Degenerate single-leaf tree.
            return child
        # Follow the branch selected by the sample's value for this attribute.
        node = child[lst[attr]]
        if node == 1 or node == -1:
            return node
def AdaBoost(rounds, model, data):
    """Run AdaBoost for `rounds` rounds over the fixed hypothesis list `model`.

    `data` is a NumPy array whose column 0 holds the +1/-1 label and whose
    remaining columns are the binary features consumed by output().
    Returns (eps, alpha, HSpace, predictions): per-round weighted error,
    per-round classifier weight, the tree chosen each round, and that tree's
    per-sample predictions.  Relies on module globals `rows` (training-set
    size) and the `output` helper.
    """
    weightedError = 1
    weight = [1/rows]*rows #Initial weight matrix
    alpha = []
    HSpace = [] #Trees selected so far (one per round)
    predictions = []
    labels = []  # labels[it][i] caches hypothesis it's prediction on sample i (filled in round 0)
    eps = [] #Weighted errors
    for r in range(rounds):
        print('round',r+1)
        weightedError = 1
        it = 0
        for mod in model:
            w = 0
            if r == 0:
                labels.append([])
            lst = []
            for i in range(rows):
                row = data[i, :]
                if r == 0:
                    # First round: actually evaluate the tree and cache its output.
                    k = output(mod, row)
                    lst.append(k)
                    if row[0] != k:
                        w = w + weight[i]
                    labels[it].append(k)
                else:
                    # Later rounds: reuse the cached prediction instead of re-walking the tree.
                    k = labels[it][i]
                    lst.append(k)
                    if row[0] != k:
                        w = w + weight[i]
                if w > weightedError or w > 0.5:#Optimization
                    # NOTE(review): breaking here during round 0 leaves labels[it]
                    # shorter than `rows`, so labels[it][i] can IndexError in a
                    # later round — confirm against the data before relying on it.
                    break
            if w < weightedError:
                weightedError = w
                bTree = mod #bTree indicates it is a best tree
                bestP = lst
            it += 1
        HSpace.append(bTree)
        eps.append(weightedError)
        # Classifier weight alpha_t = 1/2 ln((1-eps)/eps).
        # NOTE(review): weightedError == 0 would raise ZeroDivisionError here.
        t = 1/2 * math.log((1-weightedError)/weightedError)
        alpha.append(t)
        #Weight Updation, normalized so the weights keep summing to ~1
        sum1 = 0
        predictions.append(bestP)
        for i in range(rows):
            prediction = bestP[i]
            actual = data[i, 0]
            weight[i] = (weight[i] * (np.exp(-1 * prediction * actual * t)))/(2*np.sqrt(weightedError * (1-weightedError)))
            sum1 += weight[i]
        print("sum", sum1)
        print(weightedError)
    return eps, alpha, HSpace, predictions
99+
100+
101+
102+
103+
104+
def Hypothesis(n_attrs=None):
    """Enumerate every depth-2 decision tree over binary attributes.

    Generalized from the original, which hard-coded the module global `attrs`:
    n_attrs may now be passed explicitly (default None keeps the old behavior
    of using the global).  Attributes are indexed 1..n_attrs-1 (column 0 is
    the label).  For each choice of three attributes (i, j, k) and four leaf
    labels (a, b, c, d) in {1, -1}, five tree shapes are produced — the five
    distinct ways to place two internal nodes under the root:

      1. leaf on the 1-branch of i and of j (LLL chain down the 0 side)
      2. leaf on the 1-branch of i, leaf on the 0-branch of j (LLR)
      3. both children of i are internal nodes (balanced)
      4. leaf on the 0-branch of i, leaf on the 1-branch of j
      5. leaf on the 0-branch of i and of j (chain down the 1 side)

    Trees are nested dicts of the shape consumed by output().  The emission
    order is identical to the original nested-loop version.  Duplicate trees
    (e.g. when i == j) are emitted exactly as before.
    """
    if n_attrs is None:
        n_attrs = attrs  # fall back to the module global, as before
    hypo = []
    leaf_values = [1, -1]
    for i in range(1, n_attrs):
        for j in range(1, n_attrs):
            for k in range(1, n_attrs):
                for a in leaf_values:
                    for b in leaf_values:
                        for c in leaf_values:
                            for d in leaf_values:
                                # case 1: leaves on the 1-branches, chain on the 0 side
                                hypo.append({i: {1: a, 0: {j: {1: b, 0: {k: {0: c, 1: d}}}}}})
                                # case 2: leaf on i's 1-branch, j's 0-branch
                                hypo.append({i: {1: a, 0: {j: {0: b, 1: {k: {0: c, 1: d}}}}}})
                                # case 3: both children of the root are splits
                                hypo.append({i: {0: {j: {0: a, 1: b}}, 1: {k: {0: c, 1: d}}}})
                                # case 4: leaf on i's 0-branch, j's 1-branch
                                hypo.append({i: {0: a, 1: {j: {1: b, 0: {k: {0: c, 1: d}}}}}})
                                # case 5: leaves on the 0-branches, chain on the 1 side
                                hypo.append({i: {0: a, 1: {j: {0: b, 1: {k: {0: c, 1: d}}}}}})
    return hypo
184+
185+
186+
rounds = 5  # number of boosting rounds
print('starting time')
t1 = time.time()
Thypo = Hypothesis() #Thypo mean Total hypothesis space
print('end')
t2 = time.time()
print(t2-t1, 'secs')
print('Generated Hypothesis spaces')

# Boost over the enumerated trees on the training data.
epsilon, alpha, hypo, predictions = AdaBoost(rounds, Thypo, Train_data) #method for getting Hypothesis space and Alpha values
t2 = time.time()
print(t2-t1, 'secs')

print(hypo)

#accuracy calculation: sign of the alpha-weighted vote of the selected trees
length = len(Test_data)

accuracy = 0
for i in range(length):
    p = 0
    row = Test_data[i, :]
    y = row[0]
    for r in range(rounds):
        k = output(hypo[r], row)
        p = p + alpha[r] * k
    # p >= 0 predicts +1 (ties go to +1), otherwise -1.
    if p >= 0:
        if y == 1:
            accuracy += 1
    else:
        if y == -1:
            accuracy += 1

accuracy = accuracy/length * 100
print("Accuracy on the test data set is", accuracy)

# NOTE(review): the message names epsilon first but alpha is printed first.
for r in range(rounds):
    print('The value of epsilon and alpha for round,',r+1,'is',alpha[r],epsilon[r])

t2 = time.time()
print(t2-t1, 'secs')
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Wed Oct 17 19:29:14 2018
5+
6+
@author: sahith
7+
"""
8+
9+
import pandas as pd
import math

# Load the heart datasets; header=None so columns are integer-indexed.
# Column 0 is the binary class label (0/1); the rest are binary features.
Train_data = pd.read_csv('heart_train.data',header = None)
Test_data = pd.read_csv('heart_test.data',header = None)


attrs = len(Train_data.columns) #No of Attributes

rows = len(Train_data) #No of Training Data Points
def splitData(data, attribute):
    """Partition a DataFrame on a binary attribute column.

    Returns (rows where data[attribute] == 0, rows where data[attribute] == 1).
    """
    parts = [data.loc[data[attribute] == v, :] for v in (0, 1)]
    return parts[0], parts[1]
def label(data):
    """Return the majority class (0 or 1) of column 0; ties go to 1."""
    positives = len(data.loc[data[0] == 1, :])
    negatives = len(data.loc[data[0] == 0, :])
    return 1 if positives >= negatives else 0
def Train(data, attrList):
    """Fit a one-level decision stump on `data` over the candidate columns in `attrList`.

    Picks the attribute whose binary split minimizes the weighted conditional
    entropy of the label (column 0), then labels each branch with the majority
    class of its partition (via label()/splitData()).

    Fix over the original: the per-branch weight divided by the module global
    `rows` instead of len(data).  Attribute selection was unaffected as long as
    callers passed samples of exactly `rows` points (a constant divisor does
    not change the argmin), but the stump now works for any sample size and no
    longer depends on a hidden global.

    Returns (tree, bestAttr) where tree is {bestAttr: {0: label0, 1: label1}}.
    """
    n = len(data)
    # A binary target's entropy is at most 1 bit, so every attribute's score
    # satisfies score <= 1; `<=` below keeps the original tie-breaking (the
    # last attribute with the minimal score wins).
    bestEntropy = 1
    bestAttr = -1
    for i in attrList:
        entropy = 0  # H(label | attribute i), accumulated over both branches
        for j in range(2):  # binary attribute values 0 and 1
            temp = data.loc[data[i] == j, [0, i]]
            tempLen = len(temp)
            if tempLen == 0:
                continue  # empty branch contributes nothing
            p = len(temp.loc[temp[0] == 1])
            e = len(temp.loc[temp[0] == 0])
            # -(p/m)log2(p/m) - (e/m)log2(e/m), with 0*log(0) treated as 0
            branch = 0
            if p > 0:
                branch += (p/tempLen) * math.log2(p/tempLen)
            if e > 0:
                branch += (e/tempLen) * math.log2(e/tempLen)
            entropy += -1 * tempLen/n * branch
        if entropy <= bestEntropy:
            bestEntropy = entropy
            bestAttr = i
    # Build the stump: one split on bestAttr, majority-class leaves.
    tree = {bestAttr: {}}
    data0, data1 = splitData(data, bestAttr)
    tree[bestAttr][0] = label(data0)
    tree[bestAttr][1] = label(data1)
    return tree, bestAttr
65+
66+
67+
def output(tree, lst):
    """Evaluate a decision tree on one sample and return its 0/1 leaf.

    The tree is a nested dict: each internal node is {attr: {0: left, 1: right}}
    where `attr` indexes into `lst` (the sample's feature row) and a child is
    either another such node or a leaf value of 0 / 1.
    """
    node = tree
    while True:
        # Each node dict holds exactly one attribute key.
        attr = next(iter(node))
        child = node[attr]
        if child == 1 or child == 0:
            # Degenerate single-leaf tree.
            return child
        # Descend along the branch chosen by this sample's attribute value.
        node = child[lst[attr]]
        if node == 1 or node == 0:
            return node
81+
82+
def acc(model, data):
    """Return the percentage accuracy of majority voting over `model`.

    `model` is a list of decision trees (dicts consumed by output()); `data`
    is a DataFrame whose column 0 is the true 0/1 label, indexed 0..len-1.
    Vote ties are broken in favor of class 1, as before.
    """
    total = len(data)
    correct = 0
    for idx in range(total):
        sample = data.loc[idx, :]
        votes = [0, 0]
        for classifier in model:
            votes[output(classifier, sample)] += 1
        predicted = 0 if votes[0] > votes[1] else 1
        if sample[0] == predicted:
            correct += 1
    return correct/total * 100
100+
101+
102+
103+
104+
105+
# Bagging driver: repeat the whole experiment 25 times and keep the best
# test accuracy observed across repetitions.
it = 25 #Running it for for 25 Iteration to select the best Test accuracy out of 25 Iterations
Test_accuracy = 0
while(it != 0):
    n = 20 #20 samplings  -- NOTE(review): `n` is unused; the literal 20 is repeated below
    classifiers = []
    attributeList = []

    # Candidate attributes are columns 1..attrs-1 (column 0 is the label).
    for i in range(1, attrs):
        attributeList.append(i)

    # Draw 20 bootstrap samples of the training set; each stump consumes the
    # attribute it split on, so every later stump uses a different attribute.
    for i in range(20):
        data = Train_data.sample(n = rows, replace = True)
        Hspace, attr = Train(data, attributeList)
        attributeList.remove(attr)
        classifiers.append(Hspace)
    accuracy = acc(classifiers, Test_data)
    if accuracy > Test_accuracy:
        Test_accuracy = accuracy
    print('Iteration', it, 'Accuracy', Test_accuracy)
    it -= 1


print("Accuracy on test data set is", Test_accuracy)

# NOTE(review): `classifiers` here is the LAST repetition's ensemble, which is
# not necessarily the one that achieved Test_accuracy above.
print('Classifiers are', classifiers)
print('length', len(classifiers))

0 commit comments

Comments
 (0)