
Commit c9a57aa

Refactored Files

1 parent a4edccf commit c9a57aa


46 files changed: +1686 -87 lines

+191 lines
@@ -0,0 +1,191 @@
import numpy as np

# Log-likelihood of a single row; labels are in {-1, +1}, so (y + 1)/2 maps them to {0, 1}
def compute_loss(datarow, w, b):
    wxb = np.dot(w.T, datarow[1:23]) + b
    return (((datarow[0] + 1) / 2) * wxb) - np.log(1 + np.exp(wxb))

# Per-row gradients of the log-likelihood with respect to w and b
def compute_wb(datarow, w, b):
    wxb = np.exp(np.dot(w.T, datarow[1:23]) + b)
    py1 = wxb / (1 + wxb)
    diff_b = ((datarow[0] + 1) / 2) - py1
    diff_w = datarow[1:23] * diff_b
    return diff_w, diff_b

# Predict the more probable label
def predict(w, b, datarow):
    wxb = np.exp(np.dot(w.T, datarow) + b)
    py_positive = wxb / (1 + wxb)
    py_negative = 1 / (1 + wxb)
    return 1.0 if py_positive >= py_negative else -1.0

# Fraction of predictions that match the actual labels
def accuracy(Y1, Y2):
    counter = sum(1 for pred, actual in zip(Y1, Y2) if pred == actual)
    return counter / len(Y1)
########################## TRAIN ##########################
# Read the training file and store its contents in a numpy array
train_data = np.genfromtxt('park_train.data', delimiter=",")
train_data_length = len(train_data)

# Map 0 labels to -1 so that (y + 1)/2 in the loss and gradients is correct,
# matching the handling of the validation and test files below
train_data[train_data[:, 0] == 0, 0] = -1

# Gradient ascent parameter initialization
step_size = 0.000001
lambda_array = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]

# w and b dict for all lambda values
wb_lambda_dict = {}
cols = train_data.shape[1] - 1
for lambda_value in lambda_array:

    # Gradient ascent parameters
    iteration_counter = 1
    w_vector = 0.25 * np.ones((cols,), dtype=float)
    b_value = 0.75
    total_loss = 0.0

    # Iterate until convergence
    while True:

        # Save the previous loss
        prev_loss = total_loss
        total_loss = 0.0
        grad_sum_w = np.zeros((cols,))
        grad_sum_b = 0.0

        # Accumulate loss and gradients over all training rows
        for i in range(train_data_length):
            total_loss += compute_loss(train_data[i], w_vector, b_value)
            temp_w, temp_b = compute_wb(train_data[i], w_vector, b_value)
            grad_sum_w += temp_w
            grad_sum_b += temp_b

        # L2 regularization: the penalty is (lambda/2) * ||w||^2, whose gradient is lambda * w
        total_loss -= (lambda_value / 2) * (np.linalg.norm(w_vector) ** 2)
        grad_sum_w -= lambda_value * w_vector

        print("Iteration:", iteration_counter, "Total loss:", total_loss)

        # Stop when the improvement in loss is minimal
        if total_loss - prev_loss < 0.00015 and iteration_counter >= 2:
            break

        # Update w and b
        w_vector += step_size * grad_sum_w
        b_value += step_size * grad_sum_b

        iteration_counter += 1

    wb_lambda_dict[lambda_value] = np.append(w_vector, b_value)
############################# VALIDATION #########################################
# Open the validation file and read its contents
with open('park_validation.data', 'r') as park_valid_file:
    file_contents = park_valid_file.read()

# Array to hold the parsed validation data (58 rows, 23 columns)
valid_data = np.empty((58, 23), dtype=float)

# Row and column counters
i = 0
j = 0

# Parse the contents and store them in the numpy array
for line in file_contents.split('\n'):
    # Skip a trailing empty line, if any
    if not line:
        continue
    for dt in line.split(","):
        # If y is 0, change it to -1
        if j == 0 and dt == '0':
            valid_data[i, j] = -1
        else:
            valid_data[i, j] = dt
        j += 1
    j = 0
    i += 1

# Accuracy dictionary for validation data
acc_dict_valid = {}

best_valid_lambda = -100
best_valid_accuracy = -100

# Calculate accuracy on the validation data for each lambda
for lamb, vector in wb_lambda_dict.items():
    if len(vector) != 0:
        pred_array = []
        actual_data = []
        for i in range(len(valid_data)):
            slice_data = valid_data[i][1:23]
            actual_data.append(valid_data[i][0])
            pred_array.append(predict(vector[0:22], vector[22], slice_data))

        acc_dict_valid[lamb] = accuracy(pred_array, actual_data)

        if acc_dict_valid[lamb] >= best_valid_accuracy:
            best_valid_lambda = lamb
            best_valid_accuracy = acc_dict_valid[lamb]

print("Accuracy on validation data for each lambda:", acc_dict_valid)
############################# TEST #########################################
# Open the test file and read its contents
with open('park_test.data', 'r') as park_test_file:
    file_contents = park_test_file.read()

# Array to hold the parsed test data (59 rows, 23 columns)
test_data = np.empty((59, 23), dtype=float)

# Row and column counters
i = 0
j = 0

# Parse the contents and store them in the numpy array
for line in file_contents.split('\n'):
    # Skip a trailing empty line, if any
    if not line:
        continue
    for dt in line.split(","):
        # If y is 0, change it to -1
        if j == 0 and dt == '0':
            test_data[i, j] = -1
        else:
            test_data[i, j] = dt
        j += 1
    j = 0
    i += 1

# Calculate accuracy on the test data using the best lambda from validation
best_wb = wb_lambda_dict[best_valid_lambda]
pred_array = []
actual_data = []

for i in range(len(test_data)):
    slice_data = test_data[i][1:23]
    actual_data.append(test_data[i][0])
    pred_array.append(predict(best_wb[0:22], best_wb[22], slice_data))

print("Best w vector:", best_wb[0:22])
print("Best b value:", best_wb[22])
print("Accuracy on test data:", accuracy(pred_array, actual_data), "for best lambda:", best_valid_lambda)
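Note: the file above accumulates the loss and the gradients one row at a time. For reference, the same L2-regularized gradient-ascent step can be written in a handful of vectorized NumPy operations. The sketch below is illustrative only, assuming the file's data layout (22 features per row, labels already mapped to {-1, +1}); the function name gradient_step is hypothetical and not part of the commit.

import numpy as np

def gradient_step(X, y, w, b, step, lam):
    # Hypothetical vectorized sketch: X is (n, 22), y is (n,) with values in {-1, +1}
    k = X @ w + b                   # linear scores for every row at once
    p = 1.0 / (1.0 + np.exp(-k))    # P(y = +1 | x), same as py1 in compute_wb
    resid = (y + 1) / 2 - p         # labels mapped to {0, 1} minus probability
    grad_w = X.T @ resid - lam * w  # data-fit gradient summed over rows, plus L2 term
    grad_b = resid.sum()
    return w + step * grad_w, b + step * grad_b

Each call performs one full-batch update, equivalent to summing compute_wb over all rows and subtracting lambda * w.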
+190 lines
@@ -0,0 +1,190 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 23 13:38:21 2018

@author: sahith
"""

import numpy as np

Train_data = np.genfromtxt('park_train.data', delimiter=",")
Test_data = np.genfromtxt('park_test.data', delimiter=",")
Validation_data = np.genfromtxt('park_validation.data', delimiter=",")

# Set 0 to -1 in the target column
Train_data[Train_data[:, 0] == 0, 0] = -1
Test_data[Test_data[:, 0] == 0, 0] = -1
Validation_data[Validation_data[:, 0] == 0, 0] = -1


def sigmoid_function(theta):
    s = 1 / (1 + np.exp(-theta))
    return s

# Log-likelihood of the data; (y + 1)/2 maps labels from {-1, +1} to {0, 1},
# consistent with the gradients used below
def compute_loss(data, weight, bias):
    m = data.shape[0]
    loss = 0
    for i in range(m):
        x = data[i, 1:]
        y = data[i, 0]
        k = np.dot(weight.T, x) + bias
        loss += ((y + 1) / 2) * k - np.log(1 + np.exp(k))
    return loss
def logistic_regression(data, weight, bias, step):
    m = data.shape[0]
    loss = compute_loss(data, weight, bias)
    it = 0
    while True:
        it += 1
        gradient_w = 0
        gradient_b = 0
        for i in range(m):
            x = data[i, 1:]
            k = np.dot(weight.T, x) + bias
            p = sigmoid_function(k)
            gradient_w += x * ((data[i, 0] + 1) / 2 - p)
            gradient_b += (data[i, 0] + 1) / 2 - p
        w = weight + step * gradient_w
        b = bias + step * gradient_b
        # Evaluate the loss at the updated parameters
        loss1 = compute_loss(data, w, b)
        if loss1 - loss < 0.0001 and it >= 2:
            break
        loss = loss1
        weight = w
        bias = b
    print("Iterations", it)
    return w, b
def logistic_regression_l1(data, weight, bias, step, l1):
    m = data.shape[0]
    # Penalized objective: log-likelihood minus l1 * ||w||_1
    loss = compute_loss(data, weight, bias) - l1 * np.linalg.norm(weight, 1)
    it = 0
    while True:
        it += 1
        gradient_w = 0
        gradient_b = 0
        for i in range(m):
            x = data[i, 1:]
            k = np.dot(weight.T, x) + bias
            p = sigmoid_function(k)
            gradient_w += x * ((data[i, 0] + 1) / 2 - p)
            gradient_b += (data[i, 0] + 1) / 2 - p
        # Subgradient of the L1 penalty is l1 * sign(w)
        gradient_w = gradient_w - l1 * np.sign(weight)
        w = weight + step * gradient_w
        b = bias + step * gradient_b
        # Penalized objective at the updated parameters
        loss1 = compute_loss(data, w, b) - l1 * np.linalg.norm(w, 1)
        if loss1 - loss < 0.0001 and it >= 2:
            break
        loss = loss1
        weight = w
        bias = b
    return w, b
def logistic_regression_l2(data, weight, bias, step, l2):
    m = data.shape[0]
    it = 0
    # Penalized objective: log-likelihood minus (l2/2) * ||w||^2
    loss = compute_loss(data, weight, bias) - (l2 / 2) * (np.linalg.norm(weight) ** 2)
    while True:
        it += 1
        gradient_w = 0
        gradient_b = 0
        for i in range(m):
            x = data[i, 1:]
            k = np.dot(weight.T, x) + bias
            p = sigmoid_function(k)
            gradient_w += x * ((data[i, 0] + 1) / 2 - p)
            gradient_b += (data[i, 0] + 1) / 2 - p
        # Gradient of the (l2/2) * ||w||^2 penalty is l2 * w
        gradient_w = gradient_w - (l2 * weight)
        w = weight + step * gradient_w
        b = bias + step * gradient_b
        # Penalized objective at the updated parameters
        loss1 = compute_loss(data, w, b) - (l2 / 2) * (np.linalg.norm(w) ** 2)
        if loss1 - loss < 0.0001 and it >= 2:
            break
        loss = loss1
        weight = w
        bias = b
    return w, b
# Classification accuracy (percent): predict +1 when w.x + b > 0, else -1
def accuracy(data, weight, bias):
    m = data.shape[0]
    cnt = 0
    for i in range(m):
        x = data[i, 1:]
        t = np.dot(weight.T, x) + bias
        if t > 0:
            if data[i, 0] > 0:
                cnt += 1
        else:
            if data[i, 0] < 0:
                cnt += 1
    return cnt / m * 100
cols = Train_data.shape[1] - 1
weight = np.array([0.25] * cols)
bias = 0.75
learningRate = [0.000001]  # Step sizes (learning rates) to try

# Unregularized logistic regression
acc = 0
for lr in learningRate:
    w, b = logistic_regression(Train_data, weight, bias, lr)
    valid_acc = accuracy(Validation_data, w, b)
    print("Validation accuracy is", valid_acc)
    if valid_acc >= acc:
        acc = valid_acc
        finalWeight = w
        finalBias = b

print(finalWeight)
print('Accuracy on test data is', accuracy(Test_data, finalWeight, finalBias))

# L2-regularized logistic regression
acc = 0
l2 = [0.0001, 0.001, 0.01, 0.1, 0.5, 1, 10, 1000]
for lr in learningRate:
    for i in l2:
        w, b = logistic_regression_l2(Train_data, weight, bias, lr, i)
        valid_acc = accuracy(Validation_data, w, b)
        print("Validation accuracy for l2 =", i, "is", valid_acc)
        if valid_acc >= acc:
            bestl2 = i
            acc = valid_acc
            finalWeight = w
            finalBias = b

print("Weight vector of l2 regularization", finalWeight)
print("Bias of l2 regularization", finalBias)
print("Best l2 constant", bestl2)
print('Accuracy on test data with l2 penalty is', accuracy(Test_data, finalWeight, finalBias))


# L1-regularized logistic regression
acc = 0
l1 = [0.0001, 0.001, 0.01, 0.1, 0.5, 1, 10, 1000]
for lr in learningRate:
    for i in l1:
        w, b = logistic_regression_l1(Train_data, weight, bias, lr, i)
        valid_acc = accuracy(Validation_data, w, b)
        print("Validation accuracy for l1 =", i, "is", valid_acc)
        if valid_acc >= acc:
            bestl1 = i
            acc = valid_acc
            finalWeight = w
            finalBias = b

print("Weight vector of l1 regularization", finalWeight)
print("Bias of l1 regularization", finalBias)
print("Best l1 constant", bestl1)
print('Accuracy on test data with l1 penalty is', accuracy(Test_data, finalWeight, finalBias))
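The two regularized variants above share the same data-fit gradient and differ only in the penalty term added to the objective. A compact way to state the difference, as a sketch for illustration (the helper name penalty_gradient and the parameter c are hypothetical, not part of the commit):

import numpy as np

def penalty_gradient(w, c, kind):
    # L2: the gradient of (c/2) * ||w||^2 is c * w, which shrinks every weight proportionally
    if kind == "l2":
        return c * w
    # L1: a subgradient of c * ||w||_1 is c * sign(w), a constant pull toward zero
    # that tends to drive small weights exactly to zero (sparsity)
    return c * np.sign(w)

np.sign returns 0 at w = 0, a common convention for the L1 subgradient at that point.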
Binary file not shown.
