import numpy as np
import pandas as pd
class LogisticRegression:
    # Model: z = X.w + b, then h(x) = sigmoid(z)
    def __init__(self, x_train, y_train, w, b, numOfTrainingItems, learning_rate):
        self.x_train = x_train
        self.y_train = y_train
        self.w = w
        self.b = b
        self.numOfTrainingItems = numOfTrainingItems
        self.learning_rate = learning_rate

    # Sigmoid hypothesis: h(x) = 1 / (1 + e^(-z)) with z = x.w + b (w = theta1, b = theta0)
    def Sigmoid_Hypothesis(self, w, x, b):
        z = np.dot(x, w) + b
        return 1 / (1 + np.exp(-z))
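    # sigma(0) = 0.5 and sigma(z) tends to 1 (z -> +inf) or 0 (z -> -inf),
    # so the output can be read as P(y = 1 | x); its derivative,
    # sigma'(z) = sigma(z) * (1 - sigma(z)), is what makes the gradient of
    # the cost below reduce to X.T (y - h).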

    # Cross-entropy cost: J(w, b) = -mean(y * log(h) + (1 - y) * log(1 - h))
    def Cost_Function(self):
        hypothesis = self.Sigmoid_Hypothesis(self.w, self.x_train, self.b)
        return -np.mean(self.y_train * np.log(hypothesis)
                        + (1 - self.y_train) * np.log(1 - hypothesis))

    # one step of gradient descent to update w and b
    def gradient_descent(self):
        hypothesis = self.Sigmoid_Hypothesis(self.w, self.x_train, self.b)
        self.w += (self.learning_rate * np.dot(self.x_train.T, self.y_train - hypothesis) / self.numOfTrainingItems)
        self.b += (self.learning_rate * np.sum(self.y_train - hypothesis) / self.numOfTrainingItems)
        return [self.w, self.b]
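    # Note: the '+=' update with (y - h) ascends the log-likelihood, which is
    # equivalent to descending the cross-entropy cost J above, so this is
    # ordinary gradient descent written in ascent form.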

    # training: run gradient descent for a fixed number of iterations
    def train(self):
        for i in range(10000):
            self.w, self.b = self.gradient_descent()
        return [self.w, self.b]

    # predict labels for new samples: 1 if h(x) >= 0.5, else 0
    def predict(self, x_test):
        hypothesis = self.Sigmoid_Hypothesis(self.w, x_test, self.b)
        return [1 if val >= 0.5 else 0 for val in hypothesis]

    # fraction of test samples whose predicted label matches the true label
    def calc_accuracy(self, y_test, y_predicted):
        cnt = 0
        for i in range(len(y_test)):
            if y_test[i] == y_predicted[i]:
                cnt += 1
        return cnt / len(y_test)

# random initial values for w and b
w = np.random.random((2, 1))
b = np.random.random()
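# w is (2, 1) to match the two feature columns of X, and b is a scalar that
# numpy broadcasts across every row in z = X.w + b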
# loading the data (req1)
customerData = pd.read_csv('customer_data.csv')
# shuffle the data (sample returns a new frame, so the result must be reassigned)
customerData = customerData.sample(frac=1).reset_index(drop=True)
# print(customerData)
# X => values of features 1,2 (age, salary) & Y => values of output (purchased)
X = customerData.iloc[:, 0:2].values
Y = customerData.iloc[:, 2:3].values
# Feature scaling using min-max normalization, applied per feature column:
# x' = (x - min) / (max - min) maps each feature into [0, 1]
X = X.astype(float)
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
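# Worked example of the scaling above (illustrative numbers, not taken from
# the dataset): ages [20, 40, 60] map to [0.0, 0.5, 1.0], since
# (20-20)/(60-20) = 0, (40-20)/(60-20) = 0.5 and (60-20)/(60-20) = 1.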
# Split the dataset into training and testing sets (req2)
X_Train = X[0:320]
Y_Train = Y[0:320]
X_Test = X[320:]
Y_Test = Y[320:]
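# Note: the 320/80 cut is an 80/20 train/test split assuming customer_data.csv
# holds 400 rows; adjust the index if the file size differs.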
numOfTrainingItems = X_Train.shape[0]  # number of training rows
# print(numOfTrainingItems)
# logistic regression (req3)
LogisticReg = LogisticRegression(X_Train, Y_Train, w, b, numOfTrainingItems, 0.2)
w, b = LogisticReg.train()
# predictions on new data (req4)
Y_Predict = LogisticReg.predict(X_Test)
print(Y_Predict)
# Calculate the accuracy (req5)
print(f"Accuracy : {LogisticReg.calc_accuracy(Y_Test, Y_Predict) * 100}%")