
Commit c9a57aa

Refactored Files

1 parent a4edccf commit c9a57aa


46 files changed: +1686 -87 lines

+191 lines
@@ -0,0 +1,191 @@
import numpy as np

# Log-likelihood of a single row; labels are in {-1, +1}, so (y + 1)/2 maps them to {0, 1}
def compute_loss(datarow, w, b):
    wxb = np.dot(w.T, datarow[1:23]) + b
    return (((datarow[0] + 1) / 2) * wxb) - np.log(1 + np.exp(wxb))

# Per-row gradients of the log-likelihood with respect to w and b
def compute_wb(datarow, w, b):
    wxb = np.exp(np.dot(w.T, datarow[1:23]) + b)
    py1 = wxb / (1 + wxb)
    diff_b = ((datarow[0] + 1) / 2) - py1
    diff_w = datarow[1:23] * diff_b
    return diff_w, diff_b

# Predict the more probable label
def predict(w, b, datarow):
    wxb = np.exp(np.dot(w.T, datarow) + b)
    py_positive = wxb / (1 + wxb)
    py_negative = 1 / (1 + wxb)
    return 1.0 if py_positive >= py_negative else -1.0

# Fraction of predictions that match the actual labels
def accuracy(Y1, Y2):
    counter = sum(1 for pred, actual in zip(Y1, Y2) if pred == actual)
    return counter / len(Y1)
########################## TRAIN ##########################
# Read the training file and store its contents in a numpy array
train_data = np.genfromtxt('park_train.data', delimiter=",")
train_data_length = len(train_data)

# Map 0 labels to -1 so that (y + 1)/2 in the loss and gradients is correct,
# matching the handling of the validation and test files below
train_data[train_data[:, 0] == 0, 0] = -1

# Gradient ascent parameter initialization
step_size = 0.000001
lambda_array = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]

# w and b dict for all lambda values
wb_lambda_dict = {}
cols = train_data.shape[1] - 1
for lambda_value in lambda_array:

    # Gradient ascent parameters
    iteration_counter = 1
    w_vector = 0.25 * np.ones((cols,), dtype=float)
    b_value = 0.75
    total_loss = 0.0

    # Iterate until convergence
    while True:

        # Save the previous loss
        prev_loss = total_loss
        total_loss = 0.0
        grad_sum_w = np.zeros((cols,))
        grad_sum_b = 0.0

        # Accumulate loss and gradients over all training rows
        for i in range(train_data_length):
            total_loss += compute_loss(train_data[i], w_vector, b_value)
            temp_w, temp_b = compute_wb(train_data[i], w_vector, b_value)
            grad_sum_w += temp_w
            grad_sum_b += temp_b

        # L2 regularization: the penalty is (lambda/2) * ||w||^2, whose gradient is lambda * w
        total_loss -= (lambda_value / 2) * (np.linalg.norm(w_vector) ** 2)
        grad_sum_w -= lambda_value * w_vector

        print("Iteration:", iteration_counter, "Total loss:", total_loss)

        # Stop when the improvement in loss is minimal
        if total_loss - prev_loss < 0.00015 and iteration_counter >= 2:
            break

        # Update w and b
        w_vector += step_size * grad_sum_w
        b_value += step_size * grad_sum_b

        iteration_counter += 1

    wb_lambda_dict[lambda_value] = np.append(w_vector, b_value)
############################# VALIDATION #########################################
# Open the validation file and read its contents
with open('park_validation.data', 'r') as park_valid_file:
    file_contents = park_valid_file.read()

# Array to hold the parsed validation data (58 rows, 23 columns)
valid_data = np.empty((58, 23), dtype=float)

# Row and column counters
i = 0
j = 0

# Parse the contents and store them in the numpy array
for line in file_contents.split('\n'):
    # Skip a trailing empty line, if any
    if not line:
        continue
    for dt in line.split(","):
        # If y is 0, change it to -1
        if j == 0 and dt == '0':
            valid_data[i, j] = -1
        else:
            valid_data[i, j] = dt
        j += 1
    j = 0
    i += 1

# Accuracy dictionary for validation data
acc_dict_valid = {}

best_valid_lambda = -100
best_valid_accuracy = -100

# Calculate accuracy on the validation data for each lambda
for lamb, vector in wb_lambda_dict.items():
    if len(vector) != 0:
        pred_array = []
        actual_data = []
        for i in range(len(valid_data)):
            slice_data = valid_data[i][1:23]
            actual_data.append(valid_data[i][0])
            pred_array.append(predict(vector[0:22], vector[22], slice_data))

        acc_dict_valid[lamb] = accuracy(pred_array, actual_data)

        if acc_dict_valid[lamb] >= best_valid_accuracy:
            best_valid_lambda = lamb
            best_valid_accuracy = acc_dict_valid[lamb]

print("Accuracy on validation data for each lambda:", acc_dict_valid)
############################# TEST #########################################
# Open the test file and read its contents
with open('park_test.data', 'r') as park_test_file:
    file_contents = park_test_file.read()

# Array to hold the parsed test data (59 rows, 23 columns)
test_data = np.empty((59, 23), dtype=float)

# Row and column counters
i = 0
j = 0

# Parse the contents and store them in the numpy array
for line in file_contents.split('\n'):
    # Skip a trailing empty line, if any
    if not line:
        continue
    for dt in line.split(","):
        # If y is 0, change it to -1
        if j == 0 and dt == '0':
            test_data[i, j] = -1
        else:
            test_data[i, j] = dt
        j += 1
    j = 0
    i += 1

# Calculate accuracy on the test data using the best lambda from validation
best_wb = wb_lambda_dict[best_valid_lambda]
pred_array = []
actual_data = []

for i in range(len(test_data)):
    slice_data = test_data[i][1:23]
    actual_data.append(test_data[i][0])
    pred_array.append(predict(best_wb[0:22], best_wb[22], slice_data))

print("Best w vector:", best_wb[0:22])
print("Best b value:", best_wb[22])
print("Accuracy on test data:", accuracy(pred_array, actual_data), "for best lambda:", best_valid_lambda)
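Note: the file above accumulates the loss and the gradients one row at a time. For reference, the same L2-regularized gradient-ascent step can be written in a handful of vectorized NumPy operations. The sketch below is illustrative only, assuming the file's data layout (22 features per row, labels already mapped to {-1, +1}); the function name gradient_step is hypothetical and not part of the commit.

import numpy as np

def gradient_step(X, y, w, b, step, lam):
    # Hypothetical vectorized sketch: X is (n, 22), y is (n,) with values in {-1, +1}
    k = X @ w + b                   # linear scores for every row at once
    p = 1.0 / (1.0 + np.exp(-k))    # P(y = +1 | x), same as py1 in compute_wb
    resid = (y + 1) / 2 - p         # labels mapped to {0, 1} minus probability
    grad_w = X.T @ resid - lam * w  # data-fit gradient summed over rows, plus L2 term
    grad_b = resid.sum()
    return w + step * grad_w, b + step * grad_b

Each call performs one full-batch update, equivalent to summing compute_wb over all rows and subtracting lambda * w.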
+190 lines
@@ -0,0 +1,190 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Nov 23 13:38:21 2018

@author: sahith
"""

import numpy as np

Train_data = np.genfromtxt('park_train.data', delimiter=",")
Test_data = np.genfromtxt('park_test.data', delimiter=",")
Validation_data = np.genfromtxt('park_validation.data', delimiter=",")

# Set 0 to -1 in the target column
Train_data[Train_data[:, 0] == 0, 0] = -1
Test_data[Test_data[:, 0] == 0, 0] = -1
Validation_data[Validation_data[:, 0] == 0, 0] = -1


def sigmoid_function(theta):
    s = 1 / (1 + np.exp(-theta))
    return s

# Log-likelihood of the data; (y + 1)/2 maps labels from {-1, +1} to {0, 1},
# consistent with the gradients used below
def compute_loss(data, weight, bias):
    m = data.shape[0]
    loss = 0
    for i in range(m):
        x = data[i, 1:]
        y = data[i, 0]
        k = np.dot(weight.T, x) + bias
        loss += ((y + 1) / 2) * k - np.log(1 + np.exp(k))
    return loss
def logistic_regression(data, weight, bias, step):
    m = data.shape[0]
    loss = compute_loss(data, weight, bias)
    it = 0
    while True:
        it += 1
        gradient_w = 0
        gradient_b = 0
        for i in range(m):
            x = data[i, 1:]
            k = np.dot(weight.T, x) + bias
            p = sigmoid_function(k)
            gradient_w += x * ((data[i, 0] + 1) / 2 - p)
            gradient_b += (data[i, 0] + 1) / 2 - p
        w = weight + step * gradient_w
        b = bias + step * gradient_b
        # Evaluate the loss at the updated parameters
        loss1 = compute_loss(data, w, b)
        if loss1 - loss < 0.0001 and it >= 2:
            break
        loss = loss1
        weight = w
        bias = b
    print("Iterations", it)
    return w, b
def logistic_regression_l1(data, weight, bias, step, l1):
    m = data.shape[0]
    # Penalized objective: log-likelihood minus l1 * ||w||_1
    loss = compute_loss(data, weight, bias) - l1 * np.linalg.norm(weight, 1)
    it = 0
    while True:
        it += 1
        gradient_w = 0
        gradient_b = 0
        for i in range(m):
            x = data[i, 1:]
            k = np.dot(weight.T, x) + bias
            p = sigmoid_function(k)
            gradient_w += x * ((data[i, 0] + 1) / 2 - p)
            gradient_b += (data[i, 0] + 1) / 2 - p
        # Subgradient of the L1 penalty is l1 * sign(w)
        gradient_w = gradient_w - l1 * np.sign(weight)
        w = weight + step * gradient_w
        b = bias + step * gradient_b
        # Penalized objective at the updated parameters
        loss1 = compute_loss(data, w, b) - l1 * np.linalg.norm(w, 1)
        if loss1 - loss < 0.0001 and it >= 2:
            break
        loss = loss1
        weight = w
        bias = b
    return w, b
def logistic_regression_l2(data, weight, bias, step, l2):
    m = data.shape[0]
    it = 0
    # Penalized objective: log-likelihood minus (l2/2) * ||w||^2
    loss = compute_loss(data, weight, bias) - (l2 / 2) * (np.linalg.norm(weight) ** 2)
    while True:
        it += 1
        gradient_w = 0
        gradient_b = 0
        for i in range(m):
            x = data[i, 1:]
            k = np.dot(weight.T, x) + bias
            p = sigmoid_function(k)
            gradient_w += x * ((data[i, 0] + 1) / 2 - p)
            gradient_b += (data[i, 0] + 1) / 2 - p
        # Gradient of the (l2/2) * ||w||^2 penalty is l2 * w
        gradient_w = gradient_w - (l2 * weight)
        w = weight + step * gradient_w
        b = bias + step * gradient_b
        # Penalized objective at the updated parameters
        loss1 = compute_loss(data, w, b) - (l2 / 2) * (np.linalg.norm(w) ** 2)
        if loss1 - loss < 0.0001 and it >= 2:
            break
        loss = loss1
        weight = w
        bias = b
    return w, b
# Classification accuracy (percent): predict +1 when w.x + b > 0, else -1
def accuracy(data, weight, bias):
    m = data.shape[0]
    cnt = 0
    for i in range(m):
        x = data[i, 1:]
        t = np.dot(weight.T, x) + bias
        if t > 0:
            if data[i, 0] > 0:
                cnt += 1
        else:
            if data[i, 0] < 0:
                cnt += 1
    return cnt / m * 100
cols = Train_data.shape[1] - 1
weight = np.array([0.25] * cols)
bias = 0.75
learningRate = [0.000001]  # Step sizes (learning rates) to try

# Unregularized logistic regression
acc = 0
for lr in learningRate:
    w, b = logistic_regression(Train_data, weight, bias, lr)
    valid_acc = accuracy(Validation_data, w, b)
    print("Validation accuracy is", valid_acc)
    if valid_acc >= acc:
        acc = valid_acc
        finalWeight = w
        finalBias = b

print(finalWeight)
print('Accuracy on test data is', accuracy(Test_data, finalWeight, finalBias))

# L2-regularized logistic regression
acc = 0
l2 = [0.0001, 0.001, 0.01, 0.1, 0.5, 1, 10, 1000]
for lr in learningRate:
    for i in l2:
        w, b = logistic_regression_l2(Train_data, weight, bias, lr, i)
        valid_acc = accuracy(Validation_data, w, b)
        print("Validation accuracy for l2 =", i, "is", valid_acc)
        if valid_acc >= acc:
            bestl2 = i
            acc = valid_acc
            finalWeight = w
            finalBias = b

print("Weight vector of l2 regularization", finalWeight)
print("Bias of l2 regularization", finalBias)
print("Best l2 constant", bestl2)
print('Accuracy on test data with l2 penalty is', accuracy(Test_data, finalWeight, finalBias))


# L1-regularized logistic regression
acc = 0
l1 = [0.0001, 0.001, 0.01, 0.1, 0.5, 1, 10, 1000]
for lr in learningRate:
    for i in l1:
        w, b = logistic_regression_l1(Train_data, weight, bias, lr, i)
        valid_acc = accuracy(Validation_data, w, b)
        print("Validation accuracy for l1 =", i, "is", valid_acc)
        if valid_acc >= acc:
            bestl1 = i
            acc = valid_acc
            finalWeight = w
            finalBias = b

print("Weight vector of l1 regularization", finalWeight)
print("Bias of l1 regularization", finalBias)
print("Best l1 constant", bestl1)
print('Accuracy on test data with l1 penalty is', accuracy(Test_data, finalWeight, finalBias))
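The two regularized variants above share the same data-fit gradient and differ only in the penalty term added to the objective. A compact way to state the difference, as a sketch for illustration (the helper name penalty_gradient and the parameter c are hypothetical, not part of the commit):

import numpy as np

def penalty_gradient(w, c, kind):
    # L2: the gradient of (c/2) * ||w||^2 is c * w, which shrinks every weight proportionally
    if kind == "l2":
        return c * w
    # L1: a subgradient of c * ||w||_1 is c * sign(w), a constant pull toward zero
    # that tends to drive small weights exactly to zero (sparsity)
    return c * np.sign(w)

np.sign returns 0 at w = 0, a common convention for the L1 subgradient at that point.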
Binary file not shown.
