Fix: Cross Entropy loss function in logistic regression from Scratch

dheeraj-coding · dheeraj-coding · commit b95eb826d895 · 2019-08-12T22:35:18.000+05:30
Changes to be committed:
	modified:   LogisticRegressionGluon.ipynb
	modified:   LogisticRegressionScratch.ipynb
diff --git a/LogisticRegressionGluon.ipynb b/LogisticRegressionGluon.ipynb
@@ -45,7 +45,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -56,13 +56,13 @@
     "        y_hat = net(X)\n",
     "        y_hat = softmax(y_hat)\n",
     "        accumulator += (y_hat.argmax(axis=1)==y.astype('float32')).sum()\n",
-    "        size = len(y)\n",
+    "        size += len(y)\n",
     "    return accumulator / size"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -73,7 +73,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -84,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -94,22 +94,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 0, acc: 501.333344\n",
-      "Epoch 1, acc: 514.375000\n",
-      "Epoch 2, acc: 518.010437\n",
-      "Epoch 3, acc: 521.375000\n"
+      "Epoch 0, acc: 0.805600\n",
+      "Epoch 1, acc: 0.822417\n",
+      "Epoch 2, acc: 0.826817\n",
+      "Epoch 3, acc: 0.831850\n",
+      "Epoch 4, acc: 0.838033\n"
      ]
     }
    ],
    "source": [
-    "epochs = 10\n",
+    "epochs = 5\n",
     "for epoch in range(epochs):\n",
     "    for X, y in train_iter:\n",
     "        with autograd.record():\n",
diff --git a/LogisticRegressionScratch.ipynb b/LogisticRegressionScratch.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 126,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 96,
+   "execution_count": 127,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -33,7 +33,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 128,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -45,17 +45,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 98,
+   "execution_count": 168,
    "metadata": {},
    "outputs": [],
    "source": [
     "def cross_entropy(y_hat, y):\n",
-    "    return -nd.pick(y_hat, y, axis=1)"
+    "    return -nd.pick(y_hat, y).log()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 99,
+   "execution_count": 169,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -66,7 +66,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 100,
+   "execution_count": 170,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -76,7 +76,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 101,
+   "execution_count": 171,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -87,13 +87,12 @@
     "        y_hat = net(X, W, b, num_features)\n",
     "        accumulator += accuracy(y_hat, y)\n",
     "        size += len(y)\n",
-    "    print(accumulator)\n",
     "    return accumulator / size"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 102,
+   "execution_count": 172,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -103,7 +102,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 103,
+   "execution_count": 173,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -114,11 +113,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 104,
+   "execution_count": 174,
    "metadata": {},
    "outputs": [],
    "source": [
-    "num_inputs = 784\n",
+    "num_inputs = 28 * 28\n",
     "num_outputs = 10\n",
     "W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))\n",
     "b = nd.zeros(num_outputs)\n",
@@ -128,53 +127,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 175,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\n",
-      "[40504.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 0, acc: 0.675067\n",
-      "\n",
-      "[41122.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 1, acc: 0.685367\n",
-      "\n",
-      "[42817.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 2, acc: 0.713617\n",
-      "\n",
-      "[44973.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 3, acc: 0.749550\n",
-      "\n",
-      "[45543.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 4, acc: 0.759050\n",
-      "\n",
-      "[45997.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 5, acc: 0.766617\n",
-      "\n",
-      "[46272.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 6, acc: 0.771200\n",
-      "\n",
-      "[46486.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 7, acc: 0.774767\n",
-      "\n",
-      "[46629.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 8, acc: 0.777150\n",
-      "\n",
-      "[46824.]\n",
-      "<NDArray 1 @cpu(0)>\n",
-      "Epoch 9, acc: 0.780400\n"
+      "Epoch 0, acc: 0.805417\n",
+      "Epoch 1, acc: 0.820217\n",
+      "Epoch 2, acc: 0.829133\n",
+      "Epoch 3, acc: 0.834200\n",
+      "Epoch 4, acc: 0.839000\n",
+      "Epoch 5, acc: 0.841550\n",
+      "Epoch 6, acc: 0.844400\n",
+      "Epoch 7, acc: 0.845233\n",
+      "Epoch 8, acc: 0.846100\n",
+      "Epoch 9, acc: 0.846767\n"
      ]
     }
    ],
@@ -197,7 +166,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 123,
+   "execution_count": 154,
    "metadata": {},
    "outputs": [
     {
@@ -208,7 +177,7 @@
       "(256,)\n",
       "(256,)\n",
       "\n",
-      "[0.08203125]\n",
+      "[0.11328125]\n",
       "<NDArray 1 @cpu(0)>\n"
      ]
     }