From ff6c05dc528af3af9dde9671b33675bbf0a4ace1 Mon Sep 17 00:00:00 2001 From: Bryce McWilliams Date: Thu, 25 Jun 2020 20:15:26 +0200 Subject: [PATCH] feat: add cnn mnist --- cnn_mnist.ipynb | 873 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 873 insertions(+) create mode 100644 cnn_mnist.ipynb diff --git a/cnn_mnist.ipynb b/cnn_mnist.ipynb new file mode 100644 index 0000000..d4b5b18 --- /dev/null +++ b/cnn_mnist.ipynb @@ -0,0 +1,873 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "cnn_mnist.ipynb", + "provenance": [], + "collapsed_sections": [], + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "oVEReyyDtZxl" + }, + "source": [ + "# CNN" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "gqj9X_tjSyXy" + }, + "source": [ + "## MNIST Dataset\n", + "The MNIST database contains 60,000 training images and 10,000 testing images taken from American Census Bureau employees and American high school students. The MNIST dataset is one of the most common datasets used for image classification and accessible from many different sources. In fact, even Tensorflow and Keras allow us to import and download the MNIST dataset directly from their API." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ZOk8Eu4_t70R" + }, + "source": [ + "First, let's select TensorFlow version 2.x in Colab" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "H6RZUm0p4wYJ", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "ac8fb894-583b-4cb4-e3b1-7df7238f6810" + }, + "source": [ + "%tensorflow_version 2.x\n", + "import tensorflow\n", + "\n", + "tensorflow.__version__" + ], + "execution_count": 34, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'2.2.0'" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "TWi96z-8SyX0", + "colab": {} + }, + "source": [ + "# Initialize the random number generator\n", + "import random\n", + "\n", + "random.seed(0)" + ], + "execution_count": 35, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "bhLmt0A6n_jj", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Ignore the warnings\n", + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")" + ], + "execution_count": 36, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "roDpNYcESyXz" + }, + "source": [ + "Let's load the MNIST dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "4kQf1NjrSyX5", + "colab": {} + }, + "source": [ + "from tensorflow.keras.datasets import mnist\n", + "\n", + "# the data, shuffled and split between train and test sets\n", + "(X_train, y_train), (X_test, y_test) = mnist.load_data()" + ], + "execution_count": 37, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "I9LvPEV7SyX9" + }, + "source": [ + "X_train and X_test contain greyscale pixel intensities (from 0 to 255) while y_train and y_test contain 
labels from 0 to 9 which represents which number they actually are." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jFxVKfCPSyX-" + }, + "source": [ + "Let's visualize some numbers using matplotlib" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "OiTTWjsrSyX_", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 299 + }, + "outputId": "df8fa422-60b1-4e17-fbd1-3a9678622428" + }, + "source": [ + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "print(\"Label: {}\".format(y_train[8000]))\n", + "plt.imshow(X_train[8000], cmap='gray')" + ], + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Label: 0\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 38 + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOYUlEQVR4nO3dX4wVZZrH8d8DAzE4iCDaorArM9EL4h8wxJCsGsyEEbkQxkQE44bFSXoSMRnMJiuZMYFENiGruIleEJtABs3AOEZtzWgGGDJZ9YKJaFxtcAdZAxn+SKsYRy4Mtv3sRRduA13vaU5VnTrN8/0knT6nnq5Tj0d/Vp1669Rr7i4AF75RdTcAoDUIOxAEYQeCIOxAEIQdCOIHrdyYmXHqH6iYu9tQywvt2c1svpn91cwOmNmqIq8FoFrW7Di7mY2WtF/SPEmHJb0jaam770usw54dqFgVe/ZbJB1w90/c/ZSk30laWOD1AFSoSNivlvS3Qc8PZ8vOYGadZrbHzPYU2BaAgio/QefuXZK6JA7jgToV2bMfkTRt0POp2TIAbahI2N+RdK2ZTTezsZKWSHqtnLYAlK3pw3h37zOzhyVtlzRa0mZ331taZwBK1fTQW1Mb4zM7ULlKLqoBMHIQdiAIwg4EQdiBIAg7EARhB4Ig7EAQhB0IgrADQRB2IAjCDgRB2IEgCDsQBGEHgiDsQBCEHQiCsANBEHYgCMIOBEHYgSAIOxBES6dsxshz/fXXJ+u33XZbsn7RRRfl1tavX59ct6enJ1l/4oknkvXnn38+WY+GPTsQBGEHgiDsQBCEHQiCsANBEHYgCMIOBMEsrheAK664Ird23XXXJde97777kvV77rknWb/yyiuT9Sp9++23yfqKFStya5s2bSq7nbaRN4troYtqzOygpK8lfSepz91nF3k9ANUp4wq6O9z98xJeB0CF+MwOBFE07C5ph5m9a2adQ/2BmXWa2R4z21N
wWwAKKHoYf6u7HzGzKyTtNLP/cfc3B/+Bu3dJ6pI4QQfUqdCe3d2PZL97Jb0i6ZYymgJQvqbDbmYXm9n4048l/VRS+juJAGpT5DC+Q9IrZnb6dba6+x9L6QpnSH0nXJK2bt2aW7vjjjsKbTv795uryus0+vr6kvVGY+Xjx48vs50Rr+mwu/snkm4qsRcAFWLoDQiCsANBEHYgCMIOBEHYgSD4imsbmDBhQrLe3d2drN9+++1ltnOGOofeXn/99WT97rvvrmzbI1neV1zZswNBEHYgCMIOBEHYgSAIOxAEYQeCIOxAEIyzt4FGUwvff//9LerkXHWOs3/zzTfJ+v79+5P1e++9N7d24MCBpnoaCRhnB4Ij7EAQhB0IgrADQRB2IAjCDgRB2IEgypjYEQ3Mnz8/WZ83b15l2240rfFTTz2VrL/11lvJ+uOPP56sz5o1K1lPaXQL7RtvvDFZf/DBB3Nrjz32WHLd/v7+ZH0kYs8OBEHYgSAIOxAEYQeCIOxAEIQdCIKwA0HwffYWOHjwYLI+bdq0yrbdaBx8zZo1hV6/Ue+vvvpqbu2mm+qbBHj58uXJ+nPPPdeiTsrX9PfZzWyzmfWaWc+gZZPMbKeZfZz9nlhmswDKN5zD+N9IOvsSsFWSdrn7tZJ2Zc8BtLGGYXf3NyWdOGvxQklbssdbJC0quS8AJWv22vgOdz+WPf5UUkfeH5pZp6TOJrcDoCSFvwjj7p468ebuXZK6pLgn6IB20OzQ23EzmyJJ2e/e8loCUIVmw/6apGXZ42WS8sdXALSFhuPsZrZN0lxJkyUdl7RaUrek30v6B0mHJC1297NP4g31WiP2MH7s2LG5tY0bNybXbXTf91Gjil3b9OKLL+bWHnjggeS6fX19hbbdSEdH7ukcHT16tNJtpzT6nv/cuXOT9d27d5fYTbnyxtkbfmZ396U5pZ8U6ghAS3G5LBAEYQeCIOxAEIQdCIKwA0FwK+lhSt16uNHwVtXWrVuXW6t6aK2RL774Ire2du3a5LqPPvposj5mzJimehrOuqNHj276tdsVe3YgCMIOBEHYgSAIOxAEYQeCIOxAEIQdCIJx9mGaPn16bdvu7u5O1vft29eiTs5fapx/9erVyXWXLVuWrFd5C+4LEXt2IAjCDgRB2IEgCDsQBGEHgiDsQBCEHQiCcfZhSt3u2WzIO/cO28mTJ5P1rq6uZP3UqVOFtt+uGt1iu8j7XvTf2UjEnh0IgrADQRB2IAjCDgRB2IEgCDsQBGEHgmCcPXPzzTcn63fddVdurdG0142k7q0uSdu3by/0+iNVf39/sl7kfe/t7U3Wv/rqq6Zfu1013LOb2WYz6zWznkHL1pjZETN7P/tZUG2bAIoazmH8byTNH2L5f7r7zOznjXLbAlC2hmF39zclnWhBLwAqVOQE3cNm9kF2mD8x74/MrNPM9pjZngLbAlBQs2HfIOnHkmZKOiZpfd4funuXu89299lNbgtACZoKu7sfd/fv3L1f0kZJt5TbFoCyNRV2M5sy6OnPJPXk/S2A9tBwnN3MtkmaK2mymR2WtFrSXDObKcklHZT0iwp7bIlx48Yl6xMmTGhRJxeW1PfGFy9enFz3sssuK7ud7+3cuTNZ7+m58PZfDcPu7kuHWLypgl4AVIjLZYEgCDsQBGEHgiDsQBCEHQiCr7i2gUsuuSRZnzNnTrK+e/fuMtspVeqfbevWrS3s5Ew7duyobdt1Yc8OBEHYgSAIOxAEYQeCIOxAEIQdCIKwA0Ewzp758ssvk/WjR4/m1q666qpC27700kuT9TfeSN/Pc8GC/Jv71j0Gv3bt2tq23d3dnVt74YUXWthJe2DPDgRB2IEgCDsQBGEHgiDsQBCEHQiCsANBWNHphs9rY2at21jJnnnmmdzaQw891MJOzpW6BmDp0qFuDvz/3n777WR90qRJyfqMGTOS9W3btuXWil6f0NfXl6yn3pcbbrghue7Jkyeb6qkduPuQ9+9mzw4EQdi
BIAg7EARhB4Ig7EAQhB0IgrADQTDOPkypKZtPnDjRwk7Oz2effZasb968OVm/8847k/WpU6cm65MnT07Wi3j66aeT9UceeaSybbezpsfZzWyamf3ZzPaZ2V4z+2W2fJKZ7TSzj7PfE8tuGkB5hnMY3yfpX919hqQ5klaY2QxJqyTtcvdrJe3KngNoUw3D7u7H3P297PHXkj6SdLWkhZK2ZH+2RdKiqpoEUNx53YPOzK6RNEvSXyR1uPuxrPSppI6cdToldTbfIoAyDPtsvJn9UNJLkla6+98H13zgLN+QJ9/cvcvdZ7v77EKdAihkWGE3szEaCPpv3f3lbPFxM5uS1adI6q2mRQBlaDj0Zmamgc/kJ9x95aDlT0j6wt3XmdkqSZPc/d8avNaIHXobN25cbu3ZZ59NrrtoUfp0Ruq16zZqVHp/0N/f3/Rr7927N1lvNKXzk08+maw3+grshSpv6G04n9n/SdI/S/rQzN7Plv1K0jpJvzezn0s6JGlxGY0CqEbDsLv725KG/D+FpJ+U2w6AqnC5LBAEYQeCIOxAEIQdCIKwA0HwFdcWWL58ebLe0THklcbfW7lyZbJ++eWXn3dPwzVwmUW+RrdcTo2lL1myJLnuoUOHknUMjVtJA8ERdiAIwg4EQdiBIAg7EARhB4Ig7EAQjLOPAHPmzEnWZ86cWdm2N2zYkKw3mq660fooH+PsQHCEHQiCsANBEHYgCMIOBEHYgSAIOxAE4+zABYZxdiA4wg4EQdiBIAg7EARhB4Ig7EAQhB0IomHYzWyamf3ZzPaZ2V4z+2W2fI2ZHTGz97OfBdW3C6BZDS+qMbMpkqa4+3tmNl7Su5IWaWA+9pPu/uSwN8ZFNUDl8i6qGc787MckHcsef21mH0m6utz2AFTtvD6zm9k1kmZJ+ku26GEz+8DMNpvZxJx1Os1sj5ntKdQpgEKGfW28mf1Q0n9J+nd3f9nMOiR9LsklPa6BQ/0HG7wGh/FAxfIO44cVdjMbI+kPkra7+1ND1K+R9Ad3v77B6xB2oGJNfxHGBqbx3CTpo8FBz07cnfYzST1FmwRQneGcjb9V0luSPpTUny3+laSlkmZq4DD+oKRfZCfzUq/Fnh2oWKHD+LIQdqB6fJ8dCI6wA0EQdiAIwg4EQdiBIAg7EARhB4Ig7EAQhB0IgrADQRB2IAjCDgRB2IEgCDsQRMMbTpbsc0mHBj2fnC1rR+3aW7v2JdFbs8rs7R/zCi39Pvs5Gzfb4+6za2sgoV17a9e+JHprVqt64zAeCIKwA0HUHfaumref0q69tWtfEr01qyW91fqZHUDr1L1nB9AihB0Iopawm9l8M/urmR0ws1V19JDHzA6a2YfZNNS1zk+XzaHXa2Y9g5ZNMrOdZvZx9nvIOfZq6q0tpvFOTDNe63tX9/TnLf/MbmajJe2XNE/SYUnvSFrq7vta2kgOMzsoaba7134BhpndLumkpOdOT61lZv8h6YS7r8v+RznR3R9tk97W6Dyn8a6ot7xpxv9FNb53ZU5/3ow69uy3SDrg7p+4+ylJv5O0sIY+2p67vynpxFmLF0rakj3eooH/WFoup7e24O7H3P297PHXkk5PM17re5foqyXqCPvVkv426Plhtdd87y5ph5m9a2addTczhI5B02x9KqmjzmaG0HAa71Y6a5rxtnnvmpn+vChO0J3rVne/WdJdklZkh6ttyQc+g7XT2OkGST/WwByAxyStr7OZbJrxlyStdPe/D67V+d4N0VdL3rc6wn5E0rRBz6dmy9qCux/JfvdKekUDHzvayfHTM+hmv3tr7ud77n7c3b9z935JG1Xje5dNM/6SpN+6+8vZ4trfu6H6atX7VkfY35F0rZlNN7OxkpZIeq2GPs5hZhdnJ05kZhdL+qnabyrq1yQtyx4vk/Rqjb2coV2m8c6bZlw1v3e1T3/u7i3/kbRAA2fk/1fSr+voIaevH0n67+xnb929SdqmgcO6bzVwbuPnki6TtEvSx5L
+JGlSG/X2vAam9v5AA8GaUlNvt2rgEP0DSe9nPwvqfu8SfbXkfeNyWSAITtABQRB2IAjCDgRB2IEgCDsQBGEHgiDsQBD/B+78joxjEAC5AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "mVVInu-RSyYJ" + }, + "source": [ + "### Print shape of the data" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "JcIEeC-ySyYK", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "ea7bab39-8e3a-49be-c00c-c6bb09168fc5" + }, + "source": [ + "print(X_train.shape)\n", + "print(y_train.shape)\n", + "print(X_test.shape)\n", + "print(y_test.shape)" + ], + "execution_count": 39, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(60000, 28, 28)\n", + "(60000,)\n", + "(10000, 28, 28)\n", + "(10000,)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "NvfxTB1CSyYO" + }, + "source": [ + "### Reshape train and test sets into compatible shapes\n", + "- Sequential model in tensorflow.keras expects data to be in the format (n_e, n_h, n_w, n_c)\n", + "- n_e= number of examples, n_h = height, n_w = width, n_c = number of channels\n", + "- do not reshape labels" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "0uIaqfYASyYP", + "colab": {} + }, + "source": [ + "X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)\n", + "X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)" + ], + "execution_count": 40, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "GHok6WPuSyYU" + }, + "source": [ + "### Normalize data\n", + "- we must normalize our data as it is always required in neural network models\n", + "- we can achieve this by dividing the RGB codes with 255 (which is the maximum RGB code minus the minimum RGB code)\n", + "- normalize X_train and X_test\n", + "- make sure that the values are float so that we can get decimal points after division" + ] + }, + { + "cell_type": "code", + 
"metadata": { + "colab_type": "code", + "id": "1xqxsH4XSyYV", + "colab": {} + }, + "source": [ + "X_train = X_train.astype('float32')\n", + "X_test = X_test.astype('float32')\n", + "\n", + "X_train /= 255\n", + "X_test /= 255" + ], + "execution_count": 41, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Io8RGv39SyYZ" + }, + "source": [ + "### Print shape of data and number of images\n", + "- print shape of X_train\n", + "- print number of images in X_train\n", + "- print number of images in X_test" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "mhYZjEtKSyYb", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 + }, + "outputId": "51e018a2-ceef-4896-b16e-713e21975b68" + }, + "source": [ + "print(\"X_train shape:\", X_train.shape)\n", + "print(\"Images in X_train:\", X_train.shape[0])\n", + "print(\"Images in X_test:\", X_test.shape[0])\n", + "print(\"Max value in X_train:\", X_train.max())\n", + "print(\"Min value in X_train:\", X_train.min())" + ], + "execution_count": 42, + "outputs": [ + { + "output_type": "stream", + "text": [ + "X_train shape: (60000, 28, 28, 1)\n", + "Images in X_train: 60000\n", + "Images in X_test: 10000\n", + "Max value in X_train: 1.0\n", + "Min value in X_train: 0.0\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "kbCeNC9PSyYi" + }, + "source": [ + "### One-hot encode the class vector\n", + "- convert class vectors (integers) to binary class matrix\n", + "- convert y_train and y_test\n", + "- number of classes: 10\n", + "- we are doing this to use categorical_crossentropy as loss" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "KXDv1LqhSyYk", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "outputId": "c472ca77-3aef-4370-89f7-6f0ce393eaab" + }, + "source": [ + "from tensorflow.keras.utils import 
to_categorical\n", + "\n", + "y_train = to_categorical(y_train, num_classes=10)\n", + "y_test = to_categorical(y_test, num_classes=10)\n", + "\n", + "print(\"Shape of y_train:\", y_train.shape)\n", + "print(\"One value of y_train:\", y_train[0])" + ], + "execution_count": 43, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Shape of y_train: (60000, 10)\n", + "One value of y_train: [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "5jQGzVrX19zt" + }, + "source": [ + "### Initialize a sequential model\n", + "- define a sequential model\n", + "- add 2 convolutional layers\n", + " - number of filters: 32\n", + " - kernel size: 3x3\n", + " - activation: \"relu\"\n", + " - input shape: (28, 28, 1) for first layer\n", + "- flatten the data\n", + " - add a Flatten layer\n", + " - a Flatten layer flattens the convolutional feature maps into a 1D array before the fully connected layers\n", + "- add 2 dense layers\n", + " - number of neurons in first layer: 128\n", + " - number of neurons in last layer: number of classes\n", + " - activation function in first layer: relu\n", + " - activation function in last layer: softmax\n", + " - we may experiment with any number of neurons for the first Dense layer; however, the final Dense layer must have neurons equal to the number of output classes" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "gVAkTinRLBDp", + "colab": {} + }, + "source": [ + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Conv2D, Flatten, Dense\n", + "\n", + "model = Sequential()\n", + "model.add(Conv2D(filters=32, kernel_size=3, activation=\"relu\", input_shape=(28, 28, 1)))\n", + "model.add(Conv2D(filters=32, kernel_size=3, activation=\"relu\"))\n", + "model.add(Flatten())\n", + "model.add(Dense(128, activation=\"relu\"))\n", + "model.add(Dense(10, activation=\"softmax\"))" + ], + 
"execution_count": 44, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "c74LMVBg3AXb" + }, + "source": [ + "### Compile and fit the model\n", + "- let's compile our model\n", + " - loss: \"categorical_crossentropy\"\n", + " - metrics: \"accuracy\"\n", + " - optimizer: \"adam\"\n", + "- the next step is to fit the model\n", + " - give training data - training features and labels\n", + " - batch size: 32\n", + " - epochs: 10\n", + " - hold out 30% of the training data for validation (validation_split=0.3)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "22PFHnospfxs", + "colab_type": "code", + "colab": {} + }, + "source": [ + "# Compile the model\n", + "model.compile(loss=\"categorical_crossentropy\", metrics=[\"accuracy\"], optimizer=\"adam\")" + ], + "execution_count": 45, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "ca0AdxDx3AXh", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 374 + }, + "outputId": "bb1faced-b9a9-48a1-8b47-72d1841b768b" + }, + "source": [ + "# Fit the model\n", + "model.fit(x=X_train, y=y_train, batch_size=32, epochs=10, validation_split = 0.3)" + ], + "execution_count": 48, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1313/1313 [==============================] - 12s 9ms/step - loss: 0.0048 - accuracy: 0.9983 - val_loss: 0.0861 - val_accuracy: 0.9861\n", + "Epoch 2/10\n", + "1313/1313 [==============================] - 11s 9ms/step - loss: 0.0043 - accuracy: 0.9986 - val_loss: 0.0944 - val_accuracy: 0.9858\n", + "Epoch 3/10\n", + "1313/1313 [==============================] - 11s 9ms/step - loss: 0.0030 - accuracy: 0.9992 - val_loss: 0.0867 - val_accuracy: 0.9873\n", + "Epoch 4/10\n", + "1313/1313 [==============================] - 11s 9ms/step - loss: 0.0041 - accuracy: 0.9987 - val_loss: 0.1033 - val_accuracy: 0.9851\n", + "Epoch 5/10\n", + "1313/1313 [==============================] - 11s 9ms/step - 
loss: 0.0038 - accuracy: 0.9989 - val_loss: 0.0935 - val_accuracy: 0.9870\n", + "Epoch 6/10\n", + "1313/1313 [==============================] - 11s 8ms/step - loss: 0.0021 - accuracy: 0.9993 - val_loss: 0.1025 - val_accuracy: 0.9857\n", + "Epoch 7/10\n", + "1313/1313 [==============================] - 12s 9ms/step - loss: 9.9950e-04 - accuracy: 0.9997 - val_loss: 0.0932 - val_accuracy: 0.9880\n", + "Epoch 8/10\n", + "1313/1313 [==============================] - 12s 9ms/step - loss: 0.0014 - accuracy: 0.9996 - val_loss: 0.1254 - val_accuracy: 0.9831\n", + "Epoch 9/10\n", + "1313/1313 [==============================] - 11s 9ms/step - loss: 0.0054 - accuracy: 0.9986 - val_loss: 0.1086 - val_accuracy: 0.9867\n", + "Epoch 10/10\n", + "1313/1313 [==============================] - 11s 8ms/step - loss: 0.0026 - accuracy: 0.9993 - val_loss: 0.1029 - val_accuracy: 0.9851\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 48 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ecgTBHmn37lq" + }, + "source": [ + "### Final loss and accuracy on the test set" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "7_pf1d-J37l0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "outputId": "fc750adc-4a2d-4e77-92a9-64510e5d1c63" + }, + "source": [ + "model.evaluate(X_test, y_test)" + ], + "execution_count": 49, + "outputs": [ + { + "output_type": "stream", + "text": [ + "313/313 [==============================] - 1s 4ms/step - loss: 0.0915 - accuracy: 0.9863\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.09153053909540176, 0.986299991607666]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 49 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "-dqbAnSdvITA" + }, + 
"source": [ + "## Vanilla CNN + Pooling + Dropout" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SAA84Lfv4LVZ" + }, + "source": [ + "### Initialize a sequential model again\n", + "- define a sequential model\n", + "- add 2 convolutional layers\n", + " - number of filters: 32\n", + " - kernel size: 3x3\n", + " - activation: \"relu\"\n", + " - input shape: (28, 28, 1) for first layer\n", + "- add a max pooling layer of size 2x2\n", + "- add a dropout layer\n", + " - dropout layers fight overfitting by randomly disregarding some of the neurons during training\n", + " - use dropout rate 0.2\n", + "- flatten the data\n", + " - add a Flatten layer\n", + " - a Flatten layer flattens the convolutional feature maps into a 1D array before the fully connected layers\n", + "- add 2 dense layers\n", + " - number of neurons in first layer: 128\n", + " - number of neurons in last layer: number of classes\n", + " - activation function in first layer: relu\n", + " - activation function in last layer: softmax\n", + " - we may experiment with any number of neurons for the first Dense layer; however, the final Dense layer must have neurons equal to the number of output classes" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "tMnK0A84VqZc", + "colab": {} + }, + "source": [ + "from tensorflow.keras.layers import MaxPooling2D, Dropout\n", + "\n", + "# Initialize the model\n", + "model = Sequential()\n", + "\n", + "# Add a Convolutional Layer with 32 filters of size 3X3 and activation function as 'relu' \n", + "model.add(Conv2D(filters=32, kernel_size=3, activation=\"relu\", input_shape=(28, 28, 1)))\n", + "\n", + "# Add a Convolutional Layer with 32 filters of size 3X3 and activation function as 'relu' \n", + "model.add(Conv2D(filters=32, kernel_size=3, activation=\"relu\"))\n", + "\n", + "# Add a MaxPooling Layer of size 2X2 \n", + "model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "\n", + "# Apply Dropout with 0.2 probability \n", + 
"model.add(Dropout(rate=0.2))\n", + "\n", + "# Flatten the layer\n", + "model.add(Flatten())\n", + "\n", + "# Add Fully Connected Layer with 128 units and activation function as 'relu'\n", + "model.add(Dense(128, activation=\"relu\"))\n", + "\n", + "#Add Fully Connected Layer with 10 units and activation function as 'softmax'\n", + "model.add(Dense(10, activation=\"softmax\"))" + ], + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "rTTqCN1g5KVf" + }, + "source": [ + "### Compile and fit the model\n", + "- let's compile our model\n", + " - loss: \"categorical_crossentropy\"\n", + " - metrics: \"accuracy\"\n", + " - optimizer: \"adam\"\n", + "- Use Early Stopping\n", + "- then next step will be to fit model\n", + " - give train data - training features and labels\n", + " - batch size: 32\n", + " - epochs: 10\n", + " - give validation data - testing features and labels" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "ervzY5xS5KVn", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "outputId": "fad671b6-78a2-4cde-bac6-977438da2eea" + }, + "source": [ + "# Compile the model\n", + "model.compile(loss=\"categorical_crossentropy\", metrics=[\"accuracy\"], optimizer=\"adam\")\n", + "\n", + "# Use earlystopping\n", + "callback = tensorflow.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=2, min_delta=0.01)\n", + "\n", + "# Fit the model\n", + "model.fit(x=X_train, y=y_train, batch_size=32, epochs=10, validation_data=(X_test, y_test), callbacks=[callback])" + ], + "execution_count": 51, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "1875/1875 [==============================] - 14s 8ms/step - loss: 0.1232 - accuracy: 0.9611 - val_loss: 0.0472 - val_accuracy: 0.9849\n", + "Epoch 2/10\n", + "1875/1875 [==============================] - 13s 7ms/step - loss: 0.0424 - accuracy: 0.9867 - val_loss: 
0.0394 - val_accuracy: 0.9878\n", + "Epoch 3/10\n", + "1875/1875 [==============================] - 13s 7ms/step - loss: 0.0300 - accuracy: 0.9903 - val_loss: 0.0406 - val_accuracy: 0.9875\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 51 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "o_n2Q4ag5KVw" + }, + "source": [ + "### Final loss and accuracy" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "6JqLcfUE5KVy", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "outputId": "c64e5e8f-9927-49c3-9976-152c0de58c90" + }, + "source": [ + "model.evaluate(X_test, y_test)" + ], + "execution_count": 53, + "outputs": [ + { + "output_type": "stream", + "text": [ + "313/313 [==============================] - 1s 4ms/step - loss: 0.0406 - accuracy: 0.9875\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.04055158793926239, 0.987500011920929]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 53 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "-NqR3ygZVPGD" + }, + "source": [ + "Let's visualize results using matplotlib" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab_type": "code", + "id": "D77CuLM7VPGL", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 350 + }, + "outputId": "3f5d5e5f-9497-4df5-87ab-face3334df0f" + }, + "source": [ + "plt.imshow(X_test[100].reshape(28, 28), cmap='gray')\n", + "y_pred = model.predict(X_test[100].reshape(1, 28, 28, 1))\n", + "print(\"Predicted label:\", y_pred.argmax())\n", + "print(\"Softmax Outputs:\", y_pred)\n", + "print(y_pred.sum())" + ], + "execution_count": 54, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Predicted label: 6\n", + "Softmax Outputs: 
[[8.24671531e-07 3.88156757e-10 8.58352278e-11 2.80309803e-10\n", + " 9.62173896e-09 1.31236195e-08 9.99999166e-01 1.39017669e-11\n", + " 4.90354779e-08 7.54032184e-11]]\n", + "1.0\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOBUlEQVR4nO3da6xV9ZnH8d9PBE0oJigZxAtjp9GYpol0QnQSUZk0Jagx0hdIvTfT5BStphijQzovNJmMITOj4wsTwiHFMhOH0sRLTTMRHILDGGPDJYzipZUhXiAH0CFYGkUHeObFWXSOevZ/n7Nvax+e7yc52XuvZ6+9H3f4udZe/73W3xEhAKe+0+puAEBvEHYgCcIOJEHYgSQIO5DE6b18M9sc+ge6LCI82vK2tuy2F9r+re3dtpe381oAusutjrPbniTpd5K+K2mvpK2Sbo6INwvrsGUHuqwbW/bLJe2OiD0R8bmkX0i6sY3XA9BF7YT9fEkfjHi8t1r2BbYHbG+zva2N9wLQpq4foIuIQUmDErvxQJ3a2bLvk3ThiMcXVMsA9KF2wr5V0sW2v257iqTvS3q+M20B6LSWd+Mj4pjteyRtkDRJ0pqIeKNjnQHoqJaH3lp6M76zA13XlR/VAJg4CDuQBGEHkiDsQBKEHUiCsANJ9PR8dvTeQw89VKzfcccdxfqSJUuK9W3bOOVhomDLDiRB2IEkCDuQBGEHkiDsQBKEHUiCobdTwPz58xvWBgYGiut+8sknxfrcuXOLdYbeJg627EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBFeXnQCmTZtWrO/Zs6dhbe3atcV1ly8vT77b7N/H8ePHi3X0HleXBZIj7EAShB1IgrADSRB2IAnCDiRB2IEkOJ99ArjrrruK9aNHjzasPfroo8V1jx071lJPmHjaCrvtdyUdkXRc0rGIKF/pAEBtOrFl/8uI+KgDrwOgi/jODiTRbthD0kbb222PerEz2wO2t9nmYmVAjdrdjZ8XEfts/4mkF22/HRFbRj4hIgYlDUqcCAPUqa0te0Tsq24PSnpW0uWdaApA57UcdttTbU87eV/SAkm7OtUYgM5qZzd+pqRnbZ98nX+NiBc60hW+4MEHHyzWV61a1bA2NDTU6XYwQbUc9ojYI+myDvYCoIsYegOSIOxAEoQdSIKwA0kQdiAJTnHtA80uFX3GGWcU62+//XYn28Epii07kARhB5Ig7EAShB1IgrADSRB2IAnCDiTBOHsfWLhwYVvrv/ACZxajObbsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AE4+x9YOnSpcX6Z599Vqx/+OGHnWwHpyi27EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBOPsPVBNa93QOeecU6xv2rSpk+30jfnz5xfrS5Ysaev1Dx8+3LC2ZcuW4rrNrhEQES31VKemW3bba2wftL1rxLKzbb9o+53qdnp32wTQrrHsxv9c0pcvpbJc0qaIuFjSpuoxgD7WNOwRsUXSoS8tvlHS2ur+WkmLOtwXgA5r9Tv7zIgYqu7vlzSz0RNtD0gaaPF9AHRI2wfoIiJsNzxaERGDkgYlqfQ8AN3V6tDbAduzJKm6Pdi5lgB0Q6thf17SndX9OyX9qjPtAOgWNxsvtL1O0nxJMyQdkPSQpOck/VLSbEnvSbopIr58EG+010q5G3/eeecV63
v37i3Wb7311mJ93bp14+6pU6ZMmVKsr1ixomFt2bJlxXXff//9Yv3IkSMtrz9v3rziuosXLy7WN27cWKzXKSJG/WFH0+/sEXFzg9J32uoIQE/xc1kgCcIOJEHYgSQIO5AEYQeS4BTXCaDOS0Wfdlp5e7B69epi/fbbb29Yu/vuu4vrPvnkk8V6s0tslyxaVD6dY9WqVcX6nDlzivWPP/543D11G1t2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCcfYemD17dlvrb926tUOdjN8TTzxRrC9YsKDlerNLZHfzcs0bNmwo1s8888xiferUqcU64+wAakPYgSQIO5AEYQeSIOxAEoQdSIKwA0kwzt4DM2c2nB2rdueee26xfsMNNxTrt9xyS7G+efPmcffUC59++mmxvnv37mL9qquuKtbXr18/7p66jS07kARhB5Ig7EAShB1IgrADSRB2IAnCDiTBOHsPfP75522tf8EFFxTr7Zw7fdtttxXrzcbhX3nllZbfeyKbNm1a3S2MW9Mtu+01tg/a3jVi2cO299neWf1d1902AbRrLLvxP5e0cJTl/xQRc6q/f+tsWwA6rWnYI2KLpEM96AVAF7VzgO4e269Vu/nTGz3J9oDtbba3tfFeANrUathXSvqGpDmShiQ92uiJETEYEXMjYm6L7wWgA1oKe0QciIjjEXFC0mpJl3e2LQCd1lLYbc8a8fB7knY1ei6A/tB0nN32OknzJc2wvVfSQ5Lm254jKSS9K+lHXexxwnv55ZeL9f379xfrS5cuLdbvvffecfd00quvvlqsn356+Z/INddcU6xv3Lhx3D31QrP/rrPOOqtYP3z4cCfb6YmmYY+Im0dZ/LMu9AKgi/i5LJAEYQeSIOxAEoQdSIKwA0lwimsPHDlypFjft29fsb548eJi/b777mtYO3bsWHHdQ4fKpz2cOHGiWJ80aVKx3q+aDVc2O7W32XTT/YgtO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4k4Yjo3ZvZvXuzCWTJkiXF+lNPPVWsr1y5smGtndNfJWlwcLBYv/7664v1NWvWNKwdPXq0pZ5Oanbq8OzZsxvWVq9eXVz32muvLdb7dSpqSYoIj7acLTuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJME4+wSwfv36Yn3RokUNa48//nhx3ccee6xYbzYd9MKFo835+f9mzJjRsGaPOhz8R1OmTCnWL7nkkmL9sssua1i7//77i+tu3769WO9njLMDyRF2IAnCDiRB2IEkCDuQBGEHkiDsQBKMs08AkydPLtYfeeSRhrVly5YV1212zfrnnnuuWP/ggw+K9ZLS7wMk6corryzWm127/YEHHmhY27lzZ3HdiazlcXbbF9rebPtN22/Y/km1/GzbL9p+p7qd3ummAXTOWHbjj0m6PyK+KekvJP3Y9jclLZe0KSIulrSpegygTzUNe0QMRcSO6v4RSW9JOl/SjZLWVk9bK6m8TwagVuOa6832RZK+Lek3kmZGxFBV2i9pZoN1BiQNtN4igE4Y89F421+T9LSkZRHx+5G1GD7KN+rBt4gYjIi5ETG3rU4BtGVMYbc9WcNBfyoinqkWH7A9q6rPknSwOy0C6ISmQ28ePg9xraRDEbFsxPJ/kPQ/EbHC9nJJZ0fEg01ei6G3HrviiiuK9ZtuuqlYv/rqq4v1Sy+9tFh/6aWXGtZ27NhRXHfLli3FerPLOTebbvpU1WjobSzf2a+UdLuk122fHJz8qaQVkn5p+4eS3pNU/lcDoFZNwx4RL0tqdJWB73S2HQDdws9lgSQIO5AEYQeSIOxAEoQdSIJTXIFTDJeSBpIj7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJJqG3faFtjfbftP2G7Z/Ui1/2PY+2zurv+u63y6AVjWdJML2LEmzIm
KH7WmStktapOH52P8QEf845jdjkgig6xpNEjGW+dmHJA1V94/YfkvS+Z1tD0C3jes7u+2LJH1b0m+qRffYfs32GtvTG6wzYHub7W1tdQqgLWOe68321yT9h6S/i4hnbM+U9JGkkPS3Gt7V/6smr8FuPNBljXbjxxR225Ml/VrShoh4bJT6RZJ+HRHfavI6hB3ospYndrRtST+T9NbIoFcH7k76nqRd7TYJoHvGcjR+nqT/lPS6pBPV4p9KulnSHA3vxr8r6UfVwbzSa7FlB7qsrd34TiHsQPcxPzuQHGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiCJphec7LCPJL034vGMalk/6tfe+rUvid5a1cne/rRRoafns3/lze1tETG3tgYK+rW3fu1LordW9ao3duOBJAg7kETdYR+s+f1L+rW3fu1LordW9aS3Wr+zA+idurfsAHqEsANJ1BJ22wtt/9b2btvL6+ihEdvv2n69moa61vnpqjn0DtreNWLZ2bZftP1OdTvqHHs19dYX03gXphmv9bOre/rznn9ntz1J0u8kfVfSXklbJd0cEW/2tJEGbL8raW5E1P4DDNtXS/qDpH8+ObWW7b+XdCgiVlT/o5weEX/dJ709rHFO492l3hpNM/4D1fjZdXL681bUsWW/XNLuiNgTEZ9L+oWkG2voo+9FxBZJh760+EZJa6v7azX8j6XnGvTWFyJiKCJ2VPePSDo5zXitn12hr56oI+znS/pgxOO96q/53kPSRtvbbQ/U3cwoZo6YZmu/pJl1NjOKptN499KXphnvm8+ulenP28UBuq+aFxF/LulaST+udlf7Ugx/B+unsdOVkr6h4TkAhyQ9Wmcz1TTjT0taFhG/H1mr87Mbpa+efG51hH2fpAtHPL6gWtYXImJfdXtQ0rMa/trRTw6cnEG3uj1Ycz9/FBEHIuJ4RJyQtFo1fnbVNONPS3oqIp6pFtf+2Y3WV68+tzrCvlXSxba/bnuKpO9Ler6GPr7C9tTqwIlsT5W0QP03FfXzku6s7t8p6Vc19vIF/TKNd6NpxlXzZ1f79OcR0fM/Sddp+Ij8f0v6mzp6aNDXn0n6r+rvjbp7k7ROw7t1/6vhYxs/lHSOpE2S3pH075LO7qPe/kXDU3u/puFgzaqpt3ka3kV/TdLO6u+6uj+7Ql89+dz4uSyQBAfogCQIO5AEYQeSIOxAEoQdSIKwA0kQdiCJ/wOjL3UFSPhQMQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "L_osbMLgr_He", + "colab_type": "code", + "colab": {} + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file