Implemented TicTacToe game

shashankms-dev · shashankms-dev · commit 0d69b4b362fc · 2023-04-25T16:25:54.000+05:30
diff --git a/alpha-zero.ipynb b/alpha-zero.ipynb
@@ -5,26 +5,185 @@
    "execution_count": 1,
    "id": "b86297a7",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'1.24.2'"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "np.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e9f409ff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class TicTacToe:\n",
+    "    def __init__(self):\n",
+    "        self.row_count = 3\n",
+    "        self.column_count = 3\n",
+    "        self.action_size = self.row_count * self.column_count\n",
+    "        \n",
+    "    def get_initial_state(self):\n",
+    "        return np.zeros((self.row_count, self.column_count), dtype=int)\n",
+    "    \n",
+    "    def get_next_state(self, state, action, player):\n",
+    "        row = action // self.column_count\n",
+    "        column = action % self.column_count\n",
+    "        state[row, column] = player\n",
+    "        return state\n",
+    "    \n",
+    "    def get_valid_moves(self, state):\n",
+    "        return (state.reshape(-1) == 0).astype(np.uint8)\n",
+    "    \n",
+    "    def check_win(self, state, action):\n",
+    "        row = action // self.column_count\n",
+    "        column = action % self.column_count\n",
+    "        player = state[row, column]\n",
+    "        \n",
+    "        return (\n",
+    "            np.sum(state[row, :]) == player * self.column_count\n",
+    "            or np.sum(state[:, column]) == player * self.row_count\n",
+    "            or np.sum(np.diag(state)) == player * self.row_count # change to diagonal length\n",
+    "            or np.sum(np.diag(np.flip(state, axis = 0))) == player * self.row_count # change to diagonal length\n",
+    "        )\n",
+    "    \n",
+    "    def check_draw(self, state):\n",
+    "        if np.sum(self.get_valid_moves(state)) == 0:\n",
+    "            return True\n",
+    "        else:\n",
+    "            return False\n",
+    "        \n",
+    "    def get_opponent(self, player):\n",
+    "        return -player\n",
+    "    \n",
+    "    def get_value_and_terminated(self, state, action):\n",
+    "        if self.check_win(state, action):\n",
+    "            return 1, True\n",
+    "        if self.check_draw(state):\n",
+    "            return 0, True\n",
+    "        \n",
+    "        return 0, False\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "e60e21f1",
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Hello\n"
+      "[[0 0 0]\n",
+      " [0 0 0]\n",
+      " [0 0 0]]\n",
+      "valid_moves: [0, 1, 2, 3, 4, 5, 6, 7, 8]\n",
+      "1: 1\n",
+      "[[0 1 0]\n",
+      " [0 0 0]\n",
+      " [0 0 0]]\n",
+      "valid_moves: [0, 2, 3, 4, 5, 6, 7, 8]\n",
+      "-1: 3\n",
+      "[[ 0  1  0]\n",
+      " [-1  0  0]\n",
+      " [ 0  0  0]]\n",
+      "valid_moves: [0, 2, 4, 5, 6, 7, 8]\n",
+      "1: 0\n",
+      "[[ 1  1  0]\n",
+      " [-1  0  0]\n",
+      " [ 0  0  0]]\n",
+      "valid_moves: [2, 4, 5, 6, 7, 8]\n",
+      "-1: 2\n",
+      "[[ 1  1 -1]\n",
+      " [-1  0  0]\n",
+      " [ 0  0  0]]\n",
+      "valid_moves: [4, 5, 6, 7, 8]\n",
+      "1: 4\n",
+      "[[ 1  1 -1]\n",
+      " [-1  1  0]\n",
+      " [ 0  0  0]]\n",
+      "valid_moves: [5, 6, 7, 8]\n",
+      "-1: 7\n",
+      "[[ 1  1 -1]\n",
+      " [-1  1  0]\n",
+      " [ 0 -1  0]]\n",
+      "valid_moves: [5, 6, 8]\n",
+      "1: 6\n",
+      "[[ 1  1 -1]\n",
+      " [-1  1  0]\n",
+      " [ 1 -1  0]]\n",
+      "valid_moves: [5, 8]\n",
+      "-1: 8\n",
+      "[[ 1  1 -1]\n",
+      " [-1  1  0]\n",
+      " [ 1 -1 -1]]\n",
+      "valid_moves: [5]\n",
+      "1: 5\n",
+      "[[ 1  1 -1]\n",
+      " [-1  1  1]\n",
+      " [ 1 -1 -1]]\n",
+      "Game drawn\n"
      ]
     }
    ],
    "source": [
-    "print(\"Hello\")"
+    "tictactoe = TicTacToe()\n",
+    "player = 1\n",
+    "state = tictactoe.get_initial_state()\n",
+    "\n",
+    "# Interactive two-player game: keep prompting the current player for a flat\n",
+    "# cell index until a move wins or fills the board.\n",
+    "while True:\n",
+    "    print(state)\n",
+    "    valid_moves = tictactoe.get_valid_moves(state)\n",
+    "    print(\"valid_moves:\", [i for i in range(tictactoe.action_size) if valid_moves[i] == 1])\n",
+    "\n",
+    "    try:\n",
+    "        action = int(input(f\"{player}: \"))\n",
+    "    except ValueError:\n",
+    "        # Non-numeric input: re-prompt the same player instead of crashing.\n",
+    "        print(\"invalid action\")\n",
+    "        continue\n",
+    "\n",
+    "    # Range-check before indexing: an index past the board raises IndexError\n",
+    "    # and a negative index would silently wrap around to another cell.\n",
+    "    if not 0 <= action < tictactoe.action_size or valid_moves[action] == 0:\n",
+    "        print(\"invalid action\")\n",
+    "        continue\n",
+    "\n",
+    "    state = tictactoe.get_next_state(state, action, player)\n",
+    "    value, terminated = tictactoe.get_value_and_terminated(state, action)\n",
+    "\n",
+    "    if terminated:\n",
+    "        print(state)\n",
+    "        if value == 1:\n",
+    "            print(\"Player \", player, \" won\")\n",
+    "        else:\n",
+    "            print(\"Game drawn\")\n",
+    "        break\n",
+    "\n",
+    "    # The turn passes only after a legal, non-terminal move.\n",
+    "    player = tictactoe.get_opponent(player)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e9f409ff",
+   "id": "c09a4301",
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "class MCTS:\n",
+    "    def __init__(self, game, args):\n",
+    "        self.game = game\n",
+    "        self.args = args\n",
+    "        \n",
+    "    "
+   ]
   }
  ],
  "metadata": {