Make LQR and iLQR work with the attitude interface

Lui · Lui · commit bfaa1353859c · 2025-01-08T16:09:52.000+01:00
diff --git a/tutorials/LQR_ILQR.ipynb b/tutorials/LQR_ILQR.ipynb
@@ -176,8 +176,8 @@
     "\n",
     "print(\"A shape:\", A.shape)  # Should be (12, 12)\n",
     "print(\"B shape:\", B.shape)  # Should be (12, 4)\n",
-    "print(\"A :\\n\", A)\n",
-    "print(\"B :\\n\", B)"
+    "# print(\"A :\\n\", A)\n",
+    "# print(\"B :\\n\", B)"
    ]
   },
   {
@@ -271,23 +271,18 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "Ad, Bd = discretize_linear_system(A, B, dt) #, exact=True)\n",
-    "# print(\"A :\\n\", Ad)\n",
-    "# print(\"B :\\n\", Bd)\n",
-    "P = scipy.linalg.solve_discrete_are(Ad, Bd, Q_lqr, R_lqr)\n",
+    "Ad, Bd = discretize_linear_system(A, B, dt)\n",
     "\n",
+    "P = scipy.linalg.solve_discrete_are(Ad, Bd, Q_lqr, R_lqr)\n",
     "btp = np.dot(Bd.T, P)\n",
-    "\n",
     "gain_lqr = np.dot(np.linalg.inv(R_lqr + np.dot(btp, Bd)),\n",
     "                np.dot(btp, Ad))\n",
     "\n",
-    "# We can also comment out the above two lines of code \n",
-    "# and use the following line instead to compute for the continuous-time case\n",
+    "### We can also comment out the above lines of code \n",
+    "### and use the following lines instead to compute for the continuous-time case\n",
     "# P = scipy.linalg.solve_continuous_are(A, B, Q_lqr, R_lqr)\n",
     "# gain_lqr = np.dot(np.linalg.inv(R_lqr), np.dot(B.T, P))\n",
     "\n",
-    "# print(\"A (discretized):\\n\", Ad)\n",
-    "# print(\"B (discretized):\\n\", Bd)\n",
     "print(\"gain:\\n\", gain_lqr)\n",
     "print(\"shape of gain:\", gain_lqr.shape)\n"
    ]
@@ -309,7 +304,7 @@
    "source": [
     "SEED = 42\n",
     "\n",
-    "obs, info = envs.reset()#seed=SEED)\n",
+    "obs, info = envs.reset(seed=SEED)\n",
     "state = obs_to_state(obs)\n",
     "# print(obs)\n",
     "# Step through the environment\n",
@@ -324,7 +319,7 @@
     "    goal = np.array([0, 0, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0]) # set goal state\n",
     "\n",
     "    control_input = -gain_lqr @ (state - goal) + u_op\n",
-    "    # control_input = np.clip(control_input, MIN_THRUST, MAX_THRUST) \n",
+    "\n",
     "    control_input = np.clip(control_input, envs.action_space.low, envs.action_space.high)\n",
     "    action = control_input.reshape(1,4).astype(np.float32)\n",
     "    # print(action)\n",
@@ -353,7 +348,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### 2.5.6 Plots"
+    "### 2.1.6 Plots"
    ]
   },
   {
@@ -522,6 +517,13 @@
     "### 2.2.3 Recursion"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next cell will take some time to run. :)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -540,7 +542,7 @@
     "    input_pre = input_stack\n",
     "\n",
     "    # Forward / \"rollout\" of the current policy\n",
-    "    obs, info = envs.reset() #seed=SEED)\n",
+    "    obs, info = envs.reset(seed=SEED)\n",
     "    state = obs_to_state(obs) # (12,)\n",
     "\n",
     "    for step in range(2500):\n",
@@ -551,7 +553,7 @@
     "        # Clip the control input to the specified range\n",
     "        control_input = np.clip(control_input, envs.action_space.low, envs.action_space.high) \n",
     "        \n",
-    "        # Convert to np.ndarray\n",
+    "        # Reshape to (1, 4) and cast to float32\n",
     "        action = control_input.reshape(1,4).astype(np.float32) # (1, 4)\n",
     "\n",
     "        # Save rollout data.\n",
@@ -570,7 +572,7 @@
     "        \n",
     "    envs.close()\n",
     "    \n",
-    "    # TODO: Compute cost to see if it could converge\n",
+    "    # TODO: Compute cost to see if it diverges or converges\n",
     "    # cost_curr = 0\n",
     "    # for i in range(state_stack.shape[0]):\n",
     "\n",
@@ -665,15 +667,6 @@
     "    iter += 1"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(Sm)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -696,11 +689,14 @@
     "    # Compute control action (force) using the iLQR gain\n",
     "    control_input = input_ff[:, i] + gains_fb[i].dot(state) # gains_fb[:, i].dot(state) + input_ff[i]\n",
     "\n",
-    "    # Clip the control iptput to the specified range\n",
-    "    control_input = np.clip(control_input, 0.028161688, 0.14834145)\n",
+    "    # Clip the control input to the specified range\n",
+    "    control_input = np.clip(control_input, envs.action_space.low, envs.action_space.high) \n",
     "    \n",
     "    # Convert to np.ndarray\n",
-    "    action = np.array([control_input], dtype=np.float32)           \n",
+    "    action = control_input.reshape(1,4).astype(np.float32) # (1, 4)\n",
+    "    \n",
+    "    # # Convert to np.ndarray\n",
+    "    # action = np.array([control_input], dtype=np.float32)           \n",
     "\n",
     "    # Take a step in the environment with the computed action\n",
     "    obs, reward, terminated, truncated, _ = envs.step(action)\n",
@@ -716,7 +712,11 @@
     "    if terminated or truncated:\n",
     "        print(\"Episode ended at step:\", i)\n",
     "        break\n",
-    "    envs.render()\n",
+    "\n",
+    "    if (i * fps) % envs.sim.freq < fps:\n",
+    "        envs.render()\n",
+    "        time.sleep(1 / fps)\n",
+    "    # envs.render()\n",
     "# Close the environment\n",
     "envs.sim.close()\n",
     "envs.close()"
@@ -736,9 +736,9 @@
     "plt.plot(time_log, x_log_ilqr, label=\"x(iLQR)\", color=\"blue\")\n",
     "plt.plot(time_log, y_log_ilqr, label=\"y(iLQR)\", color=\"green\")\n",
     "plt.plot(time_log, z_log_ilqr, label=\"z(iLQR)\", color=\"red\")\n",
-    "# plt.plot(time_log, x_log, label=\"x(LQR)\", color=\"blue\", linestyle=\"--\")\n",
-    "# plt.plot(time_log, y_log, label=\"y(LQR)\", color=\"green\", linestyle=\"--\")\n",
-    "# plt.plot(time_log, z_log, label=\"z(LQR)\", color=\"red\", linestyle=\"--\")\n",
+    "plt.plot(time_log, x_log, label=\"x(LQR)\", color=\"blue\", linestyle=\"--\")\n",
+    "plt.plot(time_log, y_log, label=\"y(LQR)\", color=\"green\", linestyle=\"--\")\n",
+    "plt.plot(time_log, z_log, label=\"z(LQR)\", color=\"red\", linestyle=\"--\")\n",
     "plt.xlabel(\"Time (s)\")\n",
     "plt.ylabel(\"position\")\n",
     "plt.title(\"position vs Time\")\n",
@@ -763,10 +763,10 @@
     "plt.plot(time_log, thrust_values_ilqr[:, 2], label=\"Motor 3(iLQR)\", color=\"green\")\n",
     "plt.plot(time_log, thrust_values_ilqr[:, 3], label=\"Motor 4(iLQR)\", color=\"red\")\n",
     "\n",
-    "# plt.plot(time_log, thrust_values[:, 0], label=\"Motor 1(LQR)\", color=\"blue\", linestyle=\"--\" )\n",
-    "# plt.plot(time_log, thrust_values[:, 1], label=\"Motor 2(LQR)\", color=\"orange\", linestyle=\"--\")\n",
-    "# plt.plot(time_log, thrust_values[:, 2], label=\"Motor 3(LQR)\", color=\"green\", linestyle=\"--\")\n",
-    "# plt.plot(time_log, thrust_values[:, 3], label=\"Motor 4(LQR)\", color=\"red\", linestyle=\"--\")\n",
+    "plt.plot(time_log, thrust_values[:, 0], label=\"Motor 1(LQR)\", color=\"blue\", linestyle=\"--\" )\n",
+    "plt.plot(time_log, thrust_values[:, 1], label=\"Motor 2(LQR)\", color=\"orange\", linestyle=\"--\")\n",
+    "plt.plot(time_log, thrust_values[:, 2], label=\"Motor 3(LQR)\", color=\"green\", linestyle=\"--\")\n",
+    "plt.plot(time_log, thrust_values[:, 3], label=\"Motor 4(LQR)\", color=\"red\", linestyle=\"--\")\n",
     "\n",
     "plt.xlabel('Time Steps')\n",
     "plt.ylabel('Thrust (N)')\n",
@@ -775,13 +775,6 @@
     "plt.grid()\n",
     "plt.show()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {