|
18 | 18 | "outputs": [],
|
19 | 19 | "source": [
|
20 | 20 | "import stormvogel.model\n",
|
| 21 | + "from stormvogel.model import EmptyAction\n", |
21 | 22 | "from stormvogel.layout import Layout\n",
|
22 | 23 | "from stormvogel.show import show"
|
23 | 24 | ]
|
|
102 | 103 | " \n",
|
103 | 104 | " escaped = pomdp.new_state(\"escaped\")\n",
|
104 | 105 | " escaped.set_observation(END)\n",
|
105 |
| - " reward_model.set(escaped, 0)\n", |
| 106 | + " #reward_model.set(escaped, 0)\n", |
106 | 107 | " \n",
|
107 | 108 | " grid = [[None for x in range(width)] for y in range(height)]\n",
|
108 | 109 | " for x in range(width):\n",
|
109 | 110 | " for y in range(height):\n",
|
110 | 111 | " if level_list[y][x] == WALKABLE:\n",
|
111 | 112 | " grid[y][x] = pomdp.new_state([\"t\", f\"({x},{y})\"])\n",
|
112 | 113 | " grid[y][x].set_observation(UNKNOWN)\n",
|
113 |
| - " reward_model.set(grid[y][x], -1)\n", |
| 114 | + " #reward_model.set_state_action_reward(grid[y][x], -1)\n", |
114 | 115 | " if level_list[y][x] == EXIT:\n",
|
115 | 116 | " grid[y][x] = pomdp.new_state([\"e\", f\"({x},{y})\"])\n",
|
116 | 117 | " grid[y][x].set_observation(UNKNOWN)\n",
|
117 |
| - " reward_model.set(grid[y][x], 100)\n", |
| 118 | + " #reward_model.set_state_action_reward(grid[y][x], 100)\n", |
118 | 119 | " grid[y][x].add_transitions([(1, escaped)])\n",
|
119 | 120 | " dirs = {d: pomdp.new_action(d) for d in [UP, DOWN, LEFT, RIGHT]}\n",
|
120 | 121 | " positions = {}\n",
|
|
125 | 126 | " for d,action in dirs.items():\n",
|
126 | 127 | " positions[str(grid[y][x].id)] = {\"x\": x * position_scalar, \"y\": y * position_scalar}\n",
|
127 | 128 | " ((res_x, res_y), observation) = direction_result(x,y,d,level_list,width,height)\n",
|
| 129 | + "\n", |
| 130 | + " \n", |
| 131 | + " \n", |
128 | 132 | " if not observation == OUT_OF_BOUNDS:\n",
|
129 | 133 | " took_dir = pomdp.new_state([d, f\"({x},{y})\"])\n",
|
130 |
| - " reward_model.set(took_dir, 0)\n", |
131 | 134 | " grid[y][x].add_transitions([(action, took_dir)])\n",
|
| 135 | + " reward_model.set_state_action_reward(grid[y][x], action, 0)\n", |
132 | 136 | " # print(took_dir)\n",
|
133 | 137 | " took_dir.add_transitions([(1, grid[res_y][res_x])])\n",
|
| 138 | + " reward_model.set_state_action_reward(took_dir, EmptyAction, -1)\n", |
134 | 139 | " took_dir.set_observation(observation)\n",
|
135 | 140 | " pomdp.add_self_loops()\n",
|
| 141 | + " reward_model.set_unset_rewards(0)\n", |
136 | 142 | " return pomdp, positions"
|
137 | 143 | ]
|
138 | 144 | },
|
|
187 | 193 | "metadata": {},
|
188 | 194 | "outputs": [
|
189 | 195 | {
|
190 |
| - "ename": "TypeError", |
191 |
| - "evalue": "new_pomdp() got an unexpected keyword argument 'create_initial_state'", |
192 |
| - "output_type": "error", |
193 |
| - "traceback": [ |
194 |
| - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
195 |
| - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", |
196 |
| - "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m pomdp, positions \u001b[38;5;241m=\u001b[39m \u001b[43mgrid_world\u001b[49m\u001b[43m(\u001b[49m\u001b[43mLEVEL\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m vis \u001b[38;5;241m=\u001b[39m show(pomdp, layout\u001b[38;5;241m=\u001b[39mLayout(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlayouts/grid.json\u001b[39m\u001b[38;5;124m\"\u001b[39m), separate_labels\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mt\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124me\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n", |
197 |
| - "Cell \u001b[0;32mIn[2], line 69\u001b[0m, in \u001b[0;36mgrid_world\u001b[0;34m(level, position_scalar)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Create a grid world with an actor.\"\"\"\u001b[39;00m\n\u001b[1;32m 68\u001b[0m level_list, width, height \u001b[38;5;241m=\u001b[39m parse_level(level)\n\u001b[0;32m---> 69\u001b[0m pomdp \u001b[38;5;241m=\u001b[39m \u001b[43mstormvogel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnew_pomdp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcreate_initial_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 70\u001b[0m reward_model \u001b[38;5;241m=\u001b[39m pomdp\u001b[38;5;241m.\u001b[39madd_rewards(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 72\u001b[0m escaped \u001b[38;5;241m=\u001b[39m pomdp\u001b[38;5;241m.\u001b[39mnew_state(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mescaped\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", |
198 |
| - "\u001b[0;31mTypeError\u001b[0m: new_pomdp() got an unexpected keyword argument 'create_initial_state'" |
199 |
| - ] |
| 196 | + "data": { |
| 197 | + "application/vnd.jupyter.widget-view+json": { |
| 198 | + "model_id": "676e37cbe8a846d19550bccc6d5c4dd5", |
| 199 | + "version_major": 2, |
| 200 | + "version_minor": 0 |
| 201 | + }, |
| 202 | + "text/plain": [ |
| 203 | + "Output()" |
| 204 | + ] |
| 205 | + }, |
| 206 | + "metadata": {}, |
| 207 | + "output_type": "display_data" |
| 208 | + }, |
| 209 | + { |
| 210 | + "data": { |
| 211 | + "text/html": [ |
| 212 | + "<script>fetch('http://127.0.0.1:8889/vDdOnHCrvh/MESSAGE/' + 'test message')</script>" |
| 213 | + ], |
| 214 | + "text/plain": [ |
| 215 | + "<IPython.core.display.HTML object>" |
| 216 | + ] |
| 217 | + }, |
| 218 | + "metadata": {}, |
| 219 | + "output_type": "display_data" |
| 220 | + }, |
| 221 | + { |
| 222 | + "data": { |
| 223 | + "text/html": [ |
| 224 | + "<script>fetch('http://127.0.0.1:8889/vDdOnHCrvh/MESSAGE/' + 'test message')</script>" |
| 225 | + ], |
| 226 | + "text/plain": [ |
| 227 | + "<IPython.core.display.HTML object>" |
| 228 | + ] |
| 229 | + }, |
| 230 | + "metadata": {}, |
| 231 | + "output_type": "display_data" |
| 232 | + }, |
| 233 | + { |
| 234 | + "data": { |
| 235 | + "text/html": [ |
| 236 | + "<script>fetch('http://127.0.0.1:8889/vDdOnHCrvh/MESSAGE/' + 'test message')</script>" |
| 237 | + ], |
| 238 | + "text/plain": [ |
| 239 | + "<IPython.core.display.HTML object>" |
| 240 | + ] |
| 241 | + }, |
| 242 | + "metadata": {}, |
| 243 | + "output_type": "display_data" |
| 244 | + }, |
| 245 | + { |
| 246 | + "data": { |
| 247 | + "application/vnd.jupyter.widget-view+json": { |
| 248 | + "model_id": "b3f4a3d048254e1cb8573a25408495ac", |
| 249 | + "version_major": 2, |
| 250 | + "version_minor": 0 |
| 251 | + }, |
| 252 | + "text/plain": [ |
| 253 | + "Output()" |
| 254 | + ] |
| 255 | + }, |
| 256 | + "metadata": {}, |
| 257 | + "output_type": "display_data" |
| 258 | + }, |
| 259 | + { |
| 260 | + "data": { |
| 261 | + "application/vnd.jupyter.widget-view+json": { |
| 262 | + "model_id": "848d2f62271c4a4488bcebe203d3a59e", |
| 263 | + "version_major": 2, |
| 264 | + "version_minor": 0 |
| 265 | + }, |
| 266 | + "text/plain": [ |
| 267 | + "HBox(children=(Output(), Output()))" |
| 268 | + ] |
| 269 | + }, |
| 270 | + "metadata": {}, |
| 271 | + "output_type": "display_data" |
200 | 272 | }
|
201 | 273 | ],
|
202 | 274 | "source": [
|
|
223 | 295 | },
|
224 | 296 | {
|
225 | 297 | "cell_type": "code",
|
226 |
| - "execution_count": 35, |
| 298 | + "execution_count": 6, |
227 | 299 | "id": "8d5b0e8e-47db-4dfb-a533-a8181ec04751",
|
228 | 300 | "metadata": {},
|
229 | 301 | "outputs": [
|
230 | 302 | {
|
231 | 303 | "name": "stdout",
|
232 | 304 | "output_type": "stream",
|
233 | 305 | "text": [
|
234 |
| - "ERROR (Model.cpp:71): Invalid size (22) of state action reward vector (expected:34).\n" |
235 |
| - ] |
236 |
| - }, |
237 |
| - { |
238 |
| - "ename": "RuntimeError", |
239 |
| - "evalue": "IllegalArgumentException: Invalid size (22) of state action reward vector (expected:34).", |
240 |
| - "output_type": "error", |
241 |
| - "traceback": [ |
242 |
| - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
243 |
| - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", |
244 |
| - "Cell \u001b[0;32mIn[35], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mstormvogel\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmapping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m stormvogel_to_stormpy\n\u001b[0;32m----> 4\u001b[0m stormpy_model \u001b[38;5;241m=\u001b[39m \u001b[43mstormvogel_to_stormpy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpomdp\u001b[49m\u001b[43m)\u001b[49m\n", |
245 |
| - "File \u001b[0;32m~/git/env/lib/python3.12/site-packages/stormvogel/mapping.py:300\u001b[0m, in \u001b[0;36mstormvogel_to_stormpy\u001b[0;34m(model)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m map_ctmc(model)\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m model\u001b[38;5;241m.\u001b[39mget_type() \u001b[38;5;241m==\u001b[39m stormvogel\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mModelType\u001b[38;5;241m.\u001b[39mPOMDP:\n\u001b[0;32m--> 300\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmap_pomdp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m model\u001b[38;5;241m.\u001b[39mget_type() \u001b[38;5;241m==\u001b[39m stormvogel\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mModelType\u001b[38;5;241m.\u001b[39mMA:\n\u001b[1;32m 302\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m map_ma(model)\n", |
246 |
| - "File \u001b[0;32m~/git/env/lib/python3.12/site-packages/stormvogel/mapping.py:225\u001b[0m, in \u001b[0;36mstormvogel_to_stormpy.<locals>.map_pomdp\u001b[0;34m(model)\u001b[0m\n\u001b[1;32m 223\u001b[0m components\u001b[38;5;241m.\u001b[39mobservability_classes \u001b[38;5;241m=\u001b[39m observations\n\u001b[1;32m 224\u001b[0m components\u001b[38;5;241m.\u001b[39mchoice_labeling \u001b[38;5;241m=\u001b[39m choice_labeling\n\u001b[0;32m--> 225\u001b[0m pomdp \u001b[38;5;241m=\u001b[39m \u001b[43mstormpy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstorage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSparsePomdp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcomponents\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m pomdp\n", |
247 |
| - "\u001b[0;31mRuntimeError\u001b[0m: IllegalArgumentException: Invalid size (22) of state action reward vector (expected:34)." |
| 306 | + "-------------------------------------------------------------- \n", |
| 307 | + "Model type: \tPOMDP (sparse)\n", |
| 308 | + "States: \t22\n", |
| 309 | + "Transitions: \t34\n", |
| 310 | + "Choices: \t34\n", |
| 311 | + "Observations: \t5\n", |
| 312 | + "Reward Models: (default)\n", |
| 313 | + "State Labels: \t12 labels\n", |
| 314 | + " * (1,2) -> 1 item(s)\n", |
| 315 | + " * escaped -> 1 item(s)\n", |
| 316 | + " * ← -> 4 item(s)\n", |
| 317 | + " * (1,1) -> 5 item(s)\n", |
| 318 | + " * (2,1) -> 5 item(s)\n", |
| 319 | + " * t -> 4 item(s)\n", |
| 320 | + " * ↑ -> 4 item(s)\n", |
| 321 | + " * (3,2) -> 5 item(s)\n", |
| 322 | + " * → -> 4 item(s)\n", |
| 323 | + " * e -> 1 item(s)\n", |
| 324 | + " * ↓ -> 4 item(s)\n", |
| 325 | + " * (3,1) -> 5 item(s)\n", |
| 326 | + "Choice Labels: \t0 labels\n", |
| 327 | + "-------------------------------------------------------------- \n", |
| 328 | + "\n" |
248 | 329 | ]
|
249 | 330 | }
|
250 | 331 | ],
|
251 | 332 | "source": [
|
252 | 333 | "from stormvogel.mapping import stormvogel_to_stormpy\n",
|
253 | 334 | "\n",
|
254 | 335 | "stormpy_model = stormvogel_to_stormpy(pomdp)\n",
|
| 336 | + "print(stormpy_model)\n", |
255 | 337 | "\n",
|
256 |
| - "# TODO use stormpy to find the best policy/schedule, i.e. escape the maze as quickly as possible." |
| 338 | + "# TODO use stormpy to find the best policy/schedule, i.e. escape the maze as quickly as possible.\n", |
| 339 | + "# Ask Pim or Linus for help?" |
257 | 340 | ]
|
258 | 341 | },
|
259 | 342 | {
|
|
0 commit comments