Merge pull request #138 from moves-rwth/136-flexible-scheduler-for-the-simulator

136 flexible scheduler for the simulator
moves-rwth · Nov 14, 2024 · f46c2c8 · f46c2c8
2 parents 7e25dc8 + 3d6ca95
commit f46c2c8
Show file tree

Hide file tree

Showing 2 changed files with 116 additions and 56 deletions.
diff --git a/stormvogel/simulator.py b/stormvogel/simulator.py
@@ -5,6 +5,7 @@
 import stormvogel.model
 import stormpy.examples.files
 import stormpy.examples
+from typing import Callable
 import random
 
 
@@ -96,10 +97,32 @@ def __eq__(self, other):
             return False
 
 
+def get_range_index(
+    state: stormvogel.model.State,
+    scheduler: stormvogel.result.Scheduler
+    | Callable[[stormvogel.model.State], stormvogel.model.Action],
+) -> int:
+    """Helper function to convert the chosen action in a state by a scheduler to a range index."""
+    assert scheduler is not None
+    if isinstance(scheduler, stormvogel.result.Scheduler):
+        action = scheduler.get_choice_of_state(state)
+    elif callable(scheduler):
+        action = scheduler(state)
+    else:
+        raise TypeError("Must be of type Scheduler or a function")
+
+    available_actions = state.available_actions()
+
+    assert action is not None
+    return available_actions.index(action)
+
+
 def simulate_path(
     model: stormvogel.model.Model,
     steps: int = 1,
-    scheduler: stormvogel.result.Scheduler | None = None,
+    scheduler: stormvogel.result.Scheduler
+    | Callable[[stormvogel.model.State], stormvogel.model.Action]
+    | None = None,
     seed: int | None = None,
 ) -> Path:
     """
@@ -108,20 +131,12 @@ def simulate_path(
         model: The stormvogel model that the simulator should run on.
         steps: The number of steps the simulator walks through the model.
         scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided.
+                    (instead of a stormvogel scheduler, a function from states to actions can also be provided.)
         seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided.
 
     Returns a path object.
     """
 
-    def get_range_index(stateid: int):
-        """Helper function to convert the chosen action in a state by a scheduler to a range index."""
-        assert scheduler is not None
-        action = scheduler.get_choice_of_state(model.get_state_by_id(state))
-        available_actions = model.states[stateid].available_actions()
-
-        assert action is not None
-        return available_actions.index(action)
-
     # we initialize the simulator
     stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model)
     if seed:
@@ -131,36 +146,38 @@ def get_range_index(stateid: int):
     assert simulator is not None
 
     # we start adding states or state action pairs to the path
-    state = 0
+    state_id = 0
     path = {}
     simulator.restart()
     if not model.supports_actions():
         for i in range(steps):
             # for each step we add a state to the path
-            if not model.states[state].is_absorbing() and not simulator.is_done():
-                state, reward, labels = simulator.step()
-                path[i + 1] = model.states[state]
+            if not model.states[state_id].is_absorbing() and not simulator.is_done():
+                state_id, reward, labels = simulator.step()
+                path[i + 1] = model.states[state_id]
             else:
                 break
     else:
         for i in range(steps):
             # we first choose an action (randomly or according to scheduler)
             actions = simulator.available_actions()
             select_action = (
-                random.randint(0, len(actions) - 1)
-                if not scheduler
-                else get_range_index(state)
+                get_range_index(model.get_state_by_id(state_id), scheduler)
+                if scheduler
+                else random.randint(0, len(actions) - 1)
             )
 
             # we add the state action pair to the path
-            stormvogel_action = model.states[state].available_actions()[select_action]
+            stormvogel_action = model.states[state_id].available_actions()[
+                select_action
+            ]
 
             if (
-                not model.states[state].is_absorbing(stormvogel_action)
+                not model.states[state_id].is_absorbing(stormvogel_action)
                 and not simulator.is_done()
             ):
-                state, reward, labels = simulator.step(actions[select_action])
-                path[i + 1] = (stormvogel_action, model.states[state])
+                state_id, reward, labels = simulator.step(actions[select_action])
+                path[i + 1] = (stormvogel_action, model.states[state_id])
             else:
                 break
 
@@ -173,7 +190,9 @@ def simulate(
     model: stormvogel.model.Model,
     steps: int = 1,
     runs: int = 1,
-    scheduler: stormvogel.result.Scheduler | None = None,
+    scheduler: stormvogel.result.Scheduler
+    | Callable[[stormvogel.model.State], stormvogel.model.Action]
+    | None = None,
     seed: int | None = None,
 ) -> stormvogel.model.Model | None:
     """
@@ -183,20 +202,12 @@ def simulate(
         steps: The number of steps the simulator walks through the model
         runs: The number of times the model gets simulated.
         scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided.
+                    (instead of a stormvogel scheduler, a function from states to actions can also be provided.)
         seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided.
 
     Returns the partial model discovered by all the runs of the simulator together
     """
 
-    def get_range_index(stateid: int):
-        """Helper function to convert the chosen action in a state by a scheduler to a range index."""
-        assert scheduler is not None
-        action = scheduler.get_choice_of_state(model.get_state_by_id(state))
-        available_actions = model.states[stateid].available_actions()
-
-        assert action is not None
-        return available_actions.index(action)
-
     # we initialize the simulator
     stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model)
     assert stormpy_model is not None
@@ -226,53 +237,54 @@ def get_range_index(stateid: int):
     if not partial_model.supports_actions():
         for i in range(runs):
             simulator.restart()
-            last_state = 0
+            last_state_id = 0
             for j in range(steps):
-                state, reward, labels = simulator.step()
+                state_id, reward, labels = simulator.step()
+                # we get the rewards in reversed order
                 reward.reverse()
 
                 # we add to the partial model what we discovered (if new)
-                if state not in discovered_states:
-                    discovered_states.add(state)
+                if state_id not in discovered_states:
+                    discovered_states.add(state_id)
 
                     # we also add the transitions that we travelled through, so we need to keep track of the last state
                     probability = 0
-                    transitions = model.get_transitions(last_state)
+                    transitions = model.get_transitions(last_state_id)
                     for tuple in transitions.transition[
                         stormvogel.model.EmptyAction
                     ].branch:
-                        if tuple[1].id == state:
+                        if tuple[1].id == state_id:
                             probability += float(tuple[0])
-
                     new_state = partial_model.new_state(list(labels))
-                    partial_model.get_state_by_id(last_state).add_transitions(
+                    partial_model.get_state_by_id(last_state_id).add_transitions(
                         [(probability, new_state)]
                     )
 
+                    # we add the rewards
                     for index, rewardmodel in enumerate(partial_model.rewards):
                         rewardmodel.set(new_state, reward[index])
 
-                    last_state = state
+                    last_state_id = state_id
                 if simulator.is_done():
                     break
     else:
-        state = 0
-        last_state_partial = partial_model.get_initial_state()
-        last_state_id = 0
         for i in range(runs):
+            state_id = 0
+            last_state_partial = partial_model.get_initial_state()
+            last_state_id = 0
             simulator.restart()
             for j in range(steps):
                 # we first choose an action
                 actions = simulator.available_actions()
                 select_action = (
-                    random.randint(0, len(actions) - 1)
-                    if not scheduler
-                    else get_range_index(state)
+                    get_range_index(model.get_state_by_id(state_id), scheduler)
+                    if scheduler
+                    else random.randint(0, len(actions) - 1)
                 )
 
                 # we add the action to the partial model
                 assert partial_model.actions is not None
-                action = model.states[state].available_actions()[select_action]
+                action = model.states[state_id].available_actions()[select_action]
                 if action not in partial_model.actions.values():
                     partial_model.new_action(action.name)
 
@@ -281,28 +293,27 @@ def get_range_index(stateid: int):
                 reward = discovery[1]
                 for index, rewardmodel in enumerate(partial_model.rewards):
                     row_group = stormpy_model.transition_matrix.get_row_group_start(
-                        state
+                        state_id
                     )
                     state_action_pair = row_group + select_action
                     rewardmodel.set_action_state(state_action_pair, reward[index])
 
                 # we add the state
-                state, labels = discovery[0], discovery[2]
-                if state not in discovered_states:
-                    discovered_states.add(state)
+                state_id, labels = discovery[0], discovery[2]
+                if state_id not in discovered_states:
+                    discovered_states.add(state_id)
 
                     # we also add the transitions that we travelled through, so we need to keep track of the last state
                     probability = 0
                     transitions = model.get_transitions(last_state_id)
                     for tuple in transitions.transition[action].branch:
-                        if tuple[1].id == state:
+                        if tuple[1].id == state_id:
                             probability += float(tuple[0])
-
                     new_state = partial_model.new_state(list(labels))
                     last_state_partial.add_transitions([(probability, new_state)])
 
                     last_state_partial = new_state
-                    last_state_id = state
+                    last_state_id = state_id
                 if simulator.is_done():
                     break
 

diff --git a/tests/test_simulator.py b/tests/test_simulator.py
@@ -39,7 +39,7 @@ def test_simulate():
         rewardmodel3.rewards[stateid] = float(1)
 
     assert partial_model == other_dtmc
-
+    ######################################################################################################################
     # we make a monty hall mdp and run the simulator with it
     mdp = examples.monty_hall.create_monty_hall_mdp()
     rewardmodel = mdp.add_rewards("rewardmodel")
@@ -74,6 +74,31 @@ def test_simulate():
     rewardmodel2.rewards = {0: 0, 7: 7, 16: 16}
 
     assert partial_model == other_mdp
+    ######################################################################################################################
+
+    # we test the simulator for an mdp with a lambda as Scheduler
+
+    def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action:
+        actions = state.available_actions()
+        return actions[0]
+
+    mdp = examples.monty_hall.create_monty_hall_mdp()
+
+    partial_model = stormvogel.simulator.simulate(
+        mdp, runs=1, steps=3, seed=1, scheduler=scheduler
+    )
+
+    # we make the partial model that should be created by the simulator
+    other_mdp = stormvogel.model.new_mdp()
+    other_mdp.get_initial_state().set_transitions(
+        [(1 / 3, other_mdp.new_state("carchosen"))]
+    )
+    other_mdp.get_state_by_id(1).set_transitions([(1, other_mdp.new_state("open"))])
+    other_mdp.get_state_by_id(2).set_transitions(
+        [(1, other_mdp.new_state("goatrevealed"))]
+    )
+
+    assert partial_model == other_mdp
 
 
 def test_simulate_path():
@@ -93,7 +118,7 @@ def test_simulate_path():
     )
 
     assert path == other_path
-
+    ##############################################################################################
     # we make the monty hall pomdp and run simulate path with it
     pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp()
     taken_actions = {}
@@ -118,3 +143,27 @@ def test_simulate_path():
     )
 
     assert path == other_path
+
+    ##############################################################################################
+    # we test the monty hall pomdp with a lambda as scheduler
+    def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action:
+        actions = state.available_actions()
+        return actions[0]
+
+    pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp()
+    path = stormvogel.simulator.simulate_path(
+        pomdp, steps=4, seed=1, scheduler=scheduler
+    )
+
+    # we make the path that the simulate path function should create
+    other_path = stormvogel.simulator.Path(
+        {
+            1: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(3)),
+            2: (pomdp.actions["open0"], pomdp.get_state_by_id(10)),
+            3: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(21)),
+            4: (pomdp.actions["stay"], pomdp.get_state_by_id(41)),
+        },
+        pomdp,
+    )
+
+    assert path == other_path