From 0c5ac0ce36af96aa19d29b3edfb212bd275c7774 Mon Sep 17 00:00:00 2001
From: PimLeerkes <pimsimon.leerkes@gmail.com>
Date: Sun, 10 Nov 2024 11:21:47 +0100
Subject: [PATCH 1/2] we can now pass a function as scheduler to the simulator

---
 stormvogel/simulator.py | 27 +++++++++++++++++----
 tests/test_simulator.py | 53 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/stormvogel/simulator.py b/stormvogel/simulator.py
index 3b2e500..fefc64c 100644
--- a/stormvogel/simulator.py
+++ b/stormvogel/simulator.py
@@ -5,6 +5,7 @@
 import stormvogel.model
 import stormpy.examples.files
 import stormpy.examples
+from typing import Callable
 import random
 
 
@@ -99,7 +100,9 @@ def __eq__(self, other):
 def simulate_path(
     model: stormvogel.model.Model,
     steps: int = 1,
-    scheduler: stormvogel.result.Scheduler | None = None,
+    scheduler: stormvogel.result.Scheduler
+    | Callable[[stormvogel.model.State], stormvogel.model.Action]
+    | None = None,
     seed: int | None = None,
 ) -> Path:
     """
@@ -108,6 +111,7 @@ def simulate_path(
         model: The stormvogel model that the simulator should run on.
         steps: The number of steps the simulator walks through the model.
         scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided.
+                    (instead of a stormvogel scheduler, a function from states to actions can also be provided.)
         seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided.
 
     Returns a path object.
@@ -116,7 +120,13 @@ def simulate_path(
     def get_range_index(stateid: int):
         """Helper function to convert the chosen action in a state by a scheduler to a range index."""
         assert scheduler is not None
-        action = scheduler.get_choice_of_state(model.get_state_by_id(state))
+        if isinstance(scheduler, stormvogel.result.Scheduler):
+            action = scheduler.get_choice_of_state(model.get_state_by_id(state))
+        elif callable(scheduler):
+            action = scheduler(model.get_state_by_id(state))
+        else:
+            raise TypeError("Must be of type Scheduler or a function")
+
         available_actions = model.states[stateid].available_actions()
 
         assert action is not None
@@ -173,7 +183,9 @@ def simulate(
     model: stormvogel.model.Model,
     steps: int = 1,
     runs: int = 1,
-    scheduler: stormvogel.result.Scheduler | None = None,
+    scheduler: stormvogel.result.Scheduler
+    | Callable[[stormvogel.model.State], stormvogel.model.Action]
+    | None = None,
     seed: int | None = None,
 ) -> stormvogel.model.Model | None:
     """
@@ -183,6 +195,7 @@ def simulate(
         steps: The number of steps the simulator walks through the model
         runs: The number of times the model gets simulated.
         scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided.
+                    (instead of a stormvogel scheduler, a function from states to actions can also be provided.)
         seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided.
 
     Returns the partial model discovered by all the runs of the simulator together
@@ -191,7 +204,13 @@ def simulate(
     def get_range_index(stateid: int):
         """Helper function to convert the chosen action in a state by a scheduler to a range index."""
         assert scheduler is not None
-        action = scheduler.get_choice_of_state(model.get_state_by_id(state))
+        if isinstance(scheduler, stormvogel.result.Scheduler):
+            action = scheduler.get_choice_of_state(model.get_state_by_id(state))
+        elif callable(scheduler):
+            action = scheduler(model.get_state_by_id(state))
+        else:
+            raise TypeError("Must be of type Scheduler or a function")
+
         available_actions = model.states[stateid].available_actions()
 
         assert action is not None
diff --git a/tests/test_simulator.py b/tests/test_simulator.py
index b593956..c8d72d1 100644
--- a/tests/test_simulator.py
+++ b/tests/test_simulator.py
@@ -39,7 +39,7 @@ def test_simulate():
         rewardmodel3.rewards[stateid] = float(1)
 
     assert partial_model == other_dtmc
-
+    ######################################################################################################################
     # we make a monty hall mdp and run the simulator with it
     mdp = examples.monty_hall.create_monty_hall_mdp()
     rewardmodel = mdp.add_rewards("rewardmodel")
@@ -74,6 +74,31 @@ def test_simulate():
     rewardmodel2.rewards = {0: 0, 7: 7, 16: 16}
 
     assert partial_model == other_mdp
+    ######################################################################################################################
+
+    # we test the simulator for an mdp with a function as scheduler
+
+    def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action:
+        actions = state.available_actions()
+        return actions[0]
+
+    mdp = examples.monty_hall.create_monty_hall_mdp()
+
+    partial_model = stormvogel.simulator.simulate(
+        mdp, runs=1, steps=3, seed=1, scheduler=scheduler
+    )
+
+    # we make the partial model that should be created by the simulator
+    other_mdp = stormvogel.model.new_mdp()
+    other_mdp.get_initial_state().set_transitions(
+        [(1 / 3, other_mdp.new_state("carchosen"))]
+    )
+    other_mdp.get_state_by_id(1).set_transitions([(1, other_mdp.new_state("open"))])
+    other_mdp.get_state_by_id(2).set_transitions(
+        [(1, other_mdp.new_state("goatrevealed"))]
+    )
+
+    assert partial_model == other_mdp
 
 
 def test_simulate_path():
@@ -93,7 +118,7 @@ def test_simulate_path():
     )
 
     assert path == other_path
-
+    ##############################################################################################
     # we make the monty hall pomdp and run simulate path with it
     pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp()
     taken_actions = {}
@@ -118,3 +143,27 @@ def test_simulate_path():
     )
 
     assert path == other_path
+
+    ##############################################################################################
+    # we test the monty hall pomdp with a function as scheduler
+    def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action:
+        actions = state.available_actions()
+        return actions[0]
+
+    pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp()
+    path = stormvogel.simulator.simulate_path(
+        pomdp, steps=4, seed=1, scheduler=scheduler
+    )
+
+    # we make the path that the simulate path function should create
+    other_path = stormvogel.simulator.Path(
+        {
+            1: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(3)),
+            2: (pomdp.actions["open0"], pomdp.get_state_by_id(10)),
+            3: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(21)),
+            4: (pomdp.actions["stay"], pomdp.get_state_by_id(41)),
+        },
+        pomdp,
+    )
+
+    assert path == other_path

From 3d6ca95db11e59ac6b6e5b0ffab750dde22f9f60 Mon Sep 17 00:00:00 2001
From: PimLeerkes <pimsimon.leerkes@gmail.com>
Date: Sun, 10 Nov 2024 11:45:56 +0100
Subject: [PATCH 2/2] refactoring the simulator

---
 stormvogel/simulator.py | 120 +++++++++++++++++++---------------------
 1 file changed, 56 insertions(+), 64 deletions(-)

diff --git a/stormvogel/simulator.py b/stormvogel/simulator.py
index fefc64c..36301f7 100644
--- a/stormvogel/simulator.py
+++ b/stormvogel/simulator.py
@@ -97,6 +97,26 @@ def __eq__(self, other):
             return False
 
 
+def get_range_index(
+    state: stormvogel.model.State,
+    scheduler: stormvogel.result.Scheduler
+    | Callable[[stormvogel.model.State], stormvogel.model.Action],
+) -> int:
+    """Return the index, among the state's available actions, of the action the scheduler chooses."""
+    assert scheduler is not None
+    if isinstance(scheduler, stormvogel.result.Scheduler):
+        action = scheduler.get_choice_of_state(state)
+    elif callable(scheduler):
+        action = scheduler(state)
+    else:
+        raise TypeError("Must be of type Scheduler or a function")
+
+    available_actions = state.available_actions()
+
+    assert action is not None
+    return available_actions.index(action)
+
+
 def simulate_path(
     model: stormvogel.model.Model,
     steps: int = 1,
@@ -117,21 +137,6 @@ def simulate_path(
     Returns a path object.
     """
 
-    def get_range_index(stateid: int):
-        """Helper function to convert the chosen action in a state by a scheduler to a range index."""
-        assert scheduler is not None
-        if isinstance(scheduler, stormvogel.result.Scheduler):
-            action = scheduler.get_choice_of_state(model.get_state_by_id(state))
-        elif callable(scheduler):
-            action = scheduler(model.get_state_by_id(state))
-        else:
-            raise TypeError("Must be of type Scheduler or a function")
-
-        available_actions = model.states[stateid].available_actions()
-
-        assert action is not None
-        return available_actions.index(action)
-
     # we initialize the simulator
     stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model)
     if seed:
@@ -141,15 +146,15 @@ def get_range_index(stateid: int):
     assert simulator is not None
 
     # we start adding states or state action pairs to the path
-    state = 0
+    state_id = 0
     path = {}
     simulator.restart()
     if not model.supports_actions():
         for i in range(steps):
             # for each step we add a state to the path
-            if not model.states[state].is_absorbing() and not simulator.is_done():
-                state, reward, labels = simulator.step()
-                path[i + 1] = model.states[state]
+            if not model.states[state_id].is_absorbing() and not simulator.is_done():
+                state_id, reward, labels = simulator.step()
+                path[i + 1] = model.states[state_id]
             else:
                 break
     else:
@@ -157,20 +162,22 @@ def get_range_index(stateid: int):
             # we first choose an action (randomly or according to scheduler)
             actions = simulator.available_actions()
             select_action = (
-                random.randint(0, len(actions) - 1)
-                if not scheduler
-                else get_range_index(state)
+                get_range_index(model.get_state_by_id(state_id), scheduler)
+                if scheduler
+                else random.randint(0, len(actions) - 1)
             )
 
             # we add the state action pair to the path
-            stormvogel_action = model.states[state].available_actions()[select_action]
+            stormvogel_action = model.states[state_id].available_actions()[
+                select_action
+            ]
 
             if (
-                not model.states[state].is_absorbing(stormvogel_action)
+                not model.states[state_id].is_absorbing(stormvogel_action)
                 and not simulator.is_done()
             ):
-                state, reward, labels = simulator.step(actions[select_action])
-                path[i + 1] = (stormvogel_action, model.states[state])
+                state_id, reward, labels = simulator.step(actions[select_action])
+                path[i + 1] = (stormvogel_action, model.states[state_id])
             else:
                 break
 
@@ -201,21 +208,6 @@ def simulate(
     Returns the partial model discovered by all the runs of the simulator together
     """
 
-    def get_range_index(stateid: int):
-        """Helper function to convert the chosen action in a state by a scheduler to a range index."""
-        assert scheduler is not None
-        if isinstance(scheduler, stormvogel.result.Scheduler):
-            action = scheduler.get_choice_of_state(model.get_state_by_id(state))
-        elif callable(scheduler):
-            action = scheduler(model.get_state_by_id(state))
-        else:
-            raise TypeError("Must be of type Scheduler or a function")
-
-        available_actions = model.states[stateid].available_actions()
-
-        assert action is not None
-        return available_actions.index(action)
-
     # we initialize the simulator
     stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model)
     assert stormpy_model is not None
@@ -245,53 +237,54 @@ def get_range_index(stateid: int):
     if not partial_model.supports_actions():
         for i in range(runs):
             simulator.restart()
-            last_state = 0
+            last_state_id = 0
             for j in range(steps):
-                state, reward, labels = simulator.step()
+                state_id, reward, labels = simulator.step()
+                # we get the rewards in reversed order
                 reward.reverse()
 
                 # we add to the partial model what we discovered (if new)
-                if state not in discovered_states:
-                    discovered_states.add(state)
+                if state_id not in discovered_states:
+                    discovered_states.add(state_id)
 
                     # we also add the transitions that we travelled through, so we need to keep track of the last state
                     probability = 0
-                    transitions = model.get_transitions(last_state)
+                    transitions = model.get_transitions(last_state_id)
                     for tuple in transitions.transition[
                         stormvogel.model.EmptyAction
                     ].branch:
-                        if tuple[1].id == state:
+                        if tuple[1].id == state_id:
                             probability += float(tuple[0])
-
                     new_state = partial_model.new_state(list(labels))
-                    partial_model.get_state_by_id(last_state).add_transitions(
+                    partial_model.get_state_by_id(last_state_id).add_transitions(
                         [(probability, new_state)]
                     )
 
+                    # we add the rewards
                     for index, rewardmodel in enumerate(partial_model.rewards):
                         rewardmodel.set(new_state, reward[index])
 
-                    last_state = state
+                    last_state_id = state_id
                 if simulator.is_done():
                     break
     else:
-        state = 0
-        last_state_partial = partial_model.get_initial_state()
-        last_state_id = 0
         for i in range(runs):
+            state_id = 0
+            last_state_partial = partial_model.get_initial_state()
+            last_state_id = 0
             simulator.restart()
             for j in range(steps):
                 # we first choose an action
                 actions = simulator.available_actions()
                 select_action = (
-                    random.randint(0, len(actions) - 1)
-                    if not scheduler
-                    else get_range_index(state)
+                    get_range_index(model.get_state_by_id(state_id), scheduler)
+                    if scheduler
+                    else random.randint(0, len(actions) - 1)
                 )
 
                 # we add the action to the partial model
                 assert partial_model.actions is not None
-                action = model.states[state].available_actions()[select_action]
+                action = model.states[state_id].available_actions()[select_action]
                 if action not in partial_model.actions.values():
                     partial_model.new_action(action.name)
 
@@ -300,28 +293,27 @@ def get_range_index(stateid: int):
                 reward = discovery[1]
                 for index, rewardmodel in enumerate(partial_model.rewards):
                     row_group = stormpy_model.transition_matrix.get_row_group_start(
-                        state
+                        state_id
                     )
                     state_action_pair = row_group + select_action
                     rewardmodel.set_action_state(state_action_pair, reward[index])
 
                 # we add the state
-                state, labels = discovery[0], discovery[2]
-                if state not in discovered_states:
-                    discovered_states.add(state)
+                state_id, labels = discovery[0], discovery[2]
+                if state_id not in discovered_states:
+                    discovered_states.add(state_id)
 
                     # we also add the transitions that we travelled through, so we need to keep track of the last state
                     probability = 0
                     transitions = model.get_transitions(last_state_id)
                     for tuple in transitions.transition[action].branch:
-                        if tuple[1].id == state:
+                        if tuple[1].id == state_id:
                             probability += float(tuple[0])
-
                     new_state = partial_model.new_state(list(labels))
                     last_state_partial.add_transitions([(probability, new_state)])
 
                     last_state_partial = new_state
-                    last_state_id = state
+                    last_state_id = state_id
                 if simulator.is_done():
                     break