From 0c5ac0ce36af96aa19d29b3edfb212bd275c7774 Mon Sep 17 00:00:00 2001 From: PimLeerkes Date: Sun, 10 Nov 2024 11:21:47 +0100 Subject: [PATCH 1/2] we can now pass a function as scheduler to the simulator --- stormvogel/simulator.py | 27 +++++++++++++++++---- tests/test_simulator.py | 53 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 6 deletions(-) diff --git a/stormvogel/simulator.py b/stormvogel/simulator.py index 3b2e500..fefc64c 100644 --- a/stormvogel/simulator.py +++ b/stormvogel/simulator.py @@ -5,6 +5,7 @@ import stormvogel.model import stormpy.examples.files import stormpy.examples +from typing import Callable import random @@ -99,7 +100,9 @@ def __eq__(self, other): def simulate_path( model: stormvogel.model.Model, steps: int = 1, - scheduler: stormvogel.result.Scheduler | None = None, + scheduler: stormvogel.result.Scheduler + | Callable[[stormvogel.model.State], stormvogel.model.Action] + | None = None, seed: int | None = None, ) -> Path: """ @@ -108,6 +111,7 @@ def simulate_path( model: The stormvogel model that the simulator should run on. steps: The number of steps the simulator walks through the model. scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided. + (instead of a stormvogel scheduler, a function from states to actions can also be provided.) seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided. Returns a path object. @@ -116,7 +120,13 @@ def simulate_path( def get_range_index(stateid: int): """Helper function to convert the chosen action in a state by a scheduler to a range index.""" assert scheduler is not None - action = scheduler.get_choice_of_state(model.get_state_by_id(state)) + if isinstance(scheduler, stormvogel.result.Scheduler): + action = scheduler.get_choice_of_state(model.get_state_by_id(state)) + elif callable(scheduler): + action = scheduler(model.get_state_by_id(state)) + else: + raise TypeError("Must be of type Scheduler or a function") + available_actions = model.states[stateid].available_actions() assert action is not None @@ -173,7 +183,9 @@ def simulate( model: stormvogel.model.Model, steps: int = 1, runs: int = 1, - scheduler: stormvogel.result.Scheduler | None = None, + scheduler: stormvogel.result.Scheduler + | Callable[[stormvogel.model.State], stormvogel.model.Action] + | None = None, seed: int | None = None, ) -> stormvogel.model.Model | None: """ @@ -183,6 +195,7 @@ def simulate( steps: The number of steps the simulator walks through the model runs: The number of times the model gets simulated. scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided. + (instead of a stormvogel scheduler, a function from states to actions can also be provided.) seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided. Returns the partial model discovered by all the runs of the simulator together @@ -191,7 +204,13 @@ def simulate( def get_range_index(stateid: int): """Helper function to convert the chosen action in a state by a scheduler to a range index.""" assert scheduler is not None - action = scheduler.get_choice_of_state(model.get_state_by_id(state)) + if isinstance(scheduler, stormvogel.result.Scheduler): + action = scheduler.get_choice_of_state(model.get_state_by_id(state)) + elif callable(scheduler): + action = scheduler(model.get_state_by_id(state)) + else: + raise TypeError("Must be of type Scheduler or a function") + available_actions = model.states[stateid].available_actions() assert action is not None diff --git a/tests/test_simulator.py b/tests/test_simulator.py index b593956..c8d72d1 100644 --- a/tests/test_simulator.py +++ b/tests/test_simulator.py @@ -39,7 +39,7 @@ def test_simulate(): rewardmodel3.rewards[stateid] = float(1) assert partial_model == other_dtmc - + ###################################################################################################################### # we make a monty hall mdp and run the simulator with it mdp = examples.monty_hall.create_monty_hall_mdp() rewardmodel = mdp.add_rewards("rewardmodel") @@ -74,6 +74,31 @@ def test_simulate(): rewardmodel2.rewards = {0: 0, 7: 7, 16: 16} assert partial_model == other_mdp + ###################################################################################################################### + + # we test the simulator for an mdp with a lambda as Scheduler + + def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action: + actions = state.available_actions() + return actions[0] + + mdp = examples.monty_hall.create_monty_hall_mdp() + + partial_model = stormvogel.simulator.simulate( + mdp, runs=1, steps=3, seed=1, scheduler=scheduler + ) + + # we make the partial model that should be created by the simulator + other_mdp = stormvogel.model.new_mdp() + other_mdp.get_initial_state().set_transitions( + [(1 / 3, other_mdp.new_state("carchosen"))] + ) + other_mdp.get_state_by_id(1).set_transitions([(1, other_mdp.new_state("open"))]) + other_mdp.get_state_by_id(2).set_transitions( + [(1, other_mdp.new_state("goatrevealed"))] + ) + + assert partial_model == other_mdp def test_simulate_path(): @@ -93,7 +118,7 @@ def test_simulate_path(): ) assert path == other_path - + ############################################################################################## # we make the monty hall pomdp and run simulate path with it pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp() taken_actions = {} @@ -118,3 +143,27 @@ def test_simulate_path(): ) assert path == other_path + + ############################################################################################## + # we test the monty hall pomdp with a lambda as scheduler + def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action: + actions = state.available_actions() + return actions[0] + + pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp() + path = stormvogel.simulator.simulate_path( + pomdp, steps=4, seed=1, scheduler=scheduler + ) + + # we make the path that the simulate path function should create + other_path = stormvogel.simulator.Path( + { + 1: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(3)), + 2: (pomdp.actions["open0"], pomdp.get_state_by_id(10)), + 3: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(21)), + 4: (pomdp.actions["stay"], pomdp.get_state_by_id(41)), + }, + pomdp, + ) + + assert path == other_path From 3d6ca95db11e59ac6b6e5b0ffab750dde22f9f60 Mon Sep 17 00:00:00 2001 From: PimLeerkes Date: Sun, 10 Nov 2024 11:45:56 +0100 Subject: [PATCH 2/2] refactoring the simulator --- stormvogel/simulator.py | 120 +++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 64 deletions(-) diff --git a/stormvogel/simulator.py b/stormvogel/simulator.py index fefc64c..36301f7 100644 --- a/stormvogel/simulator.py +++ b/stormvogel/simulator.py @@ -97,6 +97,26 @@ def __eq__(self, other): return False +def get_range_index( + state: stormvogel.model.State, + scheduler: stormvogel.result.Scheduler + | Callable[[stormvogel.model.State], stormvogel.model.Action], +) -> int: + """Helper function to convert the chosen action in a state by a scheduler to a range index.""" + assert scheduler is not None + if isinstance(scheduler, stormvogel.result.Scheduler): + action = scheduler.get_choice_of_state(state) + elif callable(scheduler): + action = scheduler(state) + else: + raise TypeError("Must be of type Scheduler or a function") + + available_actions = state.available_actions() + + assert action is not None + return available_actions.index(action) + + def simulate_path( model: stormvogel.model.Model, steps: int = 1, @@ -117,21 +137,6 @@ def simulate_path( Returns a path object. """ - def get_range_index(stateid: int): - """Helper function to convert the chosen action in a state by a scheduler to a range index.""" - assert scheduler is not None - if isinstance(scheduler, stormvogel.result.Scheduler): - action = scheduler.get_choice_of_state(model.get_state_by_id(state)) - elif callable(scheduler): - action = scheduler(model.get_state_by_id(state)) - else: - raise TypeError("Must be of type Scheduler or a function") - - available_actions = model.states[stateid].available_actions() - - assert action is not None - return available_actions.index(action) - # we initialize the simulator stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model) if seed: @@ -141,15 +146,15 @@ def get_range_index(stateid: int): assert simulator is not None # we start adding states or state action pairs to the path - state = 0 + state_id = 0 path = {} simulator.restart() if not model.supports_actions(): for i in range(steps): # for each step we add a state to the path - if not model.states[state].is_absorbing() and not simulator.is_done(): - state, reward, labels = simulator.step() - path[i + 1] = model.states[state] + if not model.states[state_id].is_absorbing() and not simulator.is_done(): + state_id, reward, labels = simulator.step() + path[i + 1] = model.states[state_id] else: break else: @@ -157,20 +162,22 @@ def get_range_index(stateid: int): # we first choose an action (randomly or according to scheduler) actions = simulator.available_actions() select_action = ( - random.randint(0, len(actions) - 1) - if not scheduler - else get_range_index(state) + get_range_index(model.get_state_by_id(state_id), scheduler) + if scheduler + else random.randint(0, len(actions) - 1) ) # we add the state action pair to the path - stormvogel_action = model.states[state].available_actions()[select_action] + stormvogel_action = model.states[state_id].available_actions()[ + select_action + ] if ( - not model.states[state].is_absorbing(stormvogel_action) + not model.states[state_id].is_absorbing(stormvogel_action) and not simulator.is_done() ): - state, reward, labels = simulator.step(actions[select_action]) - path[i + 1] = (stormvogel_action, model.states[state]) + state_id, reward, labels = simulator.step(actions[select_action]) + path[i + 1] = (stormvogel_action, model.states[state_id]) else: break @@ -201,21 +208,6 @@ def simulate( Returns the partial model discovered by all the runs of the simulator together """ - def get_range_index(stateid: int): - """Helper function to convert the chosen action in a state by a scheduler to a range index.""" - assert scheduler is not None - if isinstance(scheduler, stormvogel.result.Scheduler): - action = scheduler.get_choice_of_state(model.get_state_by_id(state)) - elif callable(scheduler): - action = scheduler(model.get_state_by_id(state)) - else: - raise TypeError("Must be of type Scheduler or a function") - - available_actions = model.states[stateid].available_actions() - - assert action is not None - return available_actions.index(action) - # we initialize the simulator stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model) assert stormpy_model is not None @@ -245,53 +237,54 @@ def get_range_index(stateid: int): if not partial_model.supports_actions(): for i in range(runs): simulator.restart() - last_state = 0 + last_state_id = 0 for j in range(steps): - state, reward, labels = simulator.step() + state_id, reward, labels = simulator.step() + # we get the rewards in reversed order reward.reverse() # we add to the partial model what we discovered (if new) - if state not in discovered_states: - discovered_states.add(state) + if state_id not in discovered_states: + discovered_states.add(state_id) # we also add the transitions that we travelled through, so we need to keep track of the last state probability = 0 - transitions = model.get_transitions(last_state) + transitions = model.get_transitions(last_state_id) for tuple in transitions.transition[ stormvogel.model.EmptyAction ].branch: - if tuple[1].id == state: + if tuple[1].id == state_id: probability += float(tuple[0]) - new_state = partial_model.new_state(list(labels)) - partial_model.get_state_by_id(last_state).add_transitions( + partial_model.get_state_by_id(last_state_id).add_transitions( [(probability, new_state)] ) + # we add the rewards for index, rewardmodel in enumerate(partial_model.rewards): rewardmodel.set(new_state, reward[index]) - last_state = state + last_state_id = state_id if simulator.is_done(): break else: - state = 0 - last_state_partial = partial_model.get_initial_state() - last_state_id = 0 for i in range(runs): + state_id = 0 + last_state_partial = partial_model.get_initial_state() + last_state_id = 0 simulator.restart() for j in range(steps): # we first choose an action actions = simulator.available_actions() select_action = ( - random.randint(0, len(actions) - 1) - if not scheduler - else get_range_index(state) + get_range_index(model.get_state_by_id(state_id), scheduler) + if scheduler + else random.randint(0, len(actions) - 1) ) # we add the action to the partial model assert partial_model.actions is not None - action = model.states[state].available_actions()[select_action] + action = model.states[state_id].available_actions()[select_action] if action not in partial_model.actions.values(): partial_model.new_action(action.name) @@ -300,28 +293,27 @@ def get_range_index(stateid: int): reward = discovery[1] for index, rewardmodel in enumerate(partial_model.rewards): row_group = stormpy_model.transition_matrix.get_row_group_start( - state + state_id ) state_action_pair = row_group + select_action rewardmodel.set_action_state(state_action_pair, reward[index]) # we add the state - state, labels = discovery[0], discovery[2] - if state not in discovered_states: - discovered_states.add(state) + state_id, labels = discovery[0], discovery[2] + if state_id not in discovered_states: + discovered_states.add(state_id) # we also add the transitions that we travelled through, so we need to keep track of the last state probability = 0 transitions = model.get_transitions(last_state_id) for tuple in transitions.transition[action].branch: - if tuple[1].id == state: + if tuple[1].id == state_id: probability += float(tuple[0]) - new_state = partial_model.new_state(list(labels)) last_state_partial.add_transitions([(probability, new_state)]) last_state_partial = new_state - last_state_id = state + last_state_id = state_id if simulator.is_done(): break