Skip to content

Commit

Permalink
Merge pull request #138 from moves-rwth/136-flexible-scheduler-for-th…
Browse files Browse the repository at this point in the history
…e-simulator

136 flexible scheduler for the simulator
  • Loading branch information
PimLeerkes authored Nov 14, 2024
2 parents 7e25dc8 + 3d6ca95 commit f46c2c8
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 56 deletions.
119 changes: 65 additions & 54 deletions stormvogel/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import stormvogel.model
import stormpy.examples.files
import stormpy.examples
from typing import Callable
import random


Expand Down Expand Up @@ -96,10 +97,32 @@ def __eq__(self, other):
return False


def get_range_index(
    state: stormvogel.model.State,
    scheduler: stormvogel.result.Scheduler
    | Callable[[stormvogel.model.State], stormvogel.model.Action],
) -> int:
    """Convert the action a scheduler chooses in a state to a range index.

    Args:
        state: The state in which an action has to be chosen.
        scheduler: Either a stormvogel scheduler or a function from states
            to actions.

    Returns:
        The position of the chosen action in ``state.available_actions()``.

    Raises:
        TypeError: If the scheduler is neither a Scheduler nor callable
            (this includes ``None``).
        ValueError: If the scheduler yields no action, or an action that is
            not among the available actions of the state.
    """
    # Explicit checks instead of asserts: asserts disappear under ``python -O``,
    # which would let invalid input slip through to a confusing failure later.
    if isinstance(scheduler, stormvogel.result.Scheduler):
        action = scheduler.get_choice_of_state(state)
    elif callable(scheduler):
        action = scheduler(state)
    else:
        raise TypeError("Must be of type Scheduler or a function")

    if action is None:
        raise ValueError("The scheduler did not choose an action for the state")

    available_actions = state.available_actions()
    try:
        return available_actions.index(action)
    except ValueError as err:
        # Same exception type as list.index raises, but with usable context.
        raise ValueError(
            "The action chosen by the scheduler is not available in the state"
        ) from err


def simulate_path(
model: stormvogel.model.Model,
steps: int = 1,
scheduler: stormvogel.result.Scheduler | None = None,
scheduler: stormvogel.result.Scheduler
| Callable[[stormvogel.model.State], stormvogel.model.Action]
| None = None,
seed: int | None = None,
) -> Path:
"""
Expand All @@ -108,20 +131,12 @@ def simulate_path(
model: The stormvogel model that the simulator should run on.
steps: The number of steps the simulator walks through the model.
scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided.
(instead of a stormvogel scheduler, a function from states to actions can also be provided.)
seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided.
Returns a path object.
"""

def get_range_index(stateid: int):
"""Helper function to convert the chosen action in a state by a scheduler to a range index."""
assert scheduler is not None
action = scheduler.get_choice_of_state(model.get_state_by_id(state))
available_actions = model.states[stateid].available_actions()

assert action is not None
return available_actions.index(action)

# we initialize the simulator
stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model)
if seed:
Expand All @@ -131,36 +146,38 @@ def get_range_index(stateid: int):
assert simulator is not None

# we start adding states or state action pairs to the path
state = 0
state_id = 0
path = {}
simulator.restart()
if not model.supports_actions():
for i in range(steps):
# for each step we add a state to the path
if not model.states[state].is_absorbing() and not simulator.is_done():
state, reward, labels = simulator.step()
path[i + 1] = model.states[state]
if not model.states[state_id].is_absorbing() and not simulator.is_done():
state_id, reward, labels = simulator.step()
path[i + 1] = model.states[state_id]
else:
break
else:
for i in range(steps):
# we first choose an action (randomly or according to scheduler)
actions = simulator.available_actions()
select_action = (
random.randint(0, len(actions) - 1)
if not scheduler
else get_range_index(state)
get_range_index(model.get_state_by_id(state_id), scheduler)
if scheduler
else random.randint(0, len(actions) - 1)
)

# we add the state action pair to the path
stormvogel_action = model.states[state].available_actions()[select_action]
stormvogel_action = model.states[state_id].available_actions()[
select_action
]

if (
not model.states[state].is_absorbing(stormvogel_action)
not model.states[state_id].is_absorbing(stormvogel_action)
and not simulator.is_done()
):
state, reward, labels = simulator.step(actions[select_action])
path[i + 1] = (stormvogel_action, model.states[state])
state_id, reward, labels = simulator.step(actions[select_action])
path[i + 1] = (stormvogel_action, model.states[state_id])
else:
break

Expand All @@ -173,7 +190,9 @@ def simulate(
model: stormvogel.model.Model,
steps: int = 1,
runs: int = 1,
scheduler: stormvogel.result.Scheduler | None = None,
scheduler: stormvogel.result.Scheduler
| Callable[[stormvogel.model.State], stormvogel.model.Action]
| None = None,
seed: int | None = None,
) -> stormvogel.model.Model | None:
"""
Expand All @@ -183,20 +202,12 @@ def simulate(
steps: The number of steps the simulator walks through the model
runs: The number of times the model gets simulated.
scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided.
(instead of a stormvogel scheduler, a function from states to actions can also be provided.)
seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided.
Returns the partial model discovered by all the runs of the simulator together
"""

def get_range_index(stateid: int):
"""Helper function to convert the chosen action in a state by a scheduler to a range index."""
assert scheduler is not None
action = scheduler.get_choice_of_state(model.get_state_by_id(state))
available_actions = model.states[stateid].available_actions()

assert action is not None
return available_actions.index(action)

# we initialize the simulator
stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model)
assert stormpy_model is not None
Expand Down Expand Up @@ -226,53 +237,54 @@ def get_range_index(stateid: int):
if not partial_model.supports_actions():
for i in range(runs):
simulator.restart()
last_state = 0
last_state_id = 0
for j in range(steps):
state, reward, labels = simulator.step()
state_id, reward, labels = simulator.step()
# we get the rewards in reversed order
reward.reverse()

# we add to the partial model what we discovered (if new)
if state not in discovered_states:
discovered_states.add(state)
if state_id not in discovered_states:
discovered_states.add(state_id)

# we also add the transitions that we travelled through, so we need to keep track of the last state
probability = 0
transitions = model.get_transitions(last_state)
transitions = model.get_transitions(last_state_id)
for tuple in transitions.transition[
stormvogel.model.EmptyAction
].branch:
if tuple[1].id == state:
if tuple[1].id == state_id:
probability += float(tuple[0])

new_state = partial_model.new_state(list(labels))
partial_model.get_state_by_id(last_state).add_transitions(
partial_model.get_state_by_id(last_state_id).add_transitions(
[(probability, new_state)]
)

# we add the rewards
for index, rewardmodel in enumerate(partial_model.rewards):
rewardmodel.set(new_state, reward[index])

last_state = state
last_state_id = state_id
if simulator.is_done():
break
else:
state = 0
last_state_partial = partial_model.get_initial_state()
last_state_id = 0
for i in range(runs):
state_id = 0
last_state_partial = partial_model.get_initial_state()
last_state_id = 0
simulator.restart()
for j in range(steps):
# we first choose an action
actions = simulator.available_actions()
select_action = (
random.randint(0, len(actions) - 1)
if not scheduler
else get_range_index(state)
get_range_index(model.get_state_by_id(state_id), scheduler)
if scheduler
else random.randint(0, len(actions) - 1)
)

# we add the action to the partial model
assert partial_model.actions is not None
action = model.states[state].available_actions()[select_action]
action = model.states[state_id].available_actions()[select_action]
if action not in partial_model.actions.values():
partial_model.new_action(action.name)

Expand All @@ -281,28 +293,27 @@ def get_range_index(stateid: int):
reward = discovery[1]
for index, rewardmodel in enumerate(partial_model.rewards):
row_group = stormpy_model.transition_matrix.get_row_group_start(
state
state_id
)
state_action_pair = row_group + select_action
rewardmodel.set_action_state(state_action_pair, reward[index])

# we add the state
state, labels = discovery[0], discovery[2]
if state not in discovered_states:
discovered_states.add(state)
state_id, labels = discovery[0], discovery[2]
if state_id not in discovered_states:
discovered_states.add(state_id)

# we also add the transitions that we travelled through, so we need to keep track of the last state
probability = 0
transitions = model.get_transitions(last_state_id)
for tuple in transitions.transition[action].branch:
if tuple[1].id == state:
if tuple[1].id == state_id:
probability += float(tuple[0])

new_state = partial_model.new_state(list(labels))
last_state_partial.add_transitions([(probability, new_state)])

last_state_partial = new_state
last_state_id = state
last_state_id = state_id
if simulator.is_done():
break

Expand Down
53 changes: 51 additions & 2 deletions tests/test_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_simulate():
rewardmodel3.rewards[stateid] = float(1)

assert partial_model == other_dtmc

######################################################################################################################
# we make a monty hall mdp and run the simulator with it
mdp = examples.monty_hall.create_monty_hall_mdp()
rewardmodel = mdp.add_rewards("rewardmodel")
Expand Down Expand Up @@ -74,6 +74,31 @@ def test_simulate():
rewardmodel2.rewards = {0: 0, 7: 7, 16: 16}

assert partial_model == other_mdp
######################################################################################################################

# we test the simulator for an mdp with a function (instead of a Scheduler object) as scheduler

def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action:
actions = state.available_actions()
return actions[0]

mdp = examples.monty_hall.create_monty_hall_mdp()

partial_model = stormvogel.simulator.simulate(
mdp, runs=1, steps=3, seed=1, scheduler=scheduler
)

# we make the partial model that should be created by the simulator
other_mdp = stormvogel.model.new_mdp()
other_mdp.get_initial_state().set_transitions(
[(1 / 3, other_mdp.new_state("carchosen"))]
)
other_mdp.get_state_by_id(1).set_transitions([(1, other_mdp.new_state("open"))])
other_mdp.get_state_by_id(2).set_transitions(
[(1, other_mdp.new_state("goatrevealed"))]
)

assert partial_model == other_mdp


def test_simulate_path():
Expand All @@ -93,7 +118,7 @@ def test_simulate_path():
)

assert path == other_path

##############################################################################################
# we make the monty hall pomdp and run simulate path with it
pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp()
taken_actions = {}
Expand All @@ -118,3 +143,27 @@ def test_simulate_path():
)

assert path == other_path

##############################################################################################
# we test the monty hall pomdp with a function (instead of a Scheduler object) as scheduler
def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action:
actions = state.available_actions()
return actions[0]

pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp()
path = stormvogel.simulator.simulate_path(
pomdp, steps=4, seed=1, scheduler=scheduler
)

# we make the path that the simulate path function should create
other_path = stormvogel.simulator.Path(
{
1: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(3)),
2: (pomdp.actions["open0"], pomdp.get_state_by_id(10)),
3: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(21)),
4: (pomdp.actions["stay"], pomdp.get_state_by_id(41)),
},
pomdp,
)

assert path == other_path

0 comments on commit f46c2c8

Please sign in to comment.