Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

136 flexible scheduler for the simulator #138

Merged
merged 2 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 65 additions & 54 deletions stormvogel/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import stormvogel.model
import stormpy.examples.files
import stormpy.examples
from typing import Callable
import random


Expand Down Expand Up @@ -96,10 +97,32 @@ def __eq__(self, other):
return False


def get_range_index(
    state: stormvogel.model.State,
    scheduler: stormvogel.result.Scheduler
    | Callable[[stormvogel.model.State], stormvogel.model.Action],
) -> int:
    """Convert the action a scheduler chooses in a state to a range index.

    Args:
        state: The state in which an action has to be chosen.
        scheduler: Either a stormvogel Scheduler (queried through
            ``get_choice_of_state``) or a function from a state to an action.

    Returns:
        The position of the chosen action within ``state.available_actions()``.

    Raises:
        TypeError: If ``scheduler`` is neither a Scheduler nor callable
            (this includes ``None``).
        ValueError: If the scheduler yields no action, or an action that is
            not among the available actions of the state.
    """
    if isinstance(scheduler, stormvogel.result.Scheduler):
        action = scheduler.get_choice_of_state(state)
    elif callable(scheduler):
        action = scheduler(state)
    else:
        raise TypeError("Must be of type Scheduler or a function")

    available_actions = state.available_actions()

    # Explicit checks rather than asserts: asserts are removed when Python
    # runs with -O, which would let a missing choice slip through unnoticed.
    if action is None:
        raise ValueError("The scheduler did not provide an action for the state")

    try:
        return available_actions.index(action)
    except ValueError:
        # Re-raise with a message that names the actual problem instead of
        # the generic "x is not in list" produced by list.index.
        raise ValueError(
            f"The scheduler chose action {action!r}, "
            "which is not available in the given state"
        ) from None


def simulate_path(
model: stormvogel.model.Model,
steps: int = 1,
scheduler: stormvogel.result.Scheduler | None = None,
scheduler: stormvogel.result.Scheduler
| Callable[[stormvogel.model.State], stormvogel.model.Action]
| None = None,
seed: int | None = None,
) -> Path:
"""
Expand All @@ -108,20 +131,12 @@ def simulate_path(
model: The stormvogel model that the simulator should run on.
steps: The number of steps the simulator walks through the model.
scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided.
(instead of a stormvogel scheduler, a function from states to actions can also be provided.)
seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided.

Returns a path object.
"""

def get_range_index(stateid: int):
"""Helper function to convert the chosen action in a state by a scheduler to a range index."""
assert scheduler is not None
action = scheduler.get_choice_of_state(model.get_state_by_id(state))
available_actions = model.states[stateid].available_actions()

assert action is not None
return available_actions.index(action)

# we initialize the simulator
stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model)
if seed:
Expand All @@ -131,36 +146,38 @@ def get_range_index(stateid: int):
assert simulator is not None

# we start adding states or state action pairs to the path
state = 0
state_id = 0
path = {}
simulator.restart()
if not model.supports_actions():
for i in range(steps):
# for each step we add a state to the path
if not model.states[state].is_absorbing() and not simulator.is_done():
state, reward, labels = simulator.step()
path[i + 1] = model.states[state]
if not model.states[state_id].is_absorbing() and not simulator.is_done():
state_id, reward, labels = simulator.step()
path[i + 1] = model.states[state_id]
else:
break
else:
for i in range(steps):
# we first choose an action (randomly or according to scheduler)
actions = simulator.available_actions()
select_action = (
random.randint(0, len(actions) - 1)
if not scheduler
else get_range_index(state)
get_range_index(model.get_state_by_id(state_id), scheduler)
if scheduler
else random.randint(0, len(actions) - 1)
)

# we add the state action pair to the path
stormvogel_action = model.states[state].available_actions()[select_action]
stormvogel_action = model.states[state_id].available_actions()[
select_action
]

if (
not model.states[state].is_absorbing(stormvogel_action)
not model.states[state_id].is_absorbing(stormvogel_action)
and not simulator.is_done()
):
state, reward, labels = simulator.step(actions[select_action])
path[i + 1] = (stormvogel_action, model.states[state])
state_id, reward, labels = simulator.step(actions[select_action])
path[i + 1] = (stormvogel_action, model.states[state_id])
else:
break

Expand All @@ -173,7 +190,9 @@ def simulate(
model: stormvogel.model.Model,
steps: int = 1,
runs: int = 1,
scheduler: stormvogel.result.Scheduler | None = None,
scheduler: stormvogel.result.Scheduler
| Callable[[stormvogel.model.State], stormvogel.model.Action]
| None = None,
seed: int | None = None,
) -> stormvogel.model.Model | None:
"""
Expand All @@ -183,20 +202,12 @@ def simulate(
steps: The number of steps the simulator walks through the model
runs: The number of times the model gets simulated.
scheduler: A stormvogel scheduler to determine what actions should be taken. Random if not provided.
(instead of a stormvogel scheduler, a function from states to actions can also be provided.)
seed: The seed for the function that determines for each state what the next state will be. Random seed if not provided.

Returns the partial model discovered by all the runs of the simulator together
"""

def get_range_index(stateid: int):
"""Helper function to convert the chosen action in a state by a scheduler to a range index."""
assert scheduler is not None
action = scheduler.get_choice_of_state(model.get_state_by_id(state))
available_actions = model.states[stateid].available_actions()

assert action is not None
return available_actions.index(action)

# we initialize the simulator
stormpy_model = stormvogel.mapping.stormvogel_to_stormpy(model)
assert stormpy_model is not None
Expand Down Expand Up @@ -226,53 +237,54 @@ def get_range_index(stateid: int):
if not partial_model.supports_actions():
for i in range(runs):
simulator.restart()
last_state = 0
last_state_id = 0
for j in range(steps):
state, reward, labels = simulator.step()
state_id, reward, labels = simulator.step()
# we get the rewards in reversed order
reward.reverse()

# we add to the partial model what we discovered (if new)
if state not in discovered_states:
discovered_states.add(state)
if state_id not in discovered_states:
discovered_states.add(state_id)

# we also add the transitions that we travelled through, so we need to keep track of the last state
probability = 0
transitions = model.get_transitions(last_state)
transitions = model.get_transitions(last_state_id)
for tuple in transitions.transition[
stormvogel.model.EmptyAction
].branch:
if tuple[1].id == state:
if tuple[1].id == state_id:
probability += float(tuple[0])

new_state = partial_model.new_state(list(labels))
partial_model.get_state_by_id(last_state).add_transitions(
partial_model.get_state_by_id(last_state_id).add_transitions(
[(probability, new_state)]
)

# we add the rewards
for index, rewardmodel in enumerate(partial_model.rewards):
rewardmodel.set(new_state, reward[index])

last_state = state
last_state_id = state_id
if simulator.is_done():
break
else:
state = 0
last_state_partial = partial_model.get_initial_state()
last_state_id = 0
for i in range(runs):
state_id = 0
last_state_partial = partial_model.get_initial_state()
last_state_id = 0
simulator.restart()
for j in range(steps):
# we first choose an action
actions = simulator.available_actions()
select_action = (
random.randint(0, len(actions) - 1)
if not scheduler
else get_range_index(state)
get_range_index(model.get_state_by_id(state_id), scheduler)
if scheduler
else random.randint(0, len(actions) - 1)
)

# we add the action to the partial model
assert partial_model.actions is not None
action = model.states[state].available_actions()[select_action]
action = model.states[state_id].available_actions()[select_action]
if action not in partial_model.actions.values():
partial_model.new_action(action.name)

Expand All @@ -281,28 +293,27 @@ def get_range_index(stateid: int):
reward = discovery[1]
for index, rewardmodel in enumerate(partial_model.rewards):
row_group = stormpy_model.transition_matrix.get_row_group_start(
state
state_id
)
state_action_pair = row_group + select_action
rewardmodel.set_action_state(state_action_pair, reward[index])

# we add the state
state, labels = discovery[0], discovery[2]
if state not in discovered_states:
discovered_states.add(state)
state_id, labels = discovery[0], discovery[2]
if state_id not in discovered_states:
discovered_states.add(state_id)

# we also add the transitions that we travelled through, so we need to keep track of the last state
probability = 0
transitions = model.get_transitions(last_state_id)
for tuple in transitions.transition[action].branch:
if tuple[1].id == state:
if tuple[1].id == state_id:
probability += float(tuple[0])

new_state = partial_model.new_state(list(labels))
last_state_partial.add_transitions([(probability, new_state)])

last_state_partial = new_state
last_state_id = state
last_state_id = state_id
if simulator.is_done():
break

Expand Down
53 changes: 51 additions & 2 deletions tests/test_simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_simulate():
rewardmodel3.rewards[stateid] = float(1)

assert partial_model == other_dtmc

######################################################################################################################
# we make a monty hall mdp and run the simulator with it
mdp = examples.monty_hall.create_monty_hall_mdp()
rewardmodel = mdp.add_rewards("rewardmodel")
Expand Down Expand Up @@ -74,6 +74,31 @@ def test_simulate():
rewardmodel2.rewards = {0: 0, 7: 7, 16: 16}

assert partial_model == other_mdp
######################################################################################################################

# we test the simulator for an mdp with a plain function (instead of a Scheduler object) as scheduler

def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action:
actions = state.available_actions()
return actions[0]

mdp = examples.monty_hall.create_monty_hall_mdp()

partial_model = stormvogel.simulator.simulate(
mdp, runs=1, steps=3, seed=1, scheduler=scheduler
)

# we make the partial model that should be created by the simulator
other_mdp = stormvogel.model.new_mdp()
other_mdp.get_initial_state().set_transitions(
[(1 / 3, other_mdp.new_state("carchosen"))]
)
other_mdp.get_state_by_id(1).set_transitions([(1, other_mdp.new_state("open"))])
other_mdp.get_state_by_id(2).set_transitions(
[(1, other_mdp.new_state("goatrevealed"))]
)

assert partial_model == other_mdp


def test_simulate_path():
Expand All @@ -93,7 +118,7 @@ def test_simulate_path():
)

assert path == other_path

##############################################################################################
# we make the monty hall pomdp and run simulate path with it
pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp()
taken_actions = {}
Expand All @@ -118,3 +143,27 @@ def test_simulate_path():
)

assert path == other_path

##############################################################################################
# we test the monty hall pomdp with a plain function (instead of a Scheduler object) as scheduler
def scheduler(state: stormvogel.model.State) -> stormvogel.model.Action:
actions = state.available_actions()
return actions[0]

pomdp = examples.monty_hall_pomdp.create_monty_hall_pomdp()
path = stormvogel.simulator.simulate_path(
pomdp, steps=4, seed=1, scheduler=scheduler
)

# we make the path that the simulate path function should create
other_path = stormvogel.simulator.Path(
{
1: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(3)),
2: (pomdp.actions["open0"], pomdp.get_state_by_id(10)),
3: (stormvogel.model.EmptyAction, pomdp.get_state_by_id(21)),
4: (pomdp.actions["stay"], pomdp.get_state_by_id(41)),
},
pomdp,
)

assert path == other_path