-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmdp.py
50 lines (42 loc) · 1.72 KB
/
mdp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import random, time
class MDP:
def __init__(self, maze=None, isDeterministic=True):
self.maze = maze
self.goal = (1,1)
self.actions = {}
self._discount = 0.9
self.isDeterministic = isDeterministic
self.create_actions()
self.target = [self.goal]
def set_heuristics(self):
for key, value in self.actions.items():
for k, v in value.items():
# NON DETERMINISTIC
if k == 'N':
value[k] = 0.8
elif k == 'W':
value[k] = 0.1
elif k == 'E':
value[k] = 0.5
elif k == 'S':
value[k] = 0.5
def create_actions(self):
# DETERMINISTIC
for key, val in self.maze.maze_map.items():
self.actions[key] = dict([(k, v) for k, v in val.items() if v == 1])
# CHANGE TO STOCHASTIC
if not self.isDeterministic:
self.set_heuristics()
def calculate_ValueIterationUtility(self, prob, reward, stateNext, utility):
return prob * (reward + self._discount * utility[stateNext])
def calculate_PolicyIterationUtility(self, prob, reward, state, stateNext, utility):
return reward[state] + self._discount * (prob * utility[stateNext])
def move(self, currentNode, direction):
    """Return the neighbouring cell reached from currentNode by direction.

    currentNode is a 2-tuple; E/W change the second coordinate and N/S
    change the first (presumably (row, column) with north = smaller row
    — TODO confirm against the maze's coordinate convention).  For the
    branches visible here, an unrecognized direction falls through and
    returns None.
    """
    if direction == 'E':
        # east: one column to the right
        return currentNode[0], currentNode[1] + 1
    elif direction == 'W':
        # west: one column to the left
        return currentNode[0], currentNode[1] - 1
    elif direction == 'N':
        # north: one row up
        return currentNode[0] - 1, currentNode[1]
    elif direction == 'S':
        # south: one row down
        return currentNode[0] + 1, currentNode[1]