-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpomdp_play.py
114 lines (85 loc) · 2.64 KB
/
pomdp_play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# -*- encoding: utf-8 -*-
"""Run pbvi and naive_pbvi on a POMDP defined in AIMA."""
# This is messy, because it's my scratch file.
import matplotlib
from matplotlib import pyplot as plt
import numpy as np
from piglet_pbvi import pbvi
from piglet_pbvi import naive_pbvi
# POMDP definition adapted from:
# Stuart Russell, Peter Norvig. Artificial Intelligence: A Modern Approach. 3rd
# ed., 2010.
# Model tables for the two-state POMDP. States are s0/s1, actions are
# "as"/"ag" (presumably Stay/Go as in the AIMA example -- confirm), and
# observations are o0/o1.

# cT is indexed [s, a, s']; the innermost axis runs over the successor state.
cT = np.array(
    [
        # s = s0:  s'=s0  s'=s1
        [[0.9, 0.1],    # a = as
         [0.1, 0.9]],   # a = ag
        # s = s1:  s'=s0  s'=s1
        [[0.1, 0.9],    # a = as
         [0.9, 0.1]],   # a = ag
    ],
    dtype=np.float64,
)

# cR is indexed [s, a]; within a state both actions carry the same value.
cR = np.array(
    [
        [0.0] * 2,  # s0: (as, ag)
        [1.0] * 2,  # s1: (as, ag)
    ],
    dtype=np.float64,
)

# cOmega is indexed [a, s', o]; the same [s', o] table is used for a0 and a1.
cOmega = np.array(
    [
        [[0.6, 0.4],   # s' = s0: (o0, o1)
         [0.4, 0.6]],  # s' = s1: (o0, o1)
    ] * 2,
    dtype=np.float64,
)

# Passed as the last argument to the PBVI constructors (presumably the
# discount factor -- confirm against piglet_pbvi).
gamma = 1.0
# Solver object built from the model tables defined earlier in this file.
apbvi = pbvi.PBVI(cT, cOmega, cR, gamma)

# Starting V: a single all-zero row of length 2.
V = np.zeros((1, 2), np.float64)

# Belief set B: eight rows of the form [1 - p, p], with p evenly spaced over
# [0.1, 0.9] (both endpoints included).
b1 = np.linspace(0.1, 0.9, 8)
B = np.stack([1 - b1, b1], axis=-1)
if __name__ == '__main__':
    # Scratch driver comparing pbvi.PBVI with naive_pbvi.NaivePBVI.
    #
    # All output goes through single-argument print(...) calls and range(),
    # which behave the same on Python 2 and Python 3.
    #
    # NOTE(review): the copy of this script under review had lost its
    # indentation. The loop nesting below is a best-effort reconstruction;
    # confirm it against the original before relying on the exact output.

    # --- 1. Advance the PBVI generator five times and plot the result. ---
    pbvi_gen = pbvi.generator(apbvi, V, B, 4)

    fig, ax = plt.subplots()

    for _ in range(5):
        # Only the first element of each yielded pair is used here.
        V, __ = next(pbvi_gen)

    # Each row of V is drawn as a straight line over the interval [0, 1].
    # NOTE(review): assumed to run once, after the loop above -- confirm.
    for v in V:
        ax.plot([0, 1], v)

    plt.show()

    # --- 2. Expand a one-point belief set four times with each solver. ---
    apbvi = pbvi.PBVI(cT, cOmega, cR, gamma)
    B = np.array([[0.7, 0.3]])
    for _ in range(4):
        B = apbvi.expanded_B(B)
    print(B)

    print('NAIVE')
    anpbvi = naive_pbvi.NaivePBVI(cT, cOmega, cR, gamma)
    nB = np.array([[0.7, 0.3]])
    for _ in range(4):
        nB = anpbvi.expanded_B(nB)
    print(nB)

    # Sizes of the two expanded belief sets, side by side.
    print("%d %d" % (len(B), len(nB)))

    # --- 3. Manual update rounds with PBVI (disabled: zero iterations). ---
    for _ in range(0):
        Gamma = apbvi.Gamma(V)
        Epsi = apbvi.Epsi(B, Gamma)
        V = apbvi.V(Epsi, B)
    # NOTE(review): assumed to sit after the loop, so V is printed even
    # though the loop body never runs -- confirm.
    print(V)

    # --- 4. Same rounds with the naive solver (disabled as well). ---
    anpbvi = naive_pbvi.NaivePBVI(cT, cOmega, cR, gamma)
    nV = np.zeros((1, 2), np.float64)
    V = np.zeros((1, 2), np.float64)
    for _ in range(0):
        nGamma = anpbvi.Gamma(nV)
        nEpsi = anpbvi.Epsi(B, nGamma)
        nV = anpbvi.V(nEpsi, B)
    # Concatenated by hand so the output matches the old two-item print
    # statement, which emitted no separator after the trailing newline.
    # NOTE(review): assumed to sit after the loop, like print(V) above.
    print("\nV\n" + str(nV))

    # --- 5. Round-by-round trace of both solvers (disabled as well). ---
    # In each stage the naive result is printed first, then the PBVI result.
    for i in range(0):
        print("\n== Round %d ==" % i)

        print("GAMMA")
        nGamma = anpbvi.Gamma(nV)
        print(nGamma)
        Gamma = apbvi.Gamma(V)
        print(Gamma)

        print("EPSILON")
        nEpsi = anpbvi.Epsi(B, nGamma)
        print(nEpsi)
        Epsi = apbvi.Epsi(B, Gamma)
        print(Epsi)

        print("V")
        nV = anpbvi.V(nEpsi, B)
        print(nV)
        V = apbvi.V(Epsi, B)
        print(V)