-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAcrobotUtils.py
171 lines (150 loc) · 4.13 KB
/
AcrobotUtils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""
Utility functions specific to Acrobot-v1.
I think a lot of OpenAI environments make
the distinction between the state, which is
an environment's internal representation
and the observation, which is presented to
the agent. This file has conversion functions
between them as they are often needed.
The acrobot reward bases are also found here.
It is difficult to figure out a common way of
determining bases for all environments. There is
always some critical information that simplifies the
design of the bases and this is different for
each environment. For example, the reward function
of Acrobot-v1 depends solely on the angles of
the two links and not the angular velocities (which
are part of the observation), thus simplifying
the bases functions.
"""
from Utils import *
from Reward import Reward
from functional import compose
from functools import partial, reduce, lru_cache
from itertools import product
import numpy as np
import scipy.integrate
import gym
def findTheta (sin, cos) :
    """
    Calculate theta in radians from sin
    and cosine values.
    The output is in the range [-pi, pi]
    Parameters
    ----------
    sin : float
    cos : float
    """
    # np.arctan2 resolves the correct quadrant in one call and never
    # divides by cos.  The previous branch-based version computed
    # sin / cos and could raise or emit a divide-by-zero warning when
    # cos == 0 but sin was not exactly +/-1 (float roundoff).
    if sin == 0 and cos < 0 :
        # arctan2(0, negative) returns +pi; the original convention
        # for this point was -pi, so preserve it for callers that
        # bucket angles over [-pi, pi).
        return -np.pi
    return np.arctan2(sin, cos)
def toInternalStateRep (s) :
    """
    Convert the agent-visible observation into
    the environment's internal 4-tuple state:
    [theta1, theta2, dtheta1, dtheta2]
    The observation packs each link angle as a
    (cos, sin) pair followed by the two angular
    velocities, i.e.
    [cos(theta1), sin(theta1), cos(theta2),
     sin(theta2), dtheta1, dtheta2].
    (theta1 is the first link's angle; theta2 is
    presumably measured per the gym Acrobot-v1
    convention -- confirm against the env docs.)
    Parameters
    ----------
    s : array-like
        External representation of the state.
    """
    # Recover each angle from its (sin, cos) pair; note the
    # observation stores cos first, sin second.
    angle1 = findTheta(s[1], s[0])
    angle2 = findTheta(s[3], s[2])
    # Angular velocities pass through unchanged.
    return (angle1, angle2, s[4], s[5])
def toExternalStateRep (s) :
    """
    Inverse function of toInternalStateRep.
    Expands each link angle into its (cos, sin)
    pair and passes the two angular velocities
    through unchanged.
    Parameters
    ----------
    s : array-like
        Internal representation of the state.
    """
    theta1, theta2 = s[0], s[1]
    return np.array([
        np.cos(theta1), np.sin(theta1),
        np.cos(theta2), np.sin(theta2),
        s[2], s[3],
    ])
def bound(x, m, M):
    """
    Bound x between m and M.
    Equivalent to min(max(x, m), M): the value
    is first raised to at least m, then capped
    at M (so M wins if the bounds are inverted).
    Parameters
    ----------
    x : float
        Value.
    m : float
        Lower bound.
    M : float
        Upper bound.
    """
    raised = x if x > m else m
    return raised if raised < M else M
def wrap(x, m, M):
    """
    Rotate x to fit in range (m, M).
    Shifts x by whole multiples of (M - m)
    until it lands inside the interval.
    Useful while dealing with angles.
    Parameters
    ----------
    x : float
        Value.
    m : float
        Lower bound.
    M : float
        Upper bound.
    """
    period = M - m
    # Step down past the upper bound, then up past the lower one;
    # a value exactly equal to M (or m) is left where it is.
    while x > M:
        x -= period
    while x < m:
        x += period
    return x
def stepFunction (s, xRange, yRange) :
    """
    A step function in R^2. Given
    a rectangle A in R^2 :
    f(x, y) = -1 ; (x, y) \in A
                0 ; else
    Parameters
    ----------
    s : np.ndarray
        4-D state vector of which
        we are interested in the first
        2 dimensions.
    xRange : tuple
    yRange : tuple
        Together, these ranges define a
        rectangle in R^2 over which the
        reward will be -1. Everywhere
        else, reward will be 0.
    """
    x, y = s[0], s[1]
    # inRange comes from Utils (star import); presumably tests
    # interval membership -- endpoint inclusivity depends on its
    # definition (TODO confirm against Utils).
    inRectangle = inRange(x, xRange) and inRange(y, yRange)
    return -1 if inRectangle else 0
def acrobotRewardBases (delX, delY) :
    """
    Build localized reward basis functions over
    the (theta1, theta2) angle space of Acrobot-v1.
    The [-pi, pi) x [-pi, pi) square is tiled with
    rectangles of size delX x delY; for each tile a
    function is built that maps an *external*
    observation to -1 inside the tile and 0 outside
    (see stepFunction), and is wrapped in a Reward
    object with range (-1, 0).
    Parameters
    ----------
    delX : float
        Tile width along theta1 (radians).
    delY : float
        Tile height along theta2 (radians).
    Returns
    -------
    list
        One Reward per tile.
    """
    xs = np.arange(-np.pi, np.pi, delX)
    ys = np.arange(-np.pi, np.pi, delY)
    bases = []
    for x, y in product(xs, ys) :
        # (x, y) is the tile's lower-left corner; (x_, y_) its
        # upper-right corner.
        x_, y_ = x + delX, y + delY
        # Pipeline built with functional.compose: assuming
        # compose(f, g) computes f(g(.)), the observation is first
        # tuple-ified, then converted to the internal angle
        # representation, then scored by stepFunction -- TODO
        # confirm compose's argument order against the functional
        # package.
        fn = reduce(compose,
                    [partial(stepFunction,
                             xRange=(x, x_),
                             yRange=(y, y_)),
                     toInternalStateRep,
                     tuple])
        bases.append(Reward(fn, (-1, 0)))
    return bases