-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbasket_v0.py
69 lines (58 loc) · 1.69 KB
/
basket_v0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import math
class BasketEnv(Env):
    """Empty skeleton of the environment: every method is a no-op.

    NOTE(review): a second ``BasketEnv`` class is defined later in this file
    and rebinds the name, so this skeleton is shadowed once the module has
    finished importing.
    """

    def __init__(self):
        """Create the skeleton; no attributes are set."""

    def step(self):
        """Placeholder step; takes no action and returns ``None``."""
        return None

    def render(self):
        """Placeholder render; draws nothing and returns ``None``."""
        return None

    def reset(self):
        """Placeholder reset; changes nothing and returns ``None``."""
        return None
class BasketEnv(Env):
    """Gym environment in which an agent keeps a velocity near a target.

    The target velocity ``v`` is computed once, in the constructor, as
    ``s * sqrt(GRAVITY / (s - HOOP_HEIGHT + h))``.
    NOTE(review): this matches the release speed of a 45-degree shot taken
    from height ``h`` at horizontal distance ``s`` from a hoop 3.05 m high;
    presumably that is the intent -- confirm with the author.

    Episode dynamics:

    * An episode starts at ``v`` plus an integer offset of -1, 0 or +1 and
      lasts ``EPISODE_LENGTH`` steps.
    * Each step the action (0, 1 or 2) shifts the velocity by -0.05, 0 or
      +0.05 m/s.
    * The reward is +1 when the shifted velocity lies in
      ``[floor(v), ceil(v)]`` and -1 otherwise.  This window is not centred
      on ``v`` and collapses to the single point ``v`` when ``v`` is
      integral.
    * After the reward is computed, uniform noise in ``[-0.05, 0.05]`` is
      added; the returned observation includes that noise.

    The velocity is never clipped, so it can leave the bounds declared in
    ``observation_space`` (``v`` +/- ``OBSERVATION_MARGIN``).
    """

    # Gravitational acceleration used by the target-velocity formula.
    GRAVITY = 9.8
    # Height constant subtracted in the target-velocity formula.
    HOOP_HEIGHT = 3.05
    # Half-width of the declared observation interval around ``v``.
    OBSERVATION_MARGIN = 3.0
    # Number of steps in one episode.
    EPISODE_LENGTH = 60

    def __init__(self, s, h):
        """Compute the target velocity and set up spaces and episode state.

        Args:
            s: First geometry term of the velocity formula (see the class
                docstring for the presumed meaning).
            h: Second geometry term of the velocity formula.

        Raises:
            ValueError: If ``s - HOOP_HEIGHT + h`` is negative.
            ZeroDivisionError: If ``s - HOOP_HEIGHT + h`` is exactly zero.
        """
        super().__init__()

        clearance = s - self.HOOP_HEIGHT + h
        if clearance <= 0:
            # The bare formula would fail here with "math domain error" or
            # "float division by zero"; raise the same exception types with
            # a message that names the offending inputs.
            error_type = ZeroDivisionError if clearance == 0 else ValueError
            raise error_type(
                "s - {} + h must be positive to compute the target velocity "
                "(got s={!r}, h={!r})".format(self.HOOP_HEIGHT, s, h)
            )

        # Target velocity
        self.v = s * math.sqrt(self.GRAVITY / clearance)
        # Actions we can take: down, stay, up
        self.action_space = Discrete(3)
        # One-element velocity interval around the target
        self.observation_space = Box(
            low=np.array([self.v - self.OBSERVATION_MARGIN]),
            high=np.array([self.v + self.OBSERVATION_MARGIN]),
        )
        self._begin_episode()

    def _begin_episode(self):
        """Pick the start velocity and refill the step budget."""
        # Start at the target, or exactly 1 below/above it.
        self.state = self.v + random.randint(-1, 1)
        self.play_length = self.EPISODE_LENGTH

    def step(self, action):
        """Advance the episode by one step.

        Args:
            action: 0, 1 or 2; mapped to a velocity change of -0.05, 0 or
                +0.05 m/s.  The value is not validated.

        Returns:
            Tuple ``(state, reward, done, info)``: the velocity after the
            action and noise, +1 or -1, whether the step budget is used up,
            and an empty dict.
        """
        # 0/20 - 0.05 = -0.05, 1/20 - 0.05 = 0, 2/20 - 0.05 = +0.05
        self.state += (action / 20) - 0.05
        self.play_length -= 1

        # Reward is judged before the noise below is applied.
        in_window = math.floor(self.v) <= self.state <= math.ceil(self.v)
        reward = 1 if in_window else -1

        done = self.play_length <= 0

        # Apply velocity noise
        self.state += random.uniform(-0.05, 0.05)

        info = {}
        return self.state, reward, done, info

    def render(self, mode="human"):
        """Do nothing; this environment has no visualisation.

        ``mode`` is accepted and ignored so that callers using Gym's
        ``render(mode=...)`` form do not fail.
        """

    def reset(self):
        """Start a new episode and return its initial velocity."""
        self._begin_episode()
        return self.state