-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathConfig.py
42 lines (35 loc) · 1.23 KB
/
Config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
""" All constants in one place! """
import gym
from AcrobotUtils import toInternalStateRep
import multiprocessing as mp
# Facts about the MDP
ENV = 'Acrobot-v1'
DISCOUNT = 0.99
HORIZON = 500
INTERNAL_STATE_FN = toInternalStateRep
# Trajectory sampling parameters
BATCH_T = 20
BATCH_B = 16
# Parameters for the IRL algorithm
IRL_ITR = 5 # number of optimization steps
L1_NORM_GAMMA = 1 # weight of L1 Norm in LP
STATE_SIMILARITY_THRESH = 1e-1
IRL_STATE_SAMPLES = 10
TRAJ_DIR = './Trajectories'
# Policy and Value neural network parameters
HDIMS = [512]
with gym.make(ENV) as env :
ACTION_SPACE = env.action_space.n
OBSERVATION_SPACE = env.observation_space.shape[0]
# Number of workers to use for sampling
# and cuda device id
N_PARALLEL = mp.cpu_count()
CUDA_IDX = None
# Constants for training an agent using A2C
LR = 0.001
VALUE_LOSS_COEFF = 0.5
ENTROPY_LOSS_COEFF = 0.01
N_STEPS = 5e5 # steps for training
LOG_STEP = 1e5
# For all randomness!
SEED = 1