-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path: ddpg.py
executable file
·115 lines (72 loc) · 3.59 KB
/
ddpg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#--------------------------------------------
# AUTHOR: KAUSHIK BALAKRISHNAN
#--------------------------------------------
import tensorflow as tf
import numpy as np
#import gym
#from gym import wrappers
import tflearn
import argparse
import pprint as pp
from replay_buffer import ReplayBuffer
from AandC import *
from TrainOrTest import *
def train(args):
    """Build the DDPG actor/critic networks and launch training.

    Args:
        args: dict of hyperparameter strings (learning rates, tau, gamma,
            minibatch size, noise option, random seed); values are cast
            to the proper numeric types here.
    """
    with tf.Session() as sess:
        # One seed for both numpy and TF so runs are reproducible.
        seed = int(args['random_seed'])
        np.random.seed(seed)
        tf.set_random_seed(seed)

        # Fixed problem dimensions: 29-dim state, 3-dim action,
        # actions bounded by 1.0 — presumably the TORCS sensor/actuator
        # layout; confirm against the environment wrapper.
        s_dim, a_dim, a_bound = 29, 3, 1.0

        actor = ActorNetwork(sess, s_dim, a_dim, a_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']),
                             int(args['noise_option']))

        critic = CriticNetwork(sess, s_dim, a_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        trainDDPG(sess, args, actor, critic)
def test(args):
    """Rebuild the actor/critic graphs, restore the checkpoint, and evaluate.

    Args:
        args: dict of hyperparameter strings; cast to numeric types here.
            Network hyperparameters must match the training run so the
            restored variables fit the graph.
    """
    with tf.Session() as sess:
        # Same seeding as train() for reproducible evaluation rollouts.
        seed = int(args['random_seed'])
        np.random.seed(seed)
        tf.set_random_seed(seed)

        # Fixed problem dimensions (must match train()).
        s_dim, a_dim, a_bound = 29, 3, 1.0

        actor = ActorNetwork(sess, s_dim, a_dim, a_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']),
                             int(args['noise_option']))

        critic = CriticNetwork(sess, s_dim, a_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        # Restore trained weights from the fixed checkpoint path.
        tf.train.Saver().restore(sess, "ckpt/model")

        testDDPG(sess, args, actor, critic)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='provide arguments for DDPG agent')

    # agent parameters (values arrive as strings/numbers; train()/test()
    # cast them explicitly, so the defaults' types are immaterial)
    parser.add_argument('--actor-lr', help='actor network learning rate', default=0.0001)
    parser.add_argument('--critic-lr', help='critic network learning rate', default=0.001)
    parser.add_argument('--gamma', help='discount factor for critic updates', default=0.99)
    parser.add_argument('--tau', help='soft target update parameter', default=0.001)
    parser.add_argument('--buffer-size', help='max size of the replay buffer', default=1000000)
    parser.add_argument('--minibatch-size', help='size of minibatch for minibatch-SGD', default=64)

    # run parameters
    parser.add_argument('--random-seed', help='random seed for repeatability', default=1234)
    parser.add_argument('--episode_count', help='max num of episodes to do while training', default=2000)
    parser.add_argument('--max_steps', help='max length of 1 episode', default=10000)

    #-------------------------------------------------------------------------
    # for noise, use only OU; param is work in progress
    # noise_option is also work in progress; its value is immaterial for now
    parser.add_argument('--noise', help='OU/nonoise', default='OU')
    parser.add_argument('--noise_option', help='1/2/3', default='3')
    #--------------------------------------------------------------------------

    args = vars(parser.parse_args())
    pp.pprint(args)

    # Ask whether to train or evaluate.  Previously a non-numeric answer
    # crashed with a raw ValueError traceback and any other integer was
    # silently ignored; now both cases exit with a clear message.
    try:
        train_test = int(input("enter 1 for train / 0 for test "))
    except ValueError:
        raise SystemExit("invalid choice: enter 1 (train) or 0 (test)")

    if train_test == 1:
        train(args)
    elif train_test == 0:
        test(args)
    else:
        raise SystemExit("invalid choice: enter 1 (train) or 0 (test)")