@@ -8,24 +8,25 @@ rnn_name = Recurrent
8
8
num_envs = 4096
9
9
10
10
[train]
11
- total_timesteps = 800_000_000
12
- minibatch_size = 32768
13
- adam_beta1 = 0.9315687674807638
14
- adam_beta2 = 0.9997941907784418
15
- adam_eps = 0.0001
11
+ total_timesteps = 150_000_000
12
+ adam_beta1 = 0.9051396950168318
13
+ adam_beta2 = 0.9998075206648028
14
+ adam_eps = 6.080590274937272e-8
16
15
anneal_lr = 1
17
16
batch_size = auto
18
17
bptt_horizon = 64
19
- clip_coef = 0.36337120207114476
20
- ent_coef = 0.0033403389041002187
21
- gae_lambda = 0.995
22
- gamma = 0.9431639100863852
23
- learning_rate = 0.001
24
- max_grad_norm = 2.8332963585467064
18
+ clip_coef = 0.18136589604817419
19
+ ent_coef = 0.008771869770071427
20
+ gae_lambda = 0.852233401446272
21
+ gamma = 0.9937244196032619
22
+ learning_rate = 0.013637100389343416
23
+ max_grad_norm = 3.8161147569794194
24
+ max_minibatch_size = 32768
25
+ minibatch_size = 32768
25
26
optimizer = muon
26
- precision = float32
27
- prio_alpha = 0.8923297825051283
28
- prio_beta0 = 0.8515156017255217
27
+ prio_alpha = 0.6199253864227026
28
+ prio_beta0 = 0.835222519905636
29
29
vf_clip_coef = 0.1
30
- vf_coef = 2.306977594383939
31
- vtrace_rho_clip = 3.5765516197769918
30
+ vf_coef = 1.304631362812606
31
+ vtrace_c_clip = 1.4932276946661736
32
+ vtrace_rho_clip = 0.45768721432684095
0 commit comments