Skip to content

Commit 3933342

Browse files
committed
optimizations and sweep results
1 parent 7b6cc34 commit 3933342

File tree

4 files changed

+271
-159
lines changed

4 files changed

+271
-159
lines changed

pufferlib/config/ocean/vision_test.ini

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,24 +8,25 @@ rnn_name = Recurrent
88
num_envs = 4096
99

1010
[train]
11-
total_timesteps = 800_000_000
12-
minibatch_size = 32768
13-
adam_beta1 = 0.9315687674807638
14-
adam_beta2 = 0.9997941907784418
15-
adam_eps = 0.0001
11+
total_timesteps = 150_000_000
12+
adam_beta1 = 0.9051396950168318
13+
adam_beta2 = 0.9998075206648028
14+
adam_eps = 6.080590274937272e-8
1615
anneal_lr = 1
1716
batch_size = auto
1817
bptt_horizon = 64
19-
clip_coef = 0.36337120207114476
20-
ent_coef = 0.0033403389041002187
21-
gae_lambda = 0.995
22-
gamma = 0.9431639100863852
23-
learning_rate = 0.001
24-
max_grad_norm = 2.8332963585467064
18+
clip_coef = 0.18136589604817419
19+
ent_coef = 0.008771869770071427
20+
gae_lambda = 0.852233401446272
21+
gamma = 0.9937244196032619
22+
learning_rate = 0.013637100389343416
23+
max_grad_norm = 3.8161147569794194
24+
max_minibatch_size = 32768
25+
minibatch_size = 32768
2526
optimizer = muon
26-
precision = float32
27-
prio_alpha = 0.8923297825051283
28-
prio_beta0 = 0.8515156017255217
27+
prio_alpha = 0.6199253864227026
28+
prio_beta0 = 0.835222519905636
2929
vf_clip_coef = 0.1
30-
vf_coef = 2.306977594383939
31-
vtrace_rho_clip = 3.5765516197769918
30+
vf_coef = 1.304631362812606
31+
vtrace_c_clip = 1.4932276946661736
32+
vtrace_rho_clip = 0.45768721432684095

pufferlib/ocean/vision_test/vision_test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
int main(void) {
88
VisionTest env = {};
99

10-
env.observations = (uint8_t*)calloc(3*64, sizeof(uint8_t)); // Alloc our 16x16 window compressed to float
10+
env.observations = (uint8_t*)calloc(3*64 + 1, sizeof(uint8_t)); // Alloc our 16x16 window compressed to float
1111
env.actions = (int*)calloc(5, sizeof(int));
1212
env.rewards = (float*)calloc(1, sizeof(float));
1313
env.terminals = (unsigned char*)calloc(1, sizeof(unsigned char));

0 commit comments

Comments
 (0)