File tree Expand file tree Collapse file tree 3 files changed +3
-78
lines changed Expand file tree Collapse file tree 3 files changed +3
-78
lines changed Original file line number Diff line number Diff line change @@ -4,86 +4,11 @@ env_name = puffer_four_rooms
44policy_name = Policy
55rnn_name = Recurrent
66
7- [vec]
8- num_workers = 12
9- num_envs = 12
10-
117[env]
12- num_envs = 100
8+ num_envs = 256
139
1410[train]
1511total_timesteps = 10_000_000
1612gamma = 0.99
1713learning_rate = 0.015
1814minibatch_size = 32768
19- device = mps
20-
21- [sweep]
22- method = Protein
23- metric = perf
24- goal = maximize
25- downsample = 0
26-
27- [sweep.train.learning_rate]
28- distribution = log_normal
29- min = 0.001
30- max = 0.1
31- mean = 0.015
32- scale = 0.5
33-
34- [sweep.train.gamma]
35- distribution = uniform
36- min = 0.95
37- max = 0.999
38- mean = 0.99
39- scale = auto
40-
41- [sweep.train.total_timesteps]
42- distribution = log_normal
43- min = 5_000_000
44- max = 50_000_000
45- mean = 10_000_000
46- scale = 0.5
47-
48- [sweep.train.minibatch_size]
49- distribution = uniform_pow2
50- min = 8192
51- max = 65536
52- mean = 32768
53- scale = auto
54-
55- [sweep.train.ent_coef]
56- distribution = log_normal
57- min = 0.0001
58- max = 0.01
59- mean = 0.001
60- scale = 0.5
61-
62- [sweep.train.clip_coef]
63- distribution = uniform
64- min = 0.1
65- max = 0.3
66- mean = 0.2
67- scale = auto
68-
69- [sweep.train.vf_coef]
70- distribution = uniform
71- min = 0.5
72- max = 4.0
73- mean = 2.0
74- scale = auto
75-
76- [sweep.env.size]
77- distribution = int_uniform
78- min = 13
79- max = 25
80- mean = 19
81- scale = auto
82-
83- [sweep.policy.hidden_size]
84- distribution = uniform_pow2
85- min = 64
86- max = 512
87- mean = 128
88- scale = auto
89-
Original file line number Diff line number Diff line change 55
66static int my_init (Env * env , PyObject * args , PyObject * kwargs ) {
77 env -> size = unpack (kwargs , "size" );
8- env -> see_through_walls = 0 ; // Default to false for MinGrid compatibility
8+ env -> see_through_walls = 0 ;
99 // Allocate grid memory for full state (stores OBJECT_IDX values)
1010 env -> grid = (unsigned char * )calloc (env -> size * env -> size , sizeof (unsigned char ));
1111 return 0 ;
Original file line number Diff line number Diff line change @@ -63,7 +63,7 @@ typedef struct {
6363} FourRooms ;
6464
6565void add_log (FourRooms * env ) {
66- env -> log .perf += (env -> rewards [0 ] > 0 ) ? 1 : 0 ;
66+ env -> log .perf += (env -> rewards [0 ] > 0 ) ? 1.0 : 0.0 ;
6767 env -> log .score += env -> rewards [0 ];
6868 env -> log .episode_length += env -> tick ;
6969 env -> log .episode_return += env -> rewards [0 ];
You can’t perform that action at this time.
0 commit comments