Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions pufferlib/config/ocean/artillery3d.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
[base]
package = ocean
env_name = puffer_artillery3d
policy_name = Policy
rnn_name = Recurrent

[vec]
num_envs = 8

[env]
num_envs = 1024
debug = 0

dist_fade = 0.75
max_dist0 = 240
max_reward = 1.00
max_reward_dist = 5
miss_penalty = -0.75
out_bounds_penalty = -0.01
target_size = 15
turn_penalty = -0.003
turn_penalty_delay = 100
turn_penalty_ramp = 0.01
rng = 7
same_runs = 1

[policy]
hidden_size = 128

[rnn]
input_size = 128
hidden_size = 128

[train]
adam_beta1 = 0.9603485277769915
adam_beta2 = 0.9676992387435311
adam_eps = 9.4852806860935e-08
batch_size = auto
bptt_horizon = 64
clip_coef = 0.048815198297704664
ent_coef = 0.049999999999999996
gae_lambda = 0.95
gamma = 0.9840496436204362
learning_rate = 0.005
max_grad_norm = 2.589981415830053
#max_minibatch_size = 32768
#min_minibatch_size = 32768
minibatch_size = 16384 #16384 32768
prio_alpha = 0.8573365561813164
prio_beta0 = 0.9145256572446854
total_timesteps = 400_000_000
vf_clip_coef = 0.1921803059597499
vf_coef = 0.8111721600187421
vtrace_c_clip = 2.13335526866267
vtrace_rho_clip = 2.0368704261583224

[sweep]
method = Protein
metric = acc1000
goal = maximize
downsample = 10

#[sweep.train.total_timesteps]
#distribution = log_normal
#min = 90_000_000
#max = 100_000_000
#mean = 90_500_000
#scale = auto

#[sweep.train.learning_rate]
#distribution = log_normal
#min = 0.005
#mean = 0.0065
#max = 0.01
#scale = 0.5

#[sweep.env.frameskip]
#distribution = categorical
#values = 1, 2, 4, 8

[sweep.env.max_dist0]
distribution = uniform
min = 25.0
max = 250.0
mean = 200.0
scale = auto

[sweep.env.dist_fade]
distribution = uniform
min = 0.01
max = 1.0
mean = 0.36
scale = auto

#[sweep.env.same_runs]
#distribution = uniform
#min = 1
#max = 50
#mean = 10
#scale = auto

[sweep.env.max_reward_dist]
distribution = uniform
min = 5
max = 50
mean = 10
scale = auto

[sweep.env.turn_penalty]
distribution = uniform
min = -0.01
max = -0.003
mean = -0.003
scale = auto

[sweep.env.turn_penalty_delay]
distribution = uniform
min = 50
max = 100
mean = 64
scale = auto

[sweep.env.turn_penalty_ramp]
distribution = uniform
min = 0.01
max = 0.05
mean = 0.023
scale = auto

[sweep.env.miss_penalty]
distribution = uniform
min = -1.0
max = -0.1
mean = -0.1
scale = auto

#[sweep.train.vtrace_rho_clip]
#distribution = uniform
#min = 1.0
#max = 5.0
#mean = 1.5
#scale = auto

#[sweep.train.max_grad_norm]
#distribution = uniform
#min = 1.0
#mean = 1.15
#max = 5.0
#scale = auto

#[sweep.train.vf_coef]
#distribution = uniform
#min = 0.0
#max = 1.0
#mean = 1.0
#scale = auto

#[sweep.train.gae_lambda]
#distribution = logit_normal
#min = 0.95
#mean = 0.96
#max = 0.9995
#scale = auto

#[sweep.train.vtrace_c_clip]
#distribution = uniform
#min = 0.0
#max = 4.0
#mean = 1.3
#scale = auto

#[sweep.train.minibatch_size]
#distribution = uniform_pow2
#min = 8192
#max = 32768
#mean = 16384
#scale = auto

#[sweep.train.ent_coef]
#distribution = log_normal
#min = 0.00001
#mean = 0.018
#max = 0.05
#scale = auto
60 changes: 60 additions & 0 deletions pufferlib/ocean/artillery3d/artillery3d.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#include "artillery3d.h"
#include "puffernet.h"

/*
 * Interactive demo for the Artillery3D environment.
 *
 * Builds an environment with fixed settings, opens a render client, loads
 * policy weights from disk into a LinearLSTM, and then steps and renders the
 * environment at a 30 FPS target until the window is closed. While left shift
 * is held the action is taken from the keyboard; otherwise the network picks
 * it from the current observations.
 */
void demo() {
    Artillery3D env = {
        .debug = 0,
        .render = 1,
        .rng = 7,
        .same_runs = 1,
        .i = 1,
        .target_size = 15,
        .max_dist0 = 250,
        .dist_fade = 0.15,
        .max_reward = 1.0,
        .max_reward_dist = 5,
        .miss_penalty = -0.1,
        .out_bounds_penalty = -0.01,
        .turn_penalty = -0.003,
        .turn_penalty_delay = 64,
        .turn_penalty_ramp = 0.01,
    };
    allocate(&env);
    env.client = make_client(&env);

    /* Policy network: 19 observation inputs, a single action head of 7 logits. */
    const char* policy_file = "resources/artillery3d/puffer_artillery3d_weights.bin";
    int num_weights = 135688;
    Weights* weights = load_weights(policy_file, num_weights);

    int num_obs = 19;
    int head_sizes[1] = {7};
    LinearLSTM* net = make_linearlstm(weights, 1, num_obs, head_sizes, 1);

    c_reset(&env);
    SetTargetFPS(30);

    while (!WindowShouldClose()) {
        if (!IsKeyDown(KEY_LEFT_SHIFT)) {
            /* Network control: the policy writes its choice through an int view
             * of the action buffer, and slot 0 is then stored back through
             * env.actions' own element type. */
            int* net_actions = (int*)env.actions;
            forward_linearlstm(net, env.observations, net_actions);
            env.actions[0] = net_actions[0];
        } else if (IsKeyDown(KEY_RIGHT) || IsKeyDown(KEY_D)) {
            /* Manual control. Branches are ordered so that, when several keys
             * are down at once, right beats left beats down beats up beats
             * space. */
            env.actions[0] = 4;
        } else if (IsKeyDown(KEY_LEFT) || IsKeyDown(KEY_A)) {
            env.actions[0] = 3;
        } else if (IsKeyDown(KEY_DOWN) || IsKeyDown(KEY_S)) {
            env.actions[0] = 2;
        } else if (IsKeyDown(KEY_UP) || IsKeyDown(KEY_W)) {
            env.actions[0] = 1;
        } else if (IsKeyDown(KEY_SPACE)) {
            env.actions[0] = 0;
        } else {
            /* Shift held with no mapped key down.
             * NOTE(review): 100 lies outside the 7-logit action range;
             * presumably c_step treats it as "do nothing" -- confirm. */
            env.actions[0] = 100;
        }

        c_step(&env);
        c_render(&env);
    }

    /* Teardown order matches the original: network, weights, env buffers, client. */
    free_linearlstm(net);
    free(weights);
    free_allocated(&env);
    close_client(env.client);
}

/* Program entry point: runs the interactive demo, then exits with status 0. */
int main() {
    demo();
    return 0;
}
Loading
Loading