[WIP] Simpler td3 example #993


Closed · wants to merge 10 commits
109 changes: 56 additions & 53 deletions examples/td3/config.yaml
@@ -1,59 +1,62 @@
-# Environment
-env_name: HalfCheetah-v4
-env_task: ""
-exp_name: "debugging"
-env_library: gym
-record_video: 0
-normalize_rewards_online: 0
-normalize_rewards_online_scale: 5
-normalize_rewards_online_decay: 0.99
-total_frames: 1000000
-frames_per_batch: 1000
-max_frames_per_traj: 1000
-frame_skip: 1
-from_pixels: 0
-seed: 0
+# Task and env
+env:
+  env_name: HalfCheetah-v4
+  env_task: ""
+  env_library: gym
+  record_video: 0
+  normalize_rewards_online: 0
+  normalize_rewards_online_scale: 5
+  normalize_rewards_online_decay: 0.99
+  n_samples_stats: 1000
+  frame_skip: 1
+  from_pixels: False
+  num_envs: 1
+  reward_scaling:
+  noop: 1
+  seed: 0
 
-# Collection
-init_random_frames: 25000
-init_env_steps: 10000
-record_interval: 10
-record_frames: 10000
-async_collection: 1
-#collector_devices: [cuda:1,cuda:1,cuda:1,cuda:1]
-collector_devices: [cpu] # ,cpu,cpu,cpu]
-env_per_collector: 1
-num_workers: 1
+# Collector
+collector:
+  async_collection: 1
+  frames_per_batch: 1000
+  total_frames: 1000000
+  multi_step: 0
+  init_random_frames: 25000
+  collector_devices: cpu # ,cpu,cpu,cpu]
+  num_collectors: 1
+  max_frames_per_traj: 1000
 
-# Replay Buffer
-buffer_size: 1000000
+# logger
+logger:
+  backend: tensorboard
+  exp_name: td3_cheetah_gym
+  log_interval: 10000 # record interval in frames
+  eval_steps: 1000
 
-# Optimization
-utd_ratio: 1.0
-gamma: 0.99
-loss: double
-loss_function: smooth_l1
-lr: 3e-4
-weight_decay: 0.0
-lr_scheduler: ""
-optim_steps_per_batch: 128
-batch_size: 256
-target_update_polyak: 0.995
+# Buffer
+replay_buffer:
+  prb: 0
+  buffer_prefetch: 64
+  capacity: 1_000_000
 
-# Algorithm
-prb: 0 # use prioritized experience replay
-policy_update_delay: 2
-multi_step: 0
-n_steps_return: 1
-activation: relu
-gSDE: 0
+# Optimization
+optim:
+  device: cpu
+  lr: 3e-4
+  weight_decay: 0.0
+  batch_size: 256
+  lr_scheduler: ""
+  optim_steps_per_batch: 1000
+  policy_update_delay: 2
 
-# Logging
-logger: wandb
-mode: online
+# Policy and model
+model:
+  ou_exploration: 0
+  noisy: False
+  activation: relu
 
-# Extra
-batch_transform: 1
-buffer_prefetch: 64
-norm_stats: 1
-device: "cpu"
+# loss
+loss:
+  loss_function: smooth_l1
+  gamma: 0.99
+  tau: 0.05
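
For orientation, here is a minimal sketch of how the regrouped config might be consumed. It assumes OmegaConf (the parser underlying Hydra, which the torchrl example scripts use for configs like this) and a conventional TD3-style polyak update driven by `loss.tau`; the file path, the toy network shapes, and the `soft_update` helper are illustrative, not part of this PR.

```python
# Minimal consumption sketch (assumed API: OmegaConf, which Hydra wraps).
import torch
from torch import nn
from omegaconf import OmegaConf

cfg = OmegaConf.load("examples/td3/config.yaml")

# The previously flat keys now live under named groups.
assert cfg.env.env_name == "HalfCheetah-v4"
assert cfg.collector.total_frames == 1_000_000
assert cfg.optim.batch_size == 256

# Conventional TD3 soft (polyak) target update keyed on loss.tau;
# torchrl's own target updaters may implement this differently.
def soft_update(target: nn.Module, source: nn.Module, tau: float) -> None:
    with torch.no_grad():
        for tp, sp in zip(target.parameters(), source.parameters()):
            tp.lerp_(sp, tau)  # tp <- (1 - tau) * tp + tau * sp

actor = nn.Linear(17, 6)  # toy stand-in for the HalfCheetah actor network
actor_target = nn.Linear(17, 6)
actor_target.load_state_dict(actor.state_dict())
soft_update(actor_target, actor, cfg.loss.tau)
```

With this layout, a whole section such as `cfg.collector` can be handed to the collector setup in one piece instead of cherry-picking a dozen flat keys, which appears to be the point of the restructuring.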