Skip to content

Commit

Permalink
[Feature] Updated TD-MPC2 Baseline to Support 128x128 RGB with Extra …
Browse files Browse the repository at this point in the history
…State Data (#903)
  • Loading branch information
t-sekai authored Mar 6, 2025
1 parent 91e1396 commit 9996e6e
Show file tree
Hide file tree
Showing 10 changed files with 391 additions and 54 deletions.
3 changes: 3 additions & 0 deletions examples/baselines/tdmpc2/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
logs/
outputs/
wandb/
275 changes: 275 additions & 0 deletions examples/baselines/tdmpc2/baselines.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
# Baseline results for TD-MPC2.
# (We recommend running individual experiments instead of the entire file:
#  copy the settings block, the run_tdmpc2 helper, and one call line.)

# Seeds used for every experiment.
# NOTE: the array is named `seeds` (plural) on purpose — it must not share a
# name with the per-run loop variable `seed`, otherwise the first loop would
# clobber seeds[0] and later experiments would silently reuse the wrong seeds.
seeds=(9351 4796 1788)

# Wandb settings
use_wandb=false
wandb_entity="na"
wandb_project="na"
wandb_group="na"

#######################################
# Launch one TD-MPC2 training run per seed in $seeds (blocking, sequential).
# Arguments:
#   [--confirm]     answer 'y' to the scene-reconfiguration prompt that some
#                   environments (e.g. AnymalC-Reach-v1) print on startup
#   $1 env_id       ManiSkill environment id (e.g. PushCube-v1)
#   $2 obs          observation mode: state | rgb
#   $3 setting      setting tag: walltime_efficient | sample_efficient
#   $4 steps        number of training steps
#   $5 buffer_size  replay buffer size
#   $6 num_envs     number of parallel environments
#   $7 control_mode controller id, e.g. pd_joint_delta_pos
#   $8 env_type     gpu | cpu
#   $9..            extra key=value overrides forwarded verbatim to train.py
# Globals (read): seeds, use_wandb, wandb_entity, wandb_project, wandb_group
#######################################
run_tdmpc2() {
  local confirm=0
  if [[ "${1:-}" == "--confirm" ]]; then
    confirm=1
    shift
  fi
  local env_id=$1 obs=$2 setting=$3 steps=$4 buffer_size=$5
  local num_envs=$6 control_mode=$7 env_type=$8
  shift 8
  local seed cmd
  for seed in "${seeds[@]}"; do
    # Build the command as an array so every argument stays a single word.
    cmd=(python train.py model_size=5 "steps=$steps" "seed=$seed"
      "buffer_size=$buffer_size" exp_name=tdmpc2
      "env_id=$env_id" "num_envs=$num_envs" "control_mode=$control_mode"
      "env_type=$env_type" "obs=$obs" "$@"
      "wandb=$use_wandb" "wandb_entity=$wandb_entity"
      "wandb_project=$wandb_project" "wandb_group=$wandb_group"
      "setting_tag=$setting"
      "wandb_name=tdmpc2-$env_id-$obs-$seed-$setting")
    if (( confirm )); then
      # Pipe a fresh 'y' into each run to accept the reconfiguration prompt.
      echo y | "${cmd[@]}"
    else
      "${cmd[@]}"
    fi
  done
}

### State Based TD-MPC2 Baselines ###

## walltime_efficient Setting ##
run_tdmpc2 PushCube-v1              state walltime_efficient 1_000_000 1_000_000 32 pd_joint_delta_pos gpu
run_tdmpc2 PickCube-v1              state walltime_efficient 1_000_000 1_000_000 32 pd_ee_delta_pos    gpu
run_tdmpc2 StackCube-v1             state walltime_efficient 2_000_000 1_000_000 32 pd_joint_delta_pos gpu
run_tdmpc2 PegInsertionSide-v1      state walltime_efficient 2_000_000 1_000_000 32 pd_joint_delta_pos gpu
run_tdmpc2 PushT-v1                 state walltime_efficient 1_000_000 1_000_000 32 pd_joint_delta_pos gpu
run_tdmpc2 --confirm AnymalC-Reach-v1 state walltime_efficient 1_000_000 1_000_000 32 pd_joint_delta_pos gpu eval_reconfiguration_frequency=0
run_tdmpc2 UnitreeG1TransportBox-v1 state walltime_efficient 1_000_000 1_000_000 32 pd_joint_delta_pos gpu

## sample_efficient Setting ##
run_tdmpc2 PushCube-v1              state sample_efficient 1_000_000 1_000_000 1 pd_ee_delta_pose cpu
run_tdmpc2 PickCube-v1              state sample_efficient 1_000_000 1_000_000 1 pd_ee_delta_pos  cpu
run_tdmpc2 StackCube-v1             state sample_efficient 2_000_000 1_000_000 1 pd_ee_delta_pose cpu
run_tdmpc2 PegInsertionSide-v1      state sample_efficient 2_000_000 1_000_000 1 pd_ee_delta_pose cpu
run_tdmpc2 PushT-v1                 state sample_efficient 1_000_000 1_000_000 1 pd_ee_delta_pose cpu
run_tdmpc2 --confirm AnymalC-Reach-v1 state sample_efficient 1_000_000 1_000_000 1 pd_joint_delta_pos cpu eval_reconfiguration_frequency=0
run_tdmpc2 UnitreeG1TransportBox-v1 state sample_efficient 1_000_000 1_000_000 1 pd_joint_delta_pos cpu


### RGB Based TD-MPC2 Baselines ###
# All rgb runs use the smaller 100_000 replay buffer and render_size=128.

## walltime_efficient Setting ##
run_tdmpc2 PushCube-v1              rgb walltime_efficient 1_000_000 100_000 32 pd_joint_delta_pos gpu render_size=128
run_tdmpc2 PickCube-v1              rgb walltime_efficient 1_000_000 100_000 32 pd_ee_delta_pos    gpu render_size=128
run_tdmpc2 StackCube-v1             rgb walltime_efficient 4_000_000 100_000 32 pd_joint_delta_pos gpu render_size=128
run_tdmpc2 PegInsertionSide-v1      rgb walltime_efficient 4_000_000 100_000 32 pd_joint_delta_pos gpu render_size=128
run_tdmpc2 PushT-v1                 rgb walltime_efficient 2_000_000 100_000 32 pd_joint_delta_pos gpu render_size=128
run_tdmpc2 --confirm AnymalC-Reach-v1 rgb walltime_efficient 2_000_000 100_000 32 pd_joint_delta_pos gpu render_size=128 eval_reconfiguration_frequency=0
run_tdmpc2 UnitreeG1TransportBox-v1 rgb walltime_efficient 2_000_000 100_000 32 pd_joint_delta_pos gpu render_size=128

## sample_efficient Setting ##
# (fix: these runs were previously mis-tagged as walltime_efficient in both
#  setting_tag and wandb_name)
run_tdmpc2 PushCube-v1              rgb sample_efficient 1_000_000 100_000 1 pd_ee_delta_pose cpu render_size=128
run_tdmpc2 PickCube-v1              rgb sample_efficient 1_000_000 100_000 1 pd_ee_delta_pos  cpu render_size=128
run_tdmpc2 StackCube-v1             rgb sample_efficient 4_000_000 100_000 1 pd_ee_delta_pose cpu render_size=128
run_tdmpc2 PegInsertionSide-v1      rgb sample_efficient 4_000_000 100_000 1 pd_ee_delta_pose cpu render_size=128
run_tdmpc2 PushT-v1                 rgb sample_efficient 2_000_000 100_000 1 pd_ee_delta_pose cpu render_size=128
run_tdmpc2 --confirm AnymalC-Reach-v1 rgb sample_efficient 2_000_000 100_000 1 pd_joint_delta_pos cpu render_size=128 eval_reconfiguration_frequency=0
run_tdmpc2 UnitreeG1TransportBox-v1 rgb sample_efficient 2_000_000 100_000 1 pd_joint_delta_pos cpu render_size=128
25 changes: 18 additions & 7 deletions examples/baselines/tdmpc2/common/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,22 @@ def conv(in_shape, num_channels, act=None):
Basic convolutional encoder for TD-MPC2 with raw image observations.
4 layers of convolution with ReLU activations, followed by a linear layer.
"""
assert in_shape[-1] == 64 # assumes rgb observations to be 64x64
layers = [
ShiftAug(), PixelPreprocess(),
nn.Conv2d(in_shape[0], num_channels, 7, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 5, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 3, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 3, stride=1), nn.Flatten()]
assert in_shape[-1] == 64 or in_shape[-1] == 128 # assumes rgb observations to be either 64x64 or 128x128
if in_shape[-1] == 64:
layers = [
ShiftAug(), PixelPreprocess(),
nn.Conv2d(in_shape[0], num_channels, 7, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 5, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 3, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 3, stride=1), nn.Flatten()]
elif in_shape[-1] == 128:
layers = [
ShiftAug(), PixelPreprocess(),
nn.Conv2d(in_shape[0], num_channels, 7, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 5, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 3, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 3, stride=2), nn.ReLU(inplace=True),
nn.Conv2d(num_channels, num_channels, 3, stride=1), nn.Flatten()]
if act:
layers.append(act)
return nn.Sequential(*layers)
Expand All @@ -148,6 +157,8 @@ def enc(cfg, out={}):
out[k] = mlp(cfg.obs_shape[k][0] + cfg.task_dim, max(cfg.num_enc_layers-1, 1)*[cfg.enc_dim], cfg.latent_dim, act=SimNorm(cfg))
elif k == 'rgb':
out[k] = conv(cfg.obs_shape[k], cfg.num_channels, act=SimNorm(cfg))
elif k == 'rgb-state':
out[k] = mlp(cfg.obs_shape[k][0] + cfg.task_dim, (cfg.rgb_state_num_enc_layers-1)*[cfg.rgb_state_enc_dim], cfg.rgb_state_latent_dim, act=SimNorm(cfg))
else:
raise NotImplementedError(f"Encoder for observation type {k} not implemented.")
return nn.ModuleDict(out)
23 changes: 17 additions & 6 deletions examples/baselines/tdmpc2/common/world_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import torch
import torch.nn as nn

from tensordict.tensordict import TensorDict
from common import layers, math, init


Expand All @@ -22,10 +22,14 @@ def __init__(self, cfg):
for i in range(len(cfg.tasks)):
self._action_masks[i, :cfg.action_dims[i]] = 1.
self._encoder = layers.enc(cfg)
self._dynamics = layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], cfg.latent_dim, act=layers.SimNorm(cfg))
self._reward = layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], max(cfg.num_bins, 1))
self._pi = layers.mlp(cfg.latent_dim + cfg.task_dim, 2*[cfg.mlp_dim], 2*cfg.action_dim)
self._Qs = layers.Ensemble([layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim],
cfg.true_latent_dim = cfg.latent_dim
if 'rgb-state' in self._encoder: #
cfg.true_latent_dim += cfg.rgb_state_latent_dim

self._dynamics = layers.mlp(cfg.true_latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], cfg.true_latent_dim, act=layers.SimNorm(cfg))
self._reward = layers.mlp(cfg.true_latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], max(cfg.num_bins, 1))
self._pi = layers.mlp(cfg.true_latent_dim + cfg.task_dim, 2*[cfg.mlp_dim], 2*cfg.action_dim)
self._Qs = layers.Ensemble([layers.mlp(cfg.true_latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim],
max(cfg.num_bins, 1), dropout=cfg.dropout) for _ in range(cfg.num_q)])
self.apply(init.weight_init)
init.zero_([self._reward[-1].weight, self._Qs.params[-2]])
Expand Down Expand Up @@ -99,8 +103,15 @@ def encode(self, obs, task):
"""
if self.cfg.multitask:
obs = self.task_emb(obs, task)
if self.cfg.obs == 'rgb' and obs.ndim == 5:
if self.cfg.obs == 'rgb' and not self.cfg.include_state and obs.ndim == 5:
return torch.stack([self._encoder[self.cfg.obs](o) for o in obs])
elif self.cfg.obs == 'rgb' and self.cfg.include_state and isinstance(obs, dict):
return torch.cat([self._encoder[k](o) for k, o in obs.items()], dim=1)
elif self.cfg.obs == 'rgb' and self.cfg.include_state and isinstance(obs, TensorDict): # Iterate through buffer batch for update
if obs.ndim == 2: # ndim=6 for rgb but ndim=2 here because it's different with TensorDict
return torch.stack([torch.cat([self._encoder[k](o) for k, o in os.items()], dim=1) for os in obs])
else: # ndim=5 for rgb
return torch.cat([self._encoder[k](o) for k, o in obs.items()], dim=1)
return self._encoder[self.cfg.obs](obs)

def next(self, z, a, task):
Expand Down
6 changes: 6 additions & 0 deletions examples/baselines/tdmpc2/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ control_mode: default # or pd_joint_delta_pos or pd_ee_delta_pose
num_envs: 32
num_eval_envs: 4
env_type: gpu # cpu
include_state: true # for rgb mode, if we want to use extra state data like qpos, goal position, etc.
render_mode: rgb_array # ['rgb_array' for quality, or 'sensors' for speed]
render_size: 64
setting_tag: none # ['none', 'walltime_efficient', 'sample_efficient', ...] for wandb tags
Expand All @@ -16,6 +17,7 @@ setting_tag: none # ['none', 'walltime_efficient', 'sample_efficient', ...] for
checkpoint: ???
eval_episodes_per_env: 2 # total (eval_episodes_per_env * num_eval_envs number) of eval episodes
eval_freq: 50000
eval_reconfiguration_frequency: 1

# training
steps: 1_000_000
Expand Down Expand Up @@ -61,6 +63,9 @@ vmax: +10
model_size: ???
num_enc_layers: 2
enc_dim: 256
rgb_state_enc_dim: 64
rgb_state_num_enc_layers: 1
rgb_state_latent_dim: 64
num_channels: 32
mlp_dim: 512
latent_dim: 512
Expand Down Expand Up @@ -96,6 +101,7 @@ action_dims: ???
episode_lengths: ???
seed_steps: ???
bin_size: ???
true_latent_dim: ???

# Added for Maniskill RL Baselines Config Convention (don't assign to them)
env_cfg:
Expand Down
Loading

0 comments on commit 9996e6e

Please sign in to comment.