Squashed commit of the following:
commit 2f5e103
Merge: ecbfdff 570739a
Author: StoneT2000 <[email protected]>
Date:   Mon Nov 7 21:40:05 2022 -0800

    Merge branch 'main' into release

commit ecbfdff
Author: StoneT2000 <[email protected]>
Date:   Mon Nov 7 21:37:10 2022 -0800

    work

commit 728edbc
Author: StoneT2000 <[email protected]>
Date:   Mon Nov 7 21:30:43 2022 -0800

    add ms1

commit e66881b
Author: StoneT2000 <[email protected]>
Date:   Mon Nov 7 21:30:12 2022 -0800

    add maniskill envs

commit 35a50b3
Author: StoneT2000 <[email protected]>
Date:   Mon Nov 7 21:22:40 2022 -0800

    remove

commit c531488
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 15:36:22 2022 -0700

    Update README.md

commit 33c6ca7
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 15:22:47 2022 -0700

    Update README.md

commit 1fb2e98
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 15:00:12 2022 -0700

    silo obstacle push

commit 5c2705c
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 13:33:58 2022 -0700

    Update env.py

commit 171505b
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 13:24:29 2022 -0700

    Update README.md

commit 4e6fb4c
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 13:16:54 2022 -0700

    work

commit 6c0b7a5
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 12:59:43 2022 -0700

    Update .gitignore

commit 50cae37
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 12:58:49 2022 -0700

    work

commit 4f51443
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 12:51:34 2022 -0700

    clean up

commit 05440f7
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 12:46:05 2022 -0700

    fix bugs

commit 4772c3d
Author: StoneT2000 <[email protected]>
Date:   Thu Oct 27 12:26:19 2022 -0700

    add rest of the data collection scripts

commit 72baa97
Author: StoneT2000 <[email protected]>
Date:   Wed Oct 26 16:46:29 2022 -0700

    work

commit 92b4d82
Author: StoneT2000 <[email protected]>
Date:   Wed Oct 26 16:30:47 2022 -0700

    data collection scripts added

commit 190aeaf
Author: StoneT2000 <[email protected]>
Date:   Wed Oct 26 16:12:10 2022 -0700

    opendrawer exps and cfgs

commit ed1a131
Author: StoneT2000 <[email protected]>
Date:   Wed Oct 26 14:05:58 2022 -0700

    couchmoving eval scripts done

commit a83885d
Author: StoneT2000 <[email protected]>
Date:   Wed Oct 26 13:28:37 2022 -0700

    work

commit 53abc72
Author: StoneT2000 <[email protected]>
Date:   Tue Oct 25 10:42:16 2022 -0700

    Update trajectory_env.py

commit bc2c816
Author: StoneT2000 <[email protected]>
Date:   Tue Oct 25 10:40:22 2022 -0700

    cleanup

commit f50139b
Author: StoneT2000 <[email protected]>
Date:   Tue Oct 25 10:36:15 2022 -0700

    boxpusher exps cleaned

commit f37f741
Author: StoneT2000 <[email protected]>
Date:   Tue Oct 25 10:00:16 2022 -0700

    refacyor

commit 7b6c1a0
Author: StoneT2000 <[email protected]>
Date:   Mon Oct 24 19:58:56 2022 -0700

    exps cleaned for blockstacking

commit 97e9107
Author: StoneT2000 <[email protected]>
Date:   Mon Oct 24 17:05:03 2022 -0700

    Update README.md

commit 26db18d
Author: Stone Tao <[email protected]>
Date:   Mon Oct 24 17:01:10 2022 -0700

    Update README.md

commit f8e928d
Author: StoneT2000 <[email protected]>
Date:   Mon Oct 24 16:52:31 2022 -0700

    work

commit dceb543
Author: StoneT2000 <[email protected]>
Date:   Mon Oct 24 12:09:17 2022 -0700

    refactor

commit edca3f3
Author: StoneT2000 <[email protected]>
Date:   Sun Oct 23 19:53:41 2022 -0700

    init data collection and scripts

commit 39af0e6
Author: StoneT2000 <[email protected]>
Date:   Sun Oct 23 19:39:47 2022 -0700

    add cfgs

commit afebbb1
Author: StoneT2000 <[email protected]>
Date:   Fri Oct 21 13:47:05 2022 -0700

    isorted

commit fde0b4d
Author: StoneT2000 <[email protected]>
Date:   Fri Oct 21 13:46:53 2022 -0700

    models

commit 45c0db5
Author: StoneT2000 <[email protected]>
Date:   Fri Oct 21 13:22:52 2022 -0700

    init
StoneT2000 committed Nov 8, 2022
1 parent 570739a commit 2fbe541
Showing 1,318 changed files with 400,586 additions and 5 deletions.
4 changes: 0 additions & 4 deletions .gitignore
@@ -15,17 +15,13 @@ results/
test_results/
videos/

external/

wandb

# TODO uncomment once ready
tr2/planner
tr2/envs/opendrawer/README.md
scripts/realworld
scripts/plan_translate.py
# has bug to fix
scripts/exps/blockstacking/eval_test_stack_pyramid.sh

resultsall
allcfgs
104 changes: 103 additions & 1 deletion README.md
@@ -8,4 +8,106 @@ This is the official codebase for the paper

For visualizations and videos see our project page: https://trajectorytranslation.github.io/. For full details, check out our paper: https://arxiv.org/abs/2210.07658

Stay tuned / watch this repo for when the code is released!
## Installation

To get started, install the repo with conda as follows:

```
conda env create -f environment.yml
conda activate tr2
```

And then run
```
pip install -e ./paper_rl/
pip install -e .
pip install -e external/ManiSkill2
```

Due to some compatibility/dependency issues, we are still cleaning up the setup details for installing opendrawer (which uses ManiSkill 1). For now, you can try the above and then update the conda environment with the ManiSkill 1 dependencies. Check back for updates or watch this repo.

## Getting Started

Our approach relies on following abstract trajectories. Abstract trajectories are easily generated via heuristics that just move 3D points representing objects in space, describing a general plan of what should be achieved by a low-level agent (e.g. the robot arm) without incorporating low-level details like physical manipulation. During RL training, these abstract trajectories are loaded up and given as part of the environment observation.
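
As a concrete toy example of the kind of heuristic involved, the sketch below densifies a few hand-picked 3D waypoints into an abstract trajectory by linear interpolation. The function and the waypoint values are illustrative assumptions, not the repo's actual generation code.

```python
# Hypothetical sketch: an abstract trajectory is just a dense sequence of
# 3D points tracing a general plan, with no low-level physics involved.

def interpolate_waypoints(waypoints, steps_per_segment=10):
    """Expand a short list of 3D waypoints into a dense abstract trajectory."""
    traj = []
    for (x0, y0, z0), (x1, y1, z1) in zip(waypoints, waypoints[1:]):
        for i in range(steps_per_segment):
            t = i / steps_per_segment
            traj.append((x0 + t * (x1 - x0),
                         y0 + t * (y1 - y0),
                         z0 + t * (z1 - z0)))
    traj.append(waypoints[-1])
    return traj

# e.g. move a point above a block, descend to it, then lift back up
waypoints = [(0.0, 0.0, 0.3), (0.1, 0.2, 0.3), (0.1, 0.2, 0.05), (0.1, 0.2, 0.3)]
abstract_traj = interpolate_waypoints(waypoints, steps_per_segment=5)
print(len(abstract_traj))  # 16: 3 segments x 5 steps + the final waypoint
```

During RL training, a sequence like this would be loaded and given to the low-level agent as part of its observation.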

Follow the subsequent sections for instructions on obtaining abstract trajectories, training with them, and evaluating with them.

### Abstract Trajectory Generation / Dataset download links

The dataset files can all be found at this Google Drive link: https://drive.google.com/file/d/1z38DTgzmTc2mfePYnP9qNDUfGgN80FYH/view?usp=sharing

Download and unzip them to a folder called `datasets` for the rest of the code to work.

To generate the abstract trajectories for each environment, see the scripts in [scripts/abstract_trajectories/<env_name>](https://github.com/StoneT2000/trajectorytranslation/tree/main/scripts/abstract_trajectories)

### Training

To train with online RL, specify a base configuration yml file and an experiment name:

```
python scripts/train_translation_online.py \
cfg=train_cfg.yml restart_training=True logging_cfg.exp_name=test_exp exp_cfg.epochs=2000
```
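
The `key=value` arguments above are dotted-path overrides applied onto the base config. As a minimal sketch of the idea, assuming a plain nested-dict config (the repo's actual parser may differ), the merge can look like this:

```python
# Illustrative only: merge "a.b.c=value" overrides into a nested dict config.
import ast

def apply_overrides(cfg, overrides):
    """Apply dotted 'key=value' overrides to a nested dict, in place."""
    for item in overrides:
        key, _, raw = item.partition("=")
        try:
            value = ast.literal_eval(raw)   # "2000" -> int, "True" -> bool
        except (ValueError, SyntaxError):
            value = raw                     # plain strings stay strings
        node = cfg
        *parents, leaf = key.split(".")
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value
    return cfg

cfg = {"exp_cfg": {"epochs": 3000}}
apply_overrides(cfg, ["restart_training=True",
                      "logging_cfg.exp_name=test_exp",
                      "exp_cfg.epochs=2000"])
print(cfg["exp_cfg"]["epochs"])  # 2000
```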

Results, including saved model checkpoints and evaluation videos, are stored in a `results` folder. Note that `results/<exp_name>/models/best_train_EpRet.pt` is the model with the best training return.

To achieve greater precision and a higher success rate, you can run a "finetuning" step that turns on gradient accumulation to stabilize RL training; this was used in the paper to train agents for the Blockstacking task. Run the following, specifying the initial weights from the earlier online training:

```
python scripts/train_translation_online.py \
cfg=train_cfg.yml restart_training=True logging_cfg.exp_name=test_exp_finetune exp_cfg.epochs=2000 \
pretrained_ac_weights=results/test_exp/models/best_train_EpRet.pt exp_cfg.accumulate_grads=True
```
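
To sketch what `exp_cfg.accumulate_grads=True` does conceptually: gradients are averaged over several minibatches before a single parameter update, trading update frequency for stability. The toy least-squares loop below is illustrative only and is not the repo's PPO implementation.

```python
# Toy gradient accumulation on y = w * x with true slope 2.0.

def grad(w, batch):
    # d/dw of mean squared error for predictions w * x
    return sum(2 * (w * x - y) * x for x, y in batch) / len(batch)

def train(batches, w=0.0, lr=0.1, accumulate=4):
    acc, n = 0.0, 0
    for batch in batches:
        acc += grad(w, batch)
        n += 1
        if n == accumulate:              # one update per `accumulate` minibatches
            w -= lr * (acc / accumulate)
            acc, n = 0.0, 0
    return w

data = [[(1.0, 2.0)], [(2.0, 4.0)]] * 8   # 16 minibatches from y = 2x
w = train(data, accumulate=4)             # 4 accumulated updates
print(w)  # 1.875, approaching the true slope 2.0
```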

For each environment, there is an associated `train_cfg.yml` file that specifies the base hyperparameters for online RL training and the environment configs. These are stored at `cfgs/<env_name>/train.yml`.

### Evaluation

To batch evaluate trained models, specify the configuration file and the model weights:

```
python scripts/eval_translation.py \
cfg=eval_cfg.yml model=results/test_exp/models/best_train_EpRet.pt
```

To simply watch a trained model, specify the configuration file, the model weights, and the ID of the trajectory:

```
python scripts/watch_translation.py \
cfg=watch_cfg.yml model=results/test_exp/models/best_train_EpRet.pt traj_id=2
```

For each environment, there is an associated config file for evaluation and watching. These are stored at `cfgs/<env_name>/<eval|watch>.yml`.

### Reproducing Results

For specific scripts to run experiments reproducing Table 1 in our paper, see `scripts/exps/<env_name>/*.sh`. These contain copy-and-pasteable bash scripts reproducing the individual results of each trial behind the mean values shown in Table 1, including training and evaluation.

Already-trained models and weights can be downloaded here: https://drive.google.com/file/d/15mTVSWTdX805EO1XGNBG20BE80BKBkah/view?usp=sharing
They are organized as `results/<env_name>/<model>`.

We are still busy cleaning and organizing results for the other non-core environments that were tested, as well as for one of the ablation studies; stay tuned for updates by watching this repository.

#### Reproducing Real World Experiments

Open-sourced code for the real-world experiments is a work in progress, but here is a high-level overview: we first predict the pose of a block in the real world, place it in simulation, and run our trained blockstacking TR2-GPT2 agent to generate a simulated trajectory. Using position control, we execute the simulated trajectory step by step on the real robot arm. Then we place a new block into view and repeat these steps until done.

<!-- To setup real world experiments, you need a depth camera (our code is configured for intel-real sense), and some calibration of the camera so that you get a transformation matrix from camera frame to robot base frame. -->

### Creating Your Own Environments

This part is still a WIP as we clean out old research and experimental code to make extending the environments easier. In general, however, you can subclass the [TrajectoryEnv](https://github.com/StoneT2000/trajectorytranslation/blob/main/tr2/envs/trajectory_env.py) class, which lets you load abstract trajectories, stack observations, skip sampling, and more. See the existing environments (BoxPusher is a simple, generally cleaner example) for how to do this.
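
To illustrate the general pattern only, here is a toy sketch of the subclassing idea. `TrajectoryEnvStub` and `PointPushEnv` are hypothetical stand-ins invented for this sketch; the real `TrajectoryEnv` in `tr2/envs/trajectory_env.py` has a different interface.

```python
# Illustrative-only sketch of the subclassing pattern, not the repo's API.

class TrajectoryEnvStub:
    """Stand-in base: holds an abstract trajectory and tracks progress."""
    def __init__(self, abstract_traj):
        self.abstract_traj = abstract_traj
        self.t = 0

    def reset(self):
        self.t = 0
        return self.get_obs()

    def get_obs(self):
        # expose agent state plus the abstract trajectory, as described above
        return {"agent": self.agent_state(), "teacher_traj": self.abstract_traj}

class PointPushEnv(TrajectoryEnvStub):
    """Toy 1D env: the agent is rewarded for chasing the abstract points."""
    def __init__(self, abstract_traj):
        super().__init__(abstract_traj)
        self.pos = 0.0

    def agent_state(self):
        return self.pos

    def step(self, action):
        self.pos += action
        self.t += 1
        target = self.abstract_traj[min(self.t, len(self.abstract_traj) - 1)]
        reward = -abs(self.pos - target)  # follow the abstract trajectory
        return self.get_obs(), reward

env = PointPushEnv([0.0, 0.5, 1.0])
obs = env.reset()
obs, reward = env.step(0.5)  # reward is 0 here: agent hit the next point
```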


## Citation

To cite our work, you can use the following BibTeX:

```
@article{tao2022tr2,
  title = {Abstract-to-Executable Trajectory Translation for One-Shot Task Generalization},
  author = {Tao, Stone and Li, Xiaochen and Mu, Tongzhou and Huang, Zhiao and Qin, Yuzhe and Su, Hao},
  journal = {arXiv},
  year = {2022},
}
```
20 changes: 20 additions & 0 deletions cfgs/blockstacking/eval.yml
@@ -0,0 +1,20 @@
env: "BlockStackTrajectory-v0"
env_cfg:
  early_success: True
  task_agnostic: False
  reward_type: "trajectory"
  trajectories: "datasets/blockstacking/dataset_train_ids.npy"
  trajectories_dataset: "datasets/blockstacking/dataset.pkl"
  max_trajectory_skip_steps: 15
  fixed_max_ep_len: 200
  give_traj_id: False
  max_trajectory_length: 1000
  max_rot_stray_dist: 0.25
  max_world_state_stray_dist: 0.03
  max_coord_stray_dist: 0.03
  robot_type: 'Arm'
  controller: 'ee'
  goal: pick_and_place_train

test_n: 128
n_envs: 16
91 changes: 91 additions & 0 deletions cfgs/blockstacking/train.yml
@@ -0,0 +1,91 @@
env: "BlockStackTrajectory-v0"
device: "cuda"
env_cfg:
  early_success: False
  task_agnostic: False
  reward_type: "trajectory"
  # path to file with trajectory ids
  trajectories: "datasets/blockstacking/dataset_train_ids.npy"
  trajectories_dataset: "datasets/blockstacking/dataset.pkl"
  max_trajectory_skip_steps: 15 # not using
  give_traj_id: False
  max_rot_stray_dist: 0.25 # for an angle smaller than 60 deg
  max_world_state_stray_dist: 0.03 # half block size, for diff in blocks
  max_coord_stray_dist: 0.03 # diff between coords of teacher / student panda hands
  robot_type: 'Arm'
  controller: 'ee'
  goal: pick_and_place_train

exp_cfg:
  algo: ppo
  seed: 0
  n_envs: 16

  gae_lambda: 0.95
  target_kl: 0.15
  log_std_scale: -0.5
  pi_lr: 3e-4
  vf_lr: 3e-4

  accumulate_grads: False
  # ppo configs
  epochs: 2000
  critic_warmup_epochs: 0
  update_iters: 3
  steps_per_epoch: 20000
  batch_size: 1024
  eval_freq: 50
  eval_save_video: True
  max_ep_len: 200

  dapg: False

logging_cfg:
  exp_name: transformer_scratch
  wandb: False
  tensorboard: True
  log_freq: 1

model_cfg:
  type: "TranslationTransformer"
  pretrained_actor_weights: None
  pretrained_critic_weights: None

  state_dims: 32
  act_dims: 4
  teacher_dims: 10

  max_time_steps: 1024
  # below should also be merged into dataset_cfgs
  max_student_length: 300
  max_teacher_length: 55
  trajectory_sample_skip_steps: 1
  # equivalent to positional embeddings
  # timestep_embeddings: True
  timestep_embeddings: False
  # whether to include past student actions into the student stack frames fed into transformer
  use_past_actions: False
  # whether to use layer normalization after the initial embedding layers of student/teacher states and student actions
  embed_layer_norm: True

  # translation model specific configs
  stack_size: 5
  state_embedding_hidden_sizes: (64,)
  state_embedding_activation: 'relu'
  final_mlp_hidden_sizes: (128, 128)
  final_mlp_activation: 'relu'

  final_mlp_action_pred_activation: 'tanh'
  final_mlp_state_pred_activation: 'tanh'

  encoder_config:
    type: "state"

  # gpt2 specific https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.GPT2Config
  transformer_config:
    n_head: 2
    n_layer: 4
    activation_function: 'gelu_new'
    resid_pdrop: 0.1
    embd_pdrop: 0.1
    attn_pdrop: 0.1
20 changes: 20 additions & 0 deletions cfgs/blockstacking/watch.yml
@@ -0,0 +1,20 @@
device: 'cuda'
env: "BlockStackTrajectory-v0"
env_cfg:
  early_success: True
  task_agnostic: False
  reward_type: "trajectory"
  trajectories: ['3']
  trajectories_dataset: "datasets/blockstacking/dataset.pkl"
  max_trajectory_skip_steps: 15
  fixed_max_ep_len: 200
  give_traj_id: True
  max_trajectory_length: 1000
  max_rot_stray_dist: 0.25
  max_world_state_stray_dist: 0.03
  max_coord_stray_dist: 0.03
  robot_type: 'Arm'
  controller: 'ee'
  goal: pick_and_place_train
  seed_by_dataset: True

17 changes: 17 additions & 0 deletions cfgs/boxpusher/eval.yml
@@ -0,0 +1,17 @@
env: "BoxPusherTrajectory-v0"
device: "cuda"
env_cfg:
  task_agnostic: False
  early_success: True
  reward_type: "trajectory"
  trajectories: "datasets/boxpusher/dataset_train_ids.npy"
  trajectories_dataset: "datasets/boxpusher/dataset.pkl"
  max_trajectory_skip_steps: 15
  control_type: "2D-continuous"
  exclude_target_state: True
  fixed_max_ep_len: 200
  env_rew_weight: 0.1
  speed_factor: 0.5

test_n: 128
n_envs: 8
35 changes: 35 additions & 0 deletions cfgs/boxpusher/eval_obstacles.yml
@@ -0,0 +1,35 @@
env: "BoxPusherTrajectory-v0"
device: cuda
env_cfg:
  task_agnostic: False
  early_success: True
  reward_type: "trajectory"
  trajectories: "datasets/boxpusher/dataset_train_ids.npy"
  trajectories_dataset: "datasets/boxpusher/dataset.pkl"
  max_trajectory_skip_steps: 15
  control_type: "2D-continuous"
  exclude_target_state: True
  fixed_max_ep_len: 200
  env_rew_weight: 0.1
  speed_factor: 0.5
  seed_by_dataset: False
  re_center: False
planner_cfg:
  planner: "v1"
  planning_env: "v1"
  render_plan: False
  max_plan_length: 300
  re_center: False
  save_plan_videos: False
  min_student_execute_length: 200
  max_student_execute_length: 200
  env_cfg:
    magic_control: True
    control_type: 2D
    obs_mode: dict
    disable_ball_removal: True
    task: obstacle
  task: obstacle

test_n: 128
n_envs: 16
77 changes: 77 additions & 0 deletions cfgs/boxpusher/train.yml
@@ -0,0 +1,77 @@
env: "BoxPusherTrajectory-v0"
env_cfg:
  task_agnostic: False
  early_success: False
  reward_type: "trajectory"
  trajectories: "datasets/boxpusher/dataset_train_ids.npy"
  trajectories_dataset: "datasets/boxpusher/dataset.pkl"
  max_trajectory_skip_steps: 15
  control_type: "2D-continuous" # low-level policies control type
  exclude_target_state: True # target state is in the abstract trajectory
  env_rew_weight: 0.1
  speed_factor: 0.5
  fixed_max_ep_len: 200

exp_cfg:
  algo: ppo
  seed: 0
  n_envs: 20

  accumulate_grads: False
  # ppo configs
  epochs: 3000
  critic_warmup_epochs: 0
  update_iters: 3
  max_ep_len: 200
  steps_per_epoch: 20000
  batch_size: 1024
  target_kl: 0.15
  dapg: False

logging_cfg:
  exp_name: boxpusher_translation/test
  wandb: False
  tensorboard: True
  log_freq: 1

model_cfg:
  type: "TranslationTransformer"
  pretrained_actor_weights: None
  pretrained_critic_weights: None

  state_dims: 4
  act_dims: 2
  teacher_dims: 4

  max_time_steps: 1024
  # below should also be merged into dataset_cfgs
  max_student_length: 128
  max_teacher_length: 32
  trajectory_sample_skip_steps: 2
  # equivalent to positional embeddings
  # timestep_embeddings: True
  timestep_embeddings: False
  # whether to include past student actions into the student stack frames fed into transformer
  use_past_actions: False
  teacher_timestep_embeddings: True
  # whether to use layer normalization after the initial embedding layers of student/teacher states and student actions
  embed_layer_norm: True

  # translation model specific configs
  stack_size: 2
  state_embedding_hidden_sizes: (32,)
  state_embedding_activation: 'relu'
  final_mlp_hidden_sizes: (128, 128)
  final_mlp_activation: 'relu'

  final_mlp_action_pred_activation: 'tanh'
  final_mlp_state_pred_activation: 'tanh'

  # gpt2 specific https://huggingface.co/docs/transformers/model_doc/gpt2#transformers.GPT2Config
  transformer_config:
    n_head: 2
    n_layer: 4
    activation_function: 'gelu_new'
    resid_pdrop: 0.1
    embd_pdrop: 0.1
    attn_pdrop: 0.1
