Reinforcement Learning Template #276

Draft · wants to merge 8 commits into base: main
7 changes: 7 additions & 0 deletions functions/colab.js
@@ -37,6 +37,13 @@ exports.handler = async function (event, _) {
    )
  }

  if (title === 'Template Reinforcement Learning') {
    specific_commands.push(
      '!pip install swig\n',
      '!pip install gymnasium[box2d]'
    )
  }

  const md_cell = [
    `# ${title} by PyTorch-Ignite Code-Generator\n\n`,
    'Please, run the cell below to execute your code.'
35 changes: 35 additions & 0 deletions src/templates/template-reinforcement-learning/README.md
@@ -0,0 +1,35 @@
[![Code-Generator](https://badgen.net/badge/Template%20by/Code-Generator/ee4c2c?labelColor=eaa700)](https://github.com/pytorch-ignite/code-generator)

# Reinforcement Learning Template

This is the Reinforcement Learning template by Code-Generator. It trains an A2C (Advantage Actor-Critic) agent on the Gymnasium (formerly OpenAI Gym) `CarRacing-v2` environment.
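
For reference, here is a minimal sketch of how the `CarRacing-v2` environment can be created directly with Gymnasium. This is illustrative only; the template itself builds its environments inside `a2c_model_env.py`:

```python
import gymnasium as gym

# CarRacing-v2 requires the Box2D extra: pip install swig gymnasium[box2d]
env = gym.make("CarRacing-v2", continuous=True)

obs, info = env.reset(seed=0)
print(obs.shape)         # (96, 96, 3) RGB frame
print(env.action_space)  # Box(3,): steering, gas, brake

obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
env.close()
```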

## Getting Started

Install the dependencies with `pip`:

```sh
pip install -r requirements.txt --progress-bar off -U
```

### Code structure

```
|
|- README.md
|
|- a2c.py : main script to run
|- a2c_model_env.py : factories for the A2C models, environment, data collector, loss, and optimizer
|- utils.py : module with various helper functions
|- requirements.txt : dependencies to install with pip
|
|- config_a2c.yaml : global configuration YAML file
```

## Training

### 1 GPU Training

```sh
python a2c.py config_a2c.yaml
```
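
`a2c.py` reads its hyperparameters from `config_a2c.yaml`. The field names below are the ones the script actually accesses; the values are hypothetical placeholders (shown as the Python namespace the script would see after parsing, not as the template's real defaults):

```python
from types import SimpleNamespace

# Illustrative values only; the real defaults live in config_a2c.yaml.
config = SimpleNamespace(
    seed=42,                 # used as manual_seed(config.seed + rank)
    num_envs=2,              # number of parallel environments
    total_frames=1_000_000,  # total environment frames to collect
    frames_per_batch=2_048,  # frames returned by the collector per iteration
    frame_skip=1,            # multiplier applied when counting collected frames
    lr_scheduler=True,       # enable the LinearLR schedule
    log_every_episodes=10,   # iteration interval for logging train metrics
)
```
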
109 changes: 109 additions & 0 deletions src/templates/template-reinforcement-learning/a2c.py
@@ -0,0 +1,109 @@
from pprint import pformat
from shutil import copy
from typing import Any

import ignite.distributed as idist
import torch
from ignite.engine import Engine, Events
from ignite.handlers import LRScheduler

from ignite.utils import manual_seed

from utils import *

from a2c_model_env import make_a2c_models, make_collector, make_loss, make_optim, make_test_env


def main():
    config = setup_config()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    config.device = f"{device}"

    rank = idist.get_rank()
    manual_seed(config.seed + rank)
    config.output_dir = setup_output_dir(config, rank)
    if rank == 0:
        save_config(config, config.output_dir)

    actor, critic = make_a2c_models(config)
    actor = actor.to(device)
    critic = critic.to(device)

    collector = make_collector(config, policy=actor)
    loss_module, adv_module = make_loss(config, actor_network=actor, value_network=critic)
    optim = make_optim(config, actor_network=actor, value_network=critic)

    # One optimizer update is performed per collected batch, so the LinearLR
    # schedule spans total_frames / frames_per_batch iterations (the same value
    # used as epoch_length below).
    total_network_updates = config.total_frames // config.frames_per_batch

    scheduler = None
    if config.lr_scheduler:
        scheduler = torch.optim.lr_scheduler.LinearLR(optim, total_iters=total_network_updates)
        scheduler = LRScheduler(scheduler)

    test_env = make_test_env(config)

    def run_single_timestep(engine, _):
        frames_in_batch = engine.state.data.numel()
        trainer.state.collected_frames += frames_in_batch * config.frame_skip
        data_view = engine.state.data.reshape(-1)

        with torch.no_grad():
            batch = adv_module(data_view)

        # Normalize advantage
        adv = batch.get("advantage")

        # mean of the advantage values
        loc = adv.mean().item()
        # standard deviation of the advantage values
        scale = adv.std().clamp_min(1e-6).item()
        # normalizing the advantage values
        adv = (adv - loc) / scale
        batch.set("advantage", adv)

        # Forward pass A2C loss
        batch = batch.to(device)
        loss = loss_module(batch)
        loss_sum = loss["loss_critic"] + loss["loss_objective"] + loss["loss_entropy"]

        # Backward pass + learning step
        loss_sum.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(
            list(actor.parameters()) + list(critic.parameters()), max_norm=0.5
        )
        engine.state.metrics = {
            "loss_sum": loss_sum.item(),
            "grad_norm": grad_norm.item(),
        }
        optim.step()
        optim.zero_grad()

    trainer = Engine(run_single_timestep)

    logger = setup_logging(config)
    logger.info("Configuration: \n%s", pformat(vars(config)))
    trainer.logger = logger

    if config.lr_scheduler:
        trainer.add_event_handler(Events.ITERATION_COMPLETED, scheduler)

    trainer.add_event_handler(
        Events.ITERATION_COMPLETED(every=config.log_every_episodes),
        log_metrics,
        tag="train",
    )

    @trainer.on(Events.ITERATION_STARTED)
    def update_data():
        # Pull the next batch of collected frames from the collector.
        trainer.state.data = next(iter(collector))
        trainer.state.collected_frames = 0

    @trainer.on(Events.ITERATION_COMPLETED)
    def update_collector_weights():
        # Sync the collector's copy of the policy with the freshly updated actor.
        collector.update_policy_weights_()

    trainer.run(epoch_length=int(config.total_frames / config.frames_per_batch), max_epochs=1)


if __name__ == "__main__":
    main()