Commit 509212d (0 parents)
Showing 74 changed files with 10,739 additions and 0 deletions.
@@ -0,0 +1,4 @@
.DS_Store

.ipynb_checkpoints/
__pycache__/
@@ -0,0 +1,24 @@
MIT License

Copyright (c) 2021 authors of paper 6586 submitted to ICML 2021

Copyright for portions of project Permutation Invariant Critic for
Multi-Agent Deep Reinforcement Learning are held by I.-J. Liu and R. A. Yeh, 2019

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,36 @@
## Agent-Temporal Attention for Reward Redistribution in Episodic Multi-Agent Reinforcement Learning (AREL) ##

This repository contains a PyTorch implementation of AREL, built on MADDPG with a Permutation Invariant Critic (PIC).

#### Platform and Dependencies:
* Ubuntu 18.04
* Python 3.7
* PyTorch 1.6.0
* OpenAI gym 0.10.9 (https://github.com/openai/gym)

### Install the improved MPE:
    cd multiagent-particle-envs
    pip install -e .
Please ensure that `multiagent-particle-envs` has been added to your `PYTHONPATH`; a quick way to check this is shown below.
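If the import below succeeds, the environments are reachable from Python. This is only a sanity check and assumes the package installs under the top-level name `multiagent`, as in the upstream multiagent-particle-envs repository.

    python -c "import multiagent; print(multiagent.__file__)"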
## Training examples
The following are sample commands for MARL training with different credit-assignment methods in the Predator-Prey environment with `15 predators`.

### Agent-temporal attention (AREL)
    python maddpg/main_vec_dist_AREL.py --exp_name simple_tag_AREL_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### RUDDER
    python maddpg/main_vec_dist_RUDDER.py --exp_name simple_tag_RUDDER_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### Trajectory-space smoothing (IRCR)
    python maddpg/main_vec_dist_IRCR.py --exp_name simple_tag_smooth_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### Sequence modeling
    python maddpg/main_vec_dist_SeqMod.py --exp_name simple_tag_TimeAtt_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda

Results will be saved in the `results` folder in the parent directory.

### Acknowledgement
The code for MADDPG with PIC is based on the publicly available implementation at https://github.com/IouJenLiu/PIC

### License
This project is licensed under the MIT License.
@@ -0,0 +1,4 @@
from .modules import SelfAttentionWide, SelfAttentionNarrow, TransformerBlock

from .transformers import Time_Agent_Transformer, Time_Transformer
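This `__init__` re-exports the attention blocks and the temporal transformers at package level. Assuming the package is importable as `former` (as the `from former import util` line in the next file suggests; this name is an assumption, not confirmed by the diff), a downstream script could then write:

    from former import Time_Agent_Transformer, TransformerBlock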
@@ -0,0 +1,230 @@
from former import util
from util import mask_

import torch
from torch import nn
import torch.nn.functional as F

import random, math

class SelfAttentionWide(nn.Module):
    def __init__(self, emb, heads=8, mask=False):
        """
        :param emb: embedding dimension of the input tokens
        :param heads: number of attention heads
        :param mask: if True, apply a causal mask (no attention to future positions)
        """

        super().__init__()

        self.emb = emb
        self.heads = heads
        self.mask = mask
        self.dot = 0

        self.tokeys = nn.Linear(emb, emb * heads, bias=False)
        self.toqueries = nn.Linear(emb, emb * heads, bias=False)
        self.tovalues = nn.Linear(emb, emb * heads, bias=False)

        self.softmax = nn.Softmax(dim=2)

        self.unifyheads = nn.Linear(heads * emb, emb)

    def forward(self, x):

        b, t, e = x.size()
        h = self.heads
        assert e == self.emb, f'Input embedding dim ({e}) should match layer embedding dim ({self.emb})'

        keys = self.tokeys(x).view(b, t, h, e)
        queries = self.toqueries(x).view(b, t, h, e)
        values = self.tovalues(x).view(b, t, h, e)

        # compute scaled dot-product self-attention

        # - fold heads into the batch dimension
        keys = keys.transpose(1, 2).contiguous().view(b * h, t, e)
        queries = queries.transpose(1, 2).contiguous().view(b * h, t, e)
        values = values.transpose(1, 2).contiguous().view(b * h, t, e)

        queries = queries / (e ** (1/4))
        keys = keys / (e ** (1/4))
        # - Instead of dividing the dot products by sqrt(e), we scale the queries and keys:
        #   (q / e^{1/4}) . (k / e^{1/4}) = (q . k) / sqrt(e).
        #   This should be more memory efficient.

        # - get dot product of queries and keys, and scale
        dot = torch.bmm(queries, keys.transpose(1, 2))

        assert dot.size() == (b * h, t, t)

        if self.mask:  # mask out the upper half of the dot matrix, excluding the diagonal
            mask_(dot, maskval=float('-inf'), mask_diagonal=False)

        # dot = F.softmax(dot, dim=2)
        dot = self.softmax(dot)
        # - dot now has row-wise self-attention probabilities

        # apply the self attention to the values
        out = torch.bmm(dot, values).view(b, h, t, e)

        # swap h, t back, unify heads
        out = out.transpose(1, 2).contiguous().view(b, t, h * e)

        return self.unifyheads(out)

class SelfAttentionNarrow(nn.Module):

    def __init__(self, emb, heads=8, mask=False):
        """
        :param emb: embedding dimension of the input tokens
        :param heads: number of attention heads
        :param mask: if True, apply a causal mask (no attention to future positions)
        """

        super().__init__()

        assert emb % heads == 0, f'Embedding dimension ({emb}) should be divisible by nr. of heads ({heads})'

        self.emb = emb
        self.heads = heads
        self.mask = mask

        s = emb // heads
        # - We will break the embedding into `heads` chunks and feed each to a different attention head

        self.tokeys = nn.Linear(s, s, bias=False)
        self.toqueries = nn.Linear(s, s, bias=False)
        self.tovalues = nn.Linear(s, s, bias=False)

        self.unifyheads = nn.Linear(heads * s, emb)

    def forward(self, x):

        b, t, e = x.size()
        h = self.heads
        assert e == self.emb, f'Input embedding dim ({e}) should match layer embedding dim ({self.emb})'

        s = e // h
        x = x.view(b, t, h, s)

        keys = self.tokeys(x)
        queries = self.toqueries(x)
        values = self.tovalues(x)

        assert keys.size() == (b, t, h, s)
        assert queries.size() == (b, t, h, s)
        assert values.size() == (b, t, h, s)

        # Compute scaled dot-product self-attention

        # - fold heads into the batch dimension
        keys = keys.transpose(1, 2).contiguous().view(b * h, t, s)
        queries = queries.transpose(1, 2).contiguous().view(b * h, t, s)
        values = values.transpose(1, 2).contiguous().view(b * h, t, s)

        queries = queries / (e ** (1/4))
        keys = keys / (e ** (1/4))
        # - Instead of dividing the dot products by sqrt(e), we scale the queries and keys:
        #   (q / e^{1/4}) . (k / e^{1/4}) = (q . k) / sqrt(e).
        #   This should be more memory efficient.

        # - get dot product of queries and keys, and scale
        dot = torch.bmm(queries, keys.transpose(1, 2))

        assert dot.size() == (b * h, t, t)

        if self.mask:  # mask out the upper half of the dot matrix, excluding the diagonal
            mask_(dot, maskval=float('-inf'), mask_diagonal=False)

        dot = F.softmax(dot, dim=2)
        # - dot now has row-wise self-attention probabilities

        # apply the self attention to the values
        out = torch.bmm(dot, values).view(b, h, t, s)

        # swap h, t back, unify heads
        out = out.transpose(1, 2).contiguous().view(b, t, s * h)

        return self.unifyheads(out)

class TransformerBlock(nn.Module):

    def __init__(self, emb, heads, mask, seq_length, ff_hidden_mult=4, dropout=0.0, wide=True):
        super().__init__()

        self.attention = SelfAttentionWide(emb, heads=heads, mask=mask) if wide \
            else SelfAttentionNarrow(emb, heads=heads, mask=mask)
        self.mask = mask

        self.norm1 = nn.LayerNorm(emb)
        self.norm2 = nn.LayerNorm(emb)

        self.ff = nn.Sequential(
            nn.Linear(emb, ff_hidden_mult * emb),
            nn.ReLU(),
            nn.Linear(ff_hidden_mult * emb, emb)
        )

        self.do = nn.Dropout(dropout)

    def forward(self, x):

        attended = self.attention(x)

        x = self.norm1(attended + x)

        x = self.do(x)

        fedforward = self.ff(x)

        x = self.norm2(fedforward + x)

        x = self.do(x)

        return x

class TransformerBlock_Agent(nn.Module):

    def __init__(self, emb, heads, mask, seq_length, n_agents, ff_hidden_mult=4, dropout=0.0, wide=True):
        super().__init__()

        self.n_a = n_agents

        self.attention = SelfAttentionWide(emb, heads=heads, mask=mask) if wide \
            else SelfAttentionNarrow(emb, heads=heads, mask=mask)
        self.mask = mask

        self.norm1 = nn.LayerNorm(emb)
        self.norm2 = nn.LayerNorm(emb)

        self.ff = nn.Sequential(
            nn.Linear(emb, ff_hidden_mult * emb),
            nn.ReLU(),
            nn.Linear(ff_hidden_mult * emb, emb)
        )

        self.do = nn.Dropout(dropout)

    def forward(self, x):

        _, t, e = x.size()

        # regroup the (batch * n_agents, t, e) input so that attention runs across
        # the agent dimension at each time step: (batch * t, n_agents, e)
        x = x.view(-1, self.n_a, t, e).transpose(1, 2).contiguous().view(-1, self.n_a, e)

        attended = self.attention(x)

        x = self.norm1(attended + x)

        x = self.do(x)

        fedforward = self.ff(x)

        x = self.norm2(fedforward + x)

        x = self.do(x)

        # restore the original (batch * n_agents, t, e) layout
        x = x.view(-1, t, self.n_a, e).transpose(1, 2).contiguous().view(-1, t, e)

        return x
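As a quick orientation to the blocks defined above, the following is a minimal shape check. It is a sketch, not part of the repository: it assumes the four classes are importable as defined here, and the sizes (batch 8, 10 time steps, 3 agents, embedding dimension 32) are arbitrary illustration values.

import torch

b, t, n_agents, emb = 8, 10, 3, 32           # illustrative sizes only

x = torch.randn(b, t, emb)                   # (batch, time, embedding)

wide = SelfAttentionWide(emb, heads=4)
assert wide(x).shape == (b, t, emb)          # attention over the time dimension

narrow = SelfAttentionNarrow(emb, heads=4)   # requires emb % heads == 0
assert narrow(x).shape == (b, t, emb)

block = TransformerBlock(emb, heads=4, mask=False, seq_length=t)
assert block(x).shape == (b, t, emb)

# TransformerBlock_Agent expects agents folded into the batch dimension, ordered so
# that each group of n_agents consecutive rows belongs to one batch element.
xa = torch.randn(b * n_agents, t, emb)
agent_block = TransformerBlock_Agent(emb, heads=4, mask=False,
                                     seq_length=t, n_agents=n_agents)
assert agent_block(xa).shape == (b * n_agents, t, emb)   # attends across agents per time step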