Commit 509212d: first commit
baicenxiao committed Feb 3, 2021 (0 parents)
Showing 74 changed files with 10,739 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
.DS_Store

.ipynb_checkpoints/
__pycache__/
24 changes: 24 additions & 0 deletions LICENSE
@@ -0,0 +1,24 @@
MIT License

Copyright (c) 2021 authors of paper 6586 submitted to ICML 2021

Copyright for portions of the project Permutation Invariant Critic for
Multi-Agent Deep Reinforcement Learning is held by I.-J. Liu and R. A. Yeh, 2019

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
36 changes: 36 additions & 0 deletions README.md
@@ -0,0 +1,36 @@
## Agent-Temporal Attention for Reward Redistribution in Episodic Multi-Agent Reinforcement Learning (AREL) ##


This repository contains a PyTorch implementation of AREL, built on MADDPG with a Permutation Invariant Critic (PIC).

### Platform and Dependencies:
* Ubuntu 18.04
* Python 3.7
* PyTorch 1.6.0
* OpenAI gym 0.10.9 (https://github.com/openai/gym)

### Install the improved MPE:
cd multiagent-particle-envs
pip install -e .
Please ensure that `multiagent-particle-envs` has been added to your `PYTHONPATH`.
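For example (a sketch; adjust the path to wherever you cloned the repository):

export PYTHONPATH=$PYTHONPATH:$(pwd)/multiagent-particle-envs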

## Training examples
The following are sample commands for MARL training with different credit-assignment methods in the Predator-Prey environment with 15 predators.

### Agent-temporal attention (AREL)
python maddpg/main_vec_dist_AREL.py --exp_name simple_tag_AREL_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### RUDDER
python maddpg/main_vec_dist_RUDDER.py --exp_name simple_tag_RUDDER_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### Trajectory-space smoothing (IRCR)
python maddpg/main_vec_dist_IRCR.py --exp_name simple_tag_smooth_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### Sequence modeling
python maddpg/main_vec_dist_SeqMod.py --exp_name simple_tag_TimeAtt_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda

Results will be saved in the `results` folder in the parent directory.

### Acknowledgement
The code of MADDPG with PIC is based on the publicly available implementation at https://github.com/IouJenLiu/PIC.

### License
This project is licensed under the MIT License.

4 changes: 4 additions & 0 deletions former/__init__.py
@@ -0,0 +1,4 @@
from .modules import SelfAttentionWide, SelfAttentionNarrow, TransformerBlock

from .transformers import Time_Agent_Transformer, Time_Transformer
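
# With these exports in place, client code can import directly from the package,
# e.g. (an illustrative sketch):
#
#   from former import SelfAttentionWide, TransformerBlock, Time_Agent_Transformer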

230 changes: 230 additions & 0 deletions former/modules.py
@@ -0,0 +1,230 @@
from .util import mask_

import torch
from torch import nn
import torch.nn.functional as F

import random, math

class SelfAttentionWide(nn.Module):
def __init__(self, emb, heads=8, mask=False):
"""
:param emb:
:param heads:
:param mask:
"""

super().__init__()

self.emb = emb
self.heads = heads
self.mask = mask
self.dot = 0

self.tokeys = nn.Linear(emb, emb * heads, bias=False)
self.toqueries = nn.Linear(emb, emb * heads, bias=False)
self.tovalues = nn.Linear(emb, emb * heads, bias=False)

self.softmax = nn.Softmax(dim=2)

self.unifyheads = nn.Linear(heads * emb, emb)

def forward(self, x):

b, t, e = x.size()
h = self.heads
assert e == self.emb, f'Input embedding dim ({e}) should match layer embedding dim ({self.emb})'

keys = self.tokeys(x).view(b, t, h, e)
queries = self.toqueries(x).view(b, t, h, e)
values = self.tovalues(x).view(b, t, h, e)

# compute scaled dot-product self-attention

# - fold heads into the batch dimension
keys = keys.transpose(1, 2).contiguous().view(b * h, t, e)
queries = queries.transpose(1, 2).contiguous().view(b * h, t, e)
values = values.transpose(1, 2).contiguous().view(b * h, t, e)

queries = queries / (e ** (1/4))
keys = keys / (e ** (1/4))
# - Instead of dividing the dot products by sqrt(e), we scale the queries and keys
#   before the multiplication. This is mathematically equivalent and should be more
#   memory efficient.

# - get dot product of queries and keys, and scale
dot = torch.bmm(queries, keys.transpose(1, 2))

assert dot.size() == (b*h, t, t)

if self.mask: # mask out the upper half of the dot matrix, excluding the diagonal
mask_(dot, maskval=float('-inf'), mask_diagonal=False)

dot = self.softmax(dot)
# - dot now has row-wise self-attention probabilities

# apply the self attention to the values
out = torch.bmm(dot, values).view(b, h, t, e)

# swap h, t back, unify heads
out = out.transpose(1, 2).contiguous().view(b, t, h * e)

return self.unifyheads(out)
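
# A quick shape sanity check for SelfAttentionWide (an illustrative sketch; the
# sizes below are arbitrary assumptions, not values used in training):
#
#   attn = SelfAttentionWide(emb=32, heads=4)
#   x = torch.randn(8, 10, 32)         # (batch, time, emb)
#   out = attn(x)
#   assert out.shape == (8, 10, 32)    # self-attention preserves the input shape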

class SelfAttentionNarrow(nn.Module):

def __init__(self, emb, heads=8, mask=False):
"""
:param emb:
:param heads:
:param mask:
"""

super().__init__()

assert emb % heads == 0, f'Embedding dimension ({emb}) should be divisible by nr. of heads ({heads})'

self.emb = emb
self.heads = heads
self.mask = mask

s = emb // heads
# - We will break the embedding into `heads` chunks and feed each to a different attention head

self.tokeys = nn.Linear(s, s, bias=False)
self.toqueries = nn.Linear(s, s, bias=False)
self.tovalues = nn.Linear(s, s, bias=False)

self.unifyheads = nn.Linear(heads * s, emb)

def forward(self, x):

b, t, e = x.size()
h = self.heads
assert e == self.emb, f'Input embedding dim ({e}) should match layer embedding dim ({self.emb})'

s = e // h
x = x.view(b, t, h, s)

keys = self.tokeys(x)
queries = self.toqueries(x)
values = self.tovalues(x)

assert keys.size() == (b, t, h, s)
assert queries.size() == (b, t, h, s)
assert values.size() == (b, t, h, s)

# Compute scaled dot-product self-attention

# - fold heads into the batch dimension
keys = keys.transpose(1, 2).contiguous().view(b * h, t, s)
queries = queries.transpose(1, 2).contiguous().view(b * h, t, s)
values = values.transpose(1, 2).contiguous().view(b * h, t, s)

queries = queries / (e ** (1/4))
keys = keys / (e ** (1/4))
# - Instead of dividing the dot products by sqrt(e), we scale the queries and keys
#   before the multiplication. This is mathematically equivalent and should be more
#   memory efficient.

# - get dot product of queries and keys, and scale
dot = torch.bmm(queries, keys.transpose(1, 2))

assert dot.size() == (b*h, t, t)

if self.mask: # mask out the upper half of the dot matrix, excluding the diagonal
mask_(dot, maskval=float('-inf'), mask_diagonal=False)

dot = F.softmax(dot, dim=2)
# - dot now has row-wise self-attention probabilities

# apply the self attention to the values
out = torch.bmm(dot, values).view(b, h, t, s)

# swap h, t back, unify heads
out = out.transpose(1, 2).contiguous().view(b, t, s * h)

return self.unifyheads(out)
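
# SelfAttentionNarrow usage mirrors SelfAttentionWide, except emb must be divisible
# by heads since each head attends over an emb // heads slice (sizes assumed):
#
#   attn = SelfAttentionNarrow(emb=32, heads=4)    # 32 % 4 == 0
#   out = attn(torch.randn(8, 10, 32))             # -> (8, 10, 32)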

class TransformerBlock(nn.Module):

def __init__(self, emb, heads, mask, seq_length, ff_hidden_mult=4, dropout=0.0, wide=True):
super().__init__()

self.attention = SelfAttentionWide(emb, heads=heads, mask=mask) if wide \
else SelfAttentionNarrow(emb, heads=heads, mask=mask)
self.mask = mask

self.norm1 = nn.LayerNorm(emb)
self.norm2 = nn.LayerNorm(emb)

self.ff = nn.Sequential(
nn.Linear(emb, ff_hidden_mult * emb),
nn.ReLU(),
nn.Linear(ff_hidden_mult * emb, emb)
)

self.do = nn.Dropout(dropout)

def forward(self, x):

attended = self.attention(x)

x = self.norm1(attended + x)

x = self.do(x)

fedforward = self.ff(x)

x = self.norm2(fedforward + x)

x = self.do(x)

return x
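
# A minimal sketch of using TransformerBlock on its own (all sizes are assumptions):
#
#   block = TransformerBlock(emb=32, heads=4, mask=False, seq_length=10)
#   x = torch.randn(8, 10, 32)    # (batch, time, emb)
#   x = block(x)                  # attention + feed-forward with residuals; shape preserved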


class TransformerBlock_Agent(nn.Module):

def __init__(self, emb, heads, mask, seq_length, n_agents, ff_hidden_mult=4, dropout=0.0, wide=True):
super().__init__()

self.n_a = n_agents

self.attention = SelfAttentionWide(emb, heads=heads, mask=mask) if wide \
else SelfAttentionNarrow(emb, heads=heads, mask=mask)
self.mask = mask

self.norm1 = nn.LayerNorm(emb)
self.norm2 = nn.LayerNorm(emb)

self.ff = nn.Sequential(
nn.Linear(emb, ff_hidden_mult * emb),
nn.ReLU(),
nn.Linear(ff_hidden_mult * emb, emb)
)

self.do = nn.Dropout(dropout)

def forward(self, x):

_, t, e = x.size()

# regroup: (b*n_a, t, e) -> (b*t, n_a, e) so attention runs across agents at each time step
x = x.view(-1, self.n_a, t, e).transpose(1, 2).contiguous().view(-1, self.n_a, e)

attended = self.attention(x)

x = self.norm1(attended + x)

x = self.do(x)

fedforward = self.ff(x)

x = self.norm2(fedforward + x)

x = self.do(x)

# restore the original grouping: (b*t, n_a, e) -> (b*n_a, t, e)
x = x.view(-1, t, self.n_a, e).transpose(1, 2).contiguous().view(-1, t, e)

return x
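
# TransformerBlock_Agent treats the leading dimension as (batch * n_agents) and
# regroups internally so attention runs across agents at each time step. A
# hypothetical example with 3 agents (sizes assumed):
#
#   block = TransformerBlock_Agent(emb=32, heads=4, mask=False, seq_length=10, n_agents=3)
#   x = torch.randn(8 * 3, 10, 32)    # 8 episodes x 3 agents
#   x = block(x)                      # same shape; attention mixes agent information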