Commit 509212d: first commit
baicenxiao committed Feb 3, 2021 (0 parents)
Showing 74 changed files with 10,739 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
.DS_Store

.ipynb_checkpoints/
__pycache__/
24 changes: 24 additions & 0 deletions LICENSE
@@ -0,0 +1,24 @@
MIT License

Copyright (c) 2021 authors of paper 6586 submitted to ICML 2021

Copyright for portions of the project Permutation Invariant Critic for
Multi-Agent Deep Reinforcement Learning is held by I.-J. Liu and R. A. Yeh, 2019

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
36 changes: 36 additions & 0 deletions README.md
@@ -0,0 +1,36 @@
## Agent-Temporal Attention for Reward Redistribution in Episodic Multi-Agent Reinforcement Learning (AREL) ##


This repository contains a PyTorch implementation of AREL, built on MADDPG with a Permutation Invariant Critic (PIC).

### Platform and Dependencies:
* Ubuntu 18.04
* Python 3.7
* PyTorch 1.6.0
* OpenAI gym 0.10.9 (https://github.com/openai/gym)

### Install the improved MPE:
cd multiagent-particle-envs
pip install -e .
Please ensure that `multiagent-particle-envs` has been added to your `PYTHONPATH`.
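For example (a sketch; adjust the path to wherever you cloned the repository):

export PYTHONPATH=$PYTHONPATH:$(pwd)/multiagent-particle-envs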

## Training examples
The following are sample commands for MARL training with different credit-assignment methods in the Predator-Prey environment with 15 predators.

### Agent-temporal attention (AREL)
python maddpg/main_vec_dist_AREL.py --exp_name simple_tag_AREL_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### RUDDER
python maddpg/main_vec_dist_RUDDER.py --exp_name simple_tag_RUDDER_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### Trajectory-space smoothing (IRCR)
python maddpg/main_vec_dist_IRCR.py --exp_name simple_tag_smooth_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda
### Sequence modeling
python maddpg/main_vec_dist_SeqMod.py --exp_name simple_tag_TimeAtt_n15 --scenario simple_tag_n15 --num_steps=50 --num_episodes=100000 --critic_type gcn_max --cuda

Results will be saved in the `results` folder in the parent directory.

### Acknowledgement
The code of MADDPG with PIC is based on the publicly available implementation at https://github.com/IouJenLiu/PIC.

### License
This project is licensed under the MIT License.

4 changes: 4 additions & 0 deletions former/__init__.py
@@ -0,0 +1,4 @@
from .modules import SelfAttentionWide, SelfAttentionNarrow, TransformerBlock

from .transformers import Time_Agent_Transformer, Time_Transformer
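
# With these exports in place, client code can import directly from the package,
# e.g. (an illustrative sketch):
#
#   from former import SelfAttentionWide, TransformerBlock, Time_Agent_Transformer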

230 changes: 230 additions & 0 deletions former/modules.py
@@ -0,0 +1,230 @@
from .util import mask_

import torch
from torch import nn
import torch.nn.functional as F

import random, math

class SelfAttentionWide(nn.Module):
def __init__(self, emb, heads=8, mask=False):
"""
:param emb:
:param heads:
:param mask:
"""

super().__init__()

self.emb = emb
self.heads = heads
self.mask = mask
self.dot = 0

self.tokeys = nn.Linear(emb, emb * heads, bias=False)
self.toqueries = nn.Linear(emb, emb * heads, bias=False)
self.tovalues = nn.Linear(emb, emb * heads, bias=False)

self.softmax = nn.Softmax(dim=2)

self.unifyheads = nn.Linear(heads * emb, emb)

def forward(self, x):

b, t, e = x.size()
h = self.heads
assert e == self.emb, f'Input embedding dim ({e}) should match layer embedding dim ({self.emb})'

keys = self.tokeys(x).view(b, t, h, e)
queries = self.toqueries(x).view(b, t, h, e)
values = self.tovalues(x).view(b, t, h, e)

# compute scaled dot-product self-attention

# - fold heads into the batch dimension
keys = keys.transpose(1, 2).contiguous().view(b * h, t, e)
queries = queries.transpose(1, 2).contiguous().view(b * h, t, e)
values = values.transpose(1, 2).contiguous().view(b * h, t, e)

queries = queries / (e ** (1/4))
keys = keys / (e ** (1/4))
# - Instead of dividing the dot products by sqrt(e), we scale the queries and keys
#   before the multiplication. This is mathematically equivalent and should be more
#   memory efficient.

# - get dot product of queries and keys, and scale
dot = torch.bmm(queries, keys.transpose(1, 2))

assert dot.size() == (b*h, t, t)

if self.mask: # mask out the upper half of the dot matrix, excluding the diagonal
mask_(dot, maskval=float('-inf'), mask_diagonal=False)

dot = self.softmax(dot)
# - dot now has row-wise self-attention probabilities

# apply the self attention to the values
out = torch.bmm(dot, values).view(b, h, t, e)

# swap h, t back, unify heads
out = out.transpose(1, 2).contiguous().view(b, t, h * e)

return self.unifyheads(out)
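
# A quick shape sanity check for SelfAttentionWide (an illustrative sketch; the
# sizes below are arbitrary assumptions, not values used in training):
#
#   attn = SelfAttentionWide(emb=32, heads=4)
#   x = torch.randn(8, 10, 32)         # (batch, time, emb)
#   out = attn(x)
#   assert out.shape == (8, 10, 32)    # self-attention preserves the input shape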

class SelfAttentionNarrow(nn.Module):

def __init__(self, emb, heads=8, mask=False):
"""
:param emb:
:param heads:
:param mask:
"""

super().__init__()

assert emb % heads == 0, f'Embedding dimension ({emb}) should be divisible by nr. of heads ({heads})'

self.emb = emb
self.heads = heads
self.mask = mask

s = emb // heads
# - We will break the embedding into `heads` chunks and feed each to a different attention head

self.tokeys = nn.Linear(s, s, bias=False)
self.toqueries = nn.Linear(s, s, bias=False)
self.tovalues = nn.Linear(s, s, bias=False)

self.unifyheads = nn.Linear(heads * s, emb)

def forward(self, x):

b, t, e = x.size()
h = self.heads
assert e == self.emb, f'Input embedding dim ({e}) should match layer embedding dim ({self.emb})'

s = e // h
x = x.view(b, t, h, s)

keys = self.tokeys(x)
queries = self.toqueries(x)
values = self.tovalues(x)

assert keys.size() == (b, t, h, s)
assert queries.size() == (b, t, h, s)
assert values.size() == (b, t, h, s)

# Compute scaled dot-product self-attention

# - fold heads into the batch dimension
keys = keys.transpose(1, 2).contiguous().view(b * h, t, s)
queries = queries.transpose(1, 2).contiguous().view(b * h, t, s)
values = values.transpose(1, 2).contiguous().view(b * h, t, s)

queries = queries / (e ** (1/4))
keys = keys / (e ** (1/4))
# - Instead of dividing the dot products by sqrt(e), we scale the queries and keys
#   before the multiplication. This is mathematically equivalent and should be more
#   memory efficient.

# - get dot product of queries and keys, and scale
dot = torch.bmm(queries, keys.transpose(1, 2))

assert dot.size() == (b*h, t, t)

if self.mask: # mask out the upper half of the dot matrix, excluding the diagonal
mask_(dot, maskval=float('-inf'), mask_diagonal=False)

dot = F.softmax(dot, dim=2)
# - dot now has row-wise self-attention probabilities

# apply the self attention to the values
out = torch.bmm(dot, values).view(b, h, t, s)

# swap h, t back, unify heads
out = out.transpose(1, 2).contiguous().view(b, t, s * h)

return self.unifyheads(out)
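
# SelfAttentionNarrow usage mirrors SelfAttentionWide, except emb must be divisible
# by heads since each head attends over an emb // heads slice (sizes assumed):
#
#   attn = SelfAttentionNarrow(emb=32, heads=4)    # 32 % 4 == 0
#   out = attn(torch.randn(8, 10, 32))             # -> (8, 10, 32)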

class TransformerBlock(nn.Module):

def __init__(self, emb, heads, mask, seq_length, ff_hidden_mult=4, dropout=0.0, wide=True):
super().__init__()

self.attention = SelfAttentionWide(emb, heads=heads, mask=mask) if wide \
else SelfAttentionNarrow(emb, heads=heads, mask=mask)
self.mask = mask

self.norm1 = nn.LayerNorm(emb)
self.norm2 = nn.LayerNorm(emb)

self.ff = nn.Sequential(
nn.Linear(emb, ff_hidden_mult * emb),
nn.ReLU(),
nn.Linear(ff_hidden_mult * emb, emb)
)

self.do = nn.Dropout(dropout)

def forward(self, x):

attended = self.attention(x)

x = self.norm1(attended + x)

x = self.do(x)

fedforward = self.ff(x)

x = self.norm2(fedforward + x)

x = self.do(x)

return x
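
# A minimal sketch of using TransformerBlock on its own (all sizes are assumptions):
#
#   block = TransformerBlock(emb=32, heads=4, mask=False, seq_length=10)
#   x = torch.randn(8, 10, 32)    # (batch, time, emb)
#   x = block(x)                  # attention + feed-forward with residuals; shape preserved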


class TransformerBlock_Agent(nn.Module):

def __init__(self, emb, heads, mask, seq_length, n_agents, ff_hidden_mult=4, dropout=0.0, wide=True):
super().__init__()

self.n_a = n_agents

self.attention = SelfAttentionWide(emb, heads=heads, mask=mask) if wide \
else SelfAttentionNarrow(emb, heads=heads, mask=mask)
self.mask = mask

self.norm1 = nn.LayerNorm(emb)
self.norm2 = nn.LayerNorm(emb)

self.ff = nn.Sequential(
nn.Linear(emb, ff_hidden_mult * emb),
nn.ReLU(),
nn.Linear(ff_hidden_mult * emb, emb)
)

self.do = nn.Dropout(dropout)

def forward(self, x):

_, t, e = x.size()

# regroup: (b*n_a, t, e) -> (b*t, n_a, e) so attention runs across agents at each time step
x = x.view(-1, self.n_a, t, e).transpose(1, 2).contiguous().view(-1, self.n_a, e)

attended = self.attention(x)

x = self.norm1(attended + x)

x = self.do(x)

fedforward = self.ff(x)

x = self.norm2(fedforward + x)

x = self.do(x)

# restore the original grouping: (b*t, n_a, e) -> (b*n_a, t, e)
x = x.view(-1, t, self.n_a, e).transpose(1, 2).contiguous().view(-1, t, e)

return x
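
# TransformerBlock_Agent treats the leading dimension as (batch * n_agents) and
# regroups internally so attention runs across agents at each time step. A
# hypothetical example with 3 agents (sizes assumed):
#
#   block = TransformerBlock_Agent(emb=32, heads=4, mask=False, seq_length=10, n_agents=3)
#   x = torch.randn(8 * 3, 10, 32)    # 8 episodes x 3 agents
#   x = block(x)                      # same shape; attention mixes agent information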