"""Various wrappers for AEC MO environments."""
from typing import Optional

import numpy as np
from gymnasium.wrappers.normalize import RunningMeanStd
from pettingzoo.utils.wrappers.base import BaseWrapper


class RecordEpisodeStatistics(BaseWrapper):
    """This wrapper will record episode statistics and add them to the info dict at the end of each episode."""

    def __init__(self, env):
        """This wrapper will record episode statistics and add them to the info dict at the end of each episode.

        Args:
            env (AECEnv): The environment to wrap.
        """
        BaseWrapper.__init__(self, env)
        # Rewards are multi-objective vectors; the totals start at 0 and
        # become numpy arrays after the first accumulation.
        self.episode_rewards = {agent: 0 for agent in self.possible_agents}
        self.episode_lengths = {agent: 0 for agent in self.possible_agents}
    def last(self, observe: bool = True):
        """Receives the latest observation from the environment, recording episode statistics."""
        obs, rews, terminated, truncated, infos = super().last(observe=observe)
        # In an AEC environment, last() returns the reward accumulated by the
        # currently selected agent, so credit it to that agent only.
        agent = self.agent_selection
        self.episode_rewards[agent] += rews
        self.episode_lengths[agent] += 1
        if terminated or truncated:
            infos["episode"] = {
                "r": self.episode_rewards,
                "l": self.episode_lengths,
            }
        return obs, rews, terminated, truncated, infos

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Resets the environment and the episode statistics."""
        super().reset(seed=seed, options=options)
        for agent in self.env.possible_agents:
            self.episode_rewards[agent] = 0
            self.episode_lengths[agent] = 0
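
# Usage sketch (illustrative only, not part of this module): wrapping a
# PettingZoo-style AEC environment that emits multi-objective reward vectors.
# `mo_aec_env` is a placeholder name for any such environment; the loop below
# is the standard PettingZoo agent_iter pattern.
#
#     env = RecordEpisodeStatistics(mo_aec_env)
#     env.reset(seed=42)
#     for agent in env.agent_iter():
#         obs, reward, terminated, truncated, info = env.last()
#         if terminated or truncated:
#             print(info.get("episode"))  # per-agent vector returns and lengths
#             action = None
#         else:
#             action = env.action_space(agent).sample()
#         env.step(action)

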
class LinearizeReward(BaseWrapper):
    """Convert MO reward vector into scalar SO reward value.
