Changes from all commits
68 commits, all by harshbhatt7585:

aef91f5  Fix env creation (Sep 1, 2025)
1d51d81  remove metta.yaml (Sep 2, 2025)
3bb8380  remove yaml args (Sep 2, 2025)
8d8f6ae  revert num_workers (Sep 3, 2025)
84ab0f3  fix import (Sep 7, 2025)
3141dba  make like arena_basic_easy_shaped (Sep 7, 2025)
1040fcc  use MettaGridPufferBase (Sep 9, 2025)
1dba00c  soft code values (Sep 9, 2025)
f1ede99  fix env (Sep 9, 2025)
95a5134  change agent (Sep 9, 2025)
e836e8b  lint (Sep 10, 2025)
a4db8f6  remove env_id (Sep 10, 2025)
7e8191f  pass state (Sep 10, 2025)
7e0c8bc  remove recurrent (Sep 10, 2025)
a19ec18  clean code (Sep 10, 2025)
029a33c  Refactor & Clean (Sep 10, 2025)
a127966  basic env (Sep 11, 2025)
427f6a8  fix action space issue (Sep 11, 2025)
8110f56  Hyperparamter change (Sep 16, 2025)
1378605  Clamp log ratio (Sep 16, 2025)
1f08189  reduce workers (Sep 16, 2025)
17fa6a3  remove rnn (Sep 16, 2025)
172f08f  restore minibatch size (Sep 16, 2025)
a1df6c5  use easy_shaped_arena_basic configuration (Sep 16, 2025)
76dbf60  restore pufferl (Sep 16, 2025)
bc16929  fix log prob (Sep 16, 2025)
72ed0cd  fix sample logits (Sep 16, 2025)
cdd9557  log prob change (Sep 16, 2025)
3157d26  clean (Sep 16, 2025)
e9998a6  restore rnn (Sep 17, 2025)
a6631d6  restore recurrent architecture (Sep 17, 2025)
372a402  max vec (Sep 17, 2025)
b44d270  clamp max_vec (Sep 17, 2025)
f4d9d8e  normalize (Sep 17, 2025)
ba5b207  update env config (Sep 17, 2025)
4c9a094  remove config (Sep 17, 2025)
fb316d9  fix num of layers (Sep 17, 2025)
aa6ac2f  add lstm (Sep 17, 2025)
997c27c  remove forward (Sep 17, 2025)
efd84eb  Merge branch 'PufferAI:3.0' into fix-metta-train (Sep 17, 2025)
71b499e  refactor encode_observation (Sep 18, 2025)
388fe5c  increase cnn (Sep 18, 2025)
fb43c1a  add standard deviation (Sep 18, 2025)
6a2f1f0  do not clamp reward for mettagrid (Sep 18, 2025)
5549e95  remove override and trust the parent (Sep 18, 2025)
99fb972  revert (Sep 18, 2025)
9eb3d8a  fix max_vec (Sep 18, 2025)
b5777ef  adjust recent metta refactor (Sep 22, 2025)
fe2e2ba  set buffer how metta does (Sep 22, 2025)
4bc0f6c  remove comments (Sep 22, 2025)
65aff9c  remove print comments (Sep 22, 2025)
9e7f456  env cleanup (Sep 22, 2025)
141f425  revert num_of_workers (Sep 22, 2025)
0c919b9  remove clamping rewards (Sep 22, 2025)
26b37c5  clean code (Sep 22, 2025)
51d17b2  clean code (Sep 22, 2025)
bbefce1  clean torch.py (Sep 22, 2025)
1538779  change action space init (Sep 22, 2025)
04e67f5  revert action space (Sep 22, 2025)
c121d1d  correct git urls and path (Sep 28, 2025)
284d477  concise (Sep 30, 2025)
bc8f720  revert timestep (Sep 30, 2025)
92b2132  change version (Sep 30, 2025)
124ee1a  Merge remote-tracking branch 'origin' into fix-metta-train (Sep 30, 2025)
280d843  change gymnasium version (Sep 30, 2025)
d992a4d  change pettingzoo version (Sep 30, 2025)
10e7bde  change version (Oct 1, 2025)
ea7646a  remove space (Oct 1, 2025)
33 changes: 18 additions & 15 deletions pufferlib/config/metta.ini
@@ -17,23 +17,26 @@ heart_reward = 1.0
 [train]
 total_timesteps = 300_000_000
 batch_size = auto
-adam_beta1 = 0.8923106632311335
-adam_beta2 = 0.9632470625784862
-adam_eps = 1.3537431449843922e-7
-clip_coef = 0.14919147162017737
-ent_coef = 0.016700174334611493
-gae_lambda = 0.8443676864928215
-gamma = 0.997950174315581
-learning_rate = 0.018470110879570414
-max_grad_norm = 2.572849891206465
-optimizer = adam
+adam_beta1 = 0.9
+adam_beta2 = 0.999
+adam_eps = 1e-12
+clip_coef = 0.1
+ent_coef = 0.0021
+gae_lambda = 0.916
+gamma = 0.977
+learning_rate = 0.0003
+max_grad_norm = 0.5
 minibatch_size = 32768
 bptt_horizon = 64
-prio_alpha = 0.7918451491719373
-prio_beta0 = 0.5852686803034238
-vf_clip_coef = 0.1569624916309049
-vf_coef = 3.2211333828684454
-vtrace_c_clip = 2.134490283650365
-vtrace_rho_clip = 2.296343917695581
+prio_alpha = 0.0
+prio_beta0 = 0.6
+vf_clip_coef = 0.1
+vf_coef = 0.44
+vtrace_c_clip = 1.0
+vtrace_rho_clip = 1.0
+anneal_lr = True
+target_kl = 0.01

 [sweep]
 metric = agent/heart.gained
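For a quick sanity check, the new [train] values can be read back with Python's standard configparser. This is an illustrative sketch, not part of the PR; it assumes only the file path shown in the header above.

# Sketch only (not part of this PR): confirm the updated [train]
# values parse as numbers/booleans from pufferlib/config/metta.ini.
import configparser

parser = configparser.ConfigParser()
parser.read('pufferlib/config/metta.ini')

train = parser['train']
assert float(train['learning_rate']) == 3e-4
assert float(train['gamma']) == 0.977
assert int(train['minibatch_size']) == 32768
assert train.getboolean('anneal_lr') is True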
102 changes: 63 additions & 39 deletions pufferlib/environments/metta/environment.py
@@ -3,30 +3,52 @@
 import gymnasium

 import pufferlib
+from pufferlib.pufferlib import set_buffers

-from omegaconf import OmegaConf
-from metta.mettagrid.mettagrid_env import MettaGridEnv
-from metta.mettagrid.curriculum.core import SingleTaskCurriculum
-from metta.mettagrid.replay_writer import ReplayWriter
+from mettagrid.builder.envs import make_arena
+from mettagrid.envs.mettagrid_env import MettaGridEnv

 def env_creator(name='metta'):
     return functools.partial(make, name)

-def make(name, config='pufferlib/environments/metta/metta.yaml', render_mode='auto', buf=None, seed=0,
-         ore_reward=0.17088483842567775, battery_reward=0.9882859711234822, heart_reward=1.0):
-    '''Metta creation function'''
-
-    OmegaConf.register_new_resolver("div", oc_divide, replace=True)
-    cfg = OmegaConf.load(config)
-
-    # Update rewards under the new structure: agent.rewards.inventory
-    inventory_rewards = cfg['game']['agent']['rewards']['inventory']
-    inventory_rewards['ore_red'] = float(ore_reward)
-    inventory_rewards['heart'] = float(heart_reward)
-    inventory_rewards['battery_red'] = float(battery_reward)
-
-    curriculum = SingleTaskCurriculum('puffer', cfg)
-    return MettaPuff(curriculum, render_mode=render_mode, buf=buf, seed=seed)
+def make(
+    name,
+    render_mode="auto",
+    buf=None,
+    seed=0,
+    ore_reward=0.1,
+    battery_reward=0.8,
+    heart_reward=1.0,
+    num_agents=24,
+):
+    mettagrid_cfg = make_arena(num_agents=num_agents)
+
+    mettagrid_cfg.game.agent.rewards.inventory = {
+        "heart": heart_reward,
+        "ore_red": ore_reward,
+        "battery_red": battery_reward,
+        "laser": 0.5,
+        "armor": 0.5,
+        "blueprint": 0.5,
+    }
+
+    mettagrid_cfg.game.agent.rewards.inventory_max = {
+        "heart": 100,
+        "ore_red": 1,
+        "battery_red": 1,
+        "laser": 1,
+        "armor": 1,
+        "blueprint": 1,
+    }
+
+    mettagrid_cfg.game.objects["altar"].input_resources = {"battery_red": 1}
+
+    env = MettaPuff(mettagrid_cfg, render_mode=render_mode, seed=seed)
+    set_buffers(env, buf)
+    env.async_reset(seed=42)
+
+    return env

 def oc_divide(a, b):
     """
@@ -40,35 +62,37 @@ def oc_divide(a, b):
     return result

 class MettaPuff(MettaGridEnv):
-    def __init__(self, curriculum, render_mode='human', buf=None, seed=0):
+    def __init__(self, env_cfg, render_mode='human', seed=0):
         self.replay_writer = None
         #if render_mode == 'auto':
         #    self.replay_writer = ReplayWriter("metta/")

         super().__init__(
-            curriculum=curriculum,
+            env_cfg=env_cfg,
             render_mode=render_mode,
-            buf=buf,
-            replay_writer=self.replay_writer
+            replay_writer=self.replay_writer,
+            is_training=True,  # Enable training mode for desync_episodes
         )
         self.action_space = pufferlib.spaces.joint_space(self.single_action_space, self.num_agents)
         self.actions = self.actions.astype(np.int32)
+        self.infos = []

     @property
     def single_action_space(self):
         return gymnasium.spaces.MultiDiscrete(super().single_action_space.nvec, dtype=np.int32)

-    def step(self, actions):
-        obs, rew, term, trunc, info = super().step(actions)
-
-        if all(term) or all(trunc):
-            self.reset()
-            if 'agent_raw' in info:
-                del info['agent_raw']
-            if 'episode_rewards' in info:
-                info['score'] = info['episode_rewards']
-        else:
-            info = []
-
-        return obs, rew, term, trunc, [info]
+    def reset(self, seed=None):
+        obs, info = super().reset(seed)
+
+        # Update shared buffers if they exist (for vectorization)
+        if hasattr(self, 'observations') and self.observations is not None:
+            self.observations[:] = obs
+
+        self.infos = [info] * self.num_agents
+        return obs, self.infos
+
+    def step(self, actions):
+        obs, rewards, terminals, truncations, infos = super().step(actions)
+
+        # Update shared buffers if they exist (for vectorization)
+        if hasattr(self, 'observations') and self.observations is not None:
+            self.observations[:] = obs
+            self.rewards[:] = rewards
+            self.terminals[:] = terminals
+            self.truncations[:] = truncations
+
+        self.infos = infos
+        return obs, rewards, terminals, truncations, infos
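For context on how the refactored factory is driven, here is a minimal usage sketch. It is not part of the diff; the module path is inferred from the file header above, and the function names and keyword arguments are taken from the changed code itself.

# Sketch only: env_creator returns functools.partial(make, name), so the
# reward-shaping kwargs in make()'s signature can be overridden per call.
from pufferlib.environments.metta.environment import env_creator

make_fn = env_creator('metta')
env = make_fn(ore_reward=0.1, battery_reward=0.8, heart_reward=1.0, num_agents=24)

# MettaPuff.reset mirrors observations into the shared buffers and
# returns one info dict per agent.
obs, infos = env.reset(seed=0)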