feat: Leaderboard Submission Script #1

Status: Open
Wants to merge 8 commits into base: base-sha/5350f947594f1393f0a46bccec214ffd94ca5dc1

3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/cylinder_easy.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 500
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/honeypot.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 300
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/knotty.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 800
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/memory_palace.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 300
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/radial_large.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 1000
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/radial_mini.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 300
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/radial_small.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 200
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/swirls.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 500
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/thecube.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 500
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/walkaround.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 400
3 changes: 2 additions & 1 deletion configs/env/mettagrid/navigation/evals/wanderout.yaml
@@ -1,6 +1,7 @@
defaults:
- /env/mettagrid/mettagrid@

- _self_

game:
num_agents: 20 #how many agents are in the map x2
max_steps: 800
86 changes: 86 additions & 0 deletions devops/add_to_leaderboard.sh
@@ -0,0 +1,86 @@
#!/bin/bash

# Usage function for better help messages
usage() {
echo "Usage: $0 -r RUN_NAME [-w WANDB_PATH] [additional Hydra overrides]"
echo " -r RUN_NAME Your run name (e.g., b.$USER.test_run)"
echo " -w WANDB_PATH Optional: Full wandb path if different from auto-generated"
echo ""
echo " Any additional arguments will be passed directly to the Python commands"
echo " Example: $0 -r b.$USER.test_run +hardware=macbook"
exit 1
}

# Initialize variables
RUN_NAME=""
WANDB_PATH=""
ADDITIONAL_ARGS=""

# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
-r|--run)
RUN_NAME="$2"
shift 2
;;
-w|--wandb)
WANDB_PATH="$2"
shift 2
;;
-h|--help)
usage
;;
*)
# Collect additional arguments
ADDITIONAL_ARGS="$ADDITIONAL_ARGS $1"
shift
;;
esac
done

# Check if run name is provided
if [ -z "$RUN_NAME" ]; then
echo "Error: Run name is required"
usage
fi

# Auto-generate wandb path if not provided
if [ -z "$WANDB_PATH" ]; then
WANDB_PATH="wandb://run/$RUN_NAME"
fi

echo "Adding policy to eval leaderboard with run name: $RUN_NAME"
echo "Using policy URI: $WANDB_PATH"
if [ ! -z "$ADDITIONAL_ARGS" ]; then
echo "Additional arguments: $ADDITIONAL_ARGS"
fi

# Step 1: Verifying policy exists on wandb
echo "Step 1: Verifying policy exists on wandb..."
# Add a check here if needed to verify the policy exists on wandb

# Step 2: Run the simulation
echo "Step 2: Running simulation..."
SIM_CMD="python3 -m tools.sim sim=navigation run=\"$RUN_NAME\" policy_uri=\"$WANDB_PATH\" +eval_db_uri=wandb://artifacts/navigation_db $ADDITIONAL_ARGS"
echo "Executing: $SIM_CMD"
eval $SIM_CMD

Review comment (Author):

🚨 suggestion (security): Using eval for command execution may introduce security concerns.

Sanitize all arguments to prevent shell injection, or use an array-based command invocation instead of eval.

Suggested implementation:

# Step 2: Run the simulation using array-based command execution
echo "Step 2: Running simulation..."
# Split ADDITIONAL_ARGS into an array
read -r -a additional_args <<< "$ADDITIONAL_ARGS"
SIM_CMD_ARRAY=(python3 -m tools.sim sim=navigation "run=${RUN_NAME}" "policy_uri=${WANDB_PATH}" "+eval_db_uri=wandb://artifacts/navigation_db")
SIM_CMD_ARRAY+=( "${additional_args[@]}" )
echo "Executing: ${SIM_CMD_ARRAY[*]}"
"${SIM_CMD_ARRAY[@]}"

# Step 3: Analyze and update dashboard using array-based command execution
echo "Step 3: Analyzing results and updating dashboard..."
# Split ADDITIONAL_ARGS into an array for analyze (if needed)
read -r -a additional_args_analyze <<< "$ADDITIONAL_ARGS"
ANALYZE_CMD_ARRAY=(python3 -m tools.analyze "run=analyze" "+eval_db_uri=wandb://artifacts/navigation_db" "analyzer.output_path=s3://softmax-public/policydash/dashboard.html" "+analyzer.num_output_policies=all")
ANALYZE_CMD_ARRAY+=( "${additional_args_analyze[@]}" )
echo "Executing: ${ANALYZE_CMD_ARRAY[*]}"
"${ANALYZE_CMD_ARRAY[@]}"

Note: Ensure that ADDITIONAL_ARGS is properly defined and does not contain unintended extra characters, and check that the read -r -a additional_args usage does not interfere with other parts of the script.
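
For illustration, a minimal sketch of the difference between the two forms (the EXTRA variable and its value are hypothetical; the module invocation mirrors the one above):

# With eval, the value is re-parsed by the shell, so metacharacters take effect:
EXTRA='+note=foo; echo oops'
eval "python3 -m tools.sim $EXTRA"   # the semicolon starts a second command, so "echo oops" runs

# With an array, the same value reaches the program as a single argv entry:
CMD=(python3 -m tools.sim "$EXTRA")
"${CMD[@]}"                          # '+note=foo; echo oops' is passed verbatim as one argument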


# Check if the sim was successful
if [ $? -ne 0 ]; then
echo "Error: Simulation failed. Exiting."
exit 1
fi

# Step 3: Analyze and update dashboard
echo "Step 3: Analyzing results and updating dashboard..."
ANALYZE_CMD="python3 -m tools.analyze run=analyze +eval_db_uri=wandb://artifacts/navigation_db analyzer.output_path=s3://softmax-public/policydash/dashboard.html +analyzer.num_output_policies=\"all\" $ADDITIONAL_ARGS"
echo "Executing: $ANALYZE_CMD"
eval $ANALYZE_CMD

if [ $? -ne 0 ]; then
echo "Error: Analysis failed. Exiting."
exit 1
fi

echo "Successfully added policy to leaderboard and updated dashboard!"
echo "Dashboard URL: https://softmax-public.s3.amazonaws.com/policydash/dashboard.html"
62 changes: 62 additions & 0 deletions devops/build_mettagrid.sh
@@ -0,0 +1,62 @@
#!/bin/bash

# This script rebuilds mettagrid without rebuilding other dependencies

# Exit immediately if a command exits with a non-zero status
set -e

# Parse command line arguments
CLEAN=0
for arg in "$@"; do
case $arg in
--clean)
CLEAN=1
shift
;;
esac
done

# Display appropriate header based on clean flag
if [ "$CLEAN" -eq 1 ]; then
echo "========== Rebuilding mettagrid (clean) =========="
else
echo "========== Rebuilding mettagrid =========="
fi

# Get the directory where this script is located
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"

Review comment (Author) on lines +26 to +27:

suggestion (bug_risk): Usage of 'readlink -f' may have cross-platform issues.

macOS lacks 'readlink -f'. Consider using a portable method (e.g., realpath or a shell function) to determine the script directory.

Suggested change
# Get the directory where this script is located
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
# Get the directory where this script is located using a portable method
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd -P)"


# Go to the project root directory
cd "$SCRIPT_DIR/.."

# Check if deps/mettagrid exists
if [ ! -d "deps/mettagrid" ]; then
echo "Error: mettagrid directory not found at deps/mettagrid"
echo "Make sure you have run the full dependency installation script first."
exit 1
fi

# Navigate to mettagrid directory
cd deps/mettagrid

echo "Building mettagrid in $(pwd)"

# Clean build artifacts only if --clean flag is specified
if [ "$CLEAN" -eq 1 ]; then
echo "Cleaning previous build artifacts..."
rm -rf build
find . -name "*.so" -delete
echo "Clean completed."
else
echo "Skipping clean (use --clean to remove previous build artifacts)"
fi

# Rebuild mettagrid
echo "Rebuilding mettagrid..."
python setup.py build_ext --inplace

# Reinstall in development mode
echo "Reinstalling mettagrid in development mode..."
pip install -e .

echo "========== mettagrid rebuild complete =========="
11 changes: 5 additions & 6 deletions devops/checkout_and_build.sh
@@ -118,12 +118,11 @@ mkdir -p deps
cd deps

# ========== METTAGRID ==========
# Note that version control for the mettagrid package has been brought into our monorepo
cd mettagrid
echo "Building mettagrid into $(pwd)"
python setup.py build_ext --inplace
pip install -e .
cd ..
# Call the dedicated build_mettagrid.sh script instead of building directly
echo "Building mettagrid using devops/build_mettagrid.sh"
cd .. # Go back to project root
devops/build_mettagrid.sh
cd deps # Return to deps directory for remaining dependencies

# Install dependencies using the function
install_repo "fast_gae" $FAST_GAE_REPO "main" "python setup.py build_ext --inplace && pip install -e ."
80 changes: 59 additions & 21 deletions metta/agent/policy_store.py
@@ -93,7 +93,6 @@ def _policy_records(self, uri, selector_type="top", n=1, metric: str = "score"):
prs = self._prs_from_wandb_sweep(sweep_name, version)
else:
prs = self._prs_from_wandb_artifact(wandb_uri, version)

elif uri.startswith("file://"):
prs = self._prs_from_path(uri[len("file://") :])
elif uri.startswith("puffer://"):
@@ -104,42 +103,75 @@
if len(prs) == 0:
raise ValueError(f"No policies found at {uri}")

logger.info(f"Found {len(prs)} policies at {uri}")

if selector_type == "all":
logger.info(f"Returning all {len(prs)} policies")
return prs

elif selector_type == "latest":
return [prs[0]]

selected = [prs[0]]
logger.info(f"Selected latest policy: {selected[0].name}")
return selected
elif selector_type == "rand":
return [random.choice(prs)]

selected = [random.choice(prs)]
logger.info(f"Selected random policy: {selected[0].name}")
return selected
elif selector_type == "top":
if metric not in prs[0].metadata:
# check if the metric is in eval_scores
if "eval_scores" in prs[0].metadata and metric in prs[0].metadata["eval_scores"]:
policy_scores = {p: p.metadata["eval_scores"].get(metric, None) for p in prs}
else:
logger.warning(f"Metric {metric} not found in policy metadata, returning latest policy")
return [prs[0]] #
else:
if (
"eval_scores" in prs[0].metadata
and prs[0].metadata["eval_scores"] is not None
and metric in prs[0].metadata["eval_scores"]
):
# Metric is in eval_scores
logger.info(f"Found metric '{metric}' in metadata['eval_scores']")
policy_scores = {p: p.metadata.get("eval_scores", {}).get(metric, None) for p in prs}
elif metric in prs[0].metadata:
# Metric is directly in metadata
logger.info(f"Found metric '{metric}' directly in metadata")
policy_scores = {p: p.metadata.get(metric, None) for p in prs}
else:
# Metric not found anywhere
logger.warning(
f"Metric '{metric}' not found in policy metadata or eval_scores, returning latest policy"
)
selected = [prs[0]]
logger.info(f"Selected latest policy (due to missing metric): {selected[0].name}")
return selected

policies_with_scores = [p for p, s in policy_scores.items() if s is not None]

# If more than 20% of the policies have no score, return the latest policy
if len(policies_with_scores) < len(prs) * 0.8:
logger.warning("Too many invalid scores, returning latest policy")
return [prs[0]] # return latest if metric not found
top = sorted(policies_with_scores, key=lambda p: policy_scores[p])[-n:]
selected = [prs[0]] # return latest if metric not found
logger.info(f"Selected latest policy (due to too many invalid scores): {selected[0].name}")
return selected

# Sort by metric score (assuming higher is better)
def get_policy_score(policy: PolicyRecord) -> float: # Explicitly return a comparable type
score = policy_scores.get(policy)
if score is None:
return float("-inf") # Or another appropriate default
return score

top = sorted(policies_with_scores, key=get_policy_score)[-n:]

if len(top) < n:
logger.warning(f"Only found {len(top)} policies matching criteria, requested {n}")

logger.info(f"Top {n} policies by {metric}:")
logger.info(f"Top {len(top)} policies by {metric}:")
logger.info(f"{'Policy':<40} | {metric:<20}")
logger.info("-" * 62)
for pr in top:
logger.info(f"{pr.name:<40} | {pr.metadata.get(metric, 0):<20.4f}")
score = policy_scores[pr]
logger.info(f"{pr.name:<40} | {score:<20.4f}")

selected = top[-n:]
logger.info(f"Selected {len(selected)} top policies by {metric}")
for i, pr in enumerate(selected):
logger.info(f" {i + 1}. {pr.name} (score: {policy_scores[pr]:.4f})")

return top[-n:]
return selected
else:
raise ValueError(f"Invalid selector type {selector_type}")

@@ -180,10 +212,16 @@ def save(self, name: str, path: str, policy: nn.Module, metadata: dict):
return pr

def add_to_wandb_run(self, run_id: str, pr: PolicyRecord, additional_files=None):
return self.add_to_wandb_artifact(run_id, "model", pr.metadata, pr.local_path(), additional_files)
local_path = pr.local_path()
if local_path is None:
raise ValueError("PolicyRecord has no local path")
return self.add_to_wandb_artifact(run_id, "model", pr.metadata, local_path, additional_files)

def add_to_wandb_sweep(self, sweep_name: str, pr: PolicyRecord, additional_files=None):
return self.add_to_wandb_artifact(sweep_name, "sweep_model", pr.metadata, pr.local_path(), additional_files)
local_path = pr.local_path()
if local_path is None:
raise ValueError("PolicyRecord has no local path")
return self.add_to_wandb_artifact(sweep_name, "sweep_model", pr.metadata, local_path, additional_files)

def add_to_wandb_artifact(self, name: str, type: str, metadata: dict, local_path: str, additional_files=None):
if self._wandb_run is None:
2 changes: 1 addition & 1 deletion metta/rl/pufferlib/trainer.py
@@ -74,7 +74,7 @@ def __init__(
self.eval_stats_logger = EvalStatsLogger(self.sim_suite_config, wandb_run)
self.average_reward = 0.0 # Initialize average reward estimate
self._current_eval_score = None
self.eval_scores = None

Review comment (Author):

suggestion (bug_risk): Changed initialization of eval_scores to an empty dictionary.

Verify downstream code treats eval_scores as a dict—this avoids null checks but requires consistent usage.

Suggested implementation:

    if not self.eval_scores:                        # emptiness check instead of an "is None" check
        ...
    self.eval_scores["latest"] = current_score      # dict-style update ("latest" is the example key)

Depending on your downstream code usage you may have to:

  1. Change all conditional checks that compare self.eval_scores to None (e.g., “if self.eval_scores is None:”) into checks for emptiness (e.g., “if not self.eval_scores:”).
  2. Replace any method calls (like append, extend, etc.) on self.eval_scores with dictionary updates that use keys.
  3. Ensure that downstream code which reads from eval_scores uses the correct dictionary key(s) rather than assuming a list.
    Adjust the key names (“latest” in the example) to match the intended logic of your evaluation scoring.

self.eval_scores = {}
self._eval_results = []
self._weights_helper = WeightsMetricsHelper(cfg)
self._make_vecenv()