-
Notifications
You must be signed in to change notification settings - Fork 0
feat: Leaderboard Submission Script #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: base-sha/5350f947594f1393f0a46bccec214ffd94ca5dc1
Are you sure you want to change the base?
Changes from all commits
075b80a
8eddf00
3dd0fac
0188fda
294da36
d529eb6
a712595
8a7efb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#!/bin/bash | ||
|
||
# Print the command-line help text on stdout and terminate the script.
# Arguments:
#   $1 - optional exit status; defaults to 1 so that existing callers that
#        invoke `usage` with no argument keep exiting with a failure status.
usage() {
  # Unquoted heredoc delimiter: $0 and $USER are expanded, as in the help text
  # examples that show the caller's own script name and user name.
  cat <<EOF
Usage: $0 -r RUN_NAME [-w WANDB_PATH] [additional Hydra overrides]
 -r RUN_NAME Your run name (e.g., b.$USER.test_run)
 -w WANDB_PATH Optional: Full wandb path if different from auto-generated

 Any additional arguments will be passed directly to the Python commands
 Example: $0 -r b.$USER.test_run +hardware=macbook
EOF
  exit "${1:-1}"
}
|
||
# Initialize variables
RUN_NAME=""
WANDB_PATH=""
ADDITIONAL_ARGS=""

# Parse command line arguments.
# Globals written:
#   RUN_NAME        - value following -r/--run
#   WANDB_PATH      - value following -w/--wandb
#   ADDITIONAL_ARGS - every unrecognised argument, space-joined (each one is
#                     appended with a leading space)
# Arguments: the script's positional parameters.
# Exits via usage() on -h/--help or when -r/-w is given without a value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -r|--run)
        # `shift 2` with a single remaining argument fails WITHOUT shifting,
        # which would make this loop spin forever; reject the input instead.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          usage
        fi
        RUN_NAME="$2"
        shift 2
        ;;
      -w|--wandb)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires a value" >&2
          usage
        fi
        WANDB_PATH="$2"
        shift 2
        ;;
      -h|--help)
        usage
        ;;
      *)
        # Collect additional arguments
        ADDITIONAL_ARGS="$ADDITIONAL_ARGS $1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
|
||
# A run name is mandatory: report the problem and show the help text.
if [[ -z "$RUN_NAME" ]]; then
  echo "Error: Run name is required"
  usage
fi

# Derive the wandb path from the run name when none was supplied
# (":=" assigns only if WANDB_PATH is unset or empty).
: "${WANDB_PATH:=wandb://run/$RUN_NAME}"

# Tell the user what is about to be submitted.
echo "Adding policy to eval leaderboard with run name: $RUN_NAME"
echo "Using policy URI: $WANDB_PATH"
if [[ -n "$ADDITIONAL_ARGS" ]]; then
  echo "Additional arguments: $ADDITIONAL_ARGS"
fi
|
||
# Step 1: Verifying policy exists on wandb
echo "Step 1: Verifying policy exists on wandb..."
# Add a check here if needed to verify the policy exists on wandb

# Step 2: Run the simulation
echo "Step 2: Running simulation..."
# Split the space-joined extra overrides into words without handing them back
# to the shell parser: unlike `eval`, this performs no command substitution,
# globbing or operator parsing on user-supplied text (so overrides containing
# characters such as ( ) ; * $ are passed through literally).
read -r -a sim_extra_args <<< "$ADDITIONAL_ARGS"
SIM_CMD=(
  python3 -m tools.sim
  sim=navigation
  "run=$RUN_NAME"
  "policy_uri=$WANDB_PATH"
  +eval_db_uri=wandb://artifacts/navigation_db
  "${sim_extra_args[@]}"
)
echo "Executing: ${SIM_CMD[*]}"

# Check if the sim was successful
if ! "${SIM_CMD[@]}"; then
  echo "Error: Simulation failed. Exiting."
  exit 1
fi
|
||
# Step 3: Analyze and update dashboard
echo "Step 3: Analyzing results and updating dashboard..."
# Word-split the extra overrides into an array instead of re-parsing a command
# string with `eval`, so user-supplied text is never interpreted as shell
# syntax (no command substitution, globbing or operator parsing).
read -r -a analyze_extra_args <<< "$ADDITIONAL_ARGS"
ANALYZE_CMD=(
  python3 -m tools.analyze
  run=analyze
  +eval_db_uri=wandb://artifacts/navigation_db
  analyzer.output_path=s3://softmax-public/policydash/dashboard.html
  # The original string carried escaped quotes around "all" that `eval`
  # stripped; the program therefore always received the bare word.
  +analyzer.num_output_policies=all
  "${analyze_extra_args[@]}"
)
echo "Executing: ${ANALYZE_CMD[*]}"

if ! "${ANALYZE_CMD[@]}"; then
  echo "Error: Analysis failed. Exiting."
  exit 1
fi

echo "Successfully added policy to leaderboard and updated dashboard!"
echo "Dashboard URL: https://softmax-public.s3.amazonaws.com/policydash/dashboard.html"
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,62 @@ | ||||||||||
#!/bin/bash | ||||||||||
|
||||||||||
# This script rebuilds mettagrid without rebuilding other dependencies | ||||||||||
|
||||||||||
# Exit immediately if a command exits with a non-zero status
set -e

# Parse command line arguments: CLEAN becomes 1 when --clean appears anywhere
# on the command line, otherwise it stays 0. Other arguments are ignored.
CLEAN=0
for arg in "$@"; do
  case "$arg" in
    --clean)
      # No `shift` here: the loop iterates over an already-expanded copy of
      # "$@", so shifting would not advance the loop and would only discard
      # the first positional parameter, whatever it happens to be.
      CLEAN=1
      ;;
  esac
done
|
||||||||||
# Pick the banner that matches the clean flag, then print it once.
if [ "$CLEAN" -eq 1 ]; then
  banner="========== Rebuilding mettagrid (clean) =========="
else
  banner="========== Rebuilding mettagrid =========="
fi
printf '%s\n' "$banner"
|
||||||||||
# Get the directory where this script is located.
# `readlink -f` is not available on every platform (the review on this change
# calls out macOS), so it is tried first and, if it fails or prints nothing,
# the directory is resolved with a plain `cd` + `pwd -P` instead.
if script_path="$(readlink -f "$0" 2>/dev/null)" && [ -n "$script_path" ]; then
  SCRIPT_DIR="$(dirname "$script_path")"
else
  SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd -P)"
fi
Comment on lines
+26
to
+27
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. suggestion (bug_risk): Using `readlink -f` may cause cross-platform issues: macOS lacks `readlink -f`. Consider using a portable method (e.g., `realpath` or a shell function) to determine the script directory.
Suggested change
|
||||||||||
|
||||||||||
# Move to the project root, one level above the script's directory.
cd "$SCRIPT_DIR/.."

# Refuse to continue when the mettagrid checkout is missing.
[ -d "deps/mettagrid" ] || {
  printf '%s\n' \
    "Error: mettagrid directory not found at deps/mettagrid" \
    "Make sure you have run the full dependency installation script first."
  exit 1
}

# Work from inside the mettagrid checkout from here on.
cd deps/mettagrid

printf 'Building mettagrid in %s\n' "$(pwd)"
|
||||||||||
# Remove the build directory and every *.so below the current directory,
# but only when the --clean flag was given.
if [ "$CLEAN" -eq 1 ]; then
  printf '%s\n' "Cleaning previous build artifacts..."
  rm -rf build
  find . -name "*.so" -delete
  printf '%s\n' "Clean completed."
else
  printf '%s\n' "Skipping clean (use --clean to remove previous build artifacts)"
fi
|
||||||||||
# Compile the extension modules in place.
printf '%s\n' "Rebuilding mettagrid..."
python setup.py build_ext --inplace

# Install the package from the current directory in editable mode.
printf '%s\n' "Reinstalling mettagrid in development mode..."
pip install -e .

printf '%s\n' "========== mettagrid rebuild complete =========="
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,7 +74,7 @@ def __init__( | |
self.eval_stats_logger = EvalStatsLogger(self.sim_suite_config, wandb_run) | ||
self.average_reward = 0.0 # Initialize average reward estimate | ||
self._current_eval_score = None | ||
self.eval_scores = None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. suggestion (bug_risk): Changed initialization of `eval_scores` to an empty dictionary. Verify that downstream code treats `eval_scores` as a dict — this avoids null checks but requires consistent usage. Suggested implementation: `if not self.eval_scores: self.eval_scores["latest"] = current_score`. Depending on your downstream code usage you may have to:
|
||
self.eval_scores = {} | ||
self._eval_results = [] | ||
self._weights_helper = WeightsMetricsHelper(cfg) | ||
self._make_vecenv() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🚨 suggestion (security): Using eval for command execution may introduce security concerns.
Sanitize all arguments to prevent shell injection, or use an array-based command invocation instead of eval.
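Suggested implementation (reconstructed; the original snippet was lost in extraction):
    read -r -a additional_args <<< "$ADDITIONAL_ARGS"
    python3 -m tools.sim sim=navigation "run=$RUN_NAME" "policy_uri=$WANDB_PATH" +eval_db_uri=wandb://artifacts/navigation_db "${additional_args[@]}"
Note: Ensure that the variable ADDITIONAL_ARGS does not contain unintended extra characters and is properly defined. Review the rest of the script so that the use of `read -r -a additional_args` does not interfere with other parts of your code.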