forked from simplescaling/s1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcurate_med_s1k.sh
executable file
·38 lines (30 loc) · 1.35 KB
/
curate_med_s1k.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
#
# Submit the med-s1k curation job for one experiment.
#
# Usage: curate_med_s1k.sh <experiment_name>
#
# Steps:
#   1. Source config.sh (default path below; override with MED_S1_CONFIG).
#   2. Verify the experiment has a non-null "config" entry in $RESULTS_JSON.
#   3. Create ${MED_S1_DIR}/logs and export EXPERIMENT_NAME.
#   4. Submit the CPU job script if the filtered base dataset parquet already
#      exists under $DATA_DIR, otherwise submit the GPU job script.
#
# Required environment after sourcing config.sh:
#   RESULTS_JSON, MED_S1_DIR, DATA_DIR
#
# Exit status: 1 on usage/configuration/lookup errors; otherwise the exit
# status of sbatch.
#
# NOTE(review): strict mode (set -euo pipefail) is intentionally not enabled
# because config.sh is sourced into this shell and its contents are not
# visible here; failures that matter are checked explicitly instead.

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Exactly one argument (the experiment name) is required.
if [[ $# -ne 1 ]]; then
  die "Usage: $0 <experiment_name>"
fi
experiment_name=$1

# Source configuration first to get environment variables.
config_file="${MED_S1_CONFIG:-/share/pi/nigam/users/calebwin/med-s1/config.sh}"
echo "Sourcing config.sh..."
# shellcheck source=/dev/null
source "$config_file" || die "Failed to source config.sh"

# Fail early if the config did not provide the variables used below; an empty
# MED_S1_DIR would otherwise turn "${MED_S1_DIR}/logs" into "/logs".
for required_var in RESULTS_JSON MED_S1_DIR DATA_DIR; do
  if [[ -z "${!required_var:-}" ]]; then
    die "Error: $required_var is not set (expected from config.sh)"
  fi
done

command -v jq >/dev/null || die "Error: jq is required but was not found in PATH"

# Look up the experiment config in results.json. The name is passed with
# --arg so quotes or backslashes in it cannot change the jq filter, and jq's
# exit status is checked so an unreadable/malformed file is reported instead
# of being mistaken for a valid (empty) result.
if ! config=$(jq -r --arg name "$experiment_name" \
  '.experiments[$name].config' "$RESULTS_JSON"); then
  die "Error: Failed to read experiment config from $RESULTS_JSON"
fi
if [[ "$config" == "null" ]]; then
  die "Error: Experiment '$experiment_name' not found in $RESULTS_JSON"
fi

# Create logs directory if it doesn't exist.
mkdir -p "${MED_S1_DIR}/logs" \
  || die "Error: Failed to create ${MED_S1_DIR}/logs"

# Export experiment name for the submitted job.
export EXPERIMENT_NAME="$experiment_name"

# Pick the job script based on whether the base dataset already exists.
# Testing the parquet file alone is sufficient: it can only exist if its
# parent directory does.
base_dataset_dir="$DATA_DIR/plumbing_test_001_20250219_145607"
if [[ -f "$base_dataset_dir/med_s1k_filtered.parquet" ]]; then
  echo "Base dataset already exists at $base_dataset_dir"
  echo "Using CPU for processing since we only need to read and process existing data..."
  job_script="${MED_S1_DIR}/data/curate_med_s1k_cpu.sh"
else
  echo "Base dataset needs to be created"
  echo "Using GPU for processing since we need to run model inference..."
  job_script="${MED_S1_DIR}/data/curate_med_s1k_gpu.sh"
fi

# sbatch is the last command, so its exit status becomes the script's.
sbatch "$job_script" "$experiment_name"