|
| 1 | +version: '3' # Taskfile build version |
| 2 | + |
| 3 | +vars: |
| 4 | + # Variables |
| 5 | + env_name: "LeanAgent" |
| 6 | + python_version: "3.10" |
| 7 | + raid_dir: "data/raid" # root folder that stores all LeanAgent artifacts (repos, datasets, checkpoints) |
| 8 | + repo_dir: "{{.raid_dir}}/repos_new" # where GitHub Lean repositories are cloned to |
| 9 | + tactic_generator_id: "11DXxixg6S4-hUA-u-78geOxEB7J7rCoX" # gdown file id for the pretrained tactic-generator ckpt |
| 10 | + starting_retriever_id: "1aRd1jQPu_TX15Ib5htzn3wZqHYowl3ax" # gdown id for the baseline retriever checkpoint |
| 11 | + leanagent_ckpt_id: "1plkC7Y5n0OVCJ0Ad6pH8_mwbALKYY_FY" # gdown id for a fully-trained LeanAgent model snapshot |
| 12 | + fisher_start_id: "1Q8yHq7XTAaHXGCiCGmhwZhTfkhHhN1cP" # gdown id for initial Fisher matrix used in EWC ablation |
| 13 | + cache_dir: "{{.raid_dir}}/.cache/lean_dojo" # disk cache used by LeanDojo tracing |
| 14 | + ray_tmpdir: "{{.raid_dir}}/tmp" # scratch space for Ray object-store & spill files |
| 15 | + |
| 16 | + # Paths |
| 17 | + data_dir: "datasets" # subfolder (inside RAID_DIR) where processed Lean datasets live |
| 18 | + checkpoint_dir: "checkpoints" # directory for model checkpoints saved during training |
| 19 | + eval_results_file: "eval_results.json" # aggregated evaluation metrics written by LeanAgent |
| 20 | + db_file: "leanagent_db.json" # JSON file backing the dynamic theorem database |
| 21 | + proof_log_file: "proof_log.json" # chronological log of proof attempts / successes |
| 22 | + encountered_theorems_file: "encountered_theorems.txt" # flat list of theorem names already processed |
| 23 | + fisher_dir: "fisher" # folder where per-repository Fisher matrices are stored |
| 24 | + |
| 25 | + |
| 26 | +# Load project-local secrets (i.e. GitHub token) |
| 27 | +dotenv: ['.env'] |
| 28 | + |
| 29 | +# Ensure Conda is discoverable for all tasks |
| 30 | +env: |
| 31 | + PATH: "$HOME/miniconda/condabin:$HOME/miniconda/bin:$CONDA_HOME/bin:$PATH" |
| 32 | + |
| 33 | +tasks: |
| 34 | + # --------------------------------------------------------------------------- |
| 35 | + # Run pytest test-suite |
| 36 | + # --------------------------------------------------------------------------- |
| 37 | + test: |
| 38 | + desc: "Run the pytest test suite." |
| 39 | + cmds: |
| 40 | + - "python -m pytest" |
| 41 | + |
| 42 | + # --------------------------------------------------------------------------- |
| 43 | + # Ensure GitHub token is available in .env |
| 44 | + # --------------------------------------------------------------------------- |
| 45 | + check_github_token: |
| 46 | + desc: "Fail if .env is missing or lacks a non-empty GITHUB_ACCESS_TOKEN entry." |
| 47 | + cmds: |
| 48 | + - | |
| 49 | + bash -c ' |
| 50 | + if [ ! -f ".env" ]; then |
| 51 | + echo "ERROR: .env file missing — create it and add GITHUB_ACCESS_TOKEN=<token>" >&2 |
| 52 | + exit 1 |
| 53 | + fi |
| 54 | + TOKEN=$(grep -E "^GITHUB_ACCESS_TOKEN=" .env | cut -d"=" -f2-) |
| 55 | + if [ -z "$TOKEN" ]; then |
| 56 | + echo "ERROR: GITHUB_ACCESS_TOKEN not set in .env" >&2 |
| 57 | + exit 1 |
| 58 | + fi |
| 59 | + ' |
| 60 | + create_conda_env: |
| 61 | + desc: "Create conda env {{.env_name}} if missing" |
| 62 | + cmds: |
| 63 | + - cmd: | |
| 64 | + bash -c ' |
| 65 | + # Ensure conda executable is reachable |
| 66 | + for d in "$HOME/miniconda/condabin" "$HOME/miniconda/bin" "$HOME/miniconda3/bin" "$HOME/anaconda3/bin" "$HOME/miniforge3/bin" "/opt/conda/bin"; do |
| 67 | + if [ -d "$d" ] && [[ ":$PATH:" != *":$d:"* ]]; then |
| 68 | + export PATH="$d:$PATH" |
| 69 | + fi |
| 70 | + done |
| 71 | +
|
| 72 | + # If conda still missing, install Miniconda silently |
| 73 | + if ! command -v conda >/dev/null 2>&1; then |
| 74 | + echo "Conda not found – installing Miniconda..." >&2 |
| 75 | + curl -sSLo "$HOME/miniconda.sh" https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh |
| 76 | + bash "$HOME/miniconda.sh" -b -p "$HOME/miniconda3" |
| 77 | + export PATH="$HOME/miniconda3/bin:$PATH" |
| 78 | + rm "$HOME/miniconda.sh" |
| 79 | + fi |
| 80 | +
|
| 81 | + if ! command -v conda >/dev/null 2>&1; then |
| 82 | + echo "Conda executable not found in PATH" >&2 |
| 83 | + exit 127 |
| 84 | + fi |
| 85 | + if ! conda env list | grep -q "^{{.env_name}} "; then |
| 86 | + conda create -n {{.env_name}} python={{.python_version}} -y |
| 87 | + fi |
| 88 | + ' |
| 89 | + |
| 90 | + install_elan: |
| 91 | + desc: "Ensure elan is installed" |
| 92 | + cmds: |
| 93 | + - cmd: | |
| 94 | + bash -c ' |
| 95 | + if [ ! -f "$HOME/.elan/bin/elan" ]; then |
| 96 | + echo "elan not found. Installing..." |
| 97 | + curl https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh -sSf | sh -s -- --default-toolchain none -y |
| 98 | + else |
| 99 | + echo "elan is already installed." |
| 100 | + fi |
| 101 | + ' |
| 102 | + |
| 103 | + install_requirements: |
| 104 | + desc: "Install Python requirements" |
| 105 | + |
| 106 | + cmds: |
| 107 | + - "conda run -n {{.env_name}} pip install -r requirements.txt" |
| 108 | + |
| 109 | + download_checkpoint_data: |
| 110 | + desc: "Download pretrained checkpoints via gdown" |
| 111 | + cmds: |
| 112 | + - cmd: | |
| 113 | + bash -c ' |
| 114 | + conda run -n {{.env_name}} pip install --quiet gdown |
| 115 | + conda run -n {{.env_name}} gdown https://drive.google.com/uc?id={{.tactic_generator_id}} -O /tmp/tactic_gen.ckpt |
| 116 | + conda run -n {{.env_name}} gdown https://drive.google.com/uc?id={{.starting_retriever_id}} -O /tmp/start_retriever.ckpt |
| 117 | + conda run -n {{.env_name}} gdown https://drive.google.com/uc?id={{.leanagent_ckpt_id}} -O /tmp/leanagent.ckpt |
| 118 | +
|
| 119 | + mkdir -p {{.raid_dir}}/{{.checkpoint_dir}} |
| 120 | + mv /tmp/tactic_gen.ckpt {{.raid_dir}}/ # TODO: This is not used, investigate. |
| 121 | + mv /tmp/start_retriever.ckpt {{.raid_dir}}/{{.checkpoint_dir}}/ |
| 122 | + mv /tmp/leanagent.ckpt {{.raid_dir}}/{{.checkpoint_dir}}/ |
| 123 | + ' |
| 124 | + replace_files: |
| 125 | + desc: "Patch LeanAgent files with custom versions" |
| 126 | + env: |
| 127 | + RAID_DIR: "{{.raid_dir}}" |
| 128 | + cmds: |
| 129 | + - "bash replace_files.sh" |
| 130 | + |
| 131 | + # --------------------------------------------------------------------------- |
| 132 | + # Composite setup task |
| 133 | + # --------------------------------------------------------------------------- |
| 134 | + setup: |
| 135 | + desc: "Initial setup: conda env, elan, requirements, checkpoints, file patches" |
| 136 | + deps: |
| 137 | + - check_github_token |
| 138 | + - install_elan |
| 139 | + - create_conda_env |
| 140 | + - install_requirements |
| 141 | + - download_checkpoint_data |
| 142 | + - replace_files |
| 143 | + cmds: [] |
| 144 | + |
| 145 | + # --------------------------------------------------------------------------- |
| 146 | + # Launch LeanAgent main training/proving process |
| 147 | + # --------------------------------------------------------------------------- |
| 148 | + run: |
| 149 | + desc: "Launch LeanAgent main training/proving process." |
| 150 | + deps: |
| 151 | + - check_github_token |
| 152 | + env: |
| 153 | + PYTHONPATH: "$PYTHONPATH:{{.raid_dir}}/LeanAgent" |
| 154 | + CACHE_DIR: "{{.cache_dir}}" |
| 155 | + RAY_TMPDIR: "{{.ray_tmpdir}}" |
| 156 | + RAID_DIR: "{{.raid_dir}}" |
| 157 | + REPO_DIR: "{{.repo_dir}}" |
| 158 | + DATA_DIR: "{{.data_dir}}" |
| 159 | + CHECKPOINT_DIR: "{{.checkpoint_dir}}" |
| 160 | + EVAL_RESULTS_FILE_PATH: "{{.eval_results_file}}" |
| 161 | + DB_FILE_NAME: "{{.db_file}}" |
| 162 | + PROOF_LOG_FILE_NAME: "{{.proof_log_file}}" |
| 163 | + ENCOUNTERED_THEOREMS_FILE: "{{.encountered_theorems_file}}" |
| 164 | + FISHER_DIR: "{{.fisher_dir}}" |
| 165 | + cmds: |
| 166 | + - | |
| 167 | + bash -c ' |
| 168 | + source $(conda info --base)/etc/profile.d/conda.sh |
| 169 | + conda activate {{.env_name}} |
| 170 | + rm -rf $RAY_TMPDIR || true |
| 171 | + mkdir -p "$RAY_TMPDIR" |
| 172 | + ray stop --force || true |
| 173 | + nvidia-smi || true |
| 174 | + python leanagent.py |
| 175 | + ' |
| 176 | +
|
| 177 | + # --------------------------------------------------------------------------- |
| 178 | + # Compute Fisher information matrix |
| 179 | + # --------------------------------------------------------------------------- |
| 180 | + run_fisher: |
| 181 | + desc: "Elastic Weight Consolidation for lifelong learning." |
| 182 | + env: |
| 183 | + PYTHONPATH: "$PYTHONPATH:{{.raid_dir}}/LeanAgent" |
| 184 | + NEW_DATA_PATH: "{{.raid_dir}}/datasets/new_data" |
| 185 | + RAID_DIR: "{{.raid_dir}}" |
| 186 | + CACHE_DIR: "{{.cache_dir}}" |
| 187 | + RAY_TMPDIR: "{{.ray_tmpdir}}" |
| 188 | + cmds: |
| 189 | + - | |
| 190 | + bash -c ' |
| 191 | + source $(conda info --base)/etc/profile.d/conda.sh |
| 192 | + conda activate {{.env_name}} |
| 193 | +
|
| 194 | + # Ensure initial Fisher matrix is present |
| 195 | + python -m pip install --quiet gdown |
| 196 | + if [ ! -f "{{.raid_dir}}/fisher_start.pkl" ]; then |
| 197 | + gdown https://drive.google.com/uc?id={{.fisher_start_id}} -O {{.raid_dir}}/fisher_start.pkl |
| 198 | + fi |
| 199 | +
|
| 200 | + echo "Removing old Ray temp files" >&2 |
| 201 | + rm -rf /tmp/ray || true |
| 202 | + rm -rf "$RAY_TMPDIR" || true |
| 203 | + mkdir -p "$RAY_TMPDIR" |
| 204 | + echo "Stopping Ray" >&2 |
| 205 | + ray stop --force || true |
| 206 | +
|
| 207 | + # Optional GPU status |
| 208 | + nvidia-smi || true |
| 209 | +
|
| 210 | + echo "Running compute_fisher.py" >&2 |
| 211 | + python compute_fisher.py |
| 212 | + ' |
| 213 | +
|
| 214 | + # --------------------------------------------------------------------------- |
| 215 | + # Lifelong learning: alternate training (run) and Fisher computation |
| 216 | + # --------------------------------------------------------------------------- |
| 217 | + run_ewc: |
| 218 | + desc: "Alternate run and run_fisher tasks N times (default 1). Usage: task run_ewc -- 5" |
| 219 | + cmds: |
| 220 | + - | |
| 221 | + bash -c ' |
| 222 | + COUNT={{default "1" (first .CLI_ARGS)}} |
| 223 | + echo "Running EWC for $COUNT iteration(s)" |
| 224 | + for i in $(seq 1 $COUNT); do |
| 225 | + echo "--- Iteration $i: Training ---" |
| 226 | + task run |
| 227 | + echo "--- Iteration $i: Computing Fisher ---" |
| 228 | + task run_fisher |
| 229 | + done |
| 230 | + ' |
0 commit comments