From 73dc4ac8295e046be39e9a7c2080a4856c4748b8 Mon Sep 17 00:00:00 2001
From: Patrick Wang <wang.patrick57@gmail.com>
Date: Sat, 21 Dec 2024 14:51:31 -0500
Subject: [PATCH] Download JOB data from a personal Google Drive link via gdown

---
 benchmark/job/cli.py          | 7 ++++---
 dependencies/requirements.txt | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/benchmark/job/cli.py b/benchmark/job/cli.py
index 06356164..be2feb01 100644
--- a/benchmark/job/cli.py
+++ b/benchmark/job/cli.py
@@ -3,7 +3,6 @@
 import click
 
 from benchmark.constants import DEFAULT_SCALE_FACTOR
-from benchmark.job.load_info import JobLoadInfo
 from util.log import DBGYM_LOGGER_NAME
 from util.shell import subprocess_run
 from util.workspace import (
@@ -13,7 +12,8 @@
     link_result,
 )
 
-JOB_TABLES_URL = "https://homepages.cwi.nl/~boncz/job/imdb.tgz"
+# The original source, https://homepages.cwi.nl/~boncz/job/imdb.tgz, stopped working, so the data is fetched from a Google Drive copy instead.
+JOB_TABLES_URL = "https://drive.google.com/uc?id=19m0zDpphAw0Bu9Irr_ta9EGr5k85hiN1"
 JOB_QUERY_NAMES = [
     "1a",
     "1b",
@@ -177,7 +177,8 @@ def _download_job_data(dbgym_cfg: DBGymConfig) -> None:
 
     logging.getLogger(DBGYM_LOGGER_NAME).info(f"Downloading: {expected_symlink_dpath}")
     real_data_path = dbgym_cfg.cur_task_runs_data_path(mkdir=True)
-    subprocess_run(f"curl -O {JOB_TABLES_URL}", cwd=real_data_path)
+    # gdown is used because JOB_TABLES_URL is a Google Drive link; for a non-Google-Drive link, run `curl -O {JOB_TABLES_URL}` instead.
+    subprocess_run(f"gdown {JOB_TABLES_URL}", cwd=real_data_path)
     job_data_dpath = dbgym_cfg.cur_task_runs_data_path(
         default_tables_dname(DEFAULT_SCALE_FACTOR), mkdir=True
     )
diff --git a/dependencies/requirements.txt b/dependencies/requirements.txt
index 6c0cb4b7..e1252f80 100644
--- a/dependencies/requirements.txt
+++ b/dependencies/requirements.txt
@@ -135,3 +135,4 @@ Werkzeug==3.0.1
 wrapt==1.14.1
 zipp==3.17.0
 streamlit==1.39.0
+gdown==5.2.0
\ No newline at end of file