From 73dc4ac8295e046be39e9a7c2080a4856c4748b8 Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Sat, 21 Dec 2024 14:51:31 -0500 Subject: [PATCH] now downloading job data from my personal drive link --- benchmark/job/cli.py | 7 ++++--- dependencies/requirements.txt | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmark/job/cli.py b/benchmark/job/cli.py index 06356164..be2feb01 100644 --- a/benchmark/job/cli.py +++ b/benchmark/job/cli.py @@ -3,7 +3,6 @@ import click from benchmark.constants import DEFAULT_SCALE_FACTOR -from benchmark.job.load_info import JobLoadInfo from util.log import DBGYM_LOGGER_NAME from util.shell import subprocess_run from util.workspace import ( @@ -13,7 +12,8 @@ link_result, ) -JOB_TABLES_URL = "https://homepages.cwi.nl/~boncz/job/imdb.tgz" +# JOB_TABLES_URL = "https://homepages.cwi.nl/~boncz/job/imdb.tgz" # This link stopped working for me +JOB_TABLES_URL = "https://drive.google.com/uc?id=19m0zDpphAw0Bu9Irr_ta9EGr5k85hiN1" JOB_QUERY_NAMES = [ "1a", "1b", @@ -177,7 +177,8 @@ def _download_job_data(dbgym_cfg: DBGymConfig) -> None: logging.getLogger(DBGYM_LOGGER_NAME).info(f"Downloading: {expected_symlink_dpath}") real_data_path = dbgym_cfg.cur_task_runs_data_path(mkdir=True) - subprocess_run(f"curl -O {JOB_TABLES_URL}", cwd=real_data_path) + # subprocess_run(f"curl -O {JOB_TABLES_URL}", cwd=real_data_path) # This is if we're using a non-Google-Drive link + subprocess_run(f"gdown {JOB_TABLES_URL}", cwd=real_data_path) job_data_dpath = dbgym_cfg.cur_task_runs_data_path( default_tables_dname(DEFAULT_SCALE_FACTOR), mkdir=True ) diff --git a/dependencies/requirements.txt b/dependencies/requirements.txt index 6c0cb4b7..e1252f80 100644 --- a/dependencies/requirements.txt +++ b/dependencies/requirements.txt @@ -135,3 +135,4 @@ Werkzeug==3.0.1 wrapt==1.14.1 zipp==3.17.0 streamlit==1.39.0 +gdown==5.2.0 \ No newline at end of file