diff --git a/.github/workflows/unittest_ci.yml b/.github/workflows/tests_ci.yml
similarity index 68%
rename from .github/workflows/unittest_ci.yml
rename to .github/workflows/tests_ci.yml
index 0c088c98..f2f151f9 100644
--- a/.github/workflows/unittest_ci.yml
+++ b/.github/workflows/tests_ci.yml
@@ -1,4 +1,4 @@
-name: Unit Tests
+name: Unit and Integration Tests
 
 on:
   push: {}
@@ -26,10 +26,22 @@ jobs:
       # Note that the GHA runners are stateful. Dependencies installed from previous runs will still be on the runner.
       # This means this step will usually be pretty fast as most dependencies will already be cached. However, it also
       # means that past runs might interfere with the current run, so you sometimes may need to restart the GHA runners.
+
+      # We need to run `. "$HOME/.cargo/env"` in each step that needs Cargo because each GHA step starts in a fresh shell.
       - name: Install dependencies
         run: |
           ./dependencies/install_dependencies.sh
-          . "$HOME/.cargo/env"
 
       - name: Run unit tests
-        run: python scripts/run_unittests.py
+        run: |
+          . "$HOME/.cargo/env"
+          python scripts/run_unit_tests.py
+
+      - name: Run integration test
+        # Delete the workspace. Run once with a clean workspace. Run again from the existing workspace.
+        # Need to run with a non-root user in order to start Postgres.
+        run: |
+          . "$HOME/.cargo/env"
+          rm -rf ../dbgym_integtest_workspace
+          ./scripts/integration_test.sh ssd
+          ./scripts/integration_test.sh ssd
diff --git a/benchmark/tpch/cli.py b/benchmark/tpch/cli.py
index 82adeff5..ac9a11a4 100644
--- a/benchmark/tpch/cli.py
+++ b/benchmark/tpch/cli.py
@@ -67,7 +67,7 @@ def _clone(dbgym_cfg: DBGymConfig):
         f"./tpch_setup.sh {real_build_path}", cwd=dbgym_cfg.cur_source_path()
     )
     symlink_dpath = link_result(dbgym_cfg, real_build_path / "tpch-kit")
-    assert os.path.samefile(expected_symlink_dpath, symlink_dpath)
+    assert expected_symlink_dpath.samefile(symlink_dpath)
     benchmark_tpch_logger.info(f"Cloned: {expected_symlink_dpath}")
 
 
@@ -97,7 +97,7 @@ def _generate_queries(dbgym_cfg: DBGymConfig, seed_start: int, seed_end: int, sc
         verbose=False,
     )
     queries_symlink_dpath = link_result(dbgym_cfg, real_dir)
-    assert os.path.samefile(queries_symlink_dpath, expected_queries_symlink_dpath)
+    assert queries_symlink_dpath.samefile(expected_queries_symlink_dpath)
     benchmark_tpch_logger.info(
         f"Generated queries: {data_path} [{seed_start}, {seed_end}]"
     )
@@ -119,7 +119,7 @@ def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float):
     subprocess_run(f"mv ./*.tbl {real_dir}", cwd=tpch_kit_dpath / "dbgen")
 
     tables_symlink_dpath = link_result(dbgym_cfg, real_dir)
-    assert os.path.samefile(tables_symlink_dpath, expected_tables_symlink_dpath)
+    assert tables_symlink_dpath.samefile(expected_tables_symlink_dpath)
     benchmark_tpch_logger.info(f"Generated: {expected_tables_symlink_dpath}")
 
 
@@ -130,12 +130,12 @@ def _generate_workload(
     query_subset: str,
     scale_factor: float,
 ):
-    symlink_data_dir = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
+    symlink_data_dpath = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
     workload_name = workload_name_fn(scale_factor, seed_start, seed_end, query_subset)
-    expected_workload_symlink_dpath = symlink_data_dir / (workload_name + ".link")
+    expected_workload_symlink_dpath = symlink_data_dpath / (workload_name + ".link")
 
     benchmark_tpch_logger.info(f"Generating: {expected_workload_symlink_dpath}")
-    real_dir = dbgym_cfg.cur_task_runs_data_path(
+    real_dpath = dbgym_cfg.cur_task_runs_data_path(
         workload_name, mkdir=True
     )
@@ -147,15 +147,15 @@ def _generate_workload(
     elif query_subset == "odd":
         queries = [f"{i}" for i in range(1, 22 + 1) if i % 2 == 1]
 
-    with open(real_dir / "order.txt", "w") as f:
+    with open(real_dpath / "order.txt", "w") as f:
         for seed in range(seed_start, seed_end + 1):
             for qnum in queries:
-                sql_fpath = (symlink_data_dir / (_get_queries_dname(seed, scale_factor) + ".link")).resolve() / f"{qnum}.sql"
+                sql_fpath = (symlink_data_dpath / (_get_queries_dname(seed, scale_factor) + ".link")).resolve() / f"{qnum}.sql"
                 assert sql_fpath.exists() and not sql_fpath.is_symlink() and sql_fpath.is_absolute(), "We should only write existent real absolute paths to a file"
                 output = ",".join([f"S{seed}-Q{qnum}", str(sql_fpath)])
                 print(output, file=f)
 
     # TODO(WAN): add option to deep-copy the workload.
-    workload_symlink_dpath = link_result(dbgym_cfg, real_dir)
+    workload_symlink_dpath = link_result(dbgym_cfg, real_dpath)
     assert workload_symlink_dpath == expected_workload_symlink_dpath
     benchmark_tpch_logger.info(f"Generated: {expected_workload_symlink_dpath}")
diff --git a/benchmark/tpch/tpch_setup.sh b/benchmark/tpch/tpch_setup.sh
index 8f5c2478..011a7523 100755
--- a/benchmark/tpch/tpch_setup.sh
+++ b/benchmark/tpch/tpch_setup.sh
@@ -7,7 +7,7 @@ TPCH_REPO_ROOT="$1"
 if [ ! -d "${TPCH_REPO_ROOT}/tpch-kit" ]; then
   mkdir -p "${TPCH_REPO_ROOT}"
   cd "${TPCH_REPO_ROOT}"
-  git clone git@github.com:lmwnshn/tpch-kit.git --single-branch --branch master --depth 1
+  git clone https://github.com/lmwnshn/tpch-kit.git --single-branch --branch master --depth 1
   cd ./tpch-kit/dbgen
   make MACHINE=LINUX DATABASE=POSTGRESQL
 fi
diff --git a/config.yaml b/dbgym_config.yaml
similarity index 100%
rename from config.yaml
rename to dbgym_config.yaml
diff --git a/dbms/postgres/build_repo.sh b/dbms/postgres/build_repo.sh
index 271f7056..2127c438 100755
--- a/dbms/postgres/build_repo.sh
+++ b/dbms/postgres/build_repo.sh
@@ -7,7 +7,7 @@ REPO_REAL_PARENT_DPATH="$1"
 # Download and make postgres from the boot repository.
 mkdir -p "${REPO_REAL_PARENT_DPATH}"
 cd "${REPO_REAL_PARENT_DPATH}"
-git clone git@github.com:lmwnshn/boot.git --single-branch --branch vldb_2024 --depth 1
+git clone https://github.com/lmwnshn/boot.git --single-branch --branch vldb_2024 --depth 1
 cd ./boot
 ./cmudb/build/configure.sh release "${REPO_REAL_PARENT_DPATH}/boot/build/postgres"
 make clean
@@ -25,7 +25,7 @@ make install -j
 cd "${REPO_REAL_PARENT_DPATH}/boot"
 
 # Download and make hypopg.
-git clone git@github.com:HypoPG/hypopg.git
+git clone https://github.com/HypoPG/hypopg.git
 cd ./hypopg
 PG_CONFIG="${REPO_REAL_PARENT_DPATH}/boot/build/postgres/bin/pg_config" make install
 cd "${REPO_REAL_PARENT_DPATH}/boot"
diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py
index f81a877f..22789140 100644
--- a/dbms/postgres/cli.py
+++ b/dbms/postgres/cli.py
@@ -104,7 +104,7 @@ def _build_repo(dbgym_cfg: DBGymConfig, rebuild):
 
     # only link at the end so that the link only ever points to a complete repo
     repo_symlink_dpath = link_result(dbgym_cfg, repo_real_dpath)
-    assert os.path.samefile(expected_repo_symlink_dpath, repo_symlink_dpath)
+    assert expected_repo_symlink_dpath.samefile(repo_symlink_dpath)
     dbms_postgres_logger.info(f"Set up repo in {expected_repo_symlink_dpath}")
 
 
diff --git a/manage/cli.py b/manage/cli.py
index d8c7e9b4..3ce8c65a 100644
--- a/manage/cli.py
+++ b/manage/cli.py
@@ -169,11 +169,11 @@ def clean_workspace(dbgym_cfg: DBGymConfig, mode: str="safe", verbose=False) ->
             if not is_child_path(real_fordpath, dbgym_cfg.dbgym_runs_path):
                 continue
 
-            assert not os.path.samefile(real_fordpath, dbgym_cfg.dbgym_runs_path)
+            assert not real_fordpath.samefile(dbgym_cfg.dbgym_runs_path)
 
             # Figure out the task_run_child_fordpath to put into task_run_child_fordpaths_to_keep
             task_run_child_fordpath = None
-            if os.path.samefile(parent_dpath_of_path(real_fordpath), dbgym_cfg.dbgym_runs_path):
+            if parent_dpath_of_path(real_fordpath).samefile(dbgym_cfg.dbgym_runs_path):
                 # While it's true that it shouldn't be possible to symlink to a directory directly in task_runs/,
                 # we'll just not delete it if the user happens to have one like this. Even if the user messed up
                 # the structure somehow, it's just a good idea not to delete it.
@@ -183,10 +183,10 @@ def clean_workspace(dbgym_cfg: DBGymConfig, mode: str="safe", verbose=False) ->
                 # However, as with above, we won't just nuke files if the workspace doesn't follow this rule for
                 # some reason.
                 task_run_child_fordpath = real_fordpath
-                while not os.path.samefile(parent_dpath_of_path(task_run_child_fordpath), dbgym_cfg.dbgym_runs_path):
+                while not parent_dpath_of_path(task_run_child_fordpath).samefile(dbgym_cfg.dbgym_runs_path):
                     task_run_child_fordpath = parent_dpath_of_path(task_run_child_fordpath)
             assert task_run_child_fordpath != None
-            assert os.path.samefile(parent_dpath_of_path(task_run_child_fordpath), dbgym_cfg.dbgym_runs_path), f"task_run_child_fordpath ({task_run_child_fordpath}) is not a direct child of dbgym_cfg.dbgym_runs_path"
+            assert parent_dpath_of_path(task_run_child_fordpath).samefile(dbgym_cfg.dbgym_runs_path), f"task_run_child_fordpath ({task_run_child_fordpath}) is not a direct child of dbgym_cfg.dbgym_runs_path"
             task_run_child_fordpaths_to_keep.add(task_run_child_fordpath)
 
             # If on safe mode, add symlinks inside the task_run_child_fordpath to be processed
diff --git a/misc/utils.py b/misc/utils.py
index 380cafca..24562494 100644
--- a/misc/utils.py
+++ b/misc/utils.py
@@ -168,19 +168,19 @@ class DBGymConfig:
     """
     Global configurations that apply to all parts of DB-Gym
     """
+    num_times_created_this_run: int = 0
+
+    def __init__(self, dbgym_config_path: Path):
+        # The logic around dbgym_tmp_path assumes that DBGymConfig is only constructed once.
+        DBGymConfig.num_times_created_this_run += 1
+        assert DBGymConfig.num_times_created_this_run == 1, f"DBGymConfig has been created {DBGymConfig.num_times_created_this_run} times. It should only be created once per run."
-    def __init__(self, config_path):
-        """
-        Parameters
-        ----------
-        config_path : Path
-        """
         assert is_base_git_dir(
             os.getcwd()
         ), "This script should be invoked from the root of the dbgym repo."
 
         # Parse the YAML file.
-        contents: str = Path(config_path).read_text()
+        contents: str = dbgym_config_path.read_text()
         yaml_config: dict = yaml.safe_load(contents)
 
         # Require dbgym_workspace_path to be absolute.
@@ -189,7 +189,7 @@ def __init__(self, config_path):
             Path(yaml_config["dbgym_workspace_path"]).resolve().absolute()
         )
 
-        self.path: Path = config_path
+        self.path: Path = dbgym_config_path
         self.cur_path_list: list[str] = ["dbgym"]
         self.root_yaml: dict = yaml_config
         self.cur_yaml: dict = self.root_yaml
@@ -204,11 +204,14 @@ def __init__(self, config_path):
             self.dbgym_workspace_path
         )
         self.dbgym_symlinks_path.mkdir(parents=True, exist_ok=True)
-        # tmp is a workspace for this run only
-        # one use for it is to place the unzipped dbdata
-        # there's no need to save the actual dbdata dir in run_*/ because we just save a symlink to
-        # the .tgz file we unzipped
+        # tmp/ is a workspace for this run only
+        # One use for it is to place the unzipped dbdata.
+        # There's no need to save the actual dbdata dir in run_*/ because we just save a symlink to
+        # the .tgz file we unzipped.
         self.dbgym_tmp_path = get_tmp_path_from_workspace_path(self.dbgym_workspace_path)
+        # The best place to delete the old dbgym_tmp_path is in DBGymConfig.__init__().
+        # This is better than deleting the dbgym_tmp_path in DBGymConfig.__del__() because DBGymConfig may get deleted before execution has completed.
+        # Also, by keeping the tmp directory around, you can look at it to debug issues.
         if self.dbgym_tmp_path.exists():
             shutil.rmtree(self.dbgym_tmp_path)
         self.dbgym_tmp_path.mkdir(parents=True, exist_ok=True)
@@ -217,14 +220,10 @@ def __init__(self, config_path):
         self.dbgym_this_run_path = (
             self.dbgym_runs_path / f"run_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}"
         )
-        # exist_ok is False because we don't want to override a previous task run's data.
+        # `exist_ok` is False because we don't want to overwrite a previous task run's data.
         self.dbgym_this_run_path.mkdir(parents=True, exist_ok=False)
 
-    def __del__(self):
-        if self.dbgym_tmp_path.exists():
-            shutil.rmtree(self.dbgym_tmp_path)
-
-    # append_group() is used to mark the "codebase path" of an invocation of the CLI. The "codebase path" is
+    # `append_group()` is used to mark the "codebase path" of an invocation of the CLI. The "codebase path" is
     # explained further in the documentation.
     def append_group(self, name) -> None:
         self.cur_path_list.append(name)
@@ -285,15 +284,15 @@ def conv_inputpath_to_realabspath(dbgym_cfg: DBGymConfig, inputpath: os.PathLike
     It *does not* check whether the path exists, since the user might be wanting to create a new file/dir
     Raises RuntimeError for errors
     """
-    # for simplicity we only process Path objects
+    # For simplicity, we only process Path objects.
     realabspath = Path(inputpath)
-    # expanduser() is always "ok" to call first
+    # `expanduser()` is always "ok" to call first.
     realabspath = realabspath.expanduser()
-    # the reason we don't call Path.absolute() is because the path should be relative to dbgym_cfg.dbgym_repo_path,
-    # which is not necessary where cwd() points at the time of calling this function
+    # The reason we don't call Path.absolute() is because the path should be relative to dbgym_cfg.dbgym_repo_path,
+    # which is not necessarily where cwd() points at the time of calling this function.
     if not realabspath.is_absolute():
         realabspath = dbgym_cfg.dbgym_repo_path / realabspath
-    # resolve has two uses: normalize the path (remove ..) and resolve symlinks
+    # `resolve()` has two uses: normalize the path (remove ..) and resolve symlinks.
     # I believe the pathlib library (https://docs.python.org/3/library/pathlib.html#pathlib.Path.resolve) does it this
     # way to avoid an edge case related to symlinks and normalizing paths (footnote 1 of the linked docs)
     realabspath = realabspath.resolve()
@@ -320,7 +319,7 @@ def is_fully_resolved(path: Path) -> bool:
     assert isinstance(path, Path)
     resolved_path = path.resolve()
     # Converting them to strings is the most unambiguously strict way of checking equality.
-    # Stuff like Path.__eq__() or os.path.samefile() might be more lenient.
+    # Stuff like Path.__eq__() or Path.samefile() might be more lenient.
     return str(resolved_path) == str(path)
 
 
@@ -362,6 +361,7 @@ def basename_of_path(dpath: Path) -> str:
     return dpath_basename
 
 
+# TODO(phw2): refactor to use Path
 def is_child_path(child_path: os.PathLike, parent_dpath: os.PathLike) -> bool:
     """
     Checks whether child_path refers to a file/dir/link that is a child of the dir referred to by parent_dpath
@@ -394,7 +394,8 @@ def open_and_save(dbgym_cfg: DBGymConfig, open_fpath: Path, mode="r"):
     - If you open two "config" files of the same name but different paths, only the first open will be saved.
     - Opening two "dependency" files of the same name but different paths will lead to two different "base dirs" being symlinked.
     """
-    # process/validate open_fpath
+    # validate open_fpath
+    assert isinstance(open_fpath, Path)
     assert is_fully_resolved(
         open_fpath
     ), f"open_and_save(): open_fpath ({open_fpath}) should be a fully resolved path"
@@ -416,21 +417,23 @@ def extract_from_task_run_fordpath(dbgym_cfg: DBGymConfig, task_run_fordpath: Pa
     The task_runs/ folder is organized like task_runs/run_*/[codebase]/[org]/any/path/you/want.
     This function extracts the [codebase] and [org] components
     """
+    assert isinstance(task_run_fordpath, Path)
     assert not task_run_fordpath.is_symlink()
-    parent_dpath = os.path.dirname(task_run_fordpath)
-    assert not os.path.samefile(
-        parent_dpath, dbgym_cfg.dbgym_runs_path
+    parent_dpath = task_run_fordpath.parent
+    # TODO(phw2): make this a common function
+    assert not parent_dpath.samefile(
+        dbgym_cfg.dbgym_runs_path
     ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/ dir instead of directly in dbgym_cfg.dbgym_runs_path ({dbgym_cfg.dbgym_runs_path})"
-    assert not os.path.samefile(
-        parent_dpath_of_path(parent_dpath), dbgym_cfg.dbgym_runs_path
+    assert not parent_dpath_of_path(parent_dpath).samefile(
+        dbgym_cfg.dbgym_runs_path
    ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/[codebase]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})"
-    assert not os.path.samefile(
-        parent_dpath_of_path(parent_dpath_of_path(parent_dpath)), dbgym_cfg.dbgym_runs_path
+    assert not parent_dpath_of_path(parent_dpath_of_path(parent_dpath)).samefile(
+        dbgym_cfg.dbgym_runs_path
     ), f"task_run_fordpath ({task_run_fordpath}) should be inside a run_*/[codebase]/[organization]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})"
     # org_dpath is the run_*/[codebase]/[organization]/ dir that task_run_fordpath is in
     org_dpath = parent_dpath
-    while not os.path.samefile(
-        parent_dpath_of_path(parent_dpath_of_path(parent_dpath_of_path(org_dpath))), dbgym_cfg.dbgym_runs_path
+    while not parent_dpath_of_path(parent_dpath_of_path(parent_dpath_of_path(org_dpath))).samefile(
+        dbgym_cfg.dbgym_runs_path
     ):
         org_dpath = parent_dpath_of_path(org_dpath)
     org_dname = basename_of_path(org_dpath)
@@ -440,6 +443,7 @@ def extract_from_task_run_fordpath(dbgym_cfg: DBGymConfig, task_run_fordpath: Pa
     return codebase_dpath, codebase_dname, org_dpath, org_dname
 
 
+# TODO(phw2): really look at the clean PR to see what it changed
 # TODO(phw2): after merging agent-train, refactor some code in agent-train to use save_file() instead of open_and_save()
 def save_file(dbgym_cfg: DBGymConfig, fpath: Path) -> Path:
     """
@@ -451,12 +455,8 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: Path) -> Path:
     We create a symlink if it is a "dependency", meaning a task.py command was run to generate it
     In these cases we create a symlink so we have full provenance for how the dependency was created
     """
-    # process fpath and ensure that it's a file at the end
-    fpath = conv_inputpath_to_realabspath(dbgym_cfg, fpath)
-    fpath = os.path.realpath(fpath)  # traverse symlinks
-    assert is_fully_resolved(
-        fpath
-    ), f"fpath ({fpath}) should be a fully resolved path"
+    # validate fpath
+    assert isinstance(fpath, Path)
     assert not os.path.islink(fpath), f"fpath ({fpath}) should not be a symlink"
     assert os.path.exists(fpath), f"fpath ({fpath}) does not exist"
     assert os.path.isfile(fpath), f"fpath ({fpath}) is not a file"
@@ -470,34 +470,14 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: Path) -> Path:
     # 2. files or dirs generated by a run may be very large (up to 100s of GBs) so we don't want to copy them
     if is_child_path(fpath, dbgym_cfg.dbgym_runs_path):
         # get paths we'll need later.
-        parent_dpath = os.path.dirname(fpath)
-        assert not os.path.samefile(
-            parent_dpath, dbgym_cfg.dbgym_runs_path
-        ), f"fpath ({fpath}) should be inside a run_*/ dir instead of directly in dbgym_cfg.dbgym_runs_path ({dbgym_cfg.dbgym_runs_path})"
-        assert not os.path.samefile(
-            parent_dpath_of_path(parent_dpath), dbgym_cfg.dbgym_runs_path
-        ), f"fpath ({fpath}) should be inside a run_*/[codebase]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})"
-        assert not os.path.samefile(
-            parent_dpath_of_path(parent_dpath_of_path(parent_dpath)), dbgym_cfg.dbgym_runs_path
-        ), f"fpath ({fpath}) should be inside a run_*/[codebase]/[organization]/ dir instead of directly in run_*/ ({dbgym_cfg.dbgym_runs_path})"
-        # org_dpath is the run_*/[codebase]/[organization]/ dir that fpath is in
-        org_dpath = parent_dpath
-        while not os.path.samefile(
-            parent_dpath_of_path(parent_dpath_of_path(parent_dpath_of_path(org_dpath))), dbgym_cfg.dbgym_runs_path
-        ):
-            org_dpath = parent_dpath_of_path(org_dpath)
-        org_dname = basename_of_path(org_dpath)
-        codebase_dpath = parent_dpath_of_path(org_dpath)
-        codebase_dname = basename_of_path(codebase_dpath)
-        this_run_save_dpath = os.path.join(
-            dbgym_cfg.dbgym_this_run_path, codebase_dname, org_dname
-        )
+        _, codebase_dname, org_dpath, org_dname = extract_from_task_run_fordpath(dbgym_cfg, fpath)
+        this_run_save_dpath = dbgym_cfg.dbgym_this_run_path / codebase_dname / org_dname
         os.makedirs(this_run_save_dpath, exist_ok=True)
 
         # if the fpath file is directly in org_dpath, we symlink the file directly
-        parent_dpath = os.path.dirname(fpath)
-        if os.path.samefile(parent_dpath, org_dpath):
-            fname = os.path.basename(fpath)
+        parent_dpath = parent_dpath_of_path(fpath)
+        if parent_dpath.samefile(org_dpath):
+            fname = basename_of_path(fpath)
             symlink_fpath = this_run_save_dpath / (fname + ".link")
             try_create_symlink(fpath, symlink_fpath)
         # else, we know the fpath file is _not_ directly inside org_dpath dir
@@ -506,19 +486,19 @@ def save_file(dbgym_cfg: DBGymConfig, fpath: Path) -> Path:
         else:
             # set base_dpath such that its parent is org_dpath
             base_dpath = parent_dpath
-            while not os.path.samefile(parent_dpath_of_path(base_dpath), org_dpath):
+            while not parent_dpath_of_path(base_dpath).samefile(org_dpath):
                 base_dpath = parent_dpath_of_path(base_dpath)
 
             # create symlink
             open_base_dname = basename_of_path(base_dpath)
-            symlink_dpath = os.path.join(this_run_save_dpath, open_base_dname)
+            symlink_dpath = this_run_save_dpath / (open_base_dname + ".link")
             try_create_symlink(base_dpath, symlink_dpath)
     # if it wasn't generated by a run
     else:
         # since we don't know where the file is at all, the location is "unknown" and the org is "all"
         this_run_save_dpath = dbgym_cfg.dbgym_this_run_path / "unknown" / "all"
         os.makedirs(this_run_save_dpath, exist_ok=True)
-        fname = os.path.basename(fpath)
+        fname = basename_of_path(fpath)
         # in this case, we want to copy instead of symlinking since it might disappear in the future
         copy_fpath = this_run_save_dpath / fname
         shutil.copy(fpath, copy_fpath)
@@ -536,25 +516,26 @@ def link_result(dbgym_cfg: DBGymConfig, result_fordpath: Path, custom_result_nam
     version of a file. This function will return the path to the symlink that was created.
""" - assert is_fully_resolved(result_path), f"result_path ({result_path}) should be a fully resolved path" - result_path = conv_inputpath_to_realabspath(dbgym_cfg, result_path) - assert is_child_path(result_path, dbgym_cfg.dbgym_this_run_path) - assert not os.path.islink(result_path) + assert isinstance(result_fordpath, Path) + assert is_fully_resolved(result_fordpath), f"result_fordpath ({result_fordpath}) should be a fully resolved path" + result_fordpath = conv_inputpath_to_realabspath(dbgym_cfg, result_fordpath) + assert is_child_path(result_fordpath, dbgym_cfg.dbgym_this_run_path) + assert not os.path.islink(result_fordpath) if custom_result_name != None: result_name = custom_result_name else: - if os.path.isfile(result_path): - result_name = os.path.basename(result_path) - elif os.path.isdir(result_path): - result_name = basename_of_path(result_path) + if os.path.isfile(result_fordpath): + result_name = basename_of_path(result_fordpath) + ".link" + elif os.path.isdir(result_fordpath): + result_name = basename_of_path(result_fordpath) + ".link" else: raise AssertionError("result_fordpath must be either a file or dir") # Figure out the parent directory path of the symlink codebase_dpath, codebase_dname, _, org_dname = extract_from_task_run_fordpath(dbgym_cfg, result_fordpath) # We're only supposed to save files generated by us, which means they should be in cur_task_runs_path() - assert os.path.samefile(codebase_dpath, dbgym_cfg.cur_task_runs_path()), f"link_result should only be called on files generated by this invocation to task.py" + assert codebase_dpath.samefile(dbgym_cfg.cur_task_runs_path()), f"link_result should only be called on files generated by this invocation to task.py" symlink_parent_dpath = dbgym_cfg.dbgym_symlinks_path / codebase_dname / org_dname symlink_parent_dpath.mkdir(parents=True, exist_ok=True) @@ -562,7 +543,7 @@ def link_result(dbgym_cfg: DBGymConfig, result_fordpath: Path, custom_result_nam # Note that in a multi-threaded setting, this might remove one created by a process in the same run, # meaning it's not "old" by our definition of "old". However, we'll always end up with a symlink # file of the current run regardless of the order of threads. - assert result_name.endswith(".link") and not result_name.endswith(".link.link"), "result_name ({result_name}) should end with \".link\"" + assert result_name.endswith(".link") and not result_name.endswith(".link.link"), f"result_name ({result_name}) should end with \".link\"" symlink_path = symlink_parent_dpath / result_name try_remove_file(symlink_path) try_create_symlink(result_fordpath, symlink_path) @@ -594,7 +575,7 @@ def try_remove_file(path: Path) -> None: pass -def restart_ray(redis_port: int): +def restart_ray(redis_port: int) -> None: """ Stop and start Ray. This is good to do between each stage to avoid bugs from carrying over across stages @@ -607,7 +588,7 @@ def restart_ray(redis_port: int): ) -def make_redis_started(port: int): +def make_redis_started(port: int) -> None: """ Start Redis if it's not already started. Note that Ray uses Redis but does *not* use this function. It starts Redis on its own. 
diff --git a/scripts/integration_test.sh b/scripts/integration_test.sh
new file mode 100755
index 00000000..c63af9fa
--- /dev/null
+++ b/scripts/integration_test.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+set -euxo pipefail
+
+DBMS=postgres
+BENCHMARK=tpch
+SCALE_FACTOR=0.01
+AGENT=protox
+INTENDED_DBDATA_HARDWARE="${1:-hdd}"
+
+export DBGYM_CONFIG_PATH=scripts/integtest_dbgym_config.yaml
+
+# Benchmark
+python3 task.py benchmark $BENCHMARK data $SCALE_FACTOR
+python3 task.py benchmark $BENCHMARK workload --scale-factor $SCALE_FACTOR
+
+# DBMS
+python3 task.py dbms $DBMS build
+python3 task.py dbms $DBMS dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE
+
+# Tune
+python3 task.py tune $AGENT embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE # long datagen so that train doesn't crash
+python3 task.py tune $AGENT embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
+python3 task.py tune $AGENT agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE
+python3 task.py tune $AGENT agent tune tpch --scale-factor $SCALE_FACTOR
+python3 task.py tune $AGENT agent replay tpch --scale-factor $SCALE_FACTOR
diff --git a/scripts/integtest_dbgym_config.yaml b/scripts/integtest_dbgym_config.yaml
new file mode 100644
index 00000000..14a1063a
--- /dev/null
+++ b/scripts/integtest_dbgym_config.yaml
@@ -0,0 +1,3 @@
+dbgym_workspace_path: ../dbgym_integtest_workspace
+boot_redis_port: 7379
+ray_gcs_port: 7380
\ No newline at end of file
diff --git a/scripts/quickstart.sh b/scripts/quickstart.sh
index 7d082726..70965793 100755
--- a/scripts/quickstart.sh
+++ b/scripts/quickstart.sh
@@ -18,6 +18,5 @@ python3 task.py dbms $DBMS dbdata tpch --scale-factor $SCALE_FACTOR
 # Tune
 python3 task.py tune $AGENT embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" # long datagen so that train doesn't crash
 python3 task.py tune $AGENT embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2
-python3 task.py tune $AGENT agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01 --build-space-good-for-boot
+python3 task.py tune $AGENT agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01
 python3 task.py tune $AGENT agent tune tpch --scale-factor $SCALE_FACTOR
-python3 task.py tune $AGENT agent replay tpch --scale-factor $SCALE_FACTOR
diff --git a/scripts/run_unittests.py b/scripts/run_unit_tests.py
similarity index 100%
rename from scripts/run_unittests.py
rename to scripts/run_unit_tests.py
diff --git a/task.py b/task.py
index 31a5bd12..93b59b1e 100644
--- a/task.py
+++ b/task.py
@@ -1,4 +1,6 @@
 import logging
+import os
+from pathlib import Path
 
 import click
 from misc.utils import DBGymConfig
@@ -8,16 +10,17 @@ from tune.cli import tune_group
 from manage.cli import manage_group
 
 
+# TODO(phw2): save commit, git diff, and run command
 # TODO(phw2): remove write permissions on old run_*/ dirs to enforce that they are immutable
 @click.group()
-@click.option("--config-path", default="config.yaml")
 @click.pass_context
-def task(ctx, config_path):
+def task(ctx):
     """💩💩💩 CMU-DB Database Gym: github.com/cmu-db/dbgym 💩💩💩"""
-    ctx.obj = DBGymConfig(config_path)
+    dbgym_config_path = Path(os.getenv("DBGYM_CONFIG_PATH", "dbgym_config.yaml"))
+    ctx.obj = DBGymConfig(dbgym_config_path)
 
 
 if __name__ == "__main__":
diff --git a/tune/protox/embedding/train_all.py b/tune/protox/embedding/train_all.py
index 20d73292..f5bbd687 100644
--- a/tune/protox/embedding/train_all.py
+++ b/tune/protox/embedding/train_all.py
@@ -187,7 +187,7 @@ def train_all_embeddings(
     # Connect to cluster or die.
     restart_ray(dbgym_cfg.root_yaml["ray_gcs_port"])
 
-    ray.init(address=f"localhost:{dbgym_cfg.root_yaml['boot_redis_port']}", log_to_driver=False)
+    ray.init(address=f"localhost:{dbgym_cfg.root_yaml['ray_gcs_port']}", log_to_driver=False)
     scheduler = FIFOScheduler()  # type: ignore
 
     # Search.
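
With the --config-path flag removed from task.py, the active config file is now selected purely through the environment: integration_test.sh points DBGYM_CONFIG_PATH at scripts/integtest_dbgym_config.yaml, and everything else falls back to dbgym_config.yaml. A minimal sketch of the selection logic follows (a standalone restatement of the task.py change; the helper name is ours for illustration):

    import os
    from pathlib import Path

    def get_dbgym_config_path() -> Path:
        # Mirrors the new task.py behavior: DBGYM_CONFIG_PATH overrides the
        # default dbgym_config.yaml at the repo root.
        return Path(os.getenv("DBGYM_CONFIG_PATH", "dbgym_config.yaml"))

    # Usage, as in integration_test.sh:
    #   export DBGYM_CONFIG_PATH=scripts/integtest_dbgym_config.yaml
    #   python3 task.py benchmark tpch data 0.01

Because the env var scopes the workspace (and the Redis/Ray ports in the YAML) per invocation, the integration test can run against ../dbgym_integtest_workspace without colliding with a developer's normal workspace.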