Skip to content

Commit 3cd655f

Browse files
Time workload (replay) (#55)
**Summary**: Adds the ability to time a workload, for use when replaying. **Demo**: Added a test that passes. <img width="624" alt="Screenshot 2024-12-24 at 14 45 23" src="https://github.com/user-attachments/assets/6362e8e7-8a70-4808-9359-a1545a37bf5c" /> **Details**: * Added a `TuningAgentMetadata` class, which provides the extra information needed during replay. * Modified `MockTuningAgent` to return proper metadata. * Refactored the `default_*` lambdas in `workspace.py` into actual functions (renamed to `get_default_*`).
1 parent 8410891 commit 3cd655f

24 files changed

+512
-298
lines changed

benchmark/job/cli.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from util.shell import subprocess_run
99
from util.workspace import (
1010
DBGymConfig,
11-
default_tables_dname,
11+
get_default_tables_dname,
1212
get_workload_name,
1313
is_fully_resolved,
1414
link_result,
@@ -172,7 +172,7 @@ def _download_job_data(dbgym_cfg: DBGymConfig) -> None:
172172
dbgym_cfg,
173173
JOB_TABLES_URL,
174174
"imdb.tgz",
175-
default_tables_dname(DEFAULT_SCALE_FACTOR),
175+
get_default_tables_dname(DEFAULT_SCALE_FACTOR),
176176
)
177177

178178

benchmark/job/load_info.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from benchmark.constants import DEFAULT_SCALE_FACTOR
55
from dbms.load_info_base_class import LoadInfoBaseClass
6-
from util.workspace import DBGymConfig, default_tables_dname, is_fully_resolved
6+
from util.workspace import DBGymConfig, get_default_tables_dname, is_fully_resolved
77

88
JOB_SCHEMA_FNAME = "job_schema.sql"
99

@@ -52,7 +52,7 @@ def __init__(self, dbgym_cfg: DBGymConfig):
5252
dbgym_cfg.dbgym_symlinks_path / JobLoadInfo.CODEBASE_DNAME / "data"
5353
)
5454
tables_symlink_dpath = (
55-
data_root_dpath / f"{default_tables_dname(DEFAULT_SCALE_FACTOR)}.link"
55+
data_root_dpath / f"{get_default_tables_dname(DEFAULT_SCALE_FACTOR)}.link"
5656
)
5757
tables_dpath = tables_symlink_dpath.resolve()
5858
assert is_fully_resolved(

benchmark/tpch/cli.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from util.shell import subprocess_run
1010
from util.workspace import (
1111
DBGymConfig,
12-
default_tables_dname,
12+
get_default_tables_dname,
1313
get_scale_factor_string,
1414
get_workload_name,
1515
is_fully_resolved,
@@ -134,7 +134,7 @@ def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float) -> None:
134134
tpch_kit_dpath = _get_tpch_kit_dpath(dbgym_cfg)
135135
data_path = dbgym_cfg.cur_symlinks_data_path(mkdir=True)
136136
expected_tables_symlink_dpath = (
137-
data_path / f"{default_tables_dname(scale_factor)}.link"
137+
data_path / f"{get_default_tables_dname(scale_factor)}.link"
138138
)
139139
if expected_tables_symlink_dpath.exists():
140140
logging.getLogger(DBGYM_LOGGER_NAME).info(
@@ -147,7 +147,7 @@ def _generate_data(dbgym_cfg: DBGymConfig, scale_factor: float) -> None:
147147
)
148148
subprocess_run(f"./dbgen -vf -s {scale_factor}", cwd=tpch_kit_dpath / "dbgen")
149149
real_dir = dbgym_cfg.cur_task_runs_data_path(
150-
default_tables_dname(scale_factor), mkdir=True
150+
get_default_tables_dname(scale_factor), mkdir=True
151151
)
152152
subprocess_run(f"mv ./*.tbl {real_dir}", cwd=tpch_kit_dpath / "dbgen")
153153

benchmark/tpch/load_info.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from typing import Optional
33

44
from dbms.load_info_base_class import LoadInfoBaseClass
5-
from util.workspace import DBGymConfig, default_tables_dname, is_fully_resolved
5+
from util.workspace import DBGymConfig, get_default_tables_dname, is_fully_resolved
66

77
TPCH_SCHEMA_FNAME = "tpch_schema.sql"
88
TPCH_CONSTRAINTS_FNAME = "tpch_constraints.sql"
@@ -45,7 +45,7 @@ def __init__(self, dbgym_cfg: DBGymConfig, scale_factor: float):
4545
dbgym_cfg.dbgym_symlinks_path / TpchLoadInfo.CODEBASE_DNAME / "data"
4646
)
4747
tables_symlink_dpath = (
48-
data_root_dpath / f"{default_tables_dname(scale_factor)}.link"
48+
data_root_dpath / f"{get_default_tables_dname(scale_factor)}.link"
4949
)
5050
tables_dpath = tables_symlink_dpath.resolve()
5151
assert is_fully_resolved(

dbms/postgres/cli.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@
3535
from util.workspace import (
3636
WORKSPACE_PATH_PLACEHOLDER,
3737
DBGymConfig,
38-
default_dbdata_parent_dpath,
39-
default_pgbin_path,
4038
fully_resolve_path,
4139
get_dbdata_tgz_name,
40+
get_default_dbdata_parent_dpath,
41+
get_default_pgbin_path,
4242
is_fully_resolved,
4343
is_ssd,
4444
link_result,
@@ -78,7 +78,7 @@ def postgres_build(dbgym_cfg: DBGymConfig, rebuild: bool) -> None:
7878
"--pgbin-path",
7979
type=Path,
8080
default=None,
81-
help=f"The path to the bin containing Postgres executables. The default is {default_pgbin_path(WORKSPACE_PATH_PLACEHOLDER)}.",
81+
help=f"The path to the bin containing Postgres executables. The default is {get_default_pgbin_path(WORKSPACE_PATH_PLACEHOLDER)}.",
8282
)
8383
@click.option(
8484
"--intended-dbdata-hardware",
@@ -90,7 +90,7 @@ def postgres_build(dbgym_cfg: DBGymConfig, rebuild: bool) -> None:
9090
"--dbdata-parent-dpath",
9191
default=None,
9292
type=Path,
93-
help=f"The path to the parent directory of the dbdata which will be actively tuned. The default is {default_dbdata_parent_dpath(WORKSPACE_PATH_PLACEHOLDER)}.",
93+
help=f"The path to the parent directory of the dbdata which will be actively tuned. The default is {get_default_dbdata_parent_dpath(WORKSPACE_PATH_PLACEHOLDER)}.",
9494
)
9595
def postgres_dbdata(
9696
dbgym_cfg: DBGymConfig,
@@ -102,9 +102,9 @@ def postgres_dbdata(
102102
) -> None:
103103
# Set args to defaults programmatically (do this before doing anything else in the function)
104104
if pgbin_path is None:
105-
pgbin_path = default_pgbin_path(dbgym_cfg.dbgym_workspace_path)
105+
pgbin_path = get_default_pgbin_path(dbgym_cfg.dbgym_workspace_path)
106106
if dbdata_parent_dpath is None:
107-
dbdata_parent_dpath = default_dbdata_parent_dpath(
107+
dbdata_parent_dpath = get_default_dbdata_parent_dpath(
108108
dbgym_cfg.dbgym_workspace_path
109109
)
110110

env/integtest_pg_conn.py

+6-25
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,13 @@
33

44
import psycopg
55

6-
from env.integtest_util import (
7-
INTEGTEST_BENCHMARK,
8-
INTEGTEST_SCALE_FACTOR,
9-
IntegtestWorkspace,
10-
)
6+
from env.integtest_util import IntegtestWorkspace
117
from env.pg_conn import PostgresConn
128
from util.pg import (
139
DEFAULT_POSTGRES_PORT,
1410
get_is_postgres_running,
1511
get_running_postgres_ports,
1612
)
17-
from util.workspace import (
18-
DEFAULT_BOOT_CONFIG_FPATH,
19-
default_dbdata_parent_dpath,
20-
default_pgbin_path,
21-
default_pristine_dbdata_snapshot_path,
22-
)
2313

2414

2515
class PostgresConnTests(unittest.TestCase):
@@ -33,15 +23,7 @@ def setUp(self) -> None:
3323
"Make sure Postgres isn't running before starting the integration test. `pkill postgres` is one way "
3424
+ "to ensure this. Be careful about accidentally taking down other people's Postgres instances though.",
3525
)
36-
self.pristine_dbdata_snapshot_path = default_pristine_dbdata_snapshot_path(
37-
IntegtestWorkspace.get_workspace_path(),
38-
INTEGTEST_BENCHMARK,
39-
INTEGTEST_SCALE_FACTOR,
40-
)
41-
self.dbdata_parent_dpath = default_dbdata_parent_dpath(
42-
IntegtestWorkspace.get_workspace_path()
43-
)
44-
self.pgbin_dpath = default_pgbin_path(IntegtestWorkspace.get_workspace_path())
26+
self.metadata = IntegtestWorkspace.get_default_metadata()
4527

4628
# The reason we restart Postgres every time is to ensure a "clean" starting point
4729
# so that all tests are independent of each other.
@@ -58,11 +40,10 @@ def create_pg_conn(self, pgport: int = DEFAULT_POSTGRES_PORT) -> PostgresConn:
5840
return PostgresConn(
5941
IntegtestWorkspace.get_dbgym_cfg(),
6042
pgport,
61-
self.pristine_dbdata_snapshot_path,
62-
self.dbdata_parent_dpath,
63-
self.pgbin_dpath,
64-
False,
65-
DEFAULT_BOOT_CONFIG_FPATH,
43+
self.metadata.pristine_dbdata_snapshot_path,
44+
self.metadata.dbdata_parent_path,
45+
self.metadata.pgbin_path,
46+
None,
6647
)
6748

6849
def test_start_on_multiple_ports(self) -> None:

env/integtest_replay.py

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import unittest
2+
3+
from env.integtest_util import IntegtestWorkspace, MockTuningAgent
4+
from env.replay import replay
5+
6+
7+
class ReplayTests(unittest.TestCase):
8+
@staticmethod
9+
def setUpClass() -> None:
10+
IntegtestWorkspace.set_up_workspace()
11+
12+
def test_replay(self) -> None:
13+
agent = MockTuningAgent(IntegtestWorkspace.get_dbgym_cfg())
14+
replay(IntegtestWorkspace.get_dbgym_cfg(), agent.tuning_agent_artifacts_dpath)
15+
16+
17+
if __name__ == "__main__":
18+
unittest.main()

env/integtest_tuning_agent.py

+11-19
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,15 @@
11
import unittest
2-
from typing import Any, Optional
32

4-
from env.integtest_util import IntegtestWorkspace
3+
from env.integtest_util import IntegtestWorkspace, MockTuningAgent
54
from env.tuning_agent import (
65
DBMSConfigDelta,
76
IndexesDelta,
87
QueryKnobsDelta,
98
SysKnobsDelta,
10-
TuningAgent,
11-
TuningAgentStepReader,
9+
TuningAgentArtifactsReader,
1210
)
1311

1412

15-
class MockTuningAgent(TuningAgent):
16-
def __init__(self, *args: Any, **kwargs: Any) -> None:
17-
super().__init__(*args, **kwargs)
18-
self.config_to_return: Optional[DBMSConfigDelta] = None
19-
20-
def _step(self) -> DBMSConfigDelta:
21-
assert self.config_to_return is not None
22-
ret = self.config_to_return
23-
# Setting this ensures you must set self.config_to_return every time.
24-
self.config_to_return = None
25-
return ret
26-
27-
2813
class PostgresConnTests(unittest.TestCase):
2914
@staticmethod
3015
def setUpClass() -> None:
@@ -48,7 +33,7 @@ def test_get_step_delta(self) -> None:
4833
agent.config_to_return = PostgresConnTests.make_config("c")
4934
agent.step()
5035

51-
reader = TuningAgentStepReader(agent.dbms_cfg_deltas_dpath)
36+
reader = TuningAgentArtifactsReader(agent.tuning_agent_artifacts_dpath)
5237

5338
self.assertEqual(reader.get_step_delta(1), PostgresConnTests.make_config("b"))
5439
self.assertEqual(reader.get_step_delta(0), PostgresConnTests.make_config("a"))
@@ -65,7 +50,7 @@ def test_get_all_deltas(self) -> None:
6550
agent.config_to_return = PostgresConnTests.make_config("c")
6651
agent.step()
6752

68-
reader = TuningAgentStepReader(agent.dbms_cfg_deltas_dpath)
53+
reader = TuningAgentArtifactsReader(agent.tuning_agent_artifacts_dpath)
6954

7055
self.assertEqual(
7156
reader.get_all_deltas(),
@@ -76,6 +61,13 @@ def test_get_all_deltas(self) -> None:
7661
],
7762
)
7863

64+
def test_get_metadata(self) -> None:
65+
agent = MockTuningAgent(IntegtestWorkspace.get_dbgym_cfg())
66+
reader = TuningAgentArtifactsReader(agent.tuning_agent_artifacts_dpath)
67+
metadata = reader.get_metadata()
68+
expected_metadata = IntegtestWorkspace.get_default_metadata()
69+
self.assertEqual(metadata, expected_metadata)
70+
7971

8072
if __name__ == "__main__":
8173
unittest.main()

env/integtest_util.py

+60-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,20 @@
11
import subprocess
22
from pathlib import Path
3-
from typing import Optional
3+
from typing import Any, Optional
44

55
import yaml
66

7-
from util.workspace import DBGymConfig
7+
from env.tuning_agent import DBMSConfigDelta, TuningAgent, TuningAgentMetadata
8+
from util.workspace import (
9+
DBGymConfig,
10+
fully_resolve_path,
11+
get_default_dbdata_parent_dpath,
12+
get_default_pgbin_path,
13+
get_default_pristine_dbdata_snapshot_path,
14+
get_default_workload_name_suffix,
15+
get_default_workload_path,
16+
get_workload_name,
17+
)
818

919
# These are the values used by set_up_env_integtests.sh.
1020
# TODO: make set_up_env_integtests.sh take in these values directly as envvars.
@@ -45,3 +55,51 @@ def get_dbgym_cfg() -> DBGymConfig:
4555
def get_workspace_path() -> Path:
4656
with open(IntegtestWorkspace.ENV_INTEGTESTS_DBGYM_CONFIG_FPATH) as f:
4757
return Path(yaml.safe_load(f)["dbgym_workspace_path"])
58+
59+
@staticmethod
60+
def get_default_metadata() -> TuningAgentMetadata:
61+
dbgym_cfg = IntegtestWorkspace.get_dbgym_cfg()
62+
workspace_path = fully_resolve_path(
63+
dbgym_cfg, IntegtestWorkspace.get_workspace_path()
64+
)
65+
return TuningAgentMetadata(
66+
workload_path=fully_resolve_path(
67+
dbgym_cfg,
68+
get_default_workload_path(
69+
workspace_path,
70+
INTEGTEST_BENCHMARK,
71+
get_workload_name(
72+
INTEGTEST_SCALE_FACTOR,
73+
get_default_workload_name_suffix(INTEGTEST_BENCHMARK),
74+
),
75+
),
76+
),
77+
pristine_dbdata_snapshot_path=fully_resolve_path(
78+
dbgym_cfg,
79+
get_default_pristine_dbdata_snapshot_path(
80+
workspace_path, INTEGTEST_BENCHMARK, INTEGTEST_SCALE_FACTOR
81+
),
82+
),
83+
dbdata_parent_path=fully_resolve_path(
84+
dbgym_cfg, get_default_dbdata_parent_dpath(workspace_path)
85+
),
86+
pgbin_path=fully_resolve_path(
87+
dbgym_cfg, get_default_pgbin_path(workspace_path)
88+
),
89+
)
90+
91+
92+
class MockTuningAgent(TuningAgent):
93+
def __init__(self, *args: Any, **kwargs: Any) -> None:
94+
super().__init__(*args, **kwargs)
95+
self.config_to_return: Optional[DBMSConfigDelta] = None
96+
97+
def _get_metadata(self) -> TuningAgentMetadata:
98+
return IntegtestWorkspace.get_default_metadata()
99+
100+
def _step(self) -> DBMSConfigDelta:
101+
assert self.config_to_return is not None
102+
ret = self.config_to_return
103+
# Setting this ensures you must set self.config_to_return every time.
104+
self.config_to_return = None
105+
return ret

env/integtest_workload.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
)
99
from env.workload import Workload
1010
from util.workspace import (
11-
default_workload_path,
1211
fully_resolve_path,
1312
get_default_workload_name_suffix,
13+
get_default_workload_path,
1414
get_workload_name,
1515
)
1616

@@ -23,7 +23,7 @@ def setUpClass() -> None:
2323
def test_workload(self) -> None:
2424
workload_dpath = fully_resolve_path(
2525
IntegtestWorkspace.get_dbgym_cfg(),
26-
default_workload_path(
26+
get_default_workload_path(
2727
IntegtestWorkspace.get_workspace_path(),
2828
INTEGTEST_BENCHMARK,
2929
get_workload_name(

0 commit comments

Comments
 (0)