From 8227c5e1a3861948d88fddaca053ade4f4ab68ed Mon Sep 17 00:00:00 2001 From: Patrick Wang Date: Wed, 25 Dec 2024 20:37:07 -0800 Subject: [PATCH] Proto-X removal (#64) **Summary**: removed everything related to Proto-X from the codebase. **Details**: * Proto-X is not going away. It's just going to be in its own submodule. * Also removed `tune/demo/`. --- .github/workflows/tests.yaml | 10 - README.md | 1 - analyze/__init__.py | 0 analyze/cli.py | 79 -- analyze/tests/__init__.py | 0 .../unittest_analysis_files/out.tfevents | Bin 40283 -> 0 bytes analyze/tests/unittest_analyze.py | 31 - benchmark/job/load_info.py | 2 +- dbms/load_info_base_class.py | 8 +- dbms/postgres/cli.py | 3 - experiments/protox_tpch_sf0point1/main.sh | 33 - experiments/protox_tpch_sf1/main.sh | 27 - experiments/protox_tpch_sf10/main.sh | 31 - scripts/_run_tests.py | 6 - scripts/configs/apt_requirements.txt | 2 +- scripts/configs/requirements.txt | 158 +-- scripts/pat_test.sh | 14 +- scripts/pipfreeze.sh | 2 +- scripts/run_demo.sh | 4 - scripts/run_protox_e2e_test.py | 207 ---- task.py | 17 +- tune/__init__.py | 0 tune/cli.py | 13 - tune/demo/__init__.py | 0 tune/demo/main.py | 114 --- tune/protox/__init__.py | 0 tune/protox/agent/__init__.py | 0 tune/protox/agent/agent_env.py | 151 --- tune/protox/agent/base_class.py | 87 -- tune/protox/agent/buffers.py | 134 --- tune/protox/agent/build_trial.py | 573 ----------- tune/protox/agent/cli.py | 17 - tune/protox/agent/coerce_config.py | 96 -- tune/protox/agent/default_sysknobs.yaml | 126 --- tune/protox/agent/hpo.py | 809 --------------- tune/protox/agent/noise.py | 118 --- tune/protox/agent/off_policy_algorithm.py | 257 ----- tune/protox/agent/policies.py | 203 ---- tune/protox/agent/replay.py | 512 ---------- tune/protox/agent/torch_layers.py | 86 -- tune/protox/agent/tune.py | 169 ---- tune/protox/agent/utils.py | 111 --- tune/protox/agent/wolp/__init__.py | 0 tune/protox/agent/wolp/policies.py | 298 ------ tune/protox/agent/wolp/wolp.py | 195 ---- tune/protox/cli.py | 15 - tune/protox/default_job_benchbase_config.xml | 37 - tune/protox/default_job_benchmark_config.yaml | 198 ---- tune/protox/default_tpch_benchbase_config.xml | 134 --- .../protox/default_tpch_benchmark_config.yaml | 124 --- tune/protox/embedding/__init__.py | 0 tune/protox/embedding/analyze.py | 471 --------- tune/protox/embedding/cli.py | 15 - tune/protox/embedding/datagen.py | 924 ------------------ tune/protox/embedding/default_hpo_space.json | 242 ----- tune/protox/embedding/loss.py | 181 ---- tune/protox/embedding/select.py | 254 ----- tune/protox/embedding/train.py | 264 ----- tune/protox/embedding/train_all.py | 551 ----------- tune/protox/embedding/train_args.py | 83 -- tune/protox/embedding/trainer.py | 249 ----- tune/protox/embedding/utils.py | 54 - tune/protox/embedding/vae.py | 402 -------- tune/protox/env/__init__.py | 6 - tune/protox/env/artifact_manager.py | 187 ---- tune/protox/env/lsc/__init__.py | 0 tune/protox/env/lsc/lsc.py | 136 --- tune/protox/env/lsc/lsc_wrapper.py | 54 - tune/protox/env/mqo/__init__.py | 0 tune/protox/env/mqo/mqo_wrapper.py | 430 -------- tune/protox/env/pg_env.py | 457 --------- tune/protox/env/space/__init__.py | 0 tune/protox/env/space/holon_space.py | 372 ------- .../protox/env/space/latent_space/__init__.py | 6 - .../space/latent_space/latent_index_space.py | 305 ------ .../space/latent_space/latent_knob_space.py | 243 ----- .../space/latent_space/latent_query_space.py | 63 -- .../env/space/latent_space/lsc_index_space.py | 127 --- 
tune/protox/env/space/primitive/__init__.py | 47 - tune/protox/env/space/primitive/index.py | 133 --- tune/protox/env/space/primitive/knob.py | 355 ------- .../protox/env/space/primitive/latent_knob.py | 139 --- .../env/space/primitive_space/__init__.py | 6 - .../env/space/primitive_space/index_policy.py | 307 ------ .../env/space/primitive_space/index_space.py | 113 --- .../env/space/primitive_space/knob_space.py | 39 - .../env/space/primitive_space/query_space.py | 100 -- tune/protox/env/space/state/__init__.py | 6 - tune/protox/env/space/state/lsc_space.py | 58 -- tune/protox/env/space/state/metric.py | 315 ------ tune/protox/env/space/state/space.py | 40 - tune/protox/env/space/state/structure.py | 137 --- tune/protox/env/space/utils.py | 295 ------ tune/protox/env/target_reset/__init__.py | 0 .../env/target_reset/target_reset_wrapper.py | 110 --- tune/protox/env/types.py | 209 ---- tune/protox/env/util/__init__.py | 0 tune/protox/env/util/execute.py | 106 -- tune/protox/env/util/reward.py | 197 ---- tune/protox/env/util/workload_analysis.py | 171 ---- tune/protox/env/workload.py | 707 -------------- tune/protox/tests/__init__.py | 0 .../unittest_dsb.yaml | 519 ---------- .../unittest_jobfull.yaml | 198 ---- .../unittest_tpcc.yaml | 162 --- .../unittest_tpch.yaml | 124 --- tune/protox/tests/unittest_dsb_dir/order.txt | 53 - .../tests/unittest_dsb_dir/query001s0.sql | 33 - .../tests/unittest_dsb_dir/query010s0.sql | 75 -- .../tests/unittest_dsb_dir/query013s0.sql | 48 - .../tests/unittest_dsb_dir/query013s0_spj.sql | 48 - .../tests/unittest_dsb_dir/query014s0.sql | 124 --- .../tests/unittest_dsb_dir/query018s0.sql | 29 - .../tests/unittest_dsb_dir/query018s0_spj.sql | 24 - .../tests/unittest_dsb_dir/query019s0.sql | 25 - .../tests/unittest_dsb_dir/query019s0_spj.sql | 15 - .../tests/unittest_dsb_dir/query023s0.sql | 55 -- .../tests/unittest_dsb_dir/query025s0.sql | 45 - .../tests/unittest_dsb_dir/query025s0_spj.sql | 37 - .../tests/unittest_dsb_dir/query027s0.sql | 21 - .../tests/unittest_dsb_dir/query027s0_spj.sql | 19 - .../tests/unittest_dsb_dir/query030s0.sql | 35 - .../tests/unittest_dsb_dir/query031s0.sql | 59 -- .../tests/unittest_dsb_dir/query032s0.sql | 32 - .../tests/unittest_dsb_dir/query038s0.sql | 29 - .../tests/unittest_dsb_dir/query039as0.sql | 27 - .../tests/unittest_dsb_dir/query039bs0.sql | 28 - .../tests/unittest_dsb_dir/query040s0.sql | 28 - .../tests/unittest_dsb_dir/query040s0_spj.sql | 24 - .../tests/unittest_dsb_dir/query050s0.sql | 58 -- .../tests/unittest_dsb_dir/query050s0_spj.sql | 32 - .../tests/unittest_dsb_dir/query054s0.sql | 62 -- .../tests/unittest_dsb_dir/query058s0.sql | 82 -- .../tests/unittest_dsb_dir/query059s0.sql | 47 - .../tests/unittest_dsb_dir/query064s0.sql | 126 --- .../tests/unittest_dsb_dir/query065s0.sql | 30 - .../tests/unittest_dsb_dir/query069s0.sql | 52 - .../tests/unittest_dsb_dir/query072s0.sql | 29 - .../tests/unittest_dsb_dir/query072s0_spj.sql | 26 - .../tests/unittest_dsb_dir/query075s0.sql | 74 -- .../tests/unittest_dsb_dir/query080s0.sql | 111 --- .../tests/unittest_dsb_dir/query081s0.sql | 35 - .../tests/unittest_dsb_dir/query083s0.sql | 76 -- .../tests/unittest_dsb_dir/query084s0.sql | 18 - .../tests/unittest_dsb_dir/query084s0_spj.sql | 17 - .../tests/unittest_dsb_dir/query085s0.sql | 81 -- .../tests/unittest_dsb_dir/query085s0_spj.sql | 78 -- .../tests/unittest_dsb_dir/query087s0.sql | 31 - .../tests/unittest_dsb_dir/query091s0.sql | 28 - .../tests/unittest_dsb_dir/query091s0_spj.sql | 28 - 
.../tests/unittest_dsb_dir/query092s0.sql | 35 - .../tests/unittest_dsb_dir/query094s0.sql | 30 - .../tests/unittest_dsb_dir/query099s0.sql | 36 - .../tests/unittest_dsb_dir/query099s0_spj.sql | 21 - .../tests/unittest_dsb_dir/query100s0.sql | 26 - .../tests/unittest_dsb_dir/query100s0_spj.sql | 27 - .../tests/unittest_dsb_dir/query101s0.sql | 31 - .../tests/unittest_dsb_dir/query101s0_spj.sql | 29 - .../tests/unittest_dsb_dir/query102s0.sql | 43 - .../tests/unittest_dsb_dir/query102s0_spj.sql | 41 - tune/protox/tests/unittest_index_space.py | 102 -- tune/protox/tests/unittest_jobfull_dir/1.sql | 19 - tune/protox/tests/unittest_jobfull_dir/10.sql | 22 - .../protox/tests/unittest_jobfull_dir/10b.sql | 21 - .../protox/tests/unittest_jobfull_dir/10c.sql | 19 - tune/protox/tests/unittest_jobfull_dir/11.sql | 30 - .../protox/tests/unittest_jobfull_dir/11b.sql | 31 - .../protox/tests/unittest_jobfull_dir/11c.sql | 32 - .../protox/tests/unittest_jobfull_dir/11d.sql | 30 - tune/protox/tests/unittest_jobfull_dir/12.sql | 30 - .../protox/tests/unittest_jobfull_dir/12b.sql | 30 - .../protox/tests/unittest_jobfull_dir/12c.sql | 32 - tune/protox/tests/unittest_jobfull_dir/13.sql | 29 - .../protox/tests/unittest_jobfull_dir/13b.sql | 32 - .../protox/tests/unittest_jobfull_dir/13c.sql | 32 - .../protox/tests/unittest_jobfull_dir/13d.sql | 29 - tune/protox/tests/unittest_jobfull_dir/14.sql | 40 - .../protox/tests/unittest_jobfull_dir/14b.sql | 41 - .../protox/tests/unittest_jobfull_dir/14c.sql | 42 - tune/protox/tests/unittest_jobfull_dir/15.sql | 33 - .../protox/tests/unittest_jobfull_dir/15b.sql | 34 - .../protox/tests/unittest_jobfull_dir/15c.sql | 33 - .../protox/tests/unittest_jobfull_dir/15d.sql | 30 - tune/protox/tests/unittest_jobfull_dir/16.sql | 26 - .../protox/tests/unittest_jobfull_dir/16b.sql | 24 - .../protox/tests/unittest_jobfull_dir/16c.sql | 25 - .../protox/tests/unittest_jobfull_dir/16d.sql | 26 - tune/protox/tests/unittest_jobfull_dir/17.sql | 22 - .../protox/tests/unittest_jobfull_dir/17b.sql | 21 - .../protox/tests/unittest_jobfull_dir/17c.sql | 21 - .../protox/tests/unittest_jobfull_dir/17d.sql | 20 - .../protox/tests/unittest_jobfull_dir/17e.sql | 20 - .../protox/tests/unittest_jobfull_dir/17f.sql | 20 - tune/protox/tests/unittest_jobfull_dir/18.sql | 26 - .../protox/tests/unittest_jobfull_dir/18b.sql | 34 - .../protox/tests/unittest_jobfull_dir/18c.sql | 34 - tune/protox/tests/unittest_jobfull_dir/19.sql | 42 - .../protox/tests/unittest_jobfull_dir/19b.sql | 40 - .../protox/tests/unittest_jobfull_dir/19c.sql | 39 - .../protox/tests/unittest_jobfull_dir/19d.sql | 35 - tune/protox/tests/unittest_jobfull_dir/1b.sql | 18 - tune/protox/tests/unittest_jobfull_dir/1c.sql | 19 - tune/protox/tests/unittest_jobfull_dir/1d.sql | 18 - tune/protox/tests/unittest_jobfull_dir/2.sql | 14 - tune/protox/tests/unittest_jobfull_dir/20.sql | 39 - .../protox/tests/unittest_jobfull_dir/20b.sql | 40 - .../protox/tests/unittest_jobfull_dir/20c.sql | 42 - tune/protox/tests/unittest_jobfull_dir/21.sql | 43 - .../protox/tests/unittest_jobfull_dir/21b.sql | 37 - .../protox/tests/unittest_jobfull_dir/21c.sql | 44 - tune/protox/tests/unittest_jobfull_dir/22.sql | 48 - .../protox/tests/unittest_jobfull_dir/22b.sql | 48 - .../protox/tests/unittest_jobfull_dir/22c.sql | 54 - .../protox/tests/unittest_jobfull_dir/22d.sql | 52 - tune/protox/tests/unittest_jobfull_dir/23.sql | 39 - .../protox/tests/unittest_jobfull_dir/23b.sql | 41 - .../protox/tests/unittest_jobfull_dir/23c.sql | 42 - 
tune/protox/tests/unittest_jobfull_dir/24.sql | 50 - .../protox/tests/unittest_jobfull_dir/24b.sql | 53 - tune/protox/tests/unittest_jobfull_dir/25.sql | 42 - .../protox/tests/unittest_jobfull_dir/25b.sql | 44 - .../protox/tests/unittest_jobfull_dir/25c.sql | 49 - tune/protox/tests/unittest_jobfull_dir/26.sql | 53 - .../protox/tests/unittest_jobfull_dir/26b.sql | 46 - .../protox/tests/unittest_jobfull_dir/26c.sql | 51 - tune/protox/tests/unittest_jobfull_dir/27.sql | 52 - .../protox/tests/unittest_jobfull_dir/27b.sql | 52 - .../protox/tests/unittest_jobfull_dir/27c.sql | 56 -- tune/protox/tests/unittest_jobfull_dir/28.sql | 66 -- .../protox/tests/unittest_jobfull_dir/28b.sql | 60 -- .../protox/tests/unittest_jobfull_dir/28c.sql | 66 -- tune/protox/tests/unittest_jobfull_dir/29.sql | 67 -- .../protox/tests/unittest_jobfull_dir/29b.sql | 65 -- .../protox/tests/unittest_jobfull_dir/29c.sql | 66 -- tune/protox/tests/unittest_jobfull_dir/2b.sql | 14 - tune/protox/tests/unittest_jobfull_dir/2c.sql | 14 - tune/protox/tests/unittest_jobfull_dir/2d.sql | 14 - tune/protox/tests/unittest_jobfull_dir/3.sql | 20 - tune/protox/tests/unittest_jobfull_dir/30.sql | 59 -- .../protox/tests/unittest_jobfull_dir/30b.sql | 62 -- .../protox/tests/unittest_jobfull_dir/30c.sql | 61 -- tune/protox/tests/unittest_jobfull_dir/31.sql | 54 - .../protox/tests/unittest_jobfull_dir/31b.sql | 59 -- .../protox/tests/unittest_jobfull_dir/31c.sql | 57 -- tune/protox/tests/unittest_jobfull_dir/32.sql | 17 - .../protox/tests/unittest_jobfull_dir/32b.sql | 17 - tune/protox/tests/unittest_jobfull_dir/33.sql | 50 - .../protox/tests/unittest_jobfull_dir/33b.sql | 48 - .../protox/tests/unittest_jobfull_dir/33c.sql | 52 - tune/protox/tests/unittest_jobfull_dir/3b.sql | 13 - tune/protox/tests/unittest_jobfull_dir/3c.sql | 22 - tune/protox/tests/unittest_jobfull_dir/4.sql | 17 - tune/protox/tests/unittest_jobfull_dir/4b.sql | 17 - tune/protox/tests/unittest_jobfull_dir/4c.sql | 17 - tune/protox/tests/unittest_jobfull_dir/5.sql | 24 - tune/protox/tests/unittest_jobfull_dir/5b.sql | 19 - tune/protox/tests/unittest_jobfull_dir/5c.sql | 26 - tune/protox/tests/unittest_jobfull_dir/6.sql | 17 - tune/protox/tests/unittest_jobfull_dir/6b.sql | 24 - tune/protox/tests/unittest_jobfull_dir/6c.sql | 17 - tune/protox/tests/unittest_jobfull_dir/6d.sql | 24 - tune/protox/tests/unittest_jobfull_dir/6e.sql | 17 - tune/protox/tests/unittest_jobfull_dir/6f.sql | 23 - tune/protox/tests/unittest_jobfull_dir/7.sql | 31 - tune/protox/tests/unittest_jobfull_dir/7b.sql | 29 - tune/protox/tests/unittest_jobfull_dir/7c.sql | 36 - tune/protox/tests/unittest_jobfull_dir/8.sql | 25 - tune/protox/tests/unittest_jobfull_dir/8b.sql | 30 - tune/protox/tests/unittest_jobfull_dir/8c.sql | 20 - tune/protox/tests/unittest_jobfull_dir/8d.sql | 20 - tune/protox/tests/unittest_jobfull_dir/9.sql | 33 - tune/protox/tests/unittest_jobfull_dir/9b.sql | 31 - tune/protox/tests/unittest_jobfull_dir/9c.sql | 30 - tune/protox/tests/unittest_jobfull_dir/9d.sql | 29 - .../tests/unittest_jobfull_dir/order.txt | 113 --- tune/protox/tests/unittest_primitive.py | 150 --- .../tests/unittest_ref/ref_dsb_idxspace.pkl | Bin 7599 -> 0 bytes .../tests/unittest_ref/ref_dsb_workload.pkl | Bin 106915 -> 0 bytes .../unittest_ref/ref_jobfull_idxspace.pkl | Bin 4401 -> 0 bytes .../unittest_ref/ref_jobfull_workload.pkl | Bin 141086 -> 0 bytes .../tests/unittest_ref/ref_tpcc_idxspace.pkl | Bin 2753 -> 0 bytes .../tests/unittest_ref/ref_tpcc_workload.pkl | Bin 11205 -> 0 bytes 
.../tests/unittest_ref/ref_tpch_idxspace.pkl | Bin 3133 -> 0 bytes .../tests/unittest_ref/ref_tpch_workload.pkl | Bin 18675 -> 0 bytes tune/protox/tests/unittest_tpcc_dir/1.sql | 5 - tune/protox/tests/unittest_tpcc_dir/10.sql | 3 - tune/protox/tests/unittest_tpcc_dir/11.sql | 2 - tune/protox/tests/unittest_tpcc_dir/12.sql | 4 - tune/protox/tests/unittest_tpcc_dir/13.sql | 4 - tune/protox/tests/unittest_tpcc_dir/14.sql | 3 - tune/protox/tests/unittest_tpcc_dir/15.sql | 6 - tune/protox/tests/unittest_tpcc_dir/16.sql | 6 - tune/protox/tests/unittest_tpcc_dir/17.sql | 4 - tune/protox/tests/unittest_tpcc_dir/18.sql | 6 - tune/protox/tests/unittest_tpcc_dir/19.sql | 6 - tune/protox/tests/unittest_tpcc_dir/2.sql | 4 - tune/protox/tests/unittest_tpcc_dir/20.sql | 8 - tune/protox/tests/unittest_tpcc_dir/21.sql | 6 - tune/protox/tests/unittest_tpcc_dir/22.sql | 3 - tune/protox/tests/unittest_tpcc_dir/23.sql | 3 - tune/protox/tests/unittest_tpcc_dir/24.sql | 4 - tune/protox/tests/unittest_tpcc_dir/25.sql | 4 - tune/protox/tests/unittest_tpcc_dir/26.sql | 7 - tune/protox/tests/unittest_tpcc_dir/27.sql | 5 - tune/protox/tests/unittest_tpcc_dir/28.sql | 8 - tune/protox/tests/unittest_tpcc_dir/29.sql | 7 - tune/protox/tests/unittest_tpcc_dir/3.sql | 4 - tune/protox/tests/unittest_tpcc_dir/30.sql | 3 - tune/protox/tests/unittest_tpcc_dir/31.sql | 8 - tune/protox/tests/unittest_tpcc_dir/32.sql | 4 - tune/protox/tests/unittest_tpcc_dir/33.sql | 9 - tune/protox/tests/unittest_tpcc_dir/4.sql | 5 - tune/protox/tests/unittest_tpcc_dir/5.sql | 5 - tune/protox/tests/unittest_tpcc_dir/6.sql | 5 - tune/protox/tests/unittest_tpcc_dir/7.sql | 6 - tune/protox/tests/unittest_tpcc_dir/8.sql | 5 - tune/protox/tests/unittest_tpcc_dir/9.sql | 3 - tune/protox/tests/unittest_tpcc_dir/txn.txt | 33 - tune/protox/tests/unittest_tpch_dir/01.sql | 21 - tune/protox/tests/unittest_tpch_dir/02.sql | 45 - tune/protox/tests/unittest_tpch_dir/03.sql | 24 - tune/protox/tests/unittest_tpch_dir/04.sql | 22 - tune/protox/tests/unittest_tpch_dir/05.sql | 25 - tune/protox/tests/unittest_tpch_dir/06.sql | 10 - tune/protox/tests/unittest_tpch_dir/07.sql | 40 - tune/protox/tests/unittest_tpch_dir/08.sql | 38 - tune/protox/tests/unittest_tpch_dir/09.sql | 33 - tune/protox/tests/unittest_tpch_dir/10.sql | 33 - tune/protox/tests/unittest_tpch_dir/11.sql | 28 - tune/protox/tests/unittest_tpch_dir/12.sql | 29 - tune/protox/tests/unittest_tpch_dir/13.sql | 21 - tune/protox/tests/unittest_tpch_dir/14.sql | 14 - tune/protox/tests/unittest_tpch_dir/15.sql | 35 - tune/protox/tests/unittest_tpch_dir/16.sql | 30 - tune/protox/tests/unittest_tpch_dir/17.sql | 18 - tune/protox/tests/unittest_tpch_dir/18.sql | 34 - tune/protox/tests/unittest_tpch_dir/19.sql | 36 - tune/protox/tests/unittest_tpch_dir/20.sql | 38 - tune/protox/tests/unittest_tpch_dir/21.sql | 41 - tune/protox/tests/unittest_tpch_dir/22.sql | 38 - tune/protox/tests/unittest_tpch_dir/order.txt | 22 - tune/protox/tests/unittest_workload.py | 93 -- tune/protox/tests/unittest_workload_utils.py | 231 ----- util/log.py | 13 - util/workspace.py | 140 +-- 345 files changed, 52 insertions(+), 23808 deletions(-) delete mode 100644 analyze/__init__.py delete mode 100644 analyze/cli.py delete mode 100644 analyze/tests/__init__.py delete mode 100644 analyze/tests/unittest_analysis_files/out.tfevents delete mode 100644 analyze/tests/unittest_analyze.py delete mode 100755 experiments/protox_tpch_sf0point1/main.sh delete mode 100755 experiments/protox_tpch_sf1/main.sh delete mode 100755 
experiments/protox_tpch_sf10/main.sh delete mode 100755 scripts/run_demo.sh delete mode 100644 scripts/run_protox_e2e_test.py delete mode 100644 tune/__init__.py delete mode 100644 tune/cli.py delete mode 100644 tune/demo/__init__.py delete mode 100644 tune/demo/main.py delete mode 100644 tune/protox/__init__.py delete mode 100644 tune/protox/agent/__init__.py delete mode 100644 tune/protox/agent/agent_env.py delete mode 100644 tune/protox/agent/base_class.py delete mode 100644 tune/protox/agent/buffers.py delete mode 100644 tune/protox/agent/build_trial.py delete mode 100644 tune/protox/agent/cli.py delete mode 100644 tune/protox/agent/coerce_config.py delete mode 100644 tune/protox/agent/default_sysknobs.yaml delete mode 100644 tune/protox/agent/hpo.py delete mode 100644 tune/protox/agent/noise.py delete mode 100644 tune/protox/agent/off_policy_algorithm.py delete mode 100644 tune/protox/agent/policies.py delete mode 100644 tune/protox/agent/replay.py delete mode 100644 tune/protox/agent/torch_layers.py delete mode 100644 tune/protox/agent/tune.py delete mode 100644 tune/protox/agent/utils.py delete mode 100644 tune/protox/agent/wolp/__init__.py delete mode 100644 tune/protox/agent/wolp/policies.py delete mode 100644 tune/protox/agent/wolp/wolp.py delete mode 100644 tune/protox/cli.py delete mode 100644 tune/protox/default_job_benchbase_config.xml delete mode 100644 tune/protox/default_job_benchmark_config.yaml delete mode 100644 tune/protox/default_tpch_benchbase_config.xml delete mode 100644 tune/protox/default_tpch_benchmark_config.yaml delete mode 100644 tune/protox/embedding/__init__.py delete mode 100644 tune/protox/embedding/analyze.py delete mode 100644 tune/protox/embedding/cli.py delete mode 100644 tune/protox/embedding/datagen.py delete mode 100644 tune/protox/embedding/default_hpo_space.json delete mode 100644 tune/protox/embedding/loss.py delete mode 100644 tune/protox/embedding/select.py delete mode 100644 tune/protox/embedding/train.py delete mode 100644 tune/protox/embedding/train_all.py delete mode 100644 tune/protox/embedding/train_args.py delete mode 100644 tune/protox/embedding/trainer.py delete mode 100644 tune/protox/embedding/utils.py delete mode 100644 tune/protox/embedding/vae.py delete mode 100644 tune/protox/env/__init__.py delete mode 100644 tune/protox/env/artifact_manager.py delete mode 100644 tune/protox/env/lsc/__init__.py delete mode 100644 tune/protox/env/lsc/lsc.py delete mode 100644 tune/protox/env/lsc/lsc_wrapper.py delete mode 100644 tune/protox/env/mqo/__init__.py delete mode 100644 tune/protox/env/mqo/mqo_wrapper.py delete mode 100644 tune/protox/env/pg_env.py delete mode 100644 tune/protox/env/space/__init__.py delete mode 100644 tune/protox/env/space/holon_space.py delete mode 100644 tune/protox/env/space/latent_space/__init__.py delete mode 100644 tune/protox/env/space/latent_space/latent_index_space.py delete mode 100644 tune/protox/env/space/latent_space/latent_knob_space.py delete mode 100644 tune/protox/env/space/latent_space/latent_query_space.py delete mode 100644 tune/protox/env/space/latent_space/lsc_index_space.py delete mode 100644 tune/protox/env/space/primitive/__init__.py delete mode 100644 tune/protox/env/space/primitive/index.py delete mode 100644 tune/protox/env/space/primitive/knob.py delete mode 100644 tune/protox/env/space/primitive/latent_knob.py delete mode 100644 tune/protox/env/space/primitive_space/__init__.py delete mode 100644 tune/protox/env/space/primitive_space/index_policy.py delete mode 100644 
tune/protox/env/space/primitive_space/index_space.py delete mode 100644 tune/protox/env/space/primitive_space/knob_space.py delete mode 100644 tune/protox/env/space/primitive_space/query_space.py delete mode 100644 tune/protox/env/space/state/__init__.py delete mode 100644 tune/protox/env/space/state/lsc_space.py delete mode 100644 tune/protox/env/space/state/metric.py delete mode 100644 tune/protox/env/space/state/space.py delete mode 100644 tune/protox/env/space/state/structure.py delete mode 100644 tune/protox/env/space/utils.py delete mode 100644 tune/protox/env/target_reset/__init__.py delete mode 100644 tune/protox/env/target_reset/target_reset_wrapper.py delete mode 100644 tune/protox/env/types.py delete mode 100644 tune/protox/env/util/__init__.py delete mode 100644 tune/protox/env/util/execute.py delete mode 100644 tune/protox/env/util/reward.py delete mode 100644 tune/protox/env/util/workload_analysis.py delete mode 100644 tune/protox/env/workload.py delete mode 100644 tune/protox/tests/__init__.py delete mode 100644 tune/protox/tests/unittest_benchmark_configs/unittest_dsb.yaml delete mode 100644 tune/protox/tests/unittest_benchmark_configs/unittest_jobfull.yaml delete mode 100644 tune/protox/tests/unittest_benchmark_configs/unittest_tpcc.yaml delete mode 100644 tune/protox/tests/unittest_benchmark_configs/unittest_tpch.yaml delete mode 100644 tune/protox/tests/unittest_dsb_dir/order.txt delete mode 100644 tune/protox/tests/unittest_dsb_dir/query001s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query010s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query013s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query013s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query014s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query018s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query018s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query019s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query019s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query023s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query025s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query025s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query027s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query027s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query030s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query031s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query032s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query038s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query039as0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query039bs0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query040s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query040s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query050s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query050s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query054s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query058s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query059s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query064s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query065s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query069s0.sql delete mode 100644 
tune/protox/tests/unittest_dsb_dir/query072s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query072s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query075s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query080s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query081s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query083s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query084s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query084s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query085s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query085s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query087s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query091s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query091s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query092s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query094s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query099s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query099s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query100s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query100s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query101s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query101s0_spj.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query102s0.sql delete mode 100644 tune/protox/tests/unittest_dsb_dir/query102s0_spj.sql delete mode 100644 tune/protox/tests/unittest_index_space.py delete mode 100644 tune/protox/tests/unittest_jobfull_dir/1.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/10.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/10b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/10c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/11.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/11b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/11c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/11d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/12.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/12b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/12c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/13.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/13b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/13c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/13d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/14.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/14b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/14c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/15.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/15b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/15c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/15d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/16.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/16b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/16c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/16d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/17.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/17b.sql delete mode 100644 
tune/protox/tests/unittest_jobfull_dir/17c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/17d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/17e.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/17f.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/18.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/18b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/18c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/19.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/19b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/19c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/19d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/1b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/1c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/1d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/2.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/20.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/20b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/20c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/21.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/21b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/21c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/22.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/22b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/22c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/22d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/23.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/23b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/23c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/24.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/24b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/25.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/25b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/25c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/26.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/26b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/26c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/27.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/27b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/27c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/28.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/28b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/28c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/29.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/29b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/29c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/2b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/2c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/2d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/3.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/30.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/30b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/30c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/31.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/31b.sql delete mode 100644 
tune/protox/tests/unittest_jobfull_dir/31c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/32.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/32b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/33.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/33b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/33c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/3b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/3c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/4.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/4b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/4c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/5.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/5b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/5c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/6.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/6b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/6c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/6d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/6e.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/6f.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/7.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/7b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/7c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/8.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/8b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/8c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/8d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/9.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/9b.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/9c.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/9d.sql delete mode 100644 tune/protox/tests/unittest_jobfull_dir/order.txt delete mode 100644 tune/protox/tests/unittest_primitive.py delete mode 100644 tune/protox/tests/unittest_ref/ref_dsb_idxspace.pkl delete mode 100644 tune/protox/tests/unittest_ref/ref_dsb_workload.pkl delete mode 100644 tune/protox/tests/unittest_ref/ref_jobfull_idxspace.pkl delete mode 100644 tune/protox/tests/unittest_ref/ref_jobfull_workload.pkl delete mode 100644 tune/protox/tests/unittest_ref/ref_tpcc_idxspace.pkl delete mode 100644 tune/protox/tests/unittest_ref/ref_tpcc_workload.pkl delete mode 100644 tune/protox/tests/unittest_ref/ref_tpch_idxspace.pkl delete mode 100644 tune/protox/tests/unittest_ref/ref_tpch_workload.pkl delete mode 100644 tune/protox/tests/unittest_tpcc_dir/1.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/10.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/11.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/12.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/13.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/14.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/15.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/16.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/17.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/18.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/19.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/2.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/20.sql delete mode 100644 
tune/protox/tests/unittest_tpcc_dir/21.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/22.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/23.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/24.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/25.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/26.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/27.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/28.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/29.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/3.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/30.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/31.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/32.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/33.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/4.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/5.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/6.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/7.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/8.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/9.sql delete mode 100644 tune/protox/tests/unittest_tpcc_dir/txn.txt delete mode 100644 tune/protox/tests/unittest_tpch_dir/01.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/02.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/03.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/04.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/05.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/06.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/07.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/08.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/09.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/10.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/11.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/12.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/13.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/14.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/15.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/16.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/17.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/18.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/19.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/20.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/21.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/22.sql delete mode 100644 tune/protox/tests/unittest_tpch_dir/order.txt delete mode 100644 tune/protox/tests/unittest_workload.py delete mode 100644 tune/protox/tests/unittest_workload_utils.py diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 15cbe79a..ed0f0d3e 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -58,13 +58,3 @@ jobs: . "$HOME/.cargo/env" export ./scripts/run_integration_tests.sh - - # - name: Run end-to-end tests - # # End-to-end tests are like integration tests in that they require external systems to be running. - # # Unlike integration tests though, they don't perform detailed checks for any individual module. - # # - # # Note that we need to run with a non-root user in order to start Postgres. This is configured in the .yaml - # # file for our self-hosted GHA runners. - # run: | - # . 
"$HOME/.cargo/env" - # python -m scripts.run_protox_e2e_test ssd diff --git a/README.md b/README.md index 9637fe79..16d08dd6 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,6 @@ The tasks are grouped into categories that correspond to the top-level directori - `benchmark` - tasks to generate data and queries for different benchmarks (e.g., TPC-H, JOB) - `dbms` - tasks to build and start DBMSs (e.g., PostgreSQL) -- `tune` - tasks to train autonomous database tuning agents ## Credits diff --git a/analyze/__init__.py b/analyze/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/analyze/cli.py b/analyze/cli.py deleted file mode 100644 index b45eb06f..00000000 --- a/analyze/cli.py +++ /dev/null @@ -1,79 +0,0 @@ -import json -import logging -import re -from pathlib import Path -from typing import Any - -import click -import tensorflow -from google.protobuf.json_format import MessageToJson -from tensorflow.core.util.event_pb2 import Event - -from util.log import DBGYM_OUTPUT_LOGGER_NAME - - -@click.group(name="analyze") -def analyze_group() -> None: - pass - - -@click.command(name="tfevents") -@click.argument("tfevents-path", type=Path) -def analyze_tfevents(tfevents_path: Path) -> None: - minimal_json = tboard_to_minimal_json(tfevents_path) - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info( - f"seconds spent resetting: {get_total_instr_time_event(minimal_json, r'.*PostgresEnv_reset$')}" - ) - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info( - f"seconds spent reconfiguring: {get_total_instr_time_event(minimal_json, r'.*PostgresEnv_shift_state$')}" - ) - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info( - f"seconds spent evaluating workload: {get_total_instr_time_event(minimal_json, r'.*Workload_execute$')}" - ) - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info( - f"seconds spent training agent: {get_total_instr_time_event(minimal_json, r'.*(WolpPolicy_train_actor|WolpPolicy_train_critic)$')}" - ) - - -# The "minimal json" unwraps each summary so that we're left only with the parts that differ between summaries -def tboard_to_minimal_json(tfevent_fpath: Path) -> list[dict[str, Any]]: - minimal_json = [] - - raw_dataset = tensorflow.data.TFRecordDataset(tfevent_fpath) - - for raw_record in raw_dataset: - event = Event() - event.ParseFromString(raw_record.numpy()) - - # Convert event to JSON - json_summary = json.loads(MessageToJson(event.summary)) - - # We get a {} at the very start - if json_summary == {}: - continue - - assert "value" in json_summary - json_summary = json_summary["value"] - assert len(json_summary) == 1 - json_summary = json_summary[0] - - minimal_json.append(json_summary) - - return minimal_json - - -# An "instr_time_event" is an event with a "tag" that looks like "instr_time/*" -def get_total_instr_time_event( - minimal_json: list[dict[str, Any]], event_regex: str -) -> float: - event_pattern = re.compile(event_regex) - total_time = 0 - - for json_summary in minimal_json: - if event_pattern.fullmatch(json_summary["tag"]) is not None: - total_time += json_summary["simpleValue"] - - return total_time - - -analyze_group.add_command(analyze_tfevents) diff --git a/analyze/tests/__init__.py b/analyze/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/analyze/tests/unittest_analysis_files/out.tfevents b/analyze/tests/unittest_analysis_files/out.tfevents deleted file mode 100644 index ce803d0520db46ecff6a11d6d7e636e512e58053..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 40283 
zVrZ=%E^<~s-y5Dj6lDDaWBF%wbl3{Deq*Iu+^FLZw)#Dnu(j4|tDh_D zKR+kpILK}{C&dTMn44LBebYj}Uks#~Ubu5;DRws{THOK#y1mCO+Jnp!j zg_84FhU)(kMt^{|x&V*EFC#L&jUR{^s=2*!$7x3x3F0L z-8y-!K>u;~rnP(Zb3oSjwz5zn9xJL}n5VN#lP`d*gE5wWRws`Y==aNA=(NSLmx0IG z7)s7#TVT<^QS-ohd;+tD_gI1cIxnH0Mt&a$x^>+qT(?{vD{7wVmyL261iI~yvHZJr z@>q$UXQcTI5B+>J$eM+rL_AiaZpK%Xu4nOfko73W^3Uqzv9e~NQ2sb(FYvfw0oyA% zk8SK{?{*FW9v5zB*}{8lW4>EHqy%(Zg_*p&jGp8fco>L;g;fo|11*r@!wb@Et+ zKCb_Zrtr@bvq08l4CS4b>vM$)Eo^mJ98Z9TN7sT?v~4A6>72Ltmf~=LGZiJ7L4WJ zt&_)UwAZ^K)9KZ-NFdZBA)!t}sgY@RLq_dW-UA4=buU{gIib|nje|x^r=3agrxX>K zExb@_)G)x6`LK260w9!ZADfqmP`0Q{uCEK|HW6b92&F*@ao>8+-qz*6AZsCp^3KY& zFlwsrwbc5er~r_)0%Q4Sb@Et)wvPSEyI%T+P~fp_Kiexgk8OdAl^z?x!kB>B!h5Vi zt4yoBC$=bpXA^A0Oy1pcEsVCP-TOps0^L5sSpMBQE{xVQyVPq6m_{p?eFZ#@Kg4#+ zJ1d9BR_xZ$%if)aqCOyNA;$90>gcf*T7B_&ynk~J@VEj)iFs^iuNbic{-I~-VRp`f zdu(U6{wnw~=yoz@^6r+yV>`Ch`Xj!e+pQSOzgs7dB`Ef{bf5IsRmVZrat!62mCIuZ z`u0#I-MM8cJkz$<5w=$TS)DwVSpNjiOS+(2g9zZU5ktv&Y-7h3j)xmyYcX4Rk0t2P zmlbr@sQba7TlbUfka>5@<*~&2%eqSXuOX&S#@SybY+a}BY>@$D1I2^Iec$r+>4cbA zvp$kNtqY0U__^iyECRpcSc_4$`W46j|C-%D2ZUukY2&AU;JFjLOneI?-~MG{r#JiJ z&50H^>Z;Zyj8D0?A^7!3`(n0C62Bg)`D;JSq=N8~fI414D#!95LuVH)r=kj4&j-}y zWT|rGp7$Y@wqSl6K)oU$mE&@$e&Hd2n)+G&4uIO~6wzLl)@NFGsIoBebU+;=AeEz6 z75e(rFI1>#U01Lmb^MvlN@78(`GrmZy=cgV{s3zb5X%u(QvLlZoBsa#hP~i^z7Avg z-{YL_=ThX>B8`$>t7rzY-o{YgBj(6zJy>bi>;6=B?lbt;N;;ooo8_O?>3%Ln^razG z-KmRKfctp_hLXFV+g@hu-E{a?@ck+evxUE(OVNF!FZI)_tEHgZTbRkaTaE?F+HLg; iwP$1RJa9BX=gsw0HK&uj(?jT^0k(V0^GSH&681k1EAs&W diff --git a/analyze/tests/unittest_analyze.py b/analyze/tests/unittest_analyze.py deleted file mode 100644 index 4c1af2cc..00000000 --- a/analyze/tests/unittest_analyze.py +++ /dev/null @@ -1,31 +0,0 @@ -import unittest -from pathlib import Path - -from analyze.cli import get_total_instr_time_event, tboard_to_minimal_json - - -class AnalyzeTests(unittest.TestCase): - def test_tfevents(self) -> None: - tfevents_path = Path("analyze/tests/unittest_analysis_files/out.tfevents") - minimal_json = tboard_to_minimal_json(tfevents_path) - self.assertAlmostEqual( - get_total_instr_time_event(minimal_json, r".*PostgresEnv_reset$"), 8.0046994 - ) - self.assertAlmostEqual( - get_total_instr_time_event(minimal_json, r".*PostgresEnv_shift_state$"), - 12.4918935, - ) - self.assertAlmostEqual( - get_total_instr_time_event(minimal_json, r".*Workload_execute$"), - 31.831543260000004, - ) - self.assertAlmostEqual( - get_total_instr_time_event( - minimal_json, r".*(WolpPolicy_train_actor|WolpPolicy_train_critic)$" - ), - 19.9834938712, - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/benchmark/job/load_info.py b/benchmark/job/load_info.py index 8323f665..4df370b6 100644 --- a/benchmark/job/load_info.py +++ b/benchmark/job/load_info.py @@ -74,5 +74,5 @@ def get_table_file_delimiter(self) -> str: def get_constraints_fpath(self) -> Optional[Path]: # JOB does not have any constraints. It does have indexes, but we don't want to create - # those indexes so that Proto-X can start from a clean slate. + # those indexes so that the tuning agent can start from a clean slate. 
return None diff --git a/dbms/load_info_base_class.py b/dbms/load_info_base_class.py index 6df647da..847579d6 100644 --- a/dbms/load_info_base_class.py +++ b/dbms/load_info_base_class.py @@ -10,16 +10,16 @@ class LoadInfoBaseClass: """ def get_schema_fpath(self) -> Path: - raise NotImplemented + raise NotImplementedError def get_tables_and_fpaths(self) -> list[tuple[str, Path]]: - raise NotImplemented + raise NotImplementedError # We assume the table file has a "csv-like" format where values are separated by a delimiter. def get_table_file_delimiter(self) -> str: - raise NotImplemented + raise NotImplementedError # If the subclassing benchmark does not have constraints, you can return None here. # Constraints are also indexes. def get_constraints_fpath(self) -> Optional[Path]: - raise NotImplemented + raise NotImplementedError diff --git a/dbms/postgres/cli.py b/dbms/postgres/cli.py index 5c5722ed..8d34e366 100644 --- a/dbms/postgres/cli.py +++ b/dbms/postgres/cli.py @@ -1,8 +1,5 @@ """ At a high level, this file's goal is to (1) build postgres and (2) create dbdata (aka pgdata). -On the other hand, the goal of tune.protox.env.util.postgres is to provide helpers to manage - a Postgres instance during agent tuning. -util.pg provides helpers used by *both* of the above files (as well as other files). """ import logging diff --git a/experiments/protox_tpch_sf0point1/main.sh b/experiments/protox_tpch_sf0point1/main.sh deleted file mode 100755 index 480f28ca..00000000 --- a/experiments/protox_tpch_sf0point1/main.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -set -euxo pipefail - -SCALE_FACTOR=0.1 -INTENDED_DBDATA_HARDWARE=ssd -. ./experiments/load_per_machine_envvars.sh -echo $DBDATA_PARENT_DPATH - -# space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -# python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 0.1 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH -python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 0.2 -python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR -exit 0 - -# benchmark -python3 task.py benchmark tpch data $SCALE_FACTOR -python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR - -# postgres -python3 task.py dbms postgres build -python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH - -exit 0 - -# embedding -python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH # long datagen so that train doesn't crash -python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 - -# agent -python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --num-samples 4 --max-concurrent 4 --workload-timeout 100 --query-timeout 15 --tune-duration-during-hpo 1 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot -python3 task.py tune protox agent 
tune tpch --scale-factor $SCALE_FACTOR -python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR diff --git a/experiments/protox_tpch_sf1/main.sh b/experiments/protox_tpch_sf1/main.sh deleted file mode 100755 index 769bf942..00000000 --- a/experiments/protox_tpch_sf1/main.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -set -euxo pipefail - -SCALE_FACTOR=1 -INTENDED_DBDATA_HARDWARE=ssd -. ./experiments/load_per_machine_envvars.sh - -# space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 1 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot -exit 0 - -# benchmark -python3 task.py benchmark tpch data $SCALE_FACTOR -python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR - -# postgres -python3 task.py dbms postgres build -python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH - -# embedding -python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH -python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10 - -# agent -python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot -python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR diff --git a/experiments/protox_tpch_sf10/main.sh b/experiments/protox_tpch_sf10/main.sh deleted file mode 100755 index 62814340..00000000 --- a/experiments/protox_tpch_sf10/main.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -set -euxo pipefail - -SCALE_FACTOR=10 -INTENDED_DBDATA_HARDWARE=ssd -. ./experiments/load_per_machine_envvars.sh - -# space for testing. 
uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot -# python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --tune-duration-during-tune 4 -# python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR --enable-boot-during-tune --tune-duration-during-tune 4 -# python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR -# python3 task.py tune protox agent replay tpch --scale-factor $SCALE_FACTOR --boot-enabled-during-tune -exit 0 - -# benchmark -python3 task.py benchmark tpch data $SCALE_FACTOR -python3 task.py benchmark tpch workload --scale-factor $SCALE_FACTOR - -# postgres -python3 task.py dbms postgres build -python3 task.py dbms postgres dbdata tpch --scale-factor $SCALE_FACTOR --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH - -# embedding -python3 task.py tune protox embedding datagen tpch --scale-factor $SCALE_FACTOR --override-sample-limits "lineitem,32768" --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH -python3 task.py tune protox embedding train tpch --scale-factor $SCALE_FACTOR --train-max-concurrent 10 - -# agent -python3 task.py tune protox agent hpo tpch --scale-factor $SCALE_FACTOR --max-concurrent 4 --tune-duration-during-hpo 4 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot -python3 task.py tune protox agent tune tpch --scale-factor $SCALE_FACTOR diff --git a/scripts/_run_tests.py b/scripts/_run_tests.py index 5763f7e1..bfd51aee 100644 --- a/scripts/_run_tests.py +++ b/scripts/_run_tests.py @@ -2,12 +2,6 @@ import sys import unittest -# See comment in the base task.py file for why we do this. 
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" -import tensorflow - -del os.environ["TF_CPP_MIN_LOG_LEVEL"] - if __name__ == "__main__": loader = unittest.TestLoader() suite = loader.discover(".", pattern=sys.argv[1]) diff --git a/scripts/configs/apt_requirements.txt b/scripts/configs/apt_requirements.txt index b12d0d37..ae69fccf 100644 --- a/scripts/configs/apt_requirements.txt +++ b/scripts/configs/apt_requirements.txt @@ -6,4 +6,4 @@ rpm zlib1g-dev cbindgen redis-server -redis-tools +redis-tools \ No newline at end of file diff --git a/scripts/configs/requirements.txt b/scripts/configs/requirements.txt index 6c0cb4b7..baff4a68 100644 --- a/scripts/configs/requirements.txt +++ b/scripts/configs/requirements.txt @@ -1,137 +1,27 @@ -absl-py==2.1.0 -aiosignal==1.3.1 -astroid==3.2.4 -astunparse==1.6.3 -async-timeout==4.0.3 -attrs==23.2.0 -black==24.2.0 -cachetools==5.3.2 -certifi==2023.11.17 -charset-normalizer==3.3.2 -click==8.1.7 -cloudpickle==3.0.0 -cmake==3.28.1 -cramjam==2.8.1 -dill==0.3.8 -distlib==0.3.8 -faiss-gpu==1.7.2 -Farama-Notifications==0.0.4 -fastparquet==2023.10.1 -filelock==3.13.1 -flatbuffers==23.5.26 -frozenlist==1.4.1 -fsspec==2023.12.2 -future==0.18.3 -gast==0.5.4 -google-auth==2.27.0 -google-auth-oauthlib==1.0.0 -google-pasta==0.2.0 -greenlet==3.0.3 -grpcio==1.60.0 -gymnasium==0.29.1 -h5py==3.10.0 -hyperopt==0.2.7 -idna==3.6 -importlib-metadata==7.0.1 +async-timeout==5.0.1 +black==24.10.0 +cffi==1.17.1 +click==8.1.8 +cryptography==44.0.0 +greenlet==3.1.1 isort==5.13.2 -jax-jumpy==1.0.0 -Jinja2==3.1.3 -joblib==1.3.2 -jsonschema==4.21.1 -jsonschema-specifications==2023.12.1 -keras==2.15.0 -libclang==16.0.6 -lit==17.0.6 -Markdown==3.5.2 -MarkupSafe==2.1.4 -mccabe==0.7.0 -ml-dtypes==0.2.0 -mpmath==1.3.0 -msgpack==1.0.7 -mypy==1.11.2 +mypy==1.14.0 mypy-extensions==1.0.0 -networkx==3.2.1 -numpy==1.26.3 -nvidia-cublas-cu11==11.10.3.66 -nvidia-cublas-cu12==12.1.3.1 -nvidia-cuda-cupti-cu11==11.7.101 -nvidia-cuda-cupti-cu12==12.1.105 -nvidia-cuda-nvrtc-cu11==11.7.99 -nvidia-cuda-nvrtc-cu12==12.1.105 -nvidia-cuda-runtime-cu11==11.7.99 -nvidia-cuda-runtime-cu12==12.1.105 -nvidia-cudnn-cu11==8.5.0.96 -nvidia-cudnn-cu12==9.1.0.70 -nvidia-cufft-cu11==10.9.0.58 -nvidia-cufft-cu12==11.0.2.54 -nvidia-curand-cu11==10.2.10.91 -nvidia-curand-cu12==10.3.2.106 -nvidia-cusolver-cu11==11.4.0.1 -nvidia-cusolver-cu12==11.4.5.107 -nvidia-cusparse-cu11==11.7.4.91 -nvidia-cusparse-cu12==12.1.0.106 -nvidia-nccl-cu11==2.20.5 -nvidia-nccl-cu12==2.20.5 -nvidia-nvjitlink-cu12==12.3.101 -nvidia-nvtx-cu11==11.7.91 -nvidia-nvtx-cu12==12.1.105 -oauthlib==3.2.2 -opt-einsum==3.3.0 -packaging==23.2 -pandas==2.2.0 -pandas-stubs==2.2.2.240807 +packaging==24.2 pathspec==0.12.1 -pglast==6.2 -platformdirs==4.2.0 -plumbum==1.8.1 -protobuf==3.20.3 -psutil==5.9.8 -psycopg==3.1.9 -py4j==0.10.9.7 -pyaml==23.5.9 -pyarrow==15.0.0 -pyasn1==0.5.1 -pyasn1-modules==0.3.0 -python-dateutil==2.8.2 -pytorch-metric-learning==2.1.1 -pytz==2023.4 -PyYAML==6.0.1 -ray==2.9.3 -record-keeper==0.9.32 -redis==5.0.3 -referencing==0.33.0 -requests==2.31.0 -requests-oauthlib==1.3.1 -rpds-py==0.17.1 -rsa==4.9 -scikit-learn==1.4.0 -scipy==1.12.0 -six==1.16.0 -SQLAlchemy==2.0.28 -sympy==1.12 -tabulate==0.9.0 -tensorboard==2.15.2 -tensorboard-data-server==0.7.2 -tensorboardX==2.6 -tensorflow==2.15.0.post1 -tensorflow-estimator==2.15.0 -tensorflow-io-gcs-filesystem==0.36.0 -termcolor==2.4.0 -threadpoolctl==3.2.0 -tomli==2.0.1 -tomlkit==0.13.2 -torch==2.4.0 -tqdm==4.66.1 -triton==3.0.0 -types-protobuf==5.28.0.20240924 
-types-python-dateutil==2.9.0.20240821 -types-pytz==2024.1.0.20240417 -types-PyYAML==6.0.12.20240808 -typing_extensions==4.9.0 -tzdata==2023.4 -urllib3==2.2.0 -virtualenv==20.25.0 -Werkzeug==3.0.1 -wrapt==1.14.1 -zipp==3.17.0 -streamlit==1.39.0 +pglast==7.2 +platformdirs==4.3.6 +plumbum==1.9.0 +psutil==6.1.1 +psycopg==3.2.3 +pycparser==2.22 +PyYAML==6.0.2 +redis==5.2.1 +SQLAlchemy==2.0.36 +tomli==2.2.1 +types-cffi==1.16.0.20241221 +types-pyOpenSSL==24.1.0.20240722 +types-PyYAML==6.0.12.20241221 +types-redis==4.6.0.20241004 +types-setuptools==75.6.0.20241223 +typing_extensions==4.12.2 diff --git a/scripts/pat_test.sh b/scripts/pat_test.sh index e9802a14..dcbb9a90 100755 --- a/scripts/pat_test.sh +++ b/scripts/pat_test.sh @@ -5,8 +5,7 @@ set -euxo pipefail . ./experiments/load_per_machine_envvars.sh # space for testing. uncomment this to run individual commands from the script (copy pasting is harder because there are envvars) -python3 task.py tune protox agent replay tpch --scale-factor 0.01 -exit 0 +# exit 0 # benchmark python3 task.py benchmark job data @@ -14,13 +13,4 @@ python3 task.py benchmark job workload --query-subset demo # postgres python3 task.py dbms postgres build -python3 task.py dbms postgres dbdata job --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH - -# embedding -python3 task.py tune protox embedding datagen job --workload-name-suffix demo --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH # long datagen so that train doesn't crash -python3 task.py tune protox embedding train job --workload-name-suffix demo --iterations-per-epoch 1 --num-points-to-sample 2 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2 - -# agent -python3 task.py tune protox agent hpo job --workload-name-suffix demo --num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 2 --tune-duration-during-hpo 0.03 --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH --build-space-good-for-boot -python3 task.py tune protox agent tune job --workload-name-suffix demo -python3 task.py tune protox agent replay job --workload-name-suffix demo +python3 task.py dbms postgres dbdata job --intended-dbdata-hardware $INTENDED_DBDATA_HARDWARE --dbdata-parent-dpath $DBDATA_PARENT_DPATH \ No newline at end of file diff --git a/scripts/pipfreeze.sh b/scripts/pipfreeze.sh index b1d112fd..b9493c5e 100755 --- a/scripts/pipfreeze.sh +++ b/scripts/pipfreeze.sh @@ -1,2 +1,2 @@ #!/bin/bash -pip freeze >dependencies/requirements.txt \ No newline at end of file +pip freeze >scripts/configs/requirements.txt \ No newline at end of file diff --git a/scripts/run_demo.sh b/scripts/run_demo.sh deleted file mode 100755 index e5a1e3c7..00000000 --- a/scripts/run_demo.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# You may need to do `pkill python` to fully restart the streamlit server. If you do not do this, objects cached -# with @st.cache_resource may still be persisted even after you do Ctrl-C and rerun ./scripts/run_demo.sh. 
-python -m streamlit run tune/demo/main.py \ No newline at end of file diff --git a/scripts/run_protox_e2e_test.py b/scripts/run_protox_e2e_test.py deleted file mode 100644 index f1f2ad77..00000000 --- a/scripts/run_protox_e2e_test.py +++ /dev/null @@ -1,207 +0,0 @@ -import os -import shutil -import subprocess -import sys -from enum import Enum, auto -from pathlib import Path - -import yaml - -from benchmark.constants import DEFAULT_SCALE_FACTOR -from benchmark.tpch.constants import DEFAULT_TPCH_SEED -from util.pg import get_is_postgres_running -from util.workspace import ( - get_default_embedder_path, - get_default_hpoed_agent_params_path, - get_default_pristine_dbdata_snapshot_path, - get_default_replay_data_fpath, - get_default_repo_path, - get_default_tables_path, - get_default_traindata_path, - get_default_tuning_steps_dpath, - get_default_workload_path, - get_workload_name, -) - -# Be careful when changing these constants. In some places, the E2E test is hardcoded to work for these specific constants. -DBMS = "postgres" -AGENT = "protox" -E2ETEST_DBGYM_CONFIG_FPATH = Path("scripts/configs/e2e_test_dbgym_config.yaml") - - -def get_workspace_dpath(config_fpath: Path) -> Path: - with open(config_fpath, "r") as file: - config = yaml.safe_load(file) - return Path(config.get("dbgym_workspace_path")) - - -def clear_workspace(workspace_dpath: Path) -> None: - actual_workspace_dpath = Path("../dbgym_workspace") - if workspace_dpath.exists(): - if actual_workspace_dpath.exists(): - assert not workspace_dpath.samefile( - actual_workspace_dpath - ), "YOU MAY BE ABOUT TO DELETE YOUR ACTUAL WORKSPACE" - shutil.rmtree(workspace_dpath) - - -class Stage(Enum): - Tables = auto() - Workload = auto() - DBRepo = auto() - DBData = auto() - EmbeddingData = auto() - EmbeddingModel = auto() - TuneHPO = auto() - TuneTune = auto() - Replay = auto() - - -# When debugging the E2E test, this gives you an easy way of turning off certain stages to speed up your iteration cycle. -# -# I made this slightly convoluted system is because you can't just naively comment out a big chunk of code with all the stages -# you don't want to run. Many stages define variables that are used by future stages, which can't be commented out. -# -# One useful debugging workflow is to run all stages up until a point, make a copy of that workspace, and then rerun the -# integration test as many times as you want starting from that copy. -ALL_STAGES = {stage for stage in Stage} -# This is a set and not a list because the order of stages is already pre-defined. This just defines what not to skip. 
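# (Editor's note: the next two comment lines are an illustrative sketch, not part of the deleted file.)
# For example, following the workflow described above (copy a workspace that already ran the early stages,
# then iterate on only the later ones), one might narrow the set like so:
# STAGES_TO_RUN = ALL_STAGES - {Stage.EmbeddingData, Stage.EmbeddingModel, Stage.TuneHPO, Stage.TuneTune}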
-STAGES_TO_RUN = ALL_STAGES - - -def run_e2e_for_benchmark(benchmark_name: str, intended_dbdata_hardware: str) -> None: - if benchmark_name == "tpch": - scale_factor = 0.01 - query_subset = "all" - workload_name_suffix = f"{DEFAULT_TPCH_SEED}_{DEFAULT_TPCH_SEED}_{query_subset}" - embedding_datagen_args = "--override-sample-limits lineitem,32768" - embedding_train_args = "--iterations-per-epoch 1 --num-points-to-sample 1 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2" - tune_hpo_args = "--num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 1 --tune-duration-during-hpo 0.01" - elif benchmark_name == "job": - scale_factor = DEFAULT_SCALE_FACTOR - query_subset = "demo" - workload_name_suffix = query_subset - embedding_datagen_args = "" - embedding_train_args = "--iterations-per-epoch 1 --num-points-to-sample 2 --num-batches 1 --batch-size 64 --start-epoch 15 --num-samples 4 --train-max-concurrent 4 --num-curate 2" - tune_hpo_args = "--num-samples 2 --max-concurrent 2 --workload-timeout 15 --query-timeout 2 --tune-duration-during-hpo 0.03" - else: - assert False - - # Clear the E2E testing workspace so we always run the test with a clean slate. - workspace_dpath = get_workspace_dpath(E2ETEST_DBGYM_CONFIG_FPATH) - clear_workspace(workspace_dpath) - - # Make other checks that we have a clean slate for testing. - assert not get_is_postgres_running() - - # Run the full Proto-X training pipeline, asserting things along the way - # Setup (workload and database) - tables_dpath = get_default_tables_path( - workspace_dpath, benchmark_name, scale_factor - ) - if Stage.Tables in STAGES_TO_RUN: - assert not tables_dpath.exists() - subprocess.run( - f"python task.py benchmark {benchmark_name} data {scale_factor}".split(), - check=True, - ) - assert tables_dpath.exists() - - workload_name = get_workload_name(scale_factor, workload_name_suffix) - workload_dpath = get_default_workload_path( - workspace_dpath, benchmark_name, workload_name - ) - if Stage.Workload in STAGES_TO_RUN: - assert not workload_dpath.exists() - subprocess.run( - f"python task.py benchmark {benchmark_name} workload --query-subset {query_subset} --scale-factor {scale_factor}".split(), - check=True, - ) - assert workload_dpath.exists() - - repo_dpath = get_default_repo_path(workspace_dpath) - if Stage.DBRepo in STAGES_TO_RUN: - assert not repo_dpath.exists() - subprocess.run(f"python task.py dbms {DBMS} build".split(), check=True) - assert repo_dpath.exists() - - pristine_dbdata_snapshot_fpath = get_default_pristine_dbdata_snapshot_path( - workspace_dpath, benchmark_name, scale_factor - ) - if Stage.DBData in STAGES_TO_RUN: - assert not pristine_dbdata_snapshot_fpath.exists() - subprocess.run( - f"python task.py dbms {DBMS} dbdata {benchmark_name} --scale-factor {scale_factor} --intended-dbdata-hardware {intended_dbdata_hardware}".split(), - check=True, - ) - assert pristine_dbdata_snapshot_fpath.exists() - - # Tuning (embedding, HPO, and actual tuning) - traindata_dpath = get_default_traindata_path( - workspace_dpath, benchmark_name, workload_name - ) - if Stage.EmbeddingData in STAGES_TO_RUN: - assert not traindata_dpath.exists() - subprocess.run( - f"python task.py tune {AGENT} embedding datagen {benchmark_name} --workload-name-suffix {workload_name_suffix} --scale-factor {scale_factor} {embedding_datagen_args} --intended-dbdata-hardware {intended_dbdata_hardware}".split(), - check=True, - ) - assert traindata_dpath.exists() - - embedder_dpath = 
get_default_embedder_path( - workspace_dpath, benchmark_name, workload_name - ) - if Stage.EmbeddingModel in STAGES_TO_RUN: - assert not embedder_dpath.exists() - subprocess.run( - f"python task.py tune {AGENT} embedding train {benchmark_name} --workload-name-suffix {workload_name_suffix} --scale-factor {scale_factor} {embedding_train_args}".split(), - check=True, - ) - assert embedder_dpath.exists() - - hpoed_agent_params_fpath = get_default_hpoed_agent_params_path( - workspace_dpath, benchmark_name, workload_name - ) - if Stage.TuneHPO in STAGES_TO_RUN: - assert not hpoed_agent_params_fpath.exists() - subprocess.run( - f"python task.py tune {AGENT} agent hpo {benchmark_name} --workload-name-suffix {workload_name_suffix} --scale-factor {scale_factor} {tune_hpo_args} --intended-dbdata-hardware {intended_dbdata_hardware}".split(), - check=True, - ) - assert hpoed_agent_params_fpath.exists() - - tuning_steps_dpath = get_default_tuning_steps_dpath( - workspace_dpath, benchmark_name, workload_name, False - ) - if Stage.TuneTune in STAGES_TO_RUN: - assert not tuning_steps_dpath.exists() - subprocess.run( - f"python task.py tune {AGENT} agent tune {benchmark_name} --workload-name-suffix {workload_name_suffix} --scale-factor {scale_factor}".split(), - check=True, - ) - assert tuning_steps_dpath.exists() - - # Post-training (replay) - replay_data_fpath = get_default_replay_data_fpath( - workspace_dpath, benchmark_name, workload_name, False - ) - if Stage.Replay in STAGES_TO_RUN: - assert not replay_data_fpath.exists() - subprocess.run( - f"python3 task.py tune {AGENT} agent replay {benchmark_name} --workload-name-suffix {workload_name_suffix} --scale-factor {scale_factor}".split(), - check=True, - ) - assert replay_data_fpath.exists() - - # Clear it at the end as well to avoid leaving artifacts. - clear_workspace(workspace_dpath) - - -if __name__ == "__main__": - intended_dbdata_hardware = sys.argv[1] if len(sys.argv) > 1 else "hdd" - - # Set the config file so that we use resources that don't conflict with normal usage (e.g. a different workspace, different ports, etc.). - os.environ["DBGYM_CONFIG_PATH"] = str(E2ETEST_DBGYM_CONFIG_FPATH) - - run_e2e_for_benchmark("tpch", intended_dbdata_hardware) - run_e2e_for_benchmark("job", intended_dbdata_hardware) diff --git a/task.py b/task.py index ff30f9c3..0b59cae5 100644 --- a/task.py +++ b/task.py @@ -1,22 +1,9 @@ -import os -from pathlib import Path - import click -from util.log import set_up_loggers, set_up_warnings - -# Do this to suppress the logs we'd usually get when importing tensorflow. -# By importing tensorflow in task.py, we avoid it being imported in any other file since task.py is always entered first. -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" -import tensorflow - -del os.environ["TF_CPP_MIN_LOG_LEVEL"] - -from analyze.cli import analyze_group from benchmark.cli import benchmark_group from dbms.cli import dbms_group from manage.cli import manage_group -from tune.cli import tune_group +from util.log import set_up_loggers, set_up_warnings from util.workspace import make_standard_dbgym_cfg # TODO(phw2): Save commit, git diff, and run command. 
@@ -39,7 +26,5 @@ def task(ctx: click.Context) -> None: if __name__ == "__main__": task.add_command(benchmark_group) task.add_command(manage_group) - task.add_command(analyze_group) task.add_command(dbms_group) - task.add_command(tune_group) task() diff --git a/tune/__init__.py b/tune/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/cli.py b/tune/cli.py deleted file mode 100644 index 72aefb43..00000000 --- a/tune/cli.py +++ /dev/null @@ -1,13 +0,0 @@ -import click - -from tune.protox.cli import protox_group -from util.workspace import DBGymConfig - - -@click.group(name="tune") -@click.pass_obj -def tune_group(dbgym_cfg: DBGymConfig) -> None: - dbgym_cfg.append_group("tune") - - -tune_group.add_command(protox_group) diff --git a/tune/demo/__init__.py b/tune/demo/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/demo/main.py b/tune/demo/main.py deleted file mode 100644 index c8e5602a..00000000 --- a/tune/demo/main.py +++ /dev/null @@ -1,114 +0,0 @@ -import streamlit as st - -from env.pg_conn import PostgresConn -from util.pg import DEFAULT_POSTGRES_PORT, get_is_postgres_running -from util.workspace import ( - DEFAULT_BOOT_CONFIG_FPATH, - DBGymConfig, - get_default_dbdata_parent_dpath, - get_default_pgbin_path, - get_default_pristine_dbdata_snapshot_path, - make_standard_dbgym_cfg, -) - - -# The rationale behind this code is very subtle. I'll first go over streamlit concepts before describing why this function exists. -# -# First, in streamlit, there are three kinds of "script reruns". These are ordered from least to most "disruptive": -# 1. st.rerun(). Will reset any local variables but will not reset st.session_state. -# 2. Reloading the browser page (perhaps if you changed some code). Will reset local vars and st.session_state but not things -# cached with @st.cache_resource. -# 3. Restarting the streamlit server. If you're running the server locally, you can restart it by doing Ctrl-C, `pkill python`, -# and then `streamlit run ...` (or `./scripts/run_demo.sh`). Will reset local vars, st.session_state, and things cached with -# @st.cache_resource, but will not reset things persisted to disk (though we currently don't persist anything to disk). Doing -# `pkill python` is critical here to actually reset the things cached with @st.cache_resource. -# -# Next, DBGymConfig has a safeguard where it can only be created once per instance of the Python interpreter. If you just put it -# in st.session_state, it would get re-created when you reloaded the browser page, causing it to trip the assertion that checks -# DBGymConfig.num_times_created_this_run == 1. Thus, we use @st.cache_resource to avoid this. -# -# I considered modifying num_times_created_this_run to instead be num_active_instances and doing `num_active_instances -= 1` in -# DBGymConfig.__del__(). However, streamlit doesn't actually destroy objects when you reload the browser page; it only destroys -# objects when you restart the streamlit server. -# -# If you modify the code of DBGymConfig, you will need to fully restart the streamlit server for those changes to be propagated. 
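# (Editor's aside, not part of the deleted file: a minimal sketch of the anti-pattern the comment above warns against.)
# Storing the config in st.session_state would re-create it on every browser reload and trip the
# once-per-interpreter safeguard:
#
#     if "dbgym_cfg" not in st.session_state:
#         st.session_state.dbgym_cfg = make_standard_dbgym_cfg()  # re-runs after a page reload
#
# The @st.cache_resource version below survives page reloads because the cached object lives in the
# streamlit server process rather than in the per-session state.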
-@st.cache_resource -def make_dbgym_cfg_cached() -> DBGymConfig: - return make_standard_dbgym_cfg() - - -class Demo: - BENCHMARK = "tpch" - SCALE_FACTOR = 0.01 - - def __init__(self) -> None: - self.dbgym_cfg = make_dbgym_cfg_cached() - self.pristine_dbdata_snapshot_path = get_default_pristine_dbdata_snapshot_path( - self.dbgym_cfg.dbgym_workspace_path, Demo.BENCHMARK, Demo.SCALE_FACTOR - ) - self.dbdata_parent_dpath = get_default_dbdata_parent_dpath( - self.dbgym_cfg.dbgym_workspace_path - ) - self.pgbin_dpath = get_default_pgbin_path(self.dbgym_cfg.dbgym_workspace_path) - self.pg_conn = PostgresConn( - self.dbgym_cfg, - DEFAULT_POSTGRES_PORT, - self.pristine_dbdata_snapshot_path, - self.dbdata_parent_dpath, - self.pgbin_dpath, - None, - ) - - def _get_categorized_system_knobs(self) -> tuple[dict[str, str], dict[str, str]]: - IMPORTANT_KNOBS = {"shared_buffers", "enable_nestloop"} - all_knobs = self.pg_conn.get_system_knobs() - important_knobs = { - knob: val for knob, val in all_knobs.items() if knob in IMPORTANT_KNOBS - } - unimportant_knobs = { - knob: val for knob, val in all_knobs.items() if knob not in IMPORTANT_KNOBS - } - return important_knobs, unimportant_knobs - - def main(self) -> None: - is_postgres_running = get_is_postgres_running() - - if is_postgres_running: - st.write("Postgres is RUNNING") - - if st.button("Stop Postgres"): - self.pg_conn.shutdown_postgres() - st.rerun() - - with st.form("reconfig", clear_on_submit=True, enter_to_submit=False): - knob = st.text_input("Knob", placeholder="Enter text here...") - val = st.text_input("Value", placeholder="Enter text here...") - submit_button = st.form_submit_button("Reconfigure") - if submit_button: - if knob != "" and val != "": - if "conf_changes" not in st.session_state: - st.session_state.conf_changes = dict() - - # By using st.session_state, we persist changes across st.rerun() (though not across reloading the browser). 
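# (Editor's illustration with hypothetical values: after submitting the form twice, conf_changes might hold
# {"shared_buffers": "4GB", "enable_nestloop": "off"}, and the whole accumulated dict is re-applied on each
# restart_with_changes() call below.)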
- st.session_state.conf_changes[knob] = val - self.pg_conn.restart_with_changes(st.session_state.conf_changes) - st.rerun() - - important_knobs, unimportant_knobs = self._get_categorized_system_knobs() - with st.expander("Important knobs", expanded=True): - st.write(important_knobs) - - with st.expander("Other knobs", expanded=False): - st.write(unimportant_knobs) - else: - st.write("Postgres is STOPPED") - - if st.button("Start Postgres"): - self.pg_conn.restore_pristine_snapshot() - self.pg_conn.restart_postgres() - st.rerun() - - -if __name__ == "__main__": - demo = Demo() - demo.main() diff --git a/tune/protox/__init__.py b/tune/protox/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/agent/__init__.py b/tune/protox/agent/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/agent/agent_env.py b/tune/protox/agent/agent_env.py deleted file mode 100644 index 4a69c2ef..00000000 --- a/tune/protox/agent/agent_env.py +++ /dev/null @@ -1,151 +0,0 @@ -import copy -import inspect -from typing import Any - -import gymnasium as gym -import numpy as np -from numpy.typing import NDArray - - -class AgentEnv(gym.Wrapper[Any, Any, Any, Any]): - def __init__(self, env: gym.Env[Any, Any]): - super().__init__(env) - self.class_attributes = dict(inspect.getmembers(self.__class__)) - - def reset(self, **kwargs: Any) -> tuple[Any, dict[str, Any]]: - observations, info = self.env.reset(**kwargs) - self._check_val(event="reset", observations=observations) - self._observations = observations - return observations, info - - def step( - self, actions: NDArray[np.float32] - ) -> tuple[Any, float, bool, bool, dict[str, Any]]: - self._actions = actions - - observations, rewards, term, trunc, infos = self.env.step(actions) - self._check_val(event="step_wait", observations=observations, rewards=[rewards]) - self._observations = observations - - # Automatically reset. - if term or trunc: - infos["terminal_observation"] = observations - observations, _ = self.env.reset() - - return observations, float(rewards), term, trunc, copy.deepcopy(infos) - - def __getattr__(self, name: str) -> Any: - """Find attribute from wrapped env(s) if this wrapper does not have it. - Useful for accessing attributes from envs which are wrapped with multiple wrappers - which have unique attributes of interest. - """ - blocked_class = self.getattr_depth_check(name, already_found=False) - if blocked_class is not None: - own_class = f"{type(self).__module__}.{type(self).__name__}" - error_str = ( - f"Error: Recursive attribute lookup for {name} from {own_class} is " - f"ambiguous and hides attribute from {blocked_class}" - ) - raise AttributeError(error_str) - - return self.getattr_recursive(name) - - def _get_all_attributes(self) -> dict[str, Any]: - """Get all (inherited) instance and class attributes - - :return: all_attributes - """ - all_attributes = self.__dict__.copy() - all_attributes.update(self.class_attributes) - return all_attributes - - def getattr_recursive(self, name: str) -> Any: - """Recursively check wrappers to find attribute. - - :param name: name of attribute to look for - :return: attribute - """ - all_attributes = self._get_all_attributes() - if name in all_attributes: # attribute is present in this wrapper - attr = getattr(self, name) - elif hasattr(self.env, "getattr_recursive"): - # Attribute not present, child is wrapper. Call getattr_recursive rather than getattr - # to avoid a duplicate call to getattr_depth_check. 
- attr = self.env.getattr_recursive(name) - else: # attribute not present, child is an unwrapped VecEnv - attr = getattr(self.env, name) - - return attr - - def getattr_depth_check(self, name: str, already_found: bool) -> str: - """See base class. - - :return: name of module whose attribute is being shadowed, if any. - """ - all_attributes = self._get_all_attributes() - if name in all_attributes and already_found: - # this env's attribute is being hidden because of a higher env. - shadowed_wrapper_class = f"{type(self).__module__}.{type(self).__name__}" - # elif name in all_attributes and not already_found: - # # we have found the first reference to the attribute. Now check for duplicates. - # shadowed_wrapper_class = self.env.getattr_depth_check(name, True) - # else: - # # this wrapper does not have the attribute. Keep searching. - # shadowed_wrapper_class = self.env.getattr_depth_check(name, already_found) - - return shadowed_wrapper_class - - def check_array_value( - self, name: str, value: NDArray[np.float32] - ) -> list[tuple[str, str]]: - """ - Check for inf and NaN for a single numpy array. - - :param name: Name of the value being check - :param value: Value (numpy array) to check - :return: A list of issues found. - """ - found = [] - has_nan = np.any(np.isnan(value)) - has_inf = np.any(np.isinf(value)) - if has_inf: - found.append((name, "inf")) - if has_nan: - found.append((name, "nan")) - return found - - def _check_val(self, event: str, **kwargs: Any) -> None: - found = [] - for name, value in kwargs.items(): - if isinstance(value, (np.ndarray, list)): - found += self.check_array_value(name, np.asarray(value)) - elif isinstance(value, dict): - for inner_name, inner_val in value.items(): - found += self.check_array_value(f"{name}.{inner_name}", inner_val) - elif isinstance(value, tuple): - for idx, inner_val in enumerate(value): - found += self.check_array_value(f"{name}.{idx}", inner_val) - else: - raise TypeError(f"Unsupported observation type {type(value)}.") - - if found: - msg = "" - for i, (name, type_val) in enumerate(found): - msg += f"found {type_val} in {name}" - if i != len(found) - 1: - msg += ", " - - msg += ".\r\nOriginated from the " - - if event == "reset": - msg += "environment observation (at reset)" - elif event == "step": - msg += ( - f"environment, Last given value was: \r\n\taction={self._actions}" - ) - elif event == "step_async": - msg += f"RL model, Last given value was: \r\n\tobservations={self._observations}" - else: - raise ValueError("Internal error.") - - raise ValueError(msg) diff --git a/tune/protox/agent/base_class.py b/tune/protox/agent/base_class.py deleted file mode 100644 index 03c40134..00000000 --- a/tune/protox/agent/base_class.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Abstract base classes for RL algorithms.""" - -from abc import ABC, abstractmethod -from typing import Any, Optional - -import numpy as np -from numpy.typing import NDArray - -from tune.protox.agent.agent_env import AgentEnv -from tune.protox.agent.noise import ActionNoise -from tune.protox.env.artifact_manager import ArtifactManager -from util.workspace import TuningMode - - -class BaseAlgorithm(ABC): - """ - The base of RL algorithms - :param seed: Seed for the pseudo random generators - """ - - def __init__(self, seed: Optional[int] = None): - self.num_timesteps = 0 - self._total_timesteps = 0 - self.seed = seed - self.action_noise: Optional[ActionNoise] = None - self._last_obs: Optional[NDArray[np.float32]] = None - self._episode_num = 0 - # For logging (and TD3 delayed 
updates) - self._n_updates = 0 # type: int - # The artifact_manager object - self._artifact_manager: Optional[ArtifactManager] = None - self.timeout_checker = None - - def set_artifact_manager(self, artifact_manager: Optional[ArtifactManager]) -> None: - """ - Setter for for artifact_manager object. - - .. warning:: - """ - self._artifact_manager = artifact_manager - - @property - def artifact_manager(self) -> ArtifactManager: - """Getter for the artifact_manager object.""" - assert self._artifact_manager is not None - return self._artifact_manager - - def set_timeout_checker(self, timeout_checker: Any) -> None: - self.timeout_checker = timeout_checker - - def _setup_learn( - self, - env: AgentEnv, - total_timesteps: int, - ) -> int: - """ - Initialize different variables needed for training. - - :param total_timesteps: The total number of samples (env steps) to train on - :return: Total timesteps - """ - if self.action_noise is not None: - self.action_noise.reset() - - # Make sure training timesteps are ahead of the internal counter - total_timesteps += self.num_timesteps - self._total_timesteps = total_timesteps - - # Avoid resetting the environment when calling ``.learn()`` consecutive times - if self._last_obs is None: - ( - self._last_obs, - _, - ) = env.reset() # pytype: disable=annotation-type-mismatch - - return total_timesteps - - @abstractmethod - def learn( - self, env: AgentEnv, total_timesteps: int, tuning_mode: TuningMode - ) -> None: - """ - Return a trained model. - - :param total_timesteps: The total number of samples (env steps) to train on - :return: the trained model - """ diff --git a/tune/protox/agent/buffers.py b/tune/protox/agent/buffers.py deleted file mode 100644 index d4de74d4..00000000 --- a/tune/protox/agent/buffers.py +++ /dev/null @@ -1,134 +0,0 @@ -import copy -from typing import Any, Dict, List, NamedTuple, Optional, cast - -import numpy as np -import torch as th -from numpy.typing import NDArray - - -class ReplayBufferSamples(NamedTuple): - observations: th.Tensor - actions: th.Tensor - next_observations: th.Tensor - dones: th.Tensor - rewards: th.Tensor - infos: list[dict[str, Any]] - - -class ReplayBuffer: - """ - Replay buffer used in off-policy algorithms like SAC/TD3. 
- - :param buffer_size: Max number of element in the buffer - """ - - def __init__( - self, - buffer_size: int, - obs_shape: list[int], - action_dim: int = 0, - ): - super().__init__() - self.buffer_size = buffer_size - self.obs_shape = obs_shape - - assert action_dim > 0 - self.action_dim = action_dim - self.pos = 0 - self.full = False - - # Adjust buffer size - self.buffer_size = buffer_size - - self.observations = np.zeros( - (self.buffer_size, *self.obs_shape), dtype=np.float32 - ) - - self.next_observations = np.zeros( - (self.buffer_size, *self.obs_shape), dtype=np.float32 - ) - self.actions = np.zeros((self.buffer_size, self.action_dim), dtype=np.float32) - - self.rewards = np.zeros((self.buffer_size), dtype=np.float32) - self.dones = np.zeros((self.buffer_size), dtype=np.float32) - self.infos: list[Optional[dict[str, Any]]] = [None] * self.buffer_size - - def size(self) -> int: - """ - :return: The current size of the buffer - """ - if self.full: - return self.buffer_size - return self.pos - - def add( - self, - obs: NDArray[np.float32], - next_obs: NDArray[np.float32], - action: NDArray[np.float32], - reward: float, - done: bool, - infos: dict[str, Any], - ) -> None: - # Reshape to handle multi-dim and discrete action spaces, see GH #970 #1392 - action = action.reshape((self.action_dim)) - - # Copy to avoid modification by reference - self.observations[self.pos] = np.array(obs).copy() - self.next_observations[self.pos] = np.array(next_obs).copy() - - self.actions[self.pos] = np.array(action).copy() - self.rewards[self.pos] = np.array(reward).copy() - self.dones[self.pos] = np.array(done).copy() - self.infos[self.pos] = copy.deepcopy(infos) - - self.pos += 1 - if self.pos == self.buffer_size: - self.full = True - self.pos = 0 - - def sample(self, batch_size: int) -> ReplayBufferSamples: - """ - Sample elements from the replay buffer. - Custom sampling when using memory efficient variant, - as we should not sample the element with index `self.pos` - See https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274 - - :param batch_size: Number of element to sample - :return: - """ - upper_bound = self.buffer_size if self.full else self.pos - batch_inds = np.random.randint(0, upper_bound, size=batch_size) - return self._get_samples(batch_inds) - - def _get_samples(self, batch_inds: NDArray[np.int32]) -> ReplayBufferSamples: - next_obs = self.next_observations[batch_inds, :] - - data = ( - self.observations[batch_inds, :], - self.actions[batch_inds, :], - next_obs, - self.dones[batch_inds].reshape(-1, 1), - self.rewards[batch_inds].reshape(-1, 1), - cast(list[dict[str, Any]], [self.infos[x] for x in batch_inds]), - ) - return ReplayBufferSamples( - observations=self.to_torch(data[0]), - actions=self.to_torch(data[1]), - next_observations=self.to_torch(data[2]), - dones=self.to_torch(data[3]), - rewards=self.to_torch(data[4]), - infos=data[-1], - ) - - def to_torch(self, array: NDArray[np.float32]) -> th.Tensor: - """ - Convert a numpy array to a PyTorch tensor. 
- Note: it copies the data by default - - :param array: - :return: - """ - if copy: - return th.tensor(array) - return th.as_tensor(array) diff --git a/tune/protox/agent/build_trial.py b/tune/protox/agent/build_trial.py deleted file mode 100644 index 2d81b2d7..00000000 --- a/tune/protox/agent/build_trial.py +++ /dev/null @@ -1,573 +0,0 @@ -import glob -import json -import socket -import xml.etree.ElementTree as ET -from pathlib import Path -from typing import Any, Callable, Optional, Tuple, Union - -import gymnasium as gym -import numpy as np -import torch -from gymnasium.wrappers import FlattenObservation # type: ignore -from gymnasium.wrappers import ( # type: ignore[attr-defined] - NormalizeObservation, - NormalizeReward, -) -from torch import nn -from torch.optim import Adam # type: ignore[attr-defined] - -from env.pg_conn import PostgresConn -from tune.protox.agent.agent_env import AgentEnv -from tune.protox.agent.buffers import ReplayBuffer -from tune.protox.agent.noise import ClampNoise -from tune.protox.agent.policies import Actor, ContinuousCritic -from tune.protox.agent.utils import parse_noise_type -from tune.protox.agent.wolp.policies import WolpPolicy -from tune.protox.agent.wolp.wolp import Wolp -from tune.protox.embedding.train_all import ( - create_vae_model, - fetch_vae_parameters_from_workload, -) -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.lsc.lsc import LSC -from tune.protox.env.lsc.lsc_wrapper import LSCWrapper -from tune.protox.env.mqo.mqo_wrapper import MQOWrapper -from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.space.latent_space.latent_knob_space import LatentKnobSpace -from tune.protox.env.space.latent_space.latent_query_space import LatentQuerySpace -from tune.protox.env.space.latent_space.lsc_index_space import LSCIndexSpace -from tune.protox.env.space.state import LSCMetricStateSpace, LSCStructureStateSpace -from tune.protox.env.space.state.space import StateSpace -from tune.protox.env.target_reset.target_reset_wrapper import TargetResetWrapper -from tune.protox.env.types import ProtoAction, TableAttrAccessSetsMap -from tune.protox.env.util.reward import RewardUtility -from tune.protox.env.workload import Workload -from util.workspace import ( - DBGymConfig, - TuningMode, - make_redis_started, - open_and_save, - save_file, -) - - -def _parse_activation_fn(act_type: str) -> type[nn.Module]: - if act_type == "relu": - return nn.ReLU - elif act_type == "gelu": - return nn.GELU - elif act_type == "mish": - return nn.Mish - elif act_type == "tanh": - return nn.Tanh - else: - raise ValueError(f"Unsupported activation type {act_type}") - - -def _get_signal(signal_folder: Union[str, Path]) -> tuple[int, str]: - MIN_PORT = 5434 - MAX_PORT = 5500 - - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - port = MIN_PORT - while port <= MAX_PORT: - try: - s.bind(("", port)) - - drop = False - for sig in glob.glob(f"{signal_folder}/*.signal"): - if port == int(Path(sig).stem): - drop = True - break - - # Someone else has actually taken hold of this. 
- if drop: - port += 1 - s.close() - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - continue - - with open(f"{signal_folder}/{port}.signal", "w") as f: - f.write(str(port)) - f.close() - - s.close() - return port, f"{signal_folder}/{port}.signal" - except OSError as e: - port += 1 - raise IOError("No free ports to bind postgres to.") - - -def _modify_benchbase_config( - dbgym_cfg: DBGymConfig, port: int, hpo_params: dict[str, Any] -) -> None: - if hpo_params["benchmark_config"]["query_spec"]["oltp_workload"]: - conf_etree = ET.parse( - dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "benchmark.xml" - ) - jdbc = f"jdbc:postgresql://localhost:{port}/benchbase?preferQueryMode=extended" - conf_etree.getroot().find("url").text = jdbc # type: ignore - - oltp_config = hpo_params["benchbase_config"]["oltp_config"] - if conf_etree.getroot().find("scalefactor") is not None: - conf_etree.getroot().find("scalefactor").text = str(oltp_config["oltp_sf"]) # type: ignore - if conf_etree.getroot().find("terminals") is not None: - conf_etree.getroot().find("terminals").text = str(oltp_config["oltp_num_terminals"]) # type: ignore - if conf_etree.getroot().find("works") is not None: - works = conf_etree.getroot().find("works").find("work") # type: ignore - if works.find("time") is not None: # type: ignore - conf_etree.getroot().find("works").find("work").find("time").text = str(oltp_config["oltp_duration"]) # type: ignore - if works.find("warmup") is not None: # type: ignore - conf_etree.getroot().find("works").find("work").find("warmup").text = str(oltp_config["oltp_warmup"]) # type: ignore - conf_etree.write( - dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "benchmark.xml" - ) - - -def _gen_noise_scale( - vae_config: dict[str, Any], hpo_params: dict[str, Any] -) -> Callable[[ProtoAction, Optional[torch.Tensor]], ProtoAction]: - def f(p: ProtoAction, n: Optional[torch.Tensor]) -> ProtoAction: - assert n is not None - if hpo_params["scale_noise_perturb"]: - return ProtoAction( - torch.clamp( - p + n * vae_config["output_scale"], 0.0, vae_config["output_scale"] - ) - ) - else: - return ProtoAction(torch.clamp(p + n, 0.0, 1.0)) - - return f - - -def _build_utilities( - dbgym_cfg: DBGymConfig, - tuning_mode: TuningMode, - pgport: int, - hpo_params: dict[str, Any], -) -> tuple[ArtifactManager, RewardUtility, PostgresConn, Workload]: - artifact_manager = ArtifactManager( - dbgym_cfg, - hpo_params["trace"], - ) - - reward_utility = RewardUtility( - target=( - "tps" - if hpo_params["benchmark_config"]["query_spec"]["oltp_workload"] - else "latency" - ), - metric=hpo_params["reward"], - reward_scaler=hpo_params["reward_scaler"], - artifact_manager=artifact_manager, - ) - - # If we're using Boot, PostgresConn.restart_postgres() assumes that Redis is running. Thus, - # we start Redis here if necessary. - enable_boot = hpo_params["enable_boot"][str(tuning_mode)] - if enable_boot: - make_redis_started(dbgym_cfg.root_yaml["boot_redis_port"]) - - pg_conn = PostgresConn( - dbgym_cfg=dbgym_cfg, - pgport=pgport, - pristine_dbdata_snapshot_fpath=Path( - hpo_params["pgconn_info"]["pristine_dbdata_snapshot_path"] - ), - dbdata_parent_dpath=Path(hpo_params["pgconn_info"]["dbdata_parent_dpath"]), - pgbin_path=Path(hpo_params["pgconn_info"]["pgbin_path"]), - boot_config_fpath=( - hpo_params["boot_config_fpath"][str(tuning_mode)] if enable_boot else None - ), - ) - # TODO(phw2): I removed artifact_manager here. Fix this later. 
- - workload = Workload( - dbgym_cfg=dbgym_cfg, - tables=hpo_params["benchmark_config"]["tables"], - attributes=hpo_params["benchmark_config"]["attributes"], - query_spec=hpo_params["benchmark_config"]["query_spec"], - workload_path=Path(hpo_params["workload_path"]), - pid=None, - workload_timeout=hpo_params["workload_timeout"][str(tuning_mode)], - workload_timeout_penalty=hpo_params["workload_timeout_penalty"], - artifact_manager=artifact_manager, - ) - - return artifact_manager, reward_utility, pg_conn, workload - - -def _build_actions( - dbgym_cfg: DBGymConfig, - seed: int, - hpo_params: dict[str, Any], - workload: Workload, - artifact_manager: ArtifactManager, -) -> tuple[HolonSpace, LSC]: - sysknobs = LatentKnobSpace( - artifact_manager=artifact_manager, - tables=hpo_params["benchmark_config"]["tables"], - knobs=hpo_params["system_knobs"], - quantize=True, - quantize_factor=hpo_params["default_quantization_factor"], - seed=seed, - table_level_knobs=hpo_params["benchmark_config"]["table_level_knobs"], - latent=True, - ) - - with open_and_save(dbgym_cfg, Path(hpo_params["embedder_path"]) / "config") as f: - vae_config = json.load(f) - - assert vae_config["mean_output_act"] == "sigmoid" - index_output_transform = ( - lambda x: torch.nn.Sigmoid()(x) * vae_config["output_scale"] - ) - index_noise_scale = _gen_noise_scale(vae_config, hpo_params) - - max_attrs, max_cat_features = fetch_vae_parameters_from_workload( - workload, len(hpo_params["benchmark_config"]["tables"]) - ) - vae = create_vae_model(vae_config, max_attrs, max_cat_features) - embedder_fpath = Path(hpo_params["embedder_path"]) / "embedder.pth" - save_file(dbgym_cfg, embedder_fpath) - vae.load_state_dict(torch.load(embedder_fpath)) - - lsc = LSC( - horizon=hpo_params["horizon"], - lsc_parameters=hpo_params["lsc"], - vae_config=vae_config, - artifact_manager=artifact_manager, - ) - - idxspace = LSCIndexSpace( - tables=hpo_params["benchmark_config"]["tables"], - max_num_columns=hpo_params["benchmark_config"]["max_num_columns"], - max_indexable_attributes=workload.max_indexable(), - seed=seed, - # TODO(wz2): We should theoretically pull this from the DBMS. 
- rel_metadata=hpo_params["benchmark_config"]["attributes"], - attributes_overwrite=workload.column_usages(), - tbl_include_subsets=TableAttrAccessSetsMap(workload.tbl_include_subsets), - index_space_aux_type=hpo_params["benchmark_config"]["index_space_aux_type"], - index_space_aux_include=hpo_params["benchmark_config"][ - "index_space_aux_include" - ], - deterministic_policy=True, - vae=vae, - latent_dim=vae_config["latent_dim"], - index_output_transform=index_output_transform, - index_noise_scale=index_noise_scale, - artifact_manager=artifact_manager, - lsc=lsc, - ) - - qspace = LatentQuerySpace( - tables=hpo_params["benchmark_config"]["tables"], - quantize=True, - quantize_factor=hpo_params["default_quantization_factor"], - seed=seed, - per_query_knobs_gen=hpo_params["benchmark_config"]["per_query_knob_gen"], - per_query_parallel=( - {} - if not hpo_params["benchmark_config"]["per_query_select_parallel"] - else workload.query_aliases - ), - per_query_scans=( - {} - if not hpo_params["benchmark_config"]["per_query_scan_method"] - else workload.query_aliases - ), - query_names=workload.order, - artifact_manager=artifact_manager, - latent=True, - ) - - hspace = HolonSpace( - knob_space=sysknobs, - index_space=idxspace, - query_space=qspace, - seed=seed, - artifact_manager=artifact_manager, - ) - return hspace, lsc - - -def _build_observation_space( - dbgym_cfg: DBGymConfig, - action_space: HolonSpace, - lsc: LSC, - hpo_params: dict[str, Any], - seed: int, -) -> StateSpace: - if hpo_params["metric_state"] == "metric": - return LSCMetricStateSpace( - dbgym_cfg=dbgym_cfg, - lsc=lsc, - tables=hpo_params["benchmark_config"]["tables"], - seed=seed, - ) - elif hpo_params["metric_state"] == "structure": - return LSCStructureStateSpace( - lsc=lsc, - action_space=action_space, - normalize=False, - seed=seed, - ) - elif hpo_params["metric_state"] == "structure_normalize": - return LSCStructureStateSpace( - lsc=lsc, - action_space=action_space, - normalize=True, - seed=seed, - ) - else: - ms = hpo_params["metric_state"] - raise ValueError(f"Unsupported state representation {ms}") - - -def _build_env( - dbgym_cfg: DBGymConfig, - tuning_mode: TuningMode, - hpo_params: dict[str, Any], - pg_conn: PostgresConn, - observation_space: StateSpace, - holon_space: HolonSpace, - lsc: LSC, - workload: Workload, - reward_utility: RewardUtility, - artifact_manager: ArtifactManager, -) -> tuple[TargetResetWrapper, AgentEnv]: - - env = gym.make( - "Postgres-v0", - dbgym_cfg=dbgym_cfg, - tuning_mode=tuning_mode, - observation_space=observation_space, - action_space=holon_space, - workload=workload, - horizon=hpo_params["horizon"], - reward_utility=reward_utility, - pg_conn=pg_conn, - query_timeout=hpo_params["query_timeout"], - benchbase_config=hpo_params["benchbase_config"], - artifact_manager=artifact_manager, - ) - - # Check whether to create the MQO wrapper. - if not hpo_params["benchmark_config"]["query_spec"]["oltp_workload"]: - if ( - hpo_params["workload_eval_mode"] != "pq" - or hpo_params["workload_eval_inverse"] - or hpo_params["workload_eval_reset"] - ): - env = MQOWrapper( - workload_eval_mode=hpo_params["workload_eval_mode"], - workload_eval_inverse=hpo_params["workload_eval_inverse"], - workload_eval_reset=hpo_params["workload_eval_reset"], - benchbase_config=hpo_params["benchbase_config"], - query_timeout=hpo_params["query_timeout"], - env=env, - artifact_manager=artifact_manager, - ) - - # Attach LSC. 
- env = LSCWrapper( - lsc=lsc, - env=env, - artifact_manager=artifact_manager, - ) - - # Attach TargetResetWrapper. - target_reset = env = TargetResetWrapper( - env=env, - maximize_state=hpo_params["maximize_state"], - reward_utility=reward_utility, - start_reset=False, - artifact_manager=artifact_manager, - ) - - env = FlattenObservation(env) - if hpo_params["normalize_state"]: - env = NormalizeObservation(env) - - if hpo_params["normalize_reward"]: - env = NormalizeReward(env, gamma=hpo_params["gamma"]) - - # Wrap the AgentEnv to have null checking. - env = AgentEnv(env) - return target_reset, env - - -def _build_agent( - seed: int, - hpo_params: dict[str, Any], - observation_space: StateSpace, - action_space: HolonSpace, - artifact_manager: ArtifactManager, - ray_trial_id: Optional[str], -) -> Wolp: - action_dim = noise_action_dim = action_space.latent_dim() - critic_action_dim = action_space.critic_dim() - - actor = Actor( - observation_space=observation_space, - action_space=action_space, - net_arch=[int(l) for l in hpo_params["pi_arch"].split(",")], - features_dim=gym.spaces.utils.flatdim(observation_space), - activation_fn=_parse_activation_fn(hpo_params["activation_fn"]), - weight_init=hpo_params["weight_init"], - bias_zero=hpo_params["bias_zero"], - squash_output=False, - action_dim=action_dim, - policy_weight_adjustment=hpo_params["policy_weight_adjustment"], - ) - - actor_target = Actor( - observation_space=observation_space, - action_space=action_space, - net_arch=[int(l) for l in hpo_params["pi_arch"].split(",")], - features_dim=gym.spaces.utils.flatdim(observation_space), - activation_fn=_parse_activation_fn(hpo_params["activation_fn"]), - weight_init=hpo_params["weight_init"], - bias_zero=hpo_params["bias_zero"], - squash_output=False, - action_dim=action_dim, - policy_weight_adjustment=hpo_params["policy_weight_adjustment"], - ) - - actor_optimizer = Adam(actor.parameters(), lr=hpo_params["learning_rate"]) - - critic = ContinuousCritic( - observation_space=observation_space, - action_space=action_space, - net_arch=[int(l) for l in hpo_params["qf_arch"].split(",")], - features_dim=gym.spaces.utils.flatdim(observation_space), - activation_fn=_parse_activation_fn(hpo_params["activation_fn"]), - weight_init=hpo_params["weight_init"], - bias_zero=hpo_params["bias_zero"], - n_critics=2, - action_dim=critic_action_dim, - ) - - critic_target = ContinuousCritic( - observation_space=observation_space, - action_space=action_space, - net_arch=[int(l) for l in hpo_params["qf_arch"].split(",")], - features_dim=gym.spaces.utils.flatdim(observation_space), - activation_fn=_parse_activation_fn(hpo_params["activation_fn"]), - weight_init=hpo_params["weight_init"], - bias_zero=hpo_params["bias_zero"], - n_critics=2, - action_dim=critic_action_dim, - ) - - critic_optimizer = Adam( - critic.parameters(), - lr=hpo_params["learning_rate"] * hpo_params["critic_lr_scale"], - ) - - policy = WolpPolicy( - observation_space=observation_space, - action_space=action_space, - actor=actor, - actor_target=actor_target, - actor_optimizer=actor_optimizer, - critic=critic, - critic_target=critic_target, - critic_optimizer=critic_optimizer, - grad_clip=hpo_params["grad_clip"], - policy_l2_reg=hpo_params["policy_l2_reg"], - tau=hpo_params["tau"], - gamma=hpo_params["gamma"], - artifact_manager=artifact_manager, - ) - - # Setup the noise policy. 
- noise_params = hpo_params["noise_parameters"] - means = np.zeros((noise_action_dim,), dtype=np.float32) - stddevs = np.full( - (noise_action_dim,), noise_params["noise_sigma"], dtype=np.float32 - ) - action_noise_type = parse_noise_type(noise_params["noise_type"]) - action_noise = None if not action_noise_type else action_noise_type(means, stddevs) - - target_noise = hpo_params["target_noise"] - means = np.zeros( - ( - hpo_params["batch_size"], - noise_action_dim, - ), - dtype=np.float32, - ) - stddevs = np.full( - ( - hpo_params["batch_size"], - noise_action_dim, - ), - target_noise["target_policy_noise"], - dtype=np.float32, - ) - target_action_noise = parse_noise_type("normal") - assert target_action_noise - clamp_noise = ClampNoise( - target_action_noise(means, stddevs), target_noise["target_noise_clip"] - ) - - return Wolp( - policy=policy, - replay_buffer=ReplayBuffer( - buffer_size=hpo_params["buffer_size"], - obs_shape=[gym.spaces.utils.flatdim(observation_space)], - action_dim=critic_action_dim, - ), - ray_trial_id=ray_trial_id, - learning_starts=hpo_params["learning_starts"], - batch_size=hpo_params["batch_size"], - train_freq=(hpo_params["train_freq_frequency"], hpo_params["train_freq_unit"]), - gradient_steps=hpo_params["gradient_steps"], - action_noise=action_noise, - target_action_noise=clamp_noise, - seed=seed, - neighbor_parameters=hpo_params["neighbor_parameters"], - ) - - -def build_trial( - dbgym_cfg: DBGymConfig, - tuning_mode: TuningMode, - seed: int, - hpo_params: dict[str, Any], - ray_trial_id: Optional[str] = None, -) -> tuple[ArtifactManager, TargetResetWrapper, AgentEnv, Wolp, str]: - # The massive trial builder. - - port, signal = _get_signal(hpo_params["pgconn_info"]["pgbin_path"]) - _modify_benchbase_config(dbgym_cfg, port, hpo_params) - - artifact_manager, reward_utility, pg_conn, workload = _build_utilities( - dbgym_cfg, tuning_mode, port, hpo_params - ) - holon_space, lsc = _build_actions( - dbgym_cfg, seed, hpo_params, workload, artifact_manager - ) - observation_space = _build_observation_space( - dbgym_cfg, holon_space, lsc, hpo_params, seed - ) - target_reset, env = _build_env( - dbgym_cfg, - tuning_mode, - hpo_params, - pg_conn, - observation_space, - holon_space, - lsc, - workload, - reward_utility, - artifact_manager, - ) - - agent = _build_agent( - seed, hpo_params, observation_space, holon_space, artifact_manager, ray_trial_id - ) - return artifact_manager, target_reset, env, agent, signal diff --git a/tune/protox/agent/cli.py b/tune/protox/agent/cli.py deleted file mode 100644 index 5b64983a..00000000 --- a/tune/protox/agent/cli.py +++ /dev/null @@ -1,17 +0,0 @@ -import click - -from tune.protox.agent.hpo import hpo -from tune.protox.agent.replay import replay -from tune.protox.agent.tune import tune -from util.workspace import DBGymConfig - - -@click.group("agent") -@click.pass_obj -def agent_group(dbgym_cfg: DBGymConfig) -> None: - dbgym_cfg.append_group("agent") - - -agent_group.add_command(hpo) -agent_group.add_command(tune) -agent_group.add_command(replay) diff --git a/tune/protox/agent/coerce_config.py b/tune/protox/agent/coerce_config.py deleted file mode 100644 index ccfe798e..00000000 --- a/tune/protox/agent/coerce_config.py +++ /dev/null @@ -1,96 +0,0 @@ -import logging -from typing import Any - -import yaml - -from util.workspace import DBGymConfig, TuningMode, open_and_save - - -def coerce_config( - dbgym_cfg: DBGymConfig, space: dict[str, Any], hpo_params: dict[str, Any] -) -> dict[str, Any]: - if "space_version" not in hpo_params: - 
# This is an old version. Coerce the params file. - new_config = {} - margs = hpo_params["mythril_args"] - - with open_and_save(dbgym_cfg, margs["benchmark_config"]) as f: - benchmark_config = yaml.safe_load(f) - benchmark = [k for k in benchmark_config.keys()][0] - benchmark_config = benchmark_config[benchmark] - benchmark_config["benchmark"] = benchmark - - # Merge the query specs. - mqs = hpo_params["mythril_query_spec"] - benchmark_config["query_spec"].update(mqs) - - defaults = { - "trace": True, - "seed": hpo_params["mythril_args"]["seed"], - "tune_duration": { - str(TuningMode.HPO): hpo_params["mythril_args"]["duration"], - }, - "workload_timeout": { - str(TuningMode.HPO): hpo_params["mythril_args"]["workload_timeout"], - }, - "query_timeout": hpo_params["mythril_args"]["timeout"], - "pgconn_info": { - "pgport": 5432, - "pguser": "admin", - "pgpass": "", - "pristine_dbdata_snapshot_path": "/mnt/nvme0n1/wz2/noisepage/pgdata", - "dbdata_parent_dpath": "/mnt/nvme0n1/wz2/noisepage/", - "pgbin_path": "/mnt/nvme0n1/wz2/noisepage/", - }, - "benchmark_config": benchmark_config, - "benchbase_config": { - "oltp_config": { - "oltp_num_terminals": margs.get("oltp_num_terminals", 0), - "oltp_duration": margs.get("oltp_duration", 0), - "oltp_sf": margs.get("oltp_sf", 0), - "oltp_warmup": margs.get("oltp_warmup", 0), - }, - "benchbase_path": "/home/wz2/noisepage-pilot/artifacts/benchbase/", - "benchbase_config_path": hpo_params["mythril_args"][ - "benchbase_config_path" - ], - }, - "system_knobs": hpo_params["mythril_system_knobs"], - "lsc": { - "enabled": hpo_params["lsc_parameters"]["lsc_enabled"], - "initial": hpo_params["lsc_parameters"]["lsc_shift_initial"], - "increment": hpo_params["lsc_parameters"]["lsc_shift_increment"], - "max": hpo_params["lsc_parameters"]["lsc_shift_max"], - "shift_eps_freq": hpo_params["lsc_parameters"][ - "lsc_shift_schedule_eps_freq" - ], - "shift_after": hpo_params["lsc_parameters"]["lsc_shift_after"], - }, - "neighbor_parameters": { - "knob_num_nearest": hpo_params["neighbor_parameters"][ - "knob_num_nearest" - ], - "knob_span": hpo_params["neighbor_parameters"]["knob_span"], - "index_num_samples": hpo_params["neighbor_parameters"][ - "index_num_samples" - ], - "index_rules": hpo_params["neighbor_parameters"].get( - "index_subset", True - ), - }, - "embedder_path": hpo_params["vae_metadata"]["embedder_path"], - } - - for s in space.keys(): - if s in defaults: - new_config[s] = defaults[s] - elif s in hpo_params: - new_config[s] = hpo_params[s] - elif s == "space_version": - continue - else: - assert False, f"{s} unable to coerce." - - return new_config - - return hpo_params diff --git a/tune/protox/agent/default_sysknobs.yaml b/tune/protox/agent/default_sysknobs.yaml deleted file mode 100644 index f705a61a..00000000 --- a/tune/protox/agent/default_sysknobs.yaml +++ /dev/null @@ -1,126 +0,0 @@ -system_knobs: - # Knob Specification - # : - # type: - # min: - # max: - # quantize: - # log_scale: <1 if using log2 scale, 0 otherwise> - # unit: - - # Starts the autovacuum subprocess - autovacuum: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - # Minimum number of tuple inserts, updates, or deletes prior to analyze. - autovacuum_analyze_threshold: {type: "integer", min: 0, max: 2147483647, quantize: 0, log_scale: 1, unit: 0} - # Number of tuple inserts, updates, or deletes prior to analyze as a fraction of reltuples. 
- autovacuum_analyze_scale_factor: {type: "float", min: 0, max: 1, quantize: 10, log_scale: 0, unit: 0} - # Sets the maximum number of simultaneously running autovacuum worker processes - autovacuum_max_workers: {type: "integer", min: 1, max: 20, quantize: 0, log_scale: 0, unit: 0} - # Time to sleep between autovacuum runs (sec unit). - autovacuum_naptime: {type: "integer_time", min: 60, max: 300, quantize: 8, log_scale: 0, unit: 1000000} - # Vacuum cost delay in milliseconds, for autovacuum (ms unit). - autovacuum_vacuum_cost_delay: {type: "integer_time", min: 0, max: 100, quantize: -1, log_scale: 0, unit: 1000} - # Vacuum cost amount available before napping, for autovacuum. - autovacuum_vacuum_cost_limit: {type: "integer", min: 1, max: 10000, quantize: -1, log_scale: 0, unit: 0} - # Minimum number of tuple updates or deletes prior to vacuum. - autovacuum_vacuum_threshold: {type: "integer", min: 0, max: 2147483647, quantize: 0, log_scale: 1, unit: 0} - # Minimum number of tuple inserts prior to vacuum, or -1 to disable insert vacuums. - autovacuum_vacuum_insert_threshold: {type: "integer", min: -1, max: 2147483647, quantize: 0, log_scale: 1, unit: 0} - # Number of tuple updates or deletes prior to vacuum as a fraction of reltuples. - autovacuum_vacuum_scale_factor: {type: "float", min: 0, max: 100, quantize: -1, log_scale: 0, unit: 0} - # Number of tuple inserts prior to vacuum as a fraction of reltuples. - autovacuum_vacuum_insert_scale_factor: {type: "float", min: 0, max: 100, quantize: -1, log_scale: 0, unit: 0} - # Sets the maximum memory to be used by each autovacuum worker process (1kB unit) -- 4GB. - autovacuum_work_mem: {type: "bytes", min: 1024, max: 4194304, quantize: 0, log_scale: 1, unit: 1024} - - # Number of pages after which previously performed writes are flushed to disk (8kb unit). - backend_flush_after: {type: "bytes", min: 0, max: 256, quantize: -1, log_scale: 0, unit: 8192} - # Background writer sleep time between rounds (ms unit). - bgwriter_delay: {type: "integer_time", min: 10, max: 10000, quantize: -1, log_scale: 0, unit: 1000} - # Number of pages after which previously performed writes are flushed to disk (page unit). - bgwriter_flush_after: {type: "bytes", min: 0, max: 256, quantize: -1, log_scale: 0, unit: 8192} - # Background writer maximum number of LRU pages to flush per round (page unit) -- see shared_buffers. - bgwriter_lru_maxpages: {type: "integer", min: 0, max: 4194304, quantize: 0, log_scale: 1, unit: 0} - # Multiple of the average buffer usage to free per round (float multiplier). - bgwriter_lru_multiplier: {type: "float", min: 2, max: 10, quantize: -1, log_scale: 0, unit: 0} - # Time spent flushing dirty buffers during checkpoint, as fraction of checkpoint interval (float). - checkpoint_completion_target: {type: "float", min: 0, max: 1, quantize: 10, log_scale: 0, unit: 0} - # Sets the maximum time between automatic WAL checkpoints (sec unit). - checkpoint_timeout: {type: "integer_time", min: 30, max: 300, quantize: 9, log_scale: 0, unit: 1000000} - # Number of pages after which previously performed writes are flushed to disk (page unit). - checkpoint_flush_after: {type: "bytes", min: 0, max: 256, quantize: -1, log_scale: 0, unit: 8192} - - # Sets the delay in microseconds between transaction commit and flushing WAL to disk (us unit). - commit_delay: {type: "integer_time", min: 0, max: 100000, quantize: 10, log_scale: 0, unit: 0} - # Sets the minimum number of concurrent open transactions required before performing commit_delay. 
- commit_siblings: {type: "integer", min: 0, max: 20, quantize: -1, log_scale: 0, unit: 0} - # Sets the time to wait on a lock before checking for deadlock (ms unit) -- 1 second. - deadlock_timeout: {type: "integer_time", min: 1, max: 1000000, quantize: 1000, log_scale: 0, unit: 1000} - - # Allow JIT compilation. - jit: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - # Allow JIT compilation of expressions. - jit_expressions: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - # Perform JIT compilation if query is more expensive. - jit_above_cost: {type: "float", min: 0, max: 1.0E+8, quantize: 0, log_scale: 1, unit: 0} - # Perform JIT inlining if query is more expensive. - jit_inline_above_cost: {type: "float", min: -1, max: 1.0E+8, quantize: 0, log_scale: 1, unit: 0} - # Optimize JIT-compiled functions if query is more expensive. - jit_optimize_above_cost: {type: "float", min: -1, max: 1.0E+8, quantize: 0, log_scale: 1, unit: 0} - # Allow JIT compilation of tuple deforming. - jit_tuple_deforming: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - - # Sets the planner's assumption about the total size of the data caches (8kb unit) -- 32GB. - effective_cache_size: {type: "bytes", min: 1, max: 4194304, quantize: 0, log_scale: 1, unit: 8192} - # Number of simultaneous requests that can be handled efficiently by the disk subsystem. - effective_io_concurrency: {type: "integer", min: 0, max: 1000, quantize: -1, log_scale: 0, unit: 0} - # A variant of effective_io_concurrency that is used for maintenance work. - maintenance_io_concurrency: {type: "integer", min: 0, max: 1000, quantize: -1, log_scale: 0, unit: 0} - - ## Enables the planner's use of explicit sort steps. - #enable_sort: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - ## Enables or disables the query planner's use of gather merge plan types. - #enable_gathermerge: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - ## Enables or disables the query planner's use of hash-agg plan types. - #enable_hashagg: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - ## Enables or disables the query planner's use of hash-join plan types with parallel hash. - #enable_parallel_hash: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - ## Enables or disables the query planner's use of materialization. - #enable_material: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - ## Enables or disables the query planner's use of memoize plans. - #enable_memoize: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - ## Sets the planner's estimate of the cost of a nonsequentially fetched disk page. - #random_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - ## Sets the planner's estimate of the cost of a sequentially fetched disk page. - #seq_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - - # Sets the maximum number of background processes that the system can support. - max_worker_processes: {type: "integer", min: 0, max: 20, quantize: -1, log_scale: 0, unit: 0} - # Sets the maximum number of workers that the system can support for parallel operations. - max_parallel_workers: {type: "integer", min: 0, max: 20, quantize: -1, log_scale: 0, unit: 0} - # Sets the maximum number of workers that can be started by a single Gather or Gather Merge node. 
- max_parallel_workers_per_gather: {type: "integer", min: 0, max: 20, quantize: -1, log_scale: 0, unit: 0} - # Sets the maximum number of parallel workers that can be started by a single utility command. - max_parallel_maintenance_workers: {type: "integer", min: 0, max: 20, quantize: -1, log_scale: 0, unit: 0} - - # Sets the number of disk-page buffers in shared memory for WAL (8kB unit) -- 128MB. - wal_buffers: {type: "bytes", min: 8, max: 16384, quantize: 0, log_scale: 1, unit: 8192} - # Compresses full-page writes written in WAL file with specified method. - wal_compression: {type: "binary_enum", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - # Time between WAL flushes performed in the WAL writer (ms). - wal_writer_delay: {type: "integer_time", min: 1, max: 1000, quantize: 0, log_scale: 1, unit: 1000} - # Amount of WAL written out by WAL writer that triggers a flush (8kB unit). - wal_writer_flush_after: {type: "bytes", min: 0, max: 2097152, quantize: 0, log_scale: 1, unit: 8192} - # Sets the WAL size that triggers a checkpoint (1MB unit). - max_wal_size: {type: "bytes", min: 32, max: 16384, quantize: 16, log_scale: 0, unit: 1048576} - # Sets the minimum size to shrink the WAL to. - min_wal_size: {type: "bytes", min: 32, max: 16384, quantize: 16, log_scale: 0, unit: 1048576} - - # Multiple of work_mem to use for hash tables. - #hash_mem_multiplier: {type: "float", min: 1, max: 1000, quantize: 1000, log_scale: 0, unit: 0} - # Sets the maximum memory to be used for maintenance operations (1kB unit) -- 4GB. - maintenance_work_mem: {type: "bytes", min: 1024, max: 4194304, quantize: 0, log_scale: 1, unit: 1024} - # Sets the number of shared memory buffers used by the server (8kb unit, 128KB - 32GB). - shared_buffers: {type: "bytes", min: 256, max: 4194304, quantize: 0, log_scale: 1, unit: 8192} - # Sets the maximum memory to be used for query workspaces (1kB unit). 
- work_mem: {type: "bytes", min: 64, max: 4194304, quantize: 0, log_scale: 1, unit: 1024} diff --git a/tune/protox/agent/hpo.py b/tune/protox/agent/hpo.py deleted file mode 100644 index bb3751cb..00000000 --- a/tune/protox/agent/hpo.py +++ /dev/null @@ -1,809 +0,0 @@ -import json -import logging -import os -import random -import shutil -import sys -import time -from datetime import datetime -from pathlib import Path -from typing import Any, Optional, Type, Union - -import click -import numpy as np -import pandas as pd -import ray -import torch -import yaml -from ray import tune -from ray.air import FailureConfig, RunConfig -from ray.train import SyncConfig -from ray.tune import Trainable, TuneConfig -from ray.tune.schedulers import FIFOScheduler -from ray.tune.search.basic_variant import BasicVariantGenerator - -from benchmark.constants import DEFAULT_SCALE_FACTOR -from tune.protox.agent.build_trial import build_trial -from util.log import DBGYM_LOGGER_NAME -from util.workspace import ( - BENCHMARK_NAME_PLACEHOLDER, - DEFAULT_BOOT_CONFIG_FPATH, - DEFAULT_SYSKNOBS_PATH, - DEFAULT_WORKLOAD_TIMEOUT, - SCALE_FACTOR_PLACEHOLDER, - WORKLOAD_NAME_PLACEHOLDER, - WORKSPACE_PATH_PLACEHOLDER, - DBGymConfig, - TuningMode, - fully_resolve_path, - get_default_benchbase_config_path, - get_default_benchmark_config_path, - get_default_dbdata_parent_dpath, - get_default_embedder_path, - get_default_hpoed_agent_params_fname, - get_default_pgbin_path, - get_default_pristine_dbdata_snapshot_path, - get_default_workload_name_suffix, - get_default_workload_path, - get_workload_name, - is_ssd, - link_result, - open_and_save, - restart_ray, -) - -METRIC_NAME = "Best Metric" - - -class AgentHPOArgs: - def __init__( - self, - benchmark_name: str, - workload_name: str, - embedder_path: Path, - benchmark_config_path: Path, - benchbase_config_path: Path, - sysknobs_path: Path, - pristine_dbdata_snapshot_path: Path, - dbdata_parent_dpath: Path, - pgbin_path: Path, - workload_path: Path, - seed: int, - agent: str, - max_concurrent: int, - num_samples: int, - tune_duration_during_hpo: float, - workload_timeout: float, - query_timeout: float, - enable_boot_during_hpo: bool, - boot_config_fpath_during_hpo: Path, - build_space_good_for_boot: bool, - ): - self.benchmark_name = benchmark_name - self.workload_name = workload_name - self.embedder_path = embedder_path - self.benchmark_config_path = benchmark_config_path - self.benchbase_config_path = benchbase_config_path - self.sysknobs_path = sysknobs_path - self.pristine_dbdata_snapshot_path = pristine_dbdata_snapshot_path - self.dbdata_parent_dpath = dbdata_parent_dpath - self.pgbin_path = pgbin_path - self.workload_path = workload_path - self.seed = seed - self.agent = agent - self.max_concurrent = max_concurrent - self.num_samples = num_samples - self.tune_duration_during_hpo = tune_duration_during_hpo - self.workload_timeout = workload_timeout - self.query_timeout = query_timeout - self.enable_boot_during_hpo = enable_boot_during_hpo - self.boot_config_fpath_during_hpo = boot_config_fpath_during_hpo - self.build_space_good_for_boot = build_space_good_for_boot - - -@click.command() -@click.pass_obj -@click.argument("benchmark-name") -@click.option( - "--workload-name-suffix", - type=str, - default=None, - help=f"The suffix of the workload name (the part after the scale factor).", -) -@click.option( - "--scale-factor", - type=float, - default=DEFAULT_SCALE_FACTOR, - help=f"The scale factor used when generating the data of the benchmark.", -) -@click.option( - 
"--embedder-path", - type=Path, - default=None, - help=f"The path to the directory that contains an `embedder.pth` file with a trained encoder and decoder as well as a `config` file. The default is {get_default_embedder_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER)}", -) -@click.option( - "--benchmark-config-path", - type=Path, - default=None, - help=f"The path to the .yaml config file for the benchmark. The default is {get_default_benchmark_config_path(BENCHMARK_NAME_PLACEHOLDER)}.", -) -@click.option( - "--benchbase-config-path", - type=Path, - default=None, - help=f"The path to the .xml config file for BenchBase, used to run OLTP workloads. The default is {get_default_benchbase_config_path(BENCHMARK_NAME_PLACEHOLDER)}.", -) -@click.option( - "--sysknobs-path", - type=Path, - default=DEFAULT_SYSKNOBS_PATH, - help=f"The path to the file configuring the space of system knobs the tuner can tune.", -) -@click.option( - "--pristine-dbdata-snapshot-path", - type=Path, - default=None, - help=f"The path to the .tgz snapshot of the dbdata directory to use as a starting point for tuning. The default is {get_default_pristine_dbdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", -) -@click.option( - "--intended-dbdata-hardware", - type=click.Choice(["hdd", "ssd"]), - default="hdd", - help=f"The intended hardware dbdata should be on. Used as a sanity check for --dbdata-parent-dpath.", -) -@click.option( - "--dbdata-parent-dpath", - type=Path, - default=None, - help=f"The path to the parent directory of the dbdata which will be actively tuned. The default is {get_default_dbdata_parent_dpath(WORKSPACE_PATH_PLACEHOLDER)}.", -) -@click.option( - "--pgbin-path", - type=Path, - default=None, - help=f"The path to the bin containing Postgres executables. The default is {get_default_pgbin_path(WORKSPACE_PATH_PLACEHOLDER)}.", -) -@click.option( - "--workload-path", - type=Path, - default=None, - help=f"The path to the directory that specifies the workload (such as its queries and order of execution). The default is {get_default_workload_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER)}.", -) -@click.option( - "--seed", - type=int, - default=None, - help="The seed used for all sources of randomness (random, np, torch, etc.). The default is a random value.", -) -@click.option( - "--agent", - type=str, - default="wolp", - help=f"The RL algorithm to use for the tuning agent.", -) -@click.option( - "--max-concurrent", - type=int, - default=1, - help=f"The max # of concurrent agent models to train. Note that unlike in HPO, all will use the same hyperparameters. This just helps control for other sources of randomness.", -) -@click.option( - "--num-samples", - type=int, - default=40, - help=f"The # of times to specific hyperparameter configs to sample from the hyperparameter search space and train agent models with.", -) -@click.option( - "--tune-duration-during-hpo", - type=float, - default=4.0, - help="The number of hours to run each hyperparamer config tuning trial for.", -) -@click.option( - "--workload-timeout", - type=int, - default=DEFAULT_WORKLOAD_TIMEOUT, - help="The timeout (in seconds) of a workload. We run the workload once per DBMS configuration. For OLAP workloads, certain configurations may be extremely suboptimal, so we need to time out the workload.", -) -@click.option( - "--query-timeout", - type=int, - default=30, - help="The timeout (in seconds) of a query. 
See the help of --workload-timeout for the motivation of this.", -) -@click.option( - "--enable-boot-during-hpo", - is_flag=True, - help="Whether to enable the Boot query accelerator during the HPO process. Deciding to use Boot during HPO is separate from deciding to use Boot during tuning.", -) -@click.option( - "--boot-config-fpath-during-hpo", - type=Path, - default=DEFAULT_BOOT_CONFIG_FPATH, - help="The path to the file configuring Boot when running HPO. When tuning, you may use a different Boot config.", -) -# Building a space good for Boot is subtly different from whether we enable Boot during HPO. -# There are certain options that qualitatively do not perform well with Boot (e.g. metrics state -# because Boot extrapolates the query runtime but not metrics). This param controls whether we -# use those options or not. -# I chose the word "good" instead of "compatible" because metrics state does not _crash_ if you -# use Boot but it just doesn't seem like it would perform well. -# One workflow where these two variables are different is where we don't enable Boot during HPO -# but do want to enable Boot during tuning. -# However, whether we're building a space good for Boot is also different from whether we enable -# Boot during tuning. We often want to compare one tuning run with Boot against one without -# Boot, in which case we'd build a space good for Boot and then run it once with Boot and once -# without Boot. -@click.option( - "--build-space-good-for-boot", - is_flag=True, - help="Whether to avoid certain options that are known to not perform well when Boot is enabled. See the codebase for why this is different from --enable-boot-during-hpo.", -) -def hpo( - dbgym_cfg: DBGymConfig, - benchmark_name: str, - workload_name_suffix: Optional[str], - scale_factor: float, - embedder_path: Optional[Path], - benchmark_config_path: Optional[Path], - benchbase_config_path: Optional[Path], - sysknobs_path: Path, - pristine_dbdata_snapshot_path: Optional[Path], - intended_dbdata_hardware: str, - dbdata_parent_dpath: Optional[Path], - pgbin_path: Optional[Path], - workload_path: Optional[Path], - seed: Optional[int], - agent: str, - max_concurrent: int, - num_samples: int, - tune_duration_during_hpo: float, - workload_timeout: int, - query_timeout: int, - enable_boot_during_hpo: bool, - boot_config_fpath_during_hpo: Path, - build_space_good_for_boot: bool, -) -> None: - # Set args to defaults programmatically (do this before doing anything else in the function) - if workload_name_suffix is None: - workload_name_suffix = get_default_workload_name_suffix(benchmark_name) - workload_name = get_workload_name(scale_factor, workload_name_suffix) - if embedder_path is None: - embedder_path = get_default_embedder_path( - dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name - ) - if benchmark_config_path is None: - benchmark_config_path = get_default_benchmark_config_path(benchmark_name) - if benchbase_config_path is None: - benchbase_config_path = get_default_benchbase_config_path(benchmark_name) - if pristine_dbdata_snapshot_path is None: - pristine_dbdata_snapshot_path = get_default_pristine_dbdata_snapshot_path( - dbgym_cfg.dbgym_workspace_path, benchmark_name, scale_factor - ) - if dbdata_parent_dpath is None: - dbdata_parent_dpath = get_default_dbdata_parent_dpath( - dbgym_cfg.dbgym_workspace_path - ) - if pgbin_path is None: - pgbin_path = get_default_pgbin_path(dbgym_cfg.dbgym_workspace_path) - if workload_path is None: - workload_path = get_default_workload_path( - 
dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name - ) - if seed is None: - seed = random.randint(0, int(1e8)) - - # Fully resolve all input paths. - embedder_path = fully_resolve_path(dbgym_cfg, embedder_path) - benchmark_config_path = fully_resolve_path(dbgym_cfg, benchmark_config_path) - benchbase_config_path = fully_resolve_path(dbgym_cfg, benchbase_config_path) - sysknobs_path = fully_resolve_path(dbgym_cfg, sysknobs_path) - pristine_dbdata_snapshot_path = fully_resolve_path( - dbgym_cfg, pristine_dbdata_snapshot_path - ) - dbdata_parent_dpath = fully_resolve_path(dbgym_cfg, dbdata_parent_dpath) - pgbin_path = fully_resolve_path(dbgym_cfg, pgbin_path) - workload_path = fully_resolve_path(dbgym_cfg, workload_path) - boot_config_fpath_during_hpo = fully_resolve_path( - dbgym_cfg, boot_config_fpath_during_hpo - ) - - # Check assertions on args - if intended_dbdata_hardware == "hdd": - assert not is_ssd( - dbdata_parent_dpath - ), f"Intended hardware is HDD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an SSD" - elif intended_dbdata_hardware == "ssd": - assert is_ssd( - dbdata_parent_dpath - ), f"Intended hardware is SSD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an HDD" - else: - assert False - - # Create args object - hpo_args = AgentHPOArgs( - benchmark_name, - workload_name, - embedder_path, - benchmark_config_path, - benchbase_config_path, - sysknobs_path, - pristine_dbdata_snapshot_path, - dbdata_parent_dpath, - pgbin_path, - workload_path, - seed, - agent, - max_concurrent, - num_samples, - tune_duration_during_hpo, - workload_timeout, - query_timeout, - enable_boot_during_hpo, - boot_config_fpath_during_hpo, - build_space_good_for_boot, - ) - _tune_hpo(dbgym_cfg, hpo_args) - - -# The reason we put the paths inside the space is so that the tuner only receives the space .json file -# as a CLI arg and doesn't need any other CLI args. The hyperparameters are selected using the paths -# given here, so it doesn't make sense to specify them separately when tuning. -def build_space( - sysknobs: dict[str, Any], - benchmark_config: dict[str, Any], - workload_path: Path, - embedder_path: list[Path], - pgconn_info: dict[str, Path], - benchbase_config: dict[str, Any] = {}, - tune_duration_during_hpo: float = 30.0, - seed: int = 0, - enable_boot_during_hpo: bool = False, - boot_config_fpath_during_hpo: Path = Path(), - build_space_good_for_boot: bool = False, - workload_timeouts: list[float] = [600.0], - query_timeouts: list[float] = [30.0], -) -> dict[str, Any]: - - return { - # Internal space versioning. - "space_version": "2.0", - "trace": True, - "seed": seed, - # For params that may differ between HPO, tune, and replay, I chose to represent them - # as dictionaries. I felt this was less confusing that overriding parts of the hpo_params - # during tune or replay. With the dictionary representation, we never override anything in - # hpo_params - we only ever add new fields to hpo_params. - "enable_boot": { - str(TuningMode.HPO): enable_boot_during_hpo, - }, - "boot_config_fpath": { - str(TuningMode.HPO): boot_config_fpath_during_hpo, - }, - # Timeouts. - "tune_duration": { - str(TuningMode.HPO): tune_duration_during_hpo, - }, - "workload_timeout": { - str(TuningMode.HPO): tune.choice(workload_timeouts), - }, - "query_timeout": tune.choice(query_timeouts), - # Paths. - "workload_path": str(workload_path), - "pgconn_info": pgconn_info, - "benchmark_config": benchmark_config, - "benchbase_config": benchbase_config, - # Embeddings. 
- "embedder_path": tune.choice(map(str, embedder_path)), - # Default quantization factor to use. - "default_quantization_factor": 100, - "system_knobs": sysknobs, - # Horizon before resetting. - "horizon": 5, - # Workload Eval. - "workload_eval_mode": tune.choice(["all", "all_enum"]), - "workload_eval_inverse": tune.choice([False, True]), - "workload_eval_reset": True, - # Reward. - "reward": tune.choice(["multiplier", "relative"]), - "reward_scaler": tune.choice([1, 2, 10]), - "workload_timeout_penalty": 1, - "normalize_reward": tune.choice([False, True]), - # State. - "metric_state": tune.choice( - ([] if build_space_good_for_boot else ["metric"]) - + ["structure", "structure_normalize"] - ), - "maximize_state": not benchmark_config.get("oltp_workload", False), - # Whether to normalize state or not. - "normalize_state": tune.sample_from( - lambda spc: ( - False - if spc["config"]["metric_state"] == "structure_normalize" - else True - ) - ), - # LSC Parameters. The units for these are based on the embedding itself. - # TODO(): Set these parameters based on the workload/embedding structure itself. - "lsc": { - "enabled": False, - # These are the initial low-bias, comma separated by the horizon step. - "initial": "0", - # These are the units for how much to increment the low-bias by each time. - "increment": "0", - # Maximum allowed shift. - "max": "0", - # This controls how frequently to try and boost the shifts based on episode. - "shift_eps_freq": 1, - # How many episodes to start. - "shift_after": 3, - }, - # RL Agent Parameters. - # Number of warmup steps. - "learning_starts": 0, - # Learning rate. - "learning_rate": tune.choice([1e-3, 6e-4, 3e-5]), - "critic_lr_scale": tune.choice([1.0, 2.5, 5.0]), - "policy_l2_reg": tune.choice([0.01, 0.05]), - # Discount. - "gamma": tune.choice([0, 0.9, 0.95]), - # Polyak averaging rate. - "tau": tune.choice([0.995, 1.0]), - # Replay Buffer Size. - "buffer_size": 1_000_000, - # Batch size. - "batch_size": tune.choice([16, 32]), - # Gradient Clipping. - "grad_clip": tune.choice([1.0, 5.0, 10.0]), - # Gradient steps per sample. - "gradient_steps": tune.choice([1, 2, 4]), - # Training steps. - "train_freq_unit": tune.choice(["step", "episode"]), - "train_freq_frequency": 1, - # Target noise. - "target_noise": { - "target_noise_clip": tune.choice([0.05, 0.1, 0.15]), - "target_policy_noise": tune.choice([0.15, 0.2]), - }, - # Noise parameters. - "noise_parameters": { - "noise_type": tune.choice(["normal", "ou"]), - "noise_sigma": tune.choice([0.05, 0.1, 0.15]), - }, - "scale_noise_perturb": True, - # Neighbor parameters. - "neighbor_parameters": { - "knob_num_nearest": tune.choice([10, 100]), - "knob_span": tune.choice([1, 3]), - "index_num_samples": 1, - # Use index rules whenever we aren't optimizing OLTP. - "index_rules": not benchmark_config.get("oltp_workload", False), - }, - # Networks. 
- "weight_init": tune.choice(["xavier_normal", "xavier_uniform", "orthogonal"]), - "bias_zero": tune.choice([False, True]), - "policy_weight_adjustment": tune.choice([1, 100]), - "activation_fn": tune.choice(["gelu", "mish"]), - "pi_arch": tune.choice(["128,128", "256,256", "512,512"]), - "qf_arch": tune.choice(["256", "512", "1024"]), - } - - -class TuneTimeoutChecker(object): - def __init__(self, tune_duration: float) -> None: - self.limit = (tune_duration * 3600) > 0 - self.remain = int(tune_duration * 3600) - self.running = False - self.start = 0.0 - - def resume(self) -> None: - self.start = time.time() - self.running = True - - def pause(self) -> None: - if self.limit and self.running: - self.remain -= int(time.time() - self.start) - self.running = False - - def __call__(self) -> bool: - if not self.limit: - return False - - if self.remain <= 0: - return True - - if self.running: - return int(time.time() - self.start) >= self.remain - - return False - - -class TuneTrial: - def __init__( - self, - dbgym_cfg: DBGymConfig, - tuning_mode: TuningMode, - ray_trial_id: Optional[str] = None, - ) -> None: - """ - We use this object for HPO, tune, and replay. It behaves *slightly* differently - depending on what it's used for, which is why we have the tuning_mode param. - """ - self.dbgym_cfg = dbgym_cfg - self.tuning_mode = tuning_mode - - if self.tuning_mode == TuningMode.HPO: - assert ( - ray_trial_id != None - ), "If we're doing HPO, we will create multiple TuneTrial() objects. We thus need to differentiate them somehow." - else: - assert ( - ray_trial_id is None - ), "If we're not doing HPO, we (currently) will create only one TuneTrial() object. For clarity, we set ray_trial_id to None since ray_trial_id should not be used in this case." - self.ray_trial_id = ray_trial_id - - def setup(self, hpo_params: dict[str, Any]) -> None: - # Attach mythril directory to the search path. - sys.path.append(os.path.expanduser(self.dbgym_cfg.dbgym_repo_path)) - - torch.set_default_dtype(torch.float32) # type: ignore[no-untyped-call] - seed = ( - hpo_params["seed"] - if hpo_params["seed"] != -1 - else np.random.randint(np.iinfo(np.int32).max) - ) - np.random.seed(seed) - torch.manual_seed(seed) - - tune_duration = hpo_params["tune_duration"][str(self.tuning_mode)] - - self.timeout_checker = TuneTimeoutChecker(tune_duration) - self.artifact_manager, self.target_reset, self.env, self.agent, self.signal = ( - build_trial( - self.dbgym_cfg, - self.tuning_mode, - seed=seed, - hpo_params=hpo_params, - ray_trial_id=self.ray_trial_id, - ) - ) - logging.getLogger(DBGYM_LOGGER_NAME).info("%s", hpo_params) - logging.getLogger(DBGYM_LOGGER_NAME).info(f"Seed: {seed}") - - # Attach the timeout checker and loggers. - self.agent.set_timeout_checker(self.timeout_checker) - self.agent.set_artifact_manager(self.artifact_manager) - - self.env_init = False - self.start_time = time.time() - self.step_count = 0 - - def step(self) -> dict[Any, Any]: - self.step_count += 1 - # Only measure the actual tuning time. - self.timeout_checker.resume() - - episode = self.agent._episode_num - it = self.agent.num_timesteps - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Starting episode: {episode+1}, iteration: {it+1}" - ) - - if not self.env_init: - _, infos = self.env.reset() - baseline_reward, baseline_metric = ( - infos["baseline_reward"], - infos["baseline_metric"], - ) - metric_reward_message = f"Baseline Metric: {baseline_metric}. 
Baseline Reward: {baseline_reward}" - logging.getLogger(DBGYM_LOGGER_NAME).info(metric_reward_message) - self.artifact_manager.log_to_replay_info(metric_reward_message) - self.env_init = True - - assert ( - self.ray_trial_id != None - if self.tuning_mode == TuningMode.HPO - else True - ), "If we're doing HPO, we need to ensure that we're passing a non-None ray_trial_id to stash_results() to avoid conflicting folder names." - self.artifact_manager.stash_results( - infos, name_override="baseline", ray_trial_id=self.ray_trial_id - ) - else: - self.agent.learn(self.env, total_timesteps=1, tuning_mode=self.tuning_mode) - - self.timeout_checker.pause() - self.artifact_manager.advance() - - # Step telemetry that we care about. - data = { - "AgentEpisode": episode, - "AgentTimesteps": it, - "TrialStep": self.step_count, - "Best Metric": ( - self.target_reset.real_best_metric if self.target_reset else -1 - ), - "Best Seen Metric": ( - self.target_reset.best_metric if self.target_reset else -1 - ), - "HoursElapsed": (time.time() - self.start_time) / 3600.0, - } - - # If we've timed out. Note that we've timed out. - if self.timeout_checker(): - self.cleanup() - data[ray.tune.result.DONE] = True - - return data - - def cleanup(self) -> None: - self.artifact_manager.flush() - self.env.close() # type: ignore[no-untyped-call] - if Path(self.signal).exists(): - os.remove(self.signal) - - -# I want to pass dbgym_cfg into TuneOpt without putting it inside `hpo_params`. This is because it's a pain to turn DBGymConfig -# into a nice dictionary of strings, and nothing in DBGymConfig would be relevant to someone checking the configs later -# Using a function to create a class is Ray's recommended way of doing this (see -# https://discuss.ray.io/t/using-static-variables-to-control-trainable-subclass-in-ray-tune/808/4) -# If you don't create the class with a function, it doesn't work due to how Ray serializes classes -global_dbgym_cfg: DBGymConfig - - -def create_tune_opt_class(dbgym_cfg_param: DBGymConfig) -> Type[Trainable]: - global global_dbgym_cfg - global_dbgym_cfg = dbgym_cfg_param - - class TuneOpt(Trainable): - dbgym_cfg = global_dbgym_cfg - - def setup(self, hpo_params: dict[str, Any]) -> None: - self.trial = TuneTrial( - TuneOpt.dbgym_cfg, TuningMode.HPO, ray_trial_id=self.trial_id - ) - self.trial.setup(hpo_params) - - def step(self) -> dict[Any, Any]: - return self.trial.step() - - def cleanup(self) -> None: - return self.trial.cleanup() - - def save_checkpoint(self, checkpoint_dir: str) -> None: - # We can't actually do anything about this right now. - pass - - def load_checkpoint(self, checkpoint_dir: Union[dict[Any, Any], None]) -> None: - # We can't actually do anything about this right now. 
- pass - - return TuneOpt - - -def _tune_hpo(dbgym_cfg: DBGymConfig, hpo_args: AgentHPOArgs) -> None: - with open_and_save(dbgym_cfg, hpo_args.sysknobs_path) as f: - sysknobs = yaml.safe_load(f)["system_knobs"] - - with open_and_save(dbgym_cfg, hpo_args.benchmark_config_path) as f: - benchmark_config = yaml.safe_load(f) - is_oltp = benchmark_config["protox"]["query_spec"]["oltp_workload"] - benchmark = [k for k in benchmark_config.keys()][0] - benchmark_config = benchmark_config[benchmark] - benchmark_config["benchmark"] = benchmark - - # TODO(phw2): read the dir hpo_args.embedder_path and get a list of embeddings - embedder_path = [hpo_args.embedder_path] - # TODO(phw2): make workload and query timeout params lists instead of just ints - workload_timeouts = [hpo_args.workload_timeout] - query_timeouts = [hpo_args.query_timeout] - - assert not is_oltp - benchbase_config: dict[str, Any] = {} - # This is commented out because OLTP is currently not implemented. - # benchbase_config = ( - # { - # "oltp_config": { - # "oltp_num_terminals": hpo_args.oltp_num_terminals, - # "oltp_duration": hpo_args.oltp_duration, - # "oltp_sf": hpo_args.oltp_sf, - # "oltp_warmup": hpo_args.oltp_warmup, - # }, - # "benchbase_path": hpo_args.benchbase_path, - # "benchbase_config_path": hpo_args.benchbase_config_path, - # } - # if is_oltp - # else {} - # ) - - space = build_space( - sysknobs, - benchmark_config, - hpo_args.workload_path, - embedder_path, - pgconn_info={ - "pristine_dbdata_snapshot_path": hpo_args.pristine_dbdata_snapshot_path, - "dbdata_parent_dpath": hpo_args.dbdata_parent_dpath, - "pgbin_path": hpo_args.pgbin_path, - }, - benchbase_config=benchbase_config, - tune_duration_during_hpo=hpo_args.tune_duration_during_hpo, - seed=hpo_args.seed, - enable_boot_during_hpo=hpo_args.enable_boot_during_hpo, - boot_config_fpath_during_hpo=hpo_args.boot_config_fpath_during_hpo, - build_space_good_for_boot=hpo_args.build_space_good_for_boot, - workload_timeouts=workload_timeouts, - query_timeouts=query_timeouts, - ) - - restart_ray(dbgym_cfg.root_yaml["ray_gcs_port"]) - ray.init( - address=f"localhost:{dbgym_cfg.root_yaml['ray_gcs_port']}", log_to_driver=False - ) - - # Scheduler. - scheduler = FIFOScheduler() # type: ignore[no-untyped-call] - - # Search. - search = BasicVariantGenerator(max_concurrent=hpo_args.max_concurrent) - - mode = "max" if is_oltp else "min" - tune_config = TuneConfig( - scheduler=scheduler, - search_alg=search, - num_samples=hpo_args.num_samples, - max_concurrent_trials=hpo_args.max_concurrent, - chdir_to_trial_dir=True, - metric=METRIC_NAME, - mode=mode, - ) - - dtime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - run_config = RunConfig( - name=f"ProtoxHPO_{dtime}", - failure_config=FailureConfig(max_failures=0, fail_fast=True), - sync_config=SyncConfig(), - verbose=2, - log_to_file=True, - storage_path=str(dbgym_cfg.cur_task_runs_path("hpo_ray_results", mkdir=True)), - ) - - tuner = ray.tune.Tuner( - create_tune_opt_class(dbgym_cfg), - tune_config=tune_config, - run_config=run_config, - param_space=space, - ) - - results = tuner.fit() - if results.num_errors > 0: - for i in range(len(results)): - if results[i].error: - logging.getLogger(DBGYM_LOGGER_NAME).error(f"Trial {results[i]} FAILED") - assert False, "Encountered exceptions!" - - # Save the best params.json. - best_result = results.get_best_result(metric=METRIC_NAME, mode=mode) - best_params_generated_fpath = Path(best_result.path) / "params.json" - # Before saving, copy it into run_*/[codebase]/data/. 
This way, save_file() called on - # params.json will link directly to run_*/[codebase]/data/params.json instead of to - # run_*/[codebase]/hpo_ray_results/TuneOpt*/. - best_params_copy_fpath = ( - dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "params.json" - ) - shutil.copy(best_params_generated_fpath, best_params_copy_fpath) - link_result( - dbgym_cfg, - best_params_copy_fpath, - custom_result_name=get_default_hpoed_agent_params_fname( - hpo_args.benchmark_name, hpo_args.workload_name - ) - + ".link", - ) - # We also link from run_*/[codebase]/data/params.json to run_*/[codebase]/hpo_ray_results/TuneOpt*/**/params.json. - # This way, when _manually_ looking through run_*/, we can see which HPO trial was - # responsible for creating params.json. - best_params_link_fpath = ( - dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "params.json.link" - ) - os.symlink(best_params_generated_fpath, best_params_link_fpath) diff --git a/tune/protox/agent/noise.py b/tune/protox/agent/noise.py deleted file mode 100644 index c036c216..00000000 --- a/tune/protox/agent/noise.py +++ /dev/null @@ -1,118 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Optional - -import numpy as np -from numpy.typing import NDArray - - -class ActionNoise(ABC): - """ - The action noise base class - """ - - def __init__(self, mean: NDArray[np.float32], sigma: NDArray[np.float32]) -> None: - super().__init__() - self._mu = mean - self._sigma = sigma - - def reset(self) -> None: - """ - Call end of episode reset for the noise - """ - pass - - @abstractmethod - def __call__(self) -> NDArray[np.float32]: - raise NotImplementedError() - - -class NormalActionNoise(ActionNoise): - """ - A Gaussian action noise. - - :param mean: Mean value of the noise - :param sigma: Scale of the noise (std here) - :param dtype: Type of the output noise - """ - - def __init__( - self, - mean: NDArray[np.float32], - sigma: NDArray[np.float32], - ) -> None: - super().__init__(mean, sigma) - - def __call__(self) -> NDArray[np.float32]: - return np.random.normal(self._mu, self._sigma).astype(np.float32) - - def __repr__(self) -> str: - return f"NormalActionNoise(mu={self._mu}, sigma={self._sigma})" - - -class OrnsteinUhlenbeckActionNoise(ActionNoise): - """ - An Ornstein Uhlenbeck action noise, this is designed to approximate Brownian motion with friction. 
- - Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab - - :param mean: Mean of the noise - :param sigma: Scale of the noise - :param theta: Rate of mean reversion - :param dt: Timestep for the noise - :param initial_noise: Initial value for the noise output, (if None: 0) - :param dtype: Type of the output noise - """ - - def __init__( - self, - mean: NDArray[np.float32], - sigma: NDArray[np.float32], - theta: float = 0.15, - dt: float = 1e-2, - initial_noise: Optional[NDArray[np.float32]] = None, - ) -> None: - super().__init__(mean, sigma) - self._theta = theta - self._dt = dt - self.initial_noise = initial_noise - self.noise_prev = np.zeros_like(self._mu) - self.reset() - - def __call__(self) -> NDArray[np.float32]: - noise: NDArray[np.float32] = ( - self.noise_prev - + self._theta * (self._mu - self.noise_prev) * self._dt - + self._sigma * np.sqrt(self._dt) * np.random.normal(size=self._mu.shape) - ) - self.noise_prev = noise - return noise.astype(np.float32) - - def reset(self) -> None: - """ - reset the Ornstein Uhlenbeck noise, to the initial position - """ - self.noise_prev = ( - self.initial_noise - if self.initial_noise is not None - else np.zeros_like(self._mu) - ) - - def __repr__(self) -> str: - return f"OrnsteinUhlenbeckActionNoise(mu={self._mu}, sigma={self._sigma})" - - -class ClampNoise(ActionNoise): - def __init__(self, other: ActionNoise, clamp: float): - super().__init__(np.zeros(0, dtype=np.float32), np.zeros(0, dtype=np.float32)) - self.other = other - self.other.reset() - self.clamp = clamp - - def __call__(self) -> NDArray[np.float32]: - return np.clip(self.other(), -self.clamp, self.clamp).astype(np.float32) - - def reset(self) -> None: - self.other.reset() - - def __repr__(self) -> str: - return f"ClampNoise({self.clamp}, {self.other})" diff --git a/tune/protox/agent/off_policy_algorithm.py b/tune/protox/agent/off_policy_algorithm.py deleted file mode 100644 index 6d1edcc9..00000000 --- a/tune/protox/agent/off_policy_algorithm.py +++ /dev/null @@ -1,257 +0,0 @@ -from copy import deepcopy -from typing import Any, Dict, Optional, Tuple - -import numpy as np -from numpy.typing import NDArray - -from tune.protox.agent.agent_env import AgentEnv -from tune.protox.agent.base_class import BaseAlgorithm -from tune.protox.agent.buffers import ReplayBuffer -from tune.protox.agent.noise import ActionNoise -from tune.protox.agent.utils import ( - RolloutReturn, - TrainFreq, - TrainFrequencyUnit, - should_collect_more_steps, -) -from util.workspace import TuningMode - - -class OffPolicyAlgorithm(BaseAlgorithm): - """ - The base for Off-Policy algorithms (ex: SAC/TD3) - - :param policy: The policy model - :param replay_buffer - :param learning_rate: learning rate for the optimizer, - it can be a function of the current progress remaining (from 1 to 0) - :param learning_starts: how many steps of the model to collect transitions for before learning starts - :param batch_size: Minibatch size for each gradient update - :param train_freq: Update the model every ``train_freq`` steps. Alternatively pass a tuple of frequency and unit - like ``(5, "step")`` or ``(2, "episode")``. - :param gradient_steps: How many gradient steps to do after each rollout (see ``train_freq``) - Set to ``-1`` means to do as many gradient steps as steps done in the environment - during the rollout. - :param action_noise: the action noise type (None by default), this can help - for hard exploration problem. Cf common.noise for the different action noise type. 
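As a quick illustration of what the noise objects above produce, here is a standalone numpy sketch (independent of the deleted classes, with made-up mean/sigma values) of the same discretized Ornstein-Uhlenbeck update used in OrnsteinUhlenbeckActionNoise.__call__, plus the clipping that ClampNoise layers on top before the sample perturbs a deterministic action:

import numpy as np

mu = np.zeros(4, dtype=np.float32)          # mean the noise reverts toward
sigma = np.full(4, 0.1, dtype=np.float32)   # per-dimension scale
theta, dt, clamp = 0.15, 1e-2, 0.05         # theta/dt match the class defaults; clamp is illustrative

noise_prev = np.zeros_like(mu)
for _ in range(3):
    # Discretized OU step: x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
    noise = (
        noise_prev
        + theta * (mu - noise_prev) * dt
        + sigma * np.sqrt(dt) * np.random.normal(size=mu.shape)
    )
    noise_prev = noise
    # ClampNoise clips whatever the wrapped noise returns to [-clamp, clamp].
    print(np.clip(noise, -clamp, clamp).astype(np.float32))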
- :param seed: Seed for the pseudo random generators - """ - - def __init__( - self, - policy: Any, - replay_buffer: ReplayBuffer, - learning_starts: int = 100, - batch_size: int = 256, - train_freq: tuple[int, str] = (1, "step"), - gradient_steps: int = 1, - action_noise: Optional[ActionNoise] = None, - seed: Optional[int] = None, - ray_trial_id: Optional[str] = None, - ): - super().__init__(seed=seed) - self.policy = policy - self.replay_buffer = replay_buffer - self.ray_trial_id = ray_trial_id - - self.batch_size = batch_size - self.learning_starts = learning_starts - self.gradient_steps = gradient_steps - self.action_noise = action_noise - - # Save train freq parameter, will be converted later to TrainFreq object - self.train_freq = self._convert_train_freq(train_freq) - - def _convert_train_freq(self, train_freq: tuple[int, str]) -> TrainFreq: - """ - Convert `train_freq` parameter (int or tuple) - to a TrainFreq object. - """ - return TrainFreq(*(train_freq[0], TrainFrequencyUnit(train_freq[1]))) - - def train(self, env: AgentEnv, gradient_steps: int, batch_size: int) -> None: - """ - Sample the replay buffer and do the updates - (gradient descent and update target networks) - """ - raise NotImplementedError() - - def _on_step(self) -> None: - """ - Method called after each step in the environment. - It is meant to trigger DQN target network update - but can be used for other purposes - """ - pass - - def _store_transition( - self, - replay_buffer: ReplayBuffer, - buffer_action: NDArray[np.float32], - new_obs: NDArray[np.float32], - reward: float, - dones: bool, - infos: dict[str, Any], - ) -> None: - """ - Store transition in the replay buffer. - We store the normalized action and the unnormalized observation. - It also handles terminal observations (because AgentEnv resets automatically). - - :param replay_buffer: Replay buffer object where to store the transition. - :param buffer_action: normalized action - :param new_obs: next observation in the current episode - or first observation of the episode (when dones is True) - :param reward: reward for the current transition - :param dones: Termination signal - :param infos: List of additional information about the transition. - It may contain the terminal observations and information about timeout. - """ - # Avoid changing the original ones - self._last_original_obs, new_obs_, reward_ = self._last_obs, new_obs, reward - assert self._last_original_obs is not None - - # Avoid modification by reference - next_obs = deepcopy(new_obs_) - # As the Env resets automatically, new_obs is already the - # first observation of the next episode - if dones: - assert infos.get("terminal_observation") is not None - assert not isinstance(next_obs, dict) - next_obs = infos["terminal_observation"] - - replay_buffer.add( - self._last_original_obs, - next_obs, - buffer_action, - reward_, - dones, - infos, - ) - - self._last_obs = new_obs - - def _sample_action( - self, - learning_starts: int, - action_noise: Optional[ActionNoise] = None, - ) -> tuple[NDArray[np.float32], NDArray[np.float32]]: - raise NotImplementedError() - - def collect_rollouts( - self, - tuning_mode: TuningMode, - env: AgentEnv, - train_freq: TrainFreq, - replay_buffer: ReplayBuffer, - action_noise: Optional[ActionNoise] = None, - learning_starts: int = 0, - ) -> RolloutReturn: - """ - Collect experiences and store them into a ``ReplayBuffer``. - - :param env: The training environment - :param train_freq: How much experience to collect - by doing rollouts of current policy. 
- Either ``TrainFreq(, TrainFrequencyUnit.STEP)`` - or ``TrainFreq(, TrainFrequencyUnit.EPISODE)`` - with ```` being an integer greater than 0. - :param action_noise: Action noise that will be used for exploration - Required for deterministic policy (e.g. TD3). This can also be used - in addition to the stochastic policy for SAC. - :param learning_starts: Number of steps before learning for the warm-up phase. - :param replay_buffer: - :return: - """ - # Switch to eval mode (this affects batch norm / dropout) - self.policy.set_training_mode(False) - - num_collected_steps, num_collected_episodes = 0, 0 - - assert isinstance(env, AgentEnv), "You must pass a AgentEnv" - assert train_freq.frequency > 0, "Should at least collect one step or episode." - - continue_training = True - while should_collect_more_steps( - train_freq, num_collected_steps, num_collected_episodes - ): - if self.timeout_checker is not None and self.timeout_checker(): - # Timeout has been hit. - continue_training = False - break - - # Select action randomly or according to policy - actions, buffer_actions = self._sample_action(learning_starts, action_noise) - - # Rescale and perform action - new_obs, rewards, terms, truncs, infos = env.step(actions) - dones = terms or truncs - # We only stash the results if we're not doing HPO, or else the results from concurrent HPO would get - # stashed in the same directory and potentially cause a race condition. - if self.artifact_manager: - assert ( - self.ray_trial_id != None if tuning_mode == TuningMode.HPO else True - ), "If we're doing HPO, we need to ensure that we're passing a non-None ray_trial_id to stash_results() to avoid conflicting folder names." - self.artifact_manager.stash_results( - infos, ray_trial_id=self.ray_trial_id - ) - - self.num_timesteps += 1 - num_collected_steps += 1 - - # Store data in replay buffer (normalized action and unnormalized observation) - self._store_transition( - replay_buffer, buffer_actions, new_obs, rewards, dones, infos - ) - - # For DQN, check if the target network should be updated - # and update the exploration schedule - # For SAC/TD3, the update is dones as the same time as the gradient update - # see https://github.com/hill-a/stable-baselines/issues/900 - self._on_step() - - if dones: - # Update stats - num_collected_episodes += 1 - self._episode_num += 1 - if action_noise is not None: - action_noise.reset() - - return RolloutReturn( - num_collected_steps, num_collected_episodes, continue_training - ) - - def learn( - self, env: AgentEnv, total_timesteps: int, tuning_mode: TuningMode - ) -> None: - assert isinstance(env, AgentEnv) - total_timesteps = self._setup_learn(env, total_timesteps) - - while self.num_timesteps < total_timesteps: - rollout = self.collect_rollouts( - tuning_mode, - env, - train_freq=self.train_freq, - replay_buffer=self.replay_buffer, - action_noise=self.action_noise, - learning_starts=self.learning_starts, - ) - - if rollout.continue_training is False: - break - - if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts: - # If no `gradient_steps` is specified, - # do as many gradients steps as steps performed during the rollout - gradient_steps = ( - self.gradient_steps - if self.gradient_steps >= 0 - else rollout.episode_timesteps - ) - # Special case when the user passes `gradient_steps=0` - if gradient_steps > 0: - self.train( - env, - gradient_steps=gradient_steps, - batch_size=self.batch_size, - ) diff --git a/tune/protox/agent/policies.py b/tune/protox/agent/policies.py deleted file mode 
100644 index ff11a432..00000000 --- a/tune/protox/agent/policies.py +++ /dev/null @@ -1,203 +0,0 @@ -"""Policies: abstract base class and concrete implementations.""" - -import logging -from typing import Any, List, Optional, Tuple, Type, cast - -import numpy as np -import torch as th -from gymnasium import spaces -from numpy.typing import NDArray -from torch import nn - -from tune.protox.agent.torch_layers import create_mlp - - -class BaseModel(nn.Module): - """ - The base model object: makes predictions in response to observations. - - In the case of policies, the prediction is an action. In the case of critics, it is the - estimated value of the observation. - - :param observation_space: The observation space of the environment - :param action_space: The action space of the environment - :param optimizer_class: The optimizer to use, ``th.optim.Adam`` by default - :param optimizer_kwargs: Additional keyword arguments, excluding the learning rate, to pass to the optimizer - """ - - def __init__( - self, observation_space: spaces.Space[Any], action_space: spaces.Space[Any] - ): - super().__init__() - self.observation_space = observation_space - self.action_space = action_space - - def extract_features(self, obs: th.Tensor) -> th.Tensor: - return cast(th.Tensor, nn.Flatten()(obs.float())) - - def set_training_mode(self, mode: bool) -> None: - """ - Put the policy in either training or evaluation mode. - - This affects certain modules, such as batch normalisation and dropout. - - :param mode: if true, set to training mode, else set to evaluation mode - """ - self.train(mode) - - def obs_to_tensor( - self, - observation: NDArray[np.float32], - ) -> th.Tensor: - """ - Convert an input observation to a PyTorch tensor that can be fed to a model. - Includes sugar-coating to handle different observations. - - :param observation: the input observation - :return: The observation as PyTorch tensor - """ - observation = np.array(observation) - if not isinstance(observation, dict): - # Add batch dimension if needed - sh = ( - self.observation_space.shape - if self.observation_space.shape - else [spaces.utils.flatdim(self.observation_space)] - ) - observation = observation.reshape((-1, *sh)) - - return th.as_tensor(observation) - - -class Actor(BaseModel): - """ - Actor network (policy) for wolpertinger architecture (based on TD3). - - :param observation_space: Obervation space - :param action_space: Action space - :param net_arch: Network architecture - :param features_dim: Number of features - :param activation_fn: Activation function - """ - - def __init__( - self, - observation_space: spaces.Space[Any], - action_space: spaces.Space[Any], - net_arch: list[int], - features_dim: int, - activation_fn: Type[nn.Module] = nn.ReLU, - weight_init: Optional[str] = None, - bias_zero: bool = False, - squash_output: bool = False, - action_dim: int = 0, - policy_weight_adjustment: float = 1.0, - ): - super().__init__(observation_space, action_space) - - actor_net = create_mlp( - features_dim, - action_dim, - net_arch, - activation_fn, - squash_output=squash_output, - weight_init=weight_init, - bias_zero=bias_zero, - final_layer_adjust=policy_weight_adjustment, - ) - # Deterministic action - self.mu = nn.Sequential(*actor_net) - - def forward(self, obs: th.Tensor) -> th.Tensor: - return cast(th.Tensor, self.mu(self.extract_features(obs))) - - def _predict( - self, observation: th.Tensor, deterministic: bool = False - ) -> th.Tensor: - # Note: the deterministic deterministic parameter is ignored in the case of TD3. 
- # Predictions are always deterministic. - return cast(th.Tensor, self(observation)) - - def check_grad(self) -> None: - layers = [l for l in self.mu.children()] - assert isinstance(layers[0], nn.Linear), f"{layers[0]} {type(layers[0])}" - assert layers[0].weight.grad is not None - assert not layers[0].weight.grad.isnan().any() # Check no NaN. - assert layers[0].weight.grad.any() # Check grad at least flows. - - -class ContinuousCritic(BaseModel): - """ - Critic network(s) for DDPG/SAC/TD3. - It represents the action-state value function (Q-value function). - Compared to A2C/PPO critics, this one represents the Q-value - and takes the continuous action as input. It is concatenated with the state - and then fed to the network which outputs a single value: Q(s, a). - For more recent algorithms like SAC/TD3, multiple networks - are created to give different estimates. - - By default, it creates two critic networks used to reduce overestimation - thanks to clipped Q-learning (cf TD3 paper). - - :param observation_space: Obervation space - :param action_space: Action space - :param net_arch: Network architecture - :param features_dim: Number of features - :param activation_fn: Activation function - :param n_critics: Number of critic networks to create. - """ - - def __init__( - self, - observation_space: spaces.Space[Any], - action_space: spaces.Space[Any], - net_arch: list[int], - features_dim: int, - activation_fn: Type[nn.Module] = nn.ReLU, - weight_init: Optional[str] = None, - bias_zero: bool = False, - n_critics: int = 2, - action_dim: int = 0, - ): - super().__init__(observation_space, action_space) - - assert action_dim > 0 - self.action_dim = action_dim - self.n_critics = n_critics - self.q_networks = [] - for idx in range(n_critics): - q_net = nn.Sequential( - *create_mlp( - features_dim + action_dim, - 1, - net_arch, - activation_fn, - weight_init=weight_init, - bias_zero=bias_zero, - ) - ) - self.add_module(f"qf{idx}", q_net) - self.q_networks.append(q_net) - - def forward(self, obs: th.Tensor, actions: th.Tensor) -> tuple[th.Tensor, ...]: - with th.set_grad_enabled(True): - features = self.extract_features(obs) - qvalue_input = th.cat([features, actions], dim=1) - return tuple(q_net(qvalue_input) for q_net in self.q_networks) - - def q1_forward(self, obs: th.Tensor, actions: th.Tensor) -> th.Tensor: - """ - Only predict the Q-value using the first network. - This allows to reduce computation when all the estimates are not needed - (e.g. when updating the policy in TD3). - """ - with th.no_grad(): - features = self.extract_features(obs) - return cast(th.Tensor, self.q_networks[0](th.cat([features, actions], dim=1))) - - def check_grad(self) -> None: - layers = [l for l in self.q_networks[0].children()] - assert isinstance(layers[0], nn.Linear), f"{layers[0]} {type(layers[0])}" - assert layers[0].weight.grad is not None - assert not layers[0].weight.grad.isnan().any() # Check no NaN. - assert layers[0].weight.grad.any() # Check grad at least flows. diff --git a/tune/protox/agent/replay.py b/tune/protox/agent/replay.py deleted file mode 100644 index e9135b0d..00000000 --- a/tune/protox/agent/replay.py +++ /dev/null @@ -1,512 +0,0 @@ -""" -Replaying a tuning run gives you the authoritative runtimes of that tuning run. -The original tuning run has per-query timeouts, so the runtimes may be inaccurate. The - replayed tuning run does not have per-query timeouts. -Additionally, the original tuning run may have been accelerated by Boot, whereas the - replayed tuning run is not. 
-""" - -import json -import logging -import pickle -from datetime import datetime -from pathlib import Path -from typing import Optional, cast - -import click -import pandas as pd -import tqdm -from dateutil.parser import parse - -from benchmark.constants import DEFAULT_SCALE_FACTOR -from tune.protox.agent.build_trial import build_trial -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.pg_env import PostgresEnv -from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.space.primitive.index import IndexAction -from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs -from tune.protox.env.types import ActionsInfo -from tune.protox.env.workload import Workload -from util.log import DBGYM_LOGGER_NAME, DBGYM_OUTPUT_LOGGER_NAME -from util.workspace import ( - DBGymConfig, - TuningMode, - fully_resolve_path, - get_default_replay_data_fname, - get_default_tuning_steps_dpath, - get_default_workload_name_suffix, - get_workload_name, - link_result, - open_and_save, - save_file, -) - -REPLAY_DATA_FNAME = "replay_data.csv" - - -class ReplayArgs: - def __init__( - self, - benchmark_name: str, - workload_name: str, - boot_enabled_during_tune: bool, - # If it's None, it'll get set later on inside replay_tuning_run(). - workload_timeout_during_replay: Optional[float], - replay_all_variations: bool, - simulated: bool, - # If it's None, it'll get set later on inside replay_tuning_run(). - cutoff: Optional[float], - blocklist: list[str], - ): - self.benchmark_name = benchmark_name - self.workload_name = workload_name - self.boot_enabled_during_tune = boot_enabled_during_tune - self.workload_timeout_during_replay = workload_timeout_during_replay - self.replay_all_variations = replay_all_variations - self.simulated = simulated - self.cutoff = cutoff - self.blocklist = blocklist - - -@click.command() -@click.pass_obj -@click.argument("benchmark-name") -@click.option( - "--workload-name-suffix", - type=str, - default=None, - help=f"The suffix of the workload name (the part after the scale factor).", -) -@click.option( - "--scale-factor", - type=float, - default=DEFAULT_SCALE_FACTOR, - help="The scale factor used when generating the data of the benchmark.", -) -@click.option( - "--boot-enabled-during-tune", - is_flag=True, - help="Whether Boot was enabled during tuning.", -) -@click.option( - "--tuning-steps-dpath", - type=Path, - default=None, - help="The path to the `tuning_steps` directory to be replayed.", -) -@click.option( - "--workload-timeout-during-replay", - type=float, - default=None, - # You can make it use the workload timeout used during tuning if you want. - # I just made it use the workload timeout from HPO because I don't currently persist the tuning HPO params. - help="The timeout (in seconds) of a workload when replaying. By default, it will be equal to the workload timeout used during HPO.", -) -@click.option( - "--replay-all-variations", - is_flag=True, - help="If true, replay all the variations of each query. If false, only replay the variation we found was best in the tuning run. 
Replaying all variations has two possible use cases: (1) it makes the cache warm to better replicate behavior during tuning, (2) if the best variation during tuning was determined with Boot, it might not still be the best variation.", -) -@click.option( - "--simulated", - is_flag=True, - help="Set to true to use the runtimes from the original tuning run instead of replaying the workload.", -) -@click.option( - "--cutoff", - type=float, - default=None, - help='Only evaluate configs up to cutoff hours. None means "evaluate all configs".', -) -@click.option( - "--blocklist", - type=list[str], - default=[], - help="Ignore running queries in the blocklist.", -) -def replay( - dbgym_cfg: DBGymConfig, - benchmark_name: str, - workload_name_suffix: Optional[str], - scale_factor: float, - boot_enabled_during_tune: bool, - tuning_steps_dpath: Optional[Path], - workload_timeout_during_replay: Optional[float], - replay_all_variations: bool, - simulated: bool, - cutoff: Optional[float], - blocklist: list[str], -) -> None: - # Set args to defaults programmatically (do this before doing anything else in the function) - if workload_name_suffix is None: - workload_name_suffix = get_default_workload_name_suffix(benchmark_name) - workload_name = get_workload_name(scale_factor, workload_name_suffix) - - if tuning_steps_dpath is None: - tuning_steps_dpath = get_default_tuning_steps_dpath( - dbgym_cfg.dbgym_workspace_path, - benchmark_name, - workload_name, - boot_enabled_during_tune, - ) - - # Fully resolve all input paths. - tuning_steps_dpath = fully_resolve_path(dbgym_cfg, tuning_steps_dpath) - - # Group args together to reduce the # of parameters we pass into functions - replay_args = ReplayArgs( - benchmark_name, - workload_name, - boot_enabled_during_tune, - workload_timeout_during_replay, - replay_all_variations, - simulated, - cutoff, - blocklist, - ) - - # Replay - replay_tuning_run(dbgym_cfg, tuning_steps_dpath, replay_args) - - -def replay_tuning_run( - dbgym_cfg: DBGymConfig, tuning_steps_dpath: Path, replay_args: ReplayArgs -) -> None: - """ - Replay a single tuning run (as in one tuning_steps/ folder). - """ - - def _is_tuning_step_line(line: str) -> bool: - return "mv" in line and "tuning_steps" in line and "baseline" not in line - - hpo_params_fpath = tuning_steps_dpath / "params.json" - with open_and_save(dbgym_cfg, hpo_params_fpath, "r") as f: - hpo_params = json.load(f) - - # Set defaults that depend on hpo_params - if replay_args.workload_timeout_during_replay is None: - replay_args.workload_timeout_during_replay = hpo_params["workload_timeout"][ - str(TuningMode.HPO) - ] - - # Set the hpo_params that are allowed to differ between HPO, tuning, and replay. - hpo_params["enable_boot"][str(TuningMode.REPLAY)] = False - hpo_params["boot_config_fpath"][str(TuningMode.REPLAY)] = None - hpo_params["workload_timeout"][ - str(TuningMode.REPLAY) - ] = replay_args.workload_timeout_during_replay - - # Go through replay_info.log and find the tuning_steps/[time]/ folders - # This finds all the [time] folders in tuning_steps/ (except "baseline" since we ignore that in `_is_tuning_step_line()`), - # so you could just do `ls tuning_steps/` if you wanted to. - folders = [] - start_time: Optional[datetime] = None - start_found = False - output_log_fpath = tuning_steps_dpath / ArtifactManager.REPLAY_INFO_LOG_FNAME - with open_and_save(dbgym_cfg, output_log_fpath, "r") as f: - for line in f: - assert isinstance(line, str), "This is done for typing purposes." 
- line = line.strip() - - if not start_found: - if "Baseline Metric" in line: - start_time = parse( - line.split("INFO:")[-1] - .split(" Baseline Metric")[0] - .split("[")[0] - ) - start_found = True - else: - if _is_tuning_step_line(line): - repo = line.split("dst=")[-1] - last_folder = repo.split("/")[-1] - time_since_start = parse( - line.split("INFO:")[-1].split(" mv")[0].split(" [")[0] - ) - assert type(start_time) is datetime - if ( - replay_args.cutoff is None - or (time_since_start - start_time).total_seconds() - < replay_args.cutoff * 3600 - ): - folders.append(last_folder) - - # Set tune_duration to be high so that it doesn't cut the replay off early - hpo_params["tune_duration"][str(TuningMode.REPLAY)] = ( - replay_args.workload_timeout_during_replay * len(folders) - ) - - # Build PostgresEnv. - _, _, agent_env, _, _ = build_trial( - dbgym_cfg, TuningMode.REPLAY, hpo_params["seed"], hpo_params - ) - pg_env: PostgresEnv = cast(PostgresEnv, agent_env.unwrapped) - action_space: HolonSpace = cast(HolonSpace, pg_env.action_space) - - # Reset things. - if not replay_args.simulated: - pg_env.pg_conn.restore_pristine_snapshot() - - num_lines = 0 - with open_and_save(dbgym_cfg, output_log_fpath, "r") as f: - for line in f: - if "Baseline Metric" in line: - num_lines += 1 - elif _is_tuning_step_line(line): - num_lines += 1 - - # A convenience wrapper around execute_workload() which fills in the arguments properly and processes the return values. - def _execute_workload_wrapper( - actions_info: ActionsInfo, - ) -> tuple[int, int, bool, float]: - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"\n\nfetch_server_knobs(): {fetch_server_knobs(pg_env.pg_conn.conn(), action_space.get_knob_space().tables, action_space.get_knob_space().knobs, pg_env.workload.queries)}\n\n" - ) - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"\n\nfetch_server_indexes(): {fetch_server_indexes(pg_env.pg_conn.conn(), action_space.get_knob_space().tables)}\n\n" - ) - assert ( - replay_args.workload_timeout_during_replay - == hpo_params["workload_timeout"][str(TuningMode.REPLAY)] - == pg_env.workload.workload_timeout - ), "All these different sources of workload_timeout during replay should show the same value" - - if replay_args.replay_all_variations: - all_holon_action_variations = actions_info["all_holon_action_variations"] - actions = [ - holon_action for (_, holon_action) in all_holon_action_variations - ] - variation_names = [ - variation_name for (variation_name, _) in all_holon_action_variations - ] - else: - # Note that "best observed" is not an entirely accurate name. Specifically, if the workload times out, some queries - # will not have had a chance to run at all. Based on the behavior of `_mutilate_action_with_metrics()`, we select - # an arbitrary variation fo the queries that have not executed at all. 
- best_observed_holon_action = actions_info["best_observed_holon_action"] - assert best_observed_holon_action is not None - actions = [best_observed_holon_action] - variation_names = ["BestObserved"] - - num_timed_out_queries, did_workload_time_out, qid_runtime_data = ( - pg_env.workload.execute_workload( - pg_conn=pg_env.pg_conn, - actions=actions, - variation_names=variation_names, - observation_space=None, - action_space=action_space, - reset_metrics=None, - query_timeout=None, - workload_qdir=None, - blocklist=replay_args.blocklist, - first=False, - ) - ) - workload_runtime = Workload.compute_total_workload_runtime(qid_runtime_data) - num_executed_queries = len(qid_runtime_data) - return ( - num_executed_queries, - num_timed_out_queries, - did_workload_time_out, - workload_runtime, - ) - - replay_data = [] - progess_bar = tqdm.tqdm(total=num_lines) - with open_and_save(dbgym_cfg, output_log_fpath, "r") as f: - current_step = 0 - start_found = False - start_time = None - existing_index_acts: set[IndexAction] = set() - - for line in f: - assert isinstance(line, str), "This is done for typing purposes." - line = line.strip() - - # Keep going until we've found the start. - if not start_found: - if "Baseline Metric" in line: - start_found = True - start_time = parse( - line.split("INFO:")[-1] - .split(" Baseline Metric")[0] - .split("[")[0] - ) - progess_bar.update(1) - continue - - elif _is_tuning_step_line(line): - repo = line.split("dst=")[-1] - last_folder = repo.split("/")[-1] - time_since_start = parse( - line.split("INFO:")[-1].split(" mv")[0].split(" [")[0] - ) - - # Get the original runtime as well as whether any individual queries and/or the full workload timed out. - run_raw_csv_fpath = tuning_steps_dpath / last_folder / "run.raw.csv" - save_file(dbgym_cfg, run_raw_csv_fpath) - run_raw_csv = pd.read_csv(run_raw_csv_fpath) - assert len(run_raw_csv.columns) == 7 - # When setting `did_workload_time_out_in_original`, we can't just check whether the sum of latencies in run.raw.csv == `workload_timeout` - # because Proto-X decreases `workload_timeout` over the course of the tuning run. Specifically, at the end of a tuning step, Proto-X - # sets `workload_timeout` to be equal to the runtime of the workload that just ran. - # We separate the penalty rows from the non-penalty rows to process them separately. - run_raw_csv_penalty_rows = run_raw_csv[ - run_raw_csv["Transaction Name"] == "P" - ] - run_raw_csv_non_penalty_rows = run_raw_csv[ - run_raw_csv["Transaction Name"] != "P" - ] - # Get the number of executed queries. A query timing out is not the same as a query not being executed. We do this instead of getting the - # number of skipped queries since we don't have the total # of queries with the current codebase. - num_executed_queries_in_original = len(run_raw_csv_non_penalty_rows) - # `num_timed_out_queries_in_original` counts the number of queries where *all variations* timed out. Note that the query_timeout of - # a query may be set extremely low because the workload is about to time out, so it could be viewed as "unfair" to count those queries as - # having timed out. Regardless, that's how we currently do things. - num_timed_out_queries_in_original = run_raw_csv_non_penalty_rows[ - "Timed Out" - ].sum() - # Penalties are added when the workload times out so this is a reliable indicator of whether the workload timed out. 
- did_workload_time_out_in_original = len(run_raw_csv_penalty_rows) > 0 - # Penalties are meant to affect the reward of the tuning agent but they are unrelated to the actual runtime, so we ignore them when - # computing the original runtime. - original_workload_runtime = ( - run_raw_csv_non_penalty_rows["Latency (microseconds)"].sum() / 1e6 - ) - assert original_workload_runtime > 0 - - # Extract the necessary values from action.pkl - with open_and_save( - dbgym_cfg, tuning_steps_dpath / last_folder / "action.pkl", "rb" - ) as f: - actions_info: ActionsInfo = pickle.load(f) - all_holon_action_variations = actions_info[ - "all_holon_action_variations" - ] - # Extract the KnobSpaceAction and IndexAction from all_holon_action_variations. - # These two should be identical across all HolonActions, which we will assert. - _, first_holon_action = all_holon_action_variations[0] - knob_space_action = first_holon_action[0] - index_space_raw_sample = first_holon_action[1] - index_action = action_space.get_index_space().to_action( - index_space_raw_sample - ) - assert all( - [ - knob_space_action == holon_action[0] - for (_, holon_action) in all_holon_action_variations - ] - ) - assert all( - [ - index_action - == action_space.get_index_space().to_action(holon_action[1]) - for (_, holon_action) in all_holon_action_variations - ] - ) - - # Get the indexes from this action and the prior state - index_acts = set() - index_acts.add(index_action) - assert len(index_acts) > 0 - with open_and_save( - dbgym_cfg, - tuning_steps_dpath / last_folder / "prior_state.pkl", - "rb", - ) as f: - prior_states = pickle.load(f) - all_sc = set(prior_states[1]) - for index_act in index_acts: - all_sc.add(index_act) - - all_sc = {a for a in all_sc if not "USING btree ()" in a.sql(True)} - index_acts = all_sc - # Get the CREATE INDEX or DROP INDEX statements to turn the state into the one we should be in at this tuning step - index_modification_sqls = [] - for index_act in index_acts: - if index_act not in existing_index_acts: - index_modification_sqls.append(index_act.sql(True)) - for existing_index_act in existing_index_acts: - if existing_index_act not in index_acts: - index_modification_sqls.append(existing_index_act.sql(False)) - - # Modify Postgres to have the right indexes and system-wide knobs. `index_modification_sqls` holds the indexes - # while `cc` holds the system-wide knobs. - if not replay_args.simulated: - cc, _ = action_space.get_knob_space().generate_action_plan( - knob_space_action, prior_states[0] - ) - # Like in tuning, we don't dump the page cache when calling shift_state() to see how the workload - # performs in a warm cache scenario. - pg_env.shift_state( - cc, index_modification_sqls, dump_page_cache=False - ) - existing_index_acts = index_acts - - # Execute the workload to get the runtime. - if not replay_args.simulated: - ( - num_executed_queries_in_replay, - num_timed_out_queries_in_replay, - did_workload_time_out_in_replay, - replayed_workload_runtime, - ) = _execute_workload_wrapper(actions_info) - else: - ( - num_executed_queries_in_replay, - num_timed_out_queries_in_replay, - did_workload_time_out_in_replay, - replayed_workload_runtime, - ) = ( - num_executed_queries_in_original, - num_timed_out_queries_in_original, - did_workload_time_out_in_original, - original_workload_runtime, - ) - - # Perform some validity checks and then add this tuning step's data to `replay_data``. 
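                    # (Each row of the resulting replay_data.csv pairs the original
                    #  tuning step's runtime and timeout statistics with the replayed
                    #  ones, keyed by step number and seconds since the "Baseline
                    #  Metric" line.)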
- assert isinstance(start_time, datetime) - this_step_replay_data = { - "step": current_step, - "time_since_start": (time_since_start - start_time).total_seconds(), - "original_workload_runtime": original_workload_runtime, - "num_executed_queries_in_original": num_executed_queries_in_original, - "num_timed_out_queries_in_original": num_timed_out_queries_in_original, - "did_workload_time_out_in_original": did_workload_time_out_in_original, - "replayed_workload_runtime": replayed_workload_runtime, - "num_executed_queries_in_replay": num_executed_queries_in_replay, - "num_timed_out_queries_in_replay": num_timed_out_queries_in_replay, - "did_workload_time_out_in_replay": did_workload_time_out_in_replay, - } - assert not ( - num_timed_out_queries_in_replay > 0 - and not did_workload_time_out_in_replay - ), "During replay, individual queries should not time out unless they timed out because the whole workload timed out." - replay_data.append(this_step_replay_data) - current_step += 1 - - if last_folder in folders and last_folder == folders[-1]: - break - progess_bar.update(1) - - # Output. - replay_data_df = pd.DataFrame(replay_data) - pd.set_option("display.max_columns", 10) - logging.getLogger(DBGYM_OUTPUT_LOGGER_NAME).info( - f"Finished replaying with replay_data_df=\n{replay_data_df}\n. Data stored in {dbgym_cfg.cur_task_runs_path()}." - ) - replay_data_fpath = ( - dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "replay_data.csv" - ) - replay_data_df.to_csv(replay_data_fpath, index=False) - link_result( - dbgym_cfg, - replay_data_fpath, - custom_result_name=get_default_replay_data_fname( - replay_args.benchmark_name, - replay_args.workload_name, - replay_args.boot_enabled_during_tune, - ) - + ".link", - ) - pg_env.close() diff --git a/tune/protox/agent/torch_layers.py b/tune/protox/agent/torch_layers.py deleted file mode 100644 index 1be91a1e..00000000 --- a/tune/protox/agent/torch_layers.py +++ /dev/null @@ -1,86 +0,0 @@ -from typing import List, Optional, Type - -import torch as th -from torch import nn - - -def init_layer( - layer: nn.Module, - weight_init: Optional[str] = None, - bias_zero: bool = False, - final_layer: bool = False, - final_layer_adjust: float = 1.0, -) -> None: - if isinstance(layer, nn.Linear): - if weight_init is not None: - if weight_init == "orthogonal": - nn.init.orthogonal_(layer.weight.data) - elif weight_init == "xavier_uniform": - nn.init.xavier_uniform_(layer.weight) - elif weight_init == "xavier_normal": - nn.init.xavier_normal_(layer.weight) - else: - raise ValueError(f"Unknown weight init: {weight_init}") - if bias_zero: - layer.bias.data.fill_(0.0) - - if final_layer: - # Last layer. - with th.no_grad(): - layer.weight.data = layer.weight.data / final_layer_adjust - - -def create_mlp( - input_dim: int, - output_dim: int, - net_arch: list[int], - activation_fn: Type[nn.Module] = nn.ReLU, - squash_output: bool = False, - with_bias: bool = True, - weight_init: Optional[str] = None, - bias_zero: bool = False, - final_layer_adjust: float = 1.0, -) -> list[nn.Module]: - """ - Create a multi layer perceptron (MLP), which is - a collection of fully-connected layers each followed by an activation function. - - :param input_dim: Dimension of the input vector - :param output_dim: - :param net_arch: Architecture of the neural net - It represents the number of units per layer. - The length of this list is the number of layers. - :param activation_fn: The activation function - to use after each layer. 
- :param squash_output: Whether to squash the output using a Tanh - activation function - :param with_bias: If set to False, the layers will not learn an additive bias - :return: - """ - - if len(net_arch) > 0: - modules = [nn.Linear(input_dim, net_arch[0], bias=with_bias), activation_fn()] - else: - modules = [] - - for idx in range(len(net_arch) - 1): - modules.append(nn.Linear(net_arch[idx], net_arch[idx + 1], bias=with_bias)) - modules.append(activation_fn()) - - if output_dim > 0: - last_layer_dim = net_arch[-1] if len(net_arch) > 0 else input_dim - modules.append(nn.Linear(last_layer_dim, output_dim, bias=with_bias)) - - # Initialize the linear layer weights. - for layer in modules: - init_layer( - layer, - weight_init, - bias_zero, - (layer == modules[-1]), - final_layer_adjust, - ) - - if squash_output: - modules.append(nn.Tanh()) - return modules diff --git a/tune/protox/agent/tune.py b/tune/protox/agent/tune.py deleted file mode 100644 index 327f267f..00000000 --- a/tune/protox/agent/tune.py +++ /dev/null @@ -1,169 +0,0 @@ -import json -import os -import shutil -import time -from pathlib import Path -from typing import Optional - -import click -import pandas as pd - -from benchmark.constants import DEFAULT_SCALE_FACTOR -from tune.protox.agent.coerce_config import coerce_config -from tune.protox.agent.hpo import TuneTrial, build_space -from util.workspace import ( - BENCHMARK_NAME_PLACEHOLDER, - DEFAULT_BOOT_CONFIG_FPATH, - WORKLOAD_NAME_PLACEHOLDER, - WORKSPACE_PATH_PLACEHOLDER, - DBGymConfig, - TuningMode, - fully_resolve_path, - get_default_hpoed_agent_params_path, - get_default_tuning_steps_dname, - get_default_workload_name_suffix, - get_workload_name, - link_result, - open_and_save, -) - - -# This is used when you already have a good set of HPOs and just want to tune the DBMS -@click.command() -@click.pass_obj -@click.argument("benchmark-name") -@click.option( - "--workload-name-suffix", - type=str, - default=None, - help=f"The suffix of the workload name (the part after the scale factor).", -) -@click.option( - "--scale-factor", - type=float, - default=DEFAULT_SCALE_FACTOR, - help=f"The scale factor used when generating the data of the benchmark.", -) -@click.option( - "--hpoed-agent-params-path", - default=None, - type=Path, - help=f"The path to best params found by the agent HPO process. The default is {get_default_hpoed_agent_params_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER)}", -) -@click.option( - "--enable-boot-during-tune", - is_flag=True, - help="Whether to enable the Boot query accelerator during the tuning process. Deciding to use Boot during tuning is separate from deciding to use Boot during HPO.", -) -@click.option( - "--boot-config-fpath-during-tune", - default=DEFAULT_BOOT_CONFIG_FPATH, - type=Path, - help="The path to the file configuring Boot when tuning. This may be a different Boot config than the one used for HPO.", -) -@click.option( - "--tune-duration-during-tune", - default=None, - type=float, - help="The number of hours to run the tuning agent for. If you do not specify this argument, it will be the same as --tune-duration-during-hpo.", -) -def tune( - dbgym_cfg: DBGymConfig, - benchmark_name: str, - workload_name_suffix: Optional[str], - scale_factor: float, - hpoed_agent_params_path: Path, - enable_boot_during_tune: bool, - boot_config_fpath_during_tune: Path, - tune_duration_during_tune: float, -) -> None: - """IMPORTANT: The "tune" here is the one in "tune a DBMS". 
This is *different* from the "tune" in ray.tune.TuneConfig, which means to "tune hyperparameters".""" - # Set args to defaults programmatically (do this before doing anything else in the function) - if workload_name_suffix is None: - workload_name_suffix = get_default_workload_name_suffix(benchmark_name) - workload_name = get_workload_name(scale_factor, workload_name_suffix) - if hpoed_agent_params_path is None: - hpoed_agent_params_path = get_default_hpoed_agent_params_path( - dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name - ) - - # Fully resolve all input paths. - hpoed_agent_params_path = fully_resolve_path(dbgym_cfg, hpoed_agent_params_path) - boot_config_fpath_during_tune = fully_resolve_path( - dbgym_cfg, boot_config_fpath_during_tune - ) - - # Tune - with open_and_save(dbgym_cfg, hpoed_agent_params_path, "r") as f: - hpo_params = json.load(f) - - # Coerce using a dummy space. - hpo_params = coerce_config( - dbgym_cfg, - build_space( - sysknobs={}, - benchmark_config={}, - workload_path=Path(), - embedder_path=[], - pgconn_info={}, - ), - hpo_params, - ) - - # Set defaults that depend on hpo_params - if tune_duration_during_tune is None: - tune_duration_during_tune = hpo_params["tune_duration"][str(TuningMode.HPO)] - - # Set the hpo_params that are allowed to differ between HPO, tuning, and replay. - # In general, for configs that can differ between HPO, tuning, and replay I chose to name - # them "*tune*" and "*hpo*" to the end of them instead of naming them the same - # and overriding the config during tuning. It's just much less confusing if we - # make sure to never override any configs in hpo_params. - # Note that while we currently do not persist the hpo_params used during *tuning* back to - # a file, this is entirely possible to do in the future if needed. - hpo_params["enable_boot"][str(TuningMode.TUNE)] = enable_boot_during_tune - hpo_params["boot_config_fpath"][ - str(TuningMode.TUNE) - ] = boot_config_fpath_during_tune - hpo_params["tune_duration"][str(TuningMode.TUNE)] = tune_duration_during_tune - hpo_params["workload_timeout"][str(TuningMode.TUNE)] = hpo_params[ - "workload_timeout" - ][str(TuningMode.HPO)] - - # Piggyback off the HPO magic. - tune_trial = TuneTrial(dbgym_cfg, TuningMode.TUNE) - tune_trial.setup(hpo_params) - start = time.time() - - data = [] - step_data_fpath = dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "step_data.csv" - while (time.time() - start) < tune_duration_during_tune * 3600: - data.append(tune_trial.step()) - - # Continuously write the file out. - pd.DataFrame(data).to_csv(step_data_fpath, index=False) - - tune_trial.cleanup() - - # Output the step data. - pd.DataFrame(data).to_csv(step_data_fpath, index=False) - - # Link the tuning steps data (this directory allows you to replay the tuning run). - tuning_steps_dpath = dbgym_cfg.cur_task_runs_artifacts_path("tuning_steps") - # Replaying requires params.json, so we also copy it into the tuning_steps/ directory. - # We copy hpoed_agent_params_path instead of moving it because hpoed_agent_params_path was generated in another task run - # We copy instead of just symlinking so that tuning_steps/ is a fully self-contained directory. 
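    # (After this block, tuning_steps/ holds params.json (a copy rather than a
    #  symlink) plus params.json.link pointing back at the original HPO output,
    #  alongside the per-step artifacts written during tuning that replay reads.)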
- hpoed_agent_params_copy_fpath = tuning_steps_dpath / "params.json" - shutil.copy(hpoed_agent_params_path, hpoed_agent_params_copy_fpath) - tuning_steps_link_dname = get_default_tuning_steps_dname( - benchmark_name, workload_name, enable_boot_during_tune - ) - link_result( - dbgym_cfg, - tuning_steps_dpath, - custom_result_name=tuning_steps_link_dname + ".link", - ) - # We also create a link to hpoed_agent_params_path. This is useful when we are _manually_ looking through - # run_*/ and want to see which other run_*/ was responsible for creating params.json - hpoed_agent_params_link_fpath = tuning_steps_dpath / "params.json.link" - os.symlink(hpoed_agent_params_path, hpoed_agent_params_link_fpath) diff --git a/tune/protox/agent/utils.py b/tune/protox/agent/utils.py deleted file mode 100644 index a7d25e9a..00000000 --- a/tune/protox/agent/utils.py +++ /dev/null @@ -1,111 +0,0 @@ -from enum import Enum -from itertools import zip_longest -from typing import Any, Iterable, NamedTuple, Optional, Type - -import torch as th - -from tune.protox.agent.noise import ( - ActionNoise, - NormalActionNoise, - OrnsteinUhlenbeckActionNoise, -) - - -class RolloutReturn(NamedTuple): - episode_timesteps: int - n_episodes: int - continue_training: bool - - -class TrainFrequencyUnit(Enum): - STEP = "step" - EPISODE = "episode" - - -class TrainFreq(NamedTuple): - frequency: int - unit: TrainFrequencyUnit # either "step" or "episode" - - -def parse_noise_type(noise_type: str) -> Optional[Type[ActionNoise]]: - if noise_type == "normal": - return NormalActionNoise - elif noise_type == "ou": - return OrnsteinUhlenbeckActionNoise - elif noise_type == "none": - return None - else: - raise ValueError(f"Unsupported noise {noise_type}") - - -def zip_strict(*iterables: Iterable[Any]) -> Iterable[Any]: - r""" - ``zip()`` function but enforces that iterables are of equal length. - Raises ``ValueError`` if iterables not of equal length. - Code inspired by Stackoverflow answer for question #32954486. - - :param \*iterables: iterables to ``zip()`` - """ - # As in Stackoverflow #32954486, use - # new object for "empty" in case we have - # Nones in iterable. - sentinel = object() - for combo in zip_longest(*iterables, fillvalue=sentinel): - if sentinel in combo: - raise ValueError("Iterables have different lengths") - yield combo - - -def polyak_update( - params: Iterable[th.Tensor], - target_params: Iterable[th.Tensor], - tau: float, -) -> None: - """ - Perform a Polyak average update on ``target_params`` using ``params``: - target parameters are slowly updated towards the main parameters. - ``tau``, the soft update coefficient controls the interpolation: - ``tau=1`` corresponds to copying the parameters to the target ones whereas nothing happens when ``tau=0``. - The Polyak update is done in place, with ``no_grad``, and therefore does not create intermediate tensors, - or a computation graph, reducing memory cost and improving performance. We scale the target params - by ``1-tau`` (in-place), add the new weights, scaled by ``tau`` and store the result of the sum in the target - params (in place). - See https://github.com/DLR-RM/stable-baselines3/issues/93 - - :param params: parameters to use to update the target params - :param target_params: parameters to update - :param tau: the soft update coefficient ("Polyak update", between 0 and 1) - """ - with th.no_grad(): - # zip does not raise an exception if length of parameters does not match. 
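        # For intuition, the loop below is the standard soft (Polyak) update: for
        # every parameter pair,
        #     target.data = (1 - tau) * target.data + tau * param.data
        # so tau = 0.005 nudges the target 0.5% of the way toward the online
        # parameters per call, while tau = 1 copies them outright.
        # A minimal toy sketch of the same arithmetic (names illustrative only):
        #     p, tp = [th.ones(3)], [th.zeros(3)]
        #     with th.no_grad():
        #         for a, b in zip(p, tp):
        #             b.mul_(1 - 0.1).add_(a, alpha=0.1)  # b: 0.0 -> 0.1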
- for param, target_param in zip_strict(params, target_params): - target_param.data.mul_(1 - tau) - th.add(target_param.data, param.data, alpha=tau, out=target_param.data) - - -def should_collect_more_steps( - train_freq: TrainFreq, - num_collected_steps: int, - num_collected_episodes: int, -) -> bool: - """ - Helper used in ``collect_rollouts()`` of off-policy algorithms - to determine the termination condition. - - :param train_freq: How much experience should be collected before updating the policy. - :param num_collected_steps: The number of already collected steps. - :param num_collected_episodes: The number of already collected episodes. - :return: Whether to continue or not collecting experience - by doing rollouts of the current policy. - """ - if train_freq.unit == TrainFrequencyUnit.STEP: - return num_collected_steps < train_freq.frequency - - elif train_freq.unit == TrainFrequencyUnit.EPISODE: - return num_collected_episodes < train_freq.frequency - - else: - raise ValueError( - "The unit of the `train_freq` must be either TrainFrequencyUnit.STEP " - f"or TrainFrequencyUnit.EPISODE not '{train_freq.unit}'!" - ) diff --git a/tune/protox/agent/wolp/__init__.py b/tune/protox/agent/wolp/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/agent/wolp/policies.py b/tune/protox/agent/wolp/policies.py deleted file mode 100644 index d48f4fd2..00000000 --- a/tune/protox/agent/wolp/policies.py +++ /dev/null @@ -1,298 +0,0 @@ -import logging -import time -from typing import TYPE_CHECKING, Any, Optional, Tuple, Union, cast - -import numpy as np -import torch as th -import torch.nn.functional as F -from gymnasium import spaces -from numpy.typing import NDArray -from torch.optim import Optimizer # type: ignore[attr-defined] - -from tune.protox.agent.buffers import ReplayBufferSamples -from tune.protox.agent.noise import ActionNoise -from tune.protox.agent.policies import Actor, BaseModel, ContinuousCritic -from tune.protox.agent.utils import polyak_update -from tune.protox.env.artifact_manager import ArtifactManager, time_record -from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.types import ( - DEFAULT_NEIGHBOR_PARAMETERS, - HolonAction, - NeighborParameters, -) -from util.log import DBGYM_LOGGER_NAME - -DETERMINISTIC_NEIGHBOR_PARAMETERS = { - "knob_num_nearest": 1, - "knob_span": 0, - "index_num_samples": 1, - "index_rules": False, -} - - -class WolpPolicy(BaseModel): - """ - Policy class (with both actor and critic) for Wolp. - - :param observation_space: Observation space - :param action_space: Action space - :param actor - :param actor_target - :param critic - :param critic_target - """ - - def __init__( - self, - observation_space: spaces.Space[Any], - action_space: spaces.Space[Any], - actor: Actor, - actor_target: Actor, - actor_optimizer: Optimizer, - critic: ContinuousCritic, - critic_target: ContinuousCritic, - critic_optimizer: Optimizer, - grad_clip: float = 1.0, - policy_l2_reg: float = 0.0, - tau: float = 0.005, - gamma: float = 0.99, - artifact_manager: Optional[ArtifactManager] = None, - ): - super().__init__(observation_space, action_space) - self.actor = actor - self.actor_target = actor_target - self.actor_optimizer = actor_optimizer - self.critic = critic - self.critic_target = critic_target - self.critic_optimizer = critic_optimizer - self.artifact_manager = artifact_manager - - self.grad_clip = grad_clip - self.policy_l2_reg = policy_l2_reg - self.tau = tau - self.gamma = gamma - - # Log all the networks. 
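        # (Of the hyperparameters stored above: tau is the soft-update coefficient
        #  later passed to polyak_update(); gamma is the discount used when
        #  bootstrapping the critic target, roughly
        #      target_q = r + (1 - done) * gamma * min(Q1'(s', a'), Q2'(s', a'));
        #  grad_clip is the max gradient norm applied before each optimizer step;
        #  and policy_l2_reg gates the L2 penalty added to the actor loss.)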
- logging.getLogger(DBGYM_LOGGER_NAME).info("Actor: %s", self.actor) - logging.getLogger(DBGYM_LOGGER_NAME).info("Critic: %s", self.critic) - - def forward(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor: - raise NotImplementedError() - - def set_training_mode(self, mode: bool) -> None: - """ - Put the policy in either training or evaluation mode. - - This affects certain modules, such as batch normalisation and dropout. - - :param mode: if true, set to training mode, else set to evaluation mode - """ - self.actor.set_training_mode(mode) - self.critic.set_training_mode(mode) - self.training = mode - - @time_record("discriminate") - def discriminate( - self, - use_target: bool, - states: th.Tensor, - embed_actions: th.Tensor, - actions_dim: th.Tensor, - env_actions: list[HolonAction], - ) -> tuple[list[HolonAction], th.Tensor]: - states_tile = states.repeat_interleave(actions_dim, dim=0) - if use_target: - next_q_values = th.cat( - self.critic_target(states_tile, embed_actions), dim=1 - ) - assert not th.isnan(next_q_values).any() - next_q_values, _ = th.min(next_q_values, dim=1, keepdim=True) - else: - next_q_values = th.cat(self.critic(states_tile, embed_actions), dim=1) - assert not th.isnan(next_q_values).any() - next_q_values, _ = th.min(next_q_values, dim=1, keepdim=True) - - env_splitter = [0] + list(actions_dim.cumsum(dim=0)) - if env_actions is not None: - split_env_actions = [ - env_actions[start:end] - for start, end in zip(env_splitter[:-1], env_splitter[1:]) - ] - # Split the actions. - split_embed_actions = th.split(embed_actions, actions_dim.tolist()) - # Find the maximizing q-value action. - actions_eval_split = th.split(next_q_values, actions_dim.tolist()) - max_indices = [th.argmax(split) for split in actions_eval_split] - # Find the maximal action. - if env_actions is not None: - env_actions = [ - split_env_actions[i][max_indices[i]] for i in range(len(max_indices)) - ] - embed_actions = th.stack( - [split_embed_actions[i][max_indices[i]] for i in range(len(max_indices))] - ) - assert states.shape[0] == embed_actions.shape[0] - return env_actions, embed_actions - - def wolp_act( - self, - states: Union[th.Tensor, NDArray[np.float32]], - use_target: bool = False, - action_noise: Optional[Union[ActionNoise, th.Tensor]] = None, - neighbor_parameters: NeighborParameters = DEFAULT_NEIGHBOR_PARAMETERS, - random_act: bool = False, - ) -> tuple[list[HolonAction], th.Tensor]: - # Get the tensor representation. - start_time = time.time() - if not isinstance(states, th.Tensor): - thstates = self.obs_to_tensor(states) - else: - thstates = states - - if random_act: - assert hasattr(self.action_space, "sample_latent") - raw_action = self.action_space.sample_latent() - raw_action = th.as_tensor(raw_action).float() - elif use_target: - raw_action = self.actor_target(thstates) - else: - raw_action = self.actor(thstates) - - # Transform and apply the noise. - noise = ( - None - if action_noise is None - else ( - action_noise - if isinstance(action_noise, th.Tensor) - else th.as_tensor(action_noise()) - ) - ) - if noise is not None and len(noise.shape) == 1: - # Insert a dimension. - noise = noise.view(-1, *noise.shape) - - if noise is not None: - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"Perturbing with noise class {action_noise}" - ) - - assert hasattr(self.action_space, "transform_noise") - raw_action = self.action_space.transform_noise(raw_action, noise=noise) - - # Smear the action. 
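        # (Rough intuition, Wolpertinger-style: the raw latent action from the actor
        #  is expanded below into a neighborhood of concrete candidate actions, and
        #  discriminate() keeps the candidate with the highest min-over-critics
        #  Q-value, conceptually best = max(candidates, key=lambda a: Q(s, a)).)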
- if TYPE_CHECKING: - assert isinstance(self.action_space, HolonSpace) - env_actions, sample_actions, actions_dim = self.action_space.neighborhood( - raw_action, neighbor_parameters - ) - - # Log the neighborhood we are observing. - logging.getLogger(DBGYM_LOGGER_NAME).debug(f"Neighborhood Sizes {actions_dim}") - - if random_act: - # If we want a random action, don't use Q-value estimate. - rand_act = np.random.randint(0, high=len(env_actions)) - return [env_actions[rand_act]], sample_actions[rand_act : rand_act + 1] - - assert thstates.shape[0] == actions_dim.shape[0] - assert len(thstates.shape) == 2 - env_actions, embed_actions = self.discriminate( - use_target, thstates, sample_actions, actions_dim, env_actions - ) - assert not np.isnan(embed_actions).any() - return env_actions, embed_actions - - @time_record("train_critic") - def train_critic( - self, - replay_data: ReplayBufferSamples, - neighbor_parameters: NeighborParameters, - target_action_noise: Optional[ActionNoise] = None, - ) -> Any: - with th.no_grad(): - # wolp_act() actually gives both the env and the embedding actions. - # We evaluate the critic on the embedding action and not the environment action. - _, embed_actions = self.wolp_act( - replay_data.next_observations, - use_target=True, - action_noise=target_action_noise, - neighbor_parameters=neighbor_parameters, - ) - - # Compute the next Q-values: min over all critics targets - next_q_values = th.cat( - self.critic_target(replay_data.next_observations, embed_actions), dim=1 - ) - next_q_values, _ = th.min(next_q_values, dim=1, keepdim=True) - target_q_values = ( - replay_data.rewards - + (1 - replay_data.dones) * self.gamma * next_q_values - ) - - embeds = replay_data.actions.float() - - # Get current Q-values estimates for each critic network - current_q_values = self.critic(replay_data.observations, embeds) - # Compute critic loss. - critic_losses = [ - F.mse_loss(current_q, target_q_values) for current_q in current_q_values - ] - critic_loss = cast(th.Tensor, sum(critic_losses)) - - # Optimize the critics - self.critic_optimizer.zero_grad() - assert not th.isnan(critic_loss).any() - critic_loss.backward() # type: ignore - th.nn.utils.clip_grad_norm_( - list(self.critic.parameters()), self.grad_clip, error_if_nonfinite=True - ) - self.critic.check_grad() - self.critic_optimizer.step() - return critic_loss - - @time_record("train_actor") - def train_actor(self, replay_data: ReplayBufferSamples) -> Any: - # Get the current action representation. - embeds = replay_data.actions.float() - - # Compute actor loss - raw_actions = self.actor(replay_data.observations) - - if "lsc" in replay_data.infos[0]: - lscs = ( - th.as_tensor(np.array([i["lsc"] for i in replay_data.infos])) - .float() - .view(-1, 1) - ) - # TODO(wz2,PROTOX_DELTA): Assume that we're looking at the "center". - # Practically, maybe we should check through the critic what would actually be selected here. - # The paper uses the historic action. This refactor assumes the neighborhood center. - assert hasattr(self.action_space, "pad_center_latent") - raw_actions = self.action_space.pad_center_latent(raw_actions, lscs) - - actor_loss = -self.critic.q1_forward( - replay_data.observations, raw_actions - ).mean() - - # Attach l2. 
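        # (That is, when policy_l2_reg > 0, 0.5 * sum(theta^2) over every actor
        #  parameter is added to the actor loss as a weight-decay-style regularizer;
        #  note the penalty below is applied unscaled rather than multiplied by
        #  policy_l2_reg itself.)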
- if self.policy_l2_reg > 0: - for param in self.actor.parameters(): - actor_loss += 0.5 * (param**2).sum() - - # Optimize the actor - self.actor_optimizer.zero_grad() - assert not th.isnan(actor_loss).any() - actor_loss.backward() # type: ignore - th.nn.utils.clip_grad_norm_( - list(self.actor.parameters()), self.grad_clip, error_if_nonfinite=True - ) - self.actor.check_grad() - self.actor_optimizer.step() - return actor_loss - - def polyak_update(self) -> None: - polyak_update( - self.critic.parameters(), self.critic_target.parameters(), self.tau - ) - polyak_update(self.actor.parameters(), self.actor_target.parameters(), self.tau) diff --git a/tune/protox/agent/wolp/wolp.py b/tune/protox/agent/wolp/wolp.py deleted file mode 100644 index ca4e2083..00000000 --- a/tune/protox/agent/wolp/wolp.py +++ /dev/null @@ -1,195 +0,0 @@ -import logging -from copy import deepcopy -from typing import Any, Dict, Optional, Tuple - -import numpy as np -import torch as th -from numpy.typing import NDArray - -from tune.protox.agent.agent_env import AgentEnv -from tune.protox.agent.buffers import ReplayBuffer -from tune.protox.agent.noise import ActionNoise -from tune.protox.agent.off_policy_algorithm import OffPolicyAlgorithm -from tune.protox.agent.wolp.policies import ( - DETERMINISTIC_NEIGHBOR_PARAMETERS, - WolpPolicy, -) -from util.log import DBGYM_LOGGER_NAME - - -class Wolp(OffPolicyAlgorithm): - """ - Wolpertinger DDPG based on Twin Delayed DDPG (TD3) - Addressing Function Approximation Error in Actor-Critic Methods. - - Original implementation: https://github.com/sfujim/TD3 - Paper: https://arxiv.org/abs/1802.09477 - Introduction to TD3: https://spinningup.openai.com/en/latest/algorithms/td3.html - - :param policy: The policy model - :param replay_buffer - :param learning_starts: how many steps of the model to collect transitions for before learning starts - :param batch_size: Minibatch size for each gradient update - :param train_freq: Update the model every ``train_freq`` steps. Alternatively pass a tuple of frequency and unit - like ``(5, "step")`` or ``(2, "episode")``. - :param gradient_steps: How many gradient steps to do after each rollout (see ``train_freq``) - Set to ``-1`` means to do as many gradient steps as steps done in the environment - during the rollout. - :param action_noise: the action noise type (None by default), this can help - for hard exploration problem. Cf common.noise for the different action noise type. - :param target_policy_noise: Standard deviation of Gaussian noise added to target policy - (smoothing noise) - :param target_noise_clip: Limit for absolute value of target policy smoothing noise. 
- :param seed: Seed for the pseudo random generators - """ - - def __init__( - self, - policy: WolpPolicy, - replay_buffer: ReplayBuffer, - learning_starts: int = 100, - batch_size: int = 100, - train_freq: tuple[int, str] = (1, "episode"), - gradient_steps: int = -1, - action_noise: Optional[ActionNoise] = None, - target_action_noise: Optional[ActionNoise] = None, - seed: Optional[int] = None, - neighbor_parameters: dict[str, Any] = {}, - ray_trial_id: Optional[str] = None, - ): - super().__init__( - policy, - replay_buffer, - learning_starts, - batch_size, - train_freq, - gradient_steps, - action_noise=action_noise, - seed=seed, - ray_trial_id=ray_trial_id, - ) - - self.target_action_noise = target_action_noise - self.neighbor_parameters = neighbor_parameters - - def _store_transition( - self, - replay_buffer: ReplayBuffer, - buffer_action: NDArray[np.float32], - new_obs: NDArray[np.float32], - reward: float, - dones: bool, - infos: dict[str, Any], - ) -> None: - """ - Store transition in the replay buffer. - We store the normalized action and the unnormalized observation. - It also handles terminal observations (because AgentEnv resets automatically). - - :param replay_buffer: Replay buffer object where to store the transition. - :param buffer_action: normalized action - :param new_obs: next observation in the current episode - or first observation of the episode (when dones is True) - :param reward: reward for the current transition - :param dones: Termination signal - :param infos: List of additional information about the transition. - It may contain the terminal observations and information about timeout. - """ - # Avoid changing the original ones - self._last_original_obs, new_obs_, reward_ = self._last_obs, new_obs, reward - assert self._last_original_obs is not None - - # Avoid modification by reference - next_obs = deepcopy(new_obs_) - # As the Env resets automatically, new_obs is already the - # first observation of the next episode - if dones: - assert infos.get("terminal_observation") is not None - assert not isinstance(next_obs, dict) - next_obs = infos["terminal_observation"] - - if "maximal_embed" in infos and infos["maximal_embed"] is not None: - buffer_action = infos["maximal_embed"] - - replay_buffer.add( - self._last_original_obs, - next_obs, - buffer_action, - reward_, - dones, - infos, - ) - - self._last_obs = new_obs - - def _sample_action( - self, - learning_starts: int, - action_noise: Optional[ActionNoise] = None, - ) -> tuple[NDArray[np.float32], NDArray[np.float32]]: - """ - Sample an action according to the exploration policy. - This is either done by sampling the probability distribution of the policy, - or sampling a random action (from a uniform distribution over the action space) - or by adding noise to the deterministic output. - - :param action_noise: Action noise that will be used for exploration - Required for deterministic policy (e.g. TD3). This can also be used - in addition to the stochastic policy for SAC. - :param learning_starts: Number of steps before learning for the warm-up phase. - :return: action to take in the environment - and scaled action that will be stored in the replay buffer. - The two differs when the action space is not normalized (bounds are not [-1, 1]). 
- """ - # Select action randomly or according to policy - if self.num_timesteps < learning_starts: - # Warmup phase - self.policy.set_training_mode(False) - with th.no_grad(): - # Not sure how good of an idea it is to inject more stochasticity - # into the randomness of an action. Just let the star map guide you. - env_action, embed_action = self.policy.wolp_act( - self._last_obs, - use_target=False, - action_noise=None, - neighbor_parameters=DETERMINISTIC_NEIGHBOR_PARAMETERS, - random_act=True, - ) - else: - self.policy.set_training_mode(False) - with th.no_grad(): - env_action, embed_action = self.policy.wolp_act( - self._last_obs, - use_target=False, - action_noise=action_noise, - neighbor_parameters=self.neighbor_parameters, - random_act=False, - ) - - assert len(env_action) == 1 - return env_action[0], embed_action[0] - - def train(self, env: AgentEnv, gradient_steps: int, batch_size: int) -> None: - # Switch to train mode (this affects batch norm / dropout) - self.policy.set_training_mode(True) - - actor_losses, critic_losses = [], [] - for gs in range(gradient_steps): - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"Training agent gradient step {gs}" - ) - self._n_updates += 1 - # Sample replay buffer - replay_data = self.replay_buffer.sample(batch_size) - - # Train the critic. - critic_loss = self.policy.train_critic( - replay_data, self.neighbor_parameters, self.target_action_noise - ) - critic_losses.append(critic_loss.item()) - - # Train the actor. - actor_loss = self.policy.train_actor(replay_data) - actor_losses.append(actor_loss.item()) - - self.policy.polyak_update() diff --git a/tune/protox/cli.py b/tune/protox/cli.py deleted file mode 100644 index 11bbea87..00000000 --- a/tune/protox/cli.py +++ /dev/null @@ -1,15 +0,0 @@ -import click - -from tune.protox.agent.cli import agent_group -from tune.protox.embedding.cli import embedding_group -from util.workspace import DBGymConfig - - -@click.group(name="protox") -@click.pass_obj -def protox_group(dbgym_cfg: DBGymConfig) -> None: - dbgym_cfg.append_group("protox") - - -protox_group.add_command(embedding_group) -protox_group.add_command(agent_group) diff --git a/tune/protox/default_job_benchbase_config.xml b/tune/protox/default_job_benchbase_config.xml deleted file mode 100644 index 7038308e..00000000 --- a/tune/protox/default_job_benchbase_config.xml +++ /dev/null @@ -1,37 +0,0 @@ - - - - - POSTGRES - org.postgresql.Driver - jdbc:postgresql://localhost:5432/benchbase?preferQueryMode=extended - admin - password - TRANSACTION_REPEATABLE_READ - 128 - true - - - data/tpch-sf0.01 - - - - tbl - - - 10 - - - 1 - - - true - unlimited - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 - - - - - - - diff --git a/tune/protox/default_job_benchmark_config.yaml b/tune/protox/default_job_benchmark_config.yaml deleted file mode 100644 index 6c0580c7..00000000 --- a/tune/protox/default_job_benchmark_config.yaml +++ /dev/null @@ -1,198 +0,0 @@ -protox: - query_spec: - benchbase: False - oltp_workload: False - tbl_include_subsets_prune: True - tbl_fold_subsets: False - tbl_fold_delta: 1 - tbl_fold_iterations: 1 - - max_num_columns: 12 - tables: - - aka_name - - aka_title - - cast_info - - char_name - - comp_cast_type - - company_name - - company_type - - complete_cast - - info_type - - keyword - - kind_type - - link_type - - movie_companies - - movie_info - - movie_info_idx - - movie_keyword - - movie_link - - name - - person_info - - role_type - - title - - attributes: - aka_name: - - id - - person_id - - name - - imdb_index - - name_pcode_cf - - 
name_pcode_nf - - surname_pcode - - md5sum - aka_title: - - id - - movie_id - - title - - imdb_index - - kind_id - - production_year - - phonetic_code - - episode_of_id - - season_nr - - episode_nr - - note - - md5sum - cast_info: - - id - - person_id - - movie_id - - person_role_id - - note - - nr_order - - role_id - char_name: - - id - - name - - imdb_index - - imdb_id - - name_pcode_nf - - surname_pcode - - md5sum - comp_cast_type: - - id - - kind - company_name: - - id - - name - - country_code - - imdb_id - - name_pcode_nf - - name_pcode_sf - - md5sum - company_type: - - id - - kind - complete_cast: - - id - - movie_id - - subject_id - - status_id - info_type: - - id - - info - keyword: - - id - - keyword - - phonetic_code - kind_type: - - id - - kind - link_type: - - id - - link - movie_companies: - - id - - movie_id - - company_id - - company_type_id - - note - movie_info: - - id - - movie_id - - info_type_id - - info - - note - movie_info_idx: - - id - - movie_id - - info_type_id - - info - - note - movie_keyword: - - id - - movie_id - - keyword_id - movie_link: - - id - - movie_id - - linked_movie_id - - link_type_id - name: - - id - - name - - imdb_index - - imdb_id - - gender - - name_pcode_cf - - name_pcode_nf - - surname_pcode - - md5sum - person_info: - - id - - person_id - - info_type_id - - info - - note - role_type: - - id - - role - title: - - id - - title - - imdb_index - - kind_id - - production_year - - imdb_id - - phonetic_code - - episode_of_id - - season_nr - - episode_nr - - series_years - - md5sum - - # Additional table level knobs. - # Format: - # : - # - # - # ... - table_level_knobs: {} - - # Per-query knobs. - # Format: - # : - # - # ... - per_query_scan_method: True - per_query_select_parallel: True - - index_space_aux_type: True - index_space_aux_include: True - - per_query_knob_gen: - enable_hashjoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_mergejoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_nestloop: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_sort: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_gathermerge: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_hashagg: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_parallel_hash: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_material: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_memoize: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - random_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - seq_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - hash_mem_multiplier: {type: "float", min: 1, max: 1000, quantize: 0, log_scale: 1, unit: 0} - - per_query_knobs: {} diff --git a/tune/protox/default_tpch_benchbase_config.xml b/tune/protox/default_tpch_benchbase_config.xml deleted file mode 100644 index 967c10d2..00000000 --- a/tune/protox/default_tpch_benchbase_config.xml +++ /dev/null @@ -1,134 +0,0 @@ - - - - - POSTGRES - org.postgresql.Driver - jdbc:postgresql://localhost:5432/benchbase?preferQueryMode=extended - admin - password - TRANSACTION_REPEATABLE_READ - 128 - true - - - data/tpch-sf0.01 - - - - tbl - - - 10 - - - 1 - - - true - unlimited - 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 - - - - - - - odd - 1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0 - - - 
even - 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1 - - - - Q1 - 1 - - - Q2 - 2 - - - Q3 - 3 - - - Q4 - 4 - - - Q5 - 5 - - - Q6 - 6 - - - Q7 - 7 - - - Q8 - 8 - - - Q9 - 9 - - - Q10 - 10 - - - Q11 - 11 - - - Q12 - 12 - - - Q13 - 13 - - - Q14 - 14 - - - Q15 - 15 - - - Q16 - 16 - - - Q17 - 17 - - - Q18 - 18 - - - Q19 - 19 - - - Q20 - 20 - - - Q21 - 21 - - - Q22 - 22 - - - diff --git a/tune/protox/default_tpch_benchmark_config.yaml b/tune/protox/default_tpch_benchmark_config.yaml deleted file mode 100644 index 34e3708a..00000000 --- a/tune/protox/default_tpch_benchmark_config.yaml +++ /dev/null @@ -1,124 +0,0 @@ -protox: - query_spec: - benchbase: False - oltp_workload: False - tbl_include_subsets_prune: True - tbl_fold_subsets: False - tbl_fold_delta: 1 - tbl_fold_iterations: 1 - - max_num_columns: 16 - tables: - - part - - partsupp - - lineitem - - orders - - supplier - - customer - - nation - - region - - attributes: - region: - - r_regionkey - - r_name - - r_comment - nation: - - n_nationkey - - n_name - - n_regionkey - - n_comment - part: - - p_partkey - - p_name - - p_mfgr - - p_brand - - p_type - - p_size - - p_container - - p_retailprice - - p_comment - supplier: - - s_suppkey - - s_name - - s_address - - s_nationkey - - s_phone - - s_acctbal - - s_comment - partsupp: - - ps_partkey - - ps_suppkey - - ps_availqty - - ps_supplycost - - ps_comment - customer: - - c_custkey - - c_name - - c_address - - c_nationkey - - c_phone - - c_acctbal - - c_mktsegment - - c_comment - orders: - - o_orderkey - - o_custkey - - o_orderstatus - - o_totalprice - - o_orderdate - - o_orderpriority - - o_clerk - - o_shippriority - - o_comment - lineitem: - - l_orderkey - - l_partkey - - l_suppkey - - l_linenumber - - l_quantity - - l_extendedprice - - l_discount - - l_tax - - l_returnflag - - l_linestatus - - l_shipdate - - l_commitdate - - l_receiptdate - - l_shipinstruct - - l_shipmode - - l_comment - - # Additional table level knobs. - # Format: - # : - # - # - # ... - table_level_knobs: {} - - # Per-query knobs. - # Format: - # : - # - # ... 
- per_query_scan_method: True - per_query_select_parallel: True - index_space_aux_type: True - index_space_aux_include: True - - per_query_knob_gen: - enable_hashjoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_mergejoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_nestloop: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_sort: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_gathermerge: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_hashagg: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_parallel_hash: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_material: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_memoize: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - random_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - seq_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - hash_mem_multiplier: {type: "float", min: 1, max: 1000, quantize: 0, log_scale: 1, unit: 0} - - per_query_knobs: {} diff --git a/tune/protox/embedding/__init__.py b/tune/protox/embedding/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/embedding/analyze.py b/tune/protox/embedding/analyze.py deleted file mode 100644 index a3bf24df..00000000 --- a/tune/protox/embedding/analyze.py +++ /dev/null @@ -1,471 +0,0 @@ -import copy -import gc -import itertools -import json -import logging -import math -import os -import shutil -import time -from pathlib import Path -from typing import Any, Optional - -import numpy as np -import torch -import tqdm -import yaml - -from tune.protox.embedding.loss import CostLoss, get_bias_fn -from tune.protox.embedding.train_all import ( - create_vae_model, - fetch_index_parameters, - fetch_vae_parameters_from_workload, - load_input_data, -) -from tune.protox.embedding.train_args import ( - EmbeddingAnalyzeArgs, - EmbeddingTrainGenericArgs, -) -from tune.protox.embedding.trainer import StratifiedRandomSampler -from tune.protox.embedding.vae import VAELoss, gen_vae_collate -from tune.protox.env.space.latent_space.latent_index_space import LatentIndexSpace -from tune.protox.env.types import ProtoAction, TableAttrAccessSetsMap -from tune.protox.env.workload import Workload -from util.log import DBGYM_LOGGER_NAME -from util.workspace import DBGymConfig, open_and_save - -STATS_FNAME = "stats.txt" -RANGES_FNAME = "ranges.txt" - - -def compute_num_parts(num_samples: int) -> int: - # TODO(phw2): in the future, implement running different parts in parallel, set OMP_NUM_THREADS accordingly, and investigate the effect of having more parts - # TODO(phw2): if having more parts is effective, figure out a good way to specify num_parts (can it be determined automatically or should it be a CLI arg?) - # TODO(phw2): does anything bad happen if num_parts doesn't evenly divide num_samples? 
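    # (The return below is currently hard-coded to 1, so all embeddings land in a
    #  single part; redist_trained_models() further down spreads the embeddings_*/
    #  directories across parts round-robin via model_i % num_parts, so an uneven
    #  split only means some parts receive one extra model.)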
- return 1 - - -def redist_trained_models(dbgym_cfg: DBGymConfig, num_parts: int) -> None: - """ - Redistribute all embeddings_*/ folders inside the run_*/ folder into num_parts subfolders - """ - inputs = [ - f - for f in dbgym_cfg.cur_task_runs_data_path(mkdir=True).glob("embeddings*") - if os.path.isdir(f) - ] - - for part_i in range(num_parts): - Path(_get_part_i_dpath(dbgym_cfg, part_i)).mkdir(parents=True, exist_ok=True) - - for model_i, emb in enumerate(inputs): - part_i = model_i % num_parts - shutil.move(emb, _get_part_i_dpath(dbgym_cfg, part_i)) - - -def analyze_all_embeddings_parts( - dbgym_cfg: DBGymConfig, - num_parts: int, - generic_args: EmbeddingTrainGenericArgs, - analyze_args: EmbeddingAnalyzeArgs, -) -> None: - """ - Analyze all part*/ dirs _in parallel_ - """ - start_time = time.time() - for part_i in range(num_parts): - _analyze_embeddings_part(dbgym_cfg, part_i, generic_args, analyze_args) - analyze_all_parts_duration = time.time() - start_time - with open( - dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) / "analyze_all_time.txt", "w" - ) as f: - f.write(f"{analyze_all_parts_duration}") - - -def _analyze_embeddings_part( - dbgym_cfg: DBGymConfig, - part_i: int, - generic_args: EmbeddingTrainGenericArgs, - analyze_args: EmbeddingAnalyzeArgs, -) -> None: - """ - Analyze (meaning create both stats.txt and ranges.txt) all the embedding models in the part[part_i]/ dir - """ - part_dpath = _get_part_i_dpath(dbgym_cfg, part_i) - - start_time = time.time() - _create_stats_for_part(dbgym_cfg, part_dpath, generic_args, analyze_args) - analyze_part_duration = time.time() - start_time - with open(part_dpath / "stats_time.txt", "w") as f: - f.write(f"{analyze_part_duration}") - - start_time = time.time() - _create_ranges_for_part(dbgym_cfg, part_dpath, generic_args, analyze_args) - create_range_duration = time.time() - start_time - with open(part_dpath / "ranges_time.txt", "w") as f: - f.write(f"{create_range_duration}") - - -def _create_stats_for_part( - dbgym_cfg: DBGymConfig, - part_dpath: Path, - generic_args: EmbeddingTrainGenericArgs, - analyze_args: EmbeddingAnalyzeArgs, -) -> None: - """ - Creates a stats.txt file inside each embeddings_*/models/epoch*/ dir inside this part*/ dir - TODO(wz2): what does stats.txt contain? - """ - # Unlike for training, we're safe to use all threads for creating stats - os.environ["OMP_NUM_THREADS"] = str(os.cpu_count()) - - # Load the benchmark configuration. - with open_and_save(dbgym_cfg, generic_args.benchmark_config_path, "r") as f: - data = yaml.safe_load(f) - data = data[[k for k in data.keys()][0]] - max_attrs, max_cat_features, _, _ = fetch_index_parameters( - dbgym_cfg, data, generic_args.workload_path - ) - - device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - models = [m for m in itertools.chain(*[part_dpath.rglob("config")])] - for model_config in tqdm.tqdm(models): - if ((Path(model_config).parent) / "FAILED").exists(): - logging.getLogger(DBGYM_LOGGER_NAME).warning( - "Detected failure in: ", model_config - ) - continue - - # don't use open_and_save() because we generated model_config in this run - with open(model_config, "r") as f: - config = json.load(f) - - # Create them here since these are constant for a given "model" configuration. 
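        # (These start as None and are filled in lazily on the first epoch below, via
        #  the "if dataset is None:" branch, so the dataset, class mapping, and loss
        #  functions are built once per model configuration and reused for every epoch
        #  checkpoint of that model.)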
- dataset, idx_class, num_classes = None, None, None - class_mapping = None - metric_loss_fn, vae_loss = None, None - vae = create_vae_model(config, max_attrs, max_cat_features) - require_cost = config["metric_loss_md"].get("require_cost", False) - - submodules = [f for f in (Path(model_config).parent / "models").glob("*")] - submodules = sorted(submodules, key=lambda x: int(str(x).split("epoch")[-1])) - # This is done for semantic sense since the "first" is actually at no epoch. - modules = [submodules[r] for r in range(-1, len(submodules)) if r >= 0] - if modules[0] != submodules[0]: - modules = [submodules[0]] + modules - - if modules[-1] != submodules[-1]: - modules.append(submodules[-1]) - - modules = [ - m - for m in modules - if int(str(m).split("epoch")[-1]) >= analyze_args.start_epoch - ] - - for i, module in tqdm.tqdm(enumerate(modules), total=len(modules), leave=False): - epoch = int(str(module).split("epoch")[-1]) - module_path = os.path.join(module, f"embedder_{epoch}.pth") - - if Path(os.path.join(module, f"{STATS_FNAME}")).exists(): - continue - - # Load the specific epoch model. - vae.load_state_dict(torch.load(module_path, map_location=device)) - vae.to(device=device).eval() - collate_fn = gen_vae_collate(max_cat_features) - - if dataset is None: - # Get the dataset if we need to. - dataset, _, idx_class, _, num_classes = load_input_data( - dbgym_cfg, - generic_args.traindata_path, - 1.0, - max_attrs, - require_cost, - seed=0, - ) - - class_mapping = [] - for c in range(num_classes): - if idx_class[idx_class == c].shape[0] > 0: - class_mapping.append(c) - - # Use a common loss function. - metric_loss_fn = CostLoss(config["metric_loss_md"]) - vae_loss = VAELoss(config["loss_fn"], max_attrs, max_cat_features) - - # Construct the accumulator. - accumulated_stats: dict[str, list[Any]] = {} - for class_idx in class_mapping: - accumulated_stats[f"recon_{class_idx}"] = [] - - analyze_all_batches = analyze_args.num_batches == -1 - if analyze_all_batches or analyze_args.num_batches > 0: - accumulated_stats.update( - { - "recon_accum": [], - "metric_accum": [], - } - ) - - # Setup the dataloader. - if analyze_all_batches: - dataloader = torch.utils.data.DataLoader( - dataset, - batch_size=analyze_args.batch_size, - collate_fn=collate_fn, - ) - total = len(dataloader) - else: - sampler = StratifiedRandomSampler( - idx_class, - max_class=num_classes, - batch_size=analyze_args.batch_size, - allow_repeats=False, - ) - dataloader = torch.utils.data.DataLoader( - dataset, - sampler=sampler, - batch_size=analyze_args.batch_size, - collate_fn=collate_fn, - ) - total = min(len(sampler), analyze_args.num_batches) - error = False - with torch.no_grad(): - with tqdm.tqdm(total=total, leave=False) as pbar: - for x, y in dataloader: - x = x.to(device=device) - - if config["use_bias"]: - bias_fn = get_bias_fn(config) - bias = bias_fn(x, y) - if isinstance(bias, torch.Tensor): - bias = bias.to(device=device) - else: - lbias = bias[0].to(device=device) - hbias = bias[1].to(device=device) - bias = (lbias, hbias) - else: - bias = None - - # Pass it through the VAE with the settings. - z, decoded, error = vae(x, bias=bias) - if error: - # If we've encountered an error, abort early. - # Don't use a model that can produce errors. - break - - # Flatten. 
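                    # (The last column of y is treated as the per-sample class label;
                    #  flattening yields one label per row, which is used below to
                    #  bucket reconstruction losses into the recon_<class> stats.)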
- classes = y[:, -1].flatten() - - assert metric_loss_fn is not None - loss_dict = vae_loss.compute_loss( - preds=decoded, - unused0=None, - unused1=None, - tdata=(x, y), - is_eval=True, - ) - - assert vae_loss.loss_fn is not None - for class_idx in class_mapping: - y_mask = classes == class_idx - x_extract = x[y_mask.bool()] - if x_extract.shape[0] > 0: - decoded_extract = decoded[y_mask.bool()] - loss = vae_loss.loss_fn( - decoded_extract, x_extract, y[y_mask.bool()] - ) - accumulated_stats[f"recon_{class_idx}"].append( - loss.mean().item() - ) - - input_y = y - if y.shape[1] == 1: - input_y = y.flatten() - - metric_loss = metric_loss_fn(z, input_y, None).item() - accumulated_stats["recon_accum"].append( - loss_dict["recon_loss"]["losses"].item() - ) - accumulated_stats["metric_accum"].append(metric_loss) - - del z - del x - del y - - # Break out if we are done. - pbar.update(1) - total -= 1 - if total == 0: - break - - # Output the evaluated stats. - with open(os.path.join(module, f"{STATS_FNAME}"), "w") as f: - stats = { - stat_key: ( - stats - if isinstance(stats, np.ScalarType) - else (np.mean(stats) if len(stats) > 0 else 0) - ) - for stat_key, stats in accumulated_stats.items() - } - stats["error"] = error - f.write(json.dumps(stats, indent=4)) - - del dataloader - gc.collect() - gc.collect() - - -def _create_ranges_for_part( - dbgym_cfg: DBGymConfig, - part_dpath: Path, - generic_args: EmbeddingTrainGenericArgs, - analyze_args: EmbeddingAnalyzeArgs, -) -> None: - """ - Create the ranges.txt for all models in part_dpath - TODO(wz2): what does ranges.txt contain? - """ - # Unlike for training, we're safe to use all threads for creating ranges - os.environ["OMP_NUM_THREADS"] = str(os.cpu_count()) - paths = sorted( - [f for f in part_dpath.rglob("embedder_*.pth") if "optimizer" not in str(f)] - ) - for embedder_fpath in tqdm.tqdm(paths): - _create_ranges_for_embedder( - dbgym_cfg, embedder_fpath, generic_args, analyze_args - ) - - -def _create_ranges_for_embedder( - dbgym_cfg: DBGymConfig, - embedder_fpath: Path, - generic_args: EmbeddingTrainGenericArgs, - analyze_args: EmbeddingAnalyzeArgs, -) -> None: - """ - Create the ranges.txt file corresponding to a specific part*/embeddings_*/models/epoch*/embedder_*.pth file - """ - # Return right away if the epoch isn't high enough - epoch_i = int(str(embedder_fpath).split("embedder_")[-1].split(".pth")[0]) - if epoch_i < analyze_args.start_epoch: - return - - # Load the benchmark configuration. - with open_and_save(dbgym_cfg, generic_args.benchmark_config_path, "r") as f: - benchmark_config = yaml.safe_load(f) - benchmark_config = benchmark_config[[k for k in benchmark_config.keys()][0]] - - max_num_columns = benchmark_config["max_num_columns"] - tables = benchmark_config["tables"] - attributes = benchmark_config["attributes"] - query_spec = benchmark_config["query_spec"] - - workload = Workload( - dbgym_cfg, tables, attributes, query_spec, generic_args.workload_path, pid=None - ) - modified_attrs = workload.column_usages() - - # Load VAE. 
- embeddings_dpath = embedder_fpath.parent.parent.parent # part*/embeddings_*/ - embeddings_config_fpath = embeddings_dpath / "config" # part*/embeddings_*/config - # don't use open_and_save() because we generated embeddings_config_fpath in this run - with open(embeddings_config_fpath, "r") as f: - config = json.load(f) - assert config["mean_output_act"] == "sigmoid" - index_output_transform = ( - lambda x: torch.nn.Sigmoid()(x) * config["output_scale"] - ) - - def index_noise_scale(x: ProtoAction, n: Optional[torch.Tensor]) -> ProtoAction: - assert n is None - return ProtoAction(torch.clamp(x, 0.0, config["output_scale"])) - - max_attrs, max_cat_features = fetch_vae_parameters_from_workload( - workload, len(tables) - ) - vae = create_vae_model(config, max_attrs, max_cat_features) - # don't call save_file() because we generated embedder_fpath in this run - vae.load_state_dict(torch.load(embedder_fpath)) - vae.eval() - - idxs = LatentIndexSpace( - tables=tables, - max_num_columns=max_num_columns, - max_indexable_attributes=workload.max_indexable(), - seed=np.random.randint(1, int(1e10)), - rel_metadata=copy.deepcopy(modified_attrs), - attributes_overwrite=copy.deepcopy(modified_attrs), - tbl_include_subsets=TableAttrAccessSetsMap({}), - vae=vae, - index_space_aux_type=False, - index_space_aux_include=False, - deterministic_policy=True, - latent_dim=config["latent_dim"], - index_output_transform=index_output_transform, - # No-op noise. - index_noise_scale=index_noise_scale, - artifact_manager=None, - ) - - output_scale = config["metric_loss_md"]["output_scale"] - bias_separation = config["metric_loss_md"]["bias_separation"] - num_segments = min(analyze_args.max_segments, math.ceil(1.0 / bias_separation)) - - base = 0 - epoch_dpath = ( - embeddings_dpath / "models" / f"epoch{epoch_i}" - ) # part*/embeddings_*/models/epoch*/ - ranges_fpath = epoch_dpath / RANGES_FNAME - with open(ranges_fpath, "w") as f: - for _ in tqdm.tqdm(range(num_segments), total=num_segments, leave=False): - classes: dict[str, int] = {} - with torch.no_grad(): - points = ( - torch.rand(analyze_args.num_points_to_sample, config["latent_dim"]) - * output_scale - + base - ) - protos = idxs.from_latent(points) - neighbors = [ - idxs.neighborhood( - proto, - neighbor_parameters={ - "knob_num_nearest": 100, - "knob_span": 1, - "index_num_samples": 1, - "index_rules": False, - }, - )[0] - for proto in protos - ] - - for n in neighbors: - idx_class = idxs.get_index_class(n) - if idx_class not in classes: - classes[idx_class] = 0 - classes[idx_class] += 1 - sorted_classes = sorted( - [(k, v) for k, v in classes.items()], key=lambda x: x[1], reverse=True - ) - if analyze_args.num_classes_to_keep != 0: - sorted_classes = sorted_classes[: analyze_args.num_classes_to_keep] - - f.write(f"Generating range {base} - {base + output_scale}\n") - f.write( - "\n".join( - [ - f"{k}: {v / analyze_args.num_points_to_sample}" - for (k, v) in sorted_classes - ] - ) - ) - f.write("\n") - base += output_scale - - -def _get_part_i_dpath(dbgym_cfg: DBGymConfig, part_i: int) -> Path: - return dbgym_cfg.cur_task_runs_data_path(mkdir=True) / f"part{part_i}" diff --git a/tune/protox/embedding/cli.py b/tune/protox/embedding/cli.py deleted file mode 100644 index 9beb2bd2..00000000 --- a/tune/protox/embedding/cli.py +++ /dev/null @@ -1,15 +0,0 @@ -import click - -from tune.protox.embedding.datagen import datagen -from tune.protox.embedding.train import train -from util.workspace import DBGymConfig - - -@click.group("embedding") -@click.pass_obj -def 
embedding_group(dbgym_cfg: DBGymConfig) -> None: - dbgym_cfg.append_group("embedding") - - -embedding_group.add_command(datagen) -embedding_group.add_command(train) diff --git a/tune/protox/embedding/datagen.py b/tune/protox/embedding/datagen.py deleted file mode 100644 index d80a264f..00000000 --- a/tune/protox/embedding/datagen.py +++ /dev/null @@ -1,924 +0,0 @@ -import copy -import gc -import logging -import math -import os -import random -import shutil -import time -from itertools import chain, combinations -from multiprocessing import Pool -from pathlib import Path -from typing import Any, NewType, Optional, cast - -import click -import numpy as np -import pandas as pd -import psycopg -import yaml -from sklearn.preprocessing import quantile_transform - -from benchmark.constants import DEFAULT_SCALE_FACTOR -from dbms.postgres.cli import start_postgres, stop_postgres -from tune.protox.embedding.loss import COST_COLUMNS -from tune.protox.env.space.primitive_space.index_space import IndexSpace -from tune.protox.env.types import ( - QuerySpec, - QueryType, - TableAttrAccessSetsMap, - TableAttrListMap, -) -from tune.protox.env.workload import Workload -from util.log import DBGYM_LOGGER_NAME -from util.pg import create_psycopg_conn -from util.shell import subprocess_run -from util.workspace import ( - BENCHMARK_NAME_PLACEHOLDER, - SCALE_FACTOR_PLACEHOLDER, - WORKLOAD_NAME_PLACEHOLDER, - WORKSPACE_PATH_PLACEHOLDER, - DBGymConfig, - fully_resolve_path, - get_default_benchmark_config_path, - get_default_dbdata_parent_dpath, - get_default_pgbin_path, - get_default_pristine_dbdata_snapshot_path, - get_default_traindata_fname, - get_default_workload_name_suffix, - get_default_workload_path, - get_workload_name, - is_fully_resolved, - is_ssd, - link_result, - open_and_save, - save_file, -) - -# FUTURE(oltp) -# try: -# sys.path.append("/home/wz2/noisepage-pilot") -# from behavior.utils.prepare_ou_data import clean_input_data -# except: -# pass - - -QueryBatches = NewType( - "QueryBatches", list[tuple[str, list[tuple[QueryType, str]], Any]] -) - - -# click steup -@click.command() -@click.pass_obj - -# generic args -@click.argument("benchmark-name") -@click.option( - "--workload-name-suffix", - type=str, - default=None, - help=f"The suffix of the workload name (the part after the scale factor).", -) -@click.option( - "--scale-factor", - type=float, - default=DEFAULT_SCALE_FACTOR, - help=f"The scale factor used when generating the data of the benchmark.", -) -@click.option( - "--pgbin-path", - type=Path, - default=None, - help=f"The path to the bin containing Postgres executables. The default is {get_default_pgbin_path(WORKSPACE_PATH_PLACEHOLDER)}.", -) -# TODO(phw2): need to run pgtune before gathering data -@click.option( - "--pristine-dbdata-snapshot-path", - type=Path, - default=None, - help=f"The path to the .tgz snapshot of the dbdata directory to build an embedding space over. The default is {get_default_pristine_dbdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", -) -@click.option( - "--intended-dbdata-hardware", - type=click.Choice(["hdd", "ssd"]), - default="hdd", - help=f"The intended hardware dbdata should be on. Used as a sanity check for --dbdata-parent-dpath.", -) -@click.option( - "--dbdata-parent-dpath", - type=Path, - default=None, - help=f"The path to the parent directory of the dbdata which will be actively tuned. 
The default is {get_default_pristine_dbdata_snapshot_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, SCALE_FACTOR_PLACEHOLDER)}.", -) -@click.option( - "--benchmark-config-path", - type=Path, - default=None, - help=f"The path to the .yaml config file for the benchmark. The default is {get_default_benchmark_config_path(BENCHMARK_NAME_PLACEHOLDER)}.", -) -@click.option( - "--workload-path", - type=Path, - default=None, - help=f"The path to the directory that specifies the workload (such as its queries and order of execution). The default is {get_default_workload_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER)}.", -) -@click.option( - "--seed", - type=int, - default=None, - help="The seed used for all sources of randomness (random, np, torch, etc.). The default is a random value.", -) - -# dir gen args -@click.option( - "--leading-col-tbls", - type=str, - default=None, - help='All tables included here will have indexes created s.t. each column is represented equally often as the "leading column" of the index.', -) -# TODO(wz2): what if we sample tbl_sample_limit / len(cols) for tables in leading_col_tbls? this way, tbl_sample_limit will always represent the total # of indexes created on that table. currently the description of the param is a bit weird as you can see -@click.option( - "--default-sample-limit", - type=int, - default=2048, - help="The default sample limit of all tables, used unless override sample limit is specified. If the table is in --leading-col-tbls, sample limit is # of indexes to sample per column for that table table. If the table is in --leading-col-tbls, sample limit is the # of indexes to sample total for that table.", -) -@click.option( - "--override-sample-limits", - type=str, - default=None, - help='Override the sample limit for specific tables. An example input would be "lineitem,32768,orders,4096".', -) -# TODO(wz2): if I'm just outputting out.parquet instead of the full directory, do we even need file limit at all? -@click.option( - "--file-limit", - type=int, - default=1024, - help="The max # of data points (one data point = one hypothetical index) per file", -) -@click.option( - "--max-concurrent", - type=int, - default=None, - help="The max # of concurrent threads that will be creating hypothetical indexes. The default is `nproc`.", -) -# TODO(wz2): when would we not want to generate costs? -@click.option("--no-generate-costs", is_flag=True, help="Turn off generating costs.") - -# file gen args -@click.option("--table-shape", is_flag=True, help="TODO(wz2)") -@click.option("--dual-class", is_flag=True, help="TODO(wz2)") -@click.option("--pad-min", type=int, default=None, help="TODO(wz2)") -@click.option("--rebias", type=float, default=0, help="TODO(wz2)") -def datagen( - dbgym_cfg: DBGymConfig, - benchmark_name: str, - workload_name_suffix: Optional[str], - scale_factor: float, - pgbin_path: Optional[Path], - pristine_dbdata_snapshot_path: Optional[Path], - intended_dbdata_hardware: str, - dbdata_parent_dpath: Optional[Path], - benchmark_config_path: Optional[Path], - workload_path: Optional[Path], - seed: Optional[int], - leading_col_tbls: str, - default_sample_limit: int, - override_sample_limits: Optional[str], - file_limit: int, - max_concurrent: Optional[int], - no_generate_costs: bool, - table_shape: bool, - dual_class: bool, - pad_min: int, - rebias: float, -) -> None: - """ - Samples the effects of indexes on the workload as estimated by HypoPG. 
- Outputs all this data as a .parquet file in the run_*/ dir. - Updates the symlink in the data/ dir to point to the new .parquet file. - """ - # Set args to defaults programmatically (do this before doing anything else in the function). - # TODO(phw2): figure out whether different scale factors use the same config - # TODO(phw2): figure out what parts of the config should be taken out (like stuff about tables) - if workload_name_suffix is None: - workload_name_suffix = get_default_workload_name_suffix(benchmark_name) - workload_name = get_workload_name(scale_factor, workload_name_suffix) - if workload_path is None: - workload_path = get_default_workload_path( - dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name - ) - if benchmark_config_path is None: - benchmark_config_path = get_default_benchmark_config_path(benchmark_name) - if pgbin_path is None: - pgbin_path = get_default_pgbin_path(dbgym_cfg.dbgym_workspace_path) - if pristine_dbdata_snapshot_path is None: - pristine_dbdata_snapshot_path = get_default_pristine_dbdata_snapshot_path( - dbgym_cfg.dbgym_workspace_path, benchmark_name, scale_factor - ) - if dbdata_parent_dpath is None: - dbdata_parent_dpath = get_default_dbdata_parent_dpath( - dbgym_cfg.dbgym_workspace_path - ) - if max_concurrent is None: - max_concurrent = os.cpu_count() - assert max_concurrent is not None - if seed is None: - seed = random.randint(0, int(1e8)) - - # Fully resolve all input paths. - workload_path = fully_resolve_path(dbgym_cfg, workload_path) - benchmark_config_path = fully_resolve_path(dbgym_cfg, benchmark_config_path) - pgbin_path = fully_resolve_path(dbgym_cfg, pgbin_path) - pristine_dbdata_snapshot_path = fully_resolve_path( - dbgym_cfg, pristine_dbdata_snapshot_path - ) - dbdata_parent_dpath = fully_resolve_path(dbgym_cfg, dbdata_parent_dpath) - - # Check assertions on args - if intended_dbdata_hardware == "hdd": - assert not is_ssd( - dbdata_parent_dpath - ), f"Intended hardware is HDD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an SSD" - elif intended_dbdata_hardware == "ssd": - assert is_ssd( - dbdata_parent_dpath - ), f"Intended hardware is SSD but dbdata_parent_dpath ({dbdata_parent_dpath}) is an HDD" - else: - assert False - - # Process the "data structure" args - leading_col_tbls_parsed: list[str] = ( - [] if leading_col_tbls is None else leading_col_tbls.split(",") - ) - # I chose to only use the "," delimiter in override_sample_limits_str, so the dictionary is encoded as [key],[value],[key],[value] - # I felt this was better than introducing a new delimiter which might conflict with the name of a table - override_sample_limits_parsed: dict[str, int] = dict() - if override_sample_limits is not None: - override_sample_limits_str_split = override_sample_limits.split(",") - assert ( - len(override_sample_limits_str_split) % 2 == 0 - ), f'override_sample_limits ("{override_sample_limits}") does not have an even number of values' - for i in range(0, len(override_sample_limits_str_split), 2): - tbl = override_sample_limits_str_split[i] - limit = int(override_sample_limits_str_split[i + 1]) - override_sample_limits_parsed[tbl] = limit - - # Group args together to reduce the # of parameters we pass into functions - # I chose to group them into separate objects instead because it felt hacky to pass a giant args object into every function - generic_args = EmbeddingDatagenGenericArgs( - benchmark_name, - workload_name, - scale_factor, - benchmark_config_path, - seed, - workload_path, - pristine_dbdata_snapshot_path, - 
dbdata_parent_dpath, - ) - dir_gen_args = EmbeddingDirGenArgs( - leading_col_tbls_parsed, - default_sample_limit, - override_sample_limits_parsed, - file_limit, - max_concurrent, - no_generate_costs, - ) - file_gen_args = EmbeddingFileGenArgs(table_shape, dual_class, pad_min, rebias) - - # run all steps - start_time = time.time() - dbdata_dpath = untar_snapshot( - dbgym_cfg, - generic_args.pristine_dbdata_snapshot_path, - generic_args.dbdata_parent_dpath, - ) - pgbin_path = fully_resolve_path( - dbgym_cfg, get_default_pgbin_path(dbgym_cfg.dbgym_workspace_path) - ) - start_postgres(dbgym_cfg, pgbin_path, dbdata_dpath) - _gen_traindata_dpath(dbgym_cfg, generic_args, dir_gen_args) - _combine_traindata_dpath_into_parquet(dbgym_cfg, generic_args, file_gen_args) - datagen_duration = time.time() - start_time - with open(f"{dbgym_cfg.dbgym_this_run_path}/datagen_time.txt", "w") as f: - f.write(f"{datagen_duration}") - stop_postgres(dbgym_cfg, pgbin_path, dbdata_dpath) - - -def untar_snapshot( - dbgym_cfg: DBGymConfig, dbdata_snapshot_fpath: Path, dbdata_parent_dpath: Path -) -> Path: - # It should be an absolute path and it should exist - assert is_fully_resolved( - dbdata_snapshot_fpath - ), f"untar_snapshot(): dbdata_snapshot_fpath ({dbdata_snapshot_fpath}) either doesn't exist or is not absolute" - save_file(dbgym_cfg, dbdata_snapshot_fpath) - dbdata_dpath = dbdata_parent_dpath / "dbdata" - # Make the parent dir and the dbdata dir. Note how we require that dbdata_dpath does not exist while it's ok if the parent does. - dbdata_parent_dpath.mkdir(parents=True, exist_ok=True) - if dbdata_dpath.exists(): - shutil.rmtree(dbdata_dpath) - dbdata_dpath.mkdir(parents=False, exist_ok=False) - subprocess_run(f"tar -xzf {dbdata_snapshot_fpath} -C {dbdata_dpath}") - return dbdata_dpath - - -class EmbeddingDatagenGenericArgs: - """ - I made Embedding*Args classes to reduce the # of parameters we pass into functions - I wanted to use classes over dictionaries to enforce which fields are allowed to be present - I wanted to make multiple classes instead of just one to conceptually separate the different args - """ - - def __init__( - self, - benchmark_name: str, - workload_name: str, - scale_factor: float, - benchmark_config_path: Path, - seed: int, - workload_path: Path, - pristine_dbdata_snapshot_path: Path, - dbdata_parent_dpath: Path, - ): - self.benchmark_name = benchmark_name - self.workload_name = workload_name - self.scale_factor = scale_factor - self.benchmark_config_path = benchmark_config_path - self.seed = seed - self.workload_path = workload_path - self.pristine_dbdata_snapshot_path = pristine_dbdata_snapshot_path - self.dbdata_parent_dpath = dbdata_parent_dpath - - -class EmbeddingDirGenArgs: - """Same comment as EmbeddingDatagenGenericArgs""" - - def __init__( - self, - leading_col_tbls: list[str], - default_sample_limit: int, - override_sample_limits: dict[str, int], - file_limit: int, - max_concurrent: int, - no_generate_costs: bool, - ): - self.leading_col_tbls = leading_col_tbls - self.default_sample_limit = default_sample_limit - self.override_sample_limits = override_sample_limits - self.file_limit = file_limit - self.max_concurrent = max_concurrent - self.no_generate_costs = no_generate_costs - - -class EmbeddingFileGenArgs: - """Same comment as EmbeddingDatagenGenericArgs""" - - def __init__( - self, table_shape: bool, dual_class: bool, pad_min: int, rebias: float - ): - self.table_shape = table_shape - self.dual_class = dual_class - self.pad_min = pad_min - self.rebias = rebias - - -def 
get_traindata_dpath(dbgym_cfg: DBGymConfig) -> Path: - return dbgym_cfg.cur_task_runs_data_path("traindata", mkdir=True) - - -def _gen_traindata_dpath( - dbgym_cfg: DBGymConfig, - generic_args: EmbeddingDatagenGenericArgs, - dir_gen_args: EmbeddingDirGenArgs, -) -> None: - with open_and_save(dbgym_cfg, generic_args.benchmark_config_path, "r") as f: - benchmark_config = yaml.safe_load(f) - - max_num_columns: int = benchmark_config["protox"]["max_num_columns"] - tables: list[str] = benchmark_config["protox"]["tables"] - attributes: TableAttrListMap = benchmark_config["protox"]["attributes"] - query_spec: QuerySpec = benchmark_config["protox"]["query_spec"] - - workload = Workload( - dbgym_cfg, tables, attributes, query_spec, generic_args.workload_path, pid=None - ) - modified_attrs = workload.column_usages() - traindata_dpath = get_traindata_dpath(dbgym_cfg) - - with Pool(dir_gen_args.max_concurrent) as pool: - results = [] - job_id = 0 - for tbl in tables: - cols: list[Optional[str]] = ( - [None] - if tbl not in dir_gen_args.leading_col_tbls - else cast(list[Optional[str]], modified_attrs[tbl]) - ) - for colidx, col in enumerate(cols): - if col is None: - output = traindata_dpath / tbl - else: - output = traindata_dpath / tbl / col - output.mkdir(parents=True, exist_ok=True) - - tbl_sample_limit = dir_gen_args.override_sample_limits.get( - tbl, dir_gen_args.default_sample_limit - ) - num_slices = math.ceil(tbl_sample_limit / dir_gen_args.file_limit) - - for _ in range(0, num_slices): - results.append( - pool.apply_async( - _produce_index_data, - args=( - dbgym_cfg, - tables, - attributes, - query_spec, - generic_args.workload_path, - max_num_columns, - generic_args.seed, - not dir_gen_args.no_generate_costs, - min(tbl_sample_limit, dir_gen_args.file_limit), - tbl, # target - colidx if col is not None else None, - col, - job_id, - output, - ), - ) - ) - job_id += 1 - - pool.close() - pool.join() - - for result in results: - result.get() - - -def _combine_traindata_dpath_into_parquet( - dbgym_cfg: DBGymConfig, - generic_args: EmbeddingDatagenGenericArgs, - file_gen_args: EmbeddingFileGenArgs, -) -> None: - tbl_dirs = {} - with open_and_save(dbgym_cfg, generic_args.benchmark_config_path, "r") as f: - benchmark_config = yaml.safe_load(f) - benchmark_config = benchmark_config[[k for k in benchmark_config.keys()][0]] - tables = benchmark_config["tables"] - for i, tbl in enumerate(tables): - tbl_dirs[tbl] = i - - traindata_dpath = get_traindata_dpath(dbgym_cfg) - files = [f for f in Path(traindata_dpath).rglob("*.parquet")] - - def read(file: Path) -> pd.DataFrame: - tbl = Path(file).parts[-2] - if tbl not in tbl_dirs: - tbl = Path(file).parts[-3] - df = pd.read_parquet(file) - df["tbl_index"] = tbl_dirs[tbl] - - if file_gen_args.pad_min is not None: - if df.shape[0] < file_gen_args.pad_min: - df = pd.concat([df] * int(file_gen_args.pad_min / df.shape[0])) - return df - - df = pd.concat(map(read, files)) - - if "reference_cost" in df.columns: - target_cost = df.target_cost - - # This expression is the improvement expression. 
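# A worked scalar restatement of the improvement expressions computed just below, using
# hypothetical cost numbers (illustrative only, not from any benchmark), where, as the datagen
# code above defines them:
#   reference_cost       = optimizer cost of the whole workload with no hypothetical index
#   table_reference_cost = cost of only the queries touching the target table, no index
#   target_cost          = cost of those same queries with the hypothetical index installed
reference_cost, table_reference_cost, target_cost = 1000.0, 400.0, 300.0
act_cost = reference_cost - (table_reference_cost - target_cost)  # 900.0: estimated workload cost with the index
mult = reference_cost / act_cost                                  # ~1.11x multiplicative improvement
rel = (reference_cost - act_cost) / act_cost                      # ~0.11 relative improvement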
- act_cost = df.reference_cost - (df.table_reference_cost - target_cost) - mult = df.reference_cost / act_cost - rel = (df.reference_cost - act_cost) / act_cost - mult_tbl = df.table_reference_cost / target_cost - rel_tbl = (df.table_reference_cost - target_cost) / target_cost - - if file_gen_args.table_shape: - df["quant_mult_cost_improvement"] = quantile_transform( - mult_tbl.to_numpy().reshape(-1, 1), - n_quantiles=100000, - subsample=df.shape[0], - ) - df["quant_rel_cost_improvement"] = quantile_transform( - rel_tbl.to_numpy().reshape(-1, 1), - n_quantiles=100000, - subsample=df.shape[0], - ) - else: - df["quant_mult_cost_improvement"] = quantile_transform( - mult.to_numpy().reshape(-1, 1), - n_quantiles=min(100000, df.shape[0]), - subsample=df.shape[0], - ) - df["quant_rel_cost_improvement"] = quantile_transform( - rel.to_numpy().reshape(-1, 1), - n_quantiles=min(100000, df.shape[0]), - subsample=df.shape[0], - ) - - df.drop( - columns=["reference_cost", "table_reference_cost", "target_cost"], - inplace=True, - errors="ignore", - ) - - if file_gen_args.dual_class: - df["real_idx_class"] = df["idx_class"] - df["idx_class"] = df["real_idx_class"] * df.col0.max() + df.col1 - - df.drop(columns=["table"], inplace=True) - df.fillna(0, inplace=True) - # Only int-ify non-cost columns. - columns = [ - c - for c in df.columns - if c not in COST_COLUMNS and "idx_class" not in c and "cmd" != c - ] - df[columns] = df[columns].astype(int) - - if file_gen_args.rebias > 0: - groups = ( - df.groupby(by=["tbl_index", "idx_class"]) - .quant_mult_cost_improvement.describe() - .sort_values(by=["max"], ascending=False) - ) - datum = [] - cur_bias = 1.0 - sep_bias = file_gen_args.rebias - for g in groups.itertuples(): - d = df[ - (df.tbl_index == g.Index[0]) # type: ignore - & (df.idx_class == g.Index[1]) # type: ignore - & (df.quant_mult_cost_improvement >= g._6) - ].copy() - d["quant_mult_cost_improvement"] = cur_bias - (file_gen_args.rebias / 2) - datum.append(d) - cur_bias -= sep_bias - df = pd.concat(datum, ignore_index=True) - - traindata_path = dbgym_cfg.cur_task_runs_data_path( - mkdir=True - ) / get_default_traindata_fname( - generic_args.benchmark_name, generic_args.workload_name - ) - df.to_parquet(traindata_path) - link_result(dbgym_cfg, traindata_path) - - -_INDEX_SERVER_COUNTS: dict[str, int] = {} - - -def _fetch_server_indexes(connection: psycopg.Connection[Any]) -> None: - global _INDEX_SERVER_COUNTS - query = """ - SELECT t.relname as table_name, i.relname as index_name - FROM pg_class t, pg_class i, pg_index ix - WHERE t.oid = ix.indrelid - and i.oid = ix.indexrelid - """ - - r = [r for r in connection.execute(query)] - _INDEX_SERVER_COUNTS = {} - for rr in r: - if rr[0] not in _INDEX_SERVER_COUNTS: - _INDEX_SERVER_COUNTS[rr[0]] = 0 - _INDEX_SERVER_COUNTS[rr[0]] += 1 - - -# FUTURE(oltp) -# def load_ou_models(dbgym_cfg: DBGymConfig, model_dir): -# models = {} -# for f in Path(model_dir).rglob("*.pkl"): -# ou_name = str(f.parts[-1]).split(".")[0] -# with open_and_save(dbgym_cfg, f, "rb") as model: -# models[ou_name] = pickle.load(model) -# return models - - -def _write(data: list[dict[str, Any]], output_dir: Path, batch_num: int) -> None: - df = pd.DataFrame(data) - cols = [c for c in df.columns if "col" in c and "str" not in c] - df[cols] = df[cols].astype(int) - df.to_parquet(output_dir / f"{batch_num}.parquet") - del df - - -def _augment_query_data(workload: Workload, data: dict[str, float]) -> dict[str, float]: - for qstem, value in workload.queries_mix.items(): - if qstem in data: - 
data[qstem] *= value - return data - - -def _execute_explains( - cursor: psycopg.Cursor[Any], batches: QueryBatches, models: Optional[dict[Any, Any]] -) -> dict[str, float]: - data: dict[str, float] = {} - ou_model_data: dict[str, list[Any]] = {} - - def acquire_model_data(q: str, plan: dict[str, Any]) -> None: - nonlocal ou_model_data - node_tag = plan["Node Type"] - node_tag = node_tag.replace(" ", "") - if node_tag == "ModifyTable": - assert "Operation" in plan - node_tag = { - "Insert": "ModifyTableInsert", - "Update": "ModifyTableUpdate", - "Delete": "ModifyTableDelete", - }[plan["Operation"]] - elif node_tag == "Aggregate": - node_tag = "Agg" - elif node_tag == "NestedLoop": - node_tag = "NestLoop" - - if node_tag == "ModifyTableInsert" or node_tag == "ModifyTableUpdate": - assert "Relation Name" in plan - global _INDEX_SERVER_COUNTS - tbl_name = plan["Relation Name"] - num_indexes = _INDEX_SERVER_COUNTS.get(tbl_name, 0) - - if num_indexes > 0: - if "ModifyTableIndexInsert" not in ou_model_data: - ou_model_data["ModifyTableIndexInsert"] = [] - - for _ in range(num_indexes): - ou_model_data["ModifyTableIndexInsert"].append( - { - "startup_cost": 0, - "total_cost": 0, - "q": q, - } - ) - - if node_tag not in ou_model_data: - ou_model_data[node_tag] = [] - - d = {"q": q} - for k, v in plan.items(): - if k == "Plan" or k == "Plans": - if isinstance(v, dict): - acquire_model_data(q, v) - else: - assert isinstance(v, list) - for vv in v: - acquire_model_data(q, vv) - else: - d[k] = v - d.update({"startup_cost": d["Startup Cost"], "total_cost": d["Total Cost"]}) - ou_model_data[node_tag].append(d) - - for q, sqls, tbl_aliases in batches: - data[q] = 0.0 - for qtype, sql in sqls: - if qtype != QueryType.SELECT and qtype != QueryType.INS_UPD_DEL: - cursor.execute(sql) - else: - ssql = "EXPLAIN (FORMAT JSON) {sql}".format(sql=sql) - explain = [r for r in cursor.execute(ssql, prepare=False)][0][0] - if models is None: - data[q] += explain[0]["Plan"]["Total Cost"] - else: - acquire_model_data(q, explain[0]["Plan"]) - - # FUTURE(oltp) - # if models is not None and len(ou_model_data) > 0: - # holistic_results = [] - # for ou_type, ou_data in ou_model_data.items(): - # if ou_type in models: - # df = pd.DataFrame(ou_data) - # df = clean_input_data(df, separate_indkey_features=False, is_train=True) - # preds = pd.Series(models[ou_type].predict(df).reshape(-1), name="elapsed_us") - # holistic_results.append(pd.concat([preds, df.q], axis=1)) - # else: - # continue - - # holistic_results = pd.concat(holistic_results).reset_index() - # holistic_results = holistic_results.groupby(by=["q"]).sum().reset_index() - # for t in holistic_results.itertuples(): - # if t.q not in data: - # data[t.q] = 0 - # data[t.q] += t.elapsed_us - return data - - -def _extract_refs( - generate_costs: bool, - target: Optional[str], - cursor: psycopg.Cursor[Any], - workload: Workload, - models: Optional[dict[Any, Any]], -) -> tuple[dict[str, float], dict[str, float]]: - ref_qs = {} - table_ref_qs = {} - if generate_costs: - # Get reference costs. - batches = QueryBatches( - [ - (q, workload.queries[q], workload.query_aliases[q]) - for q in workload.queries.keys() - ] - ) - ref_qs = _execute_explains(cursor, batches, models) - ref_qs = _augment_query_data(workload, ref_qs) - - # Get reference costs specific to the table. 
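# A standalone sketch of the EXPLAIN-based costing that _execute_explains performs above,
# assuming an open psycopg cursor `cursor` and a SELECT statement `sql` (both hypothetical here):
rows = list(cursor.execute(f"EXPLAIN (FORMAT JSON) {sql}", prepare=False))
plan = rows[0][0][0]["Plan"]         # first row, first column holds the JSON plan list
estimated_cost = plan["Total Cost"]  # optimizer's estimated total cost; the query is never executed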
- if target is None: - table_ref_qs = ref_qs - else: - qs = workload.queries_for_table(target) - batches = QueryBatches( - [(q, workload.queries[q], workload.query_aliases[q]) for q in qs] - ) - table_ref_qs = _execute_explains(cursor, batches, models) - table_ref_qs = _augment_query_data(workload, table_ref_qs) - return ref_qs, table_ref_qs - - -def _produce_index_data( - dbgym_cfg: DBGymConfig, - tables: list[str], - attributes: TableAttrListMap, - query_spec: QuerySpec, - workload_path: Path, - max_num_columns: int, - seed: int, - generate_costs: bool, - sample_limit: int, - target: Optional[str], - leading_col: Optional[int], - leading_col_name: Optional[str], - p: int, - output: Path, -) -> None: - models = None - # FUTURE(oltp) - # if model_dir is not None: - # models = load_ou_models(model_dir) - - # Construct workload. - workload = Workload(dbgym_cfg, tables, attributes, query_spec, workload_path, pid=p) - modified_attrs = workload.column_usages() - - np.random.seed(seed) - random.seed(seed) - - # TODO: In theory we want to bias the sampling towards longer length. - # Since the options grow exponentially from there... - idxs = IndexSpace( - tables, - max_num_columns, - max_indexable_attributes=workload.max_indexable(), - seed=seed, - rel_metadata=copy.deepcopy(modified_attrs), - attributes_overwrite=copy.deepcopy(modified_attrs), - tbl_include_subsets=TableAttrAccessSetsMap({}), - index_space_aux_type=False, - index_space_aux_include=False, - deterministic_policy=False, - ) - - table_idx = 0 - if target is not None: - for i, tbl in enumerate(tables): - if tbl == target: - table_idx = i - break - - if len(modified_attrs[target]) == 0: - # there are no indexes to generate. - return - - with create_psycopg_conn() as connection: - _fetch_server_indexes(connection) - if generate_costs: - try: - connection.execute("CREATE EXTENSION IF NOT EXISTS hypopg") - except: - pass - - with connection.cursor() as cursor: - reference_qs, table_reference_qs = _extract_refs( - generate_costs, target, cursor, workload, models - ) - accum_data: list[dict[str, Any]] = [] - - # Repeatedly... - for i in range(sample_limit): - if (i % 1024) == 0: - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"{target} {leading_col_name} {p} progress update: {i} / {sample_limit}." - ) - - act = idxs.sample( - mask={ - "table_idx": None if target is None else table_idx, - "col_idx": leading_col, - } - ) - ia = idxs.to_action(act) - - accum: dict[str, Any] = { - "table": ia.tbl_name, - } - if generate_costs: - index_size = 0 - # Only try to build if we actually need the cost information. - ia = idxs.to_action(act) - cmds = [] - if ia.is_valid: - # Always try to add the index. - cmds = [ia.sql(add=True)] - - if len(cmds) > 0: - # Use hypopg to create the index. 
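# The HypoPG calls used below, collected into one standalone sketch (assumes an open psycopg
# cursor `cursor`, the hypopg extension installed, and a hypothetical example table/column):
row = list(cursor.execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON lineitem (l_orderkey)')"))[0]
indexrelid = row[0]                                                                      # OID of the hypothetical index
size = list(cursor.execute(f"SELECT * FROM hypopg_relation_size({indexrelid})"))[0][0]   # estimated size in bytes
cursor.execute(f"SELECT hypopg_drop_index({indexrelid})")                                # discard it; nothing was built on disk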
- r = [ - r - for r in cursor.execute( - f"SELECT * FROM hypopg_create_index('{cmds[0]}')" - ) - ] - if len(r) == 0: - assert False - - global _INDEX_SERVER_COUNTS - if ia.tbl_name not in _INDEX_SERVER_COUNTS: - _INDEX_SERVER_COUNTS[ia.tbl_name] = 0 - _INDEX_SERVER_COUNTS[ia.tbl_name] += 1 - - indexrelid = r[0][0] - if models is None: - qs_for_tbl = workload.queries_for_table_col( - ia.tbl_name, ia.columns[0] - ) - else: - qs_for_tbl = workload.queries_for_table(ia.tbl_name) - - batches = QueryBatches( - [ - (q, workload.queries[q], workload.query_aliases[q]) - for q in qs_for_tbl - ] - ) - data = _execute_explains(cursor, batches, models) - data = _augment_query_data(workload, data) - if models is None: - if len(data) != len(table_reference_qs): - # Fold the stuff we aren't aware of. - for k, v in table_reference_qs.items(): - if k not in data: - data[k] = v - assert set(data.keys()) == set(table_reference_qs.keys()) - else: - assert len(data) == len(table_reference_qs) - - _INDEX_SERVER_COUNTS[ia.tbl_name] -= 1 - - # Get the index size. - index_size = [ - r - for r in cursor.execute( - f"SELECT * FROM hypopg_relation_size({indexrelid})" - ) - ][0][0] - cursor.execute(f"SELECT hypopg_drop_index({indexrelid})") - accum["cmd"] = cmds[0] - - accum_elem = { - "reference_cost": np.sum([v for v in reference_qs.values()]), - "table_reference_cost": np.sum( - [v for v in table_reference_qs.values()] - ), - "target_cost": np.sum([v for v in data.values()]), - "index_size": index_size, - } - accum.update(accum_elem) - - # Put a bias on the fact that 0 is a "stop"/"invalid" token. - for i in range(max_num_columns): - accum[f"col{i}"] = 0 - - assert ia.col_idxs is not None - for i, col_idx in enumerate(ia.col_idxs): - accum[f"col{i}"] = col_idx + 1 - - # Fetch and install the class. - idx_class = idxs.get_index_class(act) - assert idx_class != "-1" - accum["idx_class"] = int(idx_class) - accum_data.append(accum) - - if len(accum_data) > 0: - _write(accum_data, output, p) - gc.collect() - gc.collect() - - # Log that we finished. - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"{target} {p} progress update: {sample_limit} / {sample_limit}." - ) diff --git a/tune/protox/embedding/default_hpo_space.json b/tune/protox/embedding/default_hpo_space.json deleted file mode 100644 index 46f7b9d4..00000000 --- a/tune/protox/embedding/default_hpo_space.json +++ /dev/null @@ -1,242 +0,0 @@ -{ - "_comment": "Generally 10 epochs is usually good enough.", - "_comment1": "Need to adjust the compression size! 
Based on the input dimensions.", - "initial_configs": [], - "config": { - "act": { - "choice_name": "act", - "type": "choice", - "choices": [ - "relu", - "mish" - ] - }, - "mean_output_act": { - "choice_name": "mean_output_act", - "type": "choice", - "choices": [ - "sigmoid" - ] - }, - "batch_size": { - "choice_name": "batch_size", - "type": "choice", - "choices": [ - 512, - 1024, - 2048 - ] - }, - "lr": { - "choice_name": "lr", - "type": "uniform", - "min": 0.00001, - "max": 0.01 - }, - "weight_decay": { - "choice_name": "weight_decay", - "type": "uniform", - "min": 1e-8, - "max": 0.00001 - }, - "loss_fn": { - "choice_name": "loss_fn", - "type": "choice", - "choices": [ - "vae_cat_loss" - ] - }, - "model": { - "choice_name": "model0", - "type": "subspaces", - "subspaces": [ - { - "latent_dim": { - "choice_name": "latent_dim", - "type": "constant", - "value": 32 - }, - "hidden_sizes": { - "choice_name": "hidden_sizes2", - "type": "choice", - "choices": [ - [ - 64 - ], - [ - 128 - ], - [ - 128, - 64 - ], - [ - 256 - ], - [ - 256, - 64 - ], - [ - 256, - 128 - ] - ] - } - }, - { - "latent_dim": { - "choice_name": "latent_dim", - "type": "constant", - "value": 64 - }, - "hidden_sizes": { - "choice_name": "hidden_sizes3", - "type": "choice", - "choices": [ - [ - 128 - ], - [ - 256 - ], - [ - 256, - 128 - ] - ] - } - } - ] - }, - "metric_loss_md": { - "choice_name": "metric_loss_md", - "type": "subspaces", - "subspaces": [ - { - "metric_loss": { - "choice_name": "metric_loss", - "type": "constant", - "value": "l2_distance_loss" - }, - "require_cost": { - "choice_name": "require_cost", - "type": "constant", - "value": true - }, - "cost_reduction_type": { - "choice_name": "cost_reduction_type1", - "type": "choice", - "choices": [ - "quant_mult_cost_improvement" - ] - }, - "distance_fn": { - "choice_name": "distance_fn7", - "type": "choice", - "choices": [ - "hard,l1" - ] - }, - "distance_scale": { - "choice_name": "distance_scale", - "type": "choice", - "choices": [ - "1" - ] - }, - "bias_separation": { - "choice_name": "bias_separation", - "type": "choice", - "choices": [ - 0.01, - 0.025 - ] - }, - "addtl_bias_separation": { - "choice_name": "addtl_bias_separation", - "type": "choice", - "choices": [ - 0, - 0.015 - ] - } - } - ] - }, - "metric_loss_weight": { - "choice_name": "metric_loss_weight", - "type": "choice", - "choices": [ - 0 - ] - }, - "grad_clip_amount": { - "choice_name": "grad_clip_amount", - "type": "choice", - "choices": [ - 1, - 5, - 10 - ] - }, - "save_every": { - "choice_name": "save_every", - "type": "constant", - "value": 1 - }, - "num_epochs": { - "choice_name": "num_epochs", - "type": "constant", - "value": 20 - }, - "bias_init": { - "choice_name": "bias_init", - "type": "choice", - "choices": [ - "default", - "zeros", - "constant0.05" - ] - }, - "weight_init": { - "choice_name": "weight_init", - "type": "choice", - "choices": [ - "xavier", - "spectral", - "orthogonal" - ] - }, - "weight_uniform": { - "choice_name": "weight_uniform", - "type": "choice", - "choices": [ - true, - false - ] - }, - "weak_bias": { - "choice_name": "weak_bias", - "type": "choice", - "choices": [ - true, - false - ] - }, - "use_bias": { - "choice_name": "use_bias", - "type": "choice", - "choices": [ - true - ] - }, - "output_scale": { - "choice_name": "output_scale", - "type": "choice", - "choices": [ - 1 - ] - } - } - } \ No newline at end of file diff --git a/tune/protox/embedding/loss.py b/tune/protox/embedding/loss.py deleted file mode 100644 index 5fbd85c6..00000000 --- 
a/tune/protox/embedding/loss.py +++ /dev/null @@ -1,181 +0,0 @@ -import math -from typing import Any, Callable, Optional, Tuple, Union, cast - -import torch -import torch.nn as nn -from pytorch_metric_learning import losses -from pytorch_metric_learning.utils import common_functions as c_f - -COST_COLUMNS = [ - "quant_mult_cost_improvement", - "quant_rel_cost_improvement", -] - - -def get_loss(distance_fn: str) -> nn.Module: - if distance_fn == "l1": - return nn.L1Loss(reduction="none") - elif distance_fn == "l2": - return nn.MSELoss(reduction="none") - else: - assert False - - -def get_bias_fn( - config: dict[str, Any] -) -> Callable[ - [torch.Tensor, torch.Tensor], Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]] -]: - def bias_fn( - data: torch.Tensor, labels: torch.Tensor - ) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]: - red_index = COST_COLUMNS.index(config["cost_reduction_type"]) - distance_scale = config["distance_scale"] - if distance_scale == "auto": - distance_scale = math.sqrt(data.shape[1]) - else: - distance_scale = float(distance_scale) - - bias_separation = config.get("bias_separation", 0.05) - addtl_bias_separation = config.get("addtl_bias_separation", 0) - - assert len(labels.shape) == 2 - assert labels.shape[1] == len(COST_COLUMNS) + 1 - target_loc = labels[:, red_index].reshape(-1, 1) - target_loc = distance_scale * (1 - target_loc) - - # FIXME: in reality, this should not be linear!. - perf_degrees = (target_loc / bias_separation).floor() - - if config.get("weak_bias", False): - # Assign the weak bias allocation based on "separation margin". - percent_sep = ( - target_loc - perf_degrees * bias_separation - ) / bias_separation - weak_bias = (bias_separation + addtl_bias_separation) * 0.95 * percent_sep - top_clamp = (perf_degrees + 1) * (bias_separation + addtl_bias_separation) - return ( - perf_degrees * (bias_separation + addtl_bias_separation) + weak_bias, - top_clamp, - ) - else: - return cast( - torch.Tensor, perf_degrees * (bias_separation + addtl_bias_separation) - ) - - return bias_fn - - -def _distance_cost( - distance_fn: str, - distance_scale: str, - reduction_type: str, - preds: torch.Tensor, - targets: torch.Tensor, - bias: Callable[ - [torch.Tensor, torch.Tensor], - Union[tuple[torch.Tensor, torch.Tensor], torch.Tensor], - ], - output_scale: float, -) -> Any: - bias_vals = bias(preds, targets) - assert isinstance(bias_vals, torch.Tensor) - preds = preds - bias_vals - - assert reduction_type in COST_COLUMNS - red_index = COST_COLUMNS.index(reduction_type) - - assert len(targets.shape) == 2 - assert targets.shape[1] == len(COST_COLUMNS) + 1 - target_loc = targets[:, red_index].reshape(-1, 1) - - # The better it is, the closer to zero it is. - # This subtraction should be fine because target_loc should be between 0 and 1 due to quantile. - # Then scale it to match the gaussian distance (I think). - if distance_scale == "auto": - distance_scale_val = math.sqrt(preds.shape[1]) - else: - distance_scale_val = float(distance_scale) - - # FIXME: Overwrite the distance_scale using the output scale. - distance_scale_val = output_scale - - # Produce the actual target location. - target_loc = distance_scale_val * (1 - target_loc) - target_loc = target_loc.expand(-1, preds.shape[1]) - preds_dist = preds - - comps = distance_fn.split(",") - margin, margin_spec = comps[0], comps[1] - if margin == "hard": - # Directly regress against the boundary. 
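# A tiny tensor example of the two margin modes handled just below (hard regresses directly
# against the boundary; soft only penalizes points outside a tolerance band). Values are made up:
import torch
preds_dist = torch.tensor([0.20, 0.50, 0.90])
target_loc = torch.tensor([0.50, 0.50, 0.50])
hard_loss = torch.nn.L1Loss(reduction="none")(preds_dist, target_loc)    # tensor([0.30, 0.00, 0.40])
soft_loss = torch.clamp((preds_dist - target_loc).abs() - 0.1, min=0.0)  # tensor([0.20, 0.00, 0.30])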
- losses = get_loss(margin_spec)(preds_dist, target_loc) - else: - assert margin == "soft" - # We accept [preds_dist - margin, preds_dist + margin] - tmargin = float(margin_spec) - - # How far away from the "boundary" that we are... - dists = torch.abs(preds_dist - target_loc) - losses = torch.clamp(dists - tmargin, min=0.0) - - # Reduce to a per-row sum loss term. - losses = losses.sum(dim=1) - return losses - - -class CostLoss(losses.BaseMetricLossFunction): # type: ignore - def __init__(self, metric_loss_md: dict[str, Any], *args: Any, **kwargs: Any): - super().__init__(*args, **kwargs) - self.spec = metric_loss_md - self.bias_fn = get_bias_fn(self.spec) - - def compute_loss( - self, - preds: torch.Tensor, - unused0: Any, - unused1: Any, - data: torch.Tensor, - *args: Any - ) -> dict[str, Any]: - losses = _distance_cost( - self.spec["distance_fn"], - self.spec["distance_scale"], - self.spec["cost_reduction_type"], - preds, - data, - self.bias_fn, - self.spec["output_scale"], - ) - - return { - "loss": { - "losses": losses.mean(), - "indices": None, - "reduction_type": "already_reduced", - }, - } - - def forward( - self, - embeddings: torch.Tensor, - labels: torch.Tensor, - indices_tuple: Optional[Any] = None, - ref_emb: Optional[Any] = None, - ref_labels: Optional[Any] = None, - ) -> Any: - """ - Args: - embeddings: tensor of size (batch_size, embedding_size) - labels: tensor of size (batch_size) - indices_tuple: tuple of size 3 for triplets (anchors, positives, negatives) - or size 4 for pairs (anchor1, postives, anchor2, negatives) - Can also be left as None - Returns: the loss - """ - self.reset_stats() - if labels is not None: - labels = c_f.to_device(labels, embeddings) - loss_dict = self.compute_loss(embeddings, None, None, labels) - self.add_embedding_regularization_to_loss_dict(loss_dict, embeddings) - return self.reducer(loss_dict, embeddings, labels) diff --git a/tune/protox/embedding/select.py b/tune/protox/embedding/select.py deleted file mode 100644 index ca9466ca..00000000 --- a/tune/protox/embedding/select.py +++ /dev/null @@ -1,254 +0,0 @@ -import json -import os -import shutil -from pathlib import Path -from typing import Any, Optional - -import numpy as np -import pandas as pd -import tqdm -from pandas import DataFrame - -from tune.protox.embedding.analyze import RANGES_FNAME, STATS_FNAME -from tune.protox.embedding.train_args import ( - EmbeddingSelectArgs, - EmbeddingTrainGenericArgs, -) -from util.workspace import DBGymConfig, get_default_embedder_dname, link_result - - -def select_best_embeddings( - dbgym_cfg: DBGymConfig, - generic_args: EmbeddingTrainGenericArgs, - select_args: EmbeddingSelectArgs, -) -> None: - data = _load_data(dbgym_cfg, select_args) - - if generic_args.traindata_path is not None and generic_args.traindata_path.exists(): - raw_data = pd.read_parquet(generic_args.traindata_path) - data = _attach(data, raw_data, select_args.idx_limit) - - curated_dpath = dbgym_cfg.cur_task_runs_data_path("curated", mkdir=True) - curated_results_fpath = ( - dbgym_cfg.cur_task_runs_data_path(mkdir=True) / "curated_results.csv" - ) - data.to_csv(curated_results_fpath, index=False) - - if "idx_class_total_error" in data: - data["elbo"] = data.elbo + data.idx_class_total_error - - if select_args.allow_all: - df = data.sort_values(by=["elbo"]).iloc[: select_args.num_curate] - else: - df = ( - data.sort_values(by=["elbo"]) - .groupby(by=["root"]) - .head(1) - .iloc[: select_args.num_curate] - ) - - if select_args.flatten_idx == -1: - for tup in df.itertuples(): - 
assert type(tup.path) is str - assert type(tup.root) is str - shutil.copytree( - tup.path, - curated_dpath / tup.path, - dirs_exist_ok=True, - ) - shutil.copy( - Path(tup.root) / "config", - curated_dpath / tup.root / "config", - ) - else: - idx = select_args.flatten_idx - info_txt = open(curated_dpath / "info.txt", "w") - - for loop_i, tup in enumerate(df.itertuples()): - assert type(tup.path) is str - assert type(tup.root) is str - epoch = int(str(tup.path).split("epoch")[-1]) - model_dpath = curated_dpath / f"model{idx}" - shutil.copytree(tup.path, model_dpath) - shutil.copy( - Path(tup.root) / "config", - model_dpath / "config", - ) - shutil.move( - model_dpath / f"embedder_{epoch}.pth", - model_dpath / "embedder.pth", - ) - - if loop_i == 0: - link_result( - dbgym_cfg, - model_dpath, - custom_result_name=get_default_embedder_dname( - generic_args.benchmark_name, generic_args.workload_name - ) - + ".link", - ) - - info_txt.write(f"model{idx}/embedder.pth\n") - idx += 1 - - info_txt.close() - - -def _load_data(dbgym_cfg: DBGymConfig, select_args: EmbeddingSelectArgs) -> DataFrame: - stat_infos = [] - stats = [s for s in dbgym_cfg.dbgym_this_run_path.rglob(STATS_FNAME)] - for stat in stats: - if "curated" in str(stat): - continue - - info = {} - # don't use open_and_save() because we generated stat in this run - with open(stat, "r") as f: - stat_dict = json.load(f) - info["recon"] = stat_dict["recon_accum"] - info["metric"] = stat_dict["metric_accum"] - info["elbo"] = info["recon"] - info["elbo_metric"] = info["recon"] + info["metric"] - info["all_loss"] = info["recon"] + info["metric"] - - if select_args.recon is not None and select_args.recon < info["recon"]: - # Did not pass reconstruction threshold. - continue - - info["path"] = str(stat.parent) - info["root"] = str(stat.parent.parent.parent) - - # don't use open_and_save() because we generated config in this run - with open(stat.parent.parent.parent / "config", "r") as f: - config = json.load(f) - - def recurse_set(source: dict[Any, Any], target: dict[Any, Any]) -> None: - for k, v in source.items(): - if isinstance(v, dict): - recurse_set(v, target) - else: - target[k] = v - - recurse_set(config, info) - if select_args.latent_dim is not None: - if info["latent_dim"] != select_args.latent_dim: - continue - - output_scale = config["metric_loss_md"]["output_scale"] - bias_sep = config["metric_loss_md"]["bias_separation"] - - if select_args.bias_sep is not None: - if select_args.bias_sep != bias_sep: - continue - - info["ranges_file"] = str(Path(stat).parent / RANGES_FNAME) - - stat_infos.append(info) - - data = DataFrame(stat_infos) - data = data.loc[:, ~(data == data.iloc[0]).all()] - - if "output_scale" not in data: - data["output_scale"] = output_scale - - if "bias_separation" not in data: - data["bias_separation"] = bias_sep - - return data - - -def _attach(data: DataFrame, raw_data: DataFrame, num_limit: int = 0) -> DataFrame: - # As the group index goes up, the perf should go up (i.e., bounds should tighten) - filtered_data: dict[tuple[float, float], DataFrame] = {} - new_data = [] - for tup in tqdm.tqdm(data.itertuples(), total=data.shape[0]): - tup_dict = {k: getattr(tup, k) for k in data.columns} - if raw_data is not None and Path(tup_dict["ranges_file"]).exists(): - - def compute_dist_score( - current_dists: dict[str, float], base: float, upper: float - ) -> float: - nonlocal filtered_data - key = (base, upper) - if key not in filtered_data: - data_range = raw_data[ - (raw_data.quant_mult_cost_improvement >= base) - & 
(raw_data.quant_mult_cost_improvement < upper) - ] - filtered_data[key] = data_range - if data_range.shape[0] == 0: - return 0 - else: - data_range = filtered_data[key] - - error = 0 - if "real_idx_class" in data_range: - data_dists = ( - data_range.real_idx_class.value_counts() / data_range.shape[0] - ) - else: - data_dists = ( - data_range.idx_class.value_counts() / data_range.shape[0] - ) - - for key, dist in zip(data_dists.index, data_dists): - if str(key) not in current_dists: - error += dist - else: - error += abs(current_dists[str(key)] - dist) - return error - - # don't use open_and_save() because we generated ranges in this run - with open(tup_dict["ranges_file"], "r") as f: - errors: list[float] = [] - drange: tuple[Optional[float], Optional[float]] = (None, None) - current_dists: dict[str, float] = {} - - for line in f: - if "Generating range" in line: - if len(current_dists) > 0: - assert drange[0] is not None - assert drange[1] is not None - errors.append( - compute_dist_score(current_dists, drange[0], drange[1]) - ) - if num_limit > 0 and len(errors) >= num_limit: - current_dists = {} - break - - if drange[0] is None: - drange = (1.0 - tup_dict["bias_separation"], 1.01) - else: - drange = ( - drange[0] - tup_dict["bias_separation"], - drange[0], - ) - current_dists = {} - - else: - ci = line.split(": ")[0] - dist = float(line.strip().split(": ")[-1]) - current_dists[ci] = dist - - if len(current_dists) > 0: - # Put the error in. - errors.append( - compute_dist_score( - current_dists, 0.0, tup_dict["bias_separation"] - ) - ) - - tup_dict["idx_class_errors"] = ",".join( - [str(np.round(e, 2)) for e in errors] - ) - for i, e in enumerate(errors): - tup_dict[f"idx_class_error{i}"] = np.round(e, 2) - - if len(errors) > 0: - tup_dict["idx_class_mean_error"] = np.mean(errors) - tup_dict["idx_class_total_error"] = np.sum(errors) - tup_dict["idx_class_min_error"] = np.min(errors) - tup_dict["idx_class_max_error"] = np.max(errors) - new_data.append(tup_dict) - return DataFrame(new_data) diff --git a/tune/protox/embedding/train.py b/tune/protox/embedding/train.py deleted file mode 100644 index 1e892416..00000000 --- a/tune/protox/embedding/train.py +++ /dev/null @@ -1,264 +0,0 @@ -import logging -import random -from pathlib import Path -from typing import Optional - -import click -import numpy as np -import torch - -from benchmark.constants import DEFAULT_SCALE_FACTOR -from tune.protox.embedding.analyze import ( - RANGES_FNAME, - STATS_FNAME, - analyze_all_embeddings_parts, - compute_num_parts, - redist_trained_models, -) -from tune.protox.embedding.select import select_best_embeddings -from tune.protox.embedding.train_all import train_all_embeddings -from tune.protox.embedding.train_args import ( - EmbeddingAnalyzeArgs, - EmbeddingSelectArgs, - EmbeddingTrainAllArgs, - EmbeddingTrainGenericArgs, -) -from util.workspace import ( - BENCHMARK_NAME_PLACEHOLDER, - DEFAULT_HPO_SPACE_PATH, - WORKLOAD_NAME_PLACEHOLDER, - WORKSPACE_PATH_PLACEHOLDER, - DBGymConfig, - fully_resolve_path, - get_default_benchmark_config_path, - get_default_traindata_path, - get_default_workload_name_suffix, - get_default_workload_path, - get_workload_name, -) - - -# click setup -@click.command() -@click.pass_obj - -# generic args -@click.argument("benchmark-name", type=str) -@click.option( - "--workload-name-suffix", - type=str, - default=None, - help=f"The suffix of the workload name (the part after the scale factor).", -) -@click.option( - "--scale-factor", - type=float, - default=DEFAULT_SCALE_FACTOR, - 
help=f"The scale factor used when generating the data of the benchmark.", -) -@click.option( - "--benchmark-config-path", - type=Path, - default=None, - help=f"The path to the .yaml config file for the benchmark. The default is {get_default_benchmark_config_path(BENCHMARK_NAME_PLACEHOLDER)}.", -) -@click.option( - "--traindata-path", - type=Path, - default=None, - help=f"The path to the .parquet file containing the training data to use to train the embedding models. The default is {get_default_traindata_path(WORKSPACE_PATH_PLACEHOLDER, BENCHMARK_NAME_PLACEHOLDER, WORKLOAD_NAME_PLACEHOLDER)}.", -) -@click.option( - "--seed", - type=int, - default=None, - help="The seed used for all sources of randomness (random, np, torch, etc.). The default is a random value.", -) - -# train args -@click.option( - "--hpo-space-path", - type=Path, - default=DEFAULT_HPO_SPACE_PATH, - help="The path to the .json file defining the search space for hyperparameter optimization (HPO).", -) -@click.option( - "--train-max-concurrent", - type=int, - default=1, - help="The max # of concurrent embedding models to train during hyperparameter optimization. This is usually set lower than `nproc` to reduce memory pressure.", -) -@click.option("--iterations-per-epoch", default=1000, help=f"TODO(wz2)") -@click.option( - "--num-samples", - type=int, - default=40, - help=f"The # of times to specific hyperparameter configs to sample from the hyperparameter search space and train embedding models with.", -) -@click.option("--train-size", type=float, default=0.99, help=f"TODO(wz2)") - -# analyze args -@click.option( - "--start-epoch", type=int, default=0, help="The epoch to start analyzing models at." -) -@click.option( - "--batch-size", - type=int, - default=8192, - help=f"The size of batches to use to build {STATS_FNAME}.", -) -@click.option( - "--num-batches", - type=int, - default=100, - help=f'The number of batches to use to build {STATS_FNAME}. Setting it to -1 indicates "use all batches".', -) -@click.option( - "--max-segments", - type=int, - default=15, - help=f"The maximum # of segments in the latent space when creating {RANGES_FNAME}.", -) -@click.option( - "--num-points-to-sample", - type=int, - default=8192, - help=f"The number of points to sample when creating {RANGES_FNAME}.", -) -@click.option( - "--num-classes-to-keep", - type=int, - default=5, - help=f"The number of classes to keep for each segment when creating {RANGES_FNAME}.", -) - -# select args -@click.option( - "--recon", - type=float, - default=None, - help="The reconstruction error threshold our selected model(s) need to pass.", -) -@click.option( - "--latent-dim", - type=int, - default=None, - help="The # of latent dimensions our selected model(s) need to have.", -) -@click.option( - "--bias-sep", - type=float, - default=None, - help="The bias separation our selected model(s) need to have.", -) -@click.option( - "--idx-limit", - type=int, - default=15, - help="The number of indexes whose errors to compute during _attach().", -) -@click.option( - "--num-curate", type=int, default=1, help="The number of models to curate" -) # TODO(wz2): why would we want to curate more than one? -@click.option( - "--allow-all", is_flag=True, help="Whether to curate within or across parts." 
-) -@click.option("--flatten-idx", type=int, default=0, help="TODO(wz2)") -def train( - dbgym_cfg: DBGymConfig, - benchmark_name: str, - workload_name_suffix: Optional[str], - scale_factor: float, - benchmark_config_path: Optional[Path], - traindata_path: Optional[Path], - seed: Optional[int], - hpo_space_path: Path, - train_max_concurrent: int, - iterations_per_epoch: int, - num_samples: int, - train_size: float, - start_epoch: int, - batch_size: int, - num_batches: int, - max_segments: int, - num_points_to_sample: int, - num_classes_to_keep: int, - recon: float, - latent_dim: int, - bias_sep: float, - idx_limit: int, - num_curate: int, - allow_all: bool, - flatten_idx: int, -) -> None: - """ - Trains embeddings with num_samples samples of the hyperparameter space. - Analyzes the accuracy of all epochs of all hyperparameter space samples. - Selects the best embedding(s) and packages it as a .pth file in the run_*/ dir. - """ - # set args to defaults programmatically (do this before doing anything else in the function) - if workload_name_suffix is None: - workload_name_suffix = get_default_workload_name_suffix(benchmark_name) - workload_name = get_workload_name(scale_factor, workload_name_suffix) - if traindata_path is None: - traindata_path = get_default_traindata_path( - dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name - ) - # TODO(phw2): figure out whether different scale factors use the same config - # TODO(phw2): figure out what parts of the config should be taken out (like stuff about tables) - if benchmark_config_path is None: - benchmark_config_path = get_default_benchmark_config_path(benchmark_name) - if seed is None: - seed = random.randint(0, int(1e8)) - - # Fully resolve all input paths. - benchmark_config_path = fully_resolve_path(dbgym_cfg, benchmark_config_path) - traindata_path = fully_resolve_path(dbgym_cfg, traindata_path) - hpo_space_path = fully_resolve_path(dbgym_cfg, hpo_space_path) - - # setup - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - - workload_path = fully_resolve_path( - dbgym_cfg, - get_default_workload_path( - dbgym_cfg.dbgym_workspace_path, benchmark_name, workload_name - ), - ) - # group args. 
see comment in datagen.py:datagen() - generic_args = EmbeddingTrainGenericArgs( - benchmark_name, - workload_name, - scale_factor, - benchmark_config_path, - traindata_path, - seed, - workload_path, - ) - train_args = EmbeddingTrainAllArgs( - hpo_space_path, - train_max_concurrent, - iterations_per_epoch, - num_samples, - train_size, - ) - analyze_args = EmbeddingAnalyzeArgs( - start_epoch, - batch_size, - num_batches, - max_segments, - num_points_to_sample, - num_classes_to_keep, - ) - select_args = EmbeddingSelectArgs( - recon, latent_dim, bias_sep, idx_limit, num_curate, allow_all, flatten_idx - ) - - # run all steps - train_all_embeddings(dbgym_cfg, generic_args, train_args) - num_parts = compute_num_parts(num_samples) - redist_trained_models(dbgym_cfg, num_parts) - analyze_all_embeddings_parts(dbgym_cfg, num_parts, generic_args, analyze_args) - select_best_embeddings(dbgym_cfg, generic_args, select_args) diff --git a/tune/protox/embedding/train_all.py b/tune/protox/embedding/train_all.py deleted file mode 100644 index 89ff96f1..00000000 --- a/tune/protox/embedding/train_all.py +++ /dev/null @@ -1,551 +0,0 @@ -import copy -import gc -import json -import logging -import os -import random -import sys -import time -from datetime import datetime -from pathlib import Path -from typing import Any, Callable, Optional, Tuple, Union - -import numpy as np -import pandas as pd -import ray -import torch -import torch.nn as nn -import tqdm -import yaml -from pytorch_metric_learning.utils import logging_presets -from ray.air import session -from ray.train import FailureConfig, RunConfig, SyncConfig -from ray.tune import TuneConfig, with_parameters, with_resources -from ray.tune.schedulers import FIFOScheduler -from ray.tune.search import ConcurrencyLimiter -from ray.tune.search.hyperopt import HyperOptSearch -from sklearn.model_selection import train_test_split -from torch.optim import Adam # type: ignore[attr-defined] -from torch.utils.data import TensorDataset -from typing_extensions import ParamSpec - -from tune.protox.embedding.loss import COST_COLUMNS, CostLoss, get_bias_fn -from tune.protox.embedding.train_args import ( - EmbeddingTrainAllArgs, - EmbeddingTrainGenericArgs, -) -from tune.protox.embedding.trainer import StratifiedRandomSampler, VAETrainer -from tune.protox.embedding.utils import f_unpack_dict, parse_hyperopt_config -from tune.protox.embedding.vae import VAE, VAELoss, gen_vae_collate -from tune.protox.env.space.primitive_space import IndexSpace -from tune.protox.env.types import ( - TableAttrAccessSetsMap, - TableAttrListMap, - TableColTuple, -) -from tune.protox.env.workload import Workload -from util.log import DBGYM_LOGGER_NAME -from util.workspace import DBGymConfig, open_and_save, restart_ray, save_file - - -def fetch_vae_parameters_from_workload(w: Workload, ntables: int) -> tuple[int, int]: - max_indexable = w.max_indexable() - max_cat_features = max( - ntables, max_indexable + 1 - ) # +1 for the "null" per attribute list. - max_attrs = max_indexable + 1 # +1 to account for the table index. 
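# Roughly speaking: each training row is max_attrs categorical slots (one slot for the
# table plus the indexable column slots), and each slot is later one-hot encoded over
# max_cat_features classes by the VAE collate function, which gives the VAE's input width.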
- return max_attrs, max_cat_features - - -def fetch_index_parameters( - dbgym_cfg: DBGymConfig, - data: dict[str, Any], - workload_path: Path, -) -> tuple[int, int, TableAttrListMap, dict[TableColTuple, int]]: - tables = data["tables"] - attributes = data["attributes"] - query_spec = data["query_spec"] - workload = Workload( - dbgym_cfg, tables, attributes, query_spec, workload_path, pid=None - ) - modified_attrs = workload.column_usages() - - space = IndexSpace( - tables, - max_num_columns=data["max_num_columns"], - max_indexable_attributes=workload.max_indexable(), - seed=0, - rel_metadata=modified_attrs, - attributes_overwrite=copy.deepcopy(modified_attrs), - tbl_include_subsets=TableAttrAccessSetsMap({}), - index_space_aux_type=False, - index_space_aux_include=False, - deterministic_policy=True, - ) - - max_attrs, max_cat_features = fetch_vae_parameters_from_workload( - workload, len(tables) - ) - return max_attrs, max_cat_features, modified_attrs, space.class_mapping - - -def load_input_data( - dbgym_cfg: DBGymConfig, - traindata_path: Path, - train_size: float, - max_attrs: int, - require_cost: bool, - seed: int, -) -> tuple[TensorDataset, Any, Any, Optional[TensorDataset], int]: - # Load the input data. - columns = [] - columns += ["tbl_index", "idx_class"] - columns += [f"col{c}" for c in range(max_attrs - 1)] - if require_cost: - columns += COST_COLUMNS - - save_file(dbgym_cfg, traindata_path) - df = pd.read_parquet(traindata_path, columns=columns) - num_classes: int = df.idx_class.max() + 1 - - # Get the y's and the x's. - targets = (COST_COLUMNS + ["idx_class"]) if require_cost else ["idx_class"] - y = df[targets].values - df.drop(columns=COST_COLUMNS + ["idx_class"], inplace=True, errors="ignore") - x = df.values - del df - gc.collect() - gc.collect() - - if train_size == 1.0: - train_dataset = TensorDataset(torch.Tensor(x), torch.Tensor(y)) - del x - gc.collect() - gc.collect() - return train_dataset, y, y[:, -1], None, num_classes - - # Perform the train test split. - train_x, val_x, train_y, val_y = train_test_split( - x, - y, - test_size=1.0 - train_size, - train_size=train_size, - random_state=seed, - shuffle=True, - stratify=y[:, -1], - ) - del x - del y - gc.collect() - gc.collect() - - # Form the tensor datasets. 
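# The split above is stratified on the last label column (the index class), so every
# class keeps roughly the same proportion in the train and validation sets; the
# TensorDatasets below simply pair each feature row with its corresponding label row.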
- train_dataset = TensorDataset(torch.Tensor(train_x), torch.Tensor(train_y)) - val_dataset = TensorDataset(torch.Tensor(val_x), torch.Tensor(val_y)) - del val_x - del val_y - del train_x - gc.collect() - gc.collect() - logging.getLogger(DBGYM_LOGGER_NAME).info( - "Train Dataset Size: %s", len(train_dataset) - ) - return train_dataset, train_y, train_y[:, -1], val_dataset, num_classes - - -def create_vae_model( - config: dict[str, Any], max_attrs: int, max_cat_features: int -) -> VAE: - cat_input = max_attrs * max_cat_features - - assert config["act"] in ["relu", "mish"] - assert config["mean_output_act"] in ["tanh_squash", "sigmoid"] - - mean_output_act = { - "sigmoid": nn.Sigmoid, - }[config["mean_output_act"]] - - torch.set_float32_matmul_precision("high") - model = VAE( - max_categorical=max_cat_features, - input_dim=cat_input, - hidden_sizes=list(config["hidden_sizes"]), - latent_dim=config["latent_dim"], - act=nn.ReLU if config["act"] == "relu" else nn.Mish, - bias_init=config["bias_init"], - weight_init=config["weight_init"], - weight_uniform=config["weight_uniform"], - mean_output_act=mean_output_act, - output_scale=config.get("output_scale", 1.0), - ) - - return model - - -def train_all_embeddings( - dbgym_cfg: DBGymConfig, - generic_args: EmbeddingTrainGenericArgs, - train_all_args: EmbeddingTrainAllArgs, -) -> None: - """ - Trains all num_samples models using different samples of the hyperparameter space, writing their - results to different embedding_*/ folders in the run_*/ folder - """ - start_time = time.time() - - with open_and_save(dbgym_cfg, train_all_args.hpo_space_path, "r") as f: - json_dict = json.load(f) - space = parse_hyperopt_config(json_dict["config"]) - - # Connect to cluster or die. - restart_ray(dbgym_cfg.root_yaml["ray_gcs_port"]) - ray.init( - address=f"localhost:{dbgym_cfg.root_yaml['ray_gcs_port']}", log_to_driver=False - ) - - scheduler = FIFOScheduler() # type: ignore - # Search. - search = HyperOptSearch( - metric="loss", - mode="min", - points_to_evaluate=None, - n_initial_points=20, - space=space, - ) - limiter = ConcurrencyLimiter( - search, max_concurrent=train_all_args.train_max_concurrent - ) - tune_config = TuneConfig( - scheduler=scheduler, - search_alg=limiter, - num_samples=train_all_args.num_samples, - max_concurrent_trials=train_all_args.train_max_concurrent, - chdir_to_trial_dir=True, - ) - - dtime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - run_config = RunConfig( - name=f"ProtoXEmbeddingHPO_{dtime}", - failure_config=FailureConfig(max_failures=0, fail_fast=True), - sync_config=SyncConfig(), - verbose=2, - log_to_file=True, - storage_path=str( - dbgym_cfg.cur_task_runs_path("embedding_ray_results", mkdir=True) - ), - ) - - resources = {"cpu": 1} - trainable = with_resources( - with_parameters( - _hpo_train, - dbgym_cfg=dbgym_cfg, - generic_args=generic_args, - train_all_args=train_all_args, - ), - resources, - ) - - # Hopefully this is now serializable. 
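# Ray ships the trainable (and the parameters bound via with_parameters) to its worker
# processes, so everything captured here has to be picklable.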
- os.environ["RAY_CHDIR_TO_TRIAL_DIR"] = "0" # makes it so Ray doesn't change dir - tuner = ray.tune.Tuner( - trainable, - tune_config=tune_config, - run_config=run_config, - ) - - results = tuner.fit() - logging.getLogger(DBGYM_LOGGER_NAME).info( - "Best hyperparameters found were: ", - results.get_best_result(metric="loss", mode="min").config, - ) - if results.num_errors > 0: - logging.getLogger(DBGYM_LOGGER_NAME).error("Encountered exceptions!") - for i in range(len(results)): - if results[i].error: - logging.getLogger(DBGYM_LOGGER_NAME).error(f"Trial {results[i]} FAILED") - assert False - - train_all_embeddings_duration = time.time() - start_time - with open(f"{dbgym_cfg.dbgym_this_run_path}/hpo_train_time.txt", "w") as f: - f.write(f"{train_all_embeddings_duration}") - - -def _hpo_train( - config: dict[str, Any], - dbgym_cfg: DBGymConfig, - generic_args: EmbeddingTrainGenericArgs, - train_all_args: EmbeddingTrainAllArgs, -) -> None: - sys.path.append(os.fspath(dbgym_cfg.dbgym_repo_path)) - - # Explicitly set the number of torch threads. - os.environ["OMP_NUM_THREADS"] = str(train_all_args.train_max_concurrent) - - config = f_unpack_dict(config) - if config.get("use_bias", False): - if ( - "bias_separation" in config - and "addtl_bias_separation" in config - and "output_scale" in config - ): - # Do a hacky reconfigure. - if ( - config["output_scale"] - > config["bias_separation"] + config["addtl_bias_separation"] - ): - config["output_scale"] = ( - config["bias_separation"] + config["addtl_bias_separation"] - ) - config["metric_loss_md"]["output_scale"] = config["output_scale"] - - dtime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - trial_dpath = ( - dbgym_cfg.cur_task_runs_data_path(mkdir=True) - / f"embeddings_{dtime}_{os.getpid()}" - ) - assert ( - not trial_dpath.exists() - ), f"at this point, trial_dpath ({trial_dpath}) should not exist" - - # Seed - seed = np.random.randint(int(1), int(1e8)) - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - config["seed"] = seed - config["iterations_per_epoch"] = train_all_args.iterations_per_epoch - - logging.getLogger(DBGYM_LOGGER_NAME).info(config) - - # Build trainer and train. - trainer, epoch_end = _build_trainer( - dbgym_cfg, - config, - generic_args.traindata_path, - trial_dpath, - generic_args.benchmark_config_path, - train_all_args.train_size, - generic_args.workload_path, - dataloader_num_workers=0, - disable_tqdm=True, - ) - - # Dump the config that we are executing. - with open(f"{trial_dpath}/config", "w") as f: - f.write(json.dumps(config, indent=4)) - - trainer.train(num_epochs=config["num_epochs"]) - if trainer.failed: - # Trainer has failed. - with open(f"{trial_dpath}/FAILED", "w") as f: - if trainer.fail_msg is not None: - f.write(trainer.fail_msg) - - if trainer.fail_data is not None: - torch.save(trainer.fail_data, f"{trial_dpath}/fail_data.pth") - session.report({"loss": 1e8}) - else: - res_dict = epoch_end(trainer=trainer, force=True, suppress=True) - assert res_dict - loss = res_dict["total_avg_loss"] - session.report({"loss": loss}) - - -def _build_trainer( - dbgym_cfg: DBGymConfig, - config: dict[str, Any], - traindata_path: Path, - trial_dpath: Path, - benchmark_config_path: Path, - train_size: float, - workload_path: Path, - dataloader_num_workers: int = 0, - disable_tqdm: bool = False, -) -> tuple[VAETrainer, Callable[..., Optional[dict[str, Any]]]]: - max_cat_features = 0 - max_attrs = 0 - - # Load the benchmark configuration. 
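# The benchmark .yaml is expected to have a single top-level key named after the
# benchmark, e.g. (hypothetical sketch):
#   tpch:
#     tables: [...]
#     attributes: {...}
#     query_spec: {...}
#     max_num_columns: ...
# The indexing below unwraps that key before the index parameters are extracted.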
- with open_and_save(dbgym_cfg, benchmark_config_path, "r") as f: - data = yaml.safe_load(f) - data = data[[k for k in data.keys()][0]] - max_attrs, max_cat_features, _, class_mapping = fetch_index_parameters( - dbgym_cfg, data, workload_path - ) - - config["class_mapping"] = {} - for (tbl, col), key in class_mapping.items(): - config["class_mapping"][str(key)] = { - "relname": tbl, - "ord_column": col, - } - - # Device. - device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") - - # Get the datasets. - train_dataset, train_y, idx_class, val_dataset, num_classes = load_input_data( - dbgym_cfg, - traindata_path, - train_size, - max_attrs, - config["metric_loss_md"].get("require_cost", False), - config["seed"], - ) - - # Acquire the collation function. - collate_fn = gen_vae_collate(max_cat_features) - - # Construct the models and optimizers. - model = create_vae_model(config, max_attrs, max_cat_features) - model.to(device=device) - - # Trunk is the identity. - trunk = nn.Sequential(nn.Identity()) - trunk.to(device=device) - - models = {"trunk": trunk, "embedder": model} - optimizers = { - "embedder_optimizer": Adam( - model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"] - ), - } - - metric_loss = CostLoss(config["metric_loss_md"]) - - # Define the loss functions. - loss_funcs = { - "metric_loss": metric_loss, - "vae_loss": VAELoss(config["loss_fn"], max_attrs, max_cat_features), - } - - loss_weights = {"metric_loss": config["metric_loss_weight"], "vae_loss": 1} - - # Define the sampler. - sampler = StratifiedRandomSampler( - idx_class, - max_class=num_classes, - batch_size=config["batch_size"], - allow_repeats=True, - ) - - # Define the tester hook. - record_keeper, _, _ = logging_presets.get_record_keeper( - trial_dpath / "logs", trial_dpath / "tboard" - ) - hooks = logging_presets.get_hook_container(record_keeper) - model_folder = trial_dpath / "models" - - # Validation step loop. - assert val_dataset - val_dl = torch.utils.data.DataLoader( - val_dataset, batch_size=4096, collate_fn=collate_fn - ) - epoch_end: Callable[..., Optional[dict[str, Any]]] = _construct_epoch_end( - val_dl, config, hooks, model_folder - ) - - def clip_grad() -> None: - if config["grad_clip_amount"] is not None: - torch.nn.utils.clip_grad_norm_( - model.parameters(), config["grad_clip_amount"] - ) - - bias_fn = None - if config["use_bias"]: - bias_fn = get_bias_fn(config) - - # Build the trainer. 
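# If iterations_per_epoch is left unset, the fallback below makes one epoch roughly a
# single pass over the training set (len(train_dataset) / batch_size batches).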
- return ( - VAETrainer( - disable_tqdm=disable_tqdm, - bias_fn=bias_fn, - models=models, - optimizers=optimizers, - batch_size=config["batch_size"], - loss_funcs=loss_funcs, - mining_funcs={}, - dataset=train_dataset, - sampler=sampler, - iterations_per_epoch=( - config["iterations_per_epoch"] - if config["iterations_per_epoch"] is not None - else int(len(train_dataset) / config["batch_size"]) - ), - data_device=device, - dtype=None, - loss_weights=loss_weights, - collate_fn=collate_fn, - lr_schedulers=None, - gradient_clippers={"embedder_grad_clipper": clip_grad}, - dataloader_num_workers=dataloader_num_workers, - end_of_iteration_hook=hooks.end_of_iteration_hook, - end_of_epoch_hook=epoch_end, - ), - epoch_end, - ) - - -P = ParamSpec("P") - - -def _construct_epoch_end( - val_dl: torch.utils.data.DataLoader[Any], - config: dict[str, Any], - hooks: Any, - model_folder: Union[str, Path], -) -> Callable[P, Optional[dict[str, Any]]]: - def epoch_end(*args: P.args, **kwargs: P.kwargs) -> Optional[dict[str, Any]]: - trainer = kwargs.get("trainer", None) - assert trainer - assert isinstance(trainer, VAETrainer) - - save_interval = config.get("save_every", 1) - if (trainer.epoch - 1) % save_interval == 0: - # Save. - mf = Path(model_folder) / f"epoch{trainer.epoch}" - mf.mkdir(parents=True, exist_ok=True) - hooks.save_models(trainer, str(mf), str(trainer.epoch)) - - force = bool(kwargs.get("force", False)) - suppress = bool(kwargs.get("suppress", False)) - - if force: - total_metric_loss = [] - total_recon_loss = [] - with torch.no_grad(): - # Switch to eval mode. - trainer.switch_eval() - - pbar = None if suppress else tqdm.tqdm(total=len(val_dl)) - for i, curr_batch in enumerate(val_dl): - # Get the losses. - trainer.calculate_loss(curr_batch) - if isinstance(trainer.losses["metric_loss"], torch.Tensor): - total_metric_loss.append(trainer.losses["metric_loss"].item()) - else: - total_metric_loss.append(trainer.losses["metric_loss"]) - total_recon_loss.append(trainer.last_recon_loss) - - if pbar is not None: - pbar.set_description( - "total_recon=%.5f total_metric=%.5f" - % (total_recon_loss[-1], total_metric_loss[-1]) - ) - pbar.update(1) - - # Switch to train mode. 
- trainer.switch_train() - - if force: - return { - "avg_metric": np.mean(total_metric_loss), - "avg_recon": np.mean(total_recon_loss), - "total_avg_loss": np.mean(total_metric_loss) - + np.mean(total_recon_loss), - } - - return None - - return epoch_end diff --git a/tune/protox/embedding/train_args.py b/tune/protox/embedding/train_args.py deleted file mode 100644 index c86a6392..00000000 --- a/tune/protox/embedding/train_args.py +++ /dev/null @@ -1,83 +0,0 @@ -from pathlib import Path - - -class EmbeddingTrainGenericArgs: - """Same comment as EmbeddingDatagenGenericArgs""" - - def __init__( - self, - benchmark_name: str, - workload_name: str, - scale_factor: float, - benchmark_config_path: Path, - traindata_path: Path, - seed: int, - workload_path: Path, - ) -> None: - self.benchmark_name = benchmark_name - self.workload_name = workload_name - self.scale_factor = scale_factor - self.benchmark_config_path = benchmark_config_path - self.traindata_path = traindata_path - self.seed = seed - self.workload_path = workload_path - - -class EmbeddingTrainAllArgs: - """Same comment as EmbeddingDatagenGenericArgs""" - - def __init__( - self, - hpo_space_path: Path, - train_max_concurrent: int, - iterations_per_epoch: int, - num_samples: int, - train_size: float, - ) -> None: - self.hpo_space_path = hpo_space_path - self.train_max_concurrent = train_max_concurrent - self.iterations_per_epoch = iterations_per_epoch - self.num_samples = num_samples - self.train_size = train_size - - -class EmbeddingAnalyzeArgs: - """Same comment as EmbeddingDatagenGenericArgs""" - - def __init__( - self, - start_epoch: int, - batch_size: int, - num_batches: int, - max_segments: int, - num_points_to_sample: int, - num_classes_to_keep: int, - ) -> None: - self.start_epoch = start_epoch - self.batch_size = batch_size - self.num_batches = num_batches - self.max_segments = max_segments - self.num_points_to_sample = num_points_to_sample - self.num_classes_to_keep = num_classes_to_keep - - -class EmbeddingSelectArgs: - """Same comment as EmbeddingDatagenGenericArgs""" - - def __init__( - self, - recon: float, - latent_dim: int, - bias_sep: float, - idx_limit: int, - num_curate: int, - allow_all: bool, - flatten_idx: int, - ) -> None: - self.recon = recon - self.latent_dim = latent_dim - self.bias_sep = bias_sep - self.idx_limit = idx_limit - self.num_curate = num_curate - self.allow_all = allow_all - self.flatten_idx = flatten_idx diff --git a/tune/protox/embedding/trainer.py b/tune/protox/embedding/trainer.py deleted file mode 100644 index e9e851fa..00000000 --- a/tune/protox/embedding/trainer.py +++ /dev/null @@ -1,249 +0,0 @@ -import itertools -import logging -import random -from typing import Any, Callable, Iterator, Optional, Tuple, Union - -import numpy as np -import torch -import tqdm -from numpy.typing import NDArray -from pytorch_metric_learning import trainers -from pytorch_metric_learning.utils import common_functions as c_f -from torch.utils.data import Sampler - -from util.log import DBGYM_LOGGER_NAME - - -class StratifiedRandomSampler(Sampler[int]): - def __init__( - self, - labels: NDArray[Any], - max_class: int, - batch_size: int, - allow_repeats: bool = True, - ): - self.allow_repeats = allow_repeats - self.labels = labels - self.max_class = max_class - self.batch_size = batch_size - self.elem_per_class = 0 - assert self.batch_size > 0 - - def compute(self) -> tuple[dict[int, tuple[int, NDArray[Any]]], int, int]: - r = {} - for c in range(self.max_class): - lc = np.argwhere(self.labels == c) - lc = 
lc.reshape(lc.shape[0]) - r[c] = (lc.shape[0], lc) - elem_per_class = self.batch_size // len([k for k in r if r[k][0] > 0]) - - min_size = min([r[k][0] for k in r if r[k][0] > 0]) - min_steps = min_size // elem_per_class - return r, elem_per_class, min_steps - - def __iter__(self) -> Iterator[int]: - r, elem_per_class, min_steps = self.compute() - if self.allow_repeats: - for _ in range(len(self.labels) // self.batch_size): - elems = [ - r[k][1][ - np.random.randint(0, high=r[k][0], size=(elem_per_class,)) - ].tolist() - for k in r - if r[k][0] > 0 - ] - yield from itertools.chain(*elems) - else: - for k in r: - if r[k][0] > 0: - random.shuffle(list(r[k][1])) - - for i in range(min_steps): - elems = [ - r[k][1][ - i * elem_per_class : i * elem_per_class + elem_per_class - ].tolist() - for k in r - if r[k][0] > 0 - ] - yield from itertools.chain(*elems) - - def __len__(self) -> int: - if self.allow_repeats: - return len(self.labels) // self.batch_size - else: - r, elem_per_class, min_steps = self.compute() - return min_steps - - -class VAETrainer(trainers.BaseTrainer): # type: ignore - def __init__( - self, - disable_tqdm: bool, - bias_fn: Optional[ - Callable[ - [torch.Tensor, torch.Tensor], - Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - ] - ], - *args: Any, - **kwargs: Any, - ): - super().__init__(*args, **kwargs) - self.failed = False - self.fail_msg: Optional[str] = None - self.fail_data: Optional[ - tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] - ] = None - self.disable_tqdm = disable_tqdm - self.bias_fn = bias_fn - self.eval = False - - self.last_recon_loss = 0 - - def compute(self, base_output: Any) -> None: - assert False - - def maybe_get_metric_loss( - self, embeddings: torch.Tensor, labels: torch.Tensor, indices_tuple: Any - ) -> Any: - if self.loss_weights.get("metric_loss", 0) > 0: - return self.loss_funcs["metric_loss"](embeddings, labels, indices_tuple) - return 0 - - def maybe_get_vae_loss( - self, preds: torch.Tensor, data: torch.Tensor, labels: torch.Tensor - ) -> Any: - if self.loss_weights.get("vae_loss", 0) > 0: - return self.loss_funcs["vae_loss"]( - preds, None, None, (data, labels), is_eval=self.eval - ) - return 0 - - def calculate_loss(self, curr_batch: tuple[torch.Tensor, torch.Tensor]) -> None: - data, labels = curr_batch - if labels.shape[1] == 1: - # Flatten labels if it's a class. - labels = labels.flatten().long() - - data = data.to(self.data_device) - labels = labels.to(self.data_device) - - bias = None - if self.bias_fn is not None: - bias = self.bias_fn(data, labels) - - # Ensure that the bias is all valid. - if isinstance(bias, torch.Tensor): - assert not (bias.isnan() | bias.isinf()).any() - else: - assert not (bias[0].isnan() | bias[0].isinf()).any() - assert not (bias[1].isnan() | bias[1].isinf()).any() - - # Compute latent space. - embeddings, preds, error = self.models["embedder"](data, bias=bias) - - if error: - # We've encountered an error. - self.failed = True - self.fail_msg = "Latents is undefined." - # Don't tamper with any losses and just return. 
- self.fail_data = (data, labels, embeddings, preds) - return - - indices_tuple = self.maybe_mine_embeddings(embeddings, labels) - ml = self.maybe_get_metric_loss(embeddings, labels, indices_tuple) - - self.losses["metric_loss"] = ml - self.losses["vae_loss"] = self.maybe_get_vae_loss(preds, data, labels) - self.last_recon_loss = ( - self.loss_funcs["vae_loss"].last_loss_dict["recon_loss"]["losses"].item() - ) - - def backward(self) -> None: - if not self.failed: - self.losses["total_loss"].backward() - - def train(self, start_epoch: int = 1, num_epochs: int = 1) -> None: - self.initialize_dataloader() - for self.epoch in range(start_epoch, num_epochs + 1): - self.set_to_train() - logging.getLogger(DBGYM_LOGGER_NAME).info("TRAINING EPOCH %d" % self.epoch) - - if not self.disable_tqdm: - pbar = tqdm.tqdm(range(self.iterations_per_epoch)) - else: - pbar = range(self.iterations_per_epoch) - - for self.iteration in pbar: - self.forward_and_backward() - self.end_of_iteration_hook(self) - if ( - self.failed - or np.isnan(self.losses["total_loss"].item()) - or np.isinf(self.losses["total_loss"].item()) - or np.isnan(self.losses["vae_loss"].item()) - or np.isinf(self.losses["vae_loss"].item()) - or np.isnan(self.last_recon_loss) - or np.isinf(self.last_recon_loss) - ): - # Abort this particular run in this case. - self.failed = True - - ml = self.losses["metric_loss"] - vl = self.losses["vae_loss"] - - if self.fail_msg is not None: - pass - elif np.isnan(self.losses["total_loss"].item()) or np.isinf( - self.losses["total_loss"].item() - ): - self.fail_msg = ( - f"Total Loss is invalid ({ml}, {vl}, {self.last_recon_loss}" - ) - elif np.isnan(self.losses["vae_loss"].item()) or np.isinf( - self.losses["vae_loss"].item() - ): - self.fail_msg = ( - "VAE Loss is invalid ({ml}, {vl}, {self.last_recon_loss}" - ) - elif np.isnan(self.last_recon_loss) or np.isinf( - self.last_recon_loss - ): - self.fail_msg = ( - "Recon Loss is invalid ({ml}, {vl}, {self.last_recon_loss}" - ) - - return - - if not self.disable_tqdm: - pbar.set_description( - "total=%.5f recon=%.5f metric=%.5f" - % ( - self.losses["total_loss"], - self.last_recon_loss, - self.losses["metric_loss"], - ) - ) - self.step_lr_schedulers(end_of_epoch=False) - self.step_lr_schedulers(end_of_epoch=True) - self.zero_losses() - if self.end_of_epoch_hook(trainer=self) is False: - break - - def compute_embeddings(self, base_output: Any) -> None: - assert False - - def get_batch(self) -> tuple[torch.Tensor, torch.Tensor]: - self.dataloader_iter, curr_batch = c_f.try_next_on_generator(self.dataloader_iter, self.dataloader) # type: ignore - data, labels = self.data_and_label_getter(curr_batch) - return data, labels - - def modify_schema(self) -> None: - self.schema["loss_funcs"].keys += ["vae_loss"] - - def switch_eval(self) -> None: - self.eval = True - - def switch_train(self) -> None: - self.eval = False diff --git a/tune/protox/embedding/utils.py b/tune/protox/embedding/utils.py deleted file mode 100644 index eae06a7d..00000000 --- a/tune/protox/embedding/utils.py +++ /dev/null @@ -1,54 +0,0 @@ -import logging -from typing import Any - -from hyperopt import hp - -from util.log import DBGYM_LOGGER_NAME - - -def f_unpack_dict(dct: dict[str, Any]) -> dict[str, Any]: - """ - Unpacks all sub-dictionaries in given dictionary recursively. 
- There should be no duplicated keys across all nested - subdictionaries, or some instances will be lost without warning - - Source: https://www.kaggle.com/fanvacoolt/tutorial-on-hyperopt - - Parameters: - ---------------- - dct : dictionary to unpack - - Returns: - ---------------- - : unpacked dictionary - """ - res: dict[str, Any] = {} - for k, v in dct.items(): - if isinstance(v, dict): - res = {**res, k: v, **f_unpack_dict(v)} - else: - res[k] = v - return res - - -def parse_hyperopt_config(config: dict[str, Any]) -> dict[str, Any]: - def parse_key(key_dict: dict[str, Any]) -> Any: - if key_dict["type"] == "constant": - return key_dict["value"] - elif key_dict["type"] == "uniform": - return hp.uniform(key_dict["choice_name"], key_dict["min"], key_dict["max"]) - elif key_dict["type"] == "choice": - return hp.choice(key_dict["choice_name"], key_dict["choices"]) - elif key_dict["type"] == "subspaces": - subspaces = [parse_hyperopt_config(c) for c in key_dict["subspaces"]] - return hp.choice(key_dict["choice_name"], subspaces) - else: - logging.getLogger(DBGYM_LOGGER_NAME).error( - "Unknown hyperopt config definition", key_dict - ) - assert False - - parsed_config = {} - for key, key_dict in config.items(): - parsed_config[key] = parse_key(key_dict) - return parsed_config diff --git a/tune/protox/embedding/vae.py b/tune/protox/embedding/vae.py deleted file mode 100644 index 40520ffd..00000000 --- a/tune/protox/embedding/vae.py +++ /dev/null @@ -1,402 +0,0 @@ -import logging -from typing import Any, Callable, Optional, Tuple, Type, Union, cast - -import torch -import torch.nn as nn -import torch.nn.functional as F -from pytorch_metric_learning import losses, reducers -from pytorch_metric_learning.utils import common_functions as c_f - -from util.log import DBGYM_LOGGER_NAME - - -def gen_vae_collate( - max_categorical: int, infer: bool = False -) -> Callable[[list[Any]], Union[tuple[torch.Tensor, torch.Tensor], torch.Tensor]]: - def vae_collate( - batch: list[Any], - ) -> Union[tuple[torch.Tensor, torch.Tensor], torch.Tensor]: - if infer: - x = torch.as_tensor(batch).type(torch.int64) - else: - assert len(batch) > 0 - x = torch.stack([e[0] for e in batch]).type(torch.int64) - - y_shape = batch[0][1].shape[0] - ret_y = torch.stack([e[1] for e in batch]).view((x.shape[0], y_shape)) - - # One-hot all the X's. - scatter_dim = len(x.size()) - x_tensor = x.view(*x.size(), -1) - zero_x = torch.zeros(*x.size(), max_categorical, dtype=x.dtype) - ret_x: torch.Tensor = ( - zero_x.scatter_(scatter_dim, x_tensor, 1) - .view(zero_x.shape[0], -1) - .type(torch.float32) - ) - - if infer: - return ret_x - else: - return ret_x, ret_y - - return vae_collate - - -def acquire_loss_function( - loss_type: str, max_attrs: int, max_categorical: int -) -> Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor]: - def vae_cat_loss( - preds: torch.Tensor, data: torch.Tensor, labels: torch.Tensor - ) -> torch.Tensor: - if len(labels.shape) == 2: - labels = labels[:, -1].flatten() - - preds = preds.view(preds.shape[0], -1, max_categorical) - data = data.view(data.shape[0], -1, max_categorical) - - # Shape: - preds = torch.swapaxes(preds, 1, 2) - data = torch.argmax(data, dim=2) - - # Pray for ignore_index..? - data[:, 1:][data[:, 1:] == 0] = -100 - - recon_loss = F.cross_entropy( - preds, - data, - weight=None, - ignore_index=-100, - label_smoothing=1.0 / max_categorical, - reduction="none", - ) - if torch.isnan(recon_loss).any(): - # Dump any found nan in the loss. 
- logging.getLogger(DBGYM_LOGGER_NAME).error(preds[torch.isnan(recon_loss)]) - assert False - - recon_loss = recon_loss.sum(dim=(1,)) - return recon_loss - - loss_fn = { - "vae_cat_loss": vae_cat_loss, - }[loss_type] - return loss_fn - - -class VAEReducer(reducers.MultipleReducers): # type: ignore - def __init__(self, *args: Any, **kwargs: Any) -> None: - reducer = { - "recon_loss": reducers.MeanReducer(), - "elbo": reducers.MeanReducer(), - } - super().__init__(reducer, *args, **kwargs) - - def sub_loss_reduction( - self, sub_losses: list[Any], embeddings: Any = None, labels: Any = None - ) -> Any: - assert "elbo" in self.reducers - for i, k in enumerate(self.reducers.keys()): - if k == "elbo": - return sub_losses[i] - - -class VAELoss(losses.BaseMetricLossFunction): # type: ignore - def __init__( - self, - loss_fn: str, - max_attrs: int, - max_categorical: int, - *args: Any, - **kwargs: Any - ): - super().__init__(reducer=VAEReducer(), *args, **kwargs) - self.loss_fn = acquire_loss_function(loss_fn, max_attrs, max_categorical) - - eval_loss_fn_name = "vae_cat_loss" - self.eval_loss_fn = acquire_loss_function( - eval_loss_fn_name, max_attrs, max_categorical - ) - - def forward( - self, - embeddings: torch.Tensor, - labels: Any = None, - indices_tuple: Any = None, - ref_emb: Optional[tuple[torch.Tensor, torch.Tensor]] = None, - ref_labels: Any = None, - is_eval: bool = False, - ) -> Any: - """ - Args: - embeddings: tensor of size (batch_size, embedding_size) - labels: tensor of size (batch_size) - indices_tuple: tuple of size 3 for triplets (anchors, positives, negatives) - or size 4 for pairs (anchor1, postives, anchor2, negatives) - Can also be left as None - Returns: the loss - """ - self.reset_stats() - c_f.check_shapes(embeddings, labels) - if labels is not None: - labels = c_f.to_device(labels, embeddings) - ref_emb, ref_labels = c_f.set_ref_emb(embeddings, labels, ref_emb, ref_labels) - loss_dict = self.compute_loss( - embeddings, labels, indices_tuple, ref_emb, ref_labels, is_eval=is_eval - ) - self.add_embedding_regularization_to_loss_dict(loss_dict, embeddings) - return self.reducer(loss_dict, embeddings, labels) - - def compute_loss( - self, - preds: torch.Tensor, - unused0: Any, - unused1: Any, - tdata: Optional[tuple[torch.Tensor, torch.Tensor]], - *args: Any, - **kwargs: Any - ) -> Any: - is_eval = kwargs.get("is_eval", False) - eval_fn = self.eval_loss_fn if is_eval else self.loss_fn - - assert tdata - data, labels = tdata - recon_loss = eval_fn(preds, data, labels) - - # ELBO: - elbo = torch.mean(recon_loss) - - self.last_loss_dict = { - "recon_loss": { - "losses": recon_loss.mean(), - "indices": None, - "reduction_type": "already_reduced", - }, - "elbo": { - "losses": elbo.mean(), - "indices": None, - "reduction_type": "already_reduced", - }, - } - return self.last_loss_dict - - def _sub_loss_names(self) -> list[str]: - return ["recon_loss", "elbo"] - - -class Network(nn.Module): - def __init__( - self, - input_dim: int, - hidden_sizes: list[int], - output_dim: int, - act: Callable[[], nn.Module], - ) -> None: - super(Network, self).__init__() - - # Parametrize each standard deviation separately. 
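# The loop below builds a Linear -> activation pair for each consecutive pair of dims
# and then drops the trailing activation, so the final (output) layer stays linear.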
- dims = [input_dim] + hidden_sizes + [output_dim] - - layers: list[nn.Module] = [] - for d1, d2 in zip(dims[:-1], dims[1:]): - layers.append(nn.Linear(d1, d2)) - if act is not None: - layers.append(act()) - if act is not None: - layers = layers[:-1] - self.module = nn.Sequential(*layers) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - return cast(torch.Tensor, self.module(x)) - - -# Define the encoder -class Encoder(nn.Module): - def __init__( - self, - input_dim: int, - hidden_sizes: list[int], - latent_dim: int, - act: Type[nn.Module], - mean_output_act: Optional[Type[nn.Module]] = None, - ): - super(Encoder, self).__init__() - - # Parametrize each standard deviation separately. - dims = [input_dim] + hidden_sizes + [latent_dim] - - layers: list[nn.Module] = [] - for d1, d2 in zip(dims[:-1], dims[1:]): - layers.append(nn.Linear(d1, d2)) - if act is not None: - layers.append(act()) - if act is not None: - layers = layers[:-1] - - self.module = nn.Sequential(*layers) - if mean_output_act is None: - self.mean_output_act = None - else: - self.mean_output_act = mean_output_act() - - def forward(self, x: torch.Tensor) -> torch.Tensor: - assert len(x.shape) == 2 - mu = self.module(x) - - # Apply activation function to mean if necessary. - if self.mean_output_act is not None: - mu = self.mean_output_act(mu) - - return cast(torch.Tensor, mu) - - -# Define the decoder -class Decoder(nn.Module): - def __init__( - self, - latent_dim: int, - hidden_sizes: list[int], - input_dim: int, - act: Type[nn.Module], - ): - super(Decoder, self).__init__() - - dims = [latent_dim] + [l for l in hidden_sizes] + [input_dim] - layers: list[nn.Module] = [] - for d1, d2 in zip(dims[:-1], dims[1:]): - layers.append(nn.Linear(d1, d2)) - if act is not None: - layers.append(act()) - if act is not None: - layers = layers[:-1] - self.module = nn.Sequential(*layers) - - def forward(self, z: torch.Tensor) -> torch.Tensor: - x_hat = self.module(z) - return cast(torch.Tensor, x_hat) - - -def init_modules( - encoder: Encoder, - decoder: Decoder, - bias_init: str, - weight_init: str, - weight_uniform: bool, -) -> None: - def init(layer: nn.Module) -> None: - if isinstance(layer, nn.Linear): - if bias_init == "zeros": - torch.nn.init.zeros_(layer.bias) - elif "constant" in bias_init: - cons = float(bias_init.split("constant")[-1]) - torch.nn.init.constant_(layer.bias, cons) - - if weight_init != "default": - init_fn: Callable[[Union[nn.Module, torch.Tensor]], None] = cast( - Callable[[Union[nn.Module, torch.Tensor]], None], - { - ("xavier", True): torch.nn.init.xavier_uniform_, - ("xavier", False): torch.nn.init.xavier_normal_, - ("kaiming", True): torch.nn.init.kaiming_uniform_, - ("kaiming", False): torch.nn.init.kaiming_normal_, - ("spectral", True): torch.nn.utils.spectral_norm, - ("spectral", False): torch.nn.utils.spectral_norm, - ("orthogonal", True): torch.nn.init.orthogonal_, - ("orthogonal", False): torch.nn.init.orthogonal_, - }[(weight_init, weight_uniform)], - ) - - if weight_init == "spectral": - init_fn(layer) - else: - init_fn(layer.weight) - - modules: list[nn.Module] = [encoder, decoder] - for module in modules: - if module is not None: - module.apply(init) - - -# Define the model -class VAE(nn.Module): - def __init__( - self, - max_categorical: int, - input_dim: int, - hidden_sizes: list[int], - latent_dim: int, - act: Type[nn.Module], - bias_init: str = "default", - weight_init: str = "default", - weight_uniform: bool = False, - mean_output_act: Optional[Type[nn.Module]] = None, - output_scale: 
float = 1.0, - ) -> None: - super(VAE, self).__init__() - self.encoder = Encoder( - input_dim, hidden_sizes, latent_dim, act, mean_output_act=mean_output_act - ) - self.decoder = Decoder(latent_dim, list(reversed(hidden_sizes)), input_dim, act) - init_modules(self.encoder, self.decoder, bias_init, weight_init, weight_uniform) - - self.input_dim = input_dim - self.max_categorical = max_categorical - self._collate: Optional[Callable[[torch.Tensor], torch.Tensor]] = None - self.output_scale = output_scale - - def get_collate(self) -> Callable[[torch.Tensor], torch.Tensor]: - if self._collate is None: - # In infer mode, we def know it'll only return 1 argument. - self._collate = cast( - Callable[[torch.Tensor], torch.Tensor], - gen_vae_collate(self.max_categorical, infer=True), - ) - return self._collate - - def forward( - self, - x: torch.Tensor, - bias: Optional[Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]] = None, - ) -> Union[tuple[torch.Tensor, torch.Tensor, bool], tuple[torch.Tensor, bool]]: - return self._compute(x, bias=bias, require_full=True) - - def latents( - self, - x: torch.Tensor, - bias: Optional[Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]] = None, - require_full: bool = False, - ) -> tuple[torch.Tensor, bool]: - rets = self._compute(x, bias=bias, require_full=False) - assert len(rets) == 2 - return rets[0], rets[1] - - def _compute( - self, - x: torch.Tensor, - bias: Optional[Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]] = None, - require_full: bool = False, - ) -> Union[tuple[torch.Tensor, torch.Tensor, bool], tuple[torch.Tensor, bool]]: - latents: torch.Tensor = self.encoder(x) - latents = latents * self.output_scale - - if bias is not None: - if isinstance(bias, torch.Tensor): - assert bias.shape[0] == latents.shape[0] - assert bias.shape[1] == 1 - latents = latents + bias - else: - # Add the bias. 
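# When the bias arrives as a (shift, cap) pair, the shift is added first and the
# result is then clamped into [0, cap].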
- latents = latents + bias[0] - if isinstance(bias[1], torch.Tensor): - latents = torch.clamp(latents, torch.zeros_like(bias[1]), bias[1]) - else: - latents = torch.clamp(latents, 0, bias[1]) - - lerror = bool((latents.isnan() | latents.isinf()).any()) - - if require_full: - decoded: torch.Tensor = self.decoder(latents) - derror = bool((decoded.isnan() | decoded.isinf()).any()) - return latents, decoded, (lerror or derror) - - return latents, lerror diff --git a/tune/protox/env/__init__.py b/tune/protox/env/__init__.py deleted file mode 100644 index b4744173..00000000 --- a/tune/protox/env/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from gymnasium import register - -register( - id="Postgres-v0", - entry_point="tune.protox.env.pg_env:PostgresEnv", -) diff --git a/tune/protox/env/artifact_manager.py b/tune/protox/env/artifact_manager.py deleted file mode 100644 index 11de1381..00000000 --- a/tune/protox/env/artifact_manager.py +++ /dev/null @@ -1,187 +0,0 @@ -import inspect -import json -import logging -import pickle -import time -from datetime import datetime -from pathlib import Path -from typing import Any, Callable, Optional, TypeVar, Union - -import numpy as np -from plumbum import local -from torch.utils.tensorboard.writer import SummaryWriter -from typing_extensions import ParamSpec - -from util.log import DBGYM_LOGGER_NAME -from util.workspace import DBGymConfig - -P = ParamSpec("P") -T = TypeVar("T") - - -def time_record(key: str) -> Callable[[Callable[P, T]], Callable[P, T]]: - def wrap(f: Callable[P, T]) -> Callable[P, T]: - def wrapped_f(*args: P.args, **kwargs: P.kwargs) -> T: - start = time.time() - ret = f(*args, **kwargs) - - # TODO(wz2): This is a hack to get a artifact_manager instance. - first_arg = args[0] # Ignore the indexing type error - assert hasattr( - first_arg, "artifact_manager" - ), f"{first_arg} {type(first_arg)}" - - if first_arg.artifact_manager is None: - # If there is no artifact_manager, just return. - return ret - - assert isinstance(first_arg.artifact_manager, ArtifactManager) - if first_arg.artifact_manager is not None: - cls_name = type(first_arg).__name__ - first_arg.artifact_manager.record( - f"{cls_name}_{key}", time.time() - start - ) - return ret - - return wrapped_f - - return wrap - - -class Encoder(json.JSONEncoder): - def default(self, obj: Any) -> Any: - if isinstance(obj, np.integer): - return int(obj) - if isinstance(obj, np.floating): - return float(obj) - if isinstance(obj, np.ndarray): - return obj.tolist() - return super(Encoder, self).default(obj) - - -class ArtifactManager(object): - """ - This class manages the following artifacts of Proto-X: info for replaying and TensorBoard output. - - Note that the root logger is set up globally inside the upper-most task.py file. Do not reconfigure it here. 
- """ - - # The output log is the file that the root logger writes to - REPLAY_INFO_LOG_FNAME = "replay_info.log" - REPLAY_LOGGER_NAME = f"{DBGYM_LOGGER_NAME}.replay" - - def __init__( - self, - dbgym_cfg: DBGymConfig, - trace: bool, - ) -> None: - self.log_dpath = dbgym_cfg.cur_task_runs_artifacts_path(mkdir=True) - self.trace = trace - self.tensorboard_dpath = self.log_dpath / "tboard" - self.tuning_steps_dpath = self.log_dpath / "tuning_steps" - self.tuning_steps_dpath.mkdir(parents=True, exist_ok=True) - - # Create replay logger - replay_info_log_handler = logging.FileHandler( - self.tuning_steps_dpath / ArtifactManager.REPLAY_INFO_LOG_FNAME - ) - replay_info_log_handler.setFormatter( - logging.Formatter( - "%(levelname)s:%(asctime)s [%(filename)s:%(lineno)s] %(message)s" - ) - ) - replay_info_log_handler.setLevel(logging.INFO) - logging.getLogger(ArtifactManager.REPLAY_LOGGER_NAME).addHandler( - replay_info_log_handler - ) - - # Setup the writer. - self.writer: Union[SummaryWriter, None] = None - if self.trace: - self.tensorboard_dpath.mkdir(parents=True, exist_ok=True) - self.writer = SummaryWriter(self.tensorboard_dpath) # type: ignore[no-untyped-call] - - self.iteration = 1 - self.iteration_data: dict[str, Any] = {} - - def log_to_replay_info(self, message: str) -> None: - logging.getLogger(ArtifactManager.REPLAY_LOGGER_NAME).info(message) - - def stash_results( - self, - info_dict: dict[str, Any], - name_override: Optional[str] = None, - ray_trial_id: Optional[str] = None, - ) -> None: - """ - Stash data about this step of tuning so that it can be replayed. - """ - dname = ( - name_override - if name_override - else datetime.now().strftime("%Y-%m-%d_%H-%M-%S") - ) - if ray_trial_id != None: - # Orthogonal to whether name_override is used, ray_trial_id disambiguates between folders created - # by different HPO trials so that the folders don't overwrite each other. - dname += f"_{ray_trial_id}" - - target_stash_dpath = self.tuning_steps_dpath / dname - - if ( - info_dict["results_dpath"] is not None - and Path(info_dict["results_dpath"]).exists() - ): - local["mv"][info_dict["results_dpath"], str(target_stash_dpath)].run() - self.log_to_replay_info( - f"mv src={info_dict['results_dpath']} dst={str(target_stash_dpath)}" - ) - else: - target_stash_dpath.mkdir(parents=True, exist_ok=True) - - if info_dict["prior_pgconf"]: - local["cp"][ - info_dict["prior_pgconf"], str(target_stash_dpath / "old_pg.conf") - ].run() - - if info_dict["prior_state_container"]: - with open(target_stash_dpath / "prior_state.pkl", "wb") as f: - # info_dict["prior_state_container"] is a somewhat complex object so we use pickle over json - pickle.dump(info_dict["prior_state_container"], f) - - if info_dict["actions_info"]: - with open(target_stash_dpath / "action.pkl", "wb") as f: - pickle.dump(info_dict["actions_info"], f) - - def advance(self) -> None: - if self.writer is None: - return - - for key, value in self.iteration_data.items(): - if isinstance(value, str): - # str is considered a np.ScalarType - self.writer.add_text(key, value, self.iteration) # type: ignore[no-untyped-call] - else: - self.writer.add_scalar(key, value, self.iteration) # type: ignore[no-untyped-call] - - del self.iteration_data - self.iteration_data = {} - self.iteration += 1 - self.writer.flush() # type: ignore[no-untyped-call] - - def record(self, key: str, value: Any) -> None: - stack = inspect.stack(context=2) - caller = stack[1] - - # Accumulate data. 
- assert isinstance(value, np.ScalarType) - key = f"{caller.filename}:{caller.lineno}_{key}" - if key not in self.iteration_data: - self.iteration_data[key] = 0.0 - self.iteration_data[key] += value - - def flush(self) -> None: - if self.trace: - assert self.writer - self.advance() - self.writer.flush() # type: ignore[no-untyped-call] diff --git a/tune/protox/env/lsc/__init__.py b/tune/protox/env/lsc/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/env/lsc/lsc.py b/tune/protox/env/lsc/lsc.py deleted file mode 100644 index b1fecf06..00000000 --- a/tune/protox/env/lsc/lsc.py +++ /dev/null @@ -1,136 +0,0 @@ -import logging -from typing import Any, Optional, TypeVar, cast - -import numpy as np -import torch - -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.types import ProtoAction -from util.log import DBGYM_LOGGER_NAME - -T = TypeVar("T", torch.Tensor, np.typing.NDArray[np.float32]) - - -class LSC(object): - def __init__( - self, - horizon: int, - lsc_parameters: dict[str, Any], - vae_config: dict[str, Any], - artifact_manager: Optional[ArtifactManager], - ): - self.frozen = False - self.horizon = horizon - self.num_steps = 0 - self.num_episodes = 0 - self.vae_configuration = vae_config - self.enabled = lsc_parameters["enabled"] - - lsc_splits = lsc_parameters["initial"].split(",") - lsc_increments = lsc_parameters["increment"].split(",") - lsc_max = lsc_parameters["max"].split(",") - if len(lsc_splits) == 1: - lsc_splits = [float(lsc_splits[0])] * horizon - else: - assert len(lsc_splits) == horizon - lsc_splits = [float(f) for f in lsc_splits] - - if len(lsc_increments) == 1: - lsc_increments = [float(lsc_increments[0])] * horizon - else: - assert len(lsc_increments) == horizon - lsc_increments = [float(f) for f in lsc_increments] - - if len(lsc_max) == 1: - lsc_max = [float(lsc_max[0])] * horizon - else: - assert len(lsc_max) == horizon - lsc_max = [float(f) for f in lsc_max] - - self.shift_eps_freq = lsc_parameters["shift_eps_freq"] - self.lsc_shift = np.array(lsc_splits) - self.increment = np.array(lsc_increments) - self.max = np.array(lsc_max) - self.shift_after = lsc_parameters["shift_after"] - self.artifact_manager = artifact_manager - - logging.getLogger(DBGYM_LOGGER_NAME).info("LSC Shift: %s", self.lsc_shift) - logging.getLogger(DBGYM_LOGGER_NAME).info( - "LSC Shift Increment: %s", self.increment - ) - logging.getLogger(DBGYM_LOGGER_NAME).info("LSC Shift Max: %s", self.max) - - def apply_bias(self, action: ProtoAction) -> ProtoAction: - if not self.enabled: - return action - - assert ( - action.shape[-1] == self.vae_configuration["latent_dim"] - ), f"{action.shape} {self.vae_configuration['latent_dim']}" - - # Get the LSC shift associated with the current episode. - lsc_shift = self.lsc_shift[(self.num_steps % self.horizon)] - lsc_shift = lsc_shift * self.vae_configuration["output_scale"] - return ProtoAction(action + lsc_shift) - - def current_bias(self) -> float: - if not self.enabled: - return 0.0 - - # Get the LSC shift associated with the current episode. 
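# The shift is indexed by the current step within the horizon and scaled by the VAE's
# output_scale so that it lands on the same scale as the latent embedding it biases.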
- lsc_shift = self.lsc_shift[(self.num_steps % self.horizon)] - lsc_shift = lsc_shift * self.vae_configuration["output_scale"] - return cast(float, lsc_shift) - - def current_scale(self) -> np.typing.NDArray[np.float32]: - if not self.enabled: - return np.array([-1.0], dtype=np.float32) - - lsc_shift = self.lsc_shift[(self.num_steps % self.horizon)] - lsc_max = self.max[(self.num_steps % self.horizon)] - rel = lsc_shift / lsc_max - return np.array([(rel * 2.0) - 1], dtype=np.float32) - - def inverse_scale(self, value: torch.Tensor) -> torch.Tensor: - if not self.enabled: - return torch.zeros_like(value).float() - - lsc_max = self.max[0] - lsc_shift = ((value + 1) / 2.0) * lsc_max - return cast(torch.Tensor, lsc_shift * self.vae_configuration["output_scale"]) - - def advance(self) -> None: - if self.frozen or (not self.enabled): - return - - self.num_steps += 1 - - def freeze(self) -> None: - self.frozen = True - - def unfreeze(self) -> None: - self.frozen = False - - def reset(self) -> None: - if self.frozen or (not self.enabled): - return - - # Advance the episode count. - self.num_episodes += 1 - if (self.num_episodes <= self.shift_after) or ( - (self.num_episodes - self.shift_after) % self.shift_eps_freq != 0 - ): - # Reset the number of steps we've taken. - self.num_steps = 0 - else: - # Get how many steps to make the update on. - bound = self.horizon - self.num_steps = 0 - - # Now try to perform the LSC shifts. - # Increment the current bias with the increment. - self.lsc_shift[:bound] += self.increment[:bound] - self.lsc_shift = self.lsc_shift % self.max - logging.getLogger(DBGYM_LOGGER_NAME).info( - "LSC Bias Update: %s", self.lsc_shift - ) diff --git a/tune/protox/env/lsc/lsc_wrapper.py b/tune/protox/env/lsc/lsc_wrapper.py deleted file mode 100644 index 378f3f71..00000000 --- a/tune/protox/env/lsc/lsc_wrapper.py +++ /dev/null @@ -1,54 +0,0 @@ -import logging -from typing import Any, Optional, Tuple - -import gymnasium as gym - -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.lsc.lsc import LSC -from tune.protox.env.target_reset.target_reset_wrapper import TargetResetWrapper -from util.log import DBGYM_LOGGER_NAME - - -class LSCWrapper(gym.Wrapper[Any, Any, Any, Any]): - def __init__( - self, - lsc: LSC, - env: gym.Env[Any, Any], - artifact_manager: Optional[ArtifactManager], - ): - assert not isinstance(env, TargetResetWrapper) - super().__init__(env) - self.lsc = lsc - self.artifact_manager = artifact_manager - - def reset(self, *args: Any, **kwargs: Any) -> tuple[Any, dict[str, Any]]: - state, info = self.env.reset(*args, **kwargs) - self.lsc.reset() - - state["lsc"] = self.lsc.current_scale() - lsc = state["lsc"] - logging.getLogger(DBGYM_LOGGER_NAME).debug(f"Attaching LSC: {lsc}") - - return state, info - - def step( - self, *args: Any, **kwargs: Any - ) -> tuple[Any, float, bool, bool, dict[str, Any]]: - state, reward, term, trunc, info = self.env.step(*args, **kwargs) - - # Remember the LSC when we considered this action. - info["lsc"] = self.lsc.current_scale() - old_bias = self.lsc.current_bias() - old_lsc = info["lsc"] - - # Store the new LSC. 
- self.lsc.advance() - state["lsc"] = self.lsc.current_scale() - new_bias = self.lsc.current_bias() - - lsc = state["lsc"] - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"Shifting LSC: {old_lsc} ({old_bias}) -> {lsc} ({new_bias})" - ) - - return state, float(reward), term, trunc, info diff --git a/tune/protox/env/mqo/__init__.py b/tune/protox/env/mqo/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/env/mqo/mqo_wrapper.py b/tune/protox/env/mqo/mqo_wrapper.py deleted file mode 100644 index 66777231..00000000 --- a/tune/protox/env/mqo/mqo_wrapper.py +++ /dev/null @@ -1,430 +0,0 @@ -import copy -import json -import logging -from typing import Any, Optional, Tuple, Union - -import gymnasium as gym -import numpy as np -import torch - -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.pg_env import PostgresEnv -from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.space.primitive import SettingType, is_binary_enum, is_knob_enum -from tune.protox.env.space.primitive.knob import CategoricalKnob, Knob -from tune.protox.env.space.state.space import StateSpace -from tune.protox.env.space.utils import parse_access_methods -from tune.protox.env.types import ( - BestQueryRun, - EnvInfoDict, - HolonAction, - KnobSpaceAction, - KnobSpaceContainer, - QuerySpaceKnobAction, - QueryTableAccessMap, -) -from util.log import DBGYM_LOGGER_NAME - - -def _mutilate_action_with_metrics( - action_space: HolonSpace, - action: HolonAction, - query_metric_data: Optional[dict[str, BestQueryRun]], - timeout_qknobs: Optional[QuerySpaceKnobAction] = None, -) -> HolonAction: - """ - Modify action to make it the one with the best query knobs out - of all variations we tried. - """ - - # At the start of the function, the query knobs in `action` are those selected by the agent. - - if query_metric_data is not None: - extract_q_knobs = action_space.extract_query(action) - assert extract_q_knobs - - processed = set() - for q, data in query_metric_data.items(): - # For queries where at least one variation didn't time out, modify the query knobs in `action` - # to be that from the best variation. - if not data.timed_out: - assert data.query_run - for k, v in data.query_run.qknobs.items(): - # Implant the best. - extract_q_knobs[k] = v - # For all queries that we ran, even if all their variations time out, add them to `processed`. - # By doing so, the next part of the function will not affect queries where all variations timed - # out and will leave their knobs equal to the ones selected by the agent. - processed.add(q) - - # If we have set `timeout_qknobs`, then use those knobs for the queries that we didn't run at all. - # Usually, these `timeout_qknobs` are those of the "PrevDual" variation. - if timeout_qknobs: - assert timeout_qknobs - - all_qids = set([k.query_name for k in timeout_qknobs.keys()]) - processed - for qid in all_qids: - qid_rest = { - k: v - for k, v in timeout_qknobs.items() - if k.name().startswith(f"{qid}_") - } - for k, v in qid_rest.items(): - # Implant the reset target for queries we didn't see. - extract_q_knobs[k] = v - - action = action_space.replace_query(action, extract_q_knobs) - - # There are three types of queries we handle in different ways. - # For queries that executed where at least one variation didn't time out, we can safely use the - # query knobs of their best variation. 
- # For queries that executed where all their variations timed out, we don't want to use the knobs - # in `timeout_qknobs` since those are known to be bad. Instead, we just use the knobs selected by - # by the agent, which may be different from the knobs of *all* variations. - # Finally, for queries that didn't execute, we'll assume that some arbitrary variation ("PrevDual") - # is probably better than the knobs set by the agent. - - return action - - -def _regress_query_knobs( - qknobs: QuerySpaceKnobAction, - sysknobs: Union[KnobSpaceAction, KnobSpaceContainer], - ams: QueryTableAccessMap, - artifact_manager: Optional[ArtifactManager] = None, -) -> QuerySpaceKnobAction: - global_qknobs = {} - for knob, _ in qknobs.items(): - if knob.knob_name in sysknobs: - # Defer to system using the base knob (without table/query prefix). - global_qknobs[knob] = sysknobs[knob.knob_name] - elif knob.knob_type in [ - SettingType.SCANMETHOD_ENUM, - SettingType.SCANMETHOD_ENUM_CATEGORICAL, - ]: - assert "_scanmethod" in knob.knob_name - alias = knob.knob_name.split("_scanmethod")[0] - qid_prefix = knob.query_name - assert qid_prefix - - if alias in ams[qid_prefix]: - value = 1.0 if "Index" in ams[qid_prefix][alias] else 0.0 - else: - # Log out the missing alias for debugging reference. - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"Found missing {alias} in the parsed {ams}." - ) - value = 0.0 - global_qknobs[knob] = value - elif knob.knob_type == SettingType.BOOLEAN: - global_qknobs[knob] = 1.0 - elif knob.knob_name == "random_page_cost": - global_qknobs[knob] = knob.project_scraped_setting(4.0) - elif knob.knob_name == "seq_page_cost": - global_qknobs[knob] = knob.project_scraped_setting(1.0) - elif knob.knob_name == "hash_mem_multiplier": - global_qknobs[knob] = knob.project_scraped_setting(2.0) - elif isinstance(knob, CategoricalKnob): - global_qknobs[knob] = knob.default_value - assert len(global_qknobs) == len(qknobs) - return QuerySpaceKnobAction(global_qknobs) - - -class MQOWrapper(gym.Wrapper[Any, Any, Any, Any]): - def __init__( - self, - workload_eval_mode: str, - workload_eval_inverse: bool, - workload_eval_reset: bool, - query_timeout: int, - benchbase_config: dict[str, Any], - env: gym.Env[Any, Any], - artifact_manager: Optional[ArtifactManager], - ): - assert isinstance(env, PostgresEnv) or isinstance( - env.unwrapped, PostgresEnv - ), "MQOPostgresEnv must be directly above PostgresEnv" - super().__init__(env) - - self.workload_eval_mode = workload_eval_mode - assert self.workload_eval_mode in [ - "all", - "all_enum", - "global_dual", - "prev_dual", - "pq", - ] - - self.workload_eval_mode = workload_eval_mode - self.workload_eval_inverse = workload_eval_inverse - self.workload_eval_reset = workload_eval_reset - self.query_timeout = query_timeout - self.benchbase_config = benchbase_config - self.best_observed: dict[str, BestQueryRun] = {} - self.artifact_manager = artifact_manager - - def _update_best_observed( - self, query_metric_data: dict[str, BestQueryRun], force_overwrite: bool = False - ) -> None: - if query_metric_data is not None: - for qid, best_run in query_metric_data.items(): - if qid not in self.best_observed or force_overwrite: - self.best_observed[qid] = BestQueryRun( - best_run.query_run, - best_run.runtime, - best_run.timed_out, - None, - None, - ) - assert best_run.runtime is not None - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"[best_observe] {qid}: {best_run.runtime/1e6} (force: {force_overwrite})" - ) - elif not best_run.timed_out: - qobs = self.best_observed[qid] 
- assert qobs.runtime and best_run.runtime - if best_run.runtime < qobs.runtime: - self.best_observed[qid] = BestQueryRun( - best_run.query_run, - best_run.runtime, - best_run.timed_out, - None, - None, - ) - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"[best_observe] {qid}: {best_run.runtime/1e6}" - ) - - def step( # type: ignore - self, - action: HolonAction, - ) -> tuple[Any, float, bool, bool, EnvInfoDict]: - # Step based on the "global" action. - assert isinstance(self.unwrapped, PostgresEnv) - success, info = self.unwrapped.step_before_execution(action) - prior_state = info["prior_state_container"] - timeout_qknobs = None - - assert isinstance(self.action_space, HolonSpace) - extract_q_knobs = self.action_space.extract_query(action) - extract_knobs = self.action_space.extract_knobs(action) - assert extract_q_knobs - assert extract_knobs - - runs = [] - if prior_state and self.workload_eval_mode in ["all", "all_enum", "prev_dual"]: - # Load the prior knobs. - runs.append( - ( - "PrevDual", - self.action_space.replace_query( - action, self.action_space.extract_query(prior_state) - ), - ) - ) - - # FIXME(wz2): Default, restore towards what we've learned from the last step. - # We can optionally also restore towards the "default" optimizer behavior. - timeout_qknobs = self.action_space.extract_query(prior_state) - - if self.workload_eval_mode in ["all", "all_enum", "global_dual"]: - # Load the global (optimizer) knobs. - qid_ams = parse_access_methods( - self.unwrapped.pg_conn.conn(), self.unwrapped.workload.queries - ) - runs.append( - ( - "GlobalDual", - self.action_space.replace_query( - action, - _regress_query_knobs( - extract_q_knobs, - extract_knobs, - qid_ams, - self.artifact_manager, - ), - ), - ) - ) - - # The requested agent. - runs.append(("PerQuery", copy.deepcopy(action))) - - if self.workload_eval_inverse: - # The selected inverse. - runs.append( - ( - "PerQueryInverse", - self.action_space.replace_query( - action, - QuerySpaceKnobAction( - {k: k.invert(v) for k, v in extract_q_knobs.items()} - ), - ), - ) - ) - - if self.workload_eval_mode in ["all_enum"]: - # Construct top-enum and bottom-enum knobs. - def transmute( - k: Union[Knob, CategoricalKnob], v: Any, top: bool = True - ) -> Any: - if not is_knob_enum(k.knob_type): - return v - elif is_binary_enum(k.knob_type) and top: - return 1 - elif is_binary_enum(k.knob_type) and not top: - return 0 - else: - assert isinstance(k, CategoricalKnob) - return k.sample() - - runs.append( - ( - "TopEnum", - self.action_space.replace_query( - action, - QuerySpaceKnobAction( - { - k: transmute(k, v, top=True) - for k, v in extract_q_knobs.items() - } - ), - ), - ) - ) - - runs.append( - ( - "BottomEnum", - self.action_space.replace_query( - action, - QuerySpaceKnobAction( - { - k: transmute(k, v, top=False) - for k, v in extract_q_knobs.items() - } - ), - ), - ) - ) - - # Execute. - logging.getLogger(DBGYM_LOGGER_NAME).info("MQOWrapper called step_execute()") - success, info = self.unwrapped.step_execute(success, runs, info) - if info["query_metric_data"]: - self._update_best_observed(info["query_metric_data"]) - - best_observed_holon_action = _mutilate_action_with_metrics( - self.action_space, action, info["query_metric_data"], timeout_qknobs - ) - - with torch.no_grad(): - # Pass the mutilated action back through. 
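Once every variation has executed, _mutilate_action_with_metrics() stitches the best observed run per query back into a single action. A hedged sketch of that per-query selection, with variation runtimes reduced to plain floats (the real code also carries the timeout and fallback-knob handling described above, which is not reproduced here):

def best_variation_per_query(
    runs: dict[str, dict[str, float]],  # variation name -> {qid: runtime}
) -> dict[str, str]:
    best: dict[str, tuple[str, float]] = {}
    for variation, runtimes in runs.items():
        for qid, runtime in runtimes.items():
            if qid not in best or runtime < best[qid][1]:
                best[qid] = (variation, runtime)
    return {qid: variation for qid, (variation, _) in best.items()}

print(best_variation_per_query({
    "PrevDual": {"q1": 120.0, "q2": 80.0},
    "PerQuery": {"q1": 95.0, "q2": 110.0},
}))  # {'q1': 'PerQuery', 'q2': 'PrevDual'}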
- assert isinstance(self.action_space, HolonSpace) - assert info["actions_info"] is not None - info["actions_info"][ - "best_observed_holon_action" - ] = best_observed_holon_action - info["maximal_embed"] = self.action_space.to_latent( - [best_observed_holon_action] - ) - - obs, reward, term, trunc, info = self.step_post_execute(success, action, info) - # Since we called step_post_execute() with soft=False, we expect infos[1] (reward) to not be None. - assert reward is not None - return (obs, reward, term, trunc, info) - - def reset(self, *args: Any, **kwargs: Any) -> tuple[Any, EnvInfoDict]: # type: ignore - assert isinstance(self.unwrapped, PostgresEnv) - # First have to shift to the new state. - state, info = self.unwrapped.reset(*args, **kwargs) - - # Now we conditionally evaluate. - if self.workload_eval_reset and ( - kwargs - and ("options" in kwargs) - and (kwargs["options"]) - and ("query_metric_data" in kwargs["options"]) - ): - assert isinstance(self.action_space, HolonSpace) - assert isinstance(self.observation_space, StateSpace) - - # Get a null action. - null_action = self.action_space.null_action(info["state_container"]) - # Reset-to -- the configuration we reset towards. - reset_qknob = self.action_space.extract_query(info["state_container"]) - assert reset_qknob - - # Replace into the null action the best-observed so far. - best_qknobs = self.action_space.extract_query(null_action) - assert best_qknobs - for qid, qr in self.best_observed.items(): - assert qr.query_run - best_qknobs.update(qr.query_run.qknobs) - - runs = [ - ( - "ResetPerQuery", - self.action_space.replace_query(null_action, best_qknobs), - ) - ] - - # This isn't ideal, but directly invoke execute() on the underlying workload object. - # This will tell us whether the best_observed is better than what - # we are actually resetting towards or not in target_metric_data. - ( - success, - metric, - _, - results_dpath, - _, - target_metric_data, - ) = self.unwrapped.workload.execute( - pg_conn=self.unwrapped.pg_conn, - reward_utility=self.unwrapped.reward_utility, - observation_space=self.observation_space, - action_space=self.action_space, - actions=[r[1] for r in runs], - variation_names=[r[0] for r in runs], - benchbase_config=self.benchbase_config, - query_timeout=self.query_timeout, - reset_metrics=kwargs["options"]["query_metric_data"], - update=False, - first=False, - ) - - # Update the best observed. - self._update_best_observed(target_metric_data, force_overwrite=True) - - # runs[0][1]/best_qknobs -- is the best observed qknobs. - # target_metric_data tells us if best_qknobs is good or if the config we reset to is good. - # reset_qknob -- is the per query from the configuration being reset which we defer to if we've timed out. - # - # Merge the optimality between best_observed and what we are resetting - # to based on the feedback in target_metric_data. - action = _mutilate_action_with_metrics( - self.action_space, runs[0][1], target_metric_data, reset_qknob - ) - - # Reward should be irrelevant. If we do accidentally use it, cause an error. - # Similarly, metric should be irrelevant. Do not shift the workload timeout. - info = EnvInfoDict( - {"metric": None, "reward": None, "results_dpath": results_dpath} - ) - # Use this to adjust the container and state but don't shift the step. - state, _, _, _, info = self.unwrapped.step_post_execute( - True, action, info, soft=True - ) - - # Update the reward baseline. 
- if self.unwrapped.reward_utility: - assert self.unwrapped.baseline_metric - self.unwrapped.reward_utility.set_relative_baseline( - self.unwrapped.baseline_metric, - prev_result=metric, - ) - - logging.getLogger(DBGYM_LOGGER_NAME).debug("Maximized on reset.") - - return state, info diff --git a/tune/protox/env/pg_env.py b/tune/protox/env/pg_env.py deleted file mode 100644 index babe88bd..00000000 --- a/tune/protox/env/pg_env.py +++ /dev/null @@ -1,457 +0,0 @@ -import copy -import logging -import time -from typing import Any, Optional - -import gymnasium as gym -import psycopg -from plumbum import local - -from env.pg_conn import PostgresConn -from tune.protox.env.artifact_manager import ArtifactManager, time_record -from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.space.state.space import StateSpace -from tune.protox.env.space.utils import fetch_server_indexes, fetch_server_knobs -from tune.protox.env.types import ( - ActionsInfo, - EnvInfoDict, - HolonAction, - HolonStateContainer, - TargetResetConfig, -) -from tune.protox.env.util.reward import RewardUtility -from tune.protox.env.workload import Workload -from util.log import DBGYM_LOGGER_NAME -from util.workspace import DBGymConfig, TuningMode - - -class PostgresEnv(gym.Env[Any, Any]): - def __init__( - self, - dbgym_cfg: DBGymConfig, - tuning_mode: TuningMode, - observation_space: StateSpace, - action_space: HolonSpace, - workload: Workload, - horizon: int, - reward_utility: RewardUtility, - pg_conn: PostgresConn, - query_timeout: int, - benchbase_config: dict[str, Any], - artifact_manager: Optional[ArtifactManager] = None, - ): - super().__init__() - - self.dbgym_cfg = dbgym_cfg - self.tuning_mode = tuning_mode - self.artifact_manager = artifact_manager - self.action_space = action_space - self.observation_space = observation_space - self.workload = workload - self.horizon = horizon - self.reward_utility = reward_utility - - self.benchbase_config = benchbase_config - self.pg_conn = pg_conn - self.query_timeout = query_timeout - - self.current_state: Optional[Any] = None - self.baseline_metric: Optional[float] = None - self.state_container: Optional[HolonStateContainer] = None - - def _restore_last_snapshot(self) -> None: - assert self.horizon > 1 and self.workload.oltp_workload - assert self.pg_conn.restore_checkpointed_snapshot() - assert isinstance(self.action_space, HolonSpace) - - self.state_container = self.action_space.generate_state_container( - self.state_container, - None, - self.pg_conn.conn(), - self.workload.queries, - ) - - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"[Restored snapshot] {self.state_container}" - ) - - @time_record("reset") - def reset( # type: ignore - self, seed: Optional[int] = None, options: Optional[dict[str, Any]] = None - ) -> tuple[Any, EnvInfoDict]: - reset_start = time.time() - logging.getLogger(DBGYM_LOGGER_NAME).info( - "Resetting database system state to snapshot." 
- ) - super().reset(seed=seed) - - target_config: Optional[TargetResetConfig] = None - if options is not None: - target_config = TargetResetConfig( - { - "metric": options.get("metric", None), - "env_state": options.get("env_state", None), - "config": options.get("config", None), - } - ) - - self.current_step = 0 - info = EnvInfoDict({}) - - if target_config is not None: - metric = target_config["metric"] - env_state = target_config["env_state"] - config = target_config["config"] - - if self.workload.oltp_workload and self.horizon == 1: - # Restore a pristine snapshot of the world if OTLP and horizon = 1 - self.pg_conn.restore_pristine_snapshot() - else: - # Instead of restoring a pristine snapshot, just reset the knobs. - # This in effect "resets" the baseline knob settings. - self.pg_conn.restart_with_changes(conf_changes=dict()) - - # Maneuver the state into the requested state/config. - assert isinstance(self.action_space, HolonSpace) - sc = self.action_space.generate_state_container( - self.state_container, - None, - self.pg_conn.conn(), - self.workload.queries, - ) - config_changes, sql_commands = self.action_space.generate_plan_from_config( - config, sc - ) - # Don't dump the page cache because we want to keep it warm to see the performance of - # workloads under a warm cache. - assert self.shift_state(config_changes, sql_commands, dump_page_cache=False) - - # Note that we do not actually update the baseline metric/reward used by the reward - # utility. This is so the reward is not stochastic with respect to the starting state. - # This also means the reward is deterministic w.r.t to improvement. - if self.reward_utility is not None: - assert self.baseline_metric - self.reward_utility.set_relative_baseline( - self.baseline_metric, prev_result=metric - ) - - self.state_container = copy.deepcopy(config) - self.current_state = env_state.copy() - logging.getLogger(DBGYM_LOGGER_NAME).debug( - "[Finished] Reset to state (config): %s", config - ) - - else: - # Restore a pristine snapshot of the world. - self.pg_conn.restore_pristine_snapshot() - assert self.tuning_mode != TuningMode.REPLAY - - # On the first time, run the benchmark to get the baseline. - assert isinstance(self.observation_space, StateSpace) - assert isinstance(self.action_space, HolonSpace) - - # Get the stock state container. - sc = self.action_space.generate_state_container( - None, None, self.pg_conn.conn(), self.workload.queries - ) - default_action = self.action_space.null_action(sc) - - success, metric, _, results_dpath, _, query_metric_data = ( - self.workload.execute( - pg_conn=self.pg_conn, - reward_utility=self.reward_utility, - observation_space=self.observation_space, - action_space=self.action_space, - actions=[default_action], - variation_names=["GlobalDual"], - benchbase_config=self.benchbase_config, - query_timeout=self.query_timeout, - update=False, - first=True, - ) - ) - - # Ensure that the first run succeeds. - assert success - # Get the state. - self.state_container = self.action_space.generate_state_container( - self.state_container, - None, - self.pg_conn.conn(), - self.workload.queries, - ) - state = self.observation_space.construct_offline( - self.pg_conn.conn(), results_dpath, self.state_container - ) - - # Set the metric workload. 
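The baseline established during the first reset anchors all later rewards. RewardUtility's exact formula is not part of this diff, so the following is only an assumed illustration of a relative-baseline reward: improvement over the fixed baseline metric, with the baseline run itself scoring zero.

from typing import Optional

class RelativeBaselineReward:
    def __init__(self) -> None:
        self.baseline: Optional[float] = None

    def set_relative_baseline(self, baseline: float, prev_result: Optional[float] = None) -> None:
        # The baseline is fixed once so the reward stays deterministic w.r.t. improvement.
        self.baseline = baseline

    def __call__(self, metric: float) -> float:
        assert self.baseline is not None
        # Positive when the metric (e.g. total runtime) drops below the baseline.
        return (self.baseline - metric) / self.baseline

reward_utility = RelativeBaselineReward()
reward_utility.set_relative_baseline(100.0, prev_result=100.0)
print(reward_utility(100.0), reward_utility(80.0))  # 0.0 0.2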
- self.workload.set_workload_timeout(metric) - - self.reward_utility.set_relative_baseline(metric, prev_result=metric) - _, reward = self.reward_utility( - metric=metric, update=False, did_error=False - ) - self.current_state = state.copy() - info = EnvInfoDict( - { - "baseline_metric": metric, - "baseline_reward": reward, - "query_metric_data": query_metric_data, - "results_dpath": results_dpath, - "prior_state_container": None, - "prior_pgconf": None, - "actions_info": None, - } - ) - self.baseline_metric = metric - - assert self.state_container - info["state_container"] = copy.deepcopy(self.state_container) - return self.current_state, info - - @time_record("step_before_execution") - def step_before_execution(self, action: HolonAction) -> tuple[bool, EnvInfoDict]: - # Log the action in debug mode. - logging.getLogger(DBGYM_LOGGER_NAME).debug( - "Selected action: %s", self.action_space.to_jsonable([action]) - ) - - # Get the prior state. - prior_state = copy.deepcopy(self.state_container) - # Save the old configuration file. - old_conf_path = f"{self.pg_conn.dbdata_dpath}/postgresql.auto.conf" - conf_path = f"{self.pg_conn.dbdata_dpath}/postgresql.auto.old" - local["cp"][old_conf_path, conf_path].run() - - # Figure out what we have to change to get to the new configuration. - assert isinstance(self.action_space, HolonSpace) - assert prior_state - config_changes, sql_commands = self.action_space.generate_action_plan( - action, prior_state - ) - # Attempt to maneuver to the new state. - # Don't dump the page cache in shift_state() in order to see how the workload performs in - # a warm cache scenario. - success = self.shift_state(config_changes, sql_commands, dump_page_cache=False) - return success, EnvInfoDict( - { - "attempted_changes": (config_changes, sql_commands), - "prior_state_container": prior_state, - "prior_pgconf": conf_path, - } - ) - - @time_record("step_execute") - def step_execute( - self, - setup_success: bool, - all_holon_action_variations: list[tuple[str, HolonAction]], - info: EnvInfoDict, - ) -> tuple[bool, EnvInfoDict]: - if setup_success: - assert isinstance(self.observation_space, StateSpace) - assert isinstance(self.action_space, HolonSpace) - # Evaluate the benchmark. - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"\n\nfetch_server_knobs(): {fetch_server_knobs(self.pg_conn.conn(), self.action_space.get_knob_space().tables, self.action_space.get_knob_space().knobs, self.workload.queries)}\n\n" - ) - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"\n\nfetch_server_indexes(): {fetch_server_indexes(self.pg_conn.conn(), self.action_space.get_knob_space().tables)}\n\n" - ) - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"\n\naction_names: {[a[0] for a in all_holon_action_variations]}\n\n" - ) - ( - success, - metric, - reward, - results_dpath, - did_anything_time_out, - query_metric_data, - ) = self.workload.execute( - pg_conn=self.pg_conn, - reward_utility=self.reward_utility, - observation_space=self.observation_space, - action_space=self.action_space, - benchbase_config=self.benchbase_config, - query_timeout=self.query_timeout, - actions=[a[1] for a in all_holon_action_variations], - variation_names=[a[0] for a in all_holon_action_variations], - update=True, - ) - else: - # Illegal configuration. - logging.getLogger(DBGYM_LOGGER_NAME).info( - "Found illegal configuration: %s", info["attempted_changes"] - ) - success = False - # Since we reached an invalid area, just set the next state to be the current state. 
- metric, reward = self.reward_utility(did_error=True) - results_dpath, did_anything_time_out, query_metric_data = None, True, None - - # Build EnvInfoDict - info.update( - EnvInfoDict( - { - "metric": metric, - "did_anything_time_out": did_anything_time_out, - "query_metric_data": query_metric_data, - "reward": reward, - "results_dpath": results_dpath, - "actions_info": ActionsInfo( - { - "all_holon_action_variations": all_holon_action_variations, - "best_observed_holon_action": None, - } - ), - } - ) - ) - return success, info - - @time_record("step_post_execute") - def step_post_execute( - self, - success: bool, - action: HolonAction, - info: EnvInfoDict, - # If "soft" is true, it means we're calling step_post_execute() from reset(). If it's false, it means we're calling step_post_execute() from step(). - soft: bool = False, - ) -> tuple[Any, Optional[float], bool, bool, EnvInfoDict]: - # If we're calling step_post_execute() from reset(), we expect info["metric"] and info["reward"] to be None. - if not soft: - assert info["reward"] is not None - assert info["metric"] is not None - else: - assert info["reward"] is None - assert info["metric"] is None - - if self.workload.oltp_workload and self.horizon > 1: - # If horizon = 1, then we're going to reset anyways. So easier to just untar the original archive. - # Restore the crisp and clean snapshot. - # If we've "failed" due to configuration, then we will boot up the last "bootable" version. - self._restore_last_snapshot() - - if success: - if not soft: - if not self.workload.oltp_workload: - # Update the workload metric timeout if we've succeeded. - assert info["metric"] is not None - self.workload.set_workload_timeout(info["metric"]) - - # Get the current view of the state container. - assert isinstance(self.action_space, HolonSpace) - self.state_container = self.action_space.generate_state_container( - self.state_container, - action, - self.pg_conn.conn(), - self.workload.queries, - ) - - # Now. The state container should be accurate. - assert isinstance(self.observation_space, StateSpace) - next_state = self.observation_space.construct_offline( - self.pg_conn.conn(), info["results_dpath"], self.state_container - ) - else: - assert self.current_state - next_state = self.current_state.copy() - - if not soft: - self.current_step = self.current_step + 1 - self.current_state = next_state - return ( - self.current_state, - info["reward"], - (self.current_step >= self.horizon), - not success, - info, - ) - - def step( # type: ignore - self, action: HolonAction - ) -> tuple[Any, float, bool, bool, EnvInfoDict]: - assert self.tuning_mode != TuningMode.REPLAY - success, info = self.step_before_execution(action) - success, info = self.step_execute(success, [("PerQuery", action)], info) - obs, reward, term, trunc, info = self.step_post_execute(success, action, info) - # Since we called step_post_execute() with soft=False, we expect infos[1] (reward) to not be None. - assert reward is not None - return (obs, reward, term, trunc, info) - - @time_record("shift_state") - def shift_state( - self, - config_changes: list[str], - sql_commands: list[str], - dump_page_cache: bool = False, - ) -> bool: - def attempt_checkpoint(conn_str: str) -> None: - # CHECKPOINT to prevent the DBMS from entering a super slow shutdown - # if a shift_state has failed. 
- attempts = 0 - while True: - try: - with psycopg.connect( - conn_str, autocommit=True, prepare_threshold=None - ) as conn: - conn.execute("CHECKPOINT") - - break - except psycopg.OperationalError as e: - attempts += 1 - - if attempts >= 5: - assert ( - False - ), f"attempt_checkpoint() failed after 5 attempts with {e}" - - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"[attempt_checkpoint]: {e}" - ) - time.sleep(5) - - # First enforce the SQL command changes. - for i, sql in enumerate(sql_commands): - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Executing {sql} [{i+1}/{len(sql_commands)}]" - ) - - ret, stderr = self.pg_conn.psql(sql) - if ret == -1: - if stderr: - logging.getLogger(DBGYM_LOGGER_NAME).warning(stderr) - assert ( - "index row requires" in stderr - or "canceling statement" in stderr - # We've killed the index operation. - or "operational" in stderr - ) - attempt_checkpoint(self.pg_conn.get_kv_connstr()) - return False - - assert ret == 0, stderr - - # LatentKnobSpace returns a config change in the form "{knob} = {val}" when restart_with_changes() wants (knob, val), so we convert it here. - # The reason LatentKnobSpace returns a list[str] instead of a list[tuple[str, str]] is because it must follow the same interface as the other - # spaces, which return list[str]. - dict_config_changes = dict() - for conf_change in config_changes: - knob, val = conf_change.split(" = ") - dict_config_changes[knob] = val - - # Now try and perform the configuration changes. - return self.pg_conn.restart_with_changes( - conf_changes=dict_config_changes, - dump_page_cache=dump_page_cache, - save_checkpoint=self.workload.oltp_workload and self.horizon > 1, - ) - - def close(self) -> None: - self.pg_conn.shutdown_postgres() - # This file may not be in in [workspace]/tmp/, so it's important to delete it - local["rm"]["-rf", self.pg_conn.dbdata_dpath].run() - # Even though these files get deleted because [workspace]/tmp/ gets deleted, - # we'll just delete them here anyways because why not - local["rm"]["-f", self.pg_conn.checkpoint_dbdata_snapshot_fpath].run() - local["rm"]["-f", f"{self.pg_conn.checkpoint_dbdata_snapshot_fpath}.tmp"].run() diff --git a/tune/protox/env/space/__init__.py b/tune/protox/env/space/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/env/space/holon_space.py b/tune/protox/env/space/holon_space.py deleted file mode 100644 index 46d39e44..00000000 --- a/tune/protox/env/space/holon_space.py +++ /dev/null @@ -1,372 +0,0 @@ -import copy -import itertools -import logging -from typing import Any, Iterable, List, Optional, Tuple, Union, cast - -import gymnasium as gym -import numpy as np -import torch -from gymnasium import spaces -from psycopg import Connection - -from tune.protox.env.artifact_manager import ArtifactManager, time_record -from tune.protox.env.space.latent_space import ( - LatentIndexSpace, - LatentKnobSpace, - LatentQuerySpace, - LSCIndexSpace, -) -from tune.protox.env.space.utils import check_subspace -from tune.protox.env.types import ( - DEFAULT_NEIGHBOR_PARAMETERS, - HolonAction, - HolonStateContainer, - HolonSubAction, - IndexSpaceRawSample, - KnobSpaceAction, - NeighborParameters, - ProtoAction, - QuerySpaceAction, - QuerySpaceKnobAction, - QueryType, -) -from util.log import DBGYM_LOGGER_NAME - -HolonSubSpace = Union[LatentKnobSpace, LatentIndexSpace, LatentQuerySpace] - - -class HolonSpace(spaces.Tuple): - def _latent_assert_check( - self, - carprod_neighbors: list[HolonAction], - carprod_embeds: torch.Tensor, - 
first_drift: int, - ) -> None: - zero = self.to_latent([carprod_neighbors[0]])[0] - last = self.to_latent([carprod_neighbors[-1]])[0] - first_d = self.to_latent([carprod_neighbors[first_drift]])[0] - - def eq_fn(x: torch.Tensor, y: torch.Tensor) -> bool: - return bool(torch.isclose(x, y, atol=0.001).all().item()) - - assert eq_fn(zero, carprod_embeds[0]), f"{zero} {carprod_embeds[0]}" - assert eq_fn(last, carprod_embeds[-1]), f"{last} {carprod_embeds[-1]}" - assert eq_fn( - first_d, carprod_embeds[first_drift] - ), f"{first_d} {carprod_embeds[first_drift]}" - - logging.getLogger(DBGYM_LOGGER_NAME).debug("Neighborhood Check passed.") - - def __init__( - self, - knob_space: LatentKnobSpace, - index_space: LatentIndexSpace, - query_space: LatentQuerySpace, - seed: int, - artifact_manager: Optional[ArtifactManager], - ): - spaces: Iterable[gym.spaces.Space[Any]] = [knob_space, index_space, query_space] - super().__init__(spaces, seed=seed) - - raw_dims = [ - ( - gym.spaces.utils.flatdim(space) - if space.latent_dim() == 0 - else space.latent_dim() - ) - for space in self.spaces - if hasattr(space, "latent_dim") - ] - assert len(raw_dims) == 3 - self.raw_dims: list[int] = np.cumsum(raw_dims) - self.space_dims: Optional[list[int]] = None - self.artifact_manager = artifact_manager - - def get_spaces(self) -> list[tuple[str, HolonSubSpace]]: - r = cast( - list[tuple[str, HolonSubSpace]], - [(s.name, s) for s in self.spaces if hasattr(s, "name")], - ) - assert len(r) == 3 - return r - - def null_action(self, sc: HolonStateContainer) -> HolonAction: - assert isinstance(self.spaces[1], LatentIndexSpace) - null_index = self.spaces[1].null_action() - return HolonAction( - (cast(KnobSpaceAction, sc[0]), null_index, cast(QuerySpaceAction, sc[2])) - ) - - def split_action( - self, action: HolonAction - ) -> list[tuple[HolonSubSpace, HolonSubAction]]: - return [ - (cast(LatentKnobSpace, self.spaces[0]), action[0]), - (cast(LatentIndexSpace, self.spaces[1]), action[1]), - (cast(LatentQuerySpace, self.spaces[2]), action[2]), - ] - - def extract_query( - self, action: Union[HolonAction, HolonStateContainer] - ) -> QuerySpaceKnobAction: - for i, s in enumerate(self.spaces): - if isinstance(s, LatentQuerySpace): - q_act = action[i] - return s.extract_query(cast(QuerySpaceAction, q_act)) - raise ValueError("Missing query space in configuration") - - def extract_knobs(self, action: HolonAction) -> Optional[KnobSpaceAction]: - assert isinstance(self.spaces[0], LatentKnobSpace) - return action[0] - - def replace_query( - self, action: HolonAction, query: QuerySpaceKnobAction - ) -> HolonAction: - action = copy.deepcopy(action) - for i, s in enumerate(self.spaces): - if isinstance(s, LatentQuerySpace): - qknobs = s.replace_query(query) - assert check_subspace(s, qknobs) - return HolonAction((action[0], action[1], qknobs)) - return action - - def latent_dim(self) -> int: - return self.raw_dims[-1] - - def critic_dim(self) -> int: - r = [ - space.critic_dim() for space in self.spaces if hasattr(space, "critic_dim") - ] - assert len(r) == 3 - return sum(r) - - def get_knob_space(self) -> LatentKnobSpace: - assert isinstance(self.spaces[0], LatentKnobSpace) - return self.spaces[0] - - def get_index_space(self) -> LatentIndexSpace: - assert isinstance(self.spaces[1], LatentIndexSpace) - return self.spaces[1] - - def get_query_space(self) -> LatentQuerySpace: - assert isinstance(self.spaces[2], LatentQuerySpace) - return self.spaces[2] - - def pad_center_latent(self, proto: ProtoAction, lscs: torch.Tensor) -> ProtoAction: 
- assert len(proto.shape) == 2 - - components = [] - for i, s in enumerate(self.spaces): - start = self.raw_dims[i - 1] if i > 0 else 0 - end = self.raw_dims[i] - assert isinstance(s, (LatentKnobSpace, LSCIndexSpace, LatentQuerySpace)) - components.append( - s.pad_center_latent(ProtoAction(proto[:, start:end]), lscs) - ) - - return ProtoAction(torch.cat(cast(list[torch.Tensor], components), dim=1)) - - def transform_noise( - self, proto: ProtoAction, noise: Optional[torch.Tensor] = None - ) -> ProtoAction: - assert len(proto.shape) == 2 - for i, s in enumerate(self.spaces): - start = self.raw_dims[i - 1] if i > 0 else 0 - end = self.raw_dims[i] - - snoise = None - if noise is not None: - assert len(noise.shape) == 2 - snoise = noise[:, start:end] - - assert hasattr(s, "transform_noise") - proto[:, start:end] = s.transform_noise(proto[:, start:end], noise=snoise) - return proto - - def from_latent(self, proto: ProtoAction) -> ProtoAction: - components = [] - space_dims = [] - assert len(proto.shape) == 2 - for i, s in enumerate(self.spaces): - start = self.raw_dims[i - 1] if i > 0 else 0 - end = self.raw_dims[i] - assert hasattr(s, "from_latent") - components.append(s.from_latent(proto[:, start:end])) - space_dims.append(components[-1].shape[-1]) - - if self.space_dims is None: - self.space_dims = np.cumsum(space_dims) - - return ProtoAction(torch.cat(components, dim=1)) - - def to_latent(self, env_act: list[HolonAction]) -> ProtoAction: - assert isinstance(env_act, list) - latent_cmps = [] - for i, s in enumerate(self.spaces): - if isinstance(s, LatentIndexSpace): - latent_cmps.append( - s.to_latent( - cast(list[IndexSpaceRawSample], [a[i] for a in env_act]) - ) - ) - else: - assert isinstance(s, (LatentKnobSpace, LatentQuerySpace)) - latent_cmps.append( - s.to_latent(cast(list[KnobSpaceAction], [a[i] for a in env_act])) - ) - - return ProtoAction(torch.concat(cast(list[torch.Tensor], latent_cmps), dim=1)) - - def sample_latent(self, mask: Optional[Any] = None) -> ProtoAction: - r = [ - s.sample_latent(mask=mask) - for s in self.spaces - if hasattr(s, "sample_latent") - ] - assert len(r) == 3 - return ProtoAction(torch.concat(r, dim=1)) - - @time_record("neighborhood") - def neighborhood( - self, - raw_action: ProtoAction, - neighbor_parameters: NeighborParameters = DEFAULT_NEIGHBOR_PARAMETERS, - ) -> tuple[list[HolonAction], ProtoAction, torch.Tensor]: - env_acts = [] - emb_acts: list[torch.Tensor] = [] - ndims = [] - - env_action = self.from_latent(raw_action) - for proto in env_action: - # Figure out the neighbors for each subspace. - envs_neighbors: list[Any] = [] - embed_neighbors: list[Any] = [] - - # TODO(wz2,PROTOX_DELTA): For pseudo-backwards compatibility, we meld the knob + query space together. - # In this way, we don't actually generate knob x query cartesian product. - # Rather, we directly fuse min(knob_neighbors, query_neighbors) together and then cross with indexes. - meld_groups: list[list[Any]] = [ - [self.get_knob_space(), self.get_query_space()], - [self.get_index_space()], - ] - - for meld_group in meld_groups: - meld_group_envs = [] - meld_group_embeds = [] - for s in meld_group: - assert self.spaces.index(s) is not None - assert self.space_dims is not None - i = self.spaces.index(s) - - # Note that subproto is the "action embedding" from the context of - # the embedded actor-critic like architecutre. 
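The meld-group construction above avoids a full knob x query cartesian product: knob and query neighbors are zipped together first, and only the index neighbors are crossed against that melded group. A minimal sketch of the combination order, using strings in place of sub-actions:

import itertools

def meld_and_cross(knob_neighbors, query_neighbors, index_neighbors):
    # Meld: pairwise zip, so the joint count is min(len(knobs), len(queries)).
    melded = list(zip(knob_neighbors, query_neighbors))
    # Cross: only the index dimension multiplies the number of candidates.
    return [(k, idx, q) for (k, q), idx in itertools.product(melded, index_neighbors)]

print(meld_and_cross(["k0", "k1"], ["q0", "q1", "q2"], ["i0", "i1"]))
# [('k0', 'i0', 'q0'), ('k0', 'i1', 'q0'), ('k1', 'i0', 'q1'), ('k1', 'i1', 'q1')]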
- subproto = proto[ - (self.space_dims[i - 1] if i > 0 else 0) : self.space_dims[i] - ] - assert isinstance( - s, (LatentKnobSpace, LatentIndexSpace, LatentQuerySpace) - ) - envs = s.neighborhood(subproto, neighbor_parameters) - meld_group_envs.append(envs) - - if isinstance(s, LatentIndexSpace): - # Compute their latent representation first. - meld_group_embeds.append( - s.to_latent(cast(list[IndexSpaceRawSample], envs)) - ) - else: - assert isinstance(s, (LatentKnobSpace, LatentQuerySpace)) - meld_group_embeds.append( - s.to_latent(cast(list[KnobSpaceAction], envs)) - ) - - if len(meld_group_envs) > 1: - # Join the meld groups. - envs_neighbors.append([z for z in zip(*meld_group_envs)]) - t_len = len(envs_neighbors[-1]) - embed_neighbors.append(zip(*[z[:t_len] for z in meld_group_embeds])) - else: - envs_neighbors.append(meld_group_envs[0]) - embed_neighbors.append(meld_group_embeds[0]) - - # Cartesian product itself is naturally in the joint space. - carprod_neighbors = cast( - list[HolonAction], - [ - (mg0[0], mg1, mg0[1]) - for (mg0, mg1) in itertools.product(*envs_neighbors) - ], - ) - # Trust that the cartesian product is generated the same way. - carprod_embeds = torch.stack( - list( - map( - lambda mg: torch.cat((mg[0][0], mg[1], mg[0][1])), - itertools.product(*embed_neighbors), - ) - ) - ) - assert len(carprod_neighbors) == carprod_embeds.shape[0] - - # This is a sanity check to avoid having to to_latent() on each holon. - # Guess for when the drift happens. - first_drift = len(envs_neighbors[1]) + 1 - - # Only run this check if we are attempting to sample an action and not during learn. - if len(env_action) == 1: - assert len(self.spaces) == len(carprod_neighbors[0]) - self._latent_assert_check( - carprod_neighbors, carprod_embeds, first_drift - ) - - env_acts.extend(carprod_neighbors) - emb_acts.append(carprod_embeds) - ndims.append(len(carprod_neighbors)) - - return env_acts, ProtoAction(torch.cat(emb_acts, dim=0)), torch.as_tensor(ndims) - - def generate_state_container( - self, - prev_state_container: Optional[HolonStateContainer], - action: Optional[HolonAction], - connection: Connection[Any], - queries: dict[str, list[tuple[QueryType, str]]], - ) -> HolonStateContainer: - t = tuple( - s.generate_state_container( - prev_state_container[i] if prev_state_container else None, - action[i] if action else None, - connection, - queries, - ) - for i, s in enumerate(self.spaces) - if hasattr(s, "generate_state_container") - ) - assert len(t) == 3 - return HolonStateContainer(t) - - def generate_action_plan( - self, action: HolonAction, state_container: HolonStateContainer, **kwargs: Any - ) -> tuple[list[str], list[str]]: - outputs = [ - space.generate_action_plan(action[i], state_container[i], **kwargs) - for i, space in enumerate(self.spaces) - if hasattr(space, "generate_action_plan") - ] - assert len(outputs) == 3 - cc = list(itertools.chain(*[o[0] for o in outputs])) - sql_commands = list(itertools.chain(*[o[1] for o in outputs])) - return cc, sql_commands - - def generate_plan_from_config( - self, config: HolonStateContainer, sc: HolonStateContainer, **kwargs: Any - ) -> tuple[list[str], list[str]]: - outputs = [ - space.generate_delta_action_plan(config[i], sc[i], **kwargs) - for i, space in enumerate(self.spaces) - if hasattr(space, "generate_delta_action_plan") - ] - assert len(outputs) == 3 - config_changes = list(itertools.chain(*[o[0] for o in outputs])) - sql_commands = list(itertools.chain(*[o[1] for o in outputs])) - return config_changes, sql_commands diff --git 
a/tune/protox/env/space/latent_space/__init__.py b/tune/protox/env/space/latent_space/__init__.py deleted file mode 100644 index b243cc5c..00000000 --- a/tune/protox/env/space/latent_space/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from tune.protox.env.space.latent_space.latent_index_space import LatentIndexSpace -from tune.protox.env.space.latent_space.latent_knob_space import LatentKnobSpace -from tune.protox.env.space.latent_space.latent_query_space import LatentQuerySpace -from tune.protox.env.space.latent_space.lsc_index_space import LSCIndexSpace - -__all__ = ["LatentIndexSpace", "LatentQuerySpace", "LatentKnobSpace", "LSCIndexSpace"] diff --git a/tune/protox/env/space/latent_space/latent_index_space.py b/tune/protox/env/space/latent_space/latent_index_space.py deleted file mode 100644 index aa6ba7b8..00000000 --- a/tune/protox/env/space/latent_space/latent_index_space.py +++ /dev/null @@ -1,305 +0,0 @@ -import logging -from typing import Any, Callable, Optional, Tuple - -import numpy as np -import psycopg -import torch -from numpy.typing import NDArray - -from tune.protox.embedding.vae import VAE -from tune.protox.env.artifact_manager import ArtifactManager, time_record -from tune.protox.env.space.primitive.index import IndexAction -from tune.protox.env.space.primitive_space import IndexSpace -from tune.protox.env.space.utils import check_subspace, fetch_server_indexes -from tune.protox.env.types import ( - DEFAULT_NEIGHBOR_PARAMETERS, - IndexSpaceContainer, - IndexSpaceRawSample, - NeighborParameters, - ProtoAction, - QueryMap, - TableAttrAccessSetsMap, - TableAttrListMap, -) -from util.log import DBGYM_LOGGER_NAME - - -class LatentIndexSpace(IndexSpace): - def __init__( - self, - tables: list[str], - max_num_columns: int, - max_indexable_attributes: int, - seed: int, - rel_metadata: TableAttrListMap, - attributes_overwrite: TableAttrListMap, - tbl_include_subsets: TableAttrAccessSetsMap, - vae: VAE, - index_space_aux_type: bool = False, - index_space_aux_include: bool = False, - deterministic_policy: bool = False, - latent_dim: int = 0, - index_output_transform: Optional[Callable[[ProtoAction], ProtoAction]] = None, - index_noise_scale: Optional[ - Callable[[ProtoAction, Optional[torch.Tensor]], ProtoAction] - ] = None, - artifact_manager: Optional[ArtifactManager] = None, - ) -> None: - - super().__init__( - tables, - max_num_columns, - max_indexable_attributes, - seed, - rel_metadata, - attributes_overwrite, - tbl_include_subsets, - index_space_aux_type, - index_space_aux_include, - deterministic_policy, - ) - - self.vae = vae - self._latent_dim = latent_dim - self.index_output_transform = index_output_transform - self.index_noise_scale = index_noise_scale - self.artifact_manager = artifact_manager - self.name = "index" - - def latent_dim(self) -> int: - return self._latent_dim - - def critic_dim(self) -> int: - index_dim = self.latent_dim() - if self.index_space_aux_type: - index_dim += 2 - - if self.index_space_aux_include: - index_dim += self.max_inc_columns - - return index_dim - - def uses_embed(self) -> bool: - return True - - def transform_noise( - self, subproto: ProtoAction, noise: Optional[torch.Tensor] = None - ) -> ProtoAction: - if self.index_output_transform is not None: - subproto = self.index_output_transform(subproto) - else: - subproto = ProtoAction(torch.tanh(subproto)) - - if noise is not None and self.index_noise_scale: - # Now perturb noise. 
- subproto = self.index_noise_scale(subproto, noise) - - return subproto - - @time_record("from_latent") - def from_latent(self, subproto: ProtoAction) -> ProtoAction: - assert len(subproto.shape) == 2 - decode_act = self.vae.decoder(subproto).detach().view(subproto.shape[0], -1) - - # Only need to do additional processing if we are treating as one-hot softmax representation. - # Now treat it as it came out of the neural network and process it. - if len(self.tables) < self.max_num_columns + 1: - # Yoink only the table components that we care about. - distort = [l for l in range(0, len(self.tables))] + [ - l for l in range(self.max_num_columns + 1, decode_act.shape[1]) - ] - else: - # Yoink only the index components that we care about. - distort = [l for l in range(0, len(self.tables))] - [ - distort.extend([b + i for i in range(0, self.max_num_columns + 1)]) # type: ignore - for b in range(len(self.tables), decode_act.shape[1], len(self.tables)) - ] - - decode_act = torch.index_select(decode_act, 1, torch.as_tensor(distort)) - return ProtoAction(self.policy.from_latent(decode_act)) - - @time_record("to_latent") - def to_latent(self, env_act: list[IndexSpaceRawSample]) -> ProtoAction: - assert isinstance(env_act, list) - if self.index_space_aux_include: - # Straighten out the list. - th_env_act = torch.as_tensor( - [(e[:-1]) + tuple(e[-1]) for e in env_act] - ).reshape(len(env_act), -1) - else: - th_env_act = torch.as_tensor(env_act).reshape(len(env_act), -1) - - index_type = None - include_col = None - if self.index_space_aux_type: - # Boink the index type. - index_val = th_env_act[:, 0].view(th_env_act.shape[0], -1).type(torch.int64) - index_type = torch.zeros(index_val.shape[0], 2, dtype=torch.int64) - index_type = index_type.scatter_(1, index_val, 1).float() - th_env_act = th_env_act[:, 1:] - - if self.index_space_aux_include: - include_col = th_env_act[:, -self.max_inc_columns :].float() - th_env_act = th_env_act[:, : -self.max_inc_columns] - - nets = self.vae.get_collate()(th_env_act).float() - - # There isn't much we can do if we encounter an error. - latents, error = self.vae.latents(nets) - assert not error - - if index_type is not None: - latents = torch.concat([index_type, latents], dim=1) - if include_col is not None: - latents = torch.concat([latents, include_col], dim=1) - - return ProtoAction(latents.float()) - - def sample_latent(self, mask: Optional[Any] = None) -> NDArray[np.float32]: - return ( - np.random.uniform(low=0.0, high=1.0, size=(self.latent_dim(),)) - .astype(np.float32) - .reshape(1, -1) - ) - - @time_record("neighborhood") - def neighborhood( - self, - raw_action: ProtoAction, - neighbor_parameters: NeighborParameters = DEFAULT_NEIGHBOR_PARAMETERS, - ) -> list[IndexSpaceRawSample]: - actions_set = set() - actions: list[IndexSpaceRawSample] = [] - - num_attempts = 0 - allow_random_samples = False - while len(actions) == 0: - for _ in range(neighbor_parameters["index_num_samples"]): - sampled_action = self.policy.sample_dist(raw_action, self.np_random) - assert self.contains(sampled_action) - candidates = [sampled_action] - - if allow_random_samples: - # Only allow *pure* random samples once the flag has been set. - random_act = self.sample() - assert self.contains(random_act) - candidates.append(random_act) - - # Sample subsets if we aren't sampling the length of the index already. 
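The index-type handling in to_latent() above is a plain scatter into a one-hot block that gets prepended to the VAE latents. A small self-contained example of that encoding step:

import torch

index_val = torch.tensor([[0], [1], [1]], dtype=torch.int64)  # N x 1 index-type ids
index_type = torch.zeros(index_val.shape[0], 2, dtype=torch.int64)
index_type = index_type.scatter_(1, index_val, 1).float()
print(index_type)
# tensor([[1., 0.],
#         [0., 1.],
#         [0., 1.]])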
- if neighbor_parameters["index_rules"]: - for candidate in self.policy.structural_neighbors(sampled_action): - assert self.contains(candidate) - candidates.append(candidate) - - for candidate in candidates: - ia = self.to_action(candidate) - # Make sure that we are setting the bias + raw representation. - assert ia.bias is not None and ia.raw_repr is not None - - if ia not in actions_set: - # See IndexAction.__hash__ comment. - actions.append(candidate) - actions_set.add(ia) - - if len(actions) >= neighbor_parameters["index_num_samples"]: - # We have generated enough candidates. At least based on num_samples. - break - - num_attempts += 1 - if num_attempts >= 100: - # Log but don't crash. - logging.getLogger(DBGYM_LOGGER_NAME).error( - "Spent 100 iterations and could not find any valid index action. This should not happen." - ) - allow_random_samples = True - return actions - - def generate_state_container( - self, - prev_state_container: Optional[IndexSpaceContainer], - action: Optional[IndexSpaceRawSample], - connection: psycopg.Connection[Any], - queries: QueryMap, - ) -> IndexSpaceContainer: - - ias = [] - _, indexes = fetch_server_indexes(connection, self.tables) - for tblname, indexdefs in indexes.items(): - for idxname, indexdef in indexdefs.items(): - ias.append( - IndexAction.construct_md( - idx_name=idxname, - table=tblname, - idx_type=indexdef["index_type"], - columns=indexdef["columns"], - inc_names=indexdef["include"], - ) - ) - - new_ia: Optional[IndexAction] = None - if action: - new_ia = self.to_action(action) - - for ia in ias: - if prev_state_container and ia in prev_state_container: - p = prev_state_container[prev_state_container.index(ia)] - ia.raw_repr = p.raw_repr - elif new_ia and ia == new_ia: - ia.raw_repr = new_ia.raw_repr - return IndexSpaceContainer(ias) - - def generate_action_plan( - self, action: IndexSpaceRawSample, sc: IndexSpaceContainer, **kwargs: Any - ) -> tuple[list[str], list[str]]: - assert check_subspace(self, action) - - sql_commands = [] - ia = self.to_action(action) - if not ia.is_valid: - # This is the case where the action we are taking is a no-op. - return [], [] - - exist_ia = ia in sc - if exist_ia: - logging.getLogger(DBGYM_LOGGER_NAME).debug( - "Contemplating %s (exist: True)", sc[sc.index(ia)] - ) - else: - logging.getLogger(DBGYM_LOGGER_NAME).debug( - "Contemplating %s (exist: False)", ia - ) - # Add the new index with the current index counter. - sql_commands.append(ia.sql(add=True)) - - return [], sql_commands - - def generate_delta_action_plan( - self, action: IndexSpaceContainer, sc: IndexSpaceContainer, **kwargs: Any - ) -> tuple[list[str], list[str]]: - assert isinstance(action, list) - acts = [] - sql_commands = [] - for ia in action: - assert isinstance(ia, IndexAction) - acts.append(ia) - - if not ia.is_valid: - # This is the case where the action we are taking is a no-op. - continue - - if ia not in sc: - # Create if not exist. - sql_commands.append(ia.sql(add=True)) - - for ia in sc: - # Drop the index that is no longer needed. 
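generate_delta_action_plan() above reduces to a set difference between the target index configuration and what is currently on the server: create what is missing, drop what is stale. A hedged sketch of that plan with index identity collapsed to a name string:

def delta_index_plan(target: set[str], current: set[str]) -> list[str]:
    plan = [f"CREATE INDEX {name}" for name in sorted(target - current)]
    plan += [f"DROP INDEX {name}" for name in sorted(current - target)]
    return plan

print(delta_index_plan({"index1", "index3"}, {"index1", "index2"}))
# ['CREATE INDEX index3', 'DROP INDEX index2']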
- if ia not in acts: - sql_commands.append(ia.sql(add=False)) - - return [], sql_commands - - def to_action(self, env_act: IndexSpaceRawSample) -> IndexAction: - ia = super().to_action(env_act) - ia.raw_repr = env_act - return ia diff --git a/tune/protox/env/space/latent_space/latent_knob_space.py b/tune/protox/env/space/latent_space/latent_knob_space.py deleted file mode 100644 index c695d13c..00000000 --- a/tune/protox/env/space/latent_space/latent_knob_space.py +++ /dev/null @@ -1,243 +0,0 @@ -import copy -import logging -from pprint import pformat -from typing import Any, Optional, Tuple - -import gymnasium as gym -import numpy as np -import torch -from psycopg import Connection - -from tune.protox.env.artifact_manager import ArtifactManager, time_record -from tune.protox.env.space.primitive import KnobClass, SettingType, is_knob_enum -from tune.protox.env.space.primitive.knob import resolve_enum_value -from tune.protox.env.space.primitive.latent_knob import ( - LatentCategoricalKnob, - LatentKnob, -) -from tune.protox.env.space.primitive_space import KnobSpace -from tune.protox.env.space.utils import check_subspace, fetch_server_knobs -from tune.protox.env.types import ( - DEFAULT_NEIGHBOR_PARAMETERS, - KnobSpaceAction, - KnobSpaceContainer, - NeighborParameters, - ProtoAction, - QueryMap, -) - - -class LatentKnobSpace(KnobSpace): - def __init__( - self, - artifact_manager: Optional[ArtifactManager] = None, - *args: Any, - **kwargs: Any, - ) -> None: - super().__init__(*args, **kwargs) - self.final_dim = gym.spaces.utils.flatdim(self) - self.categorical_start = self.final_dim - self.artifact_manager = artifact_manager - self.cat_dims: list[int] = [] - self.name = "knobs" - - def latent_dim(self) -> int: - return gym.spaces.utils.flatdim(self) - - def critic_dim(self) -> int: - return self.latent_dim() - - def uses_embed(self) -> bool: - return False - - def transform_noise( - self, subproto: ProtoAction, noise: Optional[torch.Tensor] = None - ) -> ProtoAction: - cont_dim = self.categorical_start - cat_dim = self.final_dim - cont_dim - - cont, *cats = subproto.split([cont_dim] + self.cat_dims, dim=-1) # type: ignore - cont = torch.tanh(cont) - if noise is not None: - cont_noise, _ = noise.split([cont_dim, cat_dim], dim=-1) # type: ignore - # TODO(wz2): We only apply the noise to the continuous dimensions. - # In theory, for categorical, we would noise the logits and use something like boltzmann. 
- cont = torch.clamp(cont + cont_noise, -1.0, 1.0) - - if len(cats) > 0: - cats = torch.concat([cat.softmax(dim=-1) for cat in cats], dim=-1) - output = torch.concat([cont, cats], dim=-1) - else: - output = cont - return ProtoAction(output) - - def from_latent(self, a: ProtoAction) -> ProtoAction: - return a - - def nearest_env_action(self, output: ProtoAction) -> KnobSpaceAction: - cont_env_act = gym.spaces.utils.unflatten(self, output.numpy()) - env_act = KnobSpaceAction({}) - for key, knob in self.knobs.items(): - assert isinstance(knob, LatentKnob) or isinstance( - knob, LatentCategoricalKnob - ) - env_act[key] = knob.from_latent(cont_env_act[key]) - assert knob.contains(env_act[key]), f"{key} {env_act[key]} {knob}" - - assert self.contains(env_act) - return env_act - - @time_record("to_latent") - def to_latent(self, env_act: list[KnobSpaceAction]) -> ProtoAction: - assert isinstance(env_act, list) - - embeds = [] - for act in env_act: - assert check_subspace(self, act) - - kv_dict: dict[str, Any] = {} - for k, v in act.items(): - knob = self.knobs[k] - assert isinstance(knob, LatentKnob) or isinstance( - knob, LatentCategoricalKnob - ) - kv_dict[k] = knob.to_latent(v) - - embeds.append(gym.spaces.utils.flatten(self, kv_dict)) - return ProtoAction(torch.as_tensor(np.array(embeds)).float()) - - def sample_latent(self, mask: Optional[Any] = None) -> ProtoAction: - cont_dim = self.categorical_start - cat_dim = self.final_dim - cont_dim - - # Sample according to strategy within the latent dimension. - cont_action = (torch.rand(size=(cont_dim,)) * 2 - 1).view(1, -1) - cat_action = torch.rand(size=(cat_dim,)).view(1, -1) - return ProtoAction(torch.concat([cont_action, cat_action], dim=1).float()) - - def pad_center_latent( - self, subproto: ProtoAction, lscs: torch.Tensor - ) -> ProtoAction: - return subproto - - @time_record("neighborhood") - def neighborhood( - self, - raw_action: ProtoAction, - neighbor_parameters: NeighborParameters = DEFAULT_NEIGHBOR_PARAMETERS, - ) -> list[KnobSpaceAction]: - num_neighbors = neighbor_parameters["knob_num_nearest"] - span = neighbor_parameters["knob_span"] - env_action = self.nearest_env_action(raw_action) - cat_start = self.categorical_start - - valid_env_actions = [env_action] - for _ in range(num_neighbors): - adjust_mask = self.np_random.integers(-span, span + 1, (self.final_dim,)) - if np.sum(adjust_mask) == 0: - continue - - new_action = KnobSpaceAction(copy.deepcopy(env_action)) - cont_it = 0 - for knobname, knob in self.spaces.items(): - # Iterate through every knob and adjust based on the sampled mask. - if isinstance(knob, LatentCategoricalKnob): - new_value = knob.sample_weights( - weights=raw_action[ - cat_start : cat_start + knob.num_elems - ].tolist() - ) - new_action[knobname] = new_value - cat_start += knob.num_elems - else: - assert isinstance(knob, LatentKnob) - if adjust_mask[cont_it] != 0: - new_value = knob.shift_offset( - new_action[knobname], adjust_mask[cont_it] - ) - if new_value is not None: - # The adjustment has produced a new quantized value. - new_action[knobname] = new_value - cont_it += 1 - - # Check that we aren't adding superfluous actions. 
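As transform_noise() above shows, exploration noise only touches the continuous knob block: it is tanh-squashed, perturbed additively, and clamped back into [-1, 1], while categorical blocks are only softmaxed. A runnable fragment of that continuous path with made-up tensors:

import torch

cont = torch.tensor([[0.5, -2.0, 3.0]])    # continuous knob logits
noise = torch.tensor([[0.2, 0.0, -0.5]])   # exploration noise for those dims
out = torch.clamp(torch.tanh(cont) + noise, -1.0, 1.0)
print(out)  # every entry stays inside [-1, 1]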
- # assert self.contains(new_action) - valid_env_actions.append(new_action) - - queued_actions = set() - real_actions = [] - for new_action in valid_env_actions: - sig = pformat(new_action) - if sig not in queued_actions: - real_actions.append(new_action) - queued_actions.add(sig) - - return real_actions - - def generate_state_container( - self, - prev_state_container: Optional[KnobSpaceContainer], - action: Optional[KnobSpaceAction], - connection: Connection[Any], - queries: QueryMap, - ) -> KnobSpaceContainer: - return fetch_server_knobs(connection, self.tables, self.knobs, queries=queries) - - def generate_action_plan( - self, action: KnobSpaceAction, sc: KnobSpaceContainer, **kwargs: Any - ) -> tuple[list[str], list[str]]: - config_changes = [] - sql_commands = [] - require_cleanup = False - - for act, val in action.items(): - assert act in self.knobs, f"{self.knobs} {act}" - assert self.knobs[act].knob_class != KnobClass.QUERY - if self.knobs[act].knob_class == KnobClass.TABLE: - if act not in sc or sc[act] != val: - # Need to perform a VACUUM ANALYZE. - require_cleanup = True - - tbl = self.knobs[act].table_name - knob = self.knobs[act].knob_name - sql_commands.append(f"ALTER TABLE {tbl} SET ({knob} = {val})") - # Rewrite immediately. - sql_commands.append(f"VACUUM FULL {tbl}") - - elif self.knobs[act].knob_type == SettingType.BOOLEAN: - # Boolean knob. - assert self.knobs[act].knob_class == KnobClass.KNOB - flag = "on" if val == 1 else "off" - config_changes.append(f"{act} = {flag}") - - elif is_knob_enum(self.knobs[act].knob_type): - out_val = resolve_enum_value(self.knobs[act], val, all_knobs=action) - config_changes.append(f"{act} = {out_val}") - - else: - # Integer or float knob. - assert self.knobs[act].knob_class == KnobClass.KNOB - kt = self.knobs[act].knob_type - param = ( - "{act} = {val:.2f}" - if kt == SettingType.FLOAT - else "{act} = {val:.0f}" - ) - assert ( - kt == SettingType.FLOAT - or kt == SettingType.INTEGER - or kt == SettingType.BYTES - or kt == SettingType.INTEGER_TIME - ) - config_changes.append(param.format(act=act, val=val)) - - if require_cleanup: - for tbl in self.tables: - sql_commands.append(f"VACUUM ANALYZE {tbl}") - sql_commands.append(f"CHECKPOINT") - return config_changes, sql_commands - - def generate_delta_action_plan( - self, action: KnobSpaceAction, sc: KnobSpaceContainer, **kwargs: Any - ) -> tuple[list[str], list[str]]: - return self.generate_action_plan(action, sc, **kwargs) diff --git a/tune/protox/env/space/latent_space/latent_query_space.py b/tune/protox/env/space/latent_space/latent_query_space.py deleted file mode 100644 index a5017608..00000000 --- a/tune/protox/env/space/latent_space/latent_query_space.py +++ /dev/null @@ -1,63 +0,0 @@ -from typing import Any, Optional, Tuple - -import psycopg - -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.space.latent_space.latent_knob_space import LatentKnobSpace -from tune.protox.env.space.primitive_space import QuerySpace -from tune.protox.env.types import ( - QueryMap, - QuerySpaceAction, - QuerySpaceContainer, - QuerySpaceKnobAction, -) - - -class LatentQuerySpace(LatentKnobSpace, QuerySpace): - def __init__( - self, - artifact_manager: Optional[ArtifactManager] = None, - *args: Any, - **kwargs: Any - ) -> None: - # Only manually initialize against QuerySpace. 
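The action plan above renders knob changes as plain "knob = value" strings for postgresql.conf, with booleans mapped to on/off and numeric knobs formatted by type. A hedged helper showing only that rendering (the knob names and values here are examples, not taken from the tuning config):

def render_conf_change(name: str, value, kind: str) -> str:
    if kind == "boolean":
        return f"{name} = {'on' if value == 1 else 'off'}"
    if kind == "float":
        return f"{name} = {value:.2f}"
    return f"{name} = {value:.0f}"  # integer / bytes / integer_time knobs

print(render_conf_change("enable_seqscan", 1, "boolean"))     # enable_seqscan = on
print(render_conf_change("random_page_cost", 1.75, "float"))  # random_page_cost = 1.75
print(render_conf_change("work_mem", 65536, "integer"))       # work_mem = 65536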
- QuerySpace.__init__(self, *args, **kwargs) - self.artifact_manager = artifact_manager - self.name = "query" - - def uses_embed(self) -> bool: - return False - - def generate_state_container( - self, - prev_state: Optional[QuerySpaceContainer], - action: Optional[QuerySpaceAction], - connection: psycopg.Connection[Any], - queries: QueryMap, - ) -> QuerySpaceContainer: - sc = super().generate_state_container(prev_state, action, connection, queries) - if action is not None: - for k, v in action.items(): - assert k in sc - sc[k] = v - return sc - - def generate_action_plan( - self, action: QuerySpaceAction, sc: QuerySpaceContainer, **kwargs: Any - ) -> tuple[list[str], list[str]]: - return [], [] - - def generate_delta_action_plan( - self, action: QuerySpaceAction, sc: QuerySpaceContainer, **kwargs: Any - ) -> tuple[list[str], list[str]]: - return [], [] - - def extract_query(self, action: QuerySpaceAction) -> QuerySpaceKnobAction: - ret_knobs = QuerySpaceKnobAction({}) - for k, v in action.items(): - assert k in self.knobs - ret_knobs[self.knobs[k]] = v - return ret_knobs - - def replace_query(self, query: QuerySpaceKnobAction) -> QuerySpaceAction: - return QuerySpaceAction({k.name(): v for k, v in query.items()}) diff --git a/tune/protox/env/space/latent_space/lsc_index_space.py b/tune/protox/env/space/latent_space/lsc_index_space.py deleted file mode 100644 index fe92d696..00000000 --- a/tune/protox/env/space/latent_space/lsc_index_space.py +++ /dev/null @@ -1,127 +0,0 @@ -from typing import Any, Callable, Optional - -import psycopg -import torch - -from tune.protox.embedding.vae import VAE -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.lsc.lsc import LSC -from tune.protox.env.space.latent_space.latent_index_space import LatentIndexSpace -from tune.protox.env.space.primitive.index import IndexAction -from tune.protox.env.types import ( - IndexSpaceContainer, - IndexSpaceRawSample, - ProtoAction, - QueryMap, - TableAttrAccessSetsMap, - TableAttrListMap, -) - - -class LSCIndexSpace(LatentIndexSpace): - def __init__( - self, - tables: list[str], - max_num_columns: int, - max_indexable_attributes: int, - seed: int, - rel_metadata: TableAttrListMap, - attributes_overwrite: TableAttrListMap, - tbl_include_subsets: TableAttrAccessSetsMap, - vae: VAE, - index_space_aux_type: bool = False, - index_space_aux_include: bool = False, - deterministic_policy: bool = False, - latent_dim: int = 0, - index_output_transform: Optional[Callable[[ProtoAction], ProtoAction]] = None, - index_noise_scale: Optional[ - Callable[[ProtoAction, Optional[torch.Tensor]], ProtoAction] - ] = None, - artifact_manager: Optional[ArtifactManager] = None, - lsc: Optional[LSC] = None, - ) -> None: - - super().__init__( - tables, - max_num_columns, - max_indexable_attributes, - seed, - rel_metadata, - attributes_overwrite, - tbl_include_subsets, - vae, - index_space_aux_type, - index_space_aux_include, - deterministic_policy, - latent_dim, - index_output_transform, - index_noise_scale, - artifact_manager, - ) - - assert lsc is not None - self.lsc = lsc - - def uses_embed(self) -> bool: - return True - - def pad_center_latent( - self, subproto: ProtoAction, lscs: torch.Tensor - ) -> ProtoAction: - subproto = ProtoAction(subproto + self.lsc.inverse_scale(lscs)) - - if self.index_space_aux_type: - aux_types = torch.tensor([[1.0, 0.0]] * subproto.shape[0]).float() - subproto = ProtoAction(torch.concat([aux_types, subproto], dim=1)) - - if self.index_space_aux_include: - aux_inc = torch.tensor( - 
[[0] * self.max_inc_columns] * subproto.shape[0] - ).float() - subproto = ProtoAction(torch.concat([subproto, aux_inc], dim=1)) - return subproto - - def from_latent(self, subproto: ProtoAction) -> ProtoAction: - subproto = self.lsc.apply_bias(subproto) - return super().from_latent(subproto) - - def to_latent(self, env_act: list[IndexSpaceRawSample]) -> ProtoAction: - latent = super().to_latent(env_act) - assert len(latent.shape) == 2 - - if latent.shape[-1] != self.latent_dim(): - so = 2 if self.index_space_aux_type else 0 - latent[:, so : so + self.latent_dim()] = self.lsc.apply_bias( - ProtoAction(latent[:, so : so + self.latent_dim()]) - ) - return latent - else: - return self.lsc.apply_bias(latent) - - def generate_state_container( - self, - prev_state: Optional[IndexSpaceContainer], - action: Optional[IndexSpaceRawSample], - connection: psycopg.Connection[Any], - queries: QueryMap, - ) -> IndexSpaceContainer: - ias = super().generate_state_container(prev_state, action, connection, queries) - - new_ia: Optional[IndexAction] = None - if action: - new_ia = self.to_action(action) - - for ia in ias: - if prev_state and ia in prev_state: - # Preserve the bias. - ia.bias = prev_state[prev_state.index(ia)].bias - - elif new_ia and ia == new_ia: - ia.bias = new_ia.bias - - return ias - - def to_action(self, env_act: IndexSpaceRawSample) -> IndexAction: - ia = super().to_action(env_act) - ia.bias = self.lsc.current_bias() - return ia diff --git a/tune/protox/env/space/primitive/__init__.py b/tune/protox/env/space/primitive/__init__.py deleted file mode 100644 index ba82291a..00000000 --- a/tune/protox/env/space/primitive/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -from enum import Enum, unique - - -@unique -class SettingType(Enum): - INVALID = -1 - BOOLEAN = 0 - INTEGER = 1 - BYTES = 2 - INTEGER_TIME = 3 - FLOAT = 4 - - BINARY_ENUM = 5 - SCANMETHOD_ENUM = 6 - SCANMETHOD_ENUM_CATEGORICAL = 7 - QUERY_TABLE_ENUM = 9 - - -@unique -class KnobClass(Enum): - INVALID = -1 - KNOB = 0 - TABLE = 1 - QUERY = 2 - - -def is_knob_enum(knob_type: SettingType) -> bool: - return knob_type in [ - SettingType.BINARY_ENUM, - SettingType.SCANMETHOD_ENUM, - SettingType.QUERY_TABLE_ENUM, - ] - - -def is_boolean(knob_type: SettingType) -> bool: - return knob_type in [ - SettingType.BOOLEAN, - SettingType.BINARY_ENUM, - SettingType.SCANMETHOD_ENUM, - ] - - -def is_binary_enum(knob_type: SettingType) -> bool: - return knob_type in [ - SettingType.BINARY_ENUM, - SettingType.SCANMETHOD_ENUM, - ] diff --git a/tune/protox/env/space/primitive/index.py b/tune/protox/env/space/primitive/index.py deleted file mode 100644 index 070bf092..00000000 --- a/tune/protox/env/space/primitive/index.py +++ /dev/null @@ -1,133 +0,0 @@ -from typing import ClassVar, Optional, Type, TypeVar - -from tune.protox.env.types import IndexSpaceRawSample - - -class IndexAction(object): - IA = TypeVar("IA", bound="IndexAction") - - index_name_counter = 0 - index_name_map: dict["IndexAction", str] = dict() - - def __init__( - self, - idx_type: str, - tbl: str, - columns: list[str], - col_idxs: Optional[list[int]], - inc_names: list[str], - raw_repr: Optional[IndexSpaceRawSample], - bias: float = 0.0, - ) -> None: - - self.idx_type = idx_type - self.tbl_name = tbl - self.columns = columns - self.col_idxs = col_idxs - self.inc_names = inc_names - self.raw_repr = raw_repr - self.bias = bias - - @property - def is_valid(self) -> bool: - return ( - self.tbl_name is not None - and self.columns is not None - and len(self.columns) > 0 - ) - - @classmethod - 
def construct_md( - cls: Type[IA], - idx_name: str, - table: str, - idx_type: str, - columns: list[str], - inc_names: list[str], - ) -> IA: - ia = cls( - idx_type=idx_type, - tbl=table, - columns=columns, - col_idxs=None, - inc_names=inc_names, - raw_repr=None, - bias=0.0, - ) - assert ( - ia.get_index_name() == idx_name - ), f"ia.get_index_name()={ia.get_index_name()} but idx_name={idx_name}" - return ia - - def sql(self, add: bool, allow_fail: bool = False) -> str: - idx_name = self.get_index_name() - if not add: - if allow_fail: - return f"DROP INDEX IF EXISTS {idx_name}" - return f"DROP INDEX {idx_name}" - - return "CREATE INDEX {flag} {idx_name} ON {tbl_name} USING {idx_type} ({columns}) {inc_clause}".format( - flag="IF NOT EXISTS" if allow_fail else "", - idx_name=idx_name, - tbl_name=self.tbl_name, - idx_type=self.idx_type, - columns=",".join(self.columns), - inc_clause=( - "" - if len(self.inc_names) == 0 - else "INCLUDE (" + ",".join(self.inc_names) + ")" - ), - ) - - # A given index name (like "index5") maps one-to-one to the function of an - # index (i.e. its table, columns, etc.). - def get_index_name(self) -> str: - if self not in IndexAction.index_name_map: - IndexAction.index_name_map[self] = f"index{IndexAction.index_name_counter}" - IndexAction.index_name_counter += 1 - - return IndexAction.index_name_map[self] - - # This equality/hash mechanism is purely based off of index identity. - # We ensure that all other flags are exclusive from a "validity" pre-check. - # - # For instance, when de-duplication, one needs to check that the IndexAction - # can *actually* be used before relying on the identity test. Can't drop an - # index that doesn't exist; can't create an index that does for instance. - def __eq__(self, other: object) -> bool: - if type(other) is type(self): - assert isinstance(other, IndexAction) - ts = set(self.inc_names) - os = set(other.inc_names) - is_eq = ( - self.idx_type == other.idx_type - and self.tbl_name == other.tbl_name - and self.columns == other.columns - and ts == os - ) - return is_eq - return False - - def __hash__(self) -> int: - h = hash( - ( - self.idx_type, - self.tbl_name, - tuple(self.columns), - tuple(sorted(set(self.inc_names))), - ) - ) - return h - - def __repr__(self) -> str: - return "CREATE {idx_name} ON {tbl_name} USING {idx_type} ({columns}) {inc_clause}".format( - idx_name=self.get_index_name(), - tbl_name=self.tbl_name, - idx_type=self.idx_type, - columns=",".join(self.columns), - inc_clause=( - "" - if len(self.inc_names) == 0 - else "INCLUDE (" + ",".join(self.inc_names) + ")" - ), - ) diff --git a/tune/protox/env/space/primitive/knob.py b/tune/protox/env/space/primitive/knob.py deleted file mode 100644 index a09ce942..00000000 --- a/tune/protox/env/space/primitive/knob.py +++ /dev/null @@ -1,355 +0,0 @@ -import math -from typing import Any, Optional, Sequence, Tuple, TypedDict, Union, cast - -import numpy as np -from gymnasium.spaces import Box, Discrete, Space -from gymnasium.spaces.utils import flatdim, flatten, flatten_space, unflatten -from numpy.typing import NDArray - -from tune.protox.env.space.primitive import ( - KnobClass, - SettingType, - is_boolean, - is_knob_enum, -) - - -def full_knob_name( - table: Optional[str] = None, query: Optional[str] = None, knob_name: str = "" -) -> str: - assert knob_name != "" - - if table is not None: - return f"{table}_{knob_name}" - elif query is not None: - return f"{query}_{knob_name}" - else: - return knob_name - - -def _parse_setting_dtype(type_str: str) -> tuple[SettingType, 
Any]: - return { - "boolean": (SettingType.BOOLEAN, np.int32), - "integer": (SettingType.INTEGER, np.int32), - "bytes": (SettingType.BYTES, np.int32), - "integer_time": (SettingType.INTEGER_TIME, np.int32), - "float": (SettingType.FLOAT, np.float32), - "binary_enum": (SettingType.BINARY_ENUM, np.int32), - "scanmethod_enum": (SettingType.SCANMETHOD_ENUM, np.int32), - "query_table_enum": (SettingType.QUERY_TABLE_ENUM, np.int32), - }[type_str] - - -class KnobMetadata(TypedDict, total=False): - type: str - min: float - max: float - quantize: int - log_scale: int - unit: int - values: list[str] - default_value: int - - -class Knob(Space[Any]): - def __init__( - self, - table_name: Optional[str], - query_name: Optional[str], - knob_name: str, - metadata: KnobMetadata, - do_quantize: bool, - default_quantize_factor: int, - seed: int, - ) -> None: - - self.table_name = table_name - self.query_name = query_name - self.knob_name = knob_name - if table_name is not None: - self.knob_class = KnobClass.TABLE - elif query_name is not None: - self.knob_class = KnobClass.QUERY - else: - self.knob_class = KnobClass.KNOB - - self.knob_type, self.knob_dtype = _parse_setting_dtype(metadata["type"]) - self.knob_unit = metadata["unit"] - self.quantize_factor = ( - ( - default_quantize_factor - if metadata["quantize"] == -1 - else metadata["quantize"] - ) - if do_quantize - else 0 - ) - self.log2_scale = metadata["log_scale"] - assert not self.log2_scale or (self.log2_scale and self.quantize_factor == 0) - - # Setup all the metadata for the knob value. - self.space_correction_value = 0.0 - self.space_min_value = self.min_value = metadata["min"] - self.space_max_value = self.max_value = metadata["max"] - self.bucket_size = 0.0 - if self.log2_scale: - self.space_correction_value = 1.0 - self.space_min_value - self.space_min_value += self.space_correction_value - self.space_max_value += self.space_correction_value - - self.space_min_value = math.floor(math.log2(self.space_min_value)) - self.space_max_value = math.ceil(math.log2(self.space_max_value)) - elif self.quantize_factor > 0: - if self.knob_type == SettingType.FLOAT: - self.bucket_size = ( - self.max_value - self.min_value - ) / self.quantize_factor - else: - max_buckets = min(self.max_value - self.min_value, self.quantize_factor) - self.bucket_size = (self.max_value - self.min_value) / max_buckets - - super().__init__((), self.knob_dtype, seed=seed) - - def name(self) -> str: - # Construct the name. - return full_knob_name(self.table_name, self.query_name, self.knob_name) - - def to_internal(self, env_value: Any) -> Any: - if self.log2_scale: - return math.log2(env_value + self.space_correction_value) - return env_value - - def to_quantize(self, raw_value: Any) -> Any: - """Adjusts the raw value to the quantized bin value.""" - assert raw_value >= self.space_min_value and raw_value <= self.space_max_value - - # Handle log scaling values. - if self.log2_scale: - # We integralize the log-space to exploit the log-scaling and discretization. - proj_value = pow(2, round(raw_value)) - # Possibly adjust with the correction bias now. - proj_value -= self.space_correction_value - return np.clip(proj_value, self.min_value, self.max_value) - - # If we don't quantize, don't quantize. 
- if self.quantize_factor is None or self.quantize_factor == 0: - return np.clip(raw_value, self.min_value, self.max_value) - - # FIXME: We currently basically bias aggressively against the lower bucket, under the prior - # belief that the incremental gain of going higher is less potentially / more consumption - # and so it is ok to bias lower. - quantized_value = ( - math.floor(round(raw_value / self.bucket_size, 8)) * self.bucket_size - ) - return np.clip(quantized_value, self.min_value, self.max_value) - - def project_scraped_setting(self, value: Any) -> Any: - """Projects a point from the DBMS into the (possibly) more constrained environment space.""" - # Constrain the value to be within the actual min/max range. - value = np.clip(value, self.min_value, self.max_value) - return self.to_quantize(self.to_internal(value)) - - def resolve_per_query_knob(self, val: Any, all_knobs: dict[str, Any] = {}) -> str: - assert self.knob_class == KnobClass.QUERY - if is_knob_enum(self.knob_type): - return resolve_enum_value(self, val, all_knobs=all_knobs) - else: - kt = self.knob_type - if kt == SettingType.FLOAT: - param = f"{val:.2f}" - elif kt == SettingType.BOOLEAN: - param = "on" if val == 1 else "off" - else: - param = f"{val:d}" - - return f"Set ({self.knob_name} {param})" - - @property - def is_np_flattenable(self) -> bool: - """Checks whether this space can be flattened to a :class:`spaces.Box`.""" - return True - - def contains(self, x: Any) -> bool: - """Return boolean specifying if x is a valid member of this space.""" - return cast(bool, x >= self.min_value and x <= self.max_value) - - def to_jsonable(self, sample_n: Sequence[Any]) -> list[Any]: - """Convert a batch of samples from this space to a JSONable data type.""" - return [sample for sample in sample_n] - - def from_jsonable(self, sample_n: Sequence[Union[float, int]]) -> Any: - """Convert a JSONable data type to a batch of samples from this space.""" - return np.array(sample_n).astype(self.dtype) - - def sample(self, mask: None = None) -> Any: - """Samples a point from the environment action space subject to action space constraints.""" - raise NotImplementedError() - - def invert(self, value: Any) -> Any: - if is_boolean(self.knob_type): - if value == 1: - return 0 - else: - return 1 - return value - - def __hash__(self) -> int: - return hash(self.name()) - - def __eq__(self, other: object) -> bool: - if type(other) is type(self): - assert isinstance(other, Knob) - return self.name() == other.name() - - return False - - -@flatten.register(Knob) -def _flatten_knob(space: Knob, x: Any) -> NDArray[Any]: - return np.array([x], np.float32) - - -@unflatten.register(Knob) -def _unflatten_knob(space: Knob, x: NDArray[Any]) -> Any: - return x[0] - - -@flatten_space.register(Knob) -def _flatten_space_knob(space: Knob) -> Box: - return Box( - low=space.space_min_value, - high=space.space_max_value, - shape=(1,), - dtype=space.knob_dtype, - ) - - -@flatdim.register(Knob) -def _flatdim_knob(space: Knob) -> int: - return 1 - - -def _categorical_elems(type_str: str) -> tuple[SettingType, int]: - return { - "scanmethod_enum_categorical": (SettingType.SCANMETHOD_ENUM_CATEGORICAL, 2), - }[type_str] - - -class CategoricalKnob(Discrete): - def __init__( - self, - table_name: Optional[str], - query_name: Optional[str], - knob_name: str, - metadata: KnobMetadata, - seed: int, - ) -> None: - - self.table_name = table_name - self.query_name = query_name - self.knob_name = knob_name - assert self.table_name is None and self.query_name is not None - 
self.knob_class = KnobClass.QUERY - - if metadata["type"] == "query_table_enum": - self.knob_type = SettingType.QUERY_TABLE_ENUM - self.num_elems = len(metadata["values"]) + 1 - self.values = metadata["values"] - else: - self.knob_type, self.num_elems = _categorical_elems(metadata["type"]) - self.default_value = metadata["default_value"] - super().__init__(self.num_elems, seed=seed) - - def name(self) -> str: - # Construct the name. - return full_knob_name(self.table_name, self.query_name, self.knob_name) - - def project_scraped_setting(self, value: Any) -> Any: - """Projects a point from the DBMS into the (possibly) more constrained environment space.""" - # Constrain the value to be within the actual min/max range. - raise NotImplementedError() - - def sample(self, mask: Optional[NDArray[np.int8]] = None) -> Any: - """Samples a point from the environment action space subject to action space constraints.""" - return np.random.randint(0, self.num_elems) - - def resolve_per_query_knob(self, val: Any, all_knobs: dict[str, Any] = {}) -> str: - assert self.knob_class == KnobClass.QUERY - assert is_knob_enum(self.knob_type) - return resolve_enum_value(self, val, all_knobs=all_knobs) - - def invert(self, value: Any) -> Any: - return value - - def __hash__(self) -> int: - return hash(self.name()) - - def __eq__(self, other: object) -> bool: - if type(other) is type(self): - assert isinstance(other, CategoricalKnob) - return self.name() == other.name() - - return False - - -def resolve_enum_value( - knob: Union[Knob, CategoricalKnob], value: Any, all_knobs: dict[str, Any] = {} -) -> str: - assert is_knob_enum(knob.knob_type) - if knob.knob_type == SettingType.BINARY_ENUM: - return "on" if value == 1 else "off" - - if knob.knob_type == SettingType.QUERY_TABLE_ENUM: - assert isinstance(knob, CategoricalKnob) - integral_value = int(value) - if integral_value == 0: - return "" - - assert "max_worker_processes" in all_knobs - max_workers = all_knobs["max_worker_processes"] - - selected_table = knob.values[integral_value - 1] - # FIXME: pg_hint_plan lets specifying any and then pg will tweak it down. 
- return f"Parallel({selected_table} {max_workers})" - - if knob.knob_type in [ - SettingType.SCANMETHOD_ENUM, - SettingType.SCANMETHOD_ENUM_CATEGORICAL, - ]: - assert "_scanmethod" in knob.knob_name - tbl = knob.knob_name.split("_scanmethod")[0] - if value == 1: - return f"IndexOnlyScan({tbl})" - return f"SeqScan({tbl})" - - raise ValueError(f"Unsupported knob num {knob.knob_type}") - - -def _create_knob( - table_name: Optional[str], - query_name: Optional[str], - knob_name: str, - metadata: KnobMetadata, - do_quantize: bool, - default_quantize_factor: int, - seed: int, -) -> Union[Knob, CategoricalKnob]: - - if "default_value" in metadata: - return CategoricalKnob( - table_name=table_name, - query_name=query_name, - knob_name=knob_name, - metadata=metadata, - seed=seed, - ) - - return Knob( - table_name=table_name, - query_name=query_name, - knob_name=knob_name, - metadata=metadata, - do_quantize=do_quantize, - default_quantize_factor=default_quantize_factor, - seed=seed, - ) diff --git a/tune/protox/env/space/primitive/latent_knob.py b/tune/protox/env/space/primitive/latent_knob.py deleted file mode 100644 index 40fde137..00000000 --- a/tune/protox/env/space/primitive/latent_knob.py +++ /dev/null @@ -1,139 +0,0 @@ -from typing import Any, Optional, Sequence, Union - -import gymnasium as gym -import numpy as np - -from tune.protox.env.space.primitive import SettingType, is_boolean -from tune.protox.env.space.primitive.knob import CategoricalKnob, Knob, KnobMetadata - - -class LatentKnob(Knob): - def __init__( - self, - table_name: Optional[str], - query_name: Optional[str], - knob_name: str, - metadata: KnobMetadata, - do_quantize: bool, - default_quantize_factor: int, - seed: int, - ): - - super().__init__( - table_name, - query_name, - knob_name, - metadata, - do_quantize, - default_quantize_factor, - seed, - ) - - def _process(self, raw_value: Any) -> Any: - if is_boolean(self.knob_type): - return round(raw_value) - elif self.knob_type == SettingType.FLOAT: - return round(raw_value, 2) - else: - # Consistently apply rounding. - return int(raw_value) - - def to_latent(self, env_value: Any) -> Any: - """Projects a point from the environment space to the network space.""" - transform_value = self.to_internal(env_value) - # Scale into the network space. - relative_point = (transform_value - self.space_min_value) / ( - self.space_max_value - self.space_min_value - ) - return 2 * relative_point - 1 - - def from_latent(self, latent_value: Any) -> Any: - # This functionally assumes that the network_space and internal space maps linearly. - # If that assumption doesn't hold, project_embedding_into_internal_space will do something wonky to the values. - # TODO(wz2): Are there latent spaces that we don't want a linear mapping? Or we prefer a piecewise linear function? - - # First project into the [space_min_value, space_max_value] range. - int_space = (self.space_max_value - self.space_min_value) * ( - np.round((latent_value + 1) / 2.0, 8) - ) + self.space_min_value - raw_value = self.to_quantize(int_space) - if is_boolean(self.knob_type): - return round(raw_value) - elif self.knob_type == SettingType.FLOAT: - return round(raw_value, 2) - else: - # Consistently apply rounding. - return int(raw_value) - - def shift_offset(self, raw: Any, bin_shift: int) -> Any: - # Specially handle the case of booleans. 
- if is_boolean(self.knob_type): - if raw == 0 and bin_shift > 0: - return 1 - elif raw == 1 and bin_shift < 0: - return 0 - return None - - if self.log2_scale: - nvalue = self.to_internal(raw) + bin_shift - else: - nvalue = raw + self.bucket_size * bin_shift - - if nvalue < self.space_min_value or nvalue > self.space_max_value: - # Exceeded boundaries. - return None - - raw_value = self._process(self.to_quantize(nvalue)) - if raw_value == raw: - # Don't return duplicates. - return None - - return raw_value - - -class LatentCategoricalKnob(CategoricalKnob): - def to_latent(self, env_value: Any) -> Any: - return gym.spaces.utils.flatten(self, env_value) - - def from_latent(self, latent_value: Any) -> Any: - return np.argmax(latent_value) - - def sample_weights(self, weights: Optional[Sequence[float]] = None) -> Any: - return np.random.choice( - [i for i in range(self.num_elems)], - p=( - (weights / np.sum(weights)) - if weights is not None and np.sum(weights) > 0 - else None - ), - ) - - -def _create_latent_knob( - table_name: Optional[str], - query_name: Optional[str], - knob_name: str, - metadata: KnobMetadata, - do_quantize: bool, - default_quantize_factor: int, - seed: int, -) -> Union[LatentKnob, LatentCategoricalKnob]: - - if "default_value" in metadata: - return LatentCategoricalKnob( - table_name=table_name, - query_name=query_name, - knob_name=knob_name, - metadata=metadata, - seed=seed, - ) - - return LatentKnob( - table_name=table_name, - query_name=query_name, - knob_name=knob_name, - metadata=metadata, - do_quantize=do_quantize, - default_quantize_factor=default_quantize_factor, - seed=seed, - ) diff --git a/tune/protox/env/space/primitive_space/__init__.py b/tune/protox/env/space/primitive_space/__init__.py deleted file mode 100644 index 407ece01..00000000 --- a/tune/protox/env/space/primitive_space/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from tune.protox.env.space.primitive_space.index_policy import IndexPolicy -from tune.protox.env.space.primitive_space.index_space import IndexSpace -from tune.protox.env.space.primitive_space.knob_space import KnobSpace -from tune.protox.env.space.primitive_space.query_space import QuerySpace - -__all__ = ["KnobSpace", "QuerySpace", "IndexSpace", "IndexPolicy"] diff --git a/tune/protox/env/space/primitive_space/index_policy.py b/tune/protox/env/space/primitive_space/index_policy.py deleted file mode 100644 index dd03d209..00000000 --- a/tune/protox/env/space/primitive_space/index_policy.py +++ /dev/null @@ -1,307 +0,0 @@ -import copy -from typing import Any, List, Sequence, cast - -import numpy as np -import torch -from gymnasium import spaces -from gymnasium.spaces import Box -from torch.nn.functional import softmax - -from tune.protox.env.space.primitive.index import IndexAction -from tune.protox.env.types import ( - IndexSpaceRawSample, - TableAttrAccessSetsMap, - TableAttrListMap, -) - - -class IndexPolicy: - def __init__( - self, - tables: list[str], - rel_metadata: TableAttrListMap, - tbl_include_subsets: TableAttrAccessSetsMap, - max_key_columns: int, - max_num_columns: int, - index_space_aux_type: bool = False, - index_space_aux_include: bool = False, - deterministic: bool = False, - ): - - self.tables = tables - self.rel_metadata = rel_metadata - self.tbl_include_subsets = tbl_include_subsets - self.num_tables = len(self.tables) - self.max_key_columns = max_key_columns - self.max_num_columns = max_num_columns - self.deterministic = deterministic - - self.num_index_types = 2 - self.index_types = ["btree", "hash"] - 
self.index_space_aux_type = index_space_aux_type - self.index_space_aux_include = index_space_aux_include - - def spaces(self, seed: int) -> Sequence[spaces.Space[Any]]: - aux: list[spaces.Space[Any]] = [ - # One-hot encoding for the tables. - spaces.Discrete(self.num_tables, seed=seed), - # Ordering. Note that we use the postgres style ordinal notation. 0 is illegal/end-of-index. - *( - [spaces.Discrete(self.max_key_columns + 1, seed=seed)] - * self.max_key_columns - ), - ] - aux_include = [] - aux_type = [] - - if self.index_space_aux_type: - aux_type = [spaces.Discrete(self.num_index_types, seed=seed)] - - if self.index_space_aux_include > 0: - aux_include = [ - Box( - low=np.zeros(self.max_num_columns), - high=1.0, - seed=seed, - dtype=np.float32, - ) - ] - - return cast(list[spaces.Space[Any]], aux_type + aux + aux_include) - - def to_action(self, act: IndexSpaceRawSample) -> IndexAction: - # First index is the index type. - tbl_name = ( - self.tables[act[1]] if self.index_space_aux_type else self.tables[act[0]] - ) - idx_type = 0 - inc_cols: list[int] = [] - if self.index_space_aux_type and self.index_space_aux_include: - idx_type = act[0] - columns = act[2:-1] - inc_cols = act[-1] - elif self.index_space_aux_type: - idx_type = act[0] - columns = act[2:] - elif self.index_space_aux_include: - columns = act[1:-1] - inc_cols = act[-1] - else: - columns = act[1:] - - col_names = [] - col_idxs = [] - for i in columns: - if i == 0: - break - # Create the index key. - col_names.append(self.rel_metadata[tbl_name][i - 1]) - col_idxs.append(i - 1) - - if len(inc_cols) > 0: - # Get all include columns. - assert f"{tbl_name}_allcols" in self.rel_metadata - valid_names = [n for n in self.rel_metadata[f"{tbl_name}_allcols"]] - inc_names = [ - valid_names[i] - for i, val in enumerate(inc_cols) - if val == 1.0 and valid_names[i] not in col_names - ] - else: - inc_names = [] - - return IndexAction( - self.index_types[idx_type], - tbl_name, - col_names, - col_idxs, - inc_names, - act, - bias=0, - ) - - def sample_dist( - self, - action: torch.Tensor, - np_random: np.random.Generator, - sample_num_columns: bool = False, - break_early: bool = True, - ) -> IndexSpaceRawSample: - # Acquire the table index either deterministically or not. - if self.deterministic: - tbl_index = torch.argmax(action[: self.num_tables]).item() - else: - tbl_index = torch.multinomial(action[: self.num_tables], 1).item() - - # Get the number of columns. - num_columns = len(self.rel_metadata[self.tables[int(tbl_index)]]) - use_columns = num_columns - if sample_num_columns: - # If we sample columns, sample it. - use_columns = np_random.integers(1, num_columns + 1) - - # Prune off. - action = action[self.num_tables :] - - assert len(action.shape) == 1 - action = action.clone() - action = action.reshape((self.max_key_columns, self.max_key_columns + 1)) - action = action[:, 0 : num_columns + 1] - - if not break_early: - # Zero out the early break odds. - action[:, 0] = 0 - - current_index = 0 - col_indexes: list[int] = [] - while current_index < action.shape[0] and len(col_indexes) != use_columns: - if not torch.any(action[current_index]): - # No more positive probability to sample. - break - - # Acquire a column index depending on determinism or not. - if self.deterministic: - col_index = int(torch.argmax(action[current_index]).item()) - else: - col_index = int(torch.multinomial(action[current_index], 1).item()) - - if break_early and col_index == 0: - # We've explicitly decided to terminate it early. 
- break - - # Directly use the col_index. Observe that "0" is the illegal. - if col_index not in col_indexes: - action[:, col_index] = 0 - col_indexes.append(col_index) - - # Always advance since we don't let you take duplicates. - current_index += 1 - - np_col_indexes = np.pad( - np.array(col_indexes), - [0, self.max_key_columns - len(col_indexes)], - mode="constant", - constant_values=0, - ).astype(int) - if self.index_space_aux_type and self.index_space_aux_include: - return IndexSpaceRawSample( - ( - 0, - tbl_index, - *np_col_indexes, - np.array([0] * self.max_num_columns, dtype=np.float32), - ) - ) - elif self.index_space_aux_include: - return IndexSpaceRawSample( - ( - tbl_index, - *np_col_indexes, - np.array([0] * self.max_num_columns, dtype=np.float32), - ) - ) - elif self.index_space_aux_type: - return IndexSpaceRawSample((0, tbl_index, *np_col_indexes)) - else: - return IndexSpaceRawSample((tbl_index, *np_col_indexes)) - - def structural_neighbors( - self, action: IndexSpaceRawSample - ) -> list[IndexSpaceRawSample]: - idx_type = 0 - inc_columns: list[int] = [] - if self.index_space_aux_type and self.index_space_aux_include: - tbl_index = action[1] - columns = action[2:-1] - inc_columns = action[-1] - elif self.index_space_aux_type: - tbl_index = action[1] - columns = action[2:] - elif self.index_space_aux_include: - tbl_index = action[0] - columns = action[1:-1] - inc_columns = action[-1] - else: - tbl_index = action[0] - columns = action[1:] - - num_columns = len(columns) - new_candidates = [action] - - # Generate the "prefix rule". - for i in range(len(columns)): - # No more valid indexes to construct. - if columns[i] == 0: - break - - # Construct prefix index of the current index. - new_columns = [0 for _ in range(num_columns)] - new_columns[: i + 1] = columns[: i + 1] - - if self.index_space_aux_type and self.index_space_aux_include: - act = (idx_type, tbl_index, *new_columns, inc_columns) - elif self.index_space_aux_type: - act = (idx_type, tbl_index, *new_columns) - elif self.index_space_aux_include: - act = (tbl_index, *new_columns, inc_columns) - else: - act = (tbl_index, *new_columns) - new_candidates.append(IndexSpaceRawSample(act)) - - # Generate "index type" rule. - if self.index_space_aux_type: - hash_act = list(copy.deepcopy(action)) - hash_act[0] = 1 - for i in range(3, 2 + num_columns): - hash_act[i] = 0 - new_candidates.append(IndexSpaceRawSample(tuple(hash_act))) - - # Generate "include" rule. - if self.index_space_aux_include and self.tbl_include_subsets: - inc_subsets = self.tbl_include_subsets[self.tables[tbl_index]] - aux_candidates = [] - for candidate in new_candidates: - if self.index_space_aux_type: - if candidate[0] == 1: - # This is a HASH() - continue - columns = candidate[2:-1] - else: - columns = candidate[1:-1] - - names = [ - self.rel_metadata[self.tables[tbl_index]][col - 1] - for col in columns - if col > 0 - ] - for inc_subset in inc_subsets: - inc_cols = [s for s in inc_subset if s not in names] - if len(inc_cols) > 0: - # Construct the bit flag map. 
- flag = np.zeros(self.max_num_columns, dtype=np.float32) - for inc_col in inc_cols: - flag[ - self.rel_metadata[ - f"{self.tables[tbl_index]}_allcols" - ].index(inc_col) - ] = 1 - aux_candidates.append( - IndexSpaceRawSample((*candidate[:-1], flag)) - ) - new_candidates.extend(aux_candidates) - return new_candidates - - def from_latent(self, subproto: torch.Tensor) -> torch.Tensor: - num_tables = self.num_tables - max_cols = self.max_key_columns - assert len(subproto.shape) == 2 - - # First apply the softmax. - subproto[:, :num_tables] = softmax(subproto[:, :num_tables], dim=1) - # Now apply the per ordinal softmax. - x_reshape = subproto[:, num_tables:].reshape( - subproto.shape[0], max_cols, max_cols + 1 - ) - x_reshape = softmax(x_reshape, dim=2) - subproto[:, num_tables:] = x_reshape.reshape(subproto.shape[0], -1) - return subproto diff --git a/tune/protox/env/space/primitive_space/index_space.py b/tune/protox/env/space/primitive_space/index_space.py deleted file mode 100644 index f8e8ff41..00000000 --- a/tune/protox/env/space/primitive_space/index_space.py +++ /dev/null @@ -1,113 +0,0 @@ -from typing import Any, List, Optional - -import gymnasium as gym -import torch -from gymnasium import spaces - -from tune.protox.env.space.primitive.index import IndexAction -from tune.protox.env.space.primitive_space.index_policy import IndexPolicy -from tune.protox.env.types import ( - IndexSpaceRawSample, - TableAttrAccessSetsMap, - TableAttrListMap, - TableColTuple, -) - - -class IndexSpace(spaces.Tuple): - def get_index_class(self, env_act: IndexSpaceRawSample) -> str: - ia = self.to_action(env_act) - if not ia.is_valid: - return "-1" - return str(self.class_mapping[TableColTuple((ia.tbl_name, ia.columns[0]))]) - - def __init__( - self, - tables: list[str], - max_num_columns: int, - max_indexable_attributes: int, - seed: int, - rel_metadata: TableAttrListMap, - attributes_overwrite: TableAttrListMap, - tbl_include_subsets: TableAttrAccessSetsMap, - index_space_aux_type: bool = False, - index_space_aux_include: bool = False, - deterministic_policy: bool = False, - ): - - self.max_num_columns = self.max_inc_columns = max_num_columns - self.rel_metadata = rel_metadata - self.tbl_include_subsets = tbl_include_subsets - self.index_space_aux_type = index_space_aux_type - self.index_space_aux_include = index_space_aux_include - - if attributes_overwrite is not None: - # Overwrite the maximum number of columns. - self.max_num_columns = max_indexable_attributes - for k, v in attributes_overwrite.items(): - # Overwrite and substitute. - self.rel_metadata[f"{k}_allcols"] = self.rel_metadata[k] - self.rel_metadata[k] = v - - self.tables = tables - self.policy = IndexPolicy( - tables, - self.rel_metadata, - self.tbl_include_subsets, - self.max_num_columns, - max_num_columns, - index_space_aux_type, - index_space_aux_include, - deterministic_policy, - ) - - # Create class mapping. 
- self.class_mapping: dict[TableColTuple, int] = {} - for tbl in self.tables: - for col in rel_metadata[tbl]: - self.class_mapping[TableColTuple((tbl, col))] = len(self.class_mapping) - - super().__init__(spaces=self.policy.spaces(seed), seed=seed) - - def to_action(self, act: IndexSpaceRawSample) -> IndexAction: - return self.policy.to_action(act) - - def sample(self, mask: Optional[Any] = None) -> IndexSpaceRawSample: - table_idx = None if mask is None else mask.get("table_idx", None) - col_idx = None if mask is None else mask.get("col_idx", None) - action = torch.zeros(gym.spaces.utils.flatdim(self)) - assert not self.index_space_aux_type and not self.index_space_aux_include - if table_idx is None: - # Make equal weight. - action[0 : len(self.tables)] = 1.0 / len(self.tables) - else: - # Hit only the targeted table. - action[table_idx] = 1 - - if col_idx is not None: - action[len(self.tables) + col_idx + 1] = 1.0 - # Evenly distribute the column weights. - action[len(self.tables) + (self.max_num_columns + 1) :] = 1.0 / ( - self.max_num_columns + 1 - ) - else: - action[len(self.tables) :] = 1.0 / (self.max_num_columns + 1) - - return self.policy.sample_dist( - action, self.np_random, sample_num_columns=True, break_early=False - ) - - def null_action(self) -> IndexSpaceRawSample: - action = torch.zeros(gym.spaces.utils.flatdim(self)) - if self.index_space_aux_type: - action = action[2:] - if self.index_space_aux_include: - action = action[: -self.max_inc_columns] - - action[0] = 1.0 - return self.policy.sample_dist(action, self.np_random, sample_num_columns=False) - - def to_jsonable(self, sample_n) -> list[str]: # type: ignore - # Emit the representation of an index. - ias = [self.to_action(sample) for sample in sample_n] - return [ia.__repr__() for ia in ias] diff --git a/tune/protox/env/space/primitive_space/knob_space.py b/tune/protox/env/space/primitive_space/knob_space.py deleted file mode 100644 index 2c6fdda6..00000000 --- a/tune/protox/env/space/primitive_space/knob_space.py +++ /dev/null @@ -1,39 +0,0 @@ -from typing import Any, Optional - -from gymnasium import spaces - -from tune.protox.env.space.primitive.knob import KnobMetadata, _create_knob -from tune.protox.env.space.primitive.latent_knob import _create_latent_knob -from tune.protox.env.types import KnobMap - - -class KnobSpace(spaces.Dict): - def __init__( - self, - tables: list[str], - knobs: dict[str, KnobMetadata], - quantize: bool, - quantize_factor: int, - seed: int, - table_level_knobs: dict[str, dict[str, KnobMetadata]] = {}, - latent: bool = False, - ): - create_fn = _create_latent_knob if latent else _create_knob - self.knobs: KnobMap = KnobMap({}) - self.tables = tables - spaces = [] - for k, md in knobs.items(): - knob = create_fn(None, None, k, md, quantize, quantize_factor, seed) - self.knobs[knob.name()] = knob - spaces.append((knob.name(), knob)) - - for t, kv in table_level_knobs.items(): - for k, md in kv.items(): - knob = create_fn(t, None, k, md, quantize, quantize_factor, seed) - self.knobs[knob.name()] = knob - spaces.append((knob.name(), knob)) - - super().__init__(spaces, seed=seed) - - def sample(self, mask: Optional[Any] = None) -> Any: - raise NotImplementedError() diff --git a/tune/protox/env/space/primitive_space/query_space.py b/tune/protox/env/space/primitive_space/query_space.py deleted file mode 100644 index 2e0d32af..00000000 --- a/tune/protox/env/space/primitive_space/query_space.py +++ /dev/null @@ -1,100 +0,0 @@ -from typing import cast - -import gymnasium as gym -from gymnasium.spaces 
import Dict - -from tune.protox.env.space.primitive.knob import ( - CategoricalKnob, - KnobMetadata, - _create_knob, -) -from tune.protox.env.space.primitive.latent_knob import _create_latent_knob -from tune.protox.env.types import KnobMap, QueryTableAliasMap - - -class QuerySpace(Dict): - def __init__( - self, - tables: list[str], - quantize: bool, - quantize_factor: int, - seed: int, - per_query_knobs_gen: dict[str, KnobMetadata] = {}, - per_query_parallel: QueryTableAliasMap = QueryTableAliasMap({}), - per_query_scans: QueryTableAliasMap = QueryTableAliasMap({}), - query_names: list[str] = [], - latent: bool = False, - ) -> None: - - create_fn = _create_latent_knob if latent else _create_knob - self.knobs: KnobMap = KnobMap({}) - self.tables = tables - spaces = [] - for qname in query_names: - for q, kv in per_query_knobs_gen.items(): - knob = create_fn(None, qname, q, kv, quantize, quantize_factor, seed) - self.knobs[knob.name()] = knob - spaces.append((knob.name(), knob)) - - for q, pqs in per_query_scans.items(): - for _, aliases in pqs.items(): - for v in aliases: - md = KnobMetadata( - { - "type": "scanmethod_enum", - "min": 0.0, - "max": 1.0, - "quantize": False, - "log_scale": 0, - "unit": 0, - } - ) - - knob = create_fn( - table_name=None, - query_name=q, - knob_name=v + "_scanmethod", - metadata=md, - do_quantize=False, - default_quantize_factor=quantize_factor, - seed=seed, - ) - self.knobs[knob.name()] = knob - spaces.append((knob.name(), knob)) - - cat_spaces = [] - self.cat_dims = [] - for q, pqp in per_query_parallel.items(): - values = [] - for _, aliases in pqp.items(): - values.extend(aliases) - - if len(values) < 2: - continue - - md = KnobMetadata( - { - "type": "query_table_enum", - "values": values, - "default_value": 0, - } - ) - knob = create_fn( - table_name=None, - query_name=q, - knob_name=q + "_parallel_rel", - metadata=md, - do_quantize=False, - default_quantize_factor=0, - seed=seed, - ) - self.knobs[knob.name()] = knob - - cat_spaces.append((knob.name(), knob)) - self.cat_dims.append(cast(CategoricalKnob, knob).num_elems) - - # Figure out where the categorical inputs begin. 
- self.categorical_start = gym.spaces.utils.flatdim(Dict(spaces)) - spaces.extend(cat_spaces) - super().__init__(spaces, seed=seed) - self.final_dim = gym.spaces.utils.flatdim(self) diff --git a/tune/protox/env/space/state/__init__.py b/tune/protox/env/space/state/__init__.py deleted file mode 100644 index f307193a..00000000 --- a/tune/protox/env/space/state/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from tune.protox.env.space.state.lsc_space import ( - LSCMetricStateSpace, - LSCStructureStateSpace, -) - -__all__ = ["LSCStructureStateSpace", "LSCMetricStateSpace"] diff --git a/tune/protox/env/space/state/lsc_space.py b/tune/protox/env/space/state/lsc_space.py deleted file mode 100644 index bfa9dcab..00000000 --- a/tune/protox/env/space/state/lsc_space.py +++ /dev/null @@ -1,58 +0,0 @@ -from typing import Any - -import psycopg -from gymnasium.spaces import Box - -from tune.protox.env.lsc.lsc import LSC -from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.space.state.metric import MetricStateSpace -from tune.protox.env.space.state.structure import StructureStateSpace -from tune.protox.env.space.utils import check_subspace -from util.workspace import DBGymConfig - - -class LSCStructureStateSpace(StructureStateSpace): - def __init__( - self, - lsc: LSC, - action_space: HolonSpace, - normalize: bool, - seed: int, - ) -> None: - spaces = {"lsc": Box(low=-1, high=1.0)} - super().__init__(action_space, spaces, normalize, seed) - self.lsc = lsc - - def construct_offline( - self, connection: psycopg.Connection[Any], data: Any, prev_state_container: Any - ) -> dict[str, Any]: - state = super().construct_offline(connection, data, prev_state_container) - state["lsc"] = self.lsc.current_scale() - assert check_subspace(self, state) - return state - - -class LSCMetricStateSpace(MetricStateSpace): - def __init__(self, dbgym_cfg: DBGymConfig, lsc: LSC, tables: list[str], seed: int): - spaces = {"lsc": Box(low=-1, high=1.0)} - super().__init__(dbgym_cfg, spaces, tables, seed) - self.lsc = lsc - - def construct_offline( - self, connection: psycopg.Connection[Any], data: Any, state_container: Any - ) -> dict[str, Any]: - state = super().construct_offline(connection, data, state_container) - state["lsc"] = self.lsc.current_scale() - assert check_subspace(self, state) - return state - - def construct_online(self, connection: psycopg.Connection[Any]) -> dict[str, Any]: - state = super().construct_online(connection) - state["lsc"] = self.lsc.current_scale() - return state - - def merge_deltas(self, merge_data: list[dict[str, Any]]) -> dict[str, Any]: - state = super().merge_deltas(merge_data) - state["lsc"] = self.lsc.current_scale() - assert check_subspace(self, state) - return state diff --git a/tune/protox/env/space/state/metric.py b/tune/protox/env/space/state/metric.py deleted file mode 100644 index 14317df5..00000000 --- a/tune/protox/env/space/state/metric.py +++ /dev/null @@ -1,315 +0,0 @@ -import json -from pathlib import Path -from typing import Any, Mapping, Optional, Union, cast - -import numpy as np -import psycopg -from gymnasium import spaces -from gymnasium.spaces import Box, Space -from psycopg.rows import dict_row - -from tune.protox.env.space.state.space import StateSpace -from util.pg import DBGYM_POSTGRES_DBNAME -from util.workspace import DBGymConfig, open_and_save - -# Defines the relevant metrics that we care about from benchbase. -# : whether to filter with the benchbase database. -# : whether to process the set of valid_keys per table. 
-METRICS_SPECIFICATION = { - "pg_stat_database": { - "filter_db": True, - "per_table": False, - "valid_keys": [ - "temp_files", - "tup_returned", - "xact_commit", - "xact_rollback", - "conflicts", - "blks_hit", - "blks_read", - "temp_bytes", - "deadlocks", - "tup_inserted", - "tup_fetched", - "tup_updated", - "tup_deleted", - ], - }, - "pg_stat_bgwriter": { - "filter_db": False, - "per_table": False, - "valid_keys": [ - "checkpoint_write_time", - "buffers_backend_fsync", - "buffers_clean", - "buffers_checkpoint", - "checkpoints_req", - "checkpoints_timed", - "buffers_alloc", - "buffers_backend", - "maxwritten_clean", - ], - }, - "pg_stat_database_conflicts": { - "filter_db": True, - "per_table": False, - "valid_keys": [ - "confl_deadlock", - "confl_lock", - "confl_bufferpin", - "confl_snapshot", - ], - }, - "pg_stat_user_tables": { - "filter_db": False, - "per_table": True, - "valid_keys": [ - "n_tup_ins", - "n_tup_upd", - "n_tup_del", - "n_ins_since_vacuum", - "n_mod_since_analyze", - "n_tup_hot_upd", - "idx_tup_fetch", - "seq_tup_read", - "autoanalyze_count", - "autovacuum_count", - "n_live_tup", - "n_dead_tup", - "seq_scan", - "idx_scan", - ], - }, - "pg_statio_user_tables": { - "filter_db": False, - "per_table": True, - "valid_keys": [ - "heap_blks_hit", - "heap_blks_read", - "idx_blks_hit", - "idx_blks_read", - "tidx_blks_hit", - "tidx_blks_read", - "toast_blks_hit", - "toast_blks_read", - ], - }, -} - - -# A metrics-based state returns the physical metrics (i.e., consequences) of running -# a particular workload in a given configuration. This serves to represent the -# assumption that we should be indifferent/invariant to -# pairs that yield the *same* physical metrics. -# -# In the RL state-action-reward-next_state sense: -# The benchmark is executed in the baseline configuration to determine the physical metrics -# as a consequence of the baseline configuration. That is the "previous state". -# -# You then pick an action that produces a new configuration. That configuration is then applied -# to the database. This is the action. -# -# You then run the benchmark again. This yields some "target" metric and also physical database -# metrics. "target" metric is used to determine the reward from the transition. The physical -# database metrics form the "next_state". -# -# In this way, the physical database metrics serves as proxy for the actual configuration at -# a given moment in time. This is arguably a little bit twisted?? Since we are using some -# metrics that are also indirectly a proxy for the actual runtime/tps. But we are banking -# on the metrics containing the relevant data to allow better action selection... 
-class MetricStateSpace(StateSpace, spaces.Dict): - @staticmethod - def construct_key(key: str, metric: str, per_tbl: bool, tbl: Optional[str]) -> str: - if per_tbl: - assert tbl - return f"{key}_{metric}_{tbl}" - return f"{key}_{metric}" - - def require_metrics(self) -> bool: - return True - - def __init__( - self, - dbgym_cfg: DBGymConfig, - spaces: Mapping[str, spaces.Space[Any]], - tables: list[str], - seed: int, - ) -> None: - self.dbgym_cfg = dbgym_cfg - self.tables = tables - self.internal_spaces: dict[str, Space[Any]] = {} - self.internal_spaces.update(spaces) - for key, spec in METRICS_SPECIFICATION.items(): - for key_metric in cast(list[str], spec["valid_keys"]): - if spec["per_table"]: - for tbl in tables: - tbl_metric = MetricStateSpace.construct_key( - key, key_metric, True, tbl - ) - assert tbl_metric not in self.internal_spaces - self.internal_spaces[tbl_metric] = Box(low=-np.inf, high=np.inf) - else: - metric = MetricStateSpace.construct_key( - key, key_metric, False, None - ) - assert metric not in self.internal_spaces - self.internal_spaces[metric] = Box(low=-np.inf, high=np.inf) - super().__init__(self.internal_spaces, seed) - - def check_benchbase( - self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path] - ) -> bool: - assert results_dpath is not None - assert Path(results_dpath).exists() - metric_files = [f for f in Path(results_dpath).rglob("*metrics.json")] - if len(metric_files) != 2: - return False - - initial = ( - metric_files[0] if "initial" in str(metric_files[0]) else metric_files[1] - ) - final = metric_files[1] if initial == metric_files[0] else metric_files[0] - - try: - with open_and_save(dbgym_cfg, initial) as f: - initial_metrics = json.load(f) - - with open_and_save(dbgym_cfg, final) as f: - final_metrics = json.load(f) - except Exception as e: - return False - - for key, spec in METRICS_SPECIFICATION.items(): - assert key in initial_metrics - if key not in initial_metrics or key not in final_metrics: - # Missing key. - return False - - initial_data = initial_metrics[key] - final_data = final_metrics[key] - if spec["filter_db"]: - initial_data = [ - d for d in initial_data if d["datname"] == DBGYM_POSTGRES_DBNAME - ] - final_data = [ - d for d in final_data if d["datname"] == DBGYM_POSTGRES_DBNAME - ] - elif spec["per_table"]: - initial_data = sorted( - [d for d in initial_data if d["relname"] in self.tables], - key=lambda x: x["relname"], - ) - final_data = sorted( - [d for d in final_data if d["relname"] in self.tables], - key=lambda x: x["relname"], - ) - - if len(initial_data) == 0 or len(final_data) == 0: - return False - - for pre, post in zip(initial_data, final_data): - for metric in cast(list[str], spec["valid_keys"]): - if metric not in pre or metric not in post: - return False - return True - - def construct_offline( - self, connection: psycopg.Connection[Any], data: Any, state_container: Any - ) -> dict[str, Any]: - assert data is not None - assert Path(data).exists() - - # This function computes the metrics state that is used to represent - # consequence of executing in the current environment. 
- metric_files = [f for f in Path(data).rglob("*metrics.json")] - if len(metric_files) == 1: - with open_and_save(self.dbgym_cfg, metric_files[0], "r") as f: - metrics = json.load(f) - assert "flattened" in metrics - metrics.pop("flattened") - - def npify(d: dict[str, Any]) -> Any: - data = {} - for k, v in d.items(): - if isinstance(v, dict): - data[k] = npify(v) - else: - data[k] = np.array([v], dtype=np.float32) - return data - - return cast(dict[str, Any], npify(metrics)) - - assert len(metric_files) == 2 - initial = ( - metric_files[0] if "initial" in str(metric_files[0]) else metric_files[1] - ) - final = metric_files[1] if initial == metric_files[0] else metric_files[0] - - with open_and_save(self.dbgym_cfg, initial) as f: - initial_metrics = json.load(f) - - with open_and_save(self.dbgym_cfg, final) as f: - final_metrics = json.load(f) - - return self.state_delta(initial_metrics, final_metrics) - - def state_delta( - self, initial: dict[str, Any], final: dict[str, Any] - ) -> dict[str, Any]: - metrics = {} - for key, spec in METRICS_SPECIFICATION.items(): - assert key in initial - assert isinstance(spec, dict) - - initial_data = initial[key] - final_data = final[key] - if spec["filter_db"]: - initial_data = [ - d for d in initial_data if d["datname"] == DBGYM_POSTGRES_DBNAME - ] - final_data = [ - d for d in final_data if d["datname"] == DBGYM_POSTGRES_DBNAME - ] - elif spec["per_table"]: - initial_data = sorted( - [d for d in initial_data if d["relname"] in self.tables], - key=lambda x: x["relname"], - ) - final_data = sorted( - [d for d in final_data if d["relname"] in self.tables], - key=lambda x: x["relname"], - ) - - for pre, post in zip(initial_data, final_data): - for metric in cast(list[str], spec["valid_keys"]): - if pre[metric] is None or post[metric] is None: - diff = 0.0 - else: - diff = max(float(post[metric]) - float(pre[metric]), 0.0) - - metric_key = MetricStateSpace.construct_key( - key, - metric, - bool(spec["per_table"]), - pre["relname"] if spec["per_table"] else None, - ) - metrics[metric_key] = np.array([diff], dtype=np.float32) - return metrics - - def construct_online(self, connection: psycopg.Connection[Any]) -> dict[str, Any]: - metric_data = {} - with connection.cursor(row_factory=dict_row) as cursor: - for key in METRICS_SPECIFICATION.keys(): - records = cursor.execute(f"SELECT * FROM {key}") - metric_data[key] = [r for r in records] - return metric_data - - def merge_deltas(self, merge_data: list[dict[str, Any]]) -> dict[str, Any]: - comb_data = {} - for datum in merge_data: - for key, value in datum.items(): - if key not in comb_data: - comb_data[key] = value - else: - comb_data[key] += value - return comb_data diff --git a/tune/protox/env/space/state/space.py b/tune/protox/env/space/state/space.py deleted file mode 100644 index 565c4206..00000000 --- a/tune/protox/env/space/state/space.py +++ /dev/null @@ -1,40 +0,0 @@ -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Any, Union - -from gymnasium import spaces -from psycopg import Connection - -from util.workspace import DBGymConfig - - -class StateSpace(ABC, spaces.Space[Any]): - @abstractmethod - def require_metrics(self) -> bool: - pass - - @abstractmethod - def check_benchbase( - self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path] - ) -> bool: - pass - - @abstractmethod - def construct_offline( - self, connection: Connection[Any], data: Any, state_container: Any - ) -> dict[str, Any]: - pass - - @abstractmethod - def construct_online(self, connection: 
Connection[Any]) -> dict[str, Any]: - pass - - @abstractmethod - def state_delta( - self, initial: dict[str, Any], final: dict[str, Any] - ) -> dict[str, Any]: - pass - - @abstractmethod - def merge_deltas(self, merge_data: list[dict[str, Any]]) -> dict[str, Any]: - pass diff --git a/tune/protox/env/space/state/structure.py b/tune/protox/env/space/state/structure.py deleted file mode 100644 index 533687d7..00000000 --- a/tune/protox/env/space/state/structure.py +++ /dev/null @@ -1,137 +0,0 @@ -from pathlib import Path -from typing import Any, Dict, Mapping, Optional, Union, cast - -import gymnasium as gym -import numpy as np -import psycopg -import torch as th -from gymnasium import spaces - -from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.space.latent_space import ( - LatentIndexSpace, - LatentKnobSpace, - LatentQuerySpace, -) -from tune.protox.env.space.primitive.index import IndexAction -from tune.protox.env.space.state.space import StateSpace -from tune.protox.env.space.utils import check_subspace -from tune.protox.env.types import IndexSpaceRawSample, KnobSpaceAction, QuerySpaceAction -from util.workspace import DBGymConfig - - -class StructureStateSpace(StateSpace, spaces.Dict): - def __init__( - self, - action_space: HolonSpace, - spaces: Mapping[str, spaces.Space[Any]], - normalize: bool, - seed: int, - ) -> None: - self.action_space = action_space - self.normalize = normalize - - if self.normalize: - self.internal_spaces: dict[str, gym.spaces.Space[Any]] = { - k: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(s.critic_dim(),)) - for k, s in action_space.get_spaces() - } - else: - self.internal_spaces = { - k: ( - gym.spaces.Box(low=-np.inf, high=np.inf, shape=(s.critic_dim(),)) - if s.uses_embed() - else s - ) - for k, s in action_space.get_spaces() - } - - self.internal_spaces.update(spaces) - super().__init__(self.internal_spaces, seed) - - def require_metrics(self) -> bool: - return False - - def check_benchbase( - self, dbgym_cfg: DBGymConfig, results_dpath: Union[str, Path] - ) -> bool: - # We don't use benchbase metrics anyways. - return True - - def construct_offline( - self, connection: psycopg.Connection[Any], data: Any, prev_state_container: Any - ) -> dict[str, Any]: - assert isinstance(self.action_space, HolonSpace) - splits = self.action_space.split_action(prev_state_container) - knob_states = [v[1] for v in splits if isinstance(v[0], LatentKnobSpace)] - knob_state = cast( - Optional[KnobSpaceAction], None if len(knob_states) == 0 else knob_states[0] - ) - - ql_states = [v[1] for v in splits if isinstance(v[0], LatentQuerySpace)] - ql_state = cast( - Optional[QuerySpaceAction], None if len(ql_states) == 0 else ql_states[0] - ) - - if knob_state is not None: - knobs = self.action_space.get_knob_space() - assert isinstance(knobs, LatentKnobSpace) - assert check_subspace(knobs, knob_state) - - if self.normalize: - knob_state = np.array( - knobs.to_latent([knob_state]), - dtype=np.float32, - )[0] - assert self.internal_spaces["knobs"].contains(knob_state) - - if ql_state is not None: - query = self.action_space.get_query_space() - assert isinstance(query, LatentQuerySpace) - assert check_subspace(query, ql_state) - - if self.normalize: - query_state = np.array( - query.to_latent([ql_state]), - dtype=np.float32, - )[0] - else: - query_state = ql_state - - # Handle indexes. 
- indexes_ = [v[1] for v in splits if isinstance(v[0], LatentIndexSpace)] - indexes = cast(list[IndexAction], None if len(indexes_) == 0 else indexes_[0]) - index_state = None - - if indexes is not None: - index_space = self.action_space.get_index_space() - # TODO(wz2): Incorporate existing stock-config indexes into state and then make the guard len(indexes) > 0 instead of len(env_acts) > 0 - env_acts = [v.raw_repr for v in indexes if v.raw_repr] - if len(env_acts) > 0: - with th.no_grad(): - latents = index_space.to_latent(env_acts).numpy() - latents = latents.sum(axis=0) - latents /= len(indexes) - index_state = latents.flatten().astype(np.float32) - else: - index_state = np.zeros(index_space.critic_dim(), dtype=np.float32) - - state: dict[str, Any] = {} - if knob_state is not None: - state["knobs"] = knob_state - if query_state is not None: - state["query"] = query_state - if index_state is not None: - state["index"] = index_state - return state - - def construct_online(self, connection: psycopg.Connection[Any]) -> dict[str, Any]: - raise NotImplementedError() - - def state_delta( - self, initial: dict[str, Any], final: dict[str, Any] - ) -> dict[str, Any]: - raise NotImplementedError() - - def merge_deltas(self, merge_data: list[dict[str, Any]]) -> dict[str, Any]: - raise NotImplementedError() diff --git a/tune/protox/env/space/utils.py b/tune/protox/env/space/utils.py deleted file mode 100644 index 052ad70a..00000000 --- a/tune/protox/env/space/utils.py +++ /dev/null @@ -1,295 +0,0 @@ -import logging -import re -import typing -from distutils import util -from typing import Any, Union, cast - -from gymnasium import spaces -from gymnasium.spaces import Dict, Space -from psycopg import Connection -from psycopg.rows import dict_row - -from tune.protox.env.space.primitive import KnobClass, SettingType -from tune.protox.env.space.primitive.knob import CategoricalKnob, Knob, full_knob_name -from tune.protox.env.types import ( - KnobMap, - KnobSpaceContainer, - QueryMap, - QueryTableAccessMap, - QueryType, - ServerIndexMetadata, - ServerTableIndexMetadata, - TableAttrListMap, -) -from util.log import DBGYM_LOGGER_NAME - - -def check_subspace(space: Union[Dict, spaces.Tuple], action: Any) -> bool: - if not space.contains(action): - for i, subspace in enumerate(space.spaces): - if isinstance(subspace, str): - assert isinstance(space, Dict) - if not space.spaces[subspace].contains(action[subspace]): - logging.getLogger(DBGYM_LOGGER_NAME).error( - "Subspace %s rejects %s", subspace, action[subspace] - ) - return False - elif not cast(Space[Any], subspace).contains(action[i]): - logging.getLogger(DBGYM_LOGGER_NAME).error( - "Subspace %s rejects %s", subspace, action[i] - ) - return False - return True - - -def _parse_access_method(explain_data: dict[str, Any]) -> dict[str, str]: - def recurse(data: dict[str, Any]) -> dict[str, str]: - sub_data = {} - if "Plans" in data: - for p in data["Plans"]: - sub_data.update(recurse(p)) - elif "Plan" in data: - sub_data.update(recurse(data["Plan"])) - - if "Alias" in data: - sub_data[data["Alias"]] = data["Node Type"] - return sub_data - - return recurse(explain_data) - - -def parse_access_methods( - connection: Connection[Any], queries: QueryMap -) -> QueryTableAccessMap: - q_ams = QueryTableAccessMap({}) - for qid, qqueries in queries.items(): - qams = {} - for sql_type, query in qqueries: - if sql_type != QueryType.SELECT: - assert sql_type != QueryType.INS_UPD_DEL - connection.execute(query) - continue - - explain = "EXPLAIN (FORMAT JSON) " + query - 
explain_data = [r for r in connection.execute(explain)][0][0][0] - qams_delta = _parse_access_method(explain_data) - qams.update(qams_delta) - q_ams[qid] = qams - return q_ams - - -# Convert a string time unit to microseconds. -def _time_unit_to_us(str: str) -> float: - if str == "d": - return 1e6 * 60 * 60 * 24 - elif str == "h": - return 1e6 * 60 * 60 - elif str == "min": - return 1e6 * 60 - elif str == "s": - return 1e6 - elif str == "ms": - return 1e3 - elif str == "us": - return 1.0 - else: - return 1.0 - - -# Parse a pg_setting field value. -def _parse_field(type: SettingType, value: Any) -> Any: - if type == SettingType.BOOLEAN: - return util.strtobool(value) - elif type == SettingType.BINARY_ENUM: - if "off" in value.lower(): - return False - return True - elif type == SettingType.INTEGER: - return int(value) - elif type == SettingType.BYTES: - if value in ["-1", "0"]: - # Hardcoded default/disabled values for this field. - return int(value) - bytes_regex = re.compile(r"(\d+)\s*([kmgtp]?b)", re.IGNORECASE) - order = ("b", "kb", "mb", "gb", "tb", "pb") - field_bytes = None - for number, unit in bytes_regex.findall(value): - field_bytes = int(number) * (1024 ** order.index(unit.lower())) - assert ( - field_bytes is not None - ), f"Failed to parse bytes from value string {value}" - return field_bytes - elif type == SettingType.INTEGER_TIME: - if value == "-1": - # Hardcoded default/disabled values for this field. - return int(value) - bytes_regex = re.compile(r"(\d+)\s*((?:d|h|min|s|ms|us)?)", re.IGNORECASE) - field_us = None - for number, unit in bytes_regex.findall(value): - field_us = int(number) * _time_unit_to_us(unit) - assert field_us is not None, f"Failed to parse time from value string {value}" - return int(field_us) - elif type == SettingType.FLOAT: - return float(value) - else: - return None - - -def _project_pg_setting(knob: Knob, setting: Any) -> Any: - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"Projecting {setting} into knob {knob.knob_name}" - ) - value = _parse_field(knob.knob_type, setting) - value = value if knob.knob_unit == 0 else value / knob.knob_unit - return knob.project_scraped_setting(value) - - -def fetch_server_knobs( - connection: Connection[Any], - tables: list[str], - knobs: KnobMap, - queries: QueryMap, -) -> KnobSpaceContainer: - knob_targets = KnobSpaceContainer({}) - with connection.cursor(row_factory=dict_row) as cursor: - records = cursor.execute("SHOW ALL") - for record in records: - setting_name = record["name"] - if setting_name in knobs: - setting_str = record["setting"] - knob = knobs[setting_name] - assert isinstance(knob, Knob) - value = _project_pg_setting(knob, setting_str) - knob_targets[setting_name] = value - - for tbl in tables: - pgc_record = [ - r - for r in cursor.execute( - f"SELECT * FROM pg_class where relname = '{tbl}'", prepare=False - ) - ][0] - if pgc_record["reloptions"] is not None: - for record in pgc_record["reloptions"]: - for key, value in re.findall(r"(\w+)=(\w*)", cast(str, record)): - tbl_key = full_knob_name(table=tbl, knob_name=key) - if tbl_key in knobs: - knob = knobs[tbl_key] - assert isinstance(knob, Knob) - value = _project_pg_setting(knob, value) - knob_targets[tbl_key] = value - else: - for knobname, knob in knobs.items(): - if knob.knob_class == KnobClass.TABLE: - if knob.knob_name == "fillfactor": - tbl_key = full_knob_name( - table=tbl, knob_name=knob.knob_name - ) - assert isinstance(knob, Knob) - knob_targets[tbl_key] = _project_pg_setting(knob, 100.0) - - q_ams = None - for knobname, knob in 
knobs.items(): - if knob.knob_class == KnobClass.QUERY: - # Set the default to inherit from the base knob setting. - if knob.knob_name in knob_targets: - knob_targets[knobname] = knob_targets[knob.knob_name] - elif isinstance(knob, CategoricalKnob): - knob_targets[knobname] = knob.default_value - elif knob.knob_name.endswith("_scanmethod"): - assert knob.knob_name.endswith("_scanmethod") - assert knob.query_name is not None - installed = False - if q_ams is None: - q_ams = parse_access_methods(connection, queries) - - if knob.query_name in q_ams: - alias = knob.knob_name.split("_scanmethod")[0] - if alias in q_ams[knob.query_name]: - val = 1.0 if "Index" in q_ams[knob.query_name][alias] else 0.0 - knob_targets[knobname] = val - installed = True - - if not installed: - knob_targets[knobname] = 0.0 - logging.getLogger(DBGYM_LOGGER_NAME).warning( - f"Found missing alias for {knobname}" - ) - elif knob.knob_type == SettingType.BOOLEAN: - knob_targets[knobname] = 1.0 - elif knob.knob_name == "random_page_cost": - value = _project_pg_setting(knob, 4.0) - knob_targets[knobname] = value - elif knob.knob_name == "seq_page_cost": - value = _project_pg_setting(knob, 1.0) - knob_targets[knobname] = value - elif knob.knob_name == "hash_mem_multiplier": - value = _project_pg_setting(knob, 2.0) - knob_targets[knobname] = value - return knob_targets - - -def fetch_server_indexes( - connection: Connection[Any], tables: list[str] -) -> tuple[TableAttrListMap, ServerTableIndexMetadata]: - rel_metadata = TableAttrListMap({t: [] for t in tables}) - existing_indexes = ServerTableIndexMetadata({}) - with connection.cursor(row_factory=dict_row) as cursor: - records = cursor.execute( - """ - SELECT c.relname, a.attname - FROM pg_attribute a, pg_class c - WHERE a.attrelid = c.oid AND a.attnum > 0 - ORDER BY c.relname, a.attnum""" - ) - for record in records: - relname = record["relname"] - attname = record["attname"] - if relname in rel_metadata: - rel_metadata[relname].append(attname) - - records = cursor.execute( - """ - SELECT - t.relname as table_name, - i.relname as index_name, - am.amname as index_type, - a.attname as column_name, - array_position(ix.indkey, a.attnum) pos, - (array_position(ix.indkey, a.attnum) >= ix.indnkeyatts) as is_include - FROM pg_class t, pg_class i, pg_index ix, pg_attribute a, pg_am am - WHERE t.oid = ix.indrelid - and am.oid = i.relam - and i.oid = ix.indexrelid - and a.attrelid = t.oid - and a.attnum = ANY(ix.indkey) - and t.relkind = 'r' - and ix.indisunique = false - order by t.relname, i.relname, pos; - """ - ) - - for record in records: - relname = record["table_name"] - idxname = record["index_name"] - colname = record["column_name"] - index_type = record["index_type"] - is_include = record["is_include"] - if relname in rel_metadata: - if relname not in existing_indexes: - existing_indexes[relname] = {} - - if idxname not in existing_indexes[relname]: - existing_indexes[relname][idxname] = ServerIndexMetadata( - { - "index_type": index_type, - "columns": [], - "include": [], - } - ) - - if is_include: - existing_indexes[relname][idxname]["include"].append(colname) - else: - existing_indexes[relname][idxname]["columns"].append(colname) - return rel_metadata, existing_indexes diff --git a/tune/protox/env/target_reset/__init__.py b/tune/protox/env/target_reset/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/env/target_reset/target_reset_wrapper.py b/tune/protox/env/target_reset/target_reset_wrapper.py deleted file mode 100644 index 
84c26121..00000000 --- a/tune/protox/env/target_reset/target_reset_wrapper.py +++ /dev/null @@ -1,110 +0,0 @@ -import logging -import random -from typing import Any, Optional, Tuple, cast - -import gymnasium as gym - -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.pg_env import PostgresEnv -from tune.protox.env.types import EnvInfoDict, HolonStateContainer, TargetResetConfig -from tune.protox.env.util.reward import RewardUtility -from util.log import DBGYM_LOGGER_NAME - - -class TargetResetWrapper(gym.core.Wrapper[Any, Any, Any, Any]): - def __init__( - self, - env: gym.Env[Any, Any], - maximize_state: bool, - reward_utility: RewardUtility, - start_reset: bool, - artifact_manager: Optional[ArtifactManager], - ): - super().__init__(env) - self.maximize_state = maximize_state - self.start_reset = start_reset - self.reward_utility = reward_utility - self.tracked_states: list[TargetResetConfig] = [] - self.best_metric = None - self.real_best_metric = None - self.artifact_manager = artifact_manager - - def _get_state(self) -> HolonStateContainer: - # There is a state_container at the bottom. - assert isinstance(self.unwrapped, PostgresEnv) - sc = self.unwrapped.state_container - assert sc - return sc - - def step( # type: ignore - self, *args: Any, **kwargs: Any - ) -> tuple[Any, float, bool, bool, EnvInfoDict]: - """Steps through the environment, normalizing the rewards returned.""" - obs, rews, terms, truncs, infos = self.env.step(*args, **kwargs) - query_metric_data = infos.get("query_metric_data", None) - assert self.best_metric is not None - did_anything_time_out = infos.get("did_anything_time_out", False) - - metric = infos["metric"] - if self.reward_utility.is_perf_better(metric, self.best_metric): - self.best_metric = infos["metric"] - if not did_anything_time_out: - self.real_best_metric = self.best_metric - - if self.maximize_state: - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Found new maximal state with {metric}." - ) - assert len(self.tracked_states) > 0 - state = self._get_state() - if self.start_reset: - self.tracked_states = [ - self.tracked_states[0], - TargetResetConfig( - { - "metric": metric, - "env_state": obs, - "config": state, - "query_metric_data": query_metric_data, - } - ), - ] - else: - self.tracked_states = [ - TargetResetConfig( - { - "metric": metric, - "env_state": obs, - "config": state, - "query_metric_data": query_metric_data, - } - ), - ] - return obs, rews, terms, truncs, infos - - def reset(self, **kwargs: Any) -> tuple[Any, dict[str, Any]]: - if len(self.tracked_states) == 0: - # First time. 
- state, info = self.env.reset(**kwargs) - assert "baseline_metric" in info - self.best_metric = info["baseline_metric"] - self.real_best_metric = self.best_metric - - self.tracked_states = [ - TargetResetConfig( - { - "metric": self.best_metric, - "env_state": state.copy(), - "config": self._get_state(), - "query_metric_data": info.get("query_metric_data", None), - } - ) - ] - else: - reset_config = random.choice(self.tracked_states) - if kwargs is None or "options" not in kwargs or kwargs["options"] is None: - kwargs = {} - kwargs["options"] = {} - kwargs["options"].update(reset_config) - state, info = self.env.reset(**kwargs) - return state, info diff --git a/tune/protox/env/types.py b/tune/protox/env/types.py deleted file mode 100644 index 47af8163..00000000 --- a/tune/protox/env/types.py +++ /dev/null @@ -1,209 +0,0 @@ -from enum import Enum, unique -from pathlib import Path -from typing import ( - TYPE_CHECKING, - Any, - NamedTuple, - NewType, - Optional, - Tuple, - TypeAlias, - TypedDict, - Union, -) - -import torch - -from tune.protox.env.space.primitive.knob import CategoricalKnob, Knob - -# https://mypy.readthedocs.io/en/stable/common_issues.html#import-cycles -if TYPE_CHECKING: - from tune.protox.env.space.primitive.index import IndexAction - - -@unique -class QueryType(Enum): - UNKNOWN = -1 - SELECT = 0 - CREATE_VIEW = 1 - DROP_VIEW = 2 - INS_UPD_DEL = 3 - - -class NeighborParameters(TypedDict, total=False): - knob_num_nearest: int - knob_span: int - index_num_samples: int - index_rules: bool - - -class ServerIndexMetadata(TypedDict, total=False): - index_type: str - columns: list[str] - include: list[str] - - -DEFAULT_NEIGHBOR_PARAMETERS = NeighborParameters( - { - "knob_num_nearest": 100, - "knob_span": 1, - "index_num_samples": 100, - "index_rules": True, - } -) - -# {table: {index1: ServerIndexMetadata, index2: ...}, ...} -ServerTableIndexMetadata = NewType( - "ServerTableIndexMetadata", dict[str, dict[str, ServerIndexMetadata]] -) -ProtoAction = NewType("ProtoAction", torch.Tensor) - -KnobMap = NewType("KnobMap", dict[str, Union[Knob, CategoricalKnob]]) -KnobSpaceRawAction = NewType("KnobSpaceRawAction", torch.Tensor) -# {knob.name(): knob_value, ...} -KnobSpaceAction = NewType("KnobSpaceAction", dict[str, Any]) -# {knob.name(): knob_value, ...} -KnobSpaceContainer = NewType("KnobSpaceContainer", dict[str, Any]) - -# {KnobObject: knob_value, ...} -QuerySpaceKnobAction = NewType( - "QuerySpaceKnobAction", dict[Union[Knob, CategoricalKnob], Any] -) -# {knob.name(): knob_value, ...} -QuerySpaceAction: TypeAlias = KnobSpaceAction -# {knob.name(): knob_value, ...} -QuerySpaceContainer: TypeAlias = KnobSpaceContainer - -# ([idx_type], [table_encoding], [key1_encoding], ... [key#_encoding], [include_mask]) -IndexSpaceRawSample = NewType("IndexSpaceRawSample", tuple[Any, ...]) -# [IndexAction(index1), ...] -IndexSpaceContainer = NewType("IndexSpaceContainer", list["IndexAction"]) - -# (table_name, column_name) -TableColTuple = NewType("TableColTuple", tuple[str, str]) - -# {table: [att1, att2, ...], ...} -TableAttrListMap = NewType("TableAttrListMap", dict[str, list[str]]) -TableAttrSetMap = NewType("TableAttrSetMap", dict[str, set[str]]) -# {attr: [tab1, tab2, ....], ...} -AttrTableListMap = NewType("AttrTableListMap", dict[str, list[str]]) - -# {table: set[ (att1, att3), (att3, att4), ... ], ...} -# This maps a table to a set of attributes accessed together. 
-TableAttrAccessSetsMap = NewType( - "TableAttrAccessSetsMap", dict[str, set[tuple[str, ...]]] -) - -# {qid: {table: scan_method, ...}, ...} -QueryTableAccessMap = NewType("QueryTableAccessMap", dict[str, dict[str, str]]) -# {table: [alias1, alias2, ...], ...} -TableAliasMap = NewType("TableAliasMap", dict[str, list[str]]) -# {qid: {table: [alias1, alias2, ...], ...}, ...} -QueryTableAliasMap = NewType("QueryTableAliasMap", dict[str, TableAliasMap]) -# {qid: [(query_type1, query_str1), (query_type2, query_str2), ...], ...} -QueryMap = NewType("QueryMap", dict[str, list[tuple[QueryType, str]]]) - -HolonAction = NewType( - "HolonAction", - tuple[ - KnobSpaceAction, - IndexSpaceRawSample, - QuerySpaceAction, - ], -) - -HolonStateContainer = NewType( - "HolonStateContainer", - tuple[ - KnobSpaceContainer, - IndexSpaceContainer, - QuerySpaceContainer, - ], -) -HolonSubAction = Union[KnobSpaceAction, IndexSpaceRawSample, QuerySpaceAction] - -QueryRun = NamedTuple( - "QueryRun", - [ - ("prefix", str), - ("prefix_qid", str), - ("qknobs", QuerySpaceKnobAction), - ], -) - -BestQueryRun = NamedTuple( - "BestQueryRun", - [ - ("query_run", Optional[QueryRun]), - ("runtime", Optional[float]), - ("timed_out", bool), - ("explain_data", Optional[dict[str, Any]]), - ("metric_data", Optional[dict[str, Any]]), - ], -) - - -class TargetResetConfig(TypedDict, total=False): - metric: Optional[float] - env_state: Any - config: HolonStateContainer - query_metric_data: dict[str, BestQueryRun] - - -class QuerySpec(TypedDict, total=False): - benchbase: bool - oltp_workload: bool - query_transactional: Path - query_directory: Path - query_order: Path - - execute_query_directory: Path - execute_query_order: Path - - tbl_include_subsets_prune: bool - tbl_fold_subsets: bool - tbl_fold_delta: int - tbl_fold_iterations: int - - -class ActionsInfo(TypedDict): - all_holon_action_variations: list[tuple[str, HolonAction]] - best_observed_holon_action: Optional[HolonAction] - - -class EnvInfoDict(TypedDict, total=False): - # Original baseline metric. - baseline_metric: float - # Original baseline reward. - baseline_reward: float - # Data generated from each run. - best_query_run_data: dict[str, BestQueryRun] - # Path to run artifacts. - results_dpath: Optional[Union[str, Path]] - - # Previous state container. - prior_state_container: Optional[HolonStateContainer] - # Previous pg conf. - prior_pgconf: Optional[Union[str, Path]] - - # Changes made to the DBMS during this step. - attempted_changes: tuple[list[str], list[str]] - - # Metric of this step. - metric: Optional[float] - # Reward of this step. - reward: Optional[float] - # Whether any queries timed out or the workload as a whole timed out. - did_anything_time_out: bool - # Query metric data. - query_metric_data: Optional[dict[str, BestQueryRun]] - # Information about the actions that were executed this step. - # The actions are in a format usable by replay. - actions_info: Optional[ActionsInfo] - # ProtoAction of the altered step action. - maximal_embed: ProtoAction - - # New state container. - state_container: HolonStateContainer - # What the LSC associated with the action is. 
- lsc: float diff --git a/tune/protox/env/util/__init__.py b/tune/protox/env/util/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/env/util/execute.py b/tune/protox/env/util/execute.py deleted file mode 100644 index 50ba7e40..00000000 --- a/tune/protox/env/util/execute.py +++ /dev/null @@ -1,106 +0,0 @@ -import logging -import math -import time -from typing import Any, Optional, Tuple, Union - -import psycopg -from psycopg import Connection -from psycopg.errors import QueryCanceled - -from env.pg_conn import PostgresConn -from tune.protox.env.artifact_manager import ArtifactManager -from tune.protox.env.space.primitive.knob import CategoricalKnob, Knob -from tune.protox.env.space.state.space import StateSpace -from tune.protox.env.types import ( - BestQueryRun, - KnobSpaceAction, - KnobSpaceContainer, - QueryRun, - QueryType, -) -from util.log import DBGYM_LOGGER_NAME - - -def _acquire_metrics_around_query( - pg_conn: PostgresConn, - query: str, - query_knobs: list[str], - query_timeout: float = 0.0, - observation_space: Optional[StateSpace] = None, -) -> tuple[float, bool, Optional[dict[str, Any]], Any]: - pg_conn.force_statement_timeout(0) - if observation_space and observation_space.require_metrics(): - initial_metrics = observation_space.construct_online(pg_conn.conn()) - - qid_runtime, did_time_out, explain_data = pg_conn.time_query( - query, query_knobs=query_knobs, add_explain=True, timeout=query_timeout - ) - - if observation_space and observation_space.require_metrics(): - final_metrics = observation_space.construct_online(pg_conn.conn()) - diff = observation_space.state_delta(initial_metrics, final_metrics) - else: - diff = None - - # qid_runtime is in microseconds. - return qid_runtime, did_time_out, explain_data, diff - - -def execute_variations( - pg_conn: PostgresConn, - runs: list[QueryRun], - query: str, - query_timeout: float = 0, - artifact_manager: Optional[ArtifactManager] = None, - sysknobs: Optional[KnobSpaceAction] = None, - observation_space: Optional[StateSpace] = None, -) -> BestQueryRun: - - # Initial timeout. - timeout_limit = query_timeout - # Best run invocation. - best_qr = BestQueryRun(None, None, True, None, None) - - for qr in runs: - # Get the per-query knobs for this query in the form list[str]. - query_knobs = [ - knob.resolve_per_query_knob( - value, - all_knobs=sysknobs if sysknobs else KnobSpaceContainer({}), - ) - for knob, value in qr.qknobs.items() - ] - - # Log out the knobs that we are using. - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"{qr.prefix_qid} executing with {query_knobs}" - ) - - runtime, did_time_out, explain_data, metric = _acquire_metrics_around_query( - pg_conn=pg_conn, - query=query, - query_knobs=query_knobs, - query_timeout=timeout_limit, - observation_space=observation_space, - ) - - if not did_time_out: - new_timeout_limit = math.ceil(runtime / 1e3) / 1.0e3 - if new_timeout_limit < timeout_limit: - timeout_limit = new_timeout_limit - - if best_qr.runtime is None or runtime < best_qr.runtime: - assert qr - best_qr = BestQueryRun( - qr, - runtime, - did_time_out, - explain_data, - metric, - ) - - if artifact_manager: - # Log how long we are executing each query + mode. 
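One detail of execute_variations above that is easy to misread: time_query reports the runtime in microseconds, and the best runtime seen so far is folded back in as the timeout for the remaining variations, rounded up to the next millisecond and expressed in seconds. A worked value, assuming a hypothetical 1,234,567 µs run:

    import math

    runtime_us = 1_234_567                        # best variation so far, in microseconds
    new_timeout_s = math.ceil(runtime_us / 1e3) / 1.0e3
    assert new_timeout_s == 1.235                 # 1234.567 ms rounded up -> 1.235 s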
- artifact_manager.record(qr.prefix_qid, runtime / 1e6) - - return best_qr diff --git a/tune/protox/env/util/reward.py b/tune/protox/env/util/reward.py deleted file mode 100644 index e06b2d02..00000000 --- a/tune/protox/env/util/reward.py +++ /dev/null @@ -1,197 +0,0 @@ -import json -import logging -from pathlib import Path -from typing import Optional, Tuple, Union - -import pandas as pd - -from tune.protox.env.artifact_manager import ArtifactManager -from util.log import DBGYM_LOGGER_NAME - -# Initial penalty to apply to create the "worst" perf from the baseline. -INITIAL_PENALTY_MULTIPLIER = 4.0 - - -class RewardUtility(object): - def __init__( - self, - target: str, - metric: str, - reward_scaler: float, - artifact_manager: ArtifactManager, - ) -> None: - self.reward_scaler = reward_scaler - self.target = target - self.metric = metric - self.maximize = target == "tps" - self.worst_perf: Optional[float] = None - self.relative_baseline: Optional[float] = None - self.previous_result: Optional[float] = None - self.artifact_manager = artifact_manager - - def is_perf_better(self, new_perf: float, old_perf: float) -> bool: - if self.maximize and new_perf > old_perf: - return True - elif not self.maximize and new_perf < old_perf: - return True - return False - - def set_relative_baseline( - self, relative_baseline: float, prev_result: Optional[float] = None - ) -> None: - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"[set_relative_baseline]: {relative_baseline}" - ) - self.relative_baseline = relative_baseline - self.previous_result = prev_result - if self.worst_perf is None: - if self.maximize: - self.worst_perf = relative_baseline / INITIAL_PENALTY_MULTIPLIER - else: - self.worst_perf = relative_baseline * INITIAL_PENALTY_MULTIPLIER - elif not self.is_perf_better(relative_baseline, self.worst_perf): - self.worst_perf = relative_baseline - - if self.previous_result is None: - # Set the previous result to the baseline if not specified. 
- self.previous_result = relative_baseline - - def parse_tps_avg_p99_for_metric( - self, parent: Union[Path, str] - ) -> tuple[float, float, float]: - files = [f for f in Path(parent).rglob("*.summary.json")] - assert len(files) == 1 - - summary = files[0] - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"Reading TPS metric from file: {summary}" - ) - # don't call open_and_save() because summary is generated from this run - with open(summary, "r") as f: - s = json.load(f) - tps = s["Throughput (requests/second)"] - p99 = s["Latency Distribution"]["99th Percentile Latency (microseconds)"] - avg = s["Latency Distribution"]["Average Latency (microseconds)"] - - return float(tps), float(p99), float(avg) - - def __parse_tps_for_metric(self, parent: Union[str, Path]) -> float: - files = [f for f in Path(parent).rglob("*.summary.json")] - assert len(files) == 1 - - summary = files[0] - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"Reading TPS metric from file: {summary}" - ) - # don't call open_and_save() because summary is generated from this run - with open(summary, "r") as f: - tps = json.load(f)["Throughput (requests/second)"] - return float(tps) - - def __parse_runtime_for_metric(self, parent: Union[str, Path]) -> float: - files = [f for f in Path(parent).rglob("*.raw.csv")] - assert len(files) > 0 - - summary = [f for f in Path(parent).rglob("*.raw.csv")][0] - data = pd.read_csv(summary) - assert len(data.columns) == 7 - summed_data = data.sum() - summed_latency: float = summed_data["Latency (microseconds)"] - return summed_latency / 1.0e6 - - def __call__( - self, - results_dpath: Union[str, Path, None] = None, - metric: Optional[float] = None, - update: bool = True, - did_error: bool = False, - ) -> tuple[float, float]: - - # TODO: we need to get the memory consumption of indexes. if the index usage - # exceeds the limit, then kill the reward function. may also want to penalize - # reward based on delta. - # - # (param) (new_tps/old_tps) + (1-param) (max(min_mem, new_mem)/min_mem - # - # minimum memory before start trading...) - assert did_error or results_dpath is not None or metric is not None - logging.getLogger(DBGYM_LOGGER_NAME).debug( - f"[reward_calc]: {results_dpath} {metric} {update} {did_error}" - ) - - if metric is None: - # Either it errored or we have a result directory to process. - assert did_error or results_dpath - - # Extract the metric if we're running it manually. - metric_fn = ( - self.__parse_tps_for_metric - if self.target == "tps" - else self.__parse_runtime_for_metric - ) - - if did_error: - metric = self.worst_perf - else: - assert results_dpath - metric = metric_fn(results_dpath) - actual_r = None - assert metric is not None - - # Note that if we are trying to minimize, the smaller metric is, the better we are. - # And policy optimization maximizes the rewards. - # - # As such, for all relative-ness, we treat maximize 100 -> 1000 with reward 9 - # similarly to the case of minimize 1000 -> 100 with reward 9. - # This can effectively be done as flipping what is considered baseline and what is not. - - if self.relative_baseline is None: - # Use the metric directly. 
- actual_r = metric - elif self.metric == "multiplier": - actual_r = ( - metric / self.relative_baseline - if self.maximize - else self.relative_baseline / metric - ) - elif self.metric == "relative": - if self.maximize: - actual_r = (metric - self.relative_baseline) / self.relative_baseline - else: - actual_r = (self.relative_baseline - metric) / self.relative_baseline - elif self.metric == "cdb_delta": - assert self.previous_result - - # refer to https://dbgroup.cs.tsinghua.edu.cn/ligl/papers/sigmod19-cdbtune.pdf. - relative_baseline = ( - (metric - self.relative_baseline) / self.relative_baseline - if self.maximize - else (self.relative_baseline - metric) / self.relative_baseline - ) - relative_prev = ( - (metric - self.previous_result) / self.previous_result - if self.maximize - else (self.previous_result - metric) / self.previous_result - ) - - if relative_baseline > 0: - actual_r = (pow(1 + relative_baseline, 2) - 1) * abs(1 + relative_prev) - else: - actual_r = -(pow(1 - relative_baseline, 2) - 1) * abs(1 - relative_prev) - - # Apply the truncation step. - if actual_r > 0 and relative_prev < 0: - actual_r = 0 - - if update: - # Update worst seen metric. - if self.worst_perf is None or not self.is_perf_better( - metric, self.worst_perf - ): - self.worst_perf = metric - - self.previous_result = metric - - # Scale the actual reward by the scaler. - assert actual_r is not None - return metric, actual_r * self.reward_scaler diff --git a/tune/protox/env/util/workload_analysis.py b/tune/protox/env/util/workload_analysis.py deleted file mode 100644 index a12f6d08..00000000 --- a/tune/protox/env/util/workload_analysis.py +++ /dev/null @@ -1,171 +0,0 @@ -from typing import Iterator, Optional, Tuple - -import pglast -from pglast import stream -from pglast.visitors import Continue, Visitor - -from tune.protox.env.types import ( - AttrTableListMap, - QueryType, - TableAliasMap, - TableAttrSetMap, - TableColTuple, -) - - -def traverse(stmt: pglast.ast.Node) -> Iterator[pglast.ast.Node]: - """ - Trying to mimic the .traverse() function pglast v3 in pglast v6 - For context, we switched from pglast v3 to pglast v6 - """ - visitor = Visitor() - generator = visitor.iterate(stmt) - - try: - item = generator.send(None) - yield item - except StopIteration: - return - - while True: - try: - item = generator.send(Continue) - yield item - except StopIteration: - return - - -def extract_aliases(stmts: pglast.ast.Node) -> TableAliasMap: - # Extract the aliases. 
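For the cdb_delta branch of RewardUtility.__call__ above (the CDBTune-style shaping), a small numeric sketch may make the two cases and the truncation step concrete. The figures are made up, a runtime target (maximize = False) is assumed, and the final reward_scaler multiplication is omitted:

    from math import isclose

    def cdb_delta_reward(metric: float, baseline: float, previous: float) -> float:
        # Minimizing runtime, so improvements show up as positive deltas.
        delta_0 = (baseline - metric) / baseline
        delta_t = (previous - metric) / previous
        if delta_0 > 0:
            reward = ((1 + delta_0) ** 2 - 1) * abs(1 + delta_t)
        else:
            reward = -((1 - delta_0) ** 2 - 1) * abs(1 - delta_t)
        # Truncation: no credit if the baseline improved but the previous step regressed.
        if reward > 0 and delta_t < 0:
            reward = 0.0
        return reward

    assert isclose(cdb_delta_reward(metric=60.0, baseline=100.0, previous=80.0), 1.2)
    assert isclose(cdb_delta_reward(metric=120.0, baseline=100.0, previous=80.0), -0.66)
    assert cdb_delta_reward(metric=90.0, baseline=100.0, previous=80.0) == 0.0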
- aliases: TableAliasMap = TableAliasMap({}) - ctes = set() - for stmt in stmts: - for _, node in traverse(stmt): - if isinstance(node, pglast.ast.Node): - if isinstance(node, pglast.ast.CommonTableExpr): - ctes.add(node.ctename) - elif isinstance(node, pglast.ast.RangeVar): - ft = node - relname = ft.relname - - if isinstance(stmt.stmt, pglast.ast.ViewStmt): - if node == stmt.stmt.view: - continue - - alias = ( - ft.relname - if ( - ft.alias is None - or ft.alias.aliasname is None - or ft.alias.aliasname == "" - ) - else ft.alias.aliasname - ) - if relname not in aliases: - aliases[relname] = [] - if alias not in aliases[relname]: - aliases[relname].append(alias) - # else: - # logging.getLogger(DBGYM_LOGGER_NAME).warning(f"Squashing {relname} {alias} on {sql_file}") - return TableAliasMap({k: v for k, v in aliases.items() if k not in ctes}) - - -def extract_sqltypes( - stmts: pglast.ast.Node, pid: Optional[int] -) -> list[tuple[QueryType, str]]: - sqls = [] - for stmt in stmts: - sql_type = QueryType.UNKNOWN - if isinstance(stmt, pglast.ast.RawStmt) and isinstance( - stmt.stmt, pglast.ast.SelectStmt - ): - sql_type = QueryType.SELECT - elif isinstance(stmt, pglast.ast.RawStmt) and isinstance( - stmt.stmt, pglast.ast.ViewStmt - ): - sql_type = QueryType.CREATE_VIEW - elif isinstance(stmt, pglast.ast.RawStmt) and isinstance( - stmt.stmt, pglast.ast.DropStmt - ): - drop_ast = stmt.stmt - if drop_ast.removeType == pglast.enums.parsenodes.ObjectType.OBJECT_VIEW: - sql_type = QueryType.DROP_VIEW - elif isinstance(stmt, pglast.ast.RawStmt) and any( - [ - isinstance(stmt.stmt, pglast.ast.InsertStmt), - isinstance(stmt.stmt, pglast.ast.UpdateStmt), - isinstance(stmt.stmt, pglast.ast.DeleteStmt), - ] - ): - sql_type = QueryType.INS_UPD_DEL - - q = stream.RawStream()(stmt) - if pid is not None and "pid" in q: - q = q.replace("pid", str(pid)) - elif pid is not None and "PID" in q: - q = q.replace("PID", str(pid)) - - sqls.append((sql_type, q)) - return sqls - - -def extract_columns( - stmt: pglast.ast.Node, - tables: list[str], - all_attributes: AttrTableListMap, - query_aliases: TableAliasMap, -) -> tuple[TableAttrSetMap, list[TableColTuple]]: - tbl_col_usages: TableAttrSetMap = TableAttrSetMap({t: set() for t in tables}) - - def traverse_extract_columns( - alias_set: dict[str, str], node: pglast.ast.Node, update: bool = True - ) -> list[TableColTuple]: - if node is None: - return [] - - columns = [] - for _, expr in traverse(node): - if isinstance(expr, pglast.ast.Node) and isinstance( - expr, pglast.ast.ColumnRef - ): - if len(expr.fields) == 2: - tbl, col = expr.fields[0], expr.fields[1] - assert isinstance(tbl, pglast.ast.String) and isinstance( - col, pglast.ast.String - ) - - tbl = tbl.sval - col = col.sval - if tbl in alias_set and ( - tbl in tbl_col_usages or alias_set[tbl] in tbl_col_usages - ): - tbl = alias_set[tbl] - if update: - tbl_col_usages[tbl].add(col) - else: - columns.append(TableColTuple((tbl, col))) - elif isinstance(expr.fields[0], pglast.ast.String): - col = expr.fields[0].sval - if col in all_attributes: - for tbl in all_attributes[col]: - if tbl in alias_set.values(): - if update: - tbl_col_usages[tbl].add(col) - else: - columns.append(TableColTuple((tbl, col))) - return columns - - # This is the query column usage. 
- all_refs = [] - for _, node in traverse(stmt): - if isinstance(node, pglast.ast.Node): - if isinstance(node, pglast.ast.SelectStmt): - aliases = {} - for relname, relalias in query_aliases.items(): - for alias in relalias: - aliases[alias] = relname - - # We derive the "touched" columns only from the WHERE clause. - traverse_extract_columns(aliases, node.whereClause) - all_refs.extend(traverse_extract_columns(aliases, node, update=False)) - return tbl_col_usages, all_refs diff --git a/tune/protox/env/workload.py b/tune/protox/env/workload.py deleted file mode 100644 index 591b4cb1..00000000 --- a/tune/protox/env/workload.py +++ /dev/null @@ -1,707 +0,0 @@ -import copy -import json -import logging -import math -import shutil -import tempfile -import time -from pathlib import Path -from typing import IO, Any, Optional, Tuple, Union, cast - -import numpy as np -import pglast -from plumbum import local - -from env.pg_conn import PostgresConn -from tune.protox.env.artifact_manager import ArtifactManager, time_record -from tune.protox.env.space.holon_space import HolonSpace -from tune.protox.env.space.latent_space import LatentKnobSpace, LatentQuerySpace -from tune.protox.env.space.state.space import StateSpace -from tune.protox.env.types import ( - AttrTableListMap, - BestQueryRun, - HolonAction, - KnobSpaceAction, - QueryMap, - QueryRun, - QuerySpaceAction, - QuerySpaceKnobAction, - QuerySpec, - QueryType, - TableAttrAccessSetsMap, - TableAttrListMap, - TableAttrSetMap, - TableColTuple, -) -from tune.protox.env.util.execute import execute_variations -from tune.protox.env.util.reward import RewardUtility -from tune.protox.env.util.workload_analysis import ( - extract_aliases, - extract_columns, - extract_sqltypes, -) -from util.log import DBGYM_LOGGER_NAME -from util.workspace import DBGymConfig, is_fully_resolved, open_and_save - - -class Workload(object): - # Usually, we want to call open_and_save() when opening a file for reading - # However, when creating a Workload object for unittesting, we just want to call open() - def _open_for_reading( - self, - path: Path, - ) -> IO[Any]: - # When opening for writing we always use open() so we don't need this function, which is - # why hardcode the mode as "r". - if self.dbgym_cfg is not None: - return open_and_save(self.dbgym_cfg, path) - else: - return open(path) - - def _crunch( - self, - all_attributes: AttrTableListMap, - sqls: list[tuple[str, Path, float]], - pid: Optional[int], - query_spec: QuerySpec, - ) -> None: - assert all( - is_fully_resolved(sql[1]) for sql in sqls - ), f"sqls ({sqls}) should only contain existent real absolute paths." - do_tbl_include_subsets_prune = query_spec["tbl_include_subsets_prune"] - self.order = [] - self.queries = QueryMap({}) - # Map table -> set(queries that use it) - self.tbl_queries_usage: dict[str, set[str]] = {} - # Map (table, column) -> set(queries that use it) - self.tbl_filter_queries_usage: dict[TableColTuple, set[str]] = {} - - # Build the SQL and table usage information. 
- self.queries_mix: dict[str, float] = {} - self.query_aliases = {} - self.query_usages = TableAttrListMap({t: [] for t in self.tables}) - tbl_include_subsets = TableAttrAccessSetsMap( - {tbl: set() for tbl in self.attributes.keys()} - ) - for stem, sql_file, ratio in sqls: - assert stem not in self.queries - self.order.append(stem) - self.queries_mix[stem] = ratio - - with self._open_for_reading(sql_file) as q: - sql = q.read() - assert not sql.startswith("/*") - - # TODO(WAN): HACK HACK HACK - if Path(sql_file).name == "15.sql" and "benchmark_tpch" in str( - Path(sql_file).absolute() - ): - sql = sql.replace("revenue0", "revenue0_PID") - - stmts = pglast.parse_sql(sql) - - # Extract aliases. - self.query_aliases[stem] = extract_aliases(stmts) - # Extract sql and query types. - self.queries[stem] = extract_sqltypes(stmts, pid) - - # Construct table query usages. - for tbl in self.query_aliases[stem]: - if tbl not in self.tbl_queries_usage: - self.tbl_queries_usage[tbl] = set() - self.tbl_queries_usage[tbl].add(stem) - - for stmt in stmts: - # Get all columns that appear in the predicates. - # Get all columns that appear together (all_refs). - tbl_col_usages, all_refs = extract_columns( - stmt, self.tables, all_attributes, self.query_aliases[stem] - ) - tbl_col_usages = TableAttrSetMap( - { - t: set([a for a in atts if a in self.attributes[t]]) - for t, atts in tbl_col_usages.items() - } - ) - - for tbl, atts in tbl_col_usages.items(): - for att in atts: - # Update the (tbl, col) query references. - if (tbl, att) not in self.tbl_filter_queries_usage: - self.tbl_filter_queries_usage[ - TableColTuple((tbl, att)) - ] = set() - self.tbl_filter_queries_usage[ - TableColTuple((tbl, att)) - ].add(stem) - - # Update query_usages (reflects predicate usage). - if att not in self.query_usages[tbl]: - self.query_usages[tbl].append(att) - - # Compute table -> unique set of columns used from that table - all_qref_sets = { - k: tuple(sorted([r[1] for r in set(all_refs) if r[0] == k])) - for k in set([r[0] for r in all_refs]) - } - for k, s in all_qref_sets.items(): - tbl_include_subsets[k].add(s) - - # Do this so query_usages is actually in the right order. - # Order based on the original attribute list. - self.query_usages = TableAttrListMap( - { - tbl: [a for a in atts if a in self.query_usages[tbl]] - for tbl, atts in self.attributes.items() - } - ) - - if do_tbl_include_subsets_prune: - self.tbl_include_subsets = TableAttrAccessSetsMap({}) - # First prune any "fully enclosed". - for tbl, attrsets in tbl_include_subsets.items(): - self.tbl_include_subsets[tbl] = set( - tbl - for tbl, not_enclosed in zip( - attrsets, - [ - # Basically: - # for v0 in attrsets: - # v0_not_enclosed = True - # for v1 in attrsets: - # if v0 <= v1: - # v0_not_enclosed = False - not any(set(v0) <= set(v1) for v1 in attrsets if v0 != v1) - for v0 in attrsets - ], - ) - if not_enclosed - ) - - if query_spec["tbl_fold_subsets"]: - tis = copy.deepcopy(self.tbl_include_subsets) - for tbl, subsets in tis.items(): - # Sort by length...these are not fully enclosed. - sorted_subsets = sorted(subsets, key=lambda x: len(x)) - for _ in range(query_spec["tbl_fold_iterations"]): - for i in range(len(sorted_subsets)): - s0 = set(sorted_subsets[i]) - for j in range(i + 1, len(sorted_subsets)): - s1 = set(sorted_subsets[j]) - # If the difference is small enough, merge them. 
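The "fully enclosed" pruning in _crunch above keeps, per table, only the attribute tuples that are not subsets of another tuple for that table; the zip/list-comprehension formulation is equivalent to the plainer sketch below (the column sets are invented for illustration):

    attrsets = {("o_d_id",), ("o_d_id", "o_w_id"), ("o_carrier_id",)}

    pruned = {
        s
        for s in attrsets
        if not any(set(s) <= set(other) for other in attrsets if s != other)
    }
    # ("o_d_id",) is enclosed by ("o_d_id", "o_w_id"), so it is dropped:
    assert pruned == {("o_d_id", "o_w_id"), ("o_carrier_id",)}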
- if len(s0 - s1) <= query_spec["tbl_fold_delta"]: - sorted_subsets[i] = tuple() - sorted_subsets[j] = tuple(sorted(s1.union(s0))) - # Filter out the sets that no longer exist. - sorted_subsets = sorted( - [s for s in sorted_subsets if len(s) > 0], - key=lambda x: len(x), - ) - self.tbl_include_subsets[tbl] = subsets - else: - self.tbl_include_subsets = tbl_include_subsets - - self.readonly_workload = not any( - [ - q == QueryType.INS_UPD_DEL - for _, sqls in self.queries.items() - for (q, _) in sqls - ] - ) - self.sql_files = {k: str(v) for (k, v, _) in sqls} - - def __init__( - self, - # dbgym_cfg is only optional so we can set it to None for unittests. Don't set it to None during normal operation. - dbgym_cfg: Optional[DBGymConfig], - tables: list[str], - attributes: TableAttrListMap, - query_spec: QuerySpec, - workload_path: Path, - pid: Optional[int] = None, - workload_timeout: float = 0, - workload_timeout_penalty: float = 1.0, - artifact_manager: Optional[ArtifactManager] = None, - ) -> None: - self.dbgym_cfg = dbgym_cfg - self.workload_path = workload_path - # Whether we should use benchbase or not. - self.benchbase = query_spec["benchbase"] - self.oltp_workload = query_spec["oltp_workload"] - self.workload_timeout = workload_timeout - self.workload_timeout_penalty = workload_timeout_penalty - self.artifact_manager = artifact_manager - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Initialized with workload timeout {workload_timeout}" - ) - - self.tables: list[str] = tables - self.attributes: TableAttrListMap = attributes - - # Mapping from attribute -> table that has it. - all_attributes = AttrTableListMap({}) - for tbl, cols in self.attributes.items(): - for col in cols: - if col not in all_attributes: - all_attributes[col] = [] - all_attributes[col].append(tbl) - - # Get the order in which we should execute in. - sqls = [] - order_or_txn_fname = "txn.txt" if self.oltp_workload else "order.txt" - workload_order_or_txn_fpath = self.workload_path / order_or_txn_fname - with self._open_for_reading(workload_order_or_txn_fpath) as f: - lines = f.read().splitlines() - sqls = [ - ( - line.split(",")[0], - self.workload_path / Path(line.split(",")[1]), - 1.0, - ) - for line in lines - ] - - # TODO(phw2): pass "query_transactional" somewhere other than query_spec, just like "query_order" is - if "query_transactional" in query_spec: - with self._open_for_reading(query_spec["query_transactional"]) as f: - lines = f.read().splitlines() - splits = [line.split(",") for line in lines] - sqls = [ - ( - split[0], - self.workload_path / Path(split[1]), - float(split[2]), - ) - for split in splits - ] - - self._crunch(all_attributes, sqls, pid, query_spec) - query_usages = copy.deepcopy(self.query_usages) - tbl_include_subsets = copy.deepcopy(self.tbl_include_subsets) - - # TODO(phw2): pass "execute_query_order" somewhere other than query_spec, just like "query_order" is - if "execute_query_order" in query_spec: - with self._open_for_reading(query_spec["execute_query_order"]) as f: - lines = f.read().splitlines() - sqls = [ - ( - line.split(",")[0], - Path(query_spec["execute_query_directory"]) - / line.split(",")[1], - 1.0, - ) - for line in lines - ] - - # Re-crunch with the new data. 
- self._crunch(all_attributes, sqls, pid, query_spec) - self.query_usages = query_usages - self.tbl_include_subsets = tbl_include_subsets - - def set_workload_timeout(self, metric: float) -> None: - if self.workload_timeout == 0: - self.workload_timeout = metric - else: - self.workload_timeout = min(self.workload_timeout, metric) - - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Workload timeout set to: {self.workload_timeout}" - ) - - def queries_for_table(self, table: str) -> list[str]: - return [q for q in self.order if q in self.tbl_queries_usage[table]] - - def queries_for_table_col(self, table: str, col: str) -> list[str]: - if (table, col) not in self.tbl_filter_queries_usage: - return [] - return [ - q - for q in self.order - if q in self.tbl_filter_queries_usage[TableColTuple((table, col))] - ] - - def column_usages(self) -> TableAttrListMap: - return copy.deepcopy(self.query_usages) - - def max_indexable(self) -> int: - return max([len(cols) for _, cols in self.query_usages.items()]) - - @staticmethod - def compute_total_workload_runtime( - qid_runtime_data: dict[str, BestQueryRun] - ) -> float: - total_runtime: float = 0.0 - for best_run in qid_runtime_data.values(): - assert best_run.runtime is not None - total_runtime += best_run.runtime - total_runtime /= 1.0e6 - return total_runtime - - @time_record("execute") - def execute_workload( - self, - pg_conn: PostgresConn, - actions: list[HolonAction] = [], - variation_names: list[str] = [], - results_dpath: Optional[Path] = None, - observation_space: Optional[StateSpace] = None, - action_space: Optional[HolonSpace] = None, - reset_metrics: Optional[dict[str, BestQueryRun]] = None, - override_workload_timeout: Optional[float] = None, - query_timeout: Optional[int] = None, - workload_qdir: Optional[tuple[Path, Path]] = None, - blocklist: list[str] = [], - first: bool = False, - ) -> tuple[int, bool, dict[str, Any]]: - this_execution_workload_timeout = ( - self.workload_timeout - if not override_workload_timeout - else override_workload_timeout - ) - assert len(actions) == len(variation_names) - - sysknobs = KnobSpaceAction({}) - ql_knobs = [] - if len(actions) > 0: - assert action_space - - sysknobs = cast( - KnobSpaceAction, - [ - v - for t, v in action_space.split_action(actions[0]) - if isinstance(t, LatentKnobSpace) - ][0], - ) - ql_knobs = cast( - list[tuple[LatentQuerySpace, QuerySpaceAction]], - [ - [ - (t, v) - for t, v in action_space.split_action(action) - if isinstance(t, LatentQuerySpace) - ][0] - for action in actions - ], - ) - - # Figure out workload to execute. - if workload_qdir is not None and workload_qdir[0] is not None: - # Load actual queries to execute. - workload_dir, workload_qlist = workload_qdir - with self._open_for_reading(workload_qlist) as f: - psql_order = [ - (f"Q{i+1}", Path(workload_dir) / l.strip()) - for i, l in enumerate(f.readlines()) - ] - - actual_order = [p[0] for p in psql_order] - actual_sql_files = {k: str(v) for (k, v) in psql_order} - actual_queries = {} - for qid, qpat in psql_order: - with self._open_for_reading(qpat) as f: - query = f.read() - actual_queries[qid] = [(QueryType.SELECT, query)] - else: - actual_order = self.order - actual_sql_files = self.sql_files - actual_queries = self.queries - - # Now let us start executing. 
- qid_runtime_data: dict[str, BestQueryRun] = {} - workload_timed_out = False - - for execute_idx, qid in enumerate(actual_order): - if workload_timed_out: - break - - queries = actual_queries[qid] - if any([b in actual_sql_files[qid] for b in blocklist]): - # Skip any query in blocklist. - continue - - for qidx, (sql_type, query) in enumerate(queries): - assert sql_type != QueryType.UNKNOWN - if sql_type != QueryType.SELECT: - # This is a sanity check because any OLTP workload should be run through benchbase, and any OLAP workload should not have INS_UPD_DEL queries. - assert sql_type != QueryType.INS_UPD_DEL - pg_conn.conn().execute(query) - continue - - # De-duplicate the runs. - runs: list[QueryRun] = [] - zruns: list[QueryRun] = [ - QueryRun( - act_name, - f"{act_name}_{qid}", - QuerySpaceKnobAction( - { - ql_knob[0].knobs[k]: ql_knob[1][k] - for k in ql_knob[1].keys() - if f"{qid}_" in k - } - ), - ) - for ql_knob, act_name in zip(ql_knobs, variation_names) - ] - for r in zruns: - if r[2] not in [rr[2] for rr in runs]: - runs.append(r) - - target_pqt = ( - query_timeout if query_timeout else this_execution_workload_timeout - ) - skip_execute = False - if ( - reset_metrics is not None - and qid in reset_metrics - and not reset_metrics[qid].timed_out - ): - # If we have a reset metric, use it's timeout and convert to seconds. - truntime = reset_metrics[qid].runtime - assert truntime is not None - target_pqt = math.ceil(truntime / 1.0e6) - - # If we've seen the exact same query knobs before, skip it. - rmetrics = reset_metrics[qid] - skip_execute = ( - (rmetrics.query_run is not None) - and (rmetrics.query_run.qknobs is not None) - and (rmetrics.query_run.qknobs == runs[-1].qknobs) - ) - - if not skip_execute: - best_run: BestQueryRun = execute_variations( - pg_conn=pg_conn, - runs=runs, - query=query, - query_timeout=min( - target_pqt, - this_execution_workload_timeout - - Workload.compute_total_workload_runtime(qid_runtime_data) - + 1, - ), - artifact_manager=self.artifact_manager, - sysknobs=sysknobs, - observation_space=observation_space, - ) - else: - assert reset_metrics - best_run = reset_metrics[qid] - - if reset_metrics is not None and qid in reset_metrics: - # Old one is actually better so let's use that. - rmetric = reset_metrics[qid] - if best_run.timed_out or ( - best_run.runtime - and rmetric.runtime - and rmetric.runtime < best_run.runtime - ): - best_run = rmetric - - assert best_run.runtime - qid_runtime_data[qid] = best_run - - if ( - Workload.compute_total_workload_runtime(qid_runtime_data) - > this_execution_workload_timeout - ): - # We need to undo any potential statements after the timed out query. - for st, rq in queries[qidx + 1 :]: - if st != QueryType.SELECT: - # This is a sanity check because any OLTP workload should be run through benchbase, and any OLAP workload should not have INS_UPD_DEL queries. If we do have INS_UPD_DEL queries, our "undo" logic will likely have to change. - assert st != QueryType.INS_UPD_DEL - pg_conn.conn().execute(rq) - - workload_timed_out = True - break - - # Undo any necessary state changes. - for qqid_index in range(execute_idx, len(actual_order)): - queries = self.queries[qid] - for sql_type, query in queries: - assert sql_type != QueryType.UNKNOWN - if sql_type != QueryType.SELECT: - assert sql_type != QueryType.INS_UPD_DEL - pg_conn.conn().execute(query) - - if results_dpath is not None: - # Make the result directory. 
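The per-query timeout handed to execute_variations above is capped by whatever remains of the workload budget: the smaller of the per-query target and the workload timeout minus the runtime accumulated so far, plus one second of slack. A worked example with made-up numbers:

    workload_timeout_s = 600.0     # this_execution_workload_timeout
    used_so_far_s = 550.0          # Workload.compute_total_workload_runtime(qid_runtime_data)
    per_query_target_s = 120.0     # target_pqt

    query_timeout_s = min(per_query_target_s, workload_timeout_s - used_so_far_s + 1)
    assert query_timeout_s == 51.0  # remaining budget wins over the per-query target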
- results_dpath = Path(results_dpath) - if not results_dpath.exists(): - results_dpath.mkdir(parents=True, exist_ok=True) - - with open(results_dpath / "run.plans", "w") as f: - # Output the explain data. - for qid, run in qid_runtime_data.items(): - if run.explain_data is not None: - assert run.query_run and run.query_run.qknobs is not None - pqkk = [ - (knob.name(), val) - for knob, val in run.query_run.qknobs.items() - ] - f.write(f"{qid}\n{run.query_run.prefix}: {pqkk}\n") - f.write(json.dumps(run.explain_data)) - f.write("\n\n") - - if observation_space and observation_space.require_metrics(): - # Create the metrics. - # Log the metrics data as a flattened. - accum_data = cast( - list[dict[str, Any]], - [v.metric_data for _, v in qid_runtime_data.items()], - ) - accum_stats = observation_space.merge_deltas(accum_data) - with open(results_dpath / "run.metrics.json", "w") as f: - # Flatten it. - def flatten(d: dict[str, Any]) -> dict[str, Any]: - flat: dict[str, Any] = {} - for k, v in d.items(): - if isinstance(v, dict): - flat[k] = flatten(v) - elif isinstance(v, np.ndarray): - flat[k] = float(v[0]) - elif isinstance(v, np.ScalarType): - if isinstance(v, str): - flat[k] = v - else: - flat[k] = float(cast(float, v)) - else: - flat[k] = v - return flat - - output = flatten(accum_stats) - output["flattened"] = True - f.write(json.dumps(output, indent=4)) - - # run.raw.csv will essentially contain the information in qid_runtime_data. However, run.raw.csv may have an extra line for the penalty. - with open(results_dpath / "run.raw.csv", "w") as f: - # Write the raw query data. - f.write( - "Transaction Type Index,Transaction Name,Start Time (microseconds),Latency (microseconds),Timed Out,Worker Id (start number),Phase Id (index in config file)\n" - ) - - start = 0.0 - for i, qid in enumerate(self.order): - if qid in qid_runtime_data: - best_run = qid_runtime_data[qid] - assert best_run and best_run.runtime and best_run.query_run - rtime = best_run.runtime - pfx = best_run.query_run.prefix - f.write( - f"{i+1},{qid},{start},{rtime},{best_run.timed_out},0,{pfx}\n" - ) - start += rtime / 1e6 - - # Write a penalty term if needed. - penalty = 0.0 - if workload_timed_out and self.workload_timeout_penalty > 1: - # Get the penalty. - penalty = ( - this_execution_workload_timeout * self.workload_timeout_penalty - - Workload.compute_total_workload_runtime(qid_runtime_data) - ) - penalty = (penalty + 1.05) * 1e6 if not first else penalty * 1e6 - elif workload_timed_out and not first: - # Always degrade it a little if we've timed out. - penalty = 3.0e6 - - if penalty > 0: - f.write(f"{len(self.order)},P,{time.time()},{penalty},,0,PENALTY\n") - - # Get all the timeouts. 
- num_timed_out_queries = sum( - [1 if best_run.timed_out else 0 for _, best_run in qid_runtime_data.items()] - ) - return num_timed_out_queries, workload_timed_out, qid_runtime_data - - @time_record("execute") - def _execute_benchbase( - self, benchbase_config: dict[str, Any], results_dpath: Union[str, Path] - ) -> bool: - bb_path = benchbase_config["benchbase_path"] - with local.cwd(bb_path): - code, _, _ = local["java"][ - "-jar", - "benchbase.jar", - "-b", - benchbase_config["benchmark"], - "-c", - benchbase_config["benchbase_config_path"], - "-d", - results_dpath, - "--execute=true", - ].run(retcode=None) - - if code != 0: - return False - return True - - def execute( - self, - pg_conn: PostgresConn, - reward_utility: RewardUtility, - observation_space: StateSpace, - action_space: HolonSpace, - actions: list[HolonAction], - variation_names: list[str], - benchbase_config: dict[str, Any], - query_timeout: Optional[int] = None, - reset_metrics: Optional[dict[str, BestQueryRun]] = None, - update: bool = True, - first: bool = False, - ) -> tuple[bool, float, float, Union[str, Path], bool, dict[str, BestQueryRun]]: - success = True - logging.getLogger(DBGYM_LOGGER_NAME).info("Starting to run benchmark...") - - # Generate a unique temporary directory to store results in. - results_dpath = Path(tempfile.mkdtemp()) - assert ( - results_dpath.is_dir() - and results_dpath.exists() - and not any(results_dpath.iterdir()) - ), "results_dpath should be existent and empty since mkdtemp should guarantee a unique dir." - - if self.benchbase: - # Execute benchbase if specified. - success = self._execute_benchbase(benchbase_config, results_dpath) - # We can only create a state if we succeeded. - assert self.dbgym_cfg is not None - success = observation_space.check_benchbase(self.dbgym_cfg, results_dpath) - else: - num_timed_out_queries, did_workload_time_out, query_metric_data = ( - self.execute_workload( - pg_conn, - actions=actions, - variation_names=variation_names, - results_dpath=results_dpath, - observation_space=observation_space, - action_space=action_space, - reset_metrics=reset_metrics, - override_workload_timeout=self.workload_timeout, - query_timeout=query_timeout, - workload_qdir=None, - blocklist=[], - first=first, - ) - ) - did_anything_time_out = num_timed_out_queries > 0 or did_workload_time_out - success = True - - metric, reward = None, None - if reward_utility is not None: - metric, reward = reward_utility( - results_dpath=results_dpath, update=update, did_error=not success - ) - - logging.getLogger(DBGYM_LOGGER_NAME).info( - f"Benchmark iteration with metric {metric} (reward: {reward}) (did_anything_timeout: {did_anything_time_out})" - ) - return ( - success, - metric, - reward, - results_dpath, - did_anything_time_out, - query_metric_data, - ) diff --git a/tune/protox/tests/__init__.py b/tune/protox/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tune/protox/tests/unittest_benchmark_configs/unittest_dsb.yaml b/tune/protox/tests/unittest_benchmark_configs/unittest_dsb.yaml deleted file mode 100644 index 3d522e90..00000000 --- a/tune/protox/tests/unittest_benchmark_configs/unittest_dsb.yaml +++ /dev/null @@ -1,519 +0,0 @@ -dsb: - query_spec: - benchbase: False - oltp_workload: False - tbl_include_subsets_prune: True - tbl_fold_subsets: True - tbl_fold_delta: 1 - tbl_fold_iterations: 1 - - max_num_columns: 34 - tables: - - customer_address - - customer_demographics - - date_dim - - warehouse - - ship_mode - - time_dim - - reason - - income_band - - item 
- - store - - call_center - - customer - - web_site - - store_returns - - household_demographics - - web_page - - promotion - - catalog_page - - inventory - - catalog_returns - - web_returns - - web_sales - - catalog_sales - - store_sales - - attributes: - customer_address: - - ca_address_sk - - ca_address_id - - ca_street_number - - ca_street_name - - ca_street_type - - ca_suite_number - - ca_city - - ca_county - - ca_state - - ca_zip - - ca_country - - ca_gmt_offset - - ca_location_type - customer_demographics: - - cd_demo_sk - - cd_gender - - cd_marital_status - - cd_education_status - - cd_purchase_estimate - - cd_credit_rating - - cd_dep_count - - cd_dep_employed_count - - cd_dep_college_count - date_dim: - - d_date_sk - - d_date_id - - d_date - - d_month_seq - - d_week_seq - - d_quarter_seq - - d_year - - d_dow - - d_moy - - d_dom - - d_qoy - - d_fy_year - - d_fy_quarter_seq - - d_fy_week_seq - - d_day_name - - d_quarter_name - - d_holiday - - d_weekend - - d_following_holiday - - d_first_dom - - d_last_dom - - d_same_day_ly - - d_same_day_lq - - d_current_day - - d_current_week - - d_current_month - - d_current_quarter - - d_current_year - warehouse: - - w_warehouse_sk - - w_warehouse_id - - w_warehouse_name - - w_warehouse_sq_ft - - w_street_number - - w_street_name - - w_street_type - - w_suite_number - - w_city - - w_county - - w_state - - w_zip - - w_country - - w_gmt_offset - ship_mode: - - sm_ship_mode_sk - - sm_ship_mode_id - - sm_type - - sm_code - - sm_carrier - - sm_contract - time_dim: - - t_time_sk - - t_time_id - - t_time - - t_hour - - t_minute - - t_second - - t_am_pm - - t_shift - - t_sub_shift - - t_meal_time - reason: - - r_reason_sk - - r_reason_id - - r_reason_desc - income_band: - - ib_income_band_sk - - ib_lower_bound - - ib_upper_bound - item: - - i_item_sk - - i_item_id - - i_rec_start_date - - i_rec_end_date - - i_item_desc - - i_current_price - - i_wholesale_cost - - i_brand_id - - i_brand - - i_class_id - - i_class - - i_category_id - - i_category - - i_manufact_id - - i_manufact - - i_size - - i_formulation - - i_color - - i_units - - i_container - - i_manager_id - - i_product_name - store: - - s_store_sk - - s_store_id - - s_rec_start_date - - s_rec_end_date - - s_closed_date_sk - - s_store_name - - s_number_employees - - s_floor_space - - s_hours - - s_manager - - s_market_id - - s_geography_class - - s_market_desc - - s_market_manager - - s_division_id - - s_division_name - - s_company_id - - s_company_name - - s_street_number - - s_street_name - - s_street_type - - s_suite_number - - s_city - - s_county - - s_state - - s_zip - - s_country - - s_gmt_offset - - s_tax_precentage - call_center: - - cc_call_center_sk - - cc_call_center_id - - cc_rec_start_date - - cc_rec_end_date - - cc_closed_date_sk - - cc_open_date_sk - - cc_name - - cc_class - - cc_employees - - cc_sq_ft - - cc_hours - - cc_manager - - cc_mkt_id - - cc_mkt_class - - cc_mkt_desc - - cc_market_manager - - cc_division - - cc_division_name - - cc_company - - cc_company_name - - cc_street_number - - cc_street_name - - cc_street_type - - cc_suite_number - - cc_city - - cc_county - - cc_state - - cc_zip - - cc_country - - cc_gmt_offset - - cc_tax_percentage - customer: - - c_customer_sk - - c_customer_id - - c_current_cdemo_sk - - c_current_hdemo_sk - - c_current_addr_sk - - c_first_shipto_date_sk - - c_first_sales_date_sk - - c_salutation - - c_first_name - - c_last_name - - c_preferred_cust_flag - - c_birth_day - - c_birth_month - - c_birth_year - - c_birth_country - - c_login - - 
c_email_address - - c_last_review_date_sk - web_site: - - web_site_sk - - web_site_id - - web_rec_start_date - - web_rec_end_date - - web_name - - web_open_date_sk - - web_close_date_sk - - web_class - - web_manager - - web_mkt_id - - web_mkt_class - - web_mkt_desc - - web_market_manager - - web_company_id - - web_company_name - - web_street_number - - web_street_name - - web_street_type - - web_suite_number - - web_city - - web_county - - web_state - - web_zip - - web_country - - web_gmt_offset - - web_tax_percentage - store_returns: - - sr_returned_date_sk - - sr_return_time_sk - - sr_item_sk - - sr_customer_sk - - sr_cdemo_sk - - sr_hdemo_sk - - sr_addr_sk - - sr_store_sk - - sr_reason_sk - - sr_ticket_number - - sr_return_quantity - - sr_return_amt - - sr_return_tax - - sr_return_amt_inc_tax - - sr_fee - - sr_return_ship_cost - - sr_refunded_cash - - sr_reversed_charge - - sr_store_credit - - sr_net_loss - household_demographics: - - hd_demo_sk - - hd_income_band_sk - - hd_buy_potential - - hd_dep_count - - hd_vehicle_count - web_page: - - wp_web_page_sk - - wp_web_page_id - - wp_rec_start_date - - wp_rec_end_date - - wp_creation_date_sk - - wp_access_date_sk - - wp_autogen_flag - - wp_customer_sk - - wp_url - - wp_type - - wp_char_count - - wp_link_count - - wp_image_count - - wp_max_ad_count - promotion: - - p_promo_sk - - p_promo_id - - p_start_date_sk - - p_end_date_sk - - p_item_sk - - p_cost - - p_response_target - - p_promo_name - - p_channel_dmail - - p_channel_email - - p_channel_catalog - - p_channel_tv - - p_channel_radio - - p_channel_press - - p_channel_event - - p_channel_demo - - p_channel_details - - p_purpose - - p_discount_active - catalog_page: - - cp_catalog_page_sk - - cp_catalog_page_id - - cp_start_date_sk - - cp_end_date_sk - - cp_department - - cp_catalog_number - - cp_catalog_page_number - - cp_description - - cp_type - inventory: - - inv_date_sk - - inv_item_sk - - inv_warehouse_sk - - inv_quantity_on_hand - catalog_returns: - - cr_returned_date_sk - - cr_returned_time_sk - - cr_item_sk - - cr_refunded_customer_sk - - cr_refunded_cdemo_sk - - cr_refunded_hdemo_sk - - cr_refunded_addr_sk - - cr_returning_customer_sk - - cr_returning_cdemo_sk - - cr_returning_hdemo_sk - - cr_returning_addr_sk - - cr_call_center_sk - - cr_catalog_page_sk - - cr_ship_mode_sk - - cr_warehouse_sk - - cr_reason_sk - - cr_order_number - - cr_return_quantity - - cr_return_amount - - cr_return_tax - - cr_return_amt_inc_tax - - cr_fee - - cr_return_ship_cost - - cr_refunded_cash - - cr_reversed_charge - - cr_store_credit - - cr_net_loss - web_returns: - _ wr_returned_date_sk - _ wr_returned_time_sk - _ wr_item_sk - _ wr_refunded_customer_sk - _ wr_refunded_cdemo_sk - _ wr_refunded_hdemo_sk - _ wr_refunded_addr_sk - _ wr_returning_customer_sk - _ wr_returning_cdemo_sk - _ wr_returning_hdemo_sk - _ wr_returning_addr_sk - _ wr_web_page_sk - _ wr_reason_sk - _ wr_order_number - _ wr_return_quantity - _ wr_return_amt - _ wr_return_tax - _ wr_return_amt_inc_tax - _ wr_fee - _ wr_return_ship_cost - _ wr_refunded_cash - _ wr_reversed_charge - _ wr_account_credit - _ wr_net_loss - web_sales: - - ws_sold_date_sk - - ws_sold_time_sk - - ws_ship_date_sk - - ws_item_sk - - ws_bill_customer_sk - - ws_bill_cdemo_sk - - ws_bill_hdemo_sk - - ws_bill_addr_sk - - ws_ship_customer_sk - - ws_ship_cdemo_sk - - ws_ship_hdemo_sk - - ws_ship_addr_sk - - ws_web_page_sk - - ws_web_site_sk - - ws_ship_mode_sk - - ws_warehouse_sk - - ws_promo_sk - - ws_order_number - - ws_quantity - - ws_wholesale_cost - - 
ws_list_price - - ws_sales_price - - ws_ext_discount_amt - - ws_ext_sales_price - - ws_ext_wholesale_cost - - ws_ext_list_price - - ws_ext_tax - - ws_coupon_amt - - ws_ext_ship_cost - - ws_net_paid - - ws_net_paid_inc_tax - - ws_net_paid_inc_ship - - ws_net_paid_inc_ship_tax - - ws_net_profit - catalog_sales: - - cs_sold_date_sk - - cs_sold_time_sk - - cs_ship_date_sk - - cs_bill_customer_sk - - cs_bill_cdemo_sk - - cs_bill_hdemo_sk - - cs_bill_addr_sk - - cs_ship_customer_sk - - cs_ship_cdemo_sk - - cs_ship_hdemo_sk - - cs_ship_addr_sk - - cs_call_center_sk - - cs_catalog_page_sk - - cs_ship_mode_sk - - cs_warehouse_sk - - cs_item_sk - - cs_promo_sk - - cs_order_number - - cs_quantity - - cs_wholesale_cost - - cs_list_price - - cs_sales_price - - cs_ext_discount_amt - - cs_ext_sales_price - - cs_ext_wholesale_cost - - cs_ext_list_price - - cs_ext_tax - - cs_coupon_amt - - cs_ext_ship_cost - - cs_net_paid - - cs_net_paid_inc_tax - - cs_net_paid_inc_ship - - cs_net_paid_inc_ship_tax - - cs_net_profit - store_sales: - - ss_sold_date_sk - - ss_sold_time_sk - - ss_item_sk - - ss_customer_sk - - ss_cdemo_sk - - ss_hdemo_sk - - ss_addr_sk - - ss_store_sk - - ss_promo_sk - - ss_ticket_number - - ss_quantity - - ss_wholesale_cost - - ss_list_price - - ss_sales_price - - ss_ext_discount_amt - - ss_ext_sales_price - - ss_ext_wholesale_cost - - ss_ext_list_price - - ss_ext_tax - - ss_coupon_amt - - ss_net_paid - - ss_net_paid_inc_tax - - ss_net_profit - - # Additional table level knobs. - # Format: - # : - # - # - # ... - table_level_knobs: {} - - index_space_aux_type: False - index_space_aux_include: True - - # Per-query knobs. - # Format: - # : - # - # ... - per_query_scan_method: True - per_query_select_parallel: True - per_query_knobs: {} - per_query_knob_gen: - enable_hashjoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_mergejoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_nestloop: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_sort: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_gathermerge: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_hashagg: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_parallel_hash: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_material: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_memoize: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - random_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - seq_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - hash_mem_multiplier: {type: "float", min: 1, max: 1000, quantize: 0, log_scale: 1, unit: 0} diff --git a/tune/protox/tests/unittest_benchmark_configs/unittest_jobfull.yaml b/tune/protox/tests/unittest_benchmark_configs/unittest_jobfull.yaml deleted file mode 100644 index 62870095..00000000 --- a/tune/protox/tests/unittest_benchmark_configs/unittest_jobfull.yaml +++ /dev/null @@ -1,198 +0,0 @@ -job: - query_spec: - benchbase: False - oltp_workload: False - tbl_include_subsets_prune: True - tbl_fold_subsets: False - tbl_fold_delta: 1 - tbl_fold_iterations: 1 - - max_num_columns: 12 - tables: - - aka_name - - aka_title - - cast_info - - char_name - - comp_cast_type - - company_name - - company_type - - complete_cast - - info_type - - keyword - - kind_type - - link_type - - 
movie_companies - - movie_info - - movie_info_idx - - movie_keyword - - movie_link - - name - - person_info - - role_type - - title - - attributes: - aka_name: - - id - - person_id - - name - - imdb_index - - name_pcode_cf - - name_pcode_nf - - surname_pcode - - md5sum - aka_title: - - id - - movie_id - - title - - imdb_index - - kind_id - - production_year - - phonetic_code - - episode_of_id - - season_nr - - episode_nr - - note - - md5sum - cast_info: - - id - - person_id - - movie_id - - person_role_id - - note - - nr_order - - role_id - char_name: - - id - - name - - imdb_index - - imdb_id - - name_pcode_nf - - surname_pcode - - md5sum - comp_cast_type: - - id - - kind - company_name: - - id - - name - - country_code - - imdb_id - - name_pcode_nf - - name_pcode_sf - - md5sum - company_type: - - id - - kind - complete_cast: - - id - - movie_id - - subject_id - - status_id - info_type: - - id - - info - keyword: - - id - - keyword - - phonetic_code - kind_type: - - id - - kind - link_type: - - id - - link - movie_companies: - - id - - movie_id - - company_id - - company_type_id - - note - movie_info: - - id - - movie_id - - info_type_id - - info - - note - movie_info_idx: - - id - - movie_id - - info_type_id - - info - - note - movie_keyword: - - id - - movie_id - - keyword_id - movie_link: - - id - - movie_id - - linked_movie_id - - link_type_id - name: - - id - - name - - imdb_index - - imdb_id - - gender - - name_pcode_cf - - name_pcode_nf - - surname_pcode - - md5sum - person_info: - - id - - person_id - - info_type_id - - info - - note - role_type: - - id - - role - title: - - id - - title - - imdb_index - - kind_id - - production_year - - imdb_id - - phonetic_code - - episode_of_id - - season_nr - - episode_nr - - series_years - - md5sum - - # Additional table level knobs. - # Format: - # : - # - # - # ... - table_level_knobs: {} - - # Per-query knobs. - # Format: - # : - # - # ... 
- per_query_scan_method: True - per_query_select_parallel: True - - index_space_aux_type: True - index_space_aux_include: True - - per_query_knob_gen: - enable_hashjoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_mergejoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_nestloop: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_sort: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_gathermerge: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_hashagg: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_parallel_hash: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_material: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_memoize: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - random_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - seq_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - hash_mem_multiplier: {type: "float", min: 1, max: 1000, quantize: 0, log_scale: 1, unit: 0} - - per_query_knobs: {} diff --git a/tune/protox/tests/unittest_benchmark_configs/unittest_tpcc.yaml b/tune/protox/tests/unittest_benchmark_configs/unittest_tpcc.yaml deleted file mode 100644 index 9851c430..00000000 --- a/tune/protox/tests/unittest_benchmark_configs/unittest_tpcc.yaml +++ /dev/null @@ -1,162 +0,0 @@ -tpcc: - query_spec: - benchbase: True - oltp_workload: True - tbl_include_subsets_prune: True - tbl_fold_subsets: False - tbl_fold_delta: 1 - tbl_fold_iterations: 1 - - max_num_columns: 21 - tables: - - warehouse - - district - - customer - - history - - oorder - - order_line - - new_order - - stock - - item - - attributes: - warehouse: - - w_id - - w_ytd - - w_tax - - w_name - - w_street_1 - - w_street_2 - - w_city - - w_state - - w_zip - item: - - i_id - - i_name - - i_price - - i_data - - i_im_id - stock: - - s_w_id - - s_i_id - - s_quantity - - s_ytd - - s_order_cnt - - s_remote_cnt - - s_data - - s_dist_01 - - s_dist_02 - - s_dist_03 - - s_dist_04 - - s_dist_05 - - s_dist_06 - - s_dist_07 - - s_dist_08 - - s_dist_09 - - s_dist_10 - district: - - d_w_id - - d_id - - d_ytd - - d_tax - - d_next_o_id - - d_name - - d_street_1 - - d_street_2 - - d_city - - d_state - - d_zip - customer: - - c_w_id - - c_d_id - - c_id - - c_discount - - c_credit - - c_last - - c_first - - c_credit_lim - - c_balance - - c_ytd_payment - - c_payment_cnt - - c_delivery_cnt - - c_street_1 - - c_street_2 - - c_city - - c_state - - c_zip - - c_phone - - c_since - - c_middle - - c_data - history: - - h_c_id - - h_c_d_id - - h_c_w_id - - h_d_id - - h_w_id - - h_date - - h_amount - - h_data - oorder: - - o_w_id - - o_d_id - - o_id - - o_c_id - - o_carrier_id - - o_ol_cnt - - o_all_local - - o_entry_d - new_order: - - no_w_id - - no_d_id - - no_o_id - order_line: - - ol_w_id - - ol_d_id - - ol_o_id - - ol_number - - ol_i_id - - ol_delivery_d - - ol_amount - - ol_supply_w_id - - ol_quantity - - ol_dist_info - - # Additional table level knobs. - # Format: - # : - # - # - # ... 
- table_level_knobs: - warehouse: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - district: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - customer: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - history: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - oorder: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - order_line: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - new_order: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - stock: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - item: - fillfactor: {type: "integer", min: 10, max: 100, quantize: 9, log_scale: 0, unit_scale: 0, unit: 0, round: True} - - # Per-query knobs. - # Format: - # : - # - # ... - per_query_knobs: {} - per_query_scan_method: False - per_query_select_parallel: False - index_space_aux_type: False - index_space_aux_include: False - per_query_knob_gen: {} - per_query_knobs: {} diff --git a/tune/protox/tests/unittest_benchmark_configs/unittest_tpch.yaml b/tune/protox/tests/unittest_benchmark_configs/unittest_tpch.yaml deleted file mode 100644 index 5f8df562..00000000 --- a/tune/protox/tests/unittest_benchmark_configs/unittest_tpch.yaml +++ /dev/null @@ -1,124 +0,0 @@ -tpch: - query_spec: - benchbase: False - oltp_workload: False - tbl_include_subsets_prune: True - tbl_fold_subsets: False - tbl_fold_delta: 1 - tbl_fold_iterations: 1 - - max_num_columns: 16 - tables: - - part - - partsupp - - lineitem - - orders - - supplier - - customer - - nation - - region - - attributes: - region: - - r_regionkey - - r_name - - r_comment - nation: - - n_nationkey - - n_name - - n_regionkey - - n_comment - part: - - p_partkey - - p_name - - p_mfgr - - p_brand - - p_type - - p_size - - p_container - - p_retailprice - - p_comment - supplier: - - s_suppkey - - s_name - - s_address - - s_nationkey - - s_phone - - s_acctbal - - s_comment - partsupp: - - ps_partkey - - ps_suppkey - - ps_availqty - - ps_supplycost - - ps_comment - customer: - - c_custkey - - c_name - - c_address - - c_nationkey - - c_phone - - c_acctbal - - c_mktsegment - - c_comment - orders: - - o_orderkey - - o_custkey - - o_orderstatus - - o_totalprice - - o_orderdate - - o_orderpriority - - o_clerk - - o_shippriority - - o_comment - lineitem: - - l_orderkey - - l_partkey - - l_suppkey - - l_linenumber - - l_quantity - - l_extendedprice - - l_discount - - l_tax - - l_returnflag - - l_linestatus - - l_shipdate - - l_commitdate - - l_receiptdate - - l_shipinstruct - - l_shipmode - - l_comment - - # Additional table level knobs. - # Format: - # : - # - # - # ... - table_level_knobs: {} - - # Per-query knobs. - # Format: - # : - # - # ... 
- per_query_scan_method: True - per_query_select_parallel: True - index_space_aux_type: True - index_space_aux_include: True - - per_query_knob_gen: - enable_hashjoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_mergejoin: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_nestloop: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_sort: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_gathermerge: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_hashagg: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_parallel_hash: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_material: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - enable_memoize: {type: "boolean", min: 0, max: 1, quantize: 0, log_scale: 0, unit: 0} - random_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - seq_page_cost: {type: "float", min: 0, max: 2048, quantize: 0, log_scale: 1, unit: 0} - hash_mem_multiplier: {type: "float", min: 1, max: 1000, quantize: 0, log_scale: 1, unit: 0} - - per_query_knobs: {} diff --git a/tune/protox/tests/unittest_dsb_dir/order.txt b/tune/protox/tests/unittest_dsb_dir/order.txt deleted file mode 100644 index b8f755e0..00000000 --- a/tune/protox/tests/unittest_dsb_dir/order.txt +++ /dev/null @@ -1,53 +0,0 @@ -Q1,query001s0.sql -Q2,query010s0.sql -Q3,query013s0_spj.sql -Q4,query013s0.sql -Q5,query014s0.sql -Q6,query018s0_spj.sql -Q7,query018s0.sql -Q8,query019s0.sql -Q9,query019s0_spj.sql -Q10,query023s0.sql -Q11,query025s0_spj.sql -Q12,query025s0.sql -Q13,query027s0.sql -Q14,query027s0_spj.sql -Q15,query030s0.sql -Q16,query031s0.sql -Q17,query032s0.sql -Q18,query038s0.sql -Q19,query039as0.sql -Q20,query039bs0.sql -Q21,query040s0.sql -Q22,query040s0_spj.sql -Q23,query050s0_spj.sql -Q24,query050s0.sql -Q25,query054s0.sql -Q26,query058s0.sql -Q27,query059s0.sql -Q28,query064s0.sql -Q29,query065s0.sql -Q30,query069s0.sql -Q31,query072s0.sql -Q32,query072s0_spj.sql -Q33,query075s0.sql -Q34,query080s0.sql -Q35,query081s0.sql -Q36,query083s0.sql -Q37,query084s0_spj.sql -Q38,query084s0.sql -Q39,query085s0_spj.sql -Q40,query085s0.sql -Q41,query087s0.sql -Q42,query091s0.sql -Q43,query091s0_spj.sql -Q44,query092s0.sql -Q45,query094s0.sql -Q46,query099s0.sql -Q47,query099s0_spj.sql -Q48,query100s0.sql -Q49,query100s0_spj.sql -Q50,query101s0_spj.sql -Q51,query101s0.sql -Q52,query102s0_spj.sql -Q53,query102s0.sql diff --git a/tune/protox/tests/unittest_dsb_dir/query001s0.sql b/tune/protox/tests/unittest_dsb_dir/query001s0.sql deleted file mode 100644 index 131ed8da..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query001s0.sql +++ /dev/null @@ -1,33 +0,0 @@ -with customer_total_return as -(select sr_customer_sk as ctr_customer_sk -,sr_store_sk as ctr_store_sk -,sr_reason_sk as ctr_reason_sk -,sum(SR_REFUNDED_CASH) as ctr_total_return -from store_returns -,date_dim -where sr_returned_date_sk = d_date_sk -and d_year =2001 -and sr_return_amt / sr_return_quantity between 236 and 295 -group by sr_customer_sk -,sr_store_sk, sr_reason_sk) - select c_customer_id -from customer_total_return ctr1 -,store -,customer -,customer_demographics -where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 -from customer_total_return ctr2 -where ctr1.ctr_store_sk = ctr2.ctr_store_sk -) -and ctr1.ctr_reason_sk BETWEEN 28 AND 31 -and s_store_sk = 
ctr1.ctr_store_sk -and s_state IN ('MI', 'NC', 'WI') -and ctr1.ctr_customer_sk = c_customer_sk -and c_current_cdemo_sk = cd_demo_sk -and cd_marital_status IN ('W', 'W') -and cd_education_status IN ('4 yr Degree', 'College') -and cd_gender = 'M' -and c_birth_month = 5 -and c_birth_year BETWEEN 1950 AND 1956 -order by c_customer_id -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query010s0.sql b/tune/protox/tests/unittest_dsb_dir/query010s0.sql deleted file mode 100644 index e1ffa566..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query010s0.sql +++ /dev/null @@ -1,75 +0,0 @@ -select - cd_gender, - cd_marital_status, - cd_education_status, - count(*) cnt1, - cd_purchase_estimate, - count(*) cnt2, - cd_credit_rating, - count(*) cnt3, - cd_dep_count, - count(*) cnt4, - cd_dep_employed_count, - count(*) cnt5, - cd_dep_college_count, - count(*) cnt6 - from - customer c,customer_address ca,customer_demographics - where - c.c_current_addr_sk = ca.ca_address_sk and - ca_county in ('Audubon County','Dade County','Dewey County','Hardeman County','Talbot County') and - c.c_birth_month in (4, 5) and - cd_demo_sk = c.c_current_cdemo_sk - and cd_marital_status in ('M', 'U', 'U') - and cd_education_status in ('Primary', 'College', '4 yr Degree') - and cd_gender = 'M' and - exists (select * - from store_sales,date_dim, item - where c.c_customer_sk = ss_customer_sk and - ss_sold_date_sk = d_date_sk and - d_year = 2001 and - d_moy between 1 and 1+3 and - ss_item_sk = i_item_sk and - i_category in ('Books', 'Home', 'Sports') - and ss_sales_price / ss_list_price BETWEEN 17 * 0.01 AND 27 * 0.01 - and i_manager_id BETWEEN 91 and 100 - ) and - (exists (select * - from web_sales,date_dim, item - where c.c_customer_sk = ws_bill_customer_sk and - ws_sold_date_sk = d_date_sk and - d_year = 2001 and - d_moy between 1 ANd 1+3 and - ws_item_sk = i_item_sk and - i_category in ('Books', 'Home', 'Sports') - and i_manager_id BETWEEN 91 and 100 - and ws_sales_price / ws_list_price BETWEEN 17 * 0.01 AND 27 * 0.01 - ) or - exists (select * - from catalog_sales,date_dim, item - where c.c_customer_sk = cs_ship_customer_sk and - cs_sold_date_sk = d_date_sk and - d_year = 2001 and - d_moy between 1 and 1+3 and - cs_item_sk = i_item_sk and - i_category in ('Books', 'Home', 'Sports') - and i_manager_id BETWEEN 91 and 100 - and cs_sales_price / cs_list_price BETWEEN 17 * 0.01 AND 27 * 0.01 - )) - group by cd_gender, - cd_marital_status, - cd_education_status, - cd_purchase_estimate, - cd_credit_rating, - cd_dep_count, - cd_dep_employed_count, - cd_dep_college_count - order by cd_gender, - cd_marital_status, - cd_education_status, - cd_purchase_estimate, - cd_credit_rating, - cd_dep_count, - cd_dep_employed_count, - cd_dep_college_count -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query013s0.sql b/tune/protox/tests/unittest_dsb_dir/query013s0.sql deleted file mode 100644 index b857851a..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query013s0.sql +++ /dev/null @@ -1,48 +0,0 @@ -select avg(ss_quantity) -,avg(ss_ext_sales_price) -,avg(ss_ext_wholesale_cost) -,sum(ss_ext_wholesale_cost) -from store_sales - ,store - ,customer_demographics - ,household_demographics - ,customer_address - ,date_dim -where s_store_sk = ss_store_sk -and ss_sold_date_sk = d_date_sk and d_year = 2001 -and((ss_hdemo_sk=hd_demo_sk -and cd_demo_sk = ss_cdemo_sk -and cd_marital_status = 'U' -and cd_education_status = 'College' -and ss_sales_price between 100.00 and 150.00 -and 
hd_dep_count = 3 - )or - (ss_hdemo_sk=hd_demo_sk -and cd_demo_sk = ss_cdemo_sk -and cd_marital_status = 'W' -and cd_education_status = '2 yr Degree' -and ss_sales_price between 50.00 and 100.00 -and hd_dep_count = 1 - ) or - (ss_hdemo_sk=hd_demo_sk -and cd_demo_sk = ss_cdemo_sk -and cd_marital_status = 'S' -and cd_education_status = 'College' -and ss_sales_price between 150.00 and 200.00 -and hd_dep_count = 1 - )) -and((ss_addr_sk = ca_address_sk -and ca_country = 'United States' -and ca_state in ('IN', 'NM', 'VA') -and ss_net_profit between 100 and 200 - ) or - (ss_addr_sk = ca_address_sk -and ca_country = 'United States' -and ca_state in ('MT', 'OH', 'OR') -and ss_net_profit between 150 and 300 - ) or - (ss_addr_sk = ca_address_sk -and ca_country = 'United States' -and ca_state in ('GA', 'IL', 'TX') -and ss_net_profit between 50 and 250 - )); \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query013s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query013s0_spj.sql deleted file mode 100644 index 8345ec5f..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query013s0_spj.sql +++ /dev/null @@ -1,48 +0,0 @@ -select min(ss_quantity) - ,min(ss_ext_sales_price) - ,min(ss_ext_wholesale_cost) - ,min(ss_ext_wholesale_cost) - from store_sales - ,store - ,customer_demographics - ,household_demographics - ,customer_address - ,date_dim - where s_store_sk = ss_store_sk - and ss_sold_date_sk = d_date_sk and d_year = 2001 - and((ss_hdemo_sk=hd_demo_sk - and cd_demo_sk = ss_cdemo_sk - and cd_marital_status = 'U' - and cd_education_status = 'College' - and ss_sales_price between 100.00 and 150.00 - and hd_dep_count = 3 - )or - (ss_hdemo_sk=hd_demo_sk - and cd_demo_sk = ss_cdemo_sk - and cd_marital_status = 'W' - and cd_education_status = '2 yr Degree' - and ss_sales_price between 50.00 and 100.00 - and hd_dep_count = 1 - ) or - (ss_hdemo_sk=hd_demo_sk - and cd_demo_sk = ss_cdemo_sk - and cd_marital_status = 'S' - and cd_education_status = 'College' - and ss_sales_price between 150.00 and 200.00 - and hd_dep_count = 1 - )) - and((ss_addr_sk = ca_address_sk - and ca_country = 'United States' - and ca_state in ('IN', 'NM', 'VA') - and ss_net_profit between 100 and 200 - ) or - (ss_addr_sk = ca_address_sk - and ca_country = 'United States' - and ca_state in ('MT', 'OH', 'OR') - and ss_net_profit between 150 and 300 - ) or - (ss_addr_sk = ca_address_sk - and ca_country = 'United States' - and ca_state in ('GA', 'IL', 'TX') - and ss_net_profit between 50 and 250 - )); \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query014s0.sql b/tune/protox/tests/unittest_dsb_dir/query014s0.sql deleted file mode 100644 index 9aa3e39a..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query014s0.sql +++ /dev/null @@ -1,124 +0,0 @@ -with cross_items as - (select i_item_sk ss_item_sk - from item, - (select iss.i_brand_id brand_id - ,iss.i_class_id class_id - ,iss.i_category_id category_id - from store_sales - ,item iss - ,date_dim d1 - where ss_item_sk = iss.i_item_sk - and ss_sold_date_sk = d1.d_date_sk - and d1.d_year between 1998 AND 1998 + 2 - and i_category IN ('Books', 'Home', 'Jewelry') - and i_manager_id BETWEEN 28 and 37 - and ss_wholesale_cost BETWEEN 80 AND 100 -intersect - select ics.i_brand_id - ,ics.i_class_id - ,ics.i_category_id - from catalog_sales - ,item ics - ,date_dim d2 - where cs_item_sk = ics.i_item_sk - and cs_sold_date_sk = d2.d_date_sk - and d2.d_year between 1998 AND 1998 + 2 - and i_category IN ('Books', 'Home', 'Jewelry') - and i_manager_id 
BETWEEN 28 and 37 - and cs_wholesale_cost BETWEEN 80 AND 100 -intersect - select iws.i_brand_id - ,iws.i_class_id - ,iws.i_category_id - from web_sales - ,item iws - ,date_dim d3 - where ws_item_sk = iws.i_item_sk - and ws_sold_date_sk = d3.d_date_sk - and ws_wholesale_cost BETWEEN 80 AND 100 - and d3.d_year between 1998 AND 1998 + 2) x - where i_brand_id = brand_id - and i_class_id = class_id - and i_category_id = category_id - and i_category IN ('Books', 'Home', 'Jewelry') - and i_manager_id BETWEEN 28 and 37 -), - avg_sales as -(select avg(quantity*list_price) average_sales - from (select ss_quantity quantity - ,ss_list_price list_price - from store_sales - ,date_dim - where ss_sold_date_sk = d_date_sk - and d_year between 1998 and 1998 + 2 - and ss_wholesale_cost BETWEEN 80 AND 100 - union all - select cs_quantity quantity - ,cs_list_price list_price - from catalog_sales - ,date_dim - where cs_sold_date_sk = d_date_sk - and d_year between 1998 and 1998 + 2 - and cs_wholesale_cost BETWEEN 80 AND 100 - union all - select ws_quantity quantity - ,ws_list_price list_price - from web_sales - ,date_dim - where ws_sold_date_sk = d_date_sk - and ws_wholesale_cost BETWEEN 80 AND 100 - and d_year between 1998 and 1998 + 2) x) - select this_year.channel ty_channel - ,this_year.i_brand_id ty_brand - ,this_year.i_class_id ty_class - ,this_year.i_category_id ty_category - ,this_year.sales ty_sales - ,this_year.number_sales ty_number_sales - ,last_year.channel ly_channel - ,last_year.i_brand_id ly_brand - ,last_year.i_class_id ly_class - ,last_year.i_category_id ly_category - ,last_year.sales ly_sales - ,last_year.number_sales ly_number_sales - from - (select 'store' channel, i_brand_id,i_class_id,i_category_id - ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales - from store_sales - ,item - ,date_dim - where ss_item_sk in (select ss_item_sk from cross_items) - and ss_item_sk = i_item_sk - and ss_sold_date_sk = d_date_sk - and d_week_seq = (select d_week_seq - from date_dim - where d_year = 1998 + 1 - and d_moy = 12 - and d_dom = 5) - and i_category IN ('Books', 'Home', 'Jewelry') - and i_manager_id BETWEEN 28 and 37 - and ss_wholesale_cost BETWEEN 80 AND 100 - group by i_brand_id,i_class_id,i_category_id - having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, - (select 'store' channel, i_brand_id,i_class_id - ,i_category_id, sum(ss_quantity*ss_list_price) sales, count(*) number_sales - from store_sales - ,item - ,date_dim - where ss_item_sk in (select ss_item_sk from cross_items) - and ss_item_sk = i_item_sk - and ss_sold_date_sk = d_date_sk - and d_week_seq = (select d_week_seq - from date_dim - where d_year = 1998 - and d_moy = 12 - and d_dom = 5) - and i_category IN ('Books', 'Home', 'Jewelry') - and ss_wholesale_cost BETWEEN 80 AND 100 - and i_manager_id BETWEEN 28 and 37 -group by i_brand_id,i_class_id,i_category_id - having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year - where this_year.i_brand_id= last_year.i_brand_id - and this_year.i_class_id = last_year.i_class_id - and this_year.i_category_id = last_year.i_category_id - order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id - limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query018s0.sql b/tune/protox/tests/unittest_dsb_dir/query018s0.sql deleted file mode 100644 index 0acac34a..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query018s0.sql +++ /dev/null @@ -1,29 +0,0 @@ -select 
i_item_id, - ca_country, - ca_state, - ca_county, - avg( cast(cs_quantity as decimal(12,2))) agg1, - avg( cast(cs_list_price as decimal(12,2))) agg2, - avg( cast(cs_coupon_amt as decimal(12,2))) agg3, - avg( cast(cs_sales_price as decimal(12,2))) agg4, - avg( cast(cs_net_profit as decimal(12,2))) agg5, - avg( cast(c_birth_year as decimal(12,2))) agg6 -from catalog_sales, customer_demographics, customer, customer_address, date_dim, item -where cs_sold_date_sk = d_date_sk and - cs_item_sk = i_item_sk and - cs_bill_cdemo_sk = cd_demo_sk and - cs_bill_customer_sk = c_customer_sk and - cd_gender = 'F' and - cd_education_status = 'College' and - c_current_addr_sk = ca_address_sk and - d_year = 2001 and - c_birth_month = 5 and - ca_state in ('MT', 'OH', 'OR') - and cs_wholesale_cost BETWEEN 84 AND 89 - AND i_category = 'Jewelry' - group by rollup (i_item_id, ca_country, ca_state, ca_county) - order by ca_country, - ca_state, - ca_county, - i_item_id - limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query018s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query018s0_spj.sql deleted file mode 100644 index ea77a1e7..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query018s0_spj.sql +++ /dev/null @@ -1,24 +0,0 @@ -select min(i_item_id), - min(ca_country), - min(ca_state), - min(ca_county), - min(cs_quantity), - min(cs_list_price), - min(cs_coupon_amt), - min(cs_sales_price), - min(cs_net_profit), - min(c_birth_year), - min(cd_dep_count) - from catalog_sales, customer_demographics, customer, customer_address, date_dim, item - where cs_sold_date_sk = d_date_sk and - cs_item_sk = i_item_sk and - cs_bill_cdemo_sk = cd_demo_sk and - cs_bill_customer_sk = c_customer_sk and - cd_gender = 'F' and - cd_education_status = 'College' and - c_current_addr_sk = ca_address_sk and - d_year = 2001 and - c_birth_month = 5 and - ca_state in ('MT', 'OH', 'OR') - and cs_wholesale_cost BETWEEN 84 AND 89 - AND i_category = 'Jewelry'; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query019s0.sql b/tune/protox/tests/unittest_dsb_dir/query019s0.sql deleted file mode 100644 index 1a50b0a6..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query019s0.sql +++ /dev/null @@ -1,25 +0,0 @@ -select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, - sum(ss_ext_sales_price) ext_price - from date_dim, store_sales, item,customer,customer_address,store - where d_date_sk = ss_sold_date_sk - and ss_item_sk = i_item_sk - and ss_customer_sk = c_customer_sk - and c_current_addr_sk = ca_address_sk - and ss_store_sk = s_store_sk - AND i_category = 'Home' - and d_year=2002 - and d_moy = 8 - and substring(ca_zip,1,5) <> substring(s_zip,1,5) - and ca_state = 'GA' - and c_birth_month = 4 - and ss_wholesale_cost BETWEEN 80 AND 100 - group by i_brand - ,i_brand_id - ,i_manufact_id - ,i_manufact - order by ext_price desc - ,i_brand - ,i_brand_id - ,i_manufact_id - ,i_manufact -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query019s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query019s0_spj.sql deleted file mode 100644 index 2766ab2f..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query019s0_spj.sql +++ /dev/null @@ -1,15 +0,0 @@ -select min(i_brand_id), min(i_manufact_id), - min(ss_ext_sales_price) - from date_dim, store_sales, item,customer,customer_address,store - where d_date_sk = ss_sold_date_sk - and ss_item_sk = i_item_sk - and ss_customer_sk = c_customer_sk - and c_current_addr_sk = ca_address_sk - and ss_store_sk = 
s_store_sk - AND i_category = 'Home' - and d_year=2002 - and d_moy = 8 - and substring(ca_zip,1,5) <> substring(s_zip,1,5) - and ca_state = 'GA' - and c_birth_month = 4 - and ss_wholesale_cost BETWEEN 80 AND 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query023s0.sql b/tune/protox/tests/unittest_dsb_dir/query023s0.sql deleted file mode 100644 index 39dea945..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query023s0.sql +++ /dev/null @@ -1,55 +0,0 @@ -with frequent_ss_items as - (select substring(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt - from store_sales - ,date_dim - ,item - where ss_sold_date_sk = d_date_sk - and ss_item_sk = i_item_sk - and d_year = 2001 - and i_manager_id BETWEEN 77 and 96 - AND i_category IN ('Books', 'Jewelry', 'Sports') - group by substring(i_item_desc,1,30),i_item_sk,d_date - having count(*) >4), - max_store_sales as - (select max(csales) tpcds_cmax - from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales - from store_sales - ,customer - ,date_dim - where ss_customer_sk = c_customer_sk - and ss_sold_date_sk = d_date_sk - and d_year = 2001 - and ss_wholesale_cost BETWEEN 2 AND 12 - group by c_customer_sk) tmp1), - best_ss_customer as - (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales - from store_sales - ,customer - where ss_customer_sk = c_customer_sk - and c_birth_year BETWEEN 1977 AND 1983 - group by c_customer_sk - having sum(ss_quantity*ss_sales_price) > (95/100.0) * (select - * -from - max_store_sales)) - select sum(sales) - from (select cs_quantity*cs_list_price sales - from catalog_sales - ,date_dim - where d_year = 2001 - and d_moy = 10 - and cs_sold_date_sk = d_date_sk - and cs_item_sk in (select item_sk from frequent_ss_items) - and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) - and cs_wholesale_cost BETWEEN 2 AND 12 - union all - select ws_quantity*ws_list_price sales - from web_sales - ,date_dim - where d_year = 2001 - and d_moy = 10 - and ws_sold_date_sk = d_date_sk - and ws_item_sk in (select item_sk from frequent_ss_items) - and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) - and ws_wholesale_cost BETWEEN 2 AND 12) tmp2 - limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query025s0.sql b/tune/protox/tests/unittest_dsb_dir/query025s0.sql deleted file mode 100644 index 81323fb2..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query025s0.sql +++ /dev/null @@ -1,45 +0,0 @@ -select - i_item_id - ,i_item_desc - ,s_store_id - ,s_store_name - ,sum(ss_net_profit) as store_sales_profit - ,sum(sr_net_loss) as store_returns_loss - ,sum(cs_net_profit) as catalog_sales_profit - from - store_sales - ,store_returns - ,catalog_sales - ,date_dim d1 - ,date_dim d2 - ,date_dim d3 - ,store - ,item - where - d1.d_moy = 8 - and d1.d_year = 2001 - and d1.d_date_sk = ss_sold_date_sk - and i_item_sk = ss_item_sk - and s_store_sk = ss_store_sk - and ss_customer_sk = sr_customer_sk - and ss_item_sk = sr_item_sk - and ss_ticket_number = sr_ticket_number - and sr_returned_date_sk = d2.d_date_sk - and d2.d_moy between 8 and 8 + 2 - and d2.d_year = 2001 - and sr_customer_sk = cs_bill_customer_sk - and sr_item_sk = cs_item_sk - and cs_sold_date_sk = d3.d_date_sk - and d3.d_moy between 8 and 8 + 2 - and d3.d_year = 2001 - group by - i_item_id - ,i_item_desc - ,s_store_id - ,s_store_name - order by - i_item_id - ,i_item_desc - ,s_store_id - ,s_store_name - limit 100; \ No newline at end of file diff --git 
a/tune/protox/tests/unittest_dsb_dir/query025s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query025s0_spj.sql deleted file mode 100644 index 116b80e2..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query025s0_spj.sql +++ /dev/null @@ -1,37 +0,0 @@ -select - min(i_item_id) - ,min(i_item_desc) - ,min(s_store_id) - ,min(s_store_name) - ,min(ss_net_profit) - ,min(sr_net_loss) - ,min(cs_net_profit) - ,min(ss_item_sk) - ,min(sr_ticket_number) - ,min(cs_order_number) - from - store_sales - ,store_returns - ,catalog_sales - ,date_dim d1 - ,date_dim d2 - ,date_dim d3 - ,store - ,item - where - d1.d_moy = 6 - and d1.d_year = 2000 - and d1.d_date_sk = ss_sold_date_sk - and i_item_sk = ss_item_sk - and s_store_sk = ss_store_sk - and ss_customer_sk = sr_customer_sk - and ss_item_sk = sr_item_sk - and ss_ticket_number = sr_ticket_number - and sr_returned_date_sk = d2.d_date_sk - and d2.d_moy between 6 and 6 + 2 - and d2.d_year = 2000 - and sr_customer_sk = cs_bill_customer_sk - and sr_item_sk = cs_item_sk - and cs_sold_date_sk = d3.d_date_sk - and d3.d_moy between 6 and 6 + 2 - and d3.d_year = 2000; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query027s0.sql b/tune/protox/tests/unittest_dsb_dir/query027s0.sql deleted file mode 100644 index e6df6683..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query027s0.sql +++ /dev/null @@ -1,21 +0,0 @@ -select i_item_id, - s_state, grouping(s_state) g_state, - avg(ss_quantity) agg1, - avg(ss_list_price) agg2, - avg(ss_coupon_amt) agg3, - avg(ss_sales_price) agg4 -from store_sales, customer_demographics, date_dim, store, item -where ss_sold_date_sk = d_date_sk and - ss_item_sk = i_item_sk and - ss_store_sk = s_store_sk and - ss_cdemo_sk = cd_demo_sk and - cd_gender = 'M' and - cd_marital_status = 'S' and - cd_education_status = 'College' and - d_year = 2002 and - s_state = 'IL' and - i_category = 'Jewelry' - group by rollup (i_item_id, s_state) - order by i_item_id - ,s_state - limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query027s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query027s0_spj.sql deleted file mode 100644 index 24997c06..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query027s0_spj.sql +++ /dev/null @@ -1,19 +0,0 @@ -select min(i_item_id), - min(s_state), - min(ss_quantity), - min(ss_list_price), - min(ss_coupon_amt), - min(ss_sales_price), - min(ss_item_sk), - min(ss_ticket_number) - from store_sales, customer_demographics, date_dim, store, item - where ss_sold_date_sk = d_date_sk and - ss_item_sk = i_item_sk and - ss_store_sk = s_store_sk and - ss_cdemo_sk = cd_demo_sk and - cd_gender = 'M' and - cd_marital_status = 'S' and - cd_education_status = 'College' and - d_year = 2002 and - s_state = 'IL' and - i_category = 'Jewelry'; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query030s0.sql b/tune/protox/tests/unittest_dsb_dir/query030s0.sql deleted file mode 100644 index 5530eafe..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query030s0.sql +++ /dev/null @@ -1,35 +0,0 @@ -with customer_total_return as - (select wr_returning_customer_sk as ctr_customer_sk - ,ca_state as ctr_state -,wr_reason_sk as ctr_reason_sk -,sum(wr_return_amt) as ctr_total_return - from web_returns - ,date_dim - ,customer_address - ,item - where wr_returned_date_sk = d_date_sk - and d_year =2001 - and wr_returning_addr_sk = ca_address_sk - and wr_item_sk = i_item_sk - and i_manager_id BETWEEN 69 and 78 - and wr_return_amt / wr_return_quantity between 28 and 57 - 
group by wr_returning_customer_sk - ,ca_state, wr_reason_sk) - select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag - ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date_sk,ctr_total_return - from customer_total_return ctr1 - ,customer_address - ,customer - where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 - from customer_total_return ctr2 - where ctr1.ctr_state = ctr2.ctr_state) - and ca_address_sk = c_current_addr_sk - and ca_state in ('IA', 'MT', 'NE', 'TX') - and ctr1.ctr_customer_sk = c_customer_sk - and ctr1.ctr_reason_sk in (3, 31) - and c_birth_year BETWEEN 1938 AND 1944 - order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag - ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date_sk,ctr_total_return -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query031s0.sql b/tune/protox/tests/unittest_dsb_dir/query031s0.sql deleted file mode 100644 index fb10806e..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query031s0.sql +++ /dev/null @@ -1,59 +0,0 @@ -with ss as - (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales - from store_sales,date_dim,customer_address, item - where ss_sold_date_sk = d_date_sk - and ss_addr_sk=ca_address_sk - and ss_item_sk = i_item_sk - and i_color IN ('purple', 'sandy') - and i_manager_id BETWEEN 36 and 55 - and ss_list_price between 77 and 91 - and ca_state in ('GA','IL') - group by ca_county,d_qoy, d_year), - ws as - (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales - from web_sales,date_dim,customer_address, item - where ws_sold_date_sk = d_date_sk - and ws_bill_addr_sk=ca_address_sk - and ws_item_sk = i_item_sk - and i_color IN ('purple', 'sandy') - and i_manager_id BETWEEN 36 and 55 - and ws_list_price between 77 and 91 - and ca_state in ('GA','IL') -group by ca_county,d_qoy, d_year) - select - ss1.ca_county - ,ss1.d_year - ,ws2.web_sales/ws1.web_sales web_q1_q2_increase - ,ss2.store_sales/ss1.store_sales store_q1_q2_increase - ,ws3.web_sales/ws2.web_sales web_q2_q3_increase - ,ss3.store_sales/ss2.store_sales store_q2_q3_increase - from - ss ss1 - ,ss ss2 - ,ss ss3 - ,ws ws1 - ,ws ws2 - ,ws ws3 - where - ss1.d_qoy = 1 - and ss1.d_year = 2001 - and ss1.ca_county = ss2.ca_county - and ss2.d_qoy = 2 - and ss2.d_year = 2001 - and ss2.ca_county = ss3.ca_county - and ss3.d_qoy = 3 - and ss3.d_year = 2001 - and ss1.ca_county = ws1.ca_county - and ws1.d_qoy = 1 - and ws1.d_year = 2001 - and ws1.ca_county = ws2.ca_county - and ws2.d_qoy = 2 - and ws2.d_year = 2001 - and ws1.ca_county = ws3.ca_county - and ws3.d_qoy = 3 - and ws3.d_year =2001 - and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end - > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end - and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end - > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end - order by store_q1_q2_increase; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query032s0.sql b/tune/protox/tests/unittest_dsb_dir/query032s0.sql deleted file mode 100644 index 8d7ac83d..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query032s0.sql +++ /dev/null @@ -1,32 +0,0 @@ -with cte as -( - select - cs_item_sk as cte_item_sk, - 1.3 * avg(cs_ext_discount_amt) as avg_ceda - from - catalog_sales - ,date_dim - where d_date 
between '1999-01-14' and - cast('1999-01-14' as date) + interval '90' day - and d_date_sk = cs_sold_date_sk - and cs_list_price between 236 and 265 - and cs_sales_price / cs_list_price BETWEEN 45 * 0.01 AND 65 * 0.01 - group by cs_item_sk -) -select sum(cs_ext_discount_amt) as "excess discount amount" -from - catalog_sales - ,item - ,date_dim - ,cte -where -(i_manufact_id in (117, 306, 658, 849, 891) -or i_manager_id BETWEEN 28 and 57) -and i_item_sk = cs_item_sk -and d_date between '1999-01-14' and - cast('1999-01-14' as date) + interval '90' day -and d_date_sk = cs_sold_date_sk -and cte.cte_item_sk = i_item_sk -and cs_ext_discount_amt > cte.avg_ceda -order by sum(cs_ext_discount_amt) -limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query038s0.sql b/tune/protox/tests/unittest_dsb_dir/query038s0.sql deleted file mode 100644 index 9d6d28d8..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query038s0.sql +++ /dev/null @@ -1,29 +0,0 @@ -select count(*) from ( - select distinct c_last_name, c_first_name, d_date - from store_sales, date_dim, customer - where store_sales.ss_sold_date_sk = date_dim.d_date_sk - and store_sales.ss_customer_sk = customer.c_customer_sk - and d_month_seq between 1185 and 1185 + 11 - and c_birth_month in (2, 3, 10, 12) - and ss_list_price between 28 and 87 - and ss_wholesale_cost BETWEEN 80 AND 100 - intersect - select distinct c_last_name, c_first_name, d_date - from catalog_sales, date_dim, customer - where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk - and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk - and d_month_seq between 1185 and 1185 + 11 - and c_birth_month in (2, 3, 10, 12) - and cs_list_price between 28 and 87 - and cs_wholesale_cost BETWEEN 80 AND 100 - intersect - select distinct c_last_name, c_first_name, d_date - from web_sales, date_dim, customer - where web_sales.ws_sold_date_sk = date_dim.d_date_sk - and web_sales.ws_bill_customer_sk = customer.c_customer_sk - and d_month_seq between 1185 and 1185 + 11 - and c_birth_month in (2, 3, 10, 12) - and ws_list_price between 28 and 87 - and ws_wholesale_cost BETWEEN 80 AND 100 -) hot_cust -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query039as0.sql b/tune/protox/tests/unittest_dsb_dir/query039as0.sql deleted file mode 100644 index 457f23ff..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query039as0.sql +++ /dev/null @@ -1,27 +0,0 @@ -with inv as -(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - ,stdev,mean, case mean when 0 then null else stdev/mean end cov - from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean - from inventory - ,item - ,warehouse - ,date_dim - where inv_item_sk = i_item_sk - and inv_warehouse_sk = w_warehouse_sk - and inv_date_sk = d_date_sk - and d_year =1998 - and i_category IN ('Books', 'Home') - and i_manager_id BETWEEN 28 and 47 - and inv_quantity_on_hand between 225 and 425 - group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo - where case mean when 0 then 0 else stdev/mean end > 1) -select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov - ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov -from inv inv1,inv inv2 -where inv1.i_item_sk = inv2.i_item_sk - and inv1.w_warehouse_sk = inv2.w_warehouse_sk - and inv1.d_moy=2 - and inv2.d_moy=2+1 -order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov - ,inv2.d_moy,inv2.mean, inv2.cov; \ No newline at end of file 
diff --git a/tune/protox/tests/unittest_dsb_dir/query039bs0.sql b/tune/protox/tests/unittest_dsb_dir/query039bs0.sql deleted file mode 100644 index 05f5466b..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query039bs0.sql +++ /dev/null @@ -1,28 +0,0 @@ -with inv as -(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - ,stdev,mean, case mean when 0 then null else stdev/mean end cov - from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy - ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean - from inventory - ,item - ,warehouse - ,date_dim - where inv_item_sk = i_item_sk - and inv_warehouse_sk = w_warehouse_sk - and inv_date_sk = d_date_sk - and d_year =1998 - and i_category IN ('Books', 'Home') - and i_manager_id BETWEEN 28 and 47 - and inv_quantity_on_hand between 225 and 425 - group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo - where case mean when 0 then 0 else stdev/mean end > 1) -select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov - ,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov -from inv inv1,inv inv2 -where inv1.i_item_sk = inv2.i_item_sk - and inv1.w_warehouse_sk = inv2.w_warehouse_sk - and inv1.d_moy=2 - and inv2.d_moy=2+1 - and inv1.cov > 1.5 -order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov - ,inv2.d_moy,inv2.mean, inv2.cov; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query040s0.sql b/tune/protox/tests/unittest_dsb_dir/query040s0.sql deleted file mode 100644 index c69459c4..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query040s0.sql +++ /dev/null @@ -1,28 +0,0 @@ -select - w_state - ,i_item_id - ,sum(case when (cast(d_date as date) < cast ('1998-04-26' as date)) - then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before - ,sum(case when (cast(d_date as date) >= cast ('1998-04-26' as date)) - then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after - from - catalog_sales left outer join catalog_returns on - (cs_order_number = cr_order_number - and cs_item_sk = cr_item_sk) - ,warehouse - ,item - ,date_dim - where - i_item_sk = cs_item_sk - and cs_warehouse_sk = w_warehouse_sk - and cs_sold_date_sk = d_date_sk - and d_date between (cast ('1998-04-26' as date) - interval '30' day) - and (cast ('1998-04-26' as date) + interval '30' day) - and i_category = 'Home' - and i_manager_id between 28 and 67 - and cs_wholesale_cost between 69 and 88 - and cr_reason_sk = 11 - group by - w_state,i_item_id - order by w_state,i_item_id -limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query040s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query040s0_spj.sql deleted file mode 100644 index f950039f..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query040s0_spj.sql +++ /dev/null @@ -1,24 +0,0 @@ -select - min(w_state) - ,min(i_item_id) - ,min(cs_item_sk) - ,min(cs_order_number) - ,min(cr_item_sk) - ,min(cr_order_number) - from - catalog_sales left outer join catalog_returns on - (cs_order_number = cr_order_number - and cs_item_sk = cr_item_sk) - ,warehouse - ,item - ,date_dim - where - i_item_sk = cs_item_sk - and cs_warehouse_sk = w_warehouse_sk - and cs_sold_date_sk = d_date_sk - and d_date between (cast ('1998-04-26' as date) - interval '30' day) - and (cast ('1998-04-26' as date) + interval '30' day) - and i_category = 'Home' - and i_manager_id between 28 and 67 - and cs_wholesale_cost between 69 and 88 - and cr_reason_sk = 11; diff --git a/tune/protox/tests/unittest_dsb_dir/query050s0.sql 
b/tune/protox/tests/unittest_dsb_dir/query050s0.sql deleted file mode 100644 index 3b46f1dd..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query050s0.sql +++ /dev/null @@ -1,58 +0,0 @@ -select - s_store_name - ,s_company_id - ,s_street_number - ,s_street_name - ,s_street_type - ,s_suite_number - ,s_city - ,s_county - ,s_state - ,s_zip - ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" - ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and - (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" - ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and - (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" - ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and - (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" - ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as ">120 days" -from - store_sales - ,store_returns - ,store - ,date_dim d1 - ,date_dim d2 -where - d2.d_year = 2000 -and d2.d_moy = 4 -and ss_ticket_number = sr_ticket_number -and ss_item_sk = sr_item_sk -and ss_sold_date_sk = d1.d_date_sk -and sr_returned_date_sk = d2.d_date_sk -and ss_customer_sk = sr_customer_sk -and ss_store_sk = s_store_sk -and d1.d_date between (d2.d_date - interval '120' day) - and d2.d_date -group by - s_store_name - ,s_company_id - ,s_street_number - ,s_street_name - ,s_street_type - ,s_suite_number - ,s_city - ,s_county - ,s_state - ,s_zip -order by s_store_name - ,s_company_id - ,s_street_number - ,s_street_name - ,s_street_type - ,s_suite_number - ,s_city - ,s_county - ,s_state - ,s_zip -limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query050s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query050s0_spj.sql deleted file mode 100644 index cbbd36d4..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query050s0_spj.sql +++ /dev/null @@ -1,32 +0,0 @@ -select - min(s_store_name) - ,min(s_company_id) - ,min(s_street_number) - ,min(s_street_name) - ,min(s_suite_number) - ,min(s_city) - ,min(s_zip) - ,min(ss_ticket_number) - ,min(ss_sold_date_sk) - ,min(sr_returned_date_sk) - ,min(ss_item_sk) - ,min(d1.d_date_sk) -from - store_sales - ,store_returns - ,store - ,date_dim d1 - ,date_dim d2 -where - d2.d_moy = 8 -and ss_ticket_number = sr_ticket_number -and ss_item_sk = sr_item_sk -and ss_sold_date_sk = d1.d_date_sk -and sr_returned_date_sk = d2.d_date_sk -and ss_customer_sk = sr_customer_sk -and ss_store_sk = s_store_sk -and sr_store_sk = s_store_sk -and d1.d_date between (d2.d_date - interval '120' day) - and d2.d_date -and d1.d_dow = 2 -and s_state in ('GA','IL','OH'); diff --git a/tune/protox/tests/unittest_dsb_dir/query054s0.sql b/tune/protox/tests/unittest_dsb_dir/query054s0.sql deleted file mode 100644 index d80d04b3..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query054s0.sql +++ /dev/null @@ -1,62 +0,0 @@ -with my_customers as ( - select distinct c_customer_sk - , c_current_addr_sk - from - ( select cs_sold_date_sk sold_date_sk, - cs_bill_customer_sk customer_sk, - cs_item_sk item_sk, - cs_wholesale_cost wholesale_cost - from catalog_sales - union all - select ws_sold_date_sk sold_date_sk, - ws_bill_customer_sk customer_sk, - ws_item_sk item_sk, - ws_wholesale_cost wholesale_cost - from web_sales - ) cs_or_ws_sales, - item, - date_dim, - customer - where sold_date_sk = d_date_sk - and item_sk = i_item_sk - and i_category = 'Men' - and i_class = 'accessories' - and c_customer_sk = 
cs_or_ws_sales.customer_sk - and d_moy = 7 - and d_year = 1999 - and wholesale_cost BETWEEN 44 AND 74 - and c_birth_year BETWEEN 1943 AND 1956 - ) - , my_revenue as ( - select c_customer_sk, - sum(ss_ext_sales_price) as revenue - from my_customers, - store_sales, - customer_address, - store, - date_dim - where c_current_addr_sk = ca_address_sk - and ca_county = s_county - and ca_state = s_state - and ss_sold_date_sk = d_date_sk - and c_customer_sk = ss_customer_sk - and ss_wholesale_cost BETWEEN 44 AND 74 - and s_state in ('GA','IL','IN' - ,'MT','NC','NM' - ,'OH','OR','TX' - ,'VA') - and d_month_seq between (select distinct d_month_seq+1 - from date_dim where d_year = 1999 and d_moy = 7) - and (select distinct d_month_seq+3 - from date_dim where d_year = 1999 and d_moy = 7) - group by c_customer_sk - ) - , segments as - (select cast((revenue/50) as int) as segment - from my_revenue - ) - select segment, count(*) as num_customers, segment*50 as segment_base - from segments - group by segment - order by segment, num_customers - limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query058s0.sql b/tune/protox/tests/unittest_dsb_dir/query058s0.sql deleted file mode 100644 index d685572a..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query058s0.sql +++ /dev/null @@ -1,82 +0,0 @@ -with ss_items as - (select i_item_id item_id - ,c_birth_year birth_year - ,sum(ss_ext_sales_price) ss_item_rev - from store_sales - ,item - ,date_dim - ,customer - where ss_item_sk = i_item_sk - and d_date in (select d_date - from date_dim - where d_month_seq = (select d_month_seq - from date_dim - where d_date = '1999-02-11')) - and ss_sold_date_sk = d_date_sk - and ss_list_price between 269 and 298 - and i_manager_id BETWEEN 28 and 57 - and ss_customer_sk = c_customer_sk - and c_birth_year BETWEEN 1938 AND 1944 -group by i_item_id, c_birth_year), - cs_items as - (select i_item_id item_id - ,c_birth_year birth_year - ,sum(cs_ext_sales_price) cs_item_rev - from catalog_sales - ,item - ,date_dim - ,customer - where cs_item_sk = i_item_sk - and d_date in (select d_date - from date_dim - where d_month_seq = (select d_month_seq - from date_dim - where d_date = '1999-02-11')) - and cs_sold_date_sk = d_date_sk - and cs_list_price between 269 and 298 - and i_manager_id BETWEEN 28 and 57 - and cs_bill_customer_sk = c_customer_sk - and c_birth_year BETWEEN 1938 AND 1944 -group by i_item_id, c_birth_year), - ws_items as - (select i_item_id item_id - ,c_birth_year birth_year - ,sum(ws_ext_sales_price) ws_item_rev - from web_sales - ,item - ,date_dim - ,customer - where ws_item_sk = i_item_sk - and d_date in (select d_date - from date_dim - where d_month_seq = (select d_month_seq - from date_dim - where d_date = '1999-02-11')) - and ws_sold_date_sk = d_date_sk - and ws_list_price between 269 and 298 - and i_manager_id BETWEEN 28 and 57 - and ws_bill_customer_sk = c_customer_sk - and c_birth_year BETWEEN 1938 AND 1944 -group by i_item_id, c_birth_year) - select ss_items.item_id, ss_items.birth_year - ,ss_item_rev - ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev - ,cs_item_rev - ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev - ,ws_item_rev - ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev - ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average - from ss_items,cs_items,ws_items - where ss_items.item_id=cs_items.item_id - and ss_items.item_id=ws_items.item_id - and ss_items.birth_year = cs_items.birth_year - and ss_items.birth_year = ws_items.birth_year - and 
ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev - and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev - and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev - and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev - and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev - and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev - order by item_id, birth_year - ,ss_item_rev - limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query059s0.sql b/tune/protox/tests/unittest_dsb_dir/query059s0.sql deleted file mode 100644 index f6b864af..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query059s0.sql +++ /dev/null @@ -1,47 +0,0 @@ -with wss as - (select d_week_seq, - ss_store_sk, - sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, - sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, - sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, - sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, - sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, - sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, - sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales - from store_sales,date_dim - where d_date_sk = ss_sold_date_sk - and ss_sales_price / ss_list_price BETWEEN 17 * 0.01 AND 37 * 0.01 - group by d_week_seq,ss_store_sk - ) - select s_store_name1,s_store_id1,d_week_seq1 - ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 - ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 - ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 - from - (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 - ,s_store_id s_store_id1,sun_sales sun_sales1 - ,mon_sales mon_sales1,tue_sales tue_sales1 - ,wed_sales wed_sales1,thu_sales thu_sales1 - ,fri_sales fri_sales1,sat_sales sat_sales1 - from wss,store,date_dim d - where d.d_week_seq = wss.d_week_seq and - ss_store_sk = s_store_sk and - d_month_seq between 1207 and 1207 + 11 - and s_state in ('GA','IL','IN' - ,'MT','NM','OH','OR','TX') - ) y, - (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 - ,s_store_id s_store_id2,sun_sales sun_sales2 - ,mon_sales mon_sales2,tue_sales tue_sales2 - ,wed_sales wed_sales2,thu_sales thu_sales2 - ,fri_sales fri_sales2,sat_sales sat_sales2 - from wss,store,date_dim d - where d.d_week_seq = wss.d_week_seq and - ss_store_sk = s_store_sk and - d_month_seq between 1207+ 12 and 1207 + 23 - and s_state in ('GA','IL','IN' - ,'MT','NM','OH','OR','TX')) x - where s_store_id1=s_store_id2 - and d_week_seq1=d_week_seq2-52 - order by s_store_name1,s_store_id1,d_week_seq1 -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query064s0.sql b/tune/protox/tests/unittest_dsb_dir/query064s0.sql deleted file mode 100644 index b1434cec..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query064s0.sql +++ /dev/null @@ -1,126 +0,0 @@ -with cs_ui as - (select cs_item_sk - ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund - from catalog_sales - ,catalog_returns - where cs_item_sk = cr_item_sk - and cs_order_number = cr_order_number - and cs_wholesale_cost BETWEEN 80 AND 100 - group by cs_item_sk - having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), -cross_sales as - (select i_product_name product_name - 
,i_item_sk item_sk - ,s_store_name store_name - ,s_zip store_zip - ,ad1.ca_street_number b_street_number - ,ad1.ca_street_name b_street_name - ,ad1.ca_city b_city - ,ad1.ca_zip b_zip - ,ad2.ca_street_number c_street_number - ,ad2.ca_street_name c_street_name - ,ad2.ca_city c_city - ,ad2.ca_zip c_zip - ,d1.d_year as syear - ,d2.d_year as fsyear - ,d3.d_year s2year - ,count(*) cnt - ,sum(ss_wholesale_cost) s1 - ,sum(ss_list_price) s2 - ,sum(ss_coupon_amt) s3 - FROM store_sales - ,store_returns - ,cs_ui - ,date_dim d1 - ,date_dim d2 - ,date_dim d3 - ,store - ,customer - ,customer_demographics cd1 - ,customer_demographics cd2 - ,promotion - ,household_demographics hd1 - ,household_demographics hd2 - ,customer_address ad1 - ,customer_address ad2 - ,income_band ib1 - ,income_band ib2 - ,item - WHERE ss_store_sk = s_store_sk AND - ss_sold_date_sk = d1.d_date_sk AND - ss_customer_sk = c_customer_sk AND - ss_cdemo_sk= cd1.cd_demo_sk AND - ss_hdemo_sk = hd1.hd_demo_sk AND - ss_addr_sk = ad1.ca_address_sk and - ss_item_sk = i_item_sk and - ss_item_sk = sr_item_sk and - ss_ticket_number = sr_ticket_number and - ss_item_sk = cs_ui.cs_item_sk and - c_current_cdemo_sk = cd2.cd_demo_sk AND - c_current_hdemo_sk = hd2.hd_demo_sk AND - c_current_addr_sk = ad2.ca_address_sk and - c_first_sales_date_sk = d2.d_date_sk and - c_first_shipto_date_sk = d3.d_date_sk and - ss_promo_sk = p_promo_sk and - hd1.hd_income_band_sk = ib1.ib_income_band_sk and - hd2.hd_income_band_sk = ib2.ib_income_band_sk and - cd1.cd_marital_status <> cd2.cd_marital_status and - i_current_price between 1 and 1 + 10 - and p_channel_email = 'Y' - and p_channel_tv = 'Y' - and p_channel_radio = 'Y' - and ad2.ca_state in ('GA','IL','OH') - and ss_wholesale_cost BETWEEN 80 AND 100 - and cd1.cd_marital_status in ('W', 'W', 'D') - and cd1.cd_education_status in ('College', '4 yr Degree', 'College') - and cd2.cd_marital_status in ('W', 'W', 'D') - and cd2.cd_education_status in ('College', '4 yr Degree', 'College') -group by i_product_name - ,i_item_sk - ,s_store_name - ,s_zip - ,ad1.ca_street_number - ,ad1.ca_street_name - ,ad1.ca_city - ,ad1.ca_zip - ,ad2.ca_street_number - ,ad2.ca_street_name - ,ad2.ca_city - ,ad2.ca_zip - ,d1.d_year - ,d2.d_year - ,d3.d_year -) -select cs1.product_name - ,cs1.store_name - ,cs1.store_zip - ,cs1.b_street_number - ,cs1.b_street_name - ,cs1.b_city - ,cs1.b_zip - ,cs1.c_street_number - ,cs1.c_street_name - ,cs1.c_city - ,cs1.c_zip - ,cs1.syear - ,cs1.cnt - ,cs1.s1 as s11 - ,cs1.s2 as s21 - ,cs1.s3 as s31 - ,cs2.s1 as s12 - ,cs2.s2 as s22 - ,cs2.s3 as s32 - ,cs2.syear - ,cs2.cnt -from cross_sales cs1,cross_sales cs2 -where cs1.item_sk=cs2.item_sk and - cs1.syear = 1999 and - cs2.syear = 1999 + 1 and - cs2.cnt <= cs1.cnt and - cs1.store_name = cs2.store_name and - cs1.store_zip = cs2.store_zip -order by cs1.product_name - ,cs1.store_name - ,cs2.cnt - ,cs1.s1 - ,cs2.s1; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query065s0.sql b/tune/protox/tests/unittest_dsb_dir/query065s0.sql deleted file mode 100644 index 57c895e4..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query065s0.sql +++ /dev/null @@ -1,30 +0,0 @@ -select - s_store_name, - i_item_desc, - sc.revenue, - i_current_price, - i_wholesale_cost, - i_brand - from store, item, - (select ss_store_sk, avg(revenue) as ave - from - (select ss_store_sk, ss_item_sk, - sum(ss_sales_price) as revenue - from store_sales, date_dim - where ss_sold_date_sk = d_date_sk and d_month_seq between 1215 and 1215+11 - and ss_sales_price / 
ss_list_price BETWEEN 22 * 0.01 AND 32 * 0.01 - group by ss_store_sk, ss_item_sk) sa - group by ss_store_sk) sb, - (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue - from store_sales, date_dim - where ss_sold_date_sk = d_date_sk and d_month_seq between 1215 and 1215+11 - and ss_sales_price / ss_list_price BETWEEN 22 * 0.01 AND 32 * 0.01 - group by ss_store_sk, ss_item_sk) sc - where sb.ss_store_sk = sc.ss_store_sk and - sc.revenue <= 0.1 * sb.ave and - s_store_sk = sc.ss_store_sk and - i_item_sk = sc.ss_item_sk - and i_manager_id BETWEEN 16 and 20 - and s_state in ('GA','IL','TX') - order by s_store_name, i_item_desc -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query069s0.sql b/tune/protox/tests/unittest_dsb_dir/query069s0.sql deleted file mode 100644 index 8ff02ef5..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query069s0.sql +++ /dev/null @@ -1,52 +0,0 @@ -select - cd_gender, - cd_marital_status, - cd_education_status, - count(*) cnt1, - cd_purchase_estimate, - count(*) cnt2, - cd_credit_rating, - count(*) cnt3 - from - customer c,customer_address ca,customer_demographics - where - c.c_current_addr_sk = ca.ca_address_sk and - ca_state in ('IN','NM','VA') and - cd_demo_sk = c.c_current_cdemo_sk - and cd_marital_status in ('W', 'S', 'M') - and cd_education_status in ('Secondary', 'Primary') and - exists (select * - from store_sales,date_dim - where c.c_customer_sk = ss_customer_sk and - ss_sold_date_sk = d_date_sk and - d_year = 2000 and - d_moy between 9 and 9+2 - and ss_list_price between 5 and 94 - ) and - (not exists (select * - from web_sales,date_dim - where c.c_customer_sk = ws_bill_customer_sk and - ws_sold_date_sk = d_date_sk and - d_year = 2000 and - d_moy between 9 and 9+2 - and ws_list_price between 5 and 94 - ) and - not exists (select * - from catalog_sales,date_dim - where c.c_customer_sk = cs_ship_customer_sk and - cs_sold_date_sk = d_date_sk and - d_year = 2000 and - d_moy between 9 and 9+2 - and cs_list_price between 5 and 94) - ) - group by cd_gender, - cd_marital_status, - cd_education_status, - cd_purchase_estimate, - cd_credit_rating - order by cd_gender, - cd_marital_status, - cd_education_status, - cd_purchase_estimate, - cd_credit_rating - limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query072s0.sql b/tune/protox/tests/unittest_dsb_dir/query072s0.sql deleted file mode 100644 index 1ff6be64..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query072s0.sql +++ /dev/null @@ -1,29 +0,0 @@ -select i_item_desc - ,w_warehouse_name - ,d1.d_week_seq - ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo - ,sum(case when p_promo_sk is not null then 1 else 0 end) promo - ,count(*) total_cnt -from catalog_sales -join inventory on (cs_item_sk = inv_item_sk) -join warehouse on (w_warehouse_sk=inv_warehouse_sk) -join item on (i_item_sk = cs_item_sk) -join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) -join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) -join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) -join date_dim d2 on (inv_date_sk = d2.d_date_sk) -join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) -left outer join promotion on (cs_promo_sk=p_promo_sk) -left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) -where d1.d_week_seq = d2.d_week_seq - and inv_quantity_on_hand < cs_quantity - and d3.d_date > d1.d_date + interval '3' day - and hd_buy_potential = '1001-5000' - and d1.d_year = 2001 - and 
cd_marital_status = 'U' - and cd_dep_count between 5 and 7 - and i_category IN ('Books', 'Home', 'Sports') - and cs_wholesale_cost BETWEEN 80 AND 100 -group by i_item_desc,w_warehouse_name,d1.d_week_seq -order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq -limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query072s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query072s0_spj.sql deleted file mode 100644 index 8062761c..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query072s0_spj.sql +++ /dev/null @@ -1,26 +0,0 @@ -select min(i_item_sk) - ,min(w_warehouse_name) - ,min(d1.d_week_seq) - ,min(cs_item_sk) - ,min(cs_order_number) - ,min(inv_item_sk) -from catalog_sales -join inventory on (cs_item_sk = inv_item_sk) -join warehouse on (w_warehouse_sk=inv_warehouse_sk) -join item on (i_item_sk = cs_item_sk) -join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) -join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) -join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) -join date_dim d2 on (inv_date_sk = d2.d_date_sk) -join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) -left outer join promotion on (cs_promo_sk=p_promo_sk) -left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) -where d1.d_week_seq = d2.d_week_seq - and inv_quantity_on_hand < cs_quantity - and d3.d_date > d1.d_date + interval '3' day - and hd_buy_potential = '1001-5000' - and d1.d_year = 2001 - and cd_marital_status = 'U' - and cd_dep_count between 5 and 7 - and i_category IN ('Books', 'Home', 'Sports') - and cs_wholesale_cost BETWEEN 80 AND 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query075s0.sql b/tune/protox/tests/unittest_dsb_dir/query075s0.sql deleted file mode 100644 index 8aead12a..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query075s0.sql +++ /dev/null @@ -1,74 +0,0 @@ -WITH all_sales AS ( - SELECT d_year - ,i_brand_id - ,i_class_id - ,i_category_id - ,i_manufact_id - ,SUM(sales_cnt) AS sales_cnt - ,SUM(sales_amt) AS sales_amt - FROM (SELECT d_year - ,i_brand_id - ,i_class_id - ,i_category_id - ,i_manufact_id - ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt - ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt - FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk - JOIN date_dim ON d_date_sk=cs_sold_date_sk - LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number - AND cs_item_sk=cr_item_sk) - WHERE i_category='Sports' - and cs_sales_price / cs_list_price BETWEEN 80 * 0.01 AND 100 * 0.01 - and cr_reason_sk in (7, 10, 12, 29, 45) - UNION - SELECT d_year - ,i_brand_id - ,i_class_id - ,i_category_id - ,i_manufact_id - ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt - ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt - FROM store_sales JOIN item ON i_item_sk=ss_item_sk - JOIN date_dim ON d_date_sk=ss_sold_date_sk - LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number - AND ss_item_sk=sr_item_sk) - WHERE i_category='Sports' - and ss_sales_price / ss_list_price BETWEEN 80 * 0.01 AND 100 * 0.01 - and sr_reason_sk in (7, 10, 12, 29, 45) - UNION - SELECT d_year - ,i_brand_id - ,i_class_id - ,i_category_id - ,i_manufact_id - ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt - ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt - FROM web_sales JOIN item ON i_item_sk=ws_item_sk - JOIN date_dim ON d_date_sk=ws_sold_date_sk - LEFT JOIN web_returns ON (ws_order_number=wr_order_number - AND ws_item_sk=wr_item_sk) - WHERE i_category='Sports' - and 
ws_sales_price / ws_list_price BETWEEN 80 * 0.01 AND 100 * 0.01 - and wr_reason_sk in (7, 10, 12, 29, 45)) sales_detail -GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) - SELECT prev_yr.d_year AS prev_year - ,curr_yr.d_year AS o_year - ,curr_yr.i_brand_id - ,curr_yr.i_class_id - ,curr_yr.i_category_id - ,curr_yr.i_manufact_id - ,prev_yr.sales_cnt AS prev_yr_cnt - ,curr_yr.sales_cnt AS curr_yr_cnt - ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff - ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff - FROM all_sales curr_yr, all_sales prev_yr - WHERE curr_yr.i_brand_id=prev_yr.i_brand_id - AND curr_yr.i_class_id=prev_yr.i_class_id - AND curr_yr.i_category_id=prev_yr.i_category_id - AND curr_yr.i_manufact_id=prev_yr.i_manufact_id - AND curr_yr.d_year=1999 - AND prev_yr.d_year=1999-1 - AND prev_yr.sales_cnt > 0 - AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 - ORDER BY sales_cnt_diff,sales_amt_diff - limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query080s0.sql b/tune/protox/tests/unittest_dsb_dir/query080s0.sql deleted file mode 100644 index 32e2a565..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query080s0.sql +++ /dev/null @@ -1,111 +0,0 @@ -with ssr as - (select s_store_id as store_id, - sum(ss_ext_sales_price) as sales, - sum(coalesce(sr_return_amt, 0)) as ssr_returns, - sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit - from store_sales left outer join store_returns on - (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), - date_dim, - store, - item, - promotion - where ss_sold_date_sk = d_date_sk - and d_date between cast('2001-08-11' as date) - and cast('2001-08-11' as date) + interval '30' day - and ss_store_sk = s_store_sk - and ss_item_sk = i_item_sk - and i_current_price > 50 - and ss_promo_sk = p_promo_sk - and p_channel_email = 'N' - and p_channel_tv = 'N' - and p_channel_radio = 'Y' - and p_channel_press = 'N' - and p_channel_event = 'Y' - and ss_wholesale_cost BETWEEN 45 AND 60 - and i_category IN ('Jewelry', 'Sports') - group by s_store_id) - , - csr as - (select cp_catalog_page_id as catalog_page_id, - sum(cs_ext_sales_price) as sales, - sum(coalesce(cr_return_amount, 0)) as csr_returns, - sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit - from catalog_sales left outer join catalog_returns on - (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), - date_dim, - catalog_page, - item, - promotion - where cs_sold_date_sk = d_date_sk - and d_date between cast('2001-08-11' as date) - and cast('2001-08-11' as date) + interval '30' day - and cs_catalog_page_sk = cp_catalog_page_sk - and cs_item_sk = i_item_sk - and i_current_price > 50 - and cs_promo_sk = p_promo_sk - and p_channel_email = 'N' - and p_channel_tv = 'N' - and p_channel_radio = 'Y' - and p_channel_press = 'N' - and p_channel_event = 'Y' - and cs_wholesale_cost BETWEEN 45 AND 60 - and i_category IN ('Jewelry', 'Sports') -group by cp_catalog_page_id) - , - wsr as - (select web_site_id, - sum(ws_ext_sales_price) as sales, - sum(coalesce(wr_return_amt, 0)) as wsr_returns, - sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit - from web_sales left outer join web_returns on - (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), - date_dim, - web_site, - item, - promotion - where ws_sold_date_sk = d_date_sk - and d_date between cast('2001-08-11' as date) - and cast('2001-08-11' as date) + interval '30' day - and ws_web_site_sk = web_site_sk - and ws_item_sk = i_item_sk - and 
i_current_price > 50 - and ws_promo_sk = p_promo_sk - and p_channel_email = 'N' - and p_channel_tv = 'N' - and p_channel_radio = 'Y' - and p_channel_press = 'N' - and p_channel_event = 'Y' - and ws_wholesale_cost BETWEEN 45 AND 60 - and i_category IN ('Jewelry', 'Sports') -group by web_site_id) - select channel - , id - , sum(sales) as sales - , sum(rets) as rets - , sum(profit) as profit - from - (select 'store channel' as channel - , 'store' || store_id as id - , sales - , ssr_returns as rets - , profit - from ssr - union all - select 'catalog channel' as channel - , 'catalog_page' || catalog_page_id as id - , sales - , csr_returns as rets - , profit - from csr - union all - select 'web channel' as channel - , 'web_site' || web_site_id as id - , sales - , wsr_returns as rets - , profit - from wsr - ) x - group by rollup (channel, id) - order by channel - ,id - limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query081s0.sql b/tune/protox/tests/unittest_dsb_dir/query081s0.sql deleted file mode 100644 index d4f3551e..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query081s0.sql +++ /dev/null @@ -1,35 +0,0 @@ -with customer_total_return as - (select cr_returning_customer_sk as ctr_customer_sk - ,ca_state as ctr_state, - sum(cr_return_amt_inc_tax) as ctr_total_return - from catalog_returns - ,date_dim - ,customer_address - where cr_returned_date_sk = d_date_sk - and d_year =1998 - and cr_returning_addr_sk = ca_address_sk - group by cr_returning_customer_sk - ,ca_state ) -, -ctr2 as -(select ctr_state as ctr2_state, avg(ctr_total_return)*1.2 as avg_ctr -from customer_total_return -group by ctr_state -) - - select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name - ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset - ,ca_location_type,ctr_total_return - from customer_total_return ctr1 - ,customer_address - ,customer - ,ctr2 - where ctr1.ctr_total_return > ctr2.avg_ctr - and ctr1.ctr_state = ctr2.ctr2_state - and ca_address_sk = c_current_addr_sk - and ca_state = 'IL' - and ctr1.ctr_customer_sk = c_customer_sk - order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name - ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset - ,ca_location_type,ctr_total_return - limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query083s0.sql b/tune/protox/tests/unittest_dsb_dir/query083s0.sql deleted file mode 100644 index 17ef8bdd..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query083s0.sql +++ /dev/null @@ -1,76 +0,0 @@ -with sr_items as - (select i_item_id item_id, - sum(sr_return_quantity) sr_item_qty - from store_returns, - item, - date_dim - where sr_item_sk = i_item_sk - and d_date in - (select d_date - from date_dim - where d_month_seq in - (select d_month_seq - from date_dim - where d_date in ('1998-03-05','1998-06-15','1998-09-12','1998-10-31'))) - and sr_returned_date_sk = d_date_sk - and i_category IN ('Books', 'Sports') - and i_manager_id BETWEEN 36 and 45 - and sr_return_amt / sr_return_quantity between 141 and 170 - and sr_reason_sk in (6, 29, 42, 45, 62) -group by i_item_id), - cr_items as - (select i_item_id item_id, - sum(cr_return_quantity) cr_item_qty - from catalog_returns, - item, - date_dim - where cr_item_sk = i_item_sk - and d_date in - (select d_date - from date_dim - where d_month_seq in - (select d_month_seq - from date_dim - where d_date in ('1998-03-05','1998-06-15','1998-09-12','1998-10-31'))) - and 
cr_returned_date_sk = d_date_sk - and i_category IN ('Books', 'Sports') - and i_manager_id BETWEEN 36 and 45 - and cr_return_amount / cr_return_quantity between 141 and 170 - and cr_reason_sk in (6, 29, 42, 45, 62) - group by i_item_id), - wr_items as - (select i_item_id item_id, - sum(wr_return_quantity) wr_item_qty - from web_returns, - item, - date_dim - where wr_item_sk = i_item_sk - and d_date in - (select d_date - from date_dim - where d_month_seq in - (select d_month_seq - from date_dim - where d_date in ('1998-03-05','1998-06-15','1998-09-12','1998-10-31'))) - and wr_returned_date_sk = d_date_sk - and i_category IN ('Books', 'Sports') - and i_manager_id BETWEEN 36 and 45 - and wr_return_amt / wr_return_quantity between 141 and 170 - and wr_reason_sk in (6, 29, 42, 45, 62) - group by i_item_id) - select sr_items.item_id - ,sr_item_qty - ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev - ,cr_item_qty - ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev - ,wr_item_qty - ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev - ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average - from sr_items - ,cr_items - ,wr_items - where sr_items.item_id=cr_items.item_id - and sr_items.item_id=wr_items.item_id - order by sr_items.item_id - ,sr_item_qty - limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query084s0.sql b/tune/protox/tests/unittest_dsb_dir/query084s0.sql deleted file mode 100644 index 07fcf851..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query084s0.sql +++ /dev/null @@ -1,18 +0,0 @@ -select c_customer_id as customer_id - , coalesce(c_last_name,'') || ', ' || coalesce(c_first_name,'') as customername - from customer - ,customer_address - ,customer_demographics - ,household_demographics - ,income_band - ,store_returns - where ca_city = 'Hopewell' - and c_current_addr_sk = ca_address_sk - and ib_lower_bound >= 32287 - and ib_upper_bound <= 32287 + 50000 - and ib_income_band_sk = hd_income_band_sk - and cd_demo_sk = c_current_cdemo_sk - and hd_demo_sk = c_current_hdemo_sk - and sr_cdemo_sk = cd_demo_sk - order by c_customer_id - limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query084s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query084s0_spj.sql deleted file mode 100644 index 02c2081a..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query084s0_spj.sql +++ /dev/null @@ -1,17 +0,0 @@ -select min(c_customer_id), - min(sr_ticket_number), - min(sr_item_sk) - from customer - ,customer_address - ,customer_demographics - ,household_demographics - ,income_band - ,store_returns - where ca_city = 'Hopewell' - and c_current_addr_sk = ca_address_sk - and ib_lower_bound >= 3 * 10000 - and ib_upper_bound <= 3 * 10000 + 50000 - and ib_income_band_sk = hd_income_band_sk - and cd_demo_sk = c_current_cdemo_sk - and hd_demo_sk = c_current_hdemo_sk - and sr_cdemo_sk = cd_demo_sk; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query085s0.sql b/tune/protox/tests/unittest_dsb_dir/query085s0.sql deleted file mode 100644 index f34ddf27..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query085s0.sql +++ /dev/null @@ -1,81 +0,0 @@ -select substring(r_reason_desc,1,20) - ,avg(ws_quantity) - ,avg(wr_refunded_cash) - ,avg(wr_fee) - from web_sales, web_returns, web_page, customer_demographics cd1, - customer_demographics cd2, customer_address, date_dim, reason - where ws_web_page_sk = wp_web_page_sk - and ws_item_sk = wr_item_sk - and ws_order_number = 
wr_order_number - and ws_sold_date_sk = d_date_sk and d_year = 1998 - and cd1.cd_demo_sk = wr_refunded_cdemo_sk - and cd2.cd_demo_sk = wr_returning_cdemo_sk - and ca_address_sk = wr_refunded_addr_sk - and r_reason_sk = wr_reason_sk - and - ( - ( - cd1.cd_marital_status = 'W' - and - cd1.cd_marital_status = cd2.cd_marital_status - and - cd1.cd_education_status = '2 yr Degree' - and - cd1.cd_education_status = cd2.cd_education_status - and - ws_sales_price between 100.00 and 150.00 - ) - or - ( - cd1.cd_marital_status = 'S' - and - cd1.cd_marital_status = cd2.cd_marital_status - and - cd1.cd_education_status = 'College' - and - cd1.cd_education_status = cd2.cd_education_status - and - ws_sales_price between 50.00 and 100.00 - ) - or - ( - cd1.cd_marital_status = 'D' - and - cd1.cd_marital_status = cd2.cd_marital_status - and - cd1.cd_education_status = 'Advanced Degree' - and - cd1.cd_education_status = cd2.cd_education_status - and - ws_sales_price between 150.00 and 200.00 - ) - ) - and - ( - ( - ca_country = 'United States' - and - ca_state in ('GA', 'IN', 'VA') - and ws_net_profit between 100 and 200 - ) - or - ( - ca_country = 'United States' - and - ca_state in ('MT', 'NM', 'OR') - and ws_net_profit between 150 and 300 - ) - or - ( - ca_country = 'United States' - and - ca_state in ('GA', 'IL', 'OH') - and ws_net_profit between 50 and 250 - ) - ) -group by r_reason_desc -order by substring(r_reason_desc,1,20) - ,avg(ws_quantity) - ,avg(wr_refunded_cash) - ,avg(wr_fee) -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query085s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query085s0_spj.sql deleted file mode 100644 index 8a1fcd42..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query085s0_spj.sql +++ /dev/null @@ -1,78 +0,0 @@ -select min(ws_quantity) - ,min(wr_refunded_cash) - ,min(wr_fee) - ,min(ws_item_sk) - ,min(wr_order_number) - ,min(cd1.cd_demo_sk) - ,min(cd2.cd_demo_sk) - from web_sales, web_returns, web_page, customer_demographics cd1, - customer_demographics cd2, customer_address, date_dim, reason - where ws_web_page_sk = wp_web_page_sk - and ws_item_sk = wr_item_sk - and ws_order_number = wr_order_number - and ws_sold_date_sk = d_date_sk and d_year = 1998 - and cd1.cd_demo_sk = wr_refunded_cdemo_sk - and cd2.cd_demo_sk = wr_returning_cdemo_sk - and ca_address_sk = wr_refunded_addr_sk - and r_reason_sk = wr_reason_sk - and - ( - ( - cd1.cd_marital_status = 'W' - and - cd1.cd_marital_status = cd2.cd_marital_status - and - cd1.cd_education_status = '2 yr Degree' - and - cd1.cd_education_status = cd2.cd_education_status - and - ws_sales_price between 100.00 and 150.00 - ) - or - ( - cd1.cd_marital_status = 'S' - and - cd1.cd_marital_status = cd2.cd_marital_status - and - cd1.cd_education_status = 'College' - and - cd1.cd_education_status = cd2.cd_education_status - and - ws_sales_price between 50.00 and 100.00 - ) - or - ( - cd1.cd_marital_status = 'D' - and - cd1.cd_marital_status = cd2.cd_marital_status - and - cd1.cd_education_status = 'Advanced Degree' - and - cd1.cd_education_status = cd2.cd_education_status - and - ws_sales_price between 150.00 and 200.00 - ) - ) - and - ( - ( - ca_country = 'United States' - and - ca_state in ('GA', 'IN', 'VA') - and ws_net_profit between 100 and 200 - ) - or - ( - ca_country = 'United States' - and - ca_state in ('MT', 'NM', 'OR') - and ws_net_profit between 150 and 300 - ) - or - ( - ca_country = 'United States' - and - ca_state in ('GA', 'IL', 'OH') - and ws_net_profit between 50 and 250 - ) - ); 
\ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query087s0.sql b/tune/protox/tests/unittest_dsb_dir/query087s0.sql deleted file mode 100644 index 98ab9db8..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query087s0.sql +++ /dev/null @@ -1,31 +0,0 @@ -select count(*) -from ((select distinct c_last_name, c_first_name, d_date - from store_sales, date_dim, customer - where store_sales.ss_sold_date_sk = date_dim.d_date_sk - and store_sales.ss_customer_sk = customer.c_customer_sk - and d_month_seq between 1222 and 1222+11 - and ss_list_price between 269 and 298 - and c_birth_year BETWEEN 1958 AND 1964 - and ss_wholesale_cost BETWEEN 90 AND 100 - ) - except - (select distinct c_last_name, c_first_name, d_date - from catalog_sales, date_dim, customer - where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk - and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk - and d_month_seq between 1222 and 1222+11 - and cs_list_price between 269 and 298 - and c_birth_year BETWEEN 1958 AND 1964 - and cs_wholesale_cost BETWEEN 90 AND 100 - ) - except - (select distinct c_last_name, c_first_name, d_date - from web_sales, date_dim, customer - where web_sales.ws_sold_date_sk = date_dim.d_date_sk - and web_sales.ws_bill_customer_sk = customer.c_customer_sk - and d_month_seq between 1222 and 1222+11 - and ws_list_price between 269 and 298 - and c_birth_year BETWEEN 1958 AND 1964 - and ws_wholesale_cost BETWEEN 90 AND 100 - ) -) cool_cust; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query091s0.sql b/tune/protox/tests/unittest_dsb_dir/query091s0.sql deleted file mode 100644 index 39f88329..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query091s0.sql +++ /dev/null @@ -1,28 +0,0 @@ -select - cc_call_center_id Call_Center, - cc_name Call_Center_Name, - cc_manager Manager, - sum(cr_net_loss) Returns_Loss -from - call_center, - catalog_returns, - date_dim, - customer, - customer_address, - customer_demographics, - household_demographics -where - cr_call_center_sk = cc_call_center_sk -and cr_returned_date_sk = d_date_sk -and cr_returning_customer_sk= c_customer_sk -and cd_demo_sk = c_current_cdemo_sk -and hd_demo_sk = c_current_hdemo_sk -and ca_address_sk = c_current_addr_sk -and d_year = 1999 -and d_moy = 5 -and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') - or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) -and hd_buy_potential like '0-500%' -and ca_gmt_offset = -7 -group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status -order by sum(cr_net_loss) desc; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query091s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query091s0_spj.sql deleted file mode 100644 index f332805b..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query091s0_spj.sql +++ /dev/null @@ -1,28 +0,0 @@ -select - min(cc_call_center_id), - min(cc_name), - min(cc_manager), - min(cr_net_loss), - min(cr_item_sk), - min(cr_order_number) -from - call_center, - catalog_returns, - date_dim, - customer, - customer_address, - customer_demographics, - household_demographics -where - cr_call_center_sk = cc_call_center_sk -and cr_returned_date_sk = d_date_sk -and cr_returning_customer_sk= c_customer_sk -and cd_demo_sk = c_current_cdemo_sk -and hd_demo_sk = c_current_hdemo_sk -and ca_address_sk = c_current_addr_sk -and d_year = 1999 -and d_moy = 5 -and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') - or(cd_marital_status = 'W' and 
cd_education_status = 'Advanced Degree')) -and hd_buy_potential like '0-500%' -and ca_gmt_offset = -7; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query092s0.sql b/tune/protox/tests/unittest_dsb_dir/query092s0.sql deleted file mode 100644 index d3c50e0f..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query092s0.sql +++ /dev/null @@ -1,35 +0,0 @@ -with cte as -( - SELECT - ws_item_sk as cte_item_sk, - 1.3 * avg(ws_ext_discount_amt) as avg_dsct - FROM - web_sales - ,date_dim - WHERE d_date between '2002-02-11' and - cast('2002-02-11' as date) + interval '90' day - and d_date_sk = ws_sold_date_sk - and ws_wholesale_cost BETWEEN 68 AND 88 - and ws_sales_price / ws_list_price BETWEEN 85 * 0.01 AND 100 * 0.01 - group by ws_item_sk - ) - -select - sum(ws_ext_discount_amt) as "Excess Discount Amount" -from - web_sales - ,item - ,date_dim - ,cte -where -(i_manufact_id BETWEEN 394 and 593 -or i_category IN ('Books', 'Home', 'Sports')) -and i_item_sk = ws_item_sk -and d_date between '2002-02-11' and - cast('2002-02-11' as date) + interval '90' day -and d_date_sk = ws_sold_date_sk -and ws_wholesale_cost BETWEEN 68 AND 88 -and cte.cte_item_sk = i_item_sk -and ws_ext_discount_amt > cte.avg_dsct -order by sum(ws_ext_discount_amt) -limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query094s0.sql b/tune/protox/tests/unittest_dsb_dir/query094s0.sql deleted file mode 100644 index 1a78f67d..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query094s0.sql +++ /dev/null @@ -1,30 +0,0 @@ -select - count(distinct ws_order_number) as "order count" - ,sum(ws_ext_ship_cost) as "total shipping cost" - ,sum(ws_net_profit) as "total net profit" -from - web_sales ws1 - ,date_dim - ,customer_address - ,web_site -where - d_date between '2000-10-01' and - cast('2000-10-01' as date) + interval '60' day -and ws1.ws_ship_date_sk = d_date_sk -and ws1.ws_ship_addr_sk = ca_address_sk -and ca_state in ('IA','IN','MT' - ,'NE' ,'OK' ,'TX') -and ws1.ws_web_site_sk = web_site_sk -and web_gmt_offset >= -7 -and ws1.ws_list_price between 141 and 170 -and exists (select * - from web_sales ws2 - where ws1.ws_order_number = ws2.ws_order_number - and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) -and not exists(select * - from web_returns wr1 - where ws1.ws_order_number = wr1.wr_order_number - and wr1.wr_reason_sk in (7, 10, 12, 29, 45) - ) -order by count(distinct ws_order_number) -limit 100; diff --git a/tune/protox/tests/unittest_dsb_dir/query099s0.sql b/tune/protox/tests/unittest_dsb_dir/query099s0.sql deleted file mode 100644 index 44159e7e..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query099s0.sql +++ /dev/null @@ -1,36 +0,0 @@ -select - substring(w_warehouse_name,1,20) - ,sm_type - ,cc_name - ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as "30 days" - ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and - (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as "31-60 days" - ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and - (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as "61-90 days" - ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and - (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as "91-120 days" - ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as ">120 days" -from - catalog_sales - ,warehouse - ,ship_mode - ,call_center - ,date_dim -where -d_month_seq between 1193 and 1193 + 23 -and cs_ship_date_sk = d_date_sk -and cs_warehouse_sk = w_warehouse_sk -and 
cs_ship_mode_sk = sm_ship_mode_sk -and cs_call_center_sk = cc_call_center_sk -and cs_list_price between 77 and 106 -and sm_type = 'TWO DAY' -and cc_class = 'small' -and w_gmt_offset = -5 -group by - substring(w_warehouse_name,1,20) - ,sm_type - ,cc_name -order by substring(w_warehouse_name,1,20) - ,sm_type - ,cc_name -limit 100; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query099s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query099s0_spj.sql deleted file mode 100644 index e0ca167a..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query099s0_spj.sql +++ /dev/null @@ -1,21 +0,0 @@ -select min(w_warehouse_name) - ,min(sm_type) - ,min(cc_name) - ,min(cs_order_number) - ,min(cs_item_sk) -from - catalog_sales - ,warehouse - ,ship_mode - ,call_center - ,date_dim -where - d_month_seq between 1193 and 1193 + 23 -and cs_ship_date_sk = d_date_sk -and cs_warehouse_sk = w_warehouse_sk -and cs_ship_mode_sk = sm_ship_mode_sk -and cs_call_center_sk = cc_call_center_sk -and cs_list_price between 77 and 106 -and sm_type = 'TWO DAY' -and cc_class = 'small' -and w_gmt_offset = -5; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query100s0.sql b/tune/protox/tests/unittest_dsb_dir/query100s0.sql deleted file mode 100644 index d6ade1bf..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query100s0.sql +++ /dev/null @@ -1,26 +0,0 @@ -select item1.i_item_sk, item2.i_item_sk, count(*) as cnt -FROM item AS item1, -item AS item2, -store_sales AS s1, -store_sales AS s2, -date_dim, -customer, -customer_address, -customer_demographics -WHERE -item1.i_item_sk < item2.i_item_sk -AND s1.ss_ticket_number = s2.ss_ticket_number -AND s1.ss_item_sk = item1.i_item_sk and s2.ss_item_sk = item2.i_item_sk -AND s1.ss_customer_sk = c_customer_sk -and c_current_addr_sk = ca_address_sk -and c_current_cdemo_sk = cd_demo_sk -AND d_year between 1998 and 1998 + 1 -and d_date_sk = s1.ss_sold_date_sk -and item1.i_category in ('Jewelry', 'Music') -and item2.i_manager_id between 77 and 96 -and cd_marital_status = 'W' -and cd_education_status = 'Primary' -and s1.ss_list_price between 236 and 250 -and s2.ss_list_price between 236 and 250 -GROUP BY item1.i_item_sk, item2.i_item_sk -ORDER BY cnt; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query100s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query100s0_spj.sql deleted file mode 100644 index bad76809..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query100s0_spj.sql +++ /dev/null @@ -1,27 +0,0 @@ -select min(item1.i_item_sk), - min(item2.i_item_sk), - min(s1.ss_ticket_number), - min(s1.ss_item_sk) -FROM item AS item1, -item AS item2, -store_sales AS s1, -store_sales AS s2, -date_dim, -customer, -customer_address, -customer_demographics -WHERE -item1.i_item_sk < item2.i_item_sk -AND s1.ss_ticket_number = s2.ss_ticket_number -AND s1.ss_item_sk = item1.i_item_sk and s2.ss_item_sk = item2.i_item_sk -AND s1.ss_customer_sk = c_customer_sk -and c_current_addr_sk = ca_address_sk -and c_current_cdemo_sk = cd_demo_sk -AND d_year between 1998 and 1998 + 1 -and d_date_sk = s1.ss_sold_date_sk -and item1.i_category in ('Jewelry', 'Music') -and item2.i_manager_id between 77 and 96 -and cd_marital_status = 'W' -and cd_education_status = 'Primary' -and s1.ss_list_price between 236 and 250 -and s2.ss_list_price between 236 and 250; \ No newline at end of file diff --git a/tune/protox/tests/unittest_dsb_dir/query101s0.sql b/tune/protox/tests/unittest_dsb_dir/query101s0.sql deleted file mode 100644 index 
273a33ac..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query101s0.sql +++ /dev/null @@ -1,31 +0,0 @@ -select c_customer_sk, c_first_name, c_last_name, count(*) as cnt -FROM -store_sales, -store_returns, -web_sales, -date_dim d1, -date_dim d2, -item, -customer, -customer_address, -household_demographics -WHERE -ss_ticket_number = sr_ticket_number -AND ss_customer_sk = ws_bill_customer_sk -AND ss_customer_sk = c_customer_sk -AND c_current_addr_sk = ca_address_sk -AND c_current_hdemo_sk = hd_demo_sk -AND ss_item_sk = sr_item_sk -AND sr_item_sk = ws_item_sk -AND i_item_sk = ss_item_sk -AND i_category IN ('Books', 'Shoes', 'Sports') -AND sr_returned_date_sk = d1.d_date_sk -AND ws_sold_date_sk = d2.d_date_sk -AND d2.d_date between d1.d_date AND (d1.d_date + interval '90' day) -AND ca_state in ('IN', 'MT', 'NM', 'OH', 'OR') -AND d1.d_year = 1999 -AND hd_income_band_sk BETWEEN 14 AND 20 -AND hd_buy_potential = '5001-10000' -AND ss_sales_price / ss_list_price BETWEEN 80 * 0.01 AND 100 * 0.01 -GROUP BY c_customer_sk, c_first_name, c_last_name -ORDER BY cnt; diff --git a/tune/protox/tests/unittest_dsb_dir/query101s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query101s0_spj.sql deleted file mode 100644 index fe4ce18a..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query101s0_spj.sql +++ /dev/null @@ -1,29 +0,0 @@ -select min(c_customer_sk), min(ss_item_sk), min(sr_ticket_number), min(ws_order_number) -FROM -store_sales, -store_returns, -web_sales, -date_dim d1, -date_dim d2, -item, -customer, -customer_address, -household_demographics -WHERE -ss_ticket_number = sr_ticket_number -AND ss_customer_sk = ws_bill_customer_sk -AND ss_customer_sk = c_customer_sk -AND c_current_addr_sk = ca_address_sk -AND c_current_hdemo_sk = hd_demo_sk -AND ss_item_sk = sr_item_sk -AND sr_item_sk = ws_item_sk -AND i_item_sk = ss_item_sk -AND i_category IN ('Books', 'Shoes', 'Sports') -AND sr_returned_date_sk = d1.d_date_sk -AND ws_sold_date_sk = d2.d_date_sk -AND d2.d_date between d1.d_date AND (d1.d_date + interval '90' day) -AND ca_state in ('IN', 'MT', 'NM', 'OH', 'OR') -AND d1.d_year = 1999 -AND hd_income_band_sk BETWEEN 14 AND 20 -AND hd_buy_potential = '5001-10000' -AND ss_sales_price / ss_list_price BETWEEN 80 * 0.01 AND 100 * 0.01; diff --git a/tune/protox/tests/unittest_dsb_dir/query102s0.sql b/tune/protox/tests/unittest_dsb_dir/query102s0.sql deleted file mode 100644 index 402cb7ce..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query102s0.sql +++ /dev/null @@ -1,43 +0,0 @@ -select - cd_gender, - cd_marital_status, - cd_education_status, - hd_vehicle_count, - count(*) as cnt -from - store_sales, - web_sales, - date_dim d1, - date_dim d2, - customer, - inventory, - store, - warehouse, - item, - customer_demographics, - household_demographics, - customer_address - where - ss_item_sk = i_item_sk - and ws_item_sk = ss_item_sk - and ss_sold_date_sk = d1.d_date_sk - and ws_sold_date_sk = d2.d_date_sk - and d2.d_date between d1.d_date and (d1.d_date + interval '30' DAY) - and ss_customer_sk = c_customer_sk - and ws_bill_customer_sk = c_customer_sk - and ws_warehouse_sk = inv_warehouse_sk - and ws_warehouse_sk = w_warehouse_sk - and inv_item_sk = ss_item_sk - and inv_date_sk = ss_sold_date_sk - and inv_quantity_on_hand >= ss_quantity - and s_state = w_state - AND i_category IN ('Books', 'Home', 'Sports') - and i_manager_id IN (3, 15, 17, 26, 43, 44, 55, 70, 82, 95) - and c_current_cdemo_sk = cd_demo_sk - and c_current_hdemo_sk = hd_demo_sk - and c_current_addr_sk = ca_address_sk - and ca_state in ('IN', 'LA', 
'NE', 'NM', 'OH') - and d1.d_year = 2001 - and ws_wholesale_cost BETWEEN 80 AND 100 - group by cd_gender, cd_marital_status, cd_education_status, hd_vehicle_count - order by cnt; diff --git a/tune/protox/tests/unittest_dsb_dir/query102s0_spj.sql b/tune/protox/tests/unittest_dsb_dir/query102s0_spj.sql deleted file mode 100644 index 2759a7bd..00000000 --- a/tune/protox/tests/unittest_dsb_dir/query102s0_spj.sql +++ /dev/null @@ -1,41 +0,0 @@ -select min(ss_item_sk), - min(ss_ticket_number), - min(ws_order_number), - min(c_customer_sk), - min(cd_demo_sk), - min(hd_demo_sk) -from - store_sales, - web_sales, - date_dim d1, - date_dim d2, - customer, - inventory, - store, - warehouse, - item, - customer_demographics, - household_demographics, - customer_address -where - ss_item_sk = i_item_sk - and ws_item_sk = ss_item_sk - and ss_sold_date_sk = d1.d_date_sk - and ws_sold_date_sk = d2.d_date_sk - and d2.d_date between d1.d_date and (d1.d_date + interval '30' day) - and ss_customer_sk = c_customer_sk - and ws_bill_customer_sk = c_customer_sk - and ws_warehouse_sk = inv_warehouse_sk - and ws_warehouse_sk = w_warehouse_sk - and inv_item_sk = ss_item_sk - and inv_date_sk = ss_sold_date_sk - and inv_quantity_on_hand >= ss_quantity - and s_state = w_state - AND i_category IN ('Books', 'Home', 'Sports') - and i_manager_id IN (3, 15, 17, 26, 43, 44, 55, 70, 82, 95) - and c_current_cdemo_sk = cd_demo_sk - and c_current_hdemo_sk = hd_demo_sk - and c_current_addr_sk = ca_address_sk - and ca_state in ('IN', 'LA', 'NE', 'NM', 'OH') - and d1.d_year = 2001 - and ws_wholesale_cost BETWEEN 80 AND 100; diff --git a/tune/protox/tests/unittest_index_space.py b/tune/protox/tests/unittest_index_space.py deleted file mode 100644 index 588d3b9d..00000000 --- a/tune/protox/tests/unittest_index_space.py +++ /dev/null @@ -1,102 +0,0 @@ -import unittest -from pathlib import Path - -import numpy as np -import yaml - -from tune.protox.env.space.primitive_space import IndexSpace -from tune.protox.env.space.utils import check_subspace -from tune.protox.env.types import IndexSpaceRawSample -from tune.protox.env.workload import Workload - - -class IndexSpaceTests(unittest.TestCase): - @staticmethod - def load( - config_path: Path = Path( - "tune/protox/tests/unittest_benchmark_configs/unittest_tpch.yaml" - ).resolve(), - aux_type: bool = True, - aux_include: bool = True, - ) -> tuple[Workload, IndexSpace]: - # don't call open_and_save() because this is a unittest - with open(config_path, "r") as f: - benchmark_config = yaml.safe_load(f) - benchmark_key = [k for k in benchmark_config.keys()][0] - benchmark_config = benchmark_config[benchmark_key] - benchmark_config["benchmark"] = benchmark_key - - w = Workload( - dbgym_cfg=None, - tables=benchmark_config["tables"], - attributes=benchmark_config["attributes"], - query_spec=benchmark_config["query_spec"], - workload_path=Path("tune/protox/tests/unittest_tpch_dir").resolve(), - pid=None, - workload_timeout=0, - workload_timeout_penalty=1.0, - artifact_manager=None, - ) - - i = IndexSpace( - tables=benchmark_config["tables"], - max_num_columns=benchmark_config["max_num_columns"], - max_indexable_attributes=w.max_indexable(), - seed=0, - rel_metadata=benchmark_config["attributes"], - attributes_overwrite=w.column_usages(), - tbl_include_subsets=w.tbl_include_subsets, - index_space_aux_type=aux_type, - index_space_aux_include=aux_include, - deterministic_policy=True, - ) - return w, i - - def test_null_action(self) -> None: - w, i = IndexSpaceTests.load() - null_action = i.null_action() 
- self.assertTrue(check_subspace(i, null_action)) - - w, i = IndexSpaceTests.load(aux_type=False, aux_include=False) - null_action = i.null_action() - self.assertTrue(check_subspace(i, null_action)) - - def test_sample(self) -> None: - w, i = IndexSpaceTests.load(aux_type=False, aux_include=False) - for _ in range(100): - self.assertTrue(check_subspace(i, i.sample())) - - def test_sample_table(self) -> None: - w, i = IndexSpaceTests.load(aux_type=False, aux_include=False) - for _ in range(100): - mask = {"table_idx": 2} - ia = i.to_action(i.sample(mask)) - self.assertEqual(ia.tbl_name, "lineitem") - - def test_sample_table_col(self) -> None: - w, i = IndexSpaceTests.load(aux_type=False, aux_include=False) - for _ in range(100): - mask = {"table_idx": 2, "col_idx": 1} - ia = i.to_action(i.sample(mask)) - self.assertEqual(ia.tbl_name, "lineitem") - self.assertEqual(ia.columns[0], "l_partkey") - - def test_neighborhood(self) -> None: - w, i = IndexSpaceTests.load(aux_type=True, aux_include=True) - _, isa = IndexSpaceTests.load(aux_type=False, aux_include=False) - - act = isa.sample(mask={"table_idx": 2, "col_idx": 1}) - act = IndexSpaceRawSample( - tuple([0, *act, np.zeros(i.max_inc_columns, dtype=np.float32)]) - ) - self.assertTrue(check_subspace(i, act)) - - neighbors = i.policy.structural_neighbors(act) - for n in neighbors: - ia = i.to_action(n) - self.assertEqual(n, ia.raw_repr) - self.assertTrue(check_subspace(i, n)) - - -if __name__ == "__main__": - unittest.main() diff --git a/tune/protox/tests/unittest_jobfull_dir/1.sql b/tune/protox/tests/unittest_jobfull_dir/1.sql deleted file mode 100644 index cb464bb5..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/1.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT MIN(mc.note) AS production_note, - MIN(t.title) AS movie_title, - MIN(t.production_year) AS movie_year -FROM company_type AS ct, - info_type AS it, - movie_companies AS mc, - movie_info_idx AS mi_idx, - title AS t -WHERE ct.kind = 'production companies' - AND it.info = 'top 250 rank' - AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' - AND (mc.note LIKE '%(co-production)%' - OR mc.note LIKE '%(presents)%') - AND ct.id = mc.company_type_id - AND t.id = mc.movie_id - AND t.id = mi_idx.movie_id - AND mc.movie_id = mi_idx.movie_id - AND it.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/10.sql b/tune/protox/tests/unittest_jobfull_dir/10.sql deleted file mode 100644 index 13dcad63..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/10.sql +++ /dev/null @@ -1,22 +0,0 @@ -SELECT MIN(chn.name) AS uncredited_voiced_character, - MIN(t.title) AS russian_movie -FROM char_name AS chn, - cast_info AS ci, - company_name AS cn, - company_type AS ct, - movie_companies AS mc, - role_type AS rt, - title AS t -WHERE ci.note LIKE '%(voice)%' - AND ci.note LIKE '%(uncredited)%' - AND cn.country_code = '[ru]' - AND rt.role = 'actor' - AND t.production_year > 2005 - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mc.movie_id - AND chn.id = ci.person_role_id - AND rt.id = ci.role_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/10b.sql b/tune/protox/tests/unittest_jobfull_dir/10b.sql deleted file mode 100644 index 916a78af..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/10b.sql +++ /dev/null @@ -1,21 +0,0 @@ -SELECT MIN(chn.name) AS character, - MIN(t.title) AS russian_mov_with_actor_producer -FROM char_name AS chn, - cast_info AS ci, - company_name AS cn, - 
company_type AS ct, - movie_companies AS mc, - role_type AS rt, - title AS t -WHERE ci.note LIKE '%(producer)%' - AND cn.country_code = '[ru]' - AND rt.role = 'actor' - AND t.production_year > 2010 - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mc.movie_id - AND chn.id = ci.person_role_id - AND rt.id = ci.role_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/10c.sql b/tune/protox/tests/unittest_jobfull_dir/10c.sql deleted file mode 100644 index 234c4e05..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/10c.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT MIN(chn.name) AS character, - MIN(t.title) AS movie_with_american_producer -FROM char_name AS chn, - cast_info AS ci, - company_name AS cn, - company_type AS ct, - movie_companies AS mc, - role_type AS rt, - title AS t -WHERE ci.note LIKE '%(producer)%' - AND cn.country_code = '[us]' - AND t.production_year > 1990 - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mc.movie_id - AND chn.id = ci.person_role_id - AND rt.id = ci.role_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id; diff --git a/tune/protox/tests/unittest_jobfull_dir/11.sql b/tune/protox/tests/unittest_jobfull_dir/11.sql deleted file mode 100644 index 24bc6b62..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/11.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT MIN(cn.name) AS from_company, - MIN(lt.link) AS movie_link_type, - MIN(t.title) AS non_polish_sequel_movie -FROM company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cn.country_code !='[pl]' - AND (cn.name LIKE '%Film%' - OR cn.name LIKE '%Warner%') - AND ct.kind ='production companies' - AND k.keyword ='sequel' - AND lt.link LIKE '%follow%' - AND mc.note IS NULL - AND t.production_year BETWEEN 1950 AND 2000 - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/11b.sql b/tune/protox/tests/unittest_jobfull_dir/11b.sql deleted file mode 100644 index 881ad7cb..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/11b.sql +++ /dev/null @@ -1,31 +0,0 @@ -SELECT MIN(cn.name) AS from_company, - MIN(lt.link) AS movie_link_type, - MIN(t.title) AS sequel_movie -FROM company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cn.country_code !='[pl]' - AND (cn.name LIKE '%Film%' - OR cn.name LIKE '%Warner%') - AND ct.kind ='production companies' - AND k.keyword ='sequel' - AND lt.link LIKE '%follows%' - AND mc.note IS NULL - AND t.production_year = 1998 - AND t.title LIKE '%Money%' - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/11c.sql b/tune/protox/tests/unittest_jobfull_dir/11c.sql deleted file mode 100644 index 03dd71b6..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/11c.sql +++ /dev/null @@ -1,32 +0,0 @@ 
-SELECT MIN(cn.name) AS from_company, - MIN(mc.note) AS production_note, - MIN(t.title) AS movie_based_on_book -FROM company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cn.country_code !='[pl]' - AND (cn.name LIKE '20th Century Fox%' - OR cn.name LIKE 'Twentieth Century Fox%') - AND ct.kind != 'production companies' - AND ct.kind IS NOT NULL - AND k.keyword IN ('sequel', - 'revenge', - 'based-on-novel') - AND mc.note IS NOT NULL - AND t.production_year > 1950 - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/11d.sql b/tune/protox/tests/unittest_jobfull_dir/11d.sql deleted file mode 100644 index 9872fda8..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/11d.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT MIN(cn.name) AS from_company, - MIN(mc.note) AS production_note, - MIN(t.title) AS movie_based_on_book -FROM company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cn.country_code !='[pl]' - AND ct.kind != 'production companies' - AND ct.kind IS NOT NULL - AND k.keyword IN ('sequel', - 'revenge', - 'based-on-novel') - AND mc.note IS NOT NULL - AND t.production_year > 1950 - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/12.sql b/tune/protox/tests/unittest_jobfull_dir/12.sql deleted file mode 100644 index e5b33664..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/12.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS drama_horror_movie -FROM company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - title AS t -WHERE cn.country_code = '[us]' - AND ct.kind = 'production companies' - AND it1.info = 'genres' - AND it2.info = 'rating' - AND mi.info IN ('Drama', - 'Horror') - AND mi_idx.info > '8.0' - AND t.production_year BETWEEN 2005 AND 2008 - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND mi.info_type_id = it1.id - AND mi_idx.info_type_id = it2.id - AND t.id = mc.movie_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id - AND mc.movie_id = mi.movie_id - AND mc.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/12b.sql b/tune/protox/tests/unittest_jobfull_dir/12b.sql deleted file mode 100644 index 16bc8989..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/12b.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT MIN(mi.info) AS budget, - MIN(t.title) AS unsuccsessful_movie -FROM company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - title AS t -WHERE cn.country_code ='[us]' - AND ct.kind IS NOT NULL - AND (ct.kind ='production companies' - OR 
ct.kind = 'distributors') - AND it1.info ='budget' - AND it2.info ='bottom 10 rank' - AND t.production_year >2000 - AND (t.title LIKE 'Birdemic%' - OR t.title LIKE '%Movie%') - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND mi.info_type_id = it1.id - AND mi_idx.info_type_id = it2.id - AND t.id = mc.movie_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id - AND mc.movie_id = mi.movie_id - AND mc.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/12c.sql b/tune/protox/tests/unittest_jobfull_dir/12c.sql deleted file mode 100644 index 97e5a116..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/12c.sql +++ /dev/null @@ -1,32 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS mainstream_movie -FROM company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - title AS t -WHERE cn.country_code = '[us]' - AND ct.kind = 'production companies' - AND it1.info = 'genres' - AND it2.info = 'rating' - AND mi.info IN ('Drama', - 'Horror', - 'Western', - 'Family') - AND mi_idx.info > '7.0' - AND t.production_year BETWEEN 2000 AND 2010 - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND mi.info_type_id = it1.id - AND mi_idx.info_type_id = it2.id - AND t.id = mc.movie_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id - AND mc.movie_id = mi.movie_id - AND mc.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/13.sql b/tune/protox/tests/unittest_jobfull_dir/13.sql deleted file mode 100644 index dfef6fe0..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/13.sql +++ /dev/null @@ -1,29 +0,0 @@ -SELECT MIN(mi.info) AS release_date, - MIN(miidx.info) AS rating, - MIN(t.title) AS german_movie -FROM company_name AS cn, - company_type AS ct, - info_type AS it, - info_type AS it2, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS miidx, - title AS t -WHERE cn.country_code ='[de]' - AND ct.kind ='production companies' - AND it.info ='rating' - AND it2.info ='release dates' - AND kt.kind ='movie' - AND mi.movie_id = t.id - AND it2.id = mi.info_type_id - AND kt.id = t.kind_id - AND mc.movie_id = t.id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id - AND miidx.movie_id = t.id - AND it.id = miidx.info_type_id - AND mi.movie_id = miidx.movie_id - AND mi.movie_id = mc.movie_id - AND miidx.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/13b.sql b/tune/protox/tests/unittest_jobfull_dir/13b.sql deleted file mode 100644 index ecc262fe..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/13b.sql +++ /dev/null @@ -1,32 +0,0 @@ -SELECT MIN(cn.name) AS producing_company, - MIN(miidx.info) AS rating, - MIN(t.title) AS movie_about_winning -FROM company_name AS cn, - company_type AS ct, - info_type AS it, - info_type AS it2, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS miidx, - title AS t -WHERE cn.country_code ='[us]' - AND ct.kind ='production companies' - AND it.info ='rating' - AND it2.info ='release dates' - AND kt.kind ='movie' - AND t.title != '' - AND (t.title LIKE '%Champion%' - OR t.title LIKE '%Loser%') - AND mi.movie_id = t.id - AND it2.id = mi.info_type_id - AND kt.id = t.kind_id - AND mc.movie_id = t.id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id - AND miidx.movie_id = 
t.id - AND it.id = miidx.info_type_id - AND mi.movie_id = miidx.movie_id - AND mi.movie_id = mc.movie_id - AND miidx.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/13c.sql b/tune/protox/tests/unittest_jobfull_dir/13c.sql deleted file mode 100644 index 921df955..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/13c.sql +++ /dev/null @@ -1,32 +0,0 @@ -SELECT MIN(cn.name) AS producing_company, - MIN(miidx.info) AS rating, - MIN(t.title) AS movie_about_winning -FROM company_name AS cn, - company_type AS ct, - info_type AS it, - info_type AS it2, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS miidx, - title AS t -WHERE cn.country_code ='[us]' - AND ct.kind ='production companies' - AND it.info ='rating' - AND it2.info ='release dates' - AND kt.kind ='movie' - AND t.title != '' - AND (t.title LIKE 'Champion%' - OR t.title LIKE 'Loser%') - AND mi.movie_id = t.id - AND it2.id = mi.info_type_id - AND kt.id = t.kind_id - AND mc.movie_id = t.id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id - AND miidx.movie_id = t.id - AND it.id = miidx.info_type_id - AND mi.movie_id = miidx.movie_id - AND mi.movie_id = mc.movie_id - AND miidx.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/13d.sql b/tune/protox/tests/unittest_jobfull_dir/13d.sql deleted file mode 100644 index 734748bf..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/13d.sql +++ /dev/null @@ -1,29 +0,0 @@ -SELECT MIN(cn.name) AS producing_company, - MIN(miidx.info) AS rating, - MIN(t.title) AS movie -FROM company_name AS cn, - company_type AS ct, - info_type AS it, - info_type AS it2, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS miidx, - title AS t -WHERE cn.country_code ='[us]' - AND ct.kind ='production companies' - AND it.info ='rating' - AND it2.info ='release dates' - AND kt.kind ='movie' - AND mi.movie_id = t.id - AND it2.id = mi.info_type_id - AND kt.id = t.kind_id - AND mc.movie_id = t.id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id - AND miidx.movie_id = t.id - AND it.id = miidx.info_type_id - AND mi.movie_id = miidx.movie_id - AND mi.movie_id = mc.movie_id - AND miidx.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/14.sql b/tune/protox/tests/unittest_jobfull_dir/14.sql deleted file mode 100644 index aaa86ca4..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/14.sql +++ /dev/null @@ -1,40 +0,0 @@ -SELECT MIN(mi_idx.info) AS rating, - MIN(t.title) AS northern_dark_movie -FROM info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind = 'movie' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND mi_idx.info < '8.5' - AND t.production_year > 2010 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/14b.sql b/tune/protox/tests/unittest_jobfull_dir/14b.sql deleted file mode 100644 index e7364f47..00000000 --- 
a/tune/protox/tests/unittest_jobfull_dir/14b.sql +++ /dev/null @@ -1,41 +0,0 @@ -SELECT MIN(mi_idx.info) AS rating, - MIN(t.title) AS western_dark_production -FROM info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title') - AND kt.kind = 'movie' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND mi_idx.info > '6.0' - AND t.production_year > 2010 - AND (t.title LIKE '%murder%' - OR t.title LIKE '%Murder%' - OR t.title LIKE '%Mord%') - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/14c.sql b/tune/protox/tests/unittest_jobfull_dir/14c.sql deleted file mode 100644 index bb1cf314..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/14c.sql +++ /dev/null @@ -1,42 +0,0 @@ -SELECT MIN(mi_idx.info) AS rating, - MIN(t.title) AS north_european_dark_production -FROM info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IS NOT NULL - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind IN ('movie', - 'episode') - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Danish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND mi_idx.info < '8.5' - AND t.production_year > 2005 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/15.sql b/tune/protox/tests/unittest_jobfull_dir/15.sql deleted file mode 100644 index 4471b8c7..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/15.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT MIN(mi.info) AS release_date, - MIN(t.title) AS internet_movie -FROM aka_title AS at, - company_name AS cn, - company_type AS ct, - info_type AS it1, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE cn.country_code = '[us]' - AND it1.info = 'release dates' - AND mc.note LIKE '%(200%)%' - AND mc.note LIKE '%(worldwide)%' - AND mi.note LIKE '%internet%' - AND mi.info LIKE 'USA:% 200%' - AND t.production_year > 2000 - AND t.id = at.movie_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = at.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = at.movie_id - AND mc.movie_id = at.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/15b.sql b/tune/protox/tests/unittest_jobfull_dir/15b.sql deleted file mode 100644 index 425489b5..00000000 --- 
a/tune/protox/tests/unittest_jobfull_dir/15b.sql +++ /dev/null @@ -1,34 +0,0 @@ -SELECT MIN(mi.info) AS release_date, - MIN(t.title) AS youtube_movie -FROM aka_title AS at, - company_name AS cn, - company_type AS ct, - info_type AS it1, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE cn.country_code = '[us]' - AND cn.name = 'YouTube' - AND it1.info = 'release dates' - AND mc.note LIKE '%(200%)%' - AND mc.note LIKE '%(worldwide)%' - AND mi.note LIKE '%internet%' - AND mi.info LIKE 'USA:% 200%' - AND t.production_year BETWEEN 2005 AND 2010 - AND t.id = at.movie_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = at.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = at.movie_id - AND mc.movie_id = at.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/15c.sql b/tune/protox/tests/unittest_jobfull_dir/15c.sql deleted file mode 100644 index bdead025..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/15c.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT MIN(mi.info) AS release_date, - MIN(t.title) AS modern_american_internet_movie -FROM aka_title AS at, - company_name AS cn, - company_type AS ct, - info_type AS it1, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE cn.country_code = '[us]' - AND it1.info = 'release dates' - AND mi.note LIKE '%internet%' - AND mi.info IS NOT NULL - AND (mi.info LIKE 'USA:% 199%' - OR mi.info LIKE 'USA:% 200%') - AND t.production_year > 1990 - AND t.id = at.movie_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = at.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = at.movie_id - AND mc.movie_id = at.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/15d.sql b/tune/protox/tests/unittest_jobfull_dir/15d.sql deleted file mode 100644 index fc62cb53..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/15d.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT MIN(at.title) AS aka_title, - MIN(t.title) AS internet_movie_title -FROM aka_title AS at, - company_name AS cn, - company_type AS ct, - info_type AS it1, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE cn.country_code = '[us]' - AND it1.info = 'release dates' - AND mi.note LIKE '%internet%' - AND t.production_year > 1990 - AND t.id = at.movie_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = at.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = at.movie_id - AND mc.movie_id = at.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/16.sql b/tune/protox/tests/unittest_jobfull_dir/16.sql deleted file mode 100644 index 7ce0bb06..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/16.sql +++ /dev/null @@ -1,26 +0,0 @@ -SELECT MIN(an.name) AS cool_actor_pseudonym, - MIN(t.title) AS series_named_after_char -FROM aka_name AS an, 
- cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cn.country_code ='[us]' - AND k.keyword ='character-name-in-title' - AND t.episode_nr >= 50 - AND t.episode_nr < 100 - AND an.person_id = n.id - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND an.person_id = ci.person_id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/16b.sql b/tune/protox/tests/unittest_jobfull_dir/16b.sql deleted file mode 100644 index 8aa7371c..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/16b.sql +++ /dev/null @@ -1,24 +0,0 @@ -SELECT MIN(an.name) AS cool_actor_pseudonym, - MIN(t.title) AS series_named_after_char -FROM aka_name AS an, - cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cn.country_code ='[us]' - AND k.keyword ='character-name-in-title' - AND an.person_id = n.id - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND an.person_id = ci.person_id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/16c.sql b/tune/protox/tests/unittest_jobfull_dir/16c.sql deleted file mode 100644 index 91f4bbeb..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/16c.sql +++ /dev/null @@ -1,25 +0,0 @@ -SELECT MIN(an.name) AS cool_actor_pseudonym, - MIN(t.title) AS series_named_after_char -FROM aka_name AS an, - cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cn.country_code ='[us]' - AND k.keyword ='character-name-in-title' - AND t.episode_nr < 100 - AND an.person_id = n.id - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND an.person_id = ci.person_id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/16d.sql b/tune/protox/tests/unittest_jobfull_dir/16d.sql deleted file mode 100644 index f7fce518..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/16d.sql +++ /dev/null @@ -1,26 +0,0 @@ -SELECT MIN(an.name) AS cool_actor_pseudonym, - MIN(t.title) AS series_named_after_char -FROM aka_name AS an, - cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cn.country_code ='[us]' - AND k.keyword ='character-name-in-title' - AND t.episode_nr >= 5 - AND t.episode_nr < 100 - AND an.person_id = n.id - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND an.person_id = ci.person_id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/17.sql b/tune/protox/tests/unittest_jobfull_dir/17.sql deleted file mode 100644 index 2db3358d..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/17.sql +++ /dev/null @@ -1,22 +0,0 @@ -SELECT MIN(n.name) AS 
member_in_charnamed_american_movie, - MIN(n.name) AS a1 -FROM cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cn.country_code ='[us]' - AND k.keyword ='character-name-in-title' - AND n.name LIKE 'B%' - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/17b.sql b/tune/protox/tests/unittest_jobfull_dir/17b.sql deleted file mode 100644 index 7585e06d..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/17b.sql +++ /dev/null @@ -1,21 +0,0 @@ -SELECT MIN(n.name) AS member_in_charnamed_movie, - MIN(n.name) AS a1 -FROM cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword ='character-name-in-title' - AND n.name LIKE 'Z%' - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/17c.sql b/tune/protox/tests/unittest_jobfull_dir/17c.sql deleted file mode 100644 index 8d177cb6..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/17c.sql +++ /dev/null @@ -1,21 +0,0 @@ -SELECT MIN(n.name) AS member_in_charnamed_movie, - MIN(n.name) AS a1 -FROM cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword ='character-name-in-title' - AND n.name LIKE 'X%' - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/17d.sql b/tune/protox/tests/unittest_jobfull_dir/17d.sql deleted file mode 100644 index 9be724d6..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/17d.sql +++ /dev/null @@ -1,20 +0,0 @@ -SELECT MIN(n.name) AS member_in_charnamed_movie -FROM cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword ='character-name-in-title' - AND n.name LIKE '%Bert%' - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/17e.sql b/tune/protox/tests/unittest_jobfull_dir/17e.sql deleted file mode 100644 index 784902e7..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/17e.sql +++ /dev/null @@ -1,20 +0,0 @@ -SELECT MIN(n.name) AS member_in_charnamed_movie -FROM cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cn.country_code ='[us]' - AND k.keyword ='character-name-in-title' - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = 
mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/17f.sql b/tune/protox/tests/unittest_jobfull_dir/17f.sql deleted file mode 100644 index 35bb0c49..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/17f.sql +++ /dev/null @@ -1,20 +0,0 @@ -SELECT MIN(n.name) AS member_in_charnamed_movie -FROM cast_info AS ci, - company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword ='character-name-in-title' - AND n.name LIKE '%B%' - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.movie_id = mc.movie_id - AND ci.movie_id = mk.movie_id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/18.sql b/tune/protox/tests/unittest_jobfull_dir/18.sql deleted file mode 100644 index edd21f3f..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/18.sql +++ /dev/null @@ -1,26 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(t.title) AS movie_title -FROM cast_info AS ci, - info_type AS it1, - info_type AS it2, - movie_info AS mi, - movie_info_idx AS mi_idx, - name AS n, - title AS t -WHERE ci.note IN ('(producer)', - '(executive producer)') - AND it1.info = 'budget' - AND it2.info = 'votes' - AND n.gender = 'm' - AND n.name LIKE '%Tim%' - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/18b.sql b/tune/protox/tests/unittest_jobfull_dir/18b.sql deleted file mode 100644 index 03e685d8..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/18b.sql +++ /dev/null @@ -1,34 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(t.title) AS movie_title -FROM cast_info AS ci, - info_type AS it1, - info_type AS it2, - movie_info AS mi, - movie_info_idx AS mi_idx, - name AS n, - title AS t -WHERE ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND it1.info = 'genres' - AND it2.info = 'rating' - AND mi.info IN ('Horror', - 'Thriller') - AND mi.note IS NULL - AND mi_idx.info > '8.0' - AND n.gender IS NOT NULL - AND n.gender = 'f' - AND t.production_year BETWEEN 2008 AND 2014 - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/18c.sql b/tune/protox/tests/unittest_jobfull_dir/18c.sql deleted file mode 100644 index 9762e815..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/18c.sql +++ /dev/null @@ -1,34 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(t.title) AS movie_title -FROM cast_info AS ci, - info_type AS it1, - info_type AS it2, - movie_info AS mi, - movie_info_idx AS mi_idx, - name AS n, - title AS t -WHERE ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND it1.info = 'genres' - AND it2.info = 'votes' - AND mi.info IN ('Horror', - 'Action', - 'Sci-Fi', - 'Thriller', - 'Crime', - 'War') - AND n.gender = 'm' - 
AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND mi.movie_id = mi_idx.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/19.sql b/tune/protox/tests/unittest_jobfull_dir/19.sql deleted file mode 100644 index 68a0eb69..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/19.sql +++ /dev/null @@ -1,42 +0,0 @@ -SELECT MIN(n.name) AS voicing_actress, - MIN(t.title) AS voiced_movie -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - movie_companies AS mc, - movie_info AS mi, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note IN ('(voice)', - '(voice: Japanese version)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND it.info = 'release dates' - AND mc.note IS NOT NULL - AND (mc.note LIKE '%(USA)%' - OR mc.note LIKE '%(worldwide)%') - AND mi.info IS NOT NULL - AND (mi.info LIKE 'Japan:%200%' - OR mi.info LIKE 'USA:%200%') - AND n.gender ='f' - AND n.name LIKE '%Ang%' - AND rt.role ='actress' - AND t.production_year BETWEEN 2005 AND 2009 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mi.movie_id = ci.movie_id - AND cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/19b.sql b/tune/protox/tests/unittest_jobfull_dir/19b.sql deleted file mode 100644 index c2c5ba73..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/19b.sql +++ /dev/null @@ -1,40 +0,0 @@ -SELECT MIN(n.name) AS voicing_actress, - MIN(t.title) AS kung_fu_panda -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - movie_companies AS mc, - movie_info AS mi, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note = '(voice)' - AND cn.country_code ='[us]' - AND it.info = 'release dates' - AND mc.note LIKE '%(200%)%' - AND (mc.note LIKE '%(USA)%' - OR mc.note LIKE '%(worldwide)%') - AND mi.info IS NOT NULL - AND (mi.info LIKE 'Japan:%2007%' - OR mi.info LIKE 'USA:%2008%') - AND n.gender ='f' - AND n.name LIKE '%Angel%' - AND rt.role ='actress' - AND t.production_year BETWEEN 2007 AND 2008 - AND t.title LIKE '%Kung%Fu%Panda%' - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mi.movie_id = ci.movie_id - AND cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/19c.sql b/tune/protox/tests/unittest_jobfull_dir/19c.sql deleted file mode 100644 index c8139bc7..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/19c.sql +++ /dev/null @@ -1,39 +0,0 @@ -SELECT MIN(n.name) AS voicing_actress, - MIN(t.title) AS jap_engl_voiced_movie -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - movie_companies AS mc, - movie_info AS mi, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note IN ('(voice)', - '(voice: Japanese version)', - '(voice) (uncredited)', - 
'(voice: English version)') - AND cn.country_code ='[us]' - AND it.info = 'release dates' - AND mi.info IS NOT NULL - AND (mi.info LIKE 'Japan:%200%' - OR mi.info LIKE 'USA:%200%') - AND n.gender ='f' - AND n.name LIKE '%An%' - AND rt.role ='actress' - AND t.production_year > 2000 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mi.movie_id = ci.movie_id - AND cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/19d.sql b/tune/protox/tests/unittest_jobfull_dir/19d.sql deleted file mode 100644 index 03c7a850..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/19d.sql +++ /dev/null @@ -1,35 +0,0 @@ -SELECT MIN(n.name) AS voicing_actress, - MIN(t.title) AS jap_engl_voiced_movie -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - movie_companies AS mc, - movie_info AS mi, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note IN ('(voice)', - '(voice: Japanese version)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND it.info = 'release dates' - AND n.gender ='f' - AND rt.role ='actress' - AND t.production_year > 2000 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mi.movie_id = ci.movie_id - AND cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/1b.sql b/tune/protox/tests/unittest_jobfull_dir/1b.sql deleted file mode 100644 index c471f862..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/1b.sql +++ /dev/null @@ -1,18 +0,0 @@ -SELECT MIN(mc.note) AS production_note, - MIN(t.title) AS movie_title, - MIN(t.production_year) AS movie_year -FROM company_type AS ct, - info_type AS it, - movie_companies AS mc, - movie_info_idx AS mi_idx, - title AS t -WHERE ct.kind = 'production companies' - AND it.info = 'bottom 10 rank' - AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' - AND t.production_year BETWEEN 2005 AND 2010 - AND ct.id = mc.company_type_id - AND t.id = mc.movie_id - AND t.id = mi_idx.movie_id - AND mc.movie_id = mi_idx.movie_id - AND it.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/1c.sql b/tune/protox/tests/unittest_jobfull_dir/1c.sql deleted file mode 100644 index 41600c37..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/1c.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT MIN(mc.note) AS production_note, - MIN(t.title) AS movie_title, - MIN(t.production_year) AS movie_year -FROM company_type AS ct, - info_type AS it, - movie_companies AS mc, - movie_info_idx AS mi_idx, - title AS t -WHERE ct.kind = 'production companies' - AND it.info = 'top 250 rank' - AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' - AND (mc.note LIKE '%(co-production)%') - AND t.production_year >2010 - AND ct.id = mc.company_type_id - AND t.id = mc.movie_id - AND t.id = mi_idx.movie_id - AND mc.movie_id = mi_idx.movie_id - AND it.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/1d.sql b/tune/protox/tests/unittest_jobfull_dir/1d.sql deleted file 
mode 100644 index 96a6eda9..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/1d.sql +++ /dev/null @@ -1,18 +0,0 @@ -SELECT MIN(mc.note) AS production_note, - MIN(t.title) AS movie_title, - MIN(t.production_year) AS movie_year -FROM company_type AS ct, - info_type AS it, - movie_companies AS mc, - movie_info_idx AS mi_idx, - title AS t -WHERE ct.kind = 'production companies' - AND it.info = 'bottom 10 rank' - AND mc.note NOT LIKE '%(as Metro-Goldwyn-Mayer Pictures)%' - AND t.production_year >2000 - AND ct.id = mc.company_type_id - AND t.id = mc.movie_id - AND t.id = mi_idx.movie_id - AND mc.movie_id = mi_idx.movie_id - AND it.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/2.sql b/tune/protox/tests/unittest_jobfull_dir/2.sql deleted file mode 100644 index 6e6b46ed..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/2.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT MIN(t.title) AS movie_title -FROM company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - title AS t -WHERE cn.country_code ='[de]' - AND k.keyword ='character-name-in-title' - AND cn.id = mc.company_id - AND mc.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/20.sql b/tune/protox/tests/unittest_jobfull_dir/20.sql deleted file mode 100644 index 3eaedce6..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/20.sql +++ /dev/null @@ -1,39 +0,0 @@ -SELECT MIN(t.title) AS complete_downey_ironman_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - keyword AS k, - kind_type AS kt, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind LIKE '%complete%' - AND chn.name NOT LIKE '%Sherlock%' - AND (chn.name LIKE '%Tony%Stark%' - OR chn.name LIKE '%Iron%Man%') - AND k.keyword IN ('superhero', - 'sequel', - 'second-part', - 'marvel-comics', - 'based-on-comic', - 'tv-special', - 'fight', - 'violence') - AND kt.kind = 'movie' - AND t.production_year > 1950 - AND kt.id = t.kind_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = ci.movie_id - AND mk.movie_id = cc.movie_id - AND ci.movie_id = cc.movie_id - AND chn.id = ci.person_role_id - AND n.id = ci.person_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/20b.sql b/tune/protox/tests/unittest_jobfull_dir/20b.sql deleted file mode 100644 index 68c038ca..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/20b.sql +++ /dev/null @@ -1,40 +0,0 @@ -SELECT MIN(t.title) AS complete_downey_ironman_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - keyword AS k, - kind_type AS kt, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind LIKE '%complete%' - AND chn.name NOT LIKE '%Sherlock%' - AND (chn.name LIKE '%Tony%Stark%' - OR chn.name LIKE '%Iron%Man%') - AND k.keyword IN ('superhero', - 'sequel', - 'second-part', - 'marvel-comics', - 'based-on-comic', - 'tv-special', - 'fight', - 'violence') - AND kt.kind = 'movie' - AND n.name LIKE '%Downey%Robert%' - AND t.production_year > 2000 - AND kt.id = t.kind_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = ci.movie_id - AND mk.movie_id = cc.movie_id - AND ci.movie_id = cc.movie_id - 
AND chn.id = ci.person_role_id - AND n.id = ci.person_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/20c.sql b/tune/protox/tests/unittest_jobfull_dir/20c.sql deleted file mode 100644 index 14bc3665..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/20c.sql +++ /dev/null @@ -1,42 +0,0 @@ -SELECT MIN(n.name) AS cast_member, - MIN(t.title) AS complete_dynamic_hero_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - keyword AS k, - kind_type AS kt, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind LIKE '%complete%' - AND chn.name IS NOT NULL - AND (chn.name LIKE '%man%' - OR chn.name LIKE '%Man%') - AND k.keyword IN ('superhero', - 'marvel-comics', - 'based-on-comic', - 'tv-special', - 'fight', - 'violence', - 'magnet', - 'web', - 'claw', - 'laser') - AND kt.kind = 'movie' - AND t.production_year > 2000 - AND kt.id = t.kind_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = ci.movie_id - AND mk.movie_id = cc.movie_id - AND ci.movie_id = cc.movie_id - AND chn.id = ci.person_role_id - AND n.id = ci.person_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/21.sql b/tune/protox/tests/unittest_jobfull_dir/21.sql deleted file mode 100644 index e0b5c137..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/21.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT MIN(cn.name) AS company_name, - MIN(lt.link) AS link_type, - MIN(t.title) AS western_follow_up -FROM company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cn.country_code !='[pl]' - AND (cn.name LIKE '%Film%' - OR cn.name LIKE '%Warner%') - AND ct.kind ='production companies' - AND k.keyword ='sequel' - AND lt.link LIKE '%follow%' - AND mc.note IS NULL - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German') - AND t.production_year BETWEEN 1950 AND 2000 - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND mi.movie_id = t.id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND ml.movie_id = mi.movie_id - AND mk.movie_id = mi.movie_id - AND mc.movie_id = mi.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/21b.sql b/tune/protox/tests/unittest_jobfull_dir/21b.sql deleted file mode 100644 index ffd2dcf8..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/21b.sql +++ /dev/null @@ -1,37 +0,0 @@ -SELECT MIN(cn.name) AS company_name, - MIN(lt.link) AS link_type, - MIN(t.title) AS german_follow_up -FROM company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cn.country_code !='[pl]' - AND (cn.name LIKE '%Film%' - OR cn.name LIKE '%Warner%') - AND ct.kind ='production companies' - AND k.keyword ='sequel' - AND lt.link LIKE '%follow%' - AND mc.note IS NULL - AND mi.info IN ('Germany', - 'German') - AND t.production_year BETWEEN 2000 AND 2010 - AND lt.id = ml.link_type_id - AND ml.movie_id = 
t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND mi.movie_id = t.id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND ml.movie_id = mi.movie_id - AND mk.movie_id = mi.movie_id - AND mc.movie_id = mi.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/21c.sql b/tune/protox/tests/unittest_jobfull_dir/21c.sql deleted file mode 100644 index b7cc4dca..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/21c.sql +++ /dev/null @@ -1,44 +0,0 @@ -SELECT MIN(cn.name) AS company_name, - MIN(lt.link) AS link_type, - MIN(t.title) AS western_follow_up -FROM company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cn.country_code !='[pl]' - AND (cn.name LIKE '%Film%' - OR cn.name LIKE '%Warner%') - AND ct.kind ='production companies' - AND k.keyword ='sequel' - AND lt.link LIKE '%follow%' - AND mc.note IS NULL - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German', - 'English') - AND t.production_year BETWEEN 1950 AND 2010 - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND mi.movie_id = t.id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND ml.movie_id = mi.movie_id - AND mk.movie_id = mi.movie_id - AND mc.movie_id = mi.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/22.sql b/tune/protox/tests/unittest_jobfull_dir/22.sql deleted file mode 100644 index 19d361d3..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/22.sql +++ /dev/null @@ -1,48 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS western_violent_movie -FROM company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE cn.country_code != '[us]' - AND it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind IN ('movie', - 'episode') - AND mc.note NOT LIKE '%(USA)%' - AND mc.note LIKE '%(200%)%' - AND mi.info IN ('Germany', - 'German', - 'USA', - 'American') - AND mi_idx.info < '7.0' - AND t.production_year > 2008 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND t.id = mc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mk.movie_id = mc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mc.movie_id - AND mc.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/22b.sql b/tune/protox/tests/unittest_jobfull_dir/22b.sql deleted file mode 100644 index bfd174e3..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/22b.sql +++ /dev/null @@ -1,48 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS western_violent_movie -FROM company_name AS cn, - company_type AS ct, - 
info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE cn.country_code != '[us]' - AND it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind IN ('movie', - 'episode') - AND mc.note NOT LIKE '%(USA)%' - AND mc.note LIKE '%(200%)%' - AND mi.info IN ('Germany', - 'German', - 'USA', - 'American') - AND mi_idx.info < '7.0' - AND t.production_year > 2009 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND t.id = mc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mk.movie_id = mc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mc.movie_id - AND mc.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/22c.sql b/tune/protox/tests/unittest_jobfull_dir/22c.sql deleted file mode 100644 index eacec5e0..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/22c.sql +++ /dev/null @@ -1,54 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS western_violent_movie -FROM company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE cn.country_code != '[us]' - AND it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind IN ('movie', - 'episode') - AND mc.note NOT LIKE '%(USA)%' - AND mc.note LIKE '%(200%)%' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Danish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND mi_idx.info < '8.5' - AND t.production_year > 2005 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND t.id = mc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mk.movie_id = mc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mc.movie_id - AND mc.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/22d.sql b/tune/protox/tests/unittest_jobfull_dir/22d.sql deleted file mode 100644 index 3442b43f..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/22d.sql +++ /dev/null @@ -1,52 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS western_violent_movie -FROM company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE cn.country_code != '[us]' - AND it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind IN ('movie', - 'episode') - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Danish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND mi_idx.info < '8.5' - 
AND t.production_year > 2005 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND t.id = mc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mk.movie_id = mc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mc.movie_id - AND mc.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/23.sql b/tune/protox/tests/unittest_jobfull_dir/23.sql deleted file mode 100644 index 60789f38..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/23.sql +++ /dev/null @@ -1,39 +0,0 @@ -SELECT MIN(kt.kind) AS movie_kind, - MIN(t.title) AS complete_us_internet_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - company_name AS cn, - company_type AS ct, - info_type AS it1, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE cct1.kind = 'complete+verified' - AND cn.country_code = '[us]' - AND it1.info = 'release dates' - AND kt.kind IN ('movie') - AND mi.note LIKE '%internet%' - AND mi.info IS NOT NULL - AND (mi.info LIKE 'USA:% 199%' - OR mi.info LIKE 'USA:% 200%') - AND t.production_year > 2000 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = cc.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = cc.movie_id - AND mc.movie_id = cc.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id - AND cct1.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/23b.sql b/tune/protox/tests/unittest_jobfull_dir/23b.sql deleted file mode 100644 index f1ba168f..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/23b.sql +++ /dev/null @@ -1,41 +0,0 @@ -SELECT MIN(kt.kind) AS movie_kind, - MIN(t.title) AS complete_nerdy_internet_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - company_name AS cn, - company_type AS ct, - info_type AS it1, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE cct1.kind = 'complete+verified' - AND cn.country_code = '[us]' - AND it1.info = 'release dates' - AND k.keyword IN ('nerd', - 'loner', - 'alienation', - 'dignity') - AND kt.kind IN ('movie') - AND mi.note LIKE '%internet%' - AND mi.info LIKE 'USA:% 200%' - AND t.production_year > 2000 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = cc.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = cc.movie_id - AND mc.movie_id = cc.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id - AND cct1.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/23c.sql b/tune/protox/tests/unittest_jobfull_dir/23c.sql deleted file mode 100644 index 26895f48..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/23c.sql +++ /dev/null @@ -1,42 +0,0 @@ -SELECT MIN(kt.kind) AS movie_kind, - MIN(t.title) AS complete_us_internet_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - company_name AS cn, - 
company_type AS ct, - info_type AS it1, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE cct1.kind = 'complete+verified' - AND cn.country_code = '[us]' - AND it1.info = 'release dates' - AND kt.kind IN ('movie', - 'tv movie', - 'video movie', - 'video game') - AND mi.note LIKE '%internet%' - AND mi.info IS NOT NULL - AND (mi.info LIKE 'USA:% 199%' - OR mi.info LIKE 'USA:% 200%') - AND t.production_year > 1990 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = cc.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = cc.movie_id - AND mc.movie_id = cc.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND cn.id = mc.company_id - AND ct.id = mc.company_type_id - AND cct1.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/24.sql b/tune/protox/tests/unittest_jobfull_dir/24.sql deleted file mode 100644 index ba0df9c6..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/24.sql +++ /dev/null @@ -1,50 +0,0 @@ -SELECT MIN(chn.name) AS voiced_char_name, - MIN(n.name) AS voicing_actress_name, - MIN(t.title) AS voiced_action_movie_jap_eng -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note IN ('(voice)', - '(voice: Japanese version)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND it.info = 'release dates' - AND k.keyword IN ('hero', - 'martial-arts', - 'hand-to-hand-combat') - AND mi.info IS NOT NULL - AND (mi.info LIKE 'Japan:%201%' - OR mi.info LIKE 'USA:%201%') - AND n.gender ='f' - AND n.name LIKE '%An%' - AND rt.role ='actress' - AND t.production_year > 2010 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mc.movie_id = mk.movie_id - AND mi.movie_id = ci.movie_id - AND mi.movie_id = mk.movie_id - AND ci.movie_id = mk.movie_id - AND cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id - AND k.id = mk.keyword_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/24b.sql b/tune/protox/tests/unittest_jobfull_dir/24b.sql deleted file mode 100644 index ed2482e1..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/24b.sql +++ /dev/null @@ -1,53 +0,0 @@ -SELECT MIN(chn.name) AS voiced_char_name, - MIN(n.name) AS voicing_actress_name, - MIN(t.title) AS kung_fu_panda -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note IN ('(voice)', - '(voice: Japanese version)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND cn.name = 'DreamWorks Animation' - AND it.info = 'release dates' - AND k.keyword IN ('hero', - 'martial-arts', - 'hand-to-hand-combat', - 'computer-animated-movie') - AND mi.info IS NOT NULL - AND (mi.info LIKE 'Japan:%201%' - OR mi.info LIKE 'USA:%201%') - AND n.gender ='f' - AND n.name LIKE 
'%An%' - AND rt.role ='actress' - AND t.production_year > 2010 - AND t.title LIKE 'Kung Fu Panda%' - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mc.movie_id = mk.movie_id - AND mi.movie_id = ci.movie_id - AND mi.movie_id = mk.movie_id - AND ci.movie_id = mk.movie_id - AND cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id - AND k.id = mk.keyword_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/25.sql b/tune/protox/tests/unittest_jobfull_dir/25.sql deleted file mode 100644 index 80f52d0a..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/25.sql +++ /dev/null @@ -1,42 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(n.name) AS male_writer, - MIN(t.title) AS violent_movie_title -FROM cast_info AS ci, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'blood', - 'gore', - 'death', - 'female-nudity') - AND mi.info = 'Horror' - AND n.gender = 'm' - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi_idx.movie_id = mk.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/25b.sql b/tune/protox/tests/unittest_jobfull_dir/25b.sql deleted file mode 100644 index e752c3ef..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/25b.sql +++ /dev/null @@ -1,44 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(n.name) AS male_writer, - MIN(t.title) AS violent_movie_title -FROM cast_info AS ci, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'blood', - 'gore', - 'death', - 'female-nudity') - AND mi.info = 'Horror' - AND n.gender = 'm' - AND t.production_year > 2010 - AND t.title LIKE 'Vampire%' - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi_idx.movie_id = mk.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/25c.sql b/tune/protox/tests/unittest_jobfull_dir/25c.sql deleted file mode 100644 index 3f0fc18e..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/25c.sql +++ /dev/null @@ -1,49 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(n.name) AS 
male_writer, - MIN(t.title) AS violent_movie_title -FROM cast_info AS ci, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'violence', - 'blood', - 'gore', - 'death', - 'female-nudity', - 'hospital') - AND mi.info IN ('Horror', - 'Action', - 'Sci-Fi', - 'Thriller', - 'Crime', - 'War') - AND n.gender = 'm' - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi_idx.movie_id = mk.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/26.sql b/tune/protox/tests/unittest_jobfull_dir/26.sql deleted file mode 100644 index 3dc3c329..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/26.sql +++ /dev/null @@ -1,53 +0,0 @@ -SELECT MIN(chn.name) AS character_name, - MIN(mi_idx.info) AS rating, - MIN(n.name) AS playing_actor, - MIN(t.title) AS complete_hero_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind LIKE '%complete%' - AND chn.name IS NOT NULL - AND (chn.name LIKE '%man%' - OR chn.name LIKE '%Man%') - AND it2.info = 'rating' - AND k.keyword IN ('superhero', - 'marvel-comics', - 'based-on-comic', - 'tv-special', - 'fight', - 'violence', - 'magnet', - 'web', - 'claw', - 'laser') - AND kt.kind = 'movie' - AND mi_idx.info > '7.0' - AND t.production_year > 2000 - AND kt.id = t.kind_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND t.id = cc.movie_id - AND t.id = mi_idx.movie_id - AND mk.movie_id = ci.movie_id - AND mk.movie_id = cc.movie_id - AND mk.movie_id = mi_idx.movie_id - AND ci.movie_id = cc.movie_id - AND ci.movie_id = mi_idx.movie_id - AND cc.movie_id = mi_idx.movie_id - AND chn.id = ci.person_role_id - AND n.id = ci.person_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/26b.sql b/tune/protox/tests/unittest_jobfull_dir/26b.sql deleted file mode 100644 index 71e912e4..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/26b.sql +++ /dev/null @@ -1,46 +0,0 @@ -SELECT MIN(chn.name) AS character_name, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS complete_hero_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind LIKE '%complete%' - AND chn.name IS NOT NULL - AND (chn.name LIKE '%man%' - OR chn.name LIKE '%Man%') - AND it2.info = 'rating' - AND k.keyword IN ('superhero', - 'marvel-comics', - 'based-on-comic', - 'fight') - AND kt.kind = 'movie' - AND mi_idx.info > '8.0' - AND t.production_year > 2005 - AND kt.id = t.kind_id - AND t.id = 
mk.movie_id - AND t.id = ci.movie_id - AND t.id = cc.movie_id - AND t.id = mi_idx.movie_id - AND mk.movie_id = ci.movie_id - AND mk.movie_id = cc.movie_id - AND mk.movie_id = mi_idx.movie_id - AND ci.movie_id = cc.movie_id - AND ci.movie_id = mi_idx.movie_id - AND cc.movie_id = mi_idx.movie_id - AND chn.id = ci.person_role_id - AND n.id = ci.person_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/26c.sql b/tune/protox/tests/unittest_jobfull_dir/26c.sql deleted file mode 100644 index 45e36c64..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/26c.sql +++ /dev/null @@ -1,51 +0,0 @@ -SELECT MIN(chn.name) AS character_name, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS complete_hero_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind LIKE '%complete%' - AND chn.name IS NOT NULL - AND (chn.name LIKE '%man%' - OR chn.name LIKE '%Man%') - AND it2.info = 'rating' - AND k.keyword IN ('superhero', - 'marvel-comics', - 'based-on-comic', - 'tv-special', - 'fight', - 'violence', - 'magnet', - 'web', - 'claw', - 'laser') - AND kt.kind = 'movie' - AND t.production_year > 2000 - AND kt.id = t.kind_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND t.id = cc.movie_id - AND t.id = mi_idx.movie_id - AND mk.movie_id = ci.movie_id - AND mk.movie_id = cc.movie_id - AND mk.movie_id = mi_idx.movie_id - AND ci.movie_id = cc.movie_id - AND ci.movie_id = mi_idx.movie_id - AND cc.movie_id = mi_idx.movie_id - AND chn.id = ci.person_role_id - AND n.id = ci.person_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id - AND it2.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/27.sql b/tune/protox/tests/unittest_jobfull_dir/27.sql deleted file mode 100644 index 091da711..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/27.sql +++ /dev/null @@ -1,52 +0,0 @@ -SELECT MIN(cn.name) AS producing_company, - MIN(lt.link) AS link_type, - MIN(t.title) AS complete_western_sequel -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cct1.kind IN ('cast', - 'crew') - AND cct2.kind = 'complete' - AND cn.country_code !='[pl]' - AND (cn.name LIKE '%Film%' - OR cn.name LIKE '%Warner%') - AND ct.kind ='production companies' - AND k.keyword ='sequel' - AND lt.link LIKE '%follow%' - AND mc.note IS NULL - AND mi.info IN ('Sweden', - 'Germany', - 'Swedish', - 'German') - AND t.production_year BETWEEN 1950 AND 2000 - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND mi.movie_id = t.id - AND t.id = cc.movie_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND ml.movie_id = mi.movie_id - AND mk.movie_id = mi.movie_id - AND mc.movie_id = mi.movie_id - AND ml.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND mc.movie_id = 
cc.movie_id - AND mi.movie_id = cc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/27b.sql b/tune/protox/tests/unittest_jobfull_dir/27b.sql deleted file mode 100644 index 580ab710..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/27b.sql +++ /dev/null @@ -1,52 +0,0 @@ -SELECT MIN(cn.name) AS producing_company, - MIN(lt.link) AS link_type, - MIN(t.title) AS complete_western_sequel -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cct1.kind IN ('cast', - 'crew') - AND cct2.kind = 'complete' - AND cn.country_code !='[pl]' - AND (cn.name LIKE '%Film%' - OR cn.name LIKE '%Warner%') - AND ct.kind ='production companies' - AND k.keyword ='sequel' - AND lt.link LIKE '%follow%' - AND mc.note IS NULL - AND mi.info IN ('Sweden', - 'Germany', - 'Swedish', - 'German') - AND t.production_year = 1998 - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND mi.movie_id = t.id - AND t.id = cc.movie_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND ml.movie_id = mi.movie_id - AND mk.movie_id = mi.movie_id - AND mc.movie_id = mi.movie_id - AND ml.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND mc.movie_id = cc.movie_id - AND mi.movie_id = cc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/27c.sql b/tune/protox/tests/unittest_jobfull_dir/27c.sql deleted file mode 100644 index f070c817..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/27c.sql +++ /dev/null @@ -1,56 +0,0 @@ -SELECT MIN(cn.name) AS producing_company, - MIN(lt.link) AS link_type, - MIN(t.title) AS complete_western_sequel -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - company_name AS cn, - company_type AS ct, - keyword AS k, - link_type AS lt, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - movie_link AS ml, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind LIKE 'complete%' - AND cn.country_code !='[pl]' - AND (cn.name LIKE '%Film%' - OR cn.name LIKE '%Warner%') - AND ct.kind ='production companies' - AND k.keyword ='sequel' - AND lt.link LIKE '%follow%' - AND mc.note IS NULL - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German', - 'English') - AND t.production_year BETWEEN 1950 AND 2010 - AND lt.id = ml.link_type_id - AND ml.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND t.id = mc.movie_id - AND mc.company_type_id = ct.id - AND mc.company_id = cn.id - AND mi.movie_id = t.id - AND t.id = cc.movie_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id - AND ml.movie_id = mk.movie_id - AND ml.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND ml.movie_id = mi.movie_id - AND mk.movie_id = mi.movie_id - AND mc.movie_id = mi.movie_id - AND ml.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND mc.movie_id = cc.movie_id - AND mi.movie_id = cc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/28.sql b/tune/protox/tests/unittest_jobfull_dir/28.sql deleted file mode 100644 index 5fea698d..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/28.sql +++ 
/dev/null @@ -1,66 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS complete_euro_dark_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE cct1.kind = 'crew' - AND cct2.kind != 'complete+verified' - AND cn.country_code != '[us]' - AND it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind IN ('movie', - 'episode') - AND mc.note NOT LIKE '%(USA)%' - AND mc.note LIKE '%(200%)%' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Danish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND mi_idx.info < '8.5' - AND t.production_year > 2000 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND t.id = mc.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = cc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = cc.movie_id - AND mc.movie_id = mi_idx.movie_id - AND mc.movie_id = cc.movie_id - AND mi_idx.movie_id = cc.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/28b.sql b/tune/protox/tests/unittest_jobfull_dir/28b.sql deleted file mode 100644 index c368ab93..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/28b.sql +++ /dev/null @@ -1,60 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS complete_euro_dark_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE cct1.kind = 'crew' - AND cct2.kind != 'complete+verified' - AND cn.country_code != '[us]' - AND it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind IN ('movie', - 'episode') - AND mc.note NOT LIKE '%(USA)%' - AND mc.note LIKE '%(200%)%' - AND mi.info IN ('Sweden', - 'Germany', - 'Swedish', - 'German') - AND mi_idx.info > '6.5' - AND t.production_year > 2005 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND t.id = mc.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = cc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = cc.movie_id - AND mc.movie_id = mi_idx.movie_id - AND mc.movie_id = cc.movie_id - AND mi_idx.movie_id = cc.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git 
a/tune/protox/tests/unittest_jobfull_dir/28c.sql b/tune/protox/tests/unittest_jobfull_dir/28c.sql deleted file mode 100644 index 796852a2..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/28c.sql +++ /dev/null @@ -1,66 +0,0 @@ -SELECT MIN(cn.name) AS movie_company, - MIN(mi_idx.info) AS rating, - MIN(t.title) AS complete_euro_dark_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - company_name AS cn, - company_type AS ct, - info_type AS it1, - info_type AS it2, - keyword AS k, - kind_type AS kt, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind = 'complete' - AND cn.country_code != '[us]' - AND it1.info = 'countries' - AND it2.info = 'rating' - AND k.keyword IN ('murder', - 'murder-in-title', - 'blood', - 'violence') - AND kt.kind IN ('movie', - 'episode') - AND mc.note NOT LIKE '%(USA)%' - AND mc.note LIKE '%(200%)%' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Danish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND mi_idx.info < '8.5' - AND t.production_year > 2005 - AND kt.id = t.kind_id - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND t.id = mi_idx.movie_id - AND t.id = mc.movie_id - AND t.id = cc.movie_id - AND mk.movie_id = mi.movie_id - AND mk.movie_id = mi_idx.movie_id - AND mk.movie_id = mc.movie_id - AND mk.movie_id = cc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mc.movie_id - AND mi.movie_id = cc.movie_id - AND mc.movie_id = mi_idx.movie_id - AND mc.movie_id = cc.movie_id - AND mi_idx.movie_id = cc.movie_id - AND k.id = mk.keyword_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND ct.id = mc.company_type_id - AND cn.id = mc.company_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/29.sql b/tune/protox/tests/unittest_jobfull_dir/29.sql deleted file mode 100644 index b4d60298..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/29.sql +++ /dev/null @@ -1,67 +0,0 @@ -SELECT MIN(chn.name) AS voiced_char, - MIN(n.name) AS voicing_actress, - MIN(t.title) AS voiced_animation -FROM aka_name AS an, - complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - info_type AS it3, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - name AS n, - person_info AS pi, - role_type AS rt, - title AS t -WHERE cct1.kind ='cast' - AND cct2.kind ='complete+verified' - AND chn.name = 'Queen' - AND ci.note IN ('(voice)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND it.info = 'release dates' - AND it3.info = 'trivia' - AND k.keyword = 'computer-animation' - AND mi.info IS NOT NULL - AND (mi.info LIKE 'Japan:%200%' - OR mi.info LIKE 'USA:%200%') - AND n.gender ='f' - AND n.name LIKE '%An%' - AND rt.role ='actress' - AND t.title = 'Shrek 2' - AND t.production_year BETWEEN 2000 AND 2010 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = cc.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mc.movie_id = mk.movie_id - AND mc.movie_id = cc.movie_id - AND mi.movie_id = ci.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = cc.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND 
cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id - AND n.id = pi.person_id - AND ci.person_id = pi.person_id - AND it3.id = pi.info_type_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/29b.sql b/tune/protox/tests/unittest_jobfull_dir/29b.sql deleted file mode 100644 index c33d9677..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/29b.sql +++ /dev/null @@ -1,65 +0,0 @@ -SELECT MIN(chn.name) AS voiced_char, - MIN(n.name) AS voicing_actress, - MIN(t.title) AS voiced_animation -FROM aka_name AS an, - complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - info_type AS it3, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - name AS n, - person_info AS pi, - role_type AS rt, - title AS t -WHERE cct1.kind ='cast' - AND cct2.kind ='complete+verified' - AND chn.name = 'Queen' - AND ci.note IN ('(voice)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND it.info = 'release dates' - AND it3.info = 'height' - AND k.keyword = 'computer-animation' - AND mi.info LIKE 'USA:%200%' - AND n.gender ='f' - AND n.name LIKE '%An%' - AND rt.role ='actress' - AND t.title = 'Shrek 2' - AND t.production_year BETWEEN 2000 AND 2005 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = cc.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mc.movie_id = mk.movie_id - AND mc.movie_id = cc.movie_id - AND mi.movie_id = ci.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = cc.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id - AND n.id = pi.person_id - AND ci.person_id = pi.person_id - AND it3.id = pi.info_type_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/29c.sql b/tune/protox/tests/unittest_jobfull_dir/29c.sql deleted file mode 100644 index b2ef8785..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/29c.sql +++ /dev/null @@ -1,66 +0,0 @@ -SELECT MIN(chn.name) AS voiced_char, - MIN(n.name) AS voicing_actress, - MIN(t.title) AS voiced_animation -FROM aka_name AS an, - complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - info_type AS it, - info_type AS it3, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_keyword AS mk, - name AS n, - person_info AS pi, - role_type AS rt, - title AS t -WHERE cct1.kind ='cast' - AND cct2.kind ='complete+verified' - AND ci.note IN ('(voice)', - '(voice: Japanese version)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND it.info = 'release dates' - AND it3.info = 'trivia' - AND k.keyword = 'computer-animation' - AND mi.info IS NOT NULL - AND (mi.info LIKE 'Japan:%200%' - OR mi.info LIKE 'USA:%200%') - AND n.gender ='f' - AND n.name LIKE '%An%' - AND rt.role ='actress' - AND 
t.production_year BETWEEN 2000 AND 2010 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = cc.movie_id - AND mc.movie_id = ci.movie_id - AND mc.movie_id = mi.movie_id - AND mc.movie_id = mk.movie_id - AND mc.movie_id = cc.movie_id - AND mi.movie_id = ci.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = cc.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND cn.id = mc.company_id - AND it.id = mi.info_type_id - AND n.id = ci.person_id - AND rt.id = ci.role_id - AND n.id = an.person_id - AND ci.person_id = an.person_id - AND chn.id = ci.person_role_id - AND n.id = pi.person_id - AND ci.person_id = pi.person_id - AND it3.id = pi.info_type_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/2b.sql b/tune/protox/tests/unittest_jobfull_dir/2b.sql deleted file mode 100644 index 1c594b46..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/2b.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT MIN(t.title) AS movie_title -FROM company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - title AS t -WHERE cn.country_code ='[nl]' - AND k.keyword ='character-name-in-title' - AND cn.id = mc.company_id - AND mc.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/2c.sql b/tune/protox/tests/unittest_jobfull_dir/2c.sql deleted file mode 100644 index 905453e8..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/2c.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT MIN(t.title) AS movie_title -FROM company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - title AS t -WHERE cn.country_code ='[sm]' - AND k.keyword ='character-name-in-title' - AND cn.id = mc.company_id - AND mc.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/2d.sql b/tune/protox/tests/unittest_jobfull_dir/2d.sql deleted file mode 100644 index b7f7cedf..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/2d.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT MIN(t.title) AS movie_title -FROM company_name AS cn, - keyword AS k, - movie_companies AS mc, - movie_keyword AS mk, - title AS t -WHERE cn.country_code ='[us]' - AND k.keyword ='character-name-in-title' - AND cn.id = mc.company_id - AND mc.movie_id = t.id - AND t.id = mk.movie_id - AND mk.keyword_id = k.id - AND mc.movie_id = mk.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/3.sql b/tune/protox/tests/unittest_jobfull_dir/3.sql deleted file mode 100644 index d10f53b4..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/3.sql +++ /dev/null @@ -1,20 +0,0 @@ -SELECT MIN(t.title) AS movie_title -FROM keyword AS k, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE k.keyword LIKE '%sequel%' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German') - AND t.production_year > 2005 - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND mk.movie_id = mi.movie_id - AND k.id = mk.keyword_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/30.sql b/tune/protox/tests/unittest_jobfull_dir/30.sql deleted file mode 100644 index 7b45ac55..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/30.sql +++ /dev/null @@ -1,59 +0,0 @@ -SELECT MIN(mi.info) AS 
movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(n.name) AS writer, - MIN(t.title) AS complete_violent_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - cast_info AS ci, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind IN ('cast', - 'crew') - AND cct2.kind ='complete+verified' - AND ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'violence', - 'blood', - 'gore', - 'death', - 'female-nudity', - 'hospital') - AND mi.info IN ('Horror', - 'Thriller') - AND n.gender = 'm' - AND t.production_year > 2000 - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = cc.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = cc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = cc.movie_id - AND mi_idx.movie_id = mk.movie_id - AND mi_idx.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/30b.sql b/tune/protox/tests/unittest_jobfull_dir/30b.sql deleted file mode 100644 index 91cfa290..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/30b.sql +++ /dev/null @@ -1,62 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(n.name) AS writer, - MIN(t.title) AS complete_gore_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - cast_info AS ci, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind IN ('cast', - 'crew') - AND cct2.kind ='complete+verified' - AND ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'violence', - 'blood', - 'gore', - 'death', - 'female-nudity', - 'hospital') - AND mi.info IN ('Horror', - 'Thriller') - AND n.gender = 'm' - AND t.production_year > 2000 - AND (t.title LIKE '%Freddy%' - OR t.title LIKE '%Jason%' - OR t.title LIKE 'Saw%') - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = cc.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = cc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = cc.movie_id - AND mi_idx.movie_id = mk.movie_id - AND mi_idx.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/30c.sql b/tune/protox/tests/unittest_jobfull_dir/30c.sql deleted file mode 100644 index bfa134b9..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/30c.sql +++ /dev/null @@ -1,61 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - 
MIN(n.name) AS writer, - MIN(t.title) AS complete_violent_movie -FROM complete_cast AS cc, - comp_cast_type AS cct1, - comp_cast_type AS cct2, - cast_info AS ci, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE cct1.kind = 'cast' - AND cct2.kind ='complete+verified' - AND ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'violence', - 'blood', - 'gore', - 'death', - 'female-nudity', - 'hospital') - AND mi.info IN ('Horror', - 'Action', - 'Sci-Fi', - 'Thriller', - 'Crime', - 'War') - AND n.gender = 'm' - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = cc.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = cc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = cc.movie_id - AND mi_idx.movie_id = mk.movie_id - AND mi_idx.movie_id = cc.movie_id - AND mk.movie_id = cc.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id - AND cct1.id = cc.subject_id - AND cct2.id = cc.status_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/31.sql b/tune/protox/tests/unittest_jobfull_dir/31.sql deleted file mode 100644 index da3f1380..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/31.sql +++ /dev/null @@ -1,54 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(n.name) AS writer, - MIN(t.title) AS violent_liongate_movie -FROM cast_info AS ci, - company_name AS cn, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND cn.name LIKE 'Lionsgate%' - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'violence', - 'blood', - 'gore', - 'death', - 'female-nudity', - 'hospital') - AND mi.info IN ('Horror', - 'Thriller') - AND n.gender = 'm' - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = mc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = mc.movie_id - AND mi_idx.movie_id = mk.movie_id - AND mi_idx.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id - AND cn.id = mc.company_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/31b.sql b/tune/protox/tests/unittest_jobfull_dir/31b.sql deleted file mode 100644 index a6c506e2..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/31b.sql +++ /dev/null @@ -1,59 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(n.name) AS writer, - MIN(t.title) AS violent_liongate_movie -FROM cast_info AS ci, - company_name AS cn, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS 
n, - title AS t -WHERE ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND cn.name LIKE 'Lionsgate%' - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'violence', - 'blood', - 'gore', - 'death', - 'female-nudity', - 'hospital') - AND mc.note LIKE '%(Blu-ray)%' - AND mi.info IN ('Horror', - 'Thriller') - AND n.gender = 'm' - AND t.production_year > 2000 - AND (t.title LIKE '%Freddy%' - OR t.title LIKE '%Jason%' - OR t.title LIKE 'Saw%') - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = mc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = mc.movie_id - AND mi_idx.movie_id = mk.movie_id - AND mi_idx.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id - AND cn.id = mc.company_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/31c.sql b/tune/protox/tests/unittest_jobfull_dir/31c.sql deleted file mode 100644 index d96d20ca..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/31c.sql +++ /dev/null @@ -1,57 +0,0 @@ -SELECT MIN(mi.info) AS movie_budget, - MIN(mi_idx.info) AS movie_votes, - MIN(n.name) AS writer, - MIN(t.title) AS violent_liongate_movie -FROM cast_info AS ci, - company_name AS cn, - info_type AS it1, - info_type AS it2, - keyword AS k, - movie_companies AS mc, - movie_info AS mi, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - name AS n, - title AS t -WHERE ci.note IN ('(writer)', - '(head writer)', - '(written by)', - '(story)', - '(story editor)') - AND cn.name LIKE 'Lionsgate%' - AND it1.info = 'genres' - AND it2.info = 'votes' - AND k.keyword IN ('murder', - 'violence', - 'blood', - 'gore', - 'death', - 'female-nudity', - 'hospital') - AND mi.info IN ('Horror', - 'Action', - 'Sci-Fi', - 'Thriller', - 'Crime', - 'War') - AND t.id = mi.movie_id - AND t.id = mi_idx.movie_id - AND t.id = ci.movie_id - AND t.id = mk.movie_id - AND t.id = mc.movie_id - AND ci.movie_id = mi.movie_id - AND ci.movie_id = mi_idx.movie_id - AND ci.movie_id = mk.movie_id - AND ci.movie_id = mc.movie_id - AND mi.movie_id = mi_idx.movie_id - AND mi.movie_id = mk.movie_id - AND mi.movie_id = mc.movie_id - AND mi_idx.movie_id = mk.movie_id - AND mi_idx.movie_id = mc.movie_id - AND mk.movie_id = mc.movie_id - AND n.id = ci.person_id - AND it1.id = mi.info_type_id - AND it2.id = mi_idx.info_type_id - AND k.id = mk.keyword_id - AND cn.id = mc.company_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/32.sql b/tune/protox/tests/unittest_jobfull_dir/32.sql deleted file mode 100644 index f099dac8..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/32.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT MIN(lt.link) AS link_type, - MIN(t1.title) AS first_movie, - MIN(t2.title) AS second_movie -FROM keyword AS k, - link_type AS lt, - movie_keyword AS mk, - movie_link AS ml, - title AS t1, - title AS t2 -WHERE k.keyword ='10,000-mile-club' - AND mk.keyword_id = k.id - AND t1.id = mk.movie_id - AND ml.movie_id = t1.id - AND ml.linked_movie_id = t2.id - AND lt.id = ml.link_type_id - AND mk.movie_id = t1.id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/32b.sql b/tune/protox/tests/unittest_jobfull_dir/32b.sql deleted file mode 100644 index e5806e74..00000000 
--- a/tune/protox/tests/unittest_jobfull_dir/32b.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT MIN(lt.link) AS link_type, - MIN(t1.title) AS first_movie, - MIN(t2.title) AS second_movie -FROM keyword AS k, - link_type AS lt, - movie_keyword AS mk, - movie_link AS ml, - title AS t1, - title AS t2 -WHERE k.keyword ='character-name-in-title' - AND mk.keyword_id = k.id - AND t1.id = mk.movie_id - AND ml.movie_id = t1.id - AND ml.linked_movie_id = t2.id - AND lt.id = ml.link_type_id - AND mk.movie_id = t1.id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/33.sql b/tune/protox/tests/unittest_jobfull_dir/33.sql deleted file mode 100644 index 4f63aa69..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/33.sql +++ /dev/null @@ -1,50 +0,0 @@ -SELECT MIN(cn1.name) AS first_company, - MIN(cn2.name) AS second_company, - MIN(mi_idx1.info) AS first_rating, - MIN(mi_idx2.info) AS second_rating, - MIN(t1.title) AS first_movie, - MIN(t2.title) AS second_movie -FROM company_name AS cn1, - company_name AS cn2, - info_type AS it1, - info_type AS it2, - kind_type AS kt1, - kind_type AS kt2, - link_type AS lt, - movie_companies AS mc1, - movie_companies AS mc2, - movie_info_idx AS mi_idx1, - movie_info_idx AS mi_idx2, - movie_link AS ml, - title AS t1, - title AS t2 -WHERE cn1.country_code = '[us]' - AND it1.info = 'rating' - AND it2.info = 'rating' - AND kt1.kind IN ('tv series') - AND kt2.kind IN ('tv series') - AND lt.link IN ('sequel', - 'follows', - 'followed by') - AND mi_idx2.info < '3.0' - AND t2.production_year BETWEEN 2005 AND 2008 - AND lt.id = ml.link_type_id - AND t1.id = ml.movie_id - AND t2.id = ml.linked_movie_id - AND it1.id = mi_idx1.info_type_id - AND t1.id = mi_idx1.movie_id - AND kt1.id = t1.kind_id - AND cn1.id = mc1.company_id - AND t1.id = mc1.movie_id - AND ml.movie_id = mi_idx1.movie_id - AND ml.movie_id = mc1.movie_id - AND mi_idx1.movie_id = mc1.movie_id - AND it2.id = mi_idx2.info_type_id - AND t2.id = mi_idx2.movie_id - AND kt2.id = t2.kind_id - AND cn2.id = mc2.company_id - AND t2.id = mc2.movie_id - AND ml.linked_movie_id = mi_idx2.movie_id - AND ml.linked_movie_id = mc2.movie_id - AND mi_idx2.movie_id = mc2.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/33b.sql b/tune/protox/tests/unittest_jobfull_dir/33b.sql deleted file mode 100644 index ae7a3f18..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/33b.sql +++ /dev/null @@ -1,48 +0,0 @@ -SELECT MIN(cn1.name) AS first_company, - MIN(cn2.name) AS second_company, - MIN(mi_idx1.info) AS first_rating, - MIN(mi_idx2.info) AS second_rating, - MIN(t1.title) AS first_movie, - MIN(t2.title) AS second_movie -FROM company_name AS cn1, - company_name AS cn2, - info_type AS it1, - info_type AS it2, - kind_type AS kt1, - kind_type AS kt2, - link_type AS lt, - movie_companies AS mc1, - movie_companies AS mc2, - movie_info_idx AS mi_idx1, - movie_info_idx AS mi_idx2, - movie_link AS ml, - title AS t1, - title AS t2 -WHERE cn1.country_code = '[nl]' - AND it1.info = 'rating' - AND it2.info = 'rating' - AND kt1.kind IN ('tv series') - AND kt2.kind IN ('tv series') - AND lt.link LIKE '%follow%' - AND mi_idx2.info < '3.0' - AND t2.production_year = 2007 - AND lt.id = ml.link_type_id - AND t1.id = ml.movie_id - AND t2.id = ml.linked_movie_id - AND it1.id = mi_idx1.info_type_id - AND t1.id = mi_idx1.movie_id - AND kt1.id = t1.kind_id - AND cn1.id = mc1.company_id - AND t1.id = mc1.movie_id - AND ml.movie_id = mi_idx1.movie_id - AND ml.movie_id = mc1.movie_id - AND mi_idx1.movie_id = mc1.movie_id - AND it2.id = 
mi_idx2.info_type_id - AND t2.id = mi_idx2.movie_id - AND kt2.id = t2.kind_id - AND cn2.id = mc2.company_id - AND t2.id = mc2.movie_id - AND ml.linked_movie_id = mi_idx2.movie_id - AND ml.linked_movie_id = mc2.movie_id - AND mi_idx2.movie_id = mc2.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/33c.sql b/tune/protox/tests/unittest_jobfull_dir/33c.sql deleted file mode 100644 index fd4d62cc..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/33c.sql +++ /dev/null @@ -1,52 +0,0 @@ -SELECT MIN(cn1.name) AS first_company, - MIN(cn2.name) AS second_company, - MIN(mi_idx1.info) AS first_rating, - MIN(mi_idx2.info) AS second_rating, - MIN(t1.title) AS first_movie, - MIN(t2.title) AS second_movie -FROM company_name AS cn1, - company_name AS cn2, - info_type AS it1, - info_type AS it2, - kind_type AS kt1, - kind_type AS kt2, - link_type AS lt, - movie_companies AS mc1, - movie_companies AS mc2, - movie_info_idx AS mi_idx1, - movie_info_idx AS mi_idx2, - movie_link AS ml, - title AS t1, - title AS t2 -WHERE cn1.country_code != '[us]' - AND it1.info = 'rating' - AND it2.info = 'rating' - AND kt1.kind IN ('tv series', - 'episode') - AND kt2.kind IN ('tv series', - 'episode') - AND lt.link IN ('sequel', - 'follows', - 'followed by') - AND mi_idx2.info < '3.5' - AND t2.production_year BETWEEN 2000 AND 2010 - AND lt.id = ml.link_type_id - AND t1.id = ml.movie_id - AND t2.id = ml.linked_movie_id - AND it1.id = mi_idx1.info_type_id - AND t1.id = mi_idx1.movie_id - AND kt1.id = t1.kind_id - AND cn1.id = mc1.company_id - AND t1.id = mc1.movie_id - AND ml.movie_id = mi_idx1.movie_id - AND ml.movie_id = mc1.movie_id - AND mi_idx1.movie_id = mc1.movie_id - AND it2.id = mi_idx2.info_type_id - AND t2.id = mi_idx2.movie_id - AND kt2.id = t2.kind_id - AND cn2.id = mc2.company_id - AND t2.id = mc2.movie_id - AND ml.linked_movie_id = mi_idx2.movie_id - AND ml.linked_movie_id = mc2.movie_id - AND mi_idx2.movie_id = mc2.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/3b.sql b/tune/protox/tests/unittest_jobfull_dir/3b.sql deleted file mode 100644 index d50d14a7..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/3b.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT MIN(t.title) AS movie_title -FROM keyword AS k, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE k.keyword LIKE '%sequel%' - AND mi.info IN ('Bulgaria') - AND t.production_year > 2010 - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND mk.movie_id = mi.movie_id - AND k.id = mk.keyword_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/3c.sql b/tune/protox/tests/unittest_jobfull_dir/3c.sql deleted file mode 100644 index 44efbc83..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/3c.sql +++ /dev/null @@ -1,22 +0,0 @@ -SELECT MIN(t.title) AS movie_title -FROM keyword AS k, - movie_info AS mi, - movie_keyword AS mk, - title AS t -WHERE k.keyword LIKE '%sequel%' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND t.production_year > 1990 - AND t.id = mi.movie_id - AND t.id = mk.movie_id - AND mk.movie_id = mi.movie_id - AND k.id = mk.keyword_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/4.sql b/tune/protox/tests/unittest_jobfull_dir/4.sql deleted file mode 100644 index bac1b786..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/4.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT MIN(mi_idx.info) AS rating, - MIN(t.title) AS movie_title -FROM info_type AS it, - keyword AS k, - movie_info_idx AS mi_idx, - 
movie_keyword AS mk, - title AS t -WHERE it.info ='rating' - AND k.keyword LIKE '%sequel%' - AND mi_idx.info > '5.0' - AND t.production_year > 2005 - AND t.id = mi_idx.movie_id - AND t.id = mk.movie_id - AND mk.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/4b.sql b/tune/protox/tests/unittest_jobfull_dir/4b.sql deleted file mode 100644 index d108d8d8..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/4b.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT MIN(mi_idx.info) AS rating, - MIN(t.title) AS movie_title -FROM info_type AS it, - keyword AS k, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE it.info ='rating' - AND k.keyword LIKE '%sequel%' - AND mi_idx.info > '9.0' - AND t.production_year > 2010 - AND t.id = mi_idx.movie_id - AND t.id = mk.movie_id - AND mk.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/4c.sql b/tune/protox/tests/unittest_jobfull_dir/4c.sql deleted file mode 100644 index 7cf7c97e..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/4c.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT MIN(mi_idx.info) AS rating, - MIN(t.title) AS movie_title -FROM info_type AS it, - keyword AS k, - movie_info_idx AS mi_idx, - movie_keyword AS mk, - title AS t -WHERE it.info ='rating' - AND k.keyword LIKE '%sequel%' - AND mi_idx.info > '2.0' - AND t.production_year > 1990 - AND t.id = mi_idx.movie_id - AND t.id = mk.movie_id - AND mk.movie_id = mi_idx.movie_id - AND k.id = mk.keyword_id - AND it.id = mi_idx.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/5.sql b/tune/protox/tests/unittest_jobfull_dir/5.sql deleted file mode 100644 index 80ab76d0..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/5.sql +++ /dev/null @@ -1,24 +0,0 @@ -SELECT MIN(t.title) AS typical_european_movie -FROM company_type AS ct, - info_type AS it, - movie_companies AS mc, - movie_info AS mi, - title AS t -WHERE ct.kind = 'production companies' - AND mc.note LIKE '%(theatrical)%' - AND mc.note LIKE '%(France)%' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German') - AND t.production_year > 2005 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND mc.movie_id = mi.movie_id - AND ct.id = mc.company_type_id - AND it.id = mi.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/5b.sql b/tune/protox/tests/unittest_jobfull_dir/5b.sql deleted file mode 100644 index 63e27e64..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/5b.sql +++ /dev/null @@ -1,19 +0,0 @@ -SELECT MIN(t.title) AS american_vhs_movie -FROM company_type AS ct, - info_type AS it, - movie_companies AS mc, - movie_info AS mi, - title AS t -WHERE ct.kind = 'production companies' - AND mc.note LIKE '%(VHS)%' - AND mc.note LIKE '%(USA)%' - AND mc.note LIKE '%(1994)%' - AND mi.info IN ('USA', - 'America') - AND t.production_year > 2010 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND mc.movie_id = mi.movie_id - AND ct.id = mc.company_type_id - AND it.id = mi.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/5c.sql b/tune/protox/tests/unittest_jobfull_dir/5c.sql deleted file mode 100644 index faabd4f7..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/5c.sql +++ /dev/null @@ -1,26 +0,0 @@ -SELECT MIN(t.title) AS american_movie -FROM company_type AS ct, - info_type AS it, - movie_companies AS mc, - movie_info AS mi, - 
title AS t -WHERE ct.kind = 'production companies' - AND mc.note NOT LIKE '%(TV)%' - AND mc.note LIKE '%(USA)%' - AND mi.info IN ('Sweden', - 'Norway', - 'Germany', - 'Denmark', - 'Swedish', - 'Denish', - 'Norwegian', - 'German', - 'USA', - 'American') - AND t.production_year > 1990 - AND t.id = mi.movie_id - AND t.id = mc.movie_id - AND mc.movie_id = mi.movie_id - AND ct.id = mc.company_type_id - AND it.id = mi.info_type_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/6.sql b/tune/protox/tests/unittest_jobfull_dir/6.sql deleted file mode 100644 index dacef7c0..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/6.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT MIN(k.keyword) AS movie_keyword, - MIN(n.name) AS actor_name, - MIN(t.title) AS marvel_movie -FROM cast_info AS ci, - keyword AS k, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword = 'marvel-cinematic-universe' - AND n.name LIKE '%Downey%Robert%' - AND t.production_year > 2010 - AND k.id = mk.keyword_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mk.movie_id - AND n.id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/6b.sql b/tune/protox/tests/unittest_jobfull_dir/6b.sql deleted file mode 100644 index 011ab47a..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/6b.sql +++ /dev/null @@ -1,24 +0,0 @@ -SELECT MIN(k.keyword) AS movie_keyword, - MIN(n.name) AS actor_name, - MIN(t.title) AS hero_movie -FROM cast_info AS ci, - keyword AS k, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword IN ('superhero', - 'sequel', - 'second-part', - 'marvel-comics', - 'based-on-comic', - 'tv-special', - 'fight', - 'violence') - AND n.name LIKE '%Downey%Robert%' - AND t.production_year > 2014 - AND k.id = mk.keyword_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mk.movie_id - AND n.id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/6c.sql b/tune/protox/tests/unittest_jobfull_dir/6c.sql deleted file mode 100644 index cc55ef2b..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/6c.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT MIN(k.keyword) AS movie_keyword, - MIN(n.name) AS actor_name, - MIN(t.title) AS marvel_movie -FROM cast_info AS ci, - keyword AS k, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword = 'marvel-cinematic-universe' - AND n.name LIKE '%Downey%Robert%' - AND t.production_year > 2014 - AND k.id = mk.keyword_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mk.movie_id - AND n.id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/6d.sql b/tune/protox/tests/unittest_jobfull_dir/6d.sql deleted file mode 100644 index 9b317e15..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/6d.sql +++ /dev/null @@ -1,24 +0,0 @@ -SELECT MIN(k.keyword) AS movie_keyword, - MIN(n.name) AS actor_name, - MIN(t.title) AS hero_movie -FROM cast_info AS ci, - keyword AS k, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword IN ('superhero', - 'sequel', - 'second-part', - 'marvel-comics', - 'based-on-comic', - 'tv-special', - 'fight', - 'violence') - AND n.name LIKE '%Downey%Robert%' - AND t.production_year > 2000 - AND k.id = mk.keyword_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mk.movie_id - AND n.id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/6e.sql b/tune/protox/tests/unittest_jobfull_dir/6e.sql deleted file mode 100644 index 5e0bc22c..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/6e.sql +++ 
/dev/null @@ -1,17 +0,0 @@ -SELECT MIN(k.keyword) AS movie_keyword, - MIN(n.name) AS actor_name, - MIN(t.title) AS marvel_movie -FROM cast_info AS ci, - keyword AS k, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword = 'marvel-cinematic-universe' - AND n.name LIKE '%Downey%Robert%' - AND t.production_year > 2000 - AND k.id = mk.keyword_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mk.movie_id - AND n.id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/6f.sql b/tune/protox/tests/unittest_jobfull_dir/6f.sql deleted file mode 100644 index 32887f9b..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/6f.sql +++ /dev/null @@ -1,23 +0,0 @@ -SELECT MIN(k.keyword) AS movie_keyword, - MIN(n.name) AS actor_name, - MIN(t.title) AS hero_movie -FROM cast_info AS ci, - keyword AS k, - movie_keyword AS mk, - name AS n, - title AS t -WHERE k.keyword IN ('superhero', - 'sequel', - 'second-part', - 'marvel-comics', - 'based-on-comic', - 'tv-special', - 'fight', - 'violence') - AND t.production_year > 2000 - AND k.id = mk.keyword_id - AND t.id = mk.movie_id - AND t.id = ci.movie_id - AND ci.movie_id = mk.movie_id - AND n.id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/7.sql b/tune/protox/tests/unittest_jobfull_dir/7.sql deleted file mode 100644 index 27cdc801..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/7.sql +++ /dev/null @@ -1,31 +0,0 @@ -SELECT MIN(n.name) AS of_person, - MIN(t.title) AS biography_movie -FROM aka_name AS an, - cast_info AS ci, - info_type AS it, - link_type AS lt, - movie_link AS ml, - name AS n, - person_info AS pi, - title AS t -WHERE an.name LIKE '%a%' - AND it.info ='mini biography' - AND lt.link ='features' - AND n.name_pcode_cf BETWEEN 'A' AND 'F' - AND (n.gender='m' - OR (n.gender = 'f' - AND n.name LIKE 'B%')) - AND pi.note ='Volker Boehm' - AND t.production_year BETWEEN 1980 AND 1995 - AND n.id = an.person_id - AND n.id = pi.person_id - AND ci.person_id = n.id - AND t.id = ci.movie_id - AND ml.linked_movie_id = t.id - AND lt.id = ml.link_type_id - AND it.id = pi.info_type_id - AND pi.person_id = an.person_id - AND pi.person_id = ci.person_id - AND an.person_id = ci.person_id - AND ci.movie_id = ml.linked_movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/7b.sql b/tune/protox/tests/unittest_jobfull_dir/7b.sql deleted file mode 100644 index 04dd3be4..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/7b.sql +++ /dev/null @@ -1,29 +0,0 @@ -SELECT MIN(n.name) AS of_person, - MIN(t.title) AS biography_movie -FROM aka_name AS an, - cast_info AS ci, - info_type AS it, - link_type AS lt, - movie_link AS ml, - name AS n, - person_info AS pi, - title AS t -WHERE an.name LIKE '%a%' - AND it.info ='mini biography' - AND lt.link ='features' - AND n.name_pcode_cf LIKE 'D%' - AND n.gender='m' - AND pi.note ='Volker Boehm' - AND t.production_year BETWEEN 1980 AND 1984 - AND n.id = an.person_id - AND n.id = pi.person_id - AND ci.person_id = n.id - AND t.id = ci.movie_id - AND ml.linked_movie_id = t.id - AND lt.id = ml.link_type_id - AND it.id = pi.info_type_id - AND pi.person_id = an.person_id - AND pi.person_id = ci.person_id - AND an.person_id = ci.person_id - AND ci.movie_id = ml.linked_movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/7c.sql b/tune/protox/tests/unittest_jobfull_dir/7c.sql deleted file mode 100644 index c64785d3..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/7c.sql +++ /dev/null @@ -1,36 +0,0 @@ -SELECT MIN(n.name) AS cast_member_name, - 
MIN(pi.info) AS cast_member_info -FROM aka_name AS an, - cast_info AS ci, - info_type AS it, - link_type AS lt, - movie_link AS ml, - name AS n, - person_info AS pi, - title AS t -WHERE an.name IS NOT NULL - AND (an.name LIKE '%a%' - OR an.name LIKE 'A%') - AND it.info ='mini biography' - AND lt.link IN ('references', - 'referenced in', - 'features', - 'featured in') - AND n.name_pcode_cf BETWEEN 'A' AND 'F' - AND (n.gender='m' - OR (n.gender = 'f' - AND n.name LIKE 'A%')) - AND pi.note IS NOT NULL - AND t.production_year BETWEEN 1980 AND 2010 - AND n.id = an.person_id - AND n.id = pi.person_id - AND ci.person_id = n.id - AND t.id = ci.movie_id - AND ml.linked_movie_id = t.id - AND lt.id = ml.link_type_id - AND it.id = pi.info_type_id - AND pi.person_id = an.person_id - AND pi.person_id = ci.person_id - AND an.person_id = ci.person_id - AND ci.movie_id = ml.linked_movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/8.sql b/tune/protox/tests/unittest_jobfull_dir/8.sql deleted file mode 100644 index 4dd5fc0e..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/8.sql +++ /dev/null @@ -1,25 +0,0 @@ -SELECT MIN(an1.name) AS actress_pseudonym, - MIN(t.title) AS japanese_movie_dubbed -FROM aka_name AS an1, - cast_info AS ci, - company_name AS cn, - movie_companies AS mc, - name AS n1, - role_type AS rt, - title AS t -WHERE ci.note ='(voice: English version)' - AND cn.country_code ='[jp]' - AND mc.note LIKE '%(Japan)%' - AND mc.note NOT LIKE '%(USA)%' - AND n1.name LIKE '%Yo%' - AND n1.name NOT LIKE '%Yu%' - AND rt.role ='actress' - AND an1.person_id = n1.id - AND n1.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.role_id = rt.id - AND an1.person_id = ci.person_id - AND ci.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/8b.sql b/tune/protox/tests/unittest_jobfull_dir/8b.sql deleted file mode 100644 index 7b51fd1f..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/8b.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT MIN(an.name) AS acress_pseudonym, - MIN(t.title) AS japanese_anime_movie -FROM aka_name AS an, - cast_info AS ci, - company_name AS cn, - movie_companies AS mc, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note ='(voice: English version)' - AND cn.country_code ='[jp]' - AND mc.note LIKE '%(Japan)%' - AND mc.note NOT LIKE '%(USA)%' - AND (mc.note LIKE '%(2006)%' - OR mc.note LIKE '%(2007)%') - AND n.name LIKE '%Yo%' - AND n.name NOT LIKE '%Yu%' - AND rt.role ='actress' - AND t.production_year BETWEEN 2006 AND 2007 - AND (t.title LIKE 'One Piece%' - OR t.title LIKE 'Dragon Ball Z%') - AND an.person_id = n.id - AND n.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.role_id = rt.id - AND an.person_id = ci.person_id - AND ci.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/8c.sql b/tune/protox/tests/unittest_jobfull_dir/8c.sql deleted file mode 100644 index 837cb788..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/8c.sql +++ /dev/null @@ -1,20 +0,0 @@ -SELECT MIN(a1.name) AS writer_pseudo_name, - MIN(t.title) AS movie_title -FROM aka_name AS a1, - cast_info AS ci, - company_name AS cn, - movie_companies AS mc, - name AS n1, - role_type AS rt, - title AS t -WHERE cn.country_code ='[us]' - AND rt.role ='writer' - AND a1.person_id = n1.id - AND n1.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.role_id = rt.id - AND a1.person_id = ci.person_id 
- AND ci.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/8d.sql b/tune/protox/tests/unittest_jobfull_dir/8d.sql deleted file mode 100644 index 839ef186..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/8d.sql +++ /dev/null @@ -1,20 +0,0 @@ -SELECT MIN(an1.name) AS costume_designer_pseudo, - MIN(t.title) AS movie_with_costumes -FROM aka_name AS an1, - cast_info AS ci, - company_name AS cn, - movie_companies AS mc, - name AS n1, - role_type AS rt, - title AS t -WHERE cn.country_code ='[us]' - AND rt.role ='costume designer' - AND an1.person_id = n1.id - AND n1.id = ci.person_id - AND ci.movie_id = t.id - AND t.id = mc.movie_id - AND mc.company_id = cn.id - AND ci.role_id = rt.id - AND an1.person_id = ci.person_id - AND ci.movie_id = mc.movie_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/9.sql b/tune/protox/tests/unittest_jobfull_dir/9.sql deleted file mode 100644 index 6d41df82..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/9.sql +++ /dev/null @@ -1,33 +0,0 @@ -SELECT MIN(an.name) AS alternative_name, - MIN(chn.name) AS character_name, - MIN(t.title) AS movie -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - movie_companies AS mc, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note IN ('(voice)', - '(voice: Japanese version)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND mc.note IS NOT NULL - AND (mc.note LIKE '%(USA)%' - OR mc.note LIKE '%(worldwide)%') - AND n.gender ='f' - AND n.name LIKE '%Ang%' - AND rt.role ='actress' - AND t.production_year BETWEEN 2005 AND 2015 - AND ci.movie_id = t.id - AND t.id = mc.movie_id - AND ci.movie_id = mc.movie_id - AND mc.company_id = cn.id - AND ci.role_id = rt.id - AND n.id = ci.person_id - AND chn.id = ci.person_role_id - AND an.person_id = n.id - AND an.person_id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/9b.sql b/tune/protox/tests/unittest_jobfull_dir/9b.sql deleted file mode 100644 index 792ae70a..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/9b.sql +++ /dev/null @@ -1,31 +0,0 @@ -SELECT MIN(an.name) AS alternative_name, - MIN(chn.name) AS voiced_character, - MIN(n.name) AS voicing_actress, - MIN(t.title) AS american_movie -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - movie_companies AS mc, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note = '(voice)' - AND cn.country_code ='[us]' - AND mc.note LIKE '%(200%)%' - AND (mc.note LIKE '%(USA)%' - OR mc.note LIKE '%(worldwide)%') - AND n.gender ='f' - AND n.name LIKE '%Angel%' - AND rt.role ='actress' - AND t.production_year BETWEEN 2007 AND 2010 - AND ci.movie_id = t.id - AND t.id = mc.movie_id - AND ci.movie_id = mc.movie_id - AND mc.company_id = cn.id - AND ci.role_id = rt.id - AND n.id = ci.person_id - AND chn.id = ci.person_role_id - AND an.person_id = n.id - AND an.person_id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/9c.sql b/tune/protox/tests/unittest_jobfull_dir/9c.sql deleted file mode 100644 index 2c2f8cca..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/9c.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT MIN(an.name) AS alternative_name, - MIN(chn.name) AS voiced_character_name, - MIN(n.name) AS voicing_actress, - MIN(t.title) AS american_movie -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - movie_companies AS mc, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note IN ('(voice)', - '(voice: Japanese 
version)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND n.gender ='f' - AND n.name LIKE '%An%' - AND rt.role ='actress' - AND ci.movie_id = t.id - AND t.id = mc.movie_id - AND ci.movie_id = mc.movie_id - AND mc.company_id = cn.id - AND ci.role_id = rt.id - AND n.id = ci.person_id - AND chn.id = ci.person_role_id - AND an.person_id = n.id - AND an.person_id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/9d.sql b/tune/protox/tests/unittest_jobfull_dir/9d.sql deleted file mode 100644 index 99bc63d7..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/9d.sql +++ /dev/null @@ -1,29 +0,0 @@ -SELECT MIN(an.name) AS alternative_name, - MIN(chn.name) AS voiced_char_name, - MIN(n.name) AS voicing_actress, - MIN(t.title) AS american_movie -FROM aka_name AS an, - char_name AS chn, - cast_info AS ci, - company_name AS cn, - movie_companies AS mc, - name AS n, - role_type AS rt, - title AS t -WHERE ci.note IN ('(voice)', - '(voice: Japanese version)', - '(voice) (uncredited)', - '(voice: English version)') - AND cn.country_code ='[us]' - AND n.gender ='f' - AND rt.role ='actress' - AND ci.movie_id = t.id - AND t.id = mc.movie_id - AND ci.movie_id = mc.movie_id - AND mc.company_id = cn.id - AND ci.role_id = rt.id - AND n.id = ci.person_id - AND chn.id = ci.person_role_id - AND an.person_id = n.id - AND an.person_id = ci.person_id; - diff --git a/tune/protox/tests/unittest_jobfull_dir/order.txt b/tune/protox/tests/unittest_jobfull_dir/order.txt deleted file mode 100644 index 30016f15..00000000 --- a/tune/protox/tests/unittest_jobfull_dir/order.txt +++ /dev/null @@ -1,113 +0,0 @@ -Q1,1.sql -Q2,1b.sql -Q3,1c.sql -Q4,1d.sql -Q5,2.sql -Q6,2b.sql -Q7,2c.sql -Q8,2d.sql -Q9,3.sql -Q10,3b.sql -Q11,3c.sql -Q12,4.sql -Q13,4b.sql -Q14,4c.sql -Q15,5.sql -Q16,5b.sql -Q17,5c.sql -Q18,6.sql -Q19,6b.sql -Q20,6c.sql -Q21,6d.sql -Q22,6e.sql -Q23,6f.sql -Q24,7.sql -Q25,7b.sql -Q26,7c.sql -Q27,8.sql -Q28,8b.sql -Q29,8c.sql -Q30,8d.sql -Q31,9.sql -Q32,9b.sql -Q33,9c.sql -Q34,9d.sql -Q35,10.sql -Q36,10b.sql -Q37,10c.sql -Q38,11.sql -Q39,11b.sql -Q40,11c.sql -Q41,11d.sql -Q42,12.sql -Q43,12b.sql -Q44,12c.sql -Q45,13.sql -Q46,13b.sql -Q47,13c.sql -Q48,13d.sql -Q49,14.sql -Q50,14b.sql -Q51,14c.sql -Q52,15.sql -Q53,15b.sql -Q54,15c.sql -Q55,15d.sql -Q56,16.sql -Q57,16b.sql -Q58,16c.sql -Q59,16d.sql -Q60,17.sql -Q61,17b.sql -Q62,17c.sql -Q63,17d.sql -Q64,17e.sql -Q65,17f.sql -Q66,18.sql -Q67,18b.sql -Q68,18c.sql -Q69,19.sql -Q70,19b.sql -Q71,19c.sql -Q72,19d.sql -Q73,20.sql -Q74,20b.sql -Q75,20c.sql -Q76,21.sql -Q77,21b.sql -Q78,21c.sql -Q79,22.sql -Q80,22b.sql -Q81,22c.sql -Q82,22d.sql -Q83,23.sql -Q84,23b.sql -Q85,23c.sql -Q86,24.sql -Q87,24b.sql -Q88,25.sql -Q89,25b.sql -Q90,25c.sql -Q91,26.sql -Q92,26b.sql -Q93,26c.sql -Q94,27.sql -Q95,27b.sql -Q96,27c.sql -Q97,28.sql -Q98,28b.sql -Q99,28c.sql -Q100,29.sql -Q101,29b.sql -Q102,29c.sql -Q103,30.sql -Q104,30b.sql -Q105,30c.sql -Q106,31.sql -Q107,31b.sql -Q108,31c.sql -Q109,32.sql -Q110,32b.sql -Q111,33.sql -Q112,33b.sql -Q113,33c.sql diff --git a/tune/protox/tests/unittest_primitive.py b/tune/protox/tests/unittest_primitive.py deleted file mode 100644 index d7590d80..00000000 --- a/tune/protox/tests/unittest_primitive.py +++ /dev/null @@ -1,150 +0,0 @@ -import unittest - -from tune.protox.env.space.primitive.index import IndexAction -from tune.protox.env.space.primitive.knob import Knob -from tune.protox.env.space.primitive.latent_knob import LatentKnob - - -class PrimitivesTests(unittest.TestCase): - - def 
test_linear_knob(self) -> None: - k = Knob( - table_name=None, - query_name="q", - knob_name="kn", - metadata={ - "type": "float", - "min": 0.0, - "max": 1.0, - "quantize": 10, - "log_scale": False, - "unit": 0, - }, - do_quantize=True, - default_quantize_factor=10, - seed=0, - ) - self.assertEqual(k.name(), "q_kn") - self.assertEqual(k.bucket_size, 0.1) - self.assertEqual(k.project_scraped_setting(0.5), 0.5) - self.assertEqual(k.project_scraped_setting(0.58), 0.5) - self.assertEqual(round(k.project_scraped_setting(0.62), 2), 0.6) - - def test_log_knob(self) -> None: - k = Knob( - table_name=None, - query_name="q", - knob_name="kn", - metadata={ - "type": "integer", - "min": 1.0, - "max": 1024.0, - "quantize": 0, - "log_scale": True, - "unit": 0, - }, - do_quantize=True, - default_quantize_factor=10, - seed=0, - ) - self.assertEqual(k.name(), "q_kn") - self.assertEqual(k.project_scraped_setting(1), 1.0) - self.assertEqual(k.project_scraped_setting(2), 2.0) - self.assertEqual(k.project_scraped_setting(24), 32.0) - self.assertEqual(k.project_scraped_setting(1024), 1024.0) - - def test_latent_knob(self) -> None: - k = LatentKnob( - table_name=None, - query_name="q", - knob_name="kn", - metadata={ - "type": "float", - "min": 0.0, - "max": 1.0, - "quantize": 10, - "log_scale": False, - "unit": 0, - }, - do_quantize=True, - default_quantize_factor=10, - seed=0, - ) - self.assertEqual(k.name(), "q_kn") - self.assertEqual(k.bucket_size, 0.1) - self.assertEqual(k.project_scraped_setting(0.5), 0.5) - self.assertEqual(k.project_scraped_setting(0.58), 0.5) - self.assertEqual(round(k.project_scraped_setting(0.62), 2), 0.6) - - self.assertEqual(k.to_latent(0.5), 0.0) - self.assertEqual(k.from_latent(-1.0), 0.0) - self.assertEqual(k.from_latent(1.0), 1.0) - self.assertEqual(k.from_latent(0.5), 0.7) - - self.assertEqual(k.shift_offset(0.5, 0), None) - self.assertEqual(k.shift_offset(0.5, 1), 0.6) - self.assertEqual(k.shift_offset(0.5, -2), 0.3) - - def test_ia(self) -> None: - ia1 = IndexAction( - idx_type="btree", - tbl="tbl", - columns=["a", "b", "c"], - col_idxs=None, - inc_names=["d", "e"], - raw_repr=None, - bias=0.0, - ) - IndexAction.index_name_counter = 0 - self.assertEqual( - ia1.sql(add=True), - "CREATE INDEX index0 ON tbl USING btree (a,b,c) INCLUDE (d,e)", - ) - - ia2 = IndexAction( - idx_type="btree", - tbl="tbl", - columns=["a", "b", "c"], - col_idxs=None, - inc_names=["d", "e"], - raw_repr=None, - bias=0.0, - ) - self.assertEqual(ia1, ia2) - - ia3 = IndexAction( - idx_type="btree", - tbl="tbl", - columns=["a", "b", "c"], - col_idxs=None, - inc_names=[], - raw_repr=None, - bias=0.0, - ) - self.assertNotEqual(ia1, ia3) - - ia4 = IndexAction( - idx_type="btree", - tbl="tbl", - columns=["a", "b"], - col_idxs=None, - inc_names=["d", "e"], - raw_repr=None, - bias=0.0, - ) - self.assertNotEqual(ia1, ia4) - - ia5 = IndexAction( - idx_type="btree", - tbl="tbla", - columns=["a", "b", "c"], - col_idxs=None, - inc_names=["d", "e"], - raw_repr=None, - bias=0.0, - ) - self.assertNotEqual(ia1, ia5) - - -if __name__ == "__main__": - unittest.main() diff --git a/tune/protox/tests/unittest_ref/ref_dsb_idxspace.pkl b/tune/protox/tests/unittest_ref/ref_dsb_idxspace.pkl deleted file mode 100644 index 572c310858272948389b6dafebed6af449d32ced..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7599 zcmc(kd5{$K6~}iu_Lw~fmP0v|H7ZJ6K|m2uKv0l1D*{GPvFYh)_H{EeJ@mouDj04S z?0C_rcqHC8M@mISymFXS%F>vWmFtg`l}eRKS+SytR}AF)`@QL&?p=(Dl`6N2d7t0w 
[base85 binary delta for ref_dsb_idxspace.pkl omitted] diff --git a/tune/protox/tests/unittest_ref/ref_dsb_workload.pkl b/tune/protox/tests/unittest_ref/ref_dsb_workload.pkl deleted file mode 100644 index debeea611938d1b9f1a550ef55fa2721375d0740..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 106915 [base85 binary delta for ref_dsb_workload.pkl omitted]
zuqC8^go6w01uko+yMpwo(!vR3SAJtxZaW=Jc`F)`b}(fqbg38?Qheq6RBwC&FG8R4 zMM&U9=u^H33A_k>$`>KQ624DW!YA+|^eJD21YU$b<%^JTLjUpVVDzhPuDlB%=9A@X3vO1Fii?=f|Igf4WlQ;zv%nM(Vx`1OP^fnT)*j!;{+?`k%-?7(O z88BZ)9d>~iLvPRc%Se&tax*To%v6BKiITN8Q;B`UvO_UP_xz0|XYDY(gV z9_IAUSaO7}BYkN|Mo8K(9p1OLGFl#*afcpD(;?WAPMlfrd&Gd z9iostzpt3y6~@xbG^D1y1@#cdwcfDP!zqjUkJb}Un|P+Z2BJ`n+eKnI$kC)1D1mDV zdTTBC_@PLUxwC}K6&Ulu%@P;%j-fTve`(2L&#)55R0b$zJ0QW;oPKNt;|j~D)>_Xy zno&*LrynM3j_Y&HS%!F5g&RtaDG3fOHETR~Kp$Tre#$4DU4H$s5Rx*UVoWiy-TgkV zCC{#Q!zjxe8}nsto-f-R^QBGKpe>s2uS?R92s?c>!DJz0yR$Lur8zmA{Q}0ZMPf|u zKHS)#!i~M*Iw%n=%LZ{f-dW<^6xbzNRLRhA9 zth^x2*r;|LU|*aF_Ns(%jz@tkAZEjVEYU7}qIcknY~wn<8D6dh_nRx7Uh*cW>RQVe zb}qD?+1Oi8w9HuFY?;*0^HwE29(Pz><`$;HCiRU>Gkiu}*&pkzuC^}eikuUTtNq5~ ztxK4-h{L}z((3s+u|_ff)z=u-vH2XH%-NibO(Aijc2jehD7Tz?Lry=-X%cr6x&}`L zMFuUWOyW}pp$PM1GA-h#pA5q)d>I#tYx$$N;d{p}3pa1?Ak$9|Igo$MinODThfI=bY_*5-S2E7nH=+8ZI*@F5H6R zGy9~=?32!7-~}q{Lud5n)*fmErV{4Ec1e%dz6>yd9^uvjZ9WboQY$+KVg5dhL{vE- zKOK3R8o><$M+d?7uosF5$n$`eSr(?lI)!Mrk_hp_bKY@rZu0}1&F&kM z^~x{B<30IEaeJ5UhUDer*u`L^3&&Qb7zxO=5h41RO5*1N3=B=y1KGxau7ibMB^vdHjeD3(%;8NH$$e za>>neErTx-*!2txAr({^Ns>%xofwP)wQJCf>!&22y<3{V-M`8FTJ~n?l5XX2e8spL zEN1`?J%GI2VBpToY7=L$gnLPN4=^)?j;Tiwcl$m5REg9EQOwz8>xd5a71YlB7hb>| z9R|Or!)CN(oEC10dy--PN6Eb?{z72rgwpa6sGN@hOag=_bR(g3BzREQF7N@VJIpPEZGPT-p9) zcYXR-nfu*+FcLpa-p;&$t7@YE$bGlODF+KoD%6?Li5&O@q8ZT`{DGHlHlyN5_Y=y< z%@FT~i+N2hsRgfwAUcbp7ySsK;dzrLx5)d~oKIoPR$=8gz!Iv`QH0;fjsm`Etx7MXd=)C9ULh32KPoYd zuS>1vT?8MILVcrbG$uiid&{tqJFQa$SWfRaxeK&&SM+xD%YL zU(cSm+wOFVxpMMjy=N=kEf8R6=mM8|Bw8z@@reqyJAtF6n|{v|TIJlCXYF(`4vgCc zEACvepN@1_M({Qy1;X)P7D%~bZjkb#m82w5vrsw*CLWC>XHZ9(3b%vLHn+yq4zb56$i|fAw3f8cmK!{KPq4Y$(I|_ z!~}^8Uu0N>3!zQd;peixaQ!#4g=XT4gBguAvH&xRVwn#w=zf6gCPAA<;PhDqSuyx8T_F1kQKPAzPv(Z3IJ}5PK z3Hv5?@;vAUZId`~rY9XN3ReWA!sMV76{nEg8C);5nm5ytb5oDejeS#(N&DT@W1V4h z)jMZDu6j()q&Fu@kiYL?y1RXMA;&|oQLAGi*AI% zZkPoEezr282rZecCAV#Y1?vZPK{Qn~>t{C5bc15MF#*x)Ol%NavL{n31li4_&R1G^ z)4;u&#S7-t#G3|FAWbS*!e7wBsT&vW+P$Yo8}p}+HcsGU*mf*49L+I5@~afk#K=iH z7CVGyhE0hzVqsM#RwlAu|NdvP zhq{$mkQ5E`=}y+EVj*r$7mFJz7jPy7<2R9u@U5Hhwnk^5>=e0(D5l9paImC(&FAa|dHqB>thocAkOQ|*D+ToV8z9Er;kWb(K1gNE5V76y$mNx*4ypiE~_5_}nNctix zk;2mw!C@QrCVd0K8{42f<~Jz)`@nmw!_C#p%W5vnc&^0v;5TB{TK&%Q4DYR^s^-OwHZW&S#}#Zq>^j}j=veE^e3|;GiG}&~d%}s}K83)Wb&HHn)6mzIn!D!@gRU19c|HM@sqHr6^D#`vD#xC^{kbKvj$&?t?WfMqQb13 zM!o*SR#IH?u-CUS#l{FVkV{th-Ph3eWMMd?C?4B2E{iP zvQq(`E27+B+Q7OZn^C2s7aY1NozGEJIMEHCK>2voXCFZd_PhZfw(pt5hqTq9i z`^I4ez`?RS$VSI9sB})^4cu7G3#-v{1;|9Zx=NWiSY%)4OBR!O)b0&{fV6(R0L9et z;z^W#NJfl@CuRl+k3y2HG{{f9%M1=Ib#fCV9Wfp^IW=kE0AaRL`uGhTP?8dCPT{h@ z-OgXeqHT^Nl=*}mN1*M-kE4=z`h4t_d_2jdhP25~!3hCI=XR%<(rC9Kg9`E?1B~@U z2IZ#?nP~3E38re1$DsmzKEa`adW)fg3@IcXDxMFMh6#{;f(#Q-;)%5(lBQg{JsZk9 zJSNUDK>7(gHrU{H4<0xyjXL6KWvW}koEP}Dnww#8An`>m6HY( zwZZtq@_}`*iLxwjO6Rk-+LvLmW_iW7VehAVc+1 zuES23PN+52w5mJ>xLGx!A(;p3X+Q?=YIqdiOml9i!(j6S0(=Lka-XPHoM@B!ijhH# ze(&Ax>HJv3seH zF*g@4K`eR~FAiGtS~RgT!;ocgyQ`yKNNSm4TxRT>f9(V%Lhy^7WQ!~OMC@-_0l#(D zpSO>lp%Y}C`i%beHRa&L_b*M@EQk#TEczjjbrUK-Qn1)K`;^<@_UHrMism2!Xjm61 z76*C+|8?x&+_UhOM0Z{D*F(7VcD8lpjAdIl$xe`(rz^m#hv3Gwsn1zeFXMoo6pyWL zgW4gEsC=BnxSGvC$?bF7rjc_pY*uJ(UVkGP&K$d|@3?RbB_g=Km@sOo30D%XKVLXD zdy;X9mO6E8E=irnrMBVS-0ZOhUOI-hLskP%rl`zvZ5yI0!pfGIYJ+CoCzzcb6Do6G zoR8_?b@!If+zE@io_o;w2-tV&PQWR*F@(U?6DRRuMjQxvlGS zIq?g$ASbl@rpt&g^2shEfg>La83~m+MH16nDiUSAegW|Yztj8aePwa_z^M*SGMaQK z0=%0nP3!o-f2*s>VrJ-L+)c(fg8Q)Z4W$Rycpq^@yqqlNVhu@hr!@eTT~C(R03LZN zcUl8b`3uStq*EDTd>{kzE6S`q){Bk-_~i%D~^k*s}^byVP%P zv^&e-Onu-jR{rsbE*9Q$<2(K2O<}X#=>mVUw?QxVsh(IPD@II75rVz=e_^HoGx|av@oUxX5tyaapmuPv;#JHhF#827P6)@Cras)0B 
znq~Tt>(x$|ETAhJa{hV% zhh%I!l#-vCklDRU9p089gScE<__ixPAw)|@o*7!ZriY#6PdyN~J4Nq7qR=#|97Gun3+udD#7^20Z|)?2(vgW9SSPgL@cGxEh5lNE8{ay$NN6zu zZr!-YUxUUyx^lU>zT8^z*FfC(Mq0lKFX9c@N}K2*r@_BRL3(Z-Dr}{3o_EFh&qa72 zm>d~TJj&V7j(CQ{osgSkb1e}YU!fD2>#NFrp1|&;d)6J|oKTgEQdkJGd3{y6Rz1bG zg=<0Tt8z8Xp9BkKaC~Mmk9=*MM+T!MIB20?W`e4)110cy&d);a^OQcS@QWc2VeWjQ zQO5ovCmJ&=Zq5swsGK4WFE3p&Hs z=eHMc&I{KyFGMeQru*Z%l+v_?_kB6?o;6s-EMOVwGDVFh4l&#?BZIl5QO1C&#Nr#^dYeJK}s| zYsKZU=Y?M&V?C~(ISzYndmImFS>!e2HgMlKZUfnb2&k0X^e*&zs~WX5kH7i7|L7L| z>wZ?Ej~NkG_P6DC5xs`9%`)}dEj8&VMOylq@Z$o&yqbLo4 z0RxXc7^nK80~PIIk}DAoqFHG%Ue{RUac{%IvZ5&Sk^-$f?@dP3kK%XY!K@JD{ftIY zOgXYVAQ>`tQGwgXfsDp1sqfD3enfFbb8e0#H`;EFB)eB+J0nRV{kBA;zZjE*J}|S4 zyG)?my{U?c$objj>1Not9lphJp0rZMWyc!yXVl=C|m-a<{H@GCsL_slE&q-^7H$K1xF3Ie&e z;+-PW$XO@`bfg9I%_tISbg_wR;xXlWHZPnulltUoTxG{V%KQhYxb6_&+cABguKSvy$XbF5V!&R1O*1em_K@r z)69V4%qXMTOOfJ$zrvemtF~FR$TX-SdXq=;>@ft`&GNimI+7!_9Lc99jbzXwo4>eq zD%?P2-4^rs9VFOG**+8V0b&8G^0S{!{OryVen#HuFXU4fSSHAQlbufCdISt$N|}(3 zal72_huxBtu300LAzNGAHE8|fBXu2DQkj7;U%@TDg(Rg!#QTsyHEt4j*;BBM^2rwk zk_*my7eW0wCjt`)LOe0HDl;<06@%xYjMcj?(rErI#!K#)d3!SG{>$i?i9Vs~l~aDt zV6f%`t-WPQJ&n2O3|orOR4(~bVLZmOdg%m%qr(q37G~{3Yn2=J?!_B?fotGK==C9f zgm}roZBl_#ne|un-McV%3@>*;h&%APO+v7Koj}N`xnp>_147)=c$msl@eQ?1)-D2*YDI@4j@X8gp^q(jFdpD*MrEF5N-L2g_2(-*BVj}x({p3+AYIw zFhaE*zVQ4rjmIB#$B~JWmoV|gU=3WddFD#0gQ0>?#--z{8Txh3EHQi=g~gLMg89Rw_~Tc zt3Lw5ZUGFha6u}Iae`HGE98@iD@xq=gwk3oLCED52K0z?EXQBbl=JWM_$FII6vX37 z3_=p>rEK`AftpK{@CAetNndY@VmS)<8Y9O06$0gg{QU3q!83cMW9G zjYpcCpUp3$!dAF`VAn&eNSB?~(9|$c7T?}{*_GUr@bqZVS$3fq%*=*67gD;8b25C| z6lKNZMa?+fMfYxZb9d5*HA7|I9bG{%hGkUOY>_dndVOPES5~AyYFJ3VhS5wijP8xY zh#`XoGN%_ki(D06cUEOGd*gH4WXzNwUKmSh^5$qu{m|Lhy^lq_ZlLxyDaCVMnLV-G?cPv9f+X;z(n3EQeW~k>am7H`Pb)I|H~f zyXfAb>>z=OZn?jWMpd>0HS3faGtLPYA^8?et(zg_3NiKBZMd#7r-PuW<4Fgj*c}nk zoCa!rMwSbP1B;cf@;N2A^3Jz=h%`)mdA^pqeXsNiaaM@>vpEclhxE2^W(k1cWJK7i za|h~uNNwr2Ej%>mNOcyZ_@q2EK2%Zl#Gqx{|H6(yaatlw+UELn-!7?aMq$dtJ``4g zJb7U&3a~i${{m1m`AxdLQDgJIWNiNTacpMN1?gW*&pYHHBJzFQC;wfr5N{5`p%jH% zV^)2AHwR%Dc7L-6VRQm)cgR)E()*J!{I2Lw)akXLh2!L!%|Z@b&ZTb24l%`0wiqjy zGp;-wAntM2&A`_&<@&lX`T7rSxEVZiLFF@ZwlbW2q|W2^(|4Sv{rpw4+Y(R zF*t$ri4#W~CjonK_GsfF*odcb;Wg=uhS%g3ddxG~?237&`UCQPB3U6%X}9Q{c8Ki5 zTj3N#YK4Rkc(9Y}Z?lSD)sT5`G*K;I6IBb8!1aD@2x{VT3+G;SeHOkA0;6I}%i*T- zuuZ%hV%FYN9y;oLQ+e=gm&H@qF|OA_c_e1d&Q(Rl;u(z(4VP9i3E%={gujdii2vMt z-{#EtHZ43hzj1S>dkZvI_bxs@iVcnKpYd@kleh74kPjXY@9yX0ulV*)`QYWq-8b;@ zdOm1cb*Tiqck^)ok89&Q{9LPwc;LLtWmK0Ri0m@#-ZJ0LU#f^#4k-TB@QF0K%<+FPi1T7~X$p1PmI{dUyM*T3L-bSS#Pow`J!G^ zVEM|rI4@hE_zGg6mG;hto{U-VSbN$MXFre2A|Qq&dXND2#Xi_sb~8kWUq z5kEy@9K(T?*T!$4NgoM}QdB24k{FRCiIK2KP9&sJL?Zhly%LNz$ee7ScQGA<7M^rGXSY1m-T^sL%*&KKORb{NL;L}7Fi8~rQk&HM_)P0cx zwO^ud4SrL-YgMF3+eVEp0wnh0uG&&4HOaIjj%>L$-a~e#T1q2C<3ijLE21u8XI~rN z8J47?LV}~Z)r+cIOlP~1J1mVR<`y#~O=8tvlr!~9rd=B!@STtnstVN{5vtPakV0_@ znOe*t3UUxkx7On|jUq>HgiC0Q|yFOKnIDQQ}Z#=Q1r>NTCpEq-drPw$MDADAgGCcN9u0uxqGm zcXX_{N}{L>tGZ44QdpyuOYl^uNDyjCWYmpu!jW`mjl3Hbm*;`&n;Gj6K@n+PGxHXj zwbK5K@tt88+SQ@KJAF=%@~oj+H*rWgi;Q9>7jq;@b!z(KFo}Z)x7&7oJp;sAwNpd1 zm>|`}C`M{s(Xco{)=7f&RqVNm_D5lQv_E^i3r;UC9UR%}1x-1Ux@b!axg<=~Q4+7A z_-+=*w=>`%a~MRo(8Q5+D{`BK9ubO0VU@lTYRwiJ5i-f!@4&Q`y1-K2ilYr8Rsr!= zgo}>_3pHK6j+yJYcPC#&gGfYa&WW~=k+Rh3uFl#7T$8{~uU8fe;^9%gkYBPY~qu}Ik1{%h@-@jmSR*#yM%6Rwo3A7*2Y zyF-q6K+WaTz*c`oyI;Ig-wBKL-OTu|fF(Hgs+GNkmWlV@Mk$I~QWc6XpPhI1Yi#|t z--4;d-8XX8(XHO5kX^%gzfFhU(l?R=$)Fe{qQxc2x<;_LBtevzsz;4qZ-X`Bw_Q_S z?_sMoYzl=LD=KW*LiRO~qrG-x%-8nY#(PueYRU!)pvaa?i*}JHEhS1M;-W-~T?~=z zh;}t}7u(Ib?i%Xa9nXwZcoU6RsaCN?ScOpYavevcbzUVi{J1_8$F=kAI_f08QF03n 
zRSav1jaVgd5O3IUvt)|3M52;r$NRY-5zh*xoe(GQw-FVYbgex*-iH!#GdV3F{Uwo& zYC|I~l<%@jh-5dAxLuSKgW4MfEm9yZOGyZqdRtwhrh2S*18cdLk)juYk*ASLVa}0~ z$^&FhvY)dpT#}0GqkL|>pMlEt-5}wl@Ly}s!TjHXqNp}BUSbn9IYo(;ct=&D@nSML z2X(r24;R?q;U%O?lZuq?29lZ{7JaesHmH*1d(!S^`x$B;93H`HkY=UyU2D&e_jB8* zo}YzbA#sp`kr0@a8-i*bQFb&ZinORi3mqZeOLIzfi^Dv1@-l<|$ zCk7TZMg1J@R*jgtiesJ8V3MZvweLgiQwF&BCtVX+R}A{VNsV`>g6GEv*eTWFBqL8f zAT*J)s;RJZchd66^4gZa1u`3M2XjL`8lYiBU&FM1hmZJi*~AIuRTHM08Lz7dN=f)ENrF+pfrp!{5G9~tizE{q}Wvy~0y=-Gpg^1G4l zy<{(N!)LlK!#&zDlg)LL{d^`AGphAdOye=ZNzEvX<^ahNU~yZcPsA08_*B<3r$mE#6zeBHNyjS`=~#cqCPL zjpuXYgCNjmY$Emd_aPN$!DiuQawLLlRd_LQ#-p z)QBuIYTgQKIg4d+SoTopck`hZs+iiPHmEg*;Mn+J88hQhsI>5?4DnoR9~;B@dbtp( z2|3?b7Za1xrw)lksv3^&P>nX!f;ClJ&2e}O!o_7FtC}h+RlH(*y{Q`9-Emp2++;^r ze6rkH3a|98ID#oHbRz7*m?)+gd-b+41(oX7m0hg5 zqxMScL7UAVOF{c|-aBZZevlf)ZS%#(51#y;#y#wX9k@4z;#i=Z`YHSYx0 z$?Dz|GQsp6_WUXGgfdR+hcK=|u!6#Z`9`RR~q zD6m#;5NW*F068)~2-ap-0J#e8V;?K0IQv;lYRbG1;JTu-42@mJV zU4}4}uC))_rj$Fg@7@Ou%Dzc?%dg)id5YKiY;N}YDC8K8cDs7+*6u8a`3L+)PUK!f z!YoLPpJ&WU&jCNi;}rbHprUK6SB{V0C|y&0+s##={VT}D-62oK%_#F(JtDPaJ#ri* z1=nTKF<#aV#vJw$8&{{jf6I6mR|%ba7|Er z)_6}4b}RGPc9jPwXqEb%0VIcVcW6OJDBjdjc0 zVX49q77va`QPWhRteP);Hy%PK)#gvI9v{CsbY*b`T9t&^F}+6Vo$sk6jrGZbMdwI zXJUz-ACGr*WO;VEBi_|_Z^yzI-I&bFNkLnU)b?4HOD-;dn?NB46alaA<%LLC` z$Gcq2uOlN}>mz>Fy_AY+Ov zk(lBVC#JX*i7Bp5V(N8S>eej9)kMtshAeekmbyJl(Xx-_Xd=fH4dR%hB^XnCvJ|bi z_>QJsOwm${DO&O|MYAlXXmQ6BZSt6+6%bQ2&|`|emYCwa8&jNRV~SI1OmTvVDb5oy zMI|3owA^BfIyk1N;9`oto0#IdH>RkPVu~6prl?tBiX%3rD9$lO*^eo%>0)XoOVJjI z?`WgM6vaHIXrqSIXuOYde|53B+G!42gCO}F0l$KrFK+5jv-_)-;Gbsq-&lfwn%!Ts z1phRCdz_;t&h&F;TdzVWTg&CTYdxJA_cLkKf(c7M~5@=tSo zhsHm?OtOjhKdkq^W$*c?+5PvH;GbsqKUji)Kn{m3$;l$rfaPBhi;ovY`bU+X5KW2q z|5CO7p5f-7CY&1na17Noqw$dMUkOh}@J|plQW$C-B|oJSE^-wZen#oZl>D6DPl}NO zf?rUETIzmLuQKX>iJ+0<7wf3|6_v2KxfdZIzl!wu^_?q&O+3+LJeLQXt*nb?y8jPz z-MX@{iN_ot^T?9Kv4P6p6yhS-4DjC~e5800(o@awJ1RJtKfkN@srYq&U!Yo!{GsJe z1?>M5$TzwBM@){i>t9dMA1nW4feae<#RXFBgMp4s^6!)l3De43shVE zJ$e9!Um`fH5=AUD zdi;pN{VGXr07}CBRzpJVOdK;BLajtg%`0~x?*x-0MXJ})@}Sjet`#3hh&oRjyh6+8 z2%gH*G+N%N+<}(InH(w3t&Wx_lzZ|ZIa{f_`-K9oFB;ZVU=|7L{(;RL{8P+lOL;9TRtpWcx@KLn z3KDtgb_A*==cOF?nv(fA=n*#R(llg!RejsfaSWBr_;%S)(4HiU9RJm7{=aRYlu7Rs zs2ogRDQK0XuO@sXUo0x@{zp^{vF4wq3-Lzs{mj(;Pxg*~3ci2PkkHZ<)iJ^sg}*^H zhpzv7N+w~w$nuSXWsLYHLrHVl7~wB2<4E7kQGQFc__qmEZSJ=d-u=(k68=HkX@pnn z_|CkJ@2b}E-4p8g9>FVN$DsRr`DUbZGOV!dqwhC_6(#e7g2%!1Pn1sCl_;0iNM{}D z@c;7(_3`m=)P zW98=w8|hR~9V>oOMM499Ny(I>-Cs6P+JOI4plbd9m*9E*|2JVHotk3(u@S#+coVHN zGk)F5W@3A^DuVG_hKwsaBCAsHJ3?9~a=)uLCDVUj(CXCshk~bkNF=K=h5oVSPFeI} zfhJBBGvn9eQ+Kd*@#(zHR`-w5APfusX^tnG;V9KsskV&YI$1%Mrm{4J_`^aBU}uT7 zb(Ux}hu+?WPm7^%xHQiDW;b8Ne0!BFdADECltTxR-E#T%=Gyu{ zty=%53zQ4sTLr6%N`vr`eD0*RPG;U_=;Bca^wHY|OIuCJ(5htKmL{z!J=i^};uGbf zdtC2B56v0{!9z!-KQCCdK_@J4Hs_ooY@~gLNsT;h#j-|zj$p64k?&OT$&Gwm??WTs zWfWwMd{VG#Bfr4%W{rG`u#ut+*~siL4*2zqiq!S*d#sYmJab;~wE5lV1j;4rdBH+6 zzR0juOyDmOyc|6*5<1dGnTATD2fJUU5>ZtyDIF?WQ8GD!gGOE?totV#?)ua0e#8>| zQ(eAwg`=wYRt;Wh#Jb>hOx)0$oHKpF%8nQkI?~}oHn~oHb+4#orZ}z&R9E%iw#+H( z-e+~t7sooP4g4yV3B3H8DvG}&P+ff=(3`SZA0%|7bFb>jbQ<^um8fm$-_xtfgXA0a ze)43{{U&9|4p~7EJ|t+h-QP_3s7ps=h01SJnOZ8pU9Ton`Cse(6e_<{8A>X@OVBEn z-%a>P=VwV}E>-`np-Ru%p~2rrX!oNg?EEvu5r~N}nRe?X@Sm!dZ1DdxRq+>A1^<9V zdTpyD{_|QAf7noxa2;L))juN8M80l^&>uCdsr3I#f&5`ORP*BmjC6p!O6*Tm5&Wcq zQW5@XflBrDvw~Lj^Uo7L(y?b$*!?Tx8UHkEr`UgO6{d~&KMa)8@GAn<3h1xu)kH>i z|Bc>H5zt>_hLMiukVV;%L2&T@Rrxw?@EdxQ3Ch1SP)ho53sg(`@95QJ(tlU)r;z^p z%rMds@oMk>N0knW?msD+QTv~*ICl;z#UX&W;T|R?Yxtqk%ETm ztJ+&r)qaD;qoKfkP_VMby9CW@d?P`lE_Yg3qwfSPe;DKkw(2h#TZ}8%lH~fWRi=NY 
z)sl+VX9-l>4{y_}$)fdkBaoXvH;Ta7DuMHp2|Td{fv1c>?njZpf=%7iDqfvGK1c9; z+IU#7w11MdT(TN^%<`oiJ|j>qho8`^DTDJYGmLbLM_mh^Qu%5N&Iz8k;N604-GcX8 zzO)4|2vpmGFV?FmE%;Jq80iRkT?-afKBu%P*hES*yY7;e&rJD_VA1k@$<>iMz19Tt zW2)Ozaj;2ip&~A8CJH`}5n6 zw%^%mKTg=Y+E28fY@cmE)oMS(^n2Rp+ZWo;wV!Xl(Eg(K7q{ALg!bB3+h5UYzmLDa zvi;Sq_WNJlY=0B}|4_62Ed+jB`yaHwz198>{{DyU?{0rj`+M8p*Z#-t?{EJ=`=7S| zhgSQC_~w6X|6Kd$+rQ9i{}NNb-2Syz`;YnS!|gw5|7rUp?LTXOwEH^j*LH8kqt)Go zCti2&Bf%2d z@x*lTPW)+h->t+<_gVbyehD8h^6|I$Smfi&@Mv{gc%sZD{AqSKl$h!E@wYqRW5~w{ zk5>0Go~-&6g1(%OujJ!v`1o2rzK)L%^6~Y2{9Qa+-4Edj{NJoUGu?0Hn}5K^xAXBG ze0(P#-_6H=!^ijV@x6R}A0Pjik00RU2l@E#`1mJ${0JVc?*D`*srsJ@`d|3?Q9gc* zk00mbf8*mP`S>Y5ewvS;;e#KC=>8lZKhMW6@bSO%@r!)?Ydl)rU&oW!|1Ckk$;ZFr z*|Of_ZTe=T0obc-yz*p zb`Kb>Ew%caX`bQLjn*dZXV3}@VsYJN@dp!zo7dWpi}HhrM_ul>RyxDQKI&e-v0(g%@8($KRS@!|Bz)0mm&-|YcqF$ z0T$%fZD!?huKMNPRpsY3%bQ<^iW2&|{}qb(!`Iz!WWwLPK$4Q2`I!MwU=PtLt=w1y_STe-;PLj z27NQ%a&K*;xqg*r_L37P3F0ZZ;LXe@jRP_5CWM&7ri}f_kNQb zAe`5@*urNUFmEHAaPi`5^ODdaLTCi><)vNM!}XMX;mlIj5DkJUWN)Lkdc-tYrtt%k zCYzqq$Ylzo{7muCOOmF%%^(YAw=Qjd$>$GIbbgp3!UJPGiLlyZ1Y+$SG&m|*9<*e` z(zBbzshsk*o7!7x>MN^Y&Iv58(V0M6c?Pv4j}3yXjMi5uE6qV0L1pB~1AH9yY%$&D zhDh4l?%T=ol9alecWUYDJ|`6d8vN1Mnrxg)D$PiVsApgI_egzRhh5X!jRN!KY~}m# z_yFwh$}z3%+D)mpQ&inu*w z=^iIflA~F6w5~WvH%^buaSU+v2&@>X#i=Yx{Txt|6s{^+b@$iV@Lwf^>d$_5e;MzH zmP<_$o1U?Q$|_FOb$^4n?C?`6Og8K*r){zmd(g1yz^mY{Kc_Y2c#Z}Ra0Z#m51w|< zlEHOHx$0{Iy6!=C%-t+kZKFeVxz!(V47HnmTxBR0*|4$2nDp&E;Ybm$zT)z8aoBui zv-=p)e?HMqK6F%R)mCh)9!0C5sPZ~zm1`}J(Scyy_)yC@N($Wzkd*FMuq{{dpuo}D zAo01&osUZIRmm>$(bYi7K``B)lbs;zK1c>tuGQ(DAh_nj1NPgGQ!KYvcXDep^6|7PwNQe%R8>7C%)k#~_SKL8Ww5=jv6MCroZirU*X+FMyk0C7`;82N&B!$!u7V4HU~Ea%sRyt zHh$W|awOL~ypbwSUI&mGq+;3q9w%$OInAGu#%d6*nxIUGGVMcN5$88qC05z3SJ@qG zB{|(s)!+bvj0#m4)>&A$ly~yf-J8S_QXwV108W)>V|9AH-G5QXi@Q*$s_Frp$npx4 zw?}CzA>}Q6H7p_0?a`L9fL#@Cji}x$_u6zs_q*zQz1q)cRJV;@=)T0qIv*`QUdDqB zo-UY`&QDp&qH!Qp7+mSJEm=WYKS1Kfc+_{rJx+Ft8^5|=#bRI2V%JMm_iIs#oTy0t z_GBUDNTtcI5^n`h!Md6|&sfv-6MVhQnlNfAtjpx)x+|$|9xl&6gzITmdh4rK7q9gC zFRk{PE1Mt0W&1DdM*ZZF`1qX3(n6d*y*4whRXk zjMh6t{#slaEG@2d`VS}|0beg2%pM2IO&t>738uMOkBR5I$M1UeG|epxW)}w=U8C!x z#I}PL5}RS-l@L3^bi!l$i=DRcp|pj6xl_7;&06?ZJH>R`W8yU!JL*GVihamyGLf1kV)ja*WBTPZl z@LcPTFwJ{j@p$WwFoluBt5(L3y?ULsaJuP<;_20$p<42&cxrVg?K=^Ak7rYNGH!?| zN)<1d*$K9j88%)}vy)a5n~g#qDBTgJpy7Ej&WOCtJHixnGLLQU1kkmOXBiDJ(YDaVhS?6U@JEAMI7_Qq2vd*-o|oAXrl7BQDcFv9m08F<>aqj6=4OL# v;F**i&?QW<4|#xON7yon;Du;AYGshr`*%uLrsH`T+m7h+k~$hK9smCU=#k%! 
diff --git a/tune/protox/tests/unittest_ref/ref_jobfull_idxspace.pkl b/tune/protox/tests/unittest_ref/ref_jobfull_idxspace.pkl
deleted file mode 100644
index c2c0e2e6bc6b7d691fbca951c425f341b0d73854..0000000000000000000000000000000000000000
Binary files a/tune/protox/tests/unittest_ref/ref_jobfull_idxspace.pkl and /dev/null differ
diff --git a/tune/protox/tests/unittest_ref/ref_jobfull_workload.pkl b/tune/protox/tests/unittest_ref/ref_jobfull_workload.pkl
deleted file mode 100644
index ab704ea165f96b38edf6740a2f2c2d82b3c39f9f..0000000000000000000000000000000000000000
Binary files a/tune/protox/tests/unittest_ref/ref_jobfull_workload.pkl and /dev/null differ
diff --git a/tune/protox/tests/unittest_ref/ref_tpcc_idxspace.pkl b/tune/protox/tests/unittest_ref/ref_tpcc_idxspace.pkl
deleted file mode 100644
index 2d9083cfb8556f5db25b44e43bc27ab211b848c1..0000000000000000000000000000000000000000
Binary files a/tune/protox/tests/unittest_ref/ref_tpcc_idxspace.pkl and /dev/null differ
diff --git a/tune/protox/tests/unittest_ref/ref_tpcc_workload.pkl b/tune/protox/tests/unittest_ref/ref_tpcc_workload.pkl
deleted file mode 100644
index 5cc45485e8565242fc1707b33e639762cce2fd15..0000000000000000000000000000000000000000
Binary files a/tune/protox/tests/unittest_ref/ref_tpcc_workload.pkl and /dev/null differ
diff --git a/tune/protox/tests/unittest_ref/ref_tpch_idxspace.pkl b/tune/protox/tests/unittest_ref/ref_tpch_idxspace.pkl
deleted file mode 100644
index 91d86cffb1f426853e561eeea2c4ae8823e24719..0000000000000000000000000000000000000000
Binary files a/tune/protox/tests/unittest_ref/ref_tpch_idxspace.pkl and /dev/null differ
diff --git a/tune/protox/tests/unittest_ref/ref_tpch_workload.pkl b/tune/protox/tests/unittest_ref/ref_tpch_workload.pkl
deleted file mode 100644
index 911c84a09b2140b5f7eee28648a29b927aabf30c..0000000000000000000000000000000000000000
Binary files a/tune/protox/tests/unittest_ref/ref_tpch_workload.pkl and /dev/null differ
-AND S_W_ID = 1
-AND S_I_ID = OL_I_ID
-AND S_QUANTITY < 20;
diff --git a/tune/protox/tests/unittest_tpcc_dir/4.sql b/tune/protox/tests/unittest_tpcc_dir/4.sql
deleted file mode 100644
index 7cb476a5..00000000
--- a/tune/protox/tests/unittest_tpcc_dir/4.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-UPDATE OORDER
-SET O_CARRIER_ID = 1
-WHERE O_ID = 2101
-AND O_D_ID = 1
-AND O_W_ID = 1;
diff --git a/tune/protox/tests/unittest_tpcc_dir/5.sql b/tune/protox/tests/unittest_tpcc_dir/5.sql
deleted file mode 100644
index 4934c89e..00000000
--- a/tune/protox/tests/unittest_tpcc_dir/5.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-UPDATE ORDER_LINE
-SET OL_DELIVERY_D = '2023-07-03'
-WHERE OL_O_ID = 2101
-AND OL_D_ID = 1
-AND OL_W_ID = 1;
diff --git a/tune/protox/tests/unittest_tpcc_dir/6.sql b/tune/protox/tests/unittest_tpcc_dir/6.sql
deleted file mode 100644
index 3f3b2b08..00000000
--- a/tune/protox/tests/unittest_tpcc_dir/6.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-SELECT SUM(OL_AMOUNT) AS OL_TOTAL
-FROM ORDER_LINE
-WHERE OL_O_ID = 2101
-AND OL_D_ID = 1
-AND OL_W_ID = 1;
diff --git a/tune/protox/tests/unittest_tpcc_dir/7.sql b/tune/protox/tests/unittest_tpcc_dir/7.sql
deleted file mode 100644
index 993b100d..00000000
--- a/tune/protox/tests/unittest_tpcc_dir/7.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-UPDATE CUSTOMER
-SET C_BALANCE = C_BALANCE + 1,
-C_DELIVERY_CNT = C_DELIVERY_CNT + 1
-WHERE C_W_ID = 1
-AND C_D_ID = 1
-AND C_ID = 40;
diff --git a/tune/protox/tests/unittest_tpcc_dir/8.sql b/tune/protox/tests/unittest_tpcc_dir/8.sql
deleted file mode 100644
index 6d9b7950..00000000
--- a/tune/protox/tests/unittest_tpcc_dir/8.sql
+++ /dev/null
@@ -1,5 +0,0 @@
-SELECT C_DISCOUNT, C_LAST, C_CREDIT
-FROM CUSTOMER
-WHERE C_W_ID = 1
-AND C_D_ID = 1
-AND C_ID = 40;
diff --git a/tune/protox/tests/unittest_tpcc_dir/9.sql b/tune/protox/tests/unittest_tpcc_dir/9.sql
deleted file mode 100644
index b73df2b4..00000000
--- a/tune/protox/tests/unittest_tpcc_dir/9.sql
+++ /dev/null
@@ -1,3 +0,0 @@
-SELECT W_TAX
-FROM WAREHOUSE
-WHERE W_ID = 1
diff --git a/tune/protox/tests/unittest_tpcc_dir/txn.txt b/tune/protox/tests/unittest_tpcc_dir/txn.txt
deleted file mode 100644
index a618589d..00000000
--- a/tune/protox/tests/unittest_tpcc_dir/txn.txt
+++ /dev/null
@@ -1,33 +0,0 @@
-Q1,1.sql,0.04
-Q2,2.sql,0.04
-Q3,3.sql,0.04
-Q4,4.sql,0.04
-Q5,5.sql,0.04
-Q6,6.sql,0.04
-Q7,7.sql,0.04
-Q8,8.sql,0.45
-Q9,9.sql,0.45
-Q10,10.sql,0.45
-Q11,11.sql,0.45
-Q12,12.sql,0.45
-Q13,13.sql,0.45
-Q14,14.sql,0.45
-Q15,15.sql,0.45
-Q16,16.sql,0.45
-Q17,17.sql,0.45
-Q18,18.sql,0.04
-Q19,19.sql,0.04
-Q20,20.sql,0.016
-Q21,21.sql,0.024
-Q22,22.sql,0.43
-Q23,23.sql,0.43
-Q24,24.sql,0.43
-Q25,25.sql,0.43
-Q26,26.sql,0.43
-Q27,27.sql,0.43
-Q28,28.sql,0.43
-Q29,29.sql,0.43
-Q30,30.sql,0.43
-Q31,31.sql,0.43
-Q32,32.sql,0.04
-Q33,33.sql,0.04
diff --git a/tune/protox/tests/unittest_tpch_dir/01.sql b/tune/protox/tests/unittest_tpch_dir/01.sql
deleted file mode 100644
index 8b8f1af0..00000000
--- a/tune/protox/tests/unittest_tpch_dir/01.sql
+++ /dev/null
@@ -1,21 +0,0 @@
-select
- l_returnflag,
- l_linestatus,
- sum(l_quantity) as sum_qty,
- sum(l_extendedprice) as sum_base_price,
- sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
- sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
- avg(l_quantity) as avg_qty,
- avg(l_extendedprice) as avg_price,
- avg(l_discount) as avg_disc,
- count(*) as count_order
-from
- lineitem
-where
- l_shipdate <= date '1998-12-01' - interval '80' day
-group by
- l_returnflag,
- l_linestatus
-order by
- l_returnflag,
- l_linestatus;
diff --git a/tune/protox/tests/unittest_tpch_dir/02.sql b/tune/protox/tests/unittest_tpch_dir/02.sql
deleted file mode 100644
index fdc12a88..00000000
--- a/tune/protox/tests/unittest_tpch_dir/02.sql
+++ /dev/null
@@ -1,45 +0,0 @@
-
-select
- s_acctbal,
- s_name,
- n_name,
- p_partkey,
- p_mfgr,
- s_address,
- s_phone,
- s_comment
-from
- part,
- supplier s1,
- partsupp ps1,
- nation n1,
- region r1
-where
- p_partkey = ps_partkey
- and s_suppkey = ps_suppkey
- and p_size = 4
- and p_type like '%COPPER'
- and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey
- and r_name = 'ASIA'
- and ps_supplycost = (
- select
- min(ps_supplycost)
- from
- partsupp ps2,
- supplier s2,
- nation n2,
- region r2
- where
- p_partkey = ps_partkey
- and s_suppkey = ps_suppkey
- and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey
- and r_name = 'ASIA'
- )
-order by
- s_acctbal desc,
- n_name,
- s_name,
- p_partkey
-limit 100;
diff --git a/tune/protox/tests/unittest_tpch_dir/03.sql b/tune/protox/tests/unittest_tpch_dir/03.sql
deleted file mode 100644
index 8e92af43..00000000
--- a/tune/protox/tests/unittest_tpch_dir/03.sql
+++ /dev/null
@@ -1,24 +0,0 @@
-
-select
- l_orderkey,
- sum(l_extendedprice * (1 - l_discount)) as revenue,
- o_orderdate,
- o_shippriority
-from
- customer,
- orders,
- lineitem
-where
- c_mktsegment = 'MACHINERY'
- and c_custkey = o_custkey
- and l_orderkey = o_orderkey
- and o_orderdate < date '1995-03-16'
- and l_shipdate > date '1995-03-16'
-group by
- l_orderkey,
- o_orderdate,
- o_shippriority
-order by
- revenue desc,
- o_orderdate
-limit 10;
diff --git a/tune/protox/tests/unittest_tpch_dir/04.sql b/tune/protox/tests/unittest_tpch_dir/04.sql
deleted file mode 100644
index 4cd86b59..00000000
--- a/tune/protox/tests/unittest_tpch_dir/04.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-
-select
- o_orderpriority,
- count(*) as order_count
-from
- orders
-where
- o_orderdate >= date '1995-05-01'
- and o_orderdate < date '1995-05-01' + interval '3' month
- and exists (
- select
- *
- from
- lineitem
- where
- l_orderkey = o_orderkey
- and l_commitdate < l_receiptdate
- )
-group by
- o_orderpriority
-order by
- o_orderpriority;
diff --git a/tune/protox/tests/unittest_tpch_dir/05.sql b/tune/protox/tests/unittest_tpch_dir/05.sql
deleted file mode 100644
index f4bd5c08..00000000
--- a/tune/protox/tests/unittest_tpch_dir/05.sql
+++ /dev/null
@@ -1,25 +0,0 @@
-
-select
- n_name,
- sum(l_extendedprice * (1 - l_discount)) as revenue
-from
- customer,
- orders,
- lineitem,
- supplier,
- nation,
- region
-where
- c_custkey = o_custkey
- and l_orderkey = o_orderkey
- and l_suppkey = s_suppkey
- and c_nationkey = s_nationkey
- and s_nationkey = n_nationkey
- and n_regionkey = r_regionkey
- and r_name = 'AFRICA'
- and o_orderdate >= date '1996-01-01'
- and o_orderdate < date '1996-01-01' + interval '1' year
-group by
- n_name
-order by
- revenue desc;
diff --git a/tune/protox/tests/unittest_tpch_dir/06.sql b/tune/protox/tests/unittest_tpch_dir/06.sql
deleted file mode 100644
index 2ab2aa61..00000000
--- a/tune/protox/tests/unittest_tpch_dir/06.sql
+++ /dev/null
@@ -1,10 +0,0 @@
-
-select
- sum(l_extendedprice * l_discount) as revenue
-from
- lineitem
-where
- l_shipdate >= date '1996-01-01'
- and l_shipdate < date '1996-01-01' + interval '1' year
- and l_discount between 0.09 - 0.01 and 0.09 + 0.01
- and l_quantity < 25;
diff --git a/tune/protox/tests/unittest_tpch_dir/07.sql b/tune/protox/tests/unittest_tpch_dir/07.sql
deleted file mode 100644
index 8e5e9dc5..00000000
--- a/tune/protox/tests/unittest_tpch_dir/07.sql
+++ /dev/null
@@ -1,40 +0,0 @@
-
-select
- supp_nation,
- cust_nation,
- l_year,
- sum(volume) as revenue
-from
- (
- select
- n1.n_name as supp_nation,
- n2.n_name as cust_nation,
- extract(year from l_shipdate) as l_year,
- l_extendedprice * (1 - l_discount) as volume
- from
- supplier,
- lineitem,
- orders,
- customer,
- nation n1,
- nation n2
- where
- s_suppkey = l_suppkey
- and o_orderkey = l_orderkey
- and c_custkey = o_custkey
- and s_nationkey = n1.n_nationkey
- and c_nationkey = n2.n_nationkey
- and (
- (n1.n_name = 'UNITED STATES' and n2.n_name = 'MOROCCO')
- or (n1.n_name = 'MOROCCO' and n2.n_name = 'UNITED STATES')
- )
- and l_shipdate between date '1995-01-01' and date '1996-12-31'
- ) as shipping
-group by
- supp_nation,
- cust_nation,
- l_year
-order by
- supp_nation,
- cust_nation,
- l_year;
diff --git a/tune/protox/tests/unittest_tpch_dir/08.sql b/tune/protox/tests/unittest_tpch_dir/08.sql
deleted file mode 100644
index 6cb8f188..00000000
--- a/tune/protox/tests/unittest_tpch_dir/08.sql
+++ /dev/null
@@ -1,38 +0,0 @@
-
-select
- o_year,
- sum(case
- when nation = 'MOROCCO' then volume
- else 0
- end) / sum(volume) as mkt_share
-from
- (
- select
- extract(year from o_orderdate) as o_year,
- l_extendedprice * (1 - l_discount) as volume,
- n2.n_name as nation
- from
- part,
- supplier,
- lineitem,
- orders,
- customer,
- nation n1,
- nation n2,
- region
- where
- p_partkey = l_partkey
- and s_suppkey = l_suppkey
- and l_orderkey = o_orderkey
- and o_custkey = c_custkey
- and c_nationkey = n1.n_nationkey
- and n1.n_regionkey = r_regionkey
- and r_name = 'AFRICA'
- and s_nationkey = n2.n_nationkey
- and o_orderdate between date '1995-01-01' and date '1996-12-31'
- and p_type = 'MEDIUM ANODIZED NICKEL'
- ) as all_nations
-group by
- o_year
-order by
- o_year;
diff --git a/tune/protox/tests/unittest_tpch_dir/09.sql b/tune/protox/tests/unittest_tpch_dir/09.sql
deleted file mode 100644
index b4efdc33..00000000
--- a/tune/protox/tests/unittest_tpch_dir/09.sql
+++ /dev/null
@@ -1,33 +0,0 @@
-
-select
- nation,
- o_year,
- sum(amount) as sum_profit
-from
- (
- select
- n_name as nation,
- extract(year from o_orderdate) as o_year,
- l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
- from
- part,
- supplier,
- lineitem,
- partsupp,
- orders,
- nation
- where
- s_suppkey = l_suppkey
- and ps_suppkey = l_suppkey
- and ps_partkey = l_partkey
- and p_partkey = l_partkey
- and o_orderkey = l_orderkey
- and s_nationkey = n_nationkey
- and p_name like '%ghost%'
- ) as profit
-group by
- nation,
- o_year
-order by
- nation,
- o_year desc;
diff --git a/tune/protox/tests/unittest_tpch_dir/10.sql b/tune/protox/tests/unittest_tpch_dir/10.sql
deleted file mode 100644
index 408d699c..00000000
--- a/tune/protox/tests/unittest_tpch_dir/10.sql
+++ /dev/null
@@ -1,33 +0,0 @@
-
-select
- c_custkey,
- c_name,
- sum(l_extendedprice * (1 - l_discount)) as revenue,
- c_acctbal,
- n_name,
- c_address,
- c_phone,
- c_comment
-from
- customer,
- orders,
- lineitem,
- nation
-where
- c_custkey = o_custkey
l_orderkey = o_orderkey - and o_orderdate >= date '1994-01-01' - and o_orderdate < date '1994-01-01' + interval '3' month - and l_returnflag = 'R' - and c_nationkey = n_nationkey -group by - c_custkey, - c_name, - c_acctbal, - c_phone, - n_name, - c_address, - c_comment -order by - revenue desc -limit 20; diff --git a/tune/protox/tests/unittest_tpch_dir/11.sql b/tune/protox/tests/unittest_tpch_dir/11.sql deleted file mode 100644 index 4d5d12c1..00000000 --- a/tune/protox/tests/unittest_tpch_dir/11.sql +++ /dev/null @@ -1,28 +0,0 @@ - -select - ps_partkey, - sum(ps_supplycost * ps_availqty) as value -from - partsupp ps1, - supplier s1, - nation n1 -where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'MOZAMBIQUE' -group by - ps_partkey having - sum(ps_supplycost * ps_availqty) > ( - select - sum(ps_supplycost * ps_availqty) * 0.0001000000 - from - partsupp ps2, - supplier s2, - nation n2 - where - ps_suppkey = s_suppkey - and s_nationkey = n_nationkey - and n_name = 'MOZAMBIQUE' - ) -order by - value desc; diff --git a/tune/protox/tests/unittest_tpch_dir/12.sql b/tune/protox/tests/unittest_tpch_dir/12.sql deleted file mode 100644 index 5cffcd5b..00000000 --- a/tune/protox/tests/unittest_tpch_dir/12.sql +++ /dev/null @@ -1,29 +0,0 @@ - -select - l_shipmode, - sum(case - when o_orderpriority = '1-URGENT' - or o_orderpriority = '2-HIGH' - then 1 - else 0 - end) as high_line_count, - sum(case - when o_orderpriority <> '1-URGENT' - and o_orderpriority <> '2-HIGH' - then 1 - else 0 - end) as low_line_count -from - orders, - lineitem -where - o_orderkey = l_orderkey - and l_shipmode in ('RAIL', 'MAIL') - and l_commitdate < l_receiptdate - and l_shipdate < l_commitdate - and l_receiptdate >= date '1994-01-01' - and l_receiptdate < date '1994-01-01' + interval '1' year -group by - l_shipmode -order by - l_shipmode; diff --git a/tune/protox/tests/unittest_tpch_dir/13.sql b/tune/protox/tests/unittest_tpch_dir/13.sql deleted file mode 100644 index c9c5f237..00000000 --- a/tune/protox/tests/unittest_tpch_dir/13.sql +++ /dev/null @@ -1,21 +0,0 @@ - -select - c_count, - count(*) as custdist -from - ( - select - c_custkey, - count(o_orderkey) - from - customer left outer join orders on - c_custkey = o_custkey - and o_comment not like '%unusual%requests%' - group by - c_custkey - ) as c_orders (c_custkey, c_count) -group by - c_count -order by - custdist desc, - c_count desc; diff --git a/tune/protox/tests/unittest_tpch_dir/14.sql b/tune/protox/tests/unittest_tpch_dir/14.sql deleted file mode 100644 index 9c5d642d..00000000 --- a/tune/protox/tests/unittest_tpch_dir/14.sql +++ /dev/null @@ -1,14 +0,0 @@ - -select - 100.00 * sum(case - when p_type like 'PROMO%' - then l_extendedprice * (1 - l_discount) - else 0 - end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue -from - lineitem, - part -where - l_partkey = p_partkey - and l_shipdate >= date '1994-07-01' - and l_shipdate < date '1994-07-01' + interval '1' month; diff --git a/tune/protox/tests/unittest_tpch_dir/15.sql b/tune/protox/tests/unittest_tpch_dir/15.sql deleted file mode 100644 index 5f4a0a26..00000000 --- a/tune/protox/tests/unittest_tpch_dir/15.sql +++ /dev/null @@ -1,35 +0,0 @@ - -create or replace view revenue0_PID (supplier_no, total_revenue) as - select - l_suppkey, - sum(l_extendedprice * (1 - l_discount)) - from - lineitem - where - l_shipdate >= date '1994-09-01' - and l_shipdate < date '1994-09-01' + interval '3' month - group by - l_suppkey; - - -select - s_suppkey, - s_name, - s_address, - s_phone, - 
total_revenue -from - supplier, - revenue0_PID r0 -where - s_suppkey = supplier_no - and total_revenue = ( - select - max(total_revenue) - from - revenue0_PID r1 - ) -order by - s_suppkey; - -drop view revenue0_PID; diff --git a/tune/protox/tests/unittest_tpch_dir/16.sql b/tune/protox/tests/unittest_tpch_dir/16.sql deleted file mode 100644 index d3b550e1..00000000 --- a/tune/protox/tests/unittest_tpch_dir/16.sql +++ /dev/null @@ -1,30 +0,0 @@ -select - p_brand, - p_type, - p_size, - count(distinct ps_suppkey) as supplier_cnt -from - partsupp, - part -where - p_partkey = ps_partkey - and p_brand <> 'Brand#32' - and p_type not like 'SMALL BURNISHED%' - and p_size in (3, 38, 9, 4, 12, 10, 42, 40) - and ps_suppkey not in ( - select - s_suppkey - from - supplier - where - s_comment like '%Customer%Complaints%' - ) -group by - p_brand, - p_type, - p_size -order by - supplier_cnt desc, - p_brand, - p_type, - p_size; diff --git a/tune/protox/tests/unittest_tpch_dir/17.sql b/tune/protox/tests/unittest_tpch_dir/17.sql deleted file mode 100644 index 4a141fbc..00000000 --- a/tune/protox/tests/unittest_tpch_dir/17.sql +++ /dev/null @@ -1,18 +0,0 @@ - -select - sum(l_extendedprice) / 7.0 as avg_yearly -from - lineitem l1, - part -where - p_partkey = l_partkey - and p_brand = 'Brand#22' - and p_container = 'SM BAG' - and l_quantity < ( - select - 0.2 * avg(l_quantity) - from - lineitem l2 - where - l_partkey = p_partkey - ); diff --git a/tune/protox/tests/unittest_tpch_dir/18.sql b/tune/protox/tests/unittest_tpch_dir/18.sql deleted file mode 100644 index fbd3ad95..00000000 --- a/tune/protox/tests/unittest_tpch_dir/18.sql +++ /dev/null @@ -1,34 +0,0 @@ - -select - c_name, - c_custkey, - o_orderkey, - o_orderdate, - o_totalprice, - sum(l_quantity) -from - customer, - orders, - lineitem l1 -where - o_orderkey in ( - select - l_orderkey - from - lineitem l2 - group by - l_orderkey having - sum(l_quantity) > 312 - ) - and c_custkey = o_custkey - and o_orderkey = l_orderkey -group by - c_name, - c_custkey, - o_orderkey, - o_orderdate, - o_totalprice -order by - o_totalprice desc, - o_orderdate -limit 100; diff --git a/tune/protox/tests/unittest_tpch_dir/19.sql b/tune/protox/tests/unittest_tpch_dir/19.sql deleted file mode 100644 index 22be2cb2..00000000 --- a/tune/protox/tests/unittest_tpch_dir/19.sql +++ /dev/null @@ -1,36 +0,0 @@ - -select - sum(l_extendedprice* (1 - l_discount)) as revenue -from - lineitem, - part -where - ( - p_partkey = l_partkey - and p_brand = 'Brand#14' - and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - and l_quantity >= 9 and l_quantity <= 9 + 10 - and p_size between 1 and 5 - and l_shipmode in ('AIR', 'AIR REG') - and l_shipinstruct = 'DELIVER IN PERSON' - ) - or - ( - p_partkey = l_partkey - and p_brand = 'Brand#11' - and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - and l_quantity >= 18 and l_quantity <= 18 + 10 - and p_size between 1 and 10 - and l_shipmode in ('AIR', 'AIR REG') - and l_shipinstruct = 'DELIVER IN PERSON' - ) - or - ( - p_partkey = l_partkey - and p_brand = 'Brand#11' - and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - and l_quantity >= 23 and l_quantity <= 23 + 10 - and p_size between 1 and 15 - and l_shipmode in ('AIR', 'AIR REG') - and l_shipinstruct = 'DELIVER IN PERSON' - ); diff --git a/tune/protox/tests/unittest_tpch_dir/20.sql b/tune/protox/tests/unittest_tpch_dir/20.sql deleted file mode 100644 index c23fa5c6..00000000 --- a/tune/protox/tests/unittest_tpch_dir/20.sql +++ /dev/null @@ -1,38 +0,0 @@ - -select - 
s_name, - s_address -from - supplier, - nation -where - s_suppkey in ( - select - ps_suppkey - from - partsupp - where - ps_partkey in ( - select - p_partkey - from - part - where - p_name like 'red%' - ) - and ps_availqty > ( - select - 0.5 * sum(l_quantity) - from - lineitem - where - l_partkey = ps_partkey - and l_suppkey = ps_suppkey - and l_shipdate >= date '1993-01-01' - and l_shipdate < date '1993-01-01' + interval '1' year - ) - ) - and s_nationkey = n_nationkey - and n_name = 'ALGERIA' -order by - s_name; diff --git a/tune/protox/tests/unittest_tpch_dir/21.sql b/tune/protox/tests/unittest_tpch_dir/21.sql deleted file mode 100644 index ed0cd501..00000000 --- a/tune/protox/tests/unittest_tpch_dir/21.sql +++ /dev/null @@ -1,41 +0,0 @@ - -select - s_name, - count(*) as numwait -from - supplier, - lineitem l1, - orders, - nation -where - s_suppkey = l1.l_suppkey - and o_orderkey = l1.l_orderkey - and o_orderstatus = 'F' - and l1.l_receiptdate > l1.l_commitdate - and exists ( - select - * - from - lineitem l2 - where - l2.l_orderkey = l1.l_orderkey - and l2.l_suppkey <> l1.l_suppkey - ) - and not exists ( - select - * - from - lineitem l3 - where - l3.l_orderkey = l1.l_orderkey - and l3.l_suppkey <> l1.l_suppkey - and l3.l_receiptdate > l3.l_commitdate - ) - and s_nationkey = n_nationkey - and n_name = 'CANADA' -group by - s_name -order by - numwait desc, - s_name -limit 100; diff --git a/tune/protox/tests/unittest_tpch_dir/22.sql b/tune/protox/tests/unittest_tpch_dir/22.sql deleted file mode 100644 index 1fac1983..00000000 --- a/tune/protox/tests/unittest_tpch_dir/22.sql +++ /dev/null @@ -1,38 +0,0 @@ - -select - cntrycode, - count(*) as numcust, - sum(c_acctbal) as totacctbal -from - ( - select - substring(c_phone from 1 for 2) as cntrycode, - c_acctbal - from - customer c1 - where - substring(c_phone from 1 for 2) in - ('10', '14', '11', '30', '29', '21', '12') - and c_acctbal > ( - select - avg(c_acctbal) - from - customer c2 - where - c_acctbal > 0.00 - and substring(c_phone from 1 for 2) in - ('10', '14', '11', '30', '29', '21', '12') - ) - and not exists ( - select - * - from - orders - where - o_custkey = c_custkey - ) - ) as custsale -group by - cntrycode -order by - cntrycode; diff --git a/tune/protox/tests/unittest_tpch_dir/order.txt b/tune/protox/tests/unittest_tpch_dir/order.txt deleted file mode 100644 index c47df048..00000000 --- a/tune/protox/tests/unittest_tpch_dir/order.txt +++ /dev/null @@ -1,22 +0,0 @@ -Q1,01.sql -Q2,02.sql -Q3,03.sql -Q4,04.sql -Q5,05.sql -Q6,06.sql -Q7,07.sql -Q8,08.sql -Q9,09.sql -Q10,10.sql -Q11,11.sql -Q12,12.sql -Q13,13.sql -Q14,14.sql -Q15,15.sql -Q16,16.sql -Q17,17.sql -Q18,18.sql -Q19,19.sql -Q20,20.sql -Q21,21.sql -Q22,22.sql diff --git a/tune/protox/tests/unittest_workload.py b/tune/protox/tests/unittest_workload.py deleted file mode 100644 index 59d28694..00000000 --- a/tune/protox/tests/unittest_workload.py +++ /dev/null @@ -1,93 +0,0 @@ -import json -import pickle -import unittest -from pathlib import Path -from typing import Any, Tuple - -import yaml - -from tune.protox.env.space.primitive_space import IndexSpace -from tune.protox.env.types import TableAttrAccessSetsMap, TableColTuple -from tune.protox.env.workload import Workload - - -class WorkloadTests(unittest.TestCase): - @staticmethod - def build(config_fpath: Path, workload_path: Path) -> tuple[Workload, IndexSpace]: - # don't call open_and_save() because this is a unittest - with open(config_fpath, "r") as f: - benchmark_config = yaml.safe_load(f) - benchmark_key = [k for k in 
benchmark_config.keys()][0] - benchmark_config = benchmark_config[benchmark_key] - benchmark_config["benchmark"] = benchmark_key - - w = Workload( - None, - tables=benchmark_config["tables"], - attributes=benchmark_config["attributes"], - query_spec=benchmark_config["query_spec"], - workload_path=workload_path, - pid=None, - workload_timeout=0, - workload_timeout_penalty=1.0, - artifact_manager=None, - ) - - i = IndexSpace( - tables=benchmark_config["tables"], - max_num_columns=benchmark_config["max_num_columns"], - max_indexable_attributes=w.max_indexable(), - seed=0, - rel_metadata=w.column_usages(), - attributes_overwrite=w.column_usages(), - tbl_include_subsets=TableAttrAccessSetsMap({}), - index_space_aux_type=True, - index_space_aux_include=True, - deterministic_policy=True, - ) - return w, i - - def _test_workload(self, workload_name: str) -> None: - # Build objects. - tests_dpath = Path("tune/protox/tests") - w, i = WorkloadTests.build( - tests_dpath / f"unittest_benchmark_configs/unittest_{workload_name}.yaml", - (tests_dpath / f"unittest_{workload_name}_dir").resolve(), - ) - - # Load reference objects. - ref_dpath = tests_dpath / "unittest_ref" - ref_workload_fpath = ref_dpath / f"ref_{workload_name}_workload.pkl" - ref_idxspace_fpath = ref_dpath / f"ref_{workload_name}_idxspace.pkl" - with open(ref_workload_fpath, "rb") as f: - ref_w: Workload = pickle.load(f) - with open(ref_idxspace_fpath, "rb") as f: - ref_i: IndexSpace = pickle.load(f) - - # Check various workload fields. - self.assertEqual(w.column_usages(), ref_w.column_usages()) - - # Check various idxspace mapping. - self.assertEqual(i.class_mapping, ref_i.class_mapping) - - # # Uncomment this to "update" the reference objects. - # with open(ref_workload_fpath, "wb") as f: - # pickle.dump(w, f) - # with open(ref_idxspace_fpath, "wb") as f: - # pickle.dump(i, f) - - def test_tpch(self) -> None: - self._test_workload("tpch") - - def test_jobfull(self) -> None: - self._test_workload("jobfull") - - def test_dsb(self) -> None: - self._test_workload("dsb") - - def test_tpcc(self) -> None: - self._test_workload("tpcc") - - -if __name__ == "__main__": - unittest.main() diff --git a/tune/protox/tests/unittest_workload_utils.py b/tune/protox/tests/unittest_workload_utils.py deleted file mode 100644 index be2fd9a8..00000000 --- a/tune/protox/tests/unittest_workload_utils.py +++ /dev/null @@ -1,231 +0,0 @@ -import unittest - -import pglast - -from tune.protox.env.types import AttrTableListMap, QueryType -from tune.protox.env.util.workload_analysis import ( - extract_aliases, - extract_columns, - extract_sqltypes, -) - - -class WorkloadUtilsTests(unittest.TestCase): - TPCH_TABLES = [ - "part", - "partsupp", - "lineitem", - "orders", - "supplier", - "customer", - "nation", - "region", - ] - TPCH_ALL_ATTRIBUTES = AttrTableListMap( - { - "r_regionkey": ["region"], - "r_name": ["region"], - "r_comment": ["region"], - "n_nationkey": ["nation"], - "n_name": ["nation"], - "n_regionkey": ["nation"], - "n_comment": ["nation"], - "p_partkey": ["part"], - "p_name": ["part"], - "p_mfgr": ["part"], - "p_brand": ["part"], - "p_type": ["part"], - "p_size": ["part"], - "p_container": ["part"], - "p_retailprice": ["part"], - "p_comment": ["part"], - "s_suppkey": ["supplier"], - "s_name": ["supplier"], - "s_address": ["supplier"], - "s_nationkey": ["supplier"], - "s_phone": ["supplier"], - "s_acctbal": ["supplier"], - "s_comment": ["supplier"], - "ps_partkey": ["partsupp"], - "ps_suppkey": ["partsupp"], - "ps_availqty": ["partsupp"], - 
"ps_supplycost": ["partsupp"], - "ps_comment": ["partsupp"], - "c_custkey": ["customer"], - "c_name": ["customer"], - "c_address": ["customer"], - "c_nationkey": ["customer"], - "c_phone": ["customer"], - "c_acctbal": ["customer"], - "c_mktsegment": ["customer"], - "c_comment": ["customer"], - "o_orderkey": ["orders"], - "o_custkey": ["orders"], - "o_orderstatus": ["orders"], - "o_totalprice": ["orders"], - "o_orderdate": ["orders"], - "o_orderpriority": ["orders"], - "o_clerk": ["orders"], - "o_shippriority": ["orders"], - "o_comment": ["orders"], - "l_orderkey": ["lineitem"], - "l_partkey": ["lineitem"], - "l_suppkey": ["lineitem"], - "l_linenumber": ["lineitem"], - "l_quantity": ["lineitem"], - "l_extendedprice": ["lineitem"], - "l_discount": ["lineitem"], - "l_tax": ["lineitem"], - "l_returnflag": ["lineitem"], - "l_linestatus": ["lineitem"], - "l_shipdate": ["lineitem"], - "l_commitdate": ["lineitem"], - "l_receiptdate": ["lineitem"], - "l_shipinstruct": ["lineitem"], - "l_shipmode": ["lineitem"], - "l_comment": ["lineitem"], - } - ) - TPCH_Q1 = """ -select - l_returnflag, - l_linestatus, - sum(l_quantity) as sum_qty, - sum(l_extendedprice) as sum_base_price, - sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, - avg(l_quantity) as avg_qty, - avg(l_extendedprice) as avg_price, - avg(l_discount) as avg_disc, - count(*) as count_order -from - lineitem -where - l_shipdate <= date '1998-12-01' - interval '80' day -group by - l_returnflag, - l_linestatus -order by - l_returnflag, - l_linestatus; -""" - - @staticmethod - def pglast_parse(sql: str) -> pglast.ast.Node: - return pglast.parse_sql(sql) - - def test_extract_aliases(self) -> None: - sql = "select * from t1 as t1_alias; select * from t1;" - stmts = WorkloadUtilsTests.pglast_parse(sql) - aliases = extract_aliases(stmts) - # if a table has more than one alias we have to do this more verbose assertion code - # to make it order invariant - self.assertTrue("t1" in aliases and len(aliases) == 1) - self.assertEqual(set(aliases["t1"]), set(["t1", "t1_alias"])) - - def test_extract_aliases_ignores_views_in_create_view(self) -> None: - sql = "create view view1 (view1_c1) as select c1 from t1;" - stmts = WorkloadUtilsTests.pglast_parse(sql) - aliases = extract_aliases(stmts) - # all tables have only one alias so we can do this simpler assertion code - self.assertEqual(aliases, {"t1": ["t1"]}) - - def test_extract_aliases_doesnt_ignore_views_that_are_used(self) -> None: - sql = "create view view1 (view1_c1) as select c1 from t1; select * from view1;" - stmts = WorkloadUtilsTests.pglast_parse(sql) - aliases = extract_aliases(stmts) - # all tables have only one alias so we can do this simpler assertion code - self.assertEqual(aliases, {"t1": ["t1"], "view1": ["view1"]}) - - def test_extract_sqltypes(self) -> None: - sql = """ -select * from t1; -update t1 set t1.c1 = 0 where t1.c1 = 1; -create or replace view view1 (view1_c1) as - select c1 - from t1; -""" - stmts = WorkloadUtilsTests.pglast_parse(sql) - pid = 0 - sqltypes = extract_sqltypes(stmts, pid) - - expected_num_stmts = 3 - self.assertEqual(len(sqltypes), expected_num_stmts) - for i in range(expected_num_stmts): - self.assertTrue(type(sqltypes[i]) is tuple and len(sqltypes[i]) == 2) - self.assertEqual(sqltypes[0][0], QueryType.SELECT) - self.assertEqual(sqltypes[1][0], QueryType.INS_UPD_DEL) - self.assertEqual(sqltypes[2][0], QueryType.CREATE_VIEW) - - def test_extract_columns(self) -> None: - sql = 
WorkloadUtilsTests.TPCH_Q1 - tables = WorkloadUtilsTests.TPCH_TABLES - all_attributes = WorkloadUtilsTests.TPCH_ALL_ATTRIBUTES - stmts = WorkloadUtilsTests.pglast_parse(sql) - aliases = extract_aliases(stmts) - self.assertEqual(len(stmts), 1) - stmt = stmts[0] - tbl_col_usages, all_refs = extract_columns( - stmt, tables, all_attributes, aliases - ) - - for table in tables: - self.assertTrue(table in tbl_col_usages) - if table == "lineitem": - self.assertEqual(tbl_col_usages[table], {"l_shipdate"}) - else: - self.assertEqual(tbl_col_usages[table], set()) - - self.assertEqual( - set(all_refs), - set( - [ - ("lineitem", "l_returnflag"), - ("lineitem", "l_linestatus"), - ("lineitem", "l_returnflag"), - ("lineitem", "l_linestatus"), - ("lineitem", "l_returnflag"), - ("lineitem", "l_linestatus"), - ("lineitem", "l_quantity"), - ("lineitem", "l_extendedprice"), - ("lineitem", "l_extendedprice"), - ("lineitem", "l_discount"), - ("lineitem", "l_extendedprice"), - ("lineitem", "l_discount"), - ("lineitem", "l_tax"), - ("lineitem", "l_quantity"), - ("lineitem", "l_extendedprice"), - ("lineitem", "l_discount"), - ("lineitem", "l_shipdate"), - ] - ), - ) - - def test_extract_columns_with_cte(self) -> None: - sql = """ -with cte1 as ( - select t1.c1 - from t1 - where t1.c2 = 3 -) -select * -from cte1; -""" - tables = ["t1"] - all_attributes = AttrTableListMap({"c1": ["t1"], "c2": ["t1"]}) - stmts = WorkloadUtilsTests.pglast_parse(sql) - aliases = extract_aliases(stmts) - self.assertEqual(len(stmts), 1) - stmt = stmts[0] - tbl_col_usages, all_refs = extract_columns( - stmt, tables, all_attributes, aliases - ) - - self.assertEqual(tbl_col_usages, {"t1": {"c2"}}) - self.assertEqual( - set(all_refs), set([("t1", "c1"), ("t1", "c2"), ("t1", "c1"), ("t1", "c2")]) - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/util/log.py b/util/log.py index b4073b1d..271fd92f 100644 --- a/util/log.py +++ b/util/log.py @@ -36,19 +36,6 @@ def set_up_loggers(log_dpath: Path) -> None: console_level=logging.DEBUG, ) - # Set up some of the third-party loggers. - # The reason I only set up a few select keys is to avoid cluttering the artifacts/ directory with too many *.log files. - for logger_name in ["tensorflow", "ray"]: - logger = logging.root.manager.loggerDict[logger_name] - assert isinstance(logger, Logger) - # Make sure to clear the handlers to remove the console handler that the loggers create by default. - logger.handlers.clear() - _set_up_logger( - logger, - log_format, - log_dpath / f"{logger_name}.log", - ) - def _set_up_logger( logger: Logger, diff --git a/util/workspace.py b/util/workspace.py index 1c379905..3c580a7f 100644 --- a/util/workspace.py +++ b/util/workspace.py @@ -1,12 +1,12 @@ """ This file contains everything needed to manage the workspace (the dbgym_workspace/ folder). -TODO: it has some things specific to Proto-X which should be moved somewhere else in the future. 
""" import logging import os import shutil import subprocess +import time from datetime import datetime from enum import Enum from pathlib import Path @@ -29,10 +29,6 @@ DBMS_PATH = Path("dbms") POSTGRES_PATH = DBMS_PATH / "postgres" TUNE_PATH = Path("tune") -PROTOX_PATH = TUNE_PATH / "protox" -PROTOX_EMBEDDING_PATH = PROTOX_PATH / "embedding" -PROTOX_AGENT_PATH = PROTOX_PATH / "agent" -PROTOX_WOLP_PATH = PROTOX_AGENT_PATH / "wolp" # Paths of different parts of the workspace # I made these Path objects even though they're not real paths just so they can work correctly with my other helper functions @@ -77,19 +73,9 @@ def get_dbdata_tgz_name(benchmark_name: str, scale_factor: float | str) -> str: # Paths of config files in the codebase. These are always relative paths. # The reason these can be relative paths instead of functions taking in codebase_path as input is because relative paths are relative to the codebase root -DEFAULT_HPO_SPACE_PATH = PROTOX_EMBEDDING_PATH / "default_hpo_space.json" -DEFAULT_SYSKNOBS_PATH = PROTOX_AGENT_PATH / "default_sysknobs.yaml" DEFAULT_BOOT_CONFIG_FPATH = POSTGRES_PATH / "default_boot_config.yaml" -def get_default_benchmark_config_path(benchmark_name: str) -> Path: - return PROTOX_PATH / f"default_{benchmark_name}_benchmark_config.yaml" - - -def get_default_benchbase_config_path(benchmark_name: str) -> Path: - return PROTOX_PATH / f"default_{benchmark_name}_benchbase_config.xml" - - # Generally useful functions def get_workload_name(scale_factor: float | str, suffix: str) -> str: return f"workload_sf{get_scale_factor_string(scale_factor)}_{suffix}" @@ -110,32 +96,6 @@ def get_default_tables_dname(scale_factor: float | str) -> str: return f"tables_sf{get_scale_factor_string(scale_factor)}" -def get_default_traindata_fname(benchmark_name: str, workload_name: str) -> str: - return f"{benchmark_name}_{workload_name}_embedding_traindata.parquet" - - -def get_default_embedder_dname(benchmark_name: str, workload_name: str) -> str: - return f"{benchmark_name}_{workload_name}_embedder" - - -def get_default_hpoed_agent_params_fname( - benchmark_name: str, workload_name: str -) -> str: - return f"{benchmark_name}_{workload_name}_hpoed_agent_params.json" - - -def get_default_tuning_steps_dname( - benchmark_name: str, workload_name: str, boot_enabled_during_tune: bool -) -> str: - return f"{benchmark_name}_{workload_name}{'_boot' if boot_enabled_during_tune else ''}_tuning_steps" - - -def get_default_replay_data_fname( - benchmark_name: str, workload_name: str, boot_enabled_during_tune: bool -) -> str: - return f"{benchmark_name}_{workload_name}{'_boot' if boot_enabled_during_tune else ''}_replay_data.csv" - - # Paths of dependencies in the workspace. These are named "*_path" because they will be an absolute path # The reason these _cannot_ be relative paths is because relative paths are relative to the codebase root, not the workspace root # Note that it's okay to hardcode the codebase paths (like dbgym_dbms_postgres) here. In the worst case, we'll just break an @@ -146,45 +106,9 @@ def get_default_replay_data_fname( # - If a name already has the workload_name, I omit scale factor. This is because the workload_name includes the scale factor # - By convention, symlinks should end with ".link". The bug that motivated this decision involved replaying a tuning run. When # replaying a tuning run, you read the tuning_steps/ folder of the tuning run. Earlier, I created a symlink to that tuning_steps/ -# folder called run_*/dbgym_agent_protox_tune/tuning_steps. 
However, replay itself generates an replay_info.log file, which goes in -# run_*/dbgym_agent_protox_tune/tuning_steps/. The bug was that my replay function was overwriting the replay_info.log file of the +# folder called run_*/*/tuning_steps. However, replay itself generates an replay_info.log file, which goes in +# run_*/*/tuning_steps/. The bug was that my replay function was overwriting the replay_info.log file of the # tuning run. By naming all symlinks "*.link", we avoid the possibility of subtle bugs like this happening. -def get_default_traindata_path( - workspace_path: Path, benchmark_name: str, workload_name: str -) -> Path: - return ( - get_symlinks_path_from_workspace_path(workspace_path) - / "dbgym_tune_protox_embedding" - / "data" - / (get_default_traindata_fname(benchmark_name, workload_name) + ".link") - ) - - -def get_default_embedder_path( - workspace_path: Path, benchmark_name: str, workload_name: str -) -> Path: - return ( - get_symlinks_path_from_workspace_path(workspace_path) - / "dbgym_tune_protox_embedding" - / "data" - / (get_default_embedder_dname(benchmark_name, workload_name) + ".link") - ) - - -def get_default_hpoed_agent_params_path( - workspace_path: Path, benchmark_name: str, workload_name: str -) -> Path: - return ( - get_symlinks_path_from_workspace_path(workspace_path) - / "dbgym_tune_protox_agent" - / "data" - / ( - get_default_hpoed_agent_params_fname(benchmark_name, workload_name) - + ".link" - ) - ) - - def get_default_tables_path( workspace_path: Path, benchmark_name: str, scale_factor: float | str ) -> Path: @@ -235,44 +159,6 @@ def get_default_pgbin_path(workspace_path: Path) -> Path: return get_default_repo_path(workspace_path) / "boot" / "build" / "postgres" / "bin" -def get_default_tuning_steps_dpath( - workspace_path: Path, - benchmark_name: str, - workload_name: str, - boot_enabled_during_tune: bool, -) -> Path: - return ( - get_symlinks_path_from_workspace_path(workspace_path) - / "dbgym_tune_protox_agent" - / "artifacts" - / ( - get_default_tuning_steps_dname( - benchmark_name, workload_name, boot_enabled_during_tune - ) - + ".link" - ) - ) - - -def get_default_replay_data_fpath( - workspace_path: Path, - benchmark_name: str, - workload_name: str, - boot_enabled_during_tune: bool, -) -> Path: - return ( - get_symlinks_path_from_workspace_path(workspace_path) - / "dbgym_tune_protox_agent" - / "data" - / ( - get_default_replay_data_fname( - benchmark_name, workload_name, boot_enabled_during_tune - ) - + ".link" - ) - ) - - class DBGymConfig: """ Global configurations that apply to all parts of DB-Gym @@ -333,11 +219,21 @@ def __init__(self, dbgym_config_path: Path): self.dbgym_tmp_path.mkdir(parents=True, exist_ok=True) # Set the path for this task run's results. - self.dbgym_this_run_path = ( - self.dbgym_runs_path / f"run_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}" - ) - # `exist_ok` is False because we don't want to override a previous task run's data. - self.dbgym_this_run_path.mkdir(parents=True, exist_ok=False) + for _ in range(2): + try: + self.dbgym_this_run_path = ( + self.dbgym_runs_path + / f"run_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}" + ) + # `exist_ok` is False because we don't want to override a previous task run's data. + self.dbgym_this_run_path.mkdir(parents=True, exist_ok=False) + except FileExistsError: + # In case we call task.py twice in one second, sleeping here will fix it. + # Waiting one second is enough since we assume there's only one task.py running at a time. 
+ time.sleep(1) + else: + break + self.dbgym_latest_run_path = get_latest_run_path_from_workspace_path( self.dbgym_workspace_path )