@@ -73,6 +73,7 @@ tpch: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), s
73
73
tpch_mem: TPCH inspired benchmark on Scale Factor (SF) 1 (~1GB), query from memory
74
74
tpch10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), single parquet file per table, hash join
75
75
tpch_mem10: TPCH inspired benchmark on Scale Factor (SF) 10 (~10GB), query from memory
76
+ cancellation: How long cancelling a query takes
76
77
parquet: Benchmark of parquet reader's filtering speed
77
78
sort: Benchmark of sorting speed
78
79
sort_tpch: Benchmark of sorting speed for end-to-end sort queries on TPCH dataset
@@ -232,6 +233,7 @@ main() {
232
233
run_tpch_mem " 1"
233
234
run_tpch " 10"
234
235
run_tpch_mem " 10"
236
+ run_cancellation
235
237
run_parquet
236
238
run_sort
237
239
run_clickbench_1
@@ -255,6 +257,9 @@ main() {
255
257
tpch_mem10)
256
258
run_tpch_mem " 10"
257
259
;;
260
+ cancellation)
261
+ run_cancellation
262
+ ;;
258
263
parquet)
259
264
run_parquet
260
265
;;
@@ -397,6 +402,14 @@ run_tpch_mem() {
397
402
$CARGO_COMMAND --bin tpch -- benchmark datafusion --iterations 5 --path " ${TPCH_DIR} " --prefer_hash_join " ${PREFER_HASH_JOIN} " -m --format parquet -o " ${RESULTS_FILE} "
398
403
}
399
404
405
# Runs the cancellation benchmark (how long cancelling a query takes).
#
# Globals:
#   RESULTS_DIR   (read)    directory that receives the JSON result file
#   DATA_DIR      (read)    benchmark data root; "cancellation" is appended
#   CARGO_COMMAND (read)    command prefix used to launch the benchmark binary
#   RESULTS_FILE  (written) set to "${RESULTS_DIR}/cancellation.json"
# Arguments: none
# Outputs:   progress messages on stdout, plus whatever the benchmark prints
# Returns:   exit status of the benchmark command
run_cancellation () {
    # RESULTS_FILE is deliberately not `local`: the other run_* functions in
    # this script assign it the same way.
    RESULTS_FILE="${RESULTS_DIR}/cancellation.json"
    echo "RESULTS_FILE: ${RESULTS_FILE}"
    echo "Running cancellation benchmark..."
    # CARGO_COMMAND is left unquoted, as in the other run_* functions, so that a
    # multi-word command prefix is split into separate arguments.
    # NOTE(review): presumably something like `cargo run --release` - confirm.
    $CARGO_COMMAND --bin dfbench -- cancellation --iterations 5 --path "${DATA_DIR}/cancellation" -o "${RESULTS_FILE}"
}

412
+
400
413
# Runs the parquet filter benchmark
401
414
run_parquet () {
402
415
RESULTS_FILE=" ${RESULTS_DIR} /parquet.json"
@@ -490,9 +503,9 @@ data_imdb() {
490
503
local imdb_temp_gz=" ${imdb_dir} /imdb.tgz"
491
504
local imdb_url=" https://event.cwi.nl/da/job/imdb.tgz"
492
505
493
- # imdb has 21 files, we just separate them into 3 groups for better readability
506
+ # imdb has 21 files, we just separate them into 3 groups for better readability
494
507
local first_required_files=(
495
- " aka_name.parquet"
508
+ " aka_name.parquet"
496
509
" aka_title.parquet"
497
510
" cast_info.parquet"
498
511
" char_name.parquet"
@@ -539,13 +552,13 @@ data_imdb() {
539
552
if [ " $convert_needed " = true ]; then
540
553
# Expected size of the dataset
541
554
expected_size=" 1263193115" # 1.18 GB
542
-
555
+
543
556
echo -n " Looking for imdb.tgz... "
544
557
if [ -f " ${imdb_temp_gz} " ]; then
545
558
echo " found"
546
559
echo -n " Checking size... "
547
560
OUTPUT_SIZE=$( wc -c " ${imdb_temp_gz} " 2> /dev/null | awk ' {print $1}' || true)
548
-
561
+
549
562
# Checking the size of the existing file
550
563
if [ " ${OUTPUT_SIZE} " = " ${expected_size} " ]; then
551
564
# Existing file is of the expected size, no need for download
@@ -559,7 +572,7 @@ data_imdb() {
559
572
560
573
# Download the dataset
561
574
curl -o " ${imdb_temp_gz} " " ${imdb_url} "
562
-
575
+
563
576
# Size check of the installed file
564
577
DOWNLOADED_SIZE=$( wc -c " ${imdb_temp_gz} " | awk ' {print $1}' )
565
578
if [ " ${DOWNLOADED_SIZE} " != " ${expected_size} " ]; then
@@ -591,7 +604,7 @@ data_imdb() {
591
604
# Runs the imdb benchmark
592
605
run_imdb () {
593
606
IMDB_DIR=" ${DATA_DIR} /imdb"
594
-
607
+
595
608
RESULTS_FILE=" ${RESULTS_DIR} /imdb.json"
596
609
echo " RESULTS_FILE: ${RESULTS_FILE} "
597
610
echo " Running imdb benchmark..."
@@ -726,9 +739,9 @@ run_external_aggr() {
726
739
echo " Running external aggregation benchmark..."
727
740
728
741
# Only parquet is supported.
729
- # Since per-operator memory limit is calculated as (total-memory-limit /
742
+ # Since per-operator memory limit is calculated as (total-memory-limit /
730
743
# number-of-partitions), and by default `--partitions` is set to number of
731
- # CPU cores, we set a constant number of partitions to prevent this
744
+ # CPU cores, we set a constant number of partitions to prevent this
732
745
# benchmark from failing on some machines.
733
746
$CARGO_COMMAND --bin external_aggr -- benchmark --partitions 4 --iterations 5 --path " ${TPCH_DIR} " -o " ${RESULTS_FILE} "
734
747
}
0 commit comments