Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions .github/scripts/run-sql-bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,22 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors
#
# Runs SQL benchmarks (datafusion-bench, duckdb-bench, lance-bench) for the given targets.
# This script is used by the sql-benchmarks.yml workflow.
# Runs SQL benchmarks (datafusion-bench, duckdb-bench, lance-bench, clickhouse-bench)
# for the given targets. This script is used by the sql-benchmarks.yml workflow.
#
# Usage:
# run-sql-bench.sh <subcommand> <targets> [options]
#
# Arguments:
# subcommand The benchmark subcommand (e.g., tpch, clickbench, tpcds)
# targets Comma-separated list of engine:format pairs
# (e.g., "datafusion:parquet,datafusion:vortex,duckdb:parquet")
# (e.g., "datafusion:parquet,datafusion:vortex,duckdb:parquet,clickhouse:parquet")
#
# Options:
# --scale-factor <sf> Scale factor for the benchmark (e.g., 1.0, 10.0)
# --iterations <n> Number of iterations to pass to each benchmark binary
# --remote-storage <url> Remote storage URL (e.g., s3://bucket/path/)
# If provided, runs in remote mode (no lance support).
# If provided, runs in remote mode (no lance/clickhouse support).
# --benchmark-id <id> Benchmark ID for error messages (e.g., tpch-s3)

set -Eeu -o pipefail
Expand Down Expand Up @@ -71,6 +71,13 @@ if $is_remote && echo "$targets" | grep -q 'lance'; then
exit 1
fi

# ClickHouse on remote storage is not supported. clickhouse-local reads local files only.
#
# Guard: abort early when a remote run was requested together with any ClickHouse target.
# - `$is_remote` is expanded and executed as a command, so it presumably holds the
#   literal `true` or `false` (set earlier in this script; confirm at its assignment).
# - The grep pattern is deliberately unanchored: `clickhouse:` is detected at any
#   position in the comma-separated targets string, not only as the first entry.
if $is_remote && echo "$targets" | grep -q 'clickhouse:'; then
echo "ERROR: ClickHouse benchmarks are not supported for remote storage."
# `${benchmark_id:-unknown}` prints "unknown" when benchmark_id is unset or empty.
echo "Remove 'clickhouse:' targets for benchmark '${benchmark_id:-unknown}'."
# Exit with a non-zero status so the calling workflow step fails.
exit 1
fi

# Extract formats for each engine from the targets string.
# Example input: "datafusion:parquet,datafusion:vortex,datafusion:lance,duckdb:parquet"
#
Expand All @@ -84,6 +91,7 @@ fi
df_formats=$(echo "$targets" | tr ',' '\n' | (grep '^datafusion:' | grep -v ':lance$' || true) | sed 's/datafusion://' | tr '\n' ',' | sed 's/,$//')
ddb_formats=$(echo "$targets" | tr ',' '\n' | (grep '^duckdb:' || true) | sed 's/duckdb://' | tr '\n' ',' | sed 's/,$//')
has_lance=$(echo "$targets" | grep -q 'datafusion:lance' && echo "true" || echo "false")
has_clickhouse=$(echo "$targets" | grep -q 'clickhouse:' && echo "true" || echo "false")

# Build options string.
opts=""
Expand Down Expand Up @@ -136,3 +144,14 @@ if ! $is_remote && [[ "$has_lance" == "true" ]] && [[ -f "target/release_debug/l

cat lance-results.json >> results.json
fi

# ClickHouse-bench only runs for local benchmarks (clickhouse-local reads local files).
if ! $is_remote && [[ "$has_clickhouse" == "true" ]] && [[ -f "target/release_debug/clickhouse-bench" ]]; then
Comment on lines +148 to +149
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you look above at the lance setup code, we have a better guard against benching something on remote that is not supposed to be benched there. Can you mimic that?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch — the ^clickhouse: anchor only matched when clickhouse was the first target in the comma-separated string. Dropped the ^ from both the remote guard and the has_clickhouse detection to match the lance pattern.

# shellcheck disable=SC2086
target/release_debug/clickhouse-bench "$subcommand" \
-d gh-json \
$opts \
-o ch-results.json

cat ch-results.json >> results.json
fi
2 changes: 1 addition & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ jobs:
"id": "clickbench-nvme",
"subcommand": "clickbench",
"name": "Clickbench on NVME",
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb,clickhouse:parquet",
"build_lance": true
},
{
Expand Down
15 changes: 14 additions & 1 deletion .github/workflows/sql-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ on:
"id": "clickbench-nvme",
"subcommand": "clickbench",
"name": "Clickbench on NVME",
"targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb,clickhouse:parquet"
},
{
"id": "tpch-nvme",
Expand Down Expand Up @@ -135,6 +135,16 @@ jobs:

- uses: ./.github/actions/system-info

- name: Install ClickHouse
if: contains(matrix.targets, 'clickhouse:')
env:
CLICKHOUSE_VERSION: "25.8.18.1"
run: |
wget -qO- "https://github.com/ClickHouse/ClickHouse/releases/download/v${CLICKHOUSE_VERSION}-lts/clickhouse-common-static-${CLICKHOUSE_VERSION}-amd64.tgz" | tar xz
cp clickhouse-common-static-${CLICKHOUSE_VERSION}/usr/bin/clickhouse .
chmod +x clickhouse
echo "CLICKHOUSE_BINARY=$PWD/clickhouse" >> $GITHUB_ENV

- name: Build binaries
shell: bash
env:
Expand All @@ -144,6 +154,9 @@ jobs:
if [ "${{ matrix.build_lance }}" = "true" ]; then
packages="$packages --bin lance-bench"
fi
if echo "${{ matrix.targets }}" | grep -q 'clickhouse:'; then
packages="$packages --bin clickhouse-bench"
fi
cargo build $packages --profile release_debug

- name: Generate data
Expand Down
31 changes: 22 additions & 9 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ members = [
"encodings/zstd",
"encodings/bytebool",
# Benchmarks
"benchmarks/clickhouse-bench",
"benchmarks/lance-bench",
"benchmarks/compress-bench",
"benchmarks/datafusion-bench",
Expand Down
23 changes: 23 additions & 0 deletions benchmarks/clickhouse-bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "clickhouse-bench"
description = "ClickHouse (clickhouse-local) benchmark runner for Vortex"
authors.workspace = true
edition.workspace = true
homepage.workspace = true
license.workspace = true
readme.workspace = true
repository.workspace = true
rust-version.workspace = true
version.workspace = true
publish = false

[dependencies]
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
parking_lot = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
vortex-bench = { workspace = true }

[lints]
workspace = true
18 changes: 18 additions & 0 deletions benchmarks/clickhouse-bench/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Build script that exports the ClickHouse binary path.
//!
//! Resolution order:
//! 1. `CLICKHOUSE_BINARY` env var, if set at build time — used as-is.
//! 2. Otherwise, falls back to `"clickhouse"` (i.e., resolved from `$PATH` at runtime).
//!
//! Users must install ClickHouse themselves for local runs.
//! In CI, it is installed via the workflow before the benchmark step.

/// Emits the Cargo directives that bake the ClickHouse binary location into the crate.
///
/// Two directives are printed to stdout for Cargo to consume:
/// - `cargo:rerun-if-env-changed=CLICKHOUSE_BINARY`, so the script is re-run when the
///   override variable changes.
/// - `cargo:rustc-env=CLICKHOUSE_BINARY=<value>`, where `<value>` is the variable's
///   content as seen by this script, or `clickhouse` when it cannot be read.
fn main() {
    // Ask Cargo to re-run this script whenever the override variable changes.
    println!("cargo:rerun-if-env-changed=CLICKHOUSE_BINARY");

    // Any lookup failure (variable unset, or its value is not valid Unicode)
    // selects the bare command name instead of an explicit path.
    let clickhouse_path = match std::env::var("CLICKHOUSE_BINARY") {
        Ok(configured) => configured,
        Err(_) => String::from("clickhouse"),
    };
    println!("cargo:rustc-env=CLICKHOUSE_BINARY={clickhouse_path}");
}
Loading