Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(benchmarks): remove pandas benchmarking and replace with more-representative duckdb version #8322

Merged
merged 1 commit into from
Feb 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 2 additions & 65 deletions ibis/tests/benchmarks/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.backends.base import _get_backend_names
from ibis.backends.pandas.udf import udf

pytestmark = pytest.mark.benchmark

Expand Down Expand Up @@ -214,7 +213,8 @@ def pt():
}
)

return ibis.pandas.connect(dict(df=data)).table("df")
con = ibis.duckdb.connect()
return con.create_table("df", data)


def high_card_group_by(t):
Expand Down Expand Up @@ -277,24 +277,6 @@ def high_card_grouped_rolling(t):
return t.value.mean().over(high_card_rolling_window(t))


@udf.reduction(["double"], "double")
def my_mean(series):
return series.mean()


def low_card_grouped_rolling_udf_mean(t):
return my_mean(t.value).over(low_card_rolling_window(t))


def high_card_grouped_rolling_udf_mean(t):
return my_mean(t.value).over(high_card_rolling_window(t))


@udf.analytic(["double"], "double")
def my_zscore(series):
return (series - series.mean()) / series.std()


def low_card_window(t):
return ibis.window(group_by=t.low_card_key)

Expand All @@ -303,27 +285,6 @@ def high_card_window(t):
return ibis.window(group_by=t.key)


def low_card_window_analytics_udf(t):
return my_zscore(t.value).over(low_card_window(t))


def high_card_window_analytics_udf(t):
return my_zscore(t.value).over(high_card_window(t))


@udf.reduction(["double", "double"], "double")
def my_wm(v, w):
return np.average(v, weights=w)


def low_card_grouped_rolling_udf_wm(t):
return my_wm(t.value, t.value).over(low_card_rolling_window(t))


def high_card_grouped_rolling_udf_wm(t):
return my_wm(t.value, t.value).over(low_card_rolling_window(t))


broken_pandas_grouped_rolling = pytest.mark.xfail(
condition=vparse("1.4") <= vparse(pd.__version__) < vparse("1.4.2"),
raises=ValueError,
Expand Down Expand Up @@ -353,30 +314,6 @@ def high_card_grouped_rolling_udf_wm(t):
id="high_card_grouped_rolling",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(
low_card_grouped_rolling_udf_mean,
id="low_card_grouped_rolling_udf_mean",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(
high_card_grouped_rolling_udf_mean,
id="high_card_grouped_rolling_udf_mean",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(low_card_window_analytics_udf, id="low_card_window_analytics_udf"),
pytest.param(
high_card_window_analytics_udf, id="high_card_window_analytics_udf"
),
pytest.param(
low_card_grouped_rolling_udf_wm,
id="low_card_grouped_rolling_udf_wm",
marks=[broken_pandas_grouped_rolling],
),
pytest.param(
high_card_grouped_rolling_udf_wm,
id="high_card_grouped_rolling_udf_wm",
marks=[broken_pandas_grouped_rolling],
),
],
)
def test_execute(benchmark, expression_fn, pt):
Expand Down
Loading