From 7c7e45993a6d2c24fbf9e51189eacfbbb620e5b6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 12 Nov 2024 06:17:01 -0500 Subject: [PATCH] feat(api): add `modes` array aggregation --- ibis/backends/sql/compilers/duckdb.py | 1 + ibis/backends/tests/test_array.py | 3 ++- ibis/expr/operations/arrays.py | 5 +++++ ibis/expr/types/arrays.py | 27 +++++++++++++++++++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/ibis/backends/sql/compilers/duckdb.py b/ibis/backends/sql/compilers/duckdb.py index 2a6b36e7e45b..de83a0efb527 100644 --- a/ibis/backends/sql/compilers/duckdb.py +++ b/ibis/backends/sql/compilers/duckdb.py @@ -62,6 +62,7 @@ class DuckDBCompiler(SQLGlotCompiler): ops.ArrayAll: "list_bool_and", ops.ArraySum: "list_sum", ops.ArrayMean: "list_avg", + ops.ArrayMode: "list_mode", ops.BitAnd: "bit_and", ops.BitOr: "bit_or", ops.BitXor: "bit_xor", diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 02f811bc3f0a..5533ddd3b490 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -1816,8 +1816,9 @@ def _agg_with_nulls(agg, x): (ir.ArrayValue.mins, lambda x: _agg_with_nulls(min, x)), (ir.ArrayValue.maxs, lambda x: _agg_with_nulls(max, x)), (ir.ArrayValue.means, lambda x: _agg_with_nulls(statistics.mean, x)), + (ir.ArrayValue.modes, lambda x: _agg_with_nulls(statistics.mode, x)), ], - ids=["sums", "mins", "maxs", "means"], + ids=["sums", "mins", "maxs", "means", "modes"], ) @notimpl_aggs @pytest.mark.parametrize( diff --git a/ibis/expr/operations/arrays.py b/ibis/expr/operations/arrays.py index 11687c752030..8202b21259da 100644 --- a/ibis/expr/operations/arrays.py +++ b/ibis/expr/operations/arrays.py @@ -292,6 +292,11 @@ class ArrayMax(ArrayAgg): """Compute the maximum value of an array.""" +@public +class ArrayMode(ArrayAgg): + """Compute the mode of an array.""" + + # in duckdb summing an array of ints leads to an int, but for other backends # it might lead to a float?? @public diff --git a/ibis/expr/types/arrays.py b/ibis/expr/types/arrays.py index c33b0b84a008..b494d8be3f24 100644 --- a/ibis/expr/types/arrays.py +++ b/ibis/expr/types/arrays.py @@ -1208,6 +1208,33 @@ def alls(self) -> ir.BooleanValue: """ return ops.ArrayAll(self).to_expr() + def modes(self) -> ir.Value: + """Return the mode of the values in the array. + + See Also + -------- + [`Column.mode`](./expression-generic.qmd#ibis.expr.types.generic.Column.mode) + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> t = ibis.memtable({"arr": [[1, 2, 3, 3], [None, 6], [None], [], None]}) + >>> t.mutate(mode=t.arr.modes()) + ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓ + ┃ arr ┃ mode ┃ + ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩ + │ array │ int64 │ + ├──────────────────────┼───────┤ + │ [1, 2, ... +2] │ 3 │ + │ [None, 6] │ 6 │ + │ [None] │ NULL │ + │ [] │ NULL │ + │ NULL │ NULL │ + └──────────────────────┴───────┘ + """ + return ops.ArrayMode(self).to_expr() + def mins(self) -> ir.NumericValue: """Return the minimum value in the array.