Skip to content

Commit

Permalink
feat(arrays): add modes array aggregation (#10737)
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored Jan 29, 2025
1 parent 847ed85 commit 6603c6c
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 1 deletion.
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class DuckDBCompiler(SQLGlotCompiler):
ops.ArrayAll: "list_bool_and",
ops.ArraySum: "list_sum",
ops.ArrayMean: "list_avg",
ops.ArrayMode: "list_mode",
ops.BitAnd: "bit_and",
ops.BitOr: "bit_or",
ops.BitXor: "bit_xor",
Expand Down
8 changes: 8 additions & 0 deletions ibis/backends/sql/compilers/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,14 @@ def visit_ArrayAny(self, op, *, arg):
def visit_ArrayAll(self, op, *, arg):
    """Compile ``ArrayAll`` via ``_array_reduction`` with the ``bool_and`` reduction."""
    reduction_name = "bool_and"
    return self._array_reduction(arg=arg, reduction=reduction_name)

def visit_ArrayMode(self, op, *, arg):
    """Compile ``ArrayMode`` as a scalar subquery over the unnested array.

    Builds ``mode() WITHIN GROUP (ORDER BY <name>)`` and selects it from
    ``self._unnest(arg, as_=<name>)``, where ``<name>`` is a generated
    identifier, then wraps the select as a subquery.
    """
    # Generated identifier shared by the ORDER BY and the unnest alias.
    element = sg.to_identifier(gen_name("pg_arr_mode"))
    mode_call = self.f.mode()
    ordering = sge.Order(expressions=[sge.Ordered(this=element)])
    within_group = sge.WithinGroup(this=mode_call, expression=ordering)
    query = sg.select(within_group)
    return query.from_(self._unnest(arg, as_=element)).subquery()

def visit_StringToTime(self, op, *, arg, format_str):
    """Compile ``StringToTime``: call ``str_to_time(arg, format_str)`` and cast to ``dt.time``."""
    parsed = self.f.str_to_time(arg, format_str)
    return self.cast(parsed, to=dt.time)

Expand Down
26 changes: 25 additions & 1 deletion ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1816,8 +1816,32 @@ def _agg_with_nulls(agg, x):
(ir.ArrayValue.mins, lambda x: _agg_with_nulls(min, x)),
(ir.ArrayValue.maxs, lambda x: _agg_with_nulls(max, x)),
(ir.ArrayValue.means, lambda x: _agg_with_nulls(statistics.mean, x)),
param(
ir.ArrayValue.modes,
lambda x: _agg_with_nulls(statistics.mode, x),
marks=[
pytest.mark.notyet(
[
"athena",
"bigquery",
"clickhouse",
"databricks",
"polars",
"pyspark",
"trino",
],
raises=com.OperationNotDefinedError,
reason="no mode aggregate in the engine",
),
pytest.mark.notimpl(
["snowflake"],
raises=com.OperationNotDefinedError,
reason="not yet implemented in Ibis",
),
],
),
],
ids=["sums", "mins", "maxs", "means"],
ids=["sums", "mins", "maxs", "means", "modes"],
)
@notimpl_aggs
@pytest.mark.parametrize(
Expand Down
5 changes: 5 additions & 0 deletions ibis/expr/operations/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,11 @@ class ArrayMax(ArrayAgg):
"""Compute the maximum value of an array."""


@public
class ArrayMode(ArrayAgg):
    """Compute the mode of an array.

    The mode is the most frequently occurring value. This class adds no
    fields or behavior of its own on top of `ArrayAgg`.
    """


# in duckdb summing an array of ints leads to an int, but for other backends
# it might lead to a float??
@public
Expand Down
27 changes: 27 additions & 0 deletions ibis/expr/types/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -1208,6 +1208,33 @@ def alls(self) -> ir.BooleanValue:
"""
return ops.ArrayAll(self).to_expr()

def modes(self) -> ir.Value:
    """Return the mode of the values in the array.

    As the example below shows, null elements are ignored, and an array
    that is empty, contains only nulls, or is itself null yields null.

    Returns
    -------
    Value
        The mode of the values in the array

    See Also
    --------
    [`Column.mode`](./expression-generic.qmd#ibis.expr.types.generic.Column.mode)

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable({"arr": [[1, 2, 3, 3], [None, 6], [None], [], None]})
    >>> t.mutate(mode=t.arr.modes())
    ┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
    ┃ arr ┃ mode ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩
    │ array<int64> │ int64 │
    ├──────────────────────┼───────┤
    │ [1, 2, ... +2] │ 3 │
    │ [None, 6] │ 6 │
    │ [None] │ NULL │
    │ [] │ NULL │
    │ NULL │ NULL │
    └──────────────────────┴───────┘
    """
    return ops.ArrayMode(self).to_expr()

def mins(self) -> ir.NumericValue:
"""Return the minimum value in the array.
Expand Down

0 comments on commit 6603c6c

Please sign in to comment.