Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 2 additions & 28 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import datetime
import math
import operator
from collections.abc import Mapping
from functools import partial, reduce, singledispatch
from math import isnan

Expand Down Expand Up @@ -375,36 +374,11 @@ def fill_null(op, **kw):
table = translate(op.parent, **kw)

columns = []

repls = op.replacements

if isinstance(repls, Mapping):

def get_replacement(name):
repl = repls.get(name)
if repl is not None:
return repl.value
else:
return None

else:
value = repls.value

def get_replacement(_):
return value

for name, dtype in op.parent.schema.items():
column = pl.col(name)
if isinstance(op.replacements, Mapping):
value = op.replacements.get(name)
else:
value = _literal_value(op.replacements)

if value is not None:
if dtype.is_floating():
column = column.fill_nan(value)
if (repl := op.replacements.get(name)) is not None:
value = translate(repl, **kw)
column = column.fill_null(value)

# requires special treatment if the fill value has different datatype
if dtype.is_timestamp():
column = column.cast(pl.Datetime)
Expand Down
16 changes: 3 additions & 13 deletions ibis/backends/sql/rewrites.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import operator
import sys
from collections.abc import Mapping
from functools import reduce
from typing import TYPE_CHECKING, Any

Expand All @@ -24,7 +23,7 @@
from ibis.expr.schema import Schema

if TYPE_CHECKING:
from collections.abc import Sequence
from collections.abc import Mapping, Sequence

x = var("x")
y = var("y")
Expand Down Expand Up @@ -150,22 +149,13 @@ def drop_columns_to_select(_, **kwargs):
@replace(p.FillNull)
def fill_null_to_select(_, **kwargs):
"""Rewrite FillNull to a Select node."""
if isinstance(_.replacements, Mapping):
mapping = _.replacements
else:
mapping = {
name: _.replacements
for name, type in _.parent.schema.items()
if type.nullable
}

if not mapping:
if not _.replacements:
return _.parent

selections = {}
for name in _.parent.schema.names:
col = ops.Field(_.parent, name)
if (value := mapping.get(name)) is not None:
if (value := _.replacements.get(name)) is not None:
col = ops.Coalesce((col, value))
selections[name] = col

Expand Down
8 changes: 8 additions & 0 deletions ibis/backends/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,11 @@ def decorator(func):
pytest.mark.notimpl(["datafusion", "exasol", "mssql", "druid", "oracle"]),
]
NO_JSON_SUPPORT = combine_marks(NO_JSON_SUPPORT_MARKS)


NAN_TREATED_AS_NULL_MARKS = [
pytest.mark.never(
["sqlite", "mssql", "mysql"], reason="Treats NaN as NULL", raises=Exception
),
]
NAN_TREATED_AS_NULL = combine_marks(NAN_TREATED_AS_NULL_MARKS)
103 changes: 89 additions & 14 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import ibis.expr.datatypes as dt
import ibis.selectors as s
from ibis import _
from ibis.backends.tests.conftest import NAN_TREATED_AS_NULL
from ibis.backends.tests.errors import (
ClickHouseDatabaseError,
ExaQueryError,
Expand Down Expand Up @@ -450,37 +451,70 @@ def test_table_fill_null_invalid(alltypes):
com.IbisTypeError, match=r"Column 'invalid_col' is not found in table"
):
alltypes.fill_null({"invalid_col": 0.0})

with pytest.raises(
com.IbisTypeError, match=r"Cannot fill_null on column 'string_col' of type.*"
):
alltypes[["int_col", "string_col"]].fill_null(0)

alltypes.select("int_col", "string_col").fill_null(0)
with pytest.raises(AttributeError, match=r"'Table' object has no attribute 'oops'"):
alltypes.fill_null(_.oops)
with pytest.raises(AttributeError, match=r"'Table' object has no attribute 'oops'"):
alltypes.fill_null({"int_col": _.oops})
with pytest.raises(com.IbisTypeError, match=r"Column 'oops' is not found in table"):
alltypes.fill_null("oops")
with pytest.raises(com.IbisTypeError, match=r"Column 'oops' is not found in table"):
alltypes.fill_null({"int_col": "oops"})
with pytest.raises(
com.IbisTypeError, match=r"Cannot fill_null on column 'int_col' of type.*"
):
alltypes.fill_null({"int_col": "oops"})
alltypes.fill_null({"int_col": ibis.literal("oops")})


@pytest.mark.parametrize(
"replacements",
("ibis_replacements", "pd_replacements"),
[
param({"int_col": 20}, id="int"),
param({"double_col": -1, "string_col": "missing"}, id="double-int-str"),
param({"double_col": -1.5, "string_col": "missing"}, id="double-str"),
param({}, id="empty"),
param(
lambda _t: {"int_col": 20},
lambda _t: {"int_col": 20},
id="int",
),
param(
lambda _t: {"double_col": -1, "string_col": ibis.literal("missing")},
lambda _t: {"double_col": -1, "string_col": "missing"},
id="double-int-str",
),
param(
lambda _t: {"double_col": -1.5, "string_col": ibis.literal("missing")},
lambda _t: {"double_col": -1.5, "string_col": "missing"},
id="double-str",
),
param(
lambda _t: {"double_col": "int_col"},
lambda t: {"double_col": t["int_col"]},
id="column-name",
),
param(
lambda _t: {"double_col": ibis._.int_col},
lambda t: {"double_col": t["int_col"]},
id="column",
),
param(
lambda t: {"double_col": t.int_col},
lambda t: {"double_col": t["int_col"]},
id="deferred",
),
param(lambda _t: {}, lambda _t: {}, id="empty"),
],
)
def test_table_fill_null_mapping(backend, alltypes, replacements):
def test_table_fill_null_mapping(backend, alltypes, ibis_replacements, pd_replacements):
table = alltypes.mutate(
int_col=alltypes.int_col.nullif(1),
double_col=alltypes.double_col.nullif(3.0),
string_col=alltypes.string_col.nullif("2"),
).select("id", "int_col", "double_col", "string_col")
pd_table = table.execute()

result = table.fill_null(replacements).execute().reset_index(drop=True)
expected = pd_table.fillna(replacements).reset_index(drop=True)
result = table.fill_null(ibis_replacements(table)).execute().reset_index(drop=True)
expected = pd_table.fillna(pd_replacements(pd_table)).reset_index(drop=True)

backend.assert_frame_equal(result, expected, check_dtype=False)

Expand All @@ -493,14 +527,55 @@ def test_table_fill_null_scalar(backend, alltypes):
).select("id", "int_col", "double_col", "string_col")
pd_table = table.execute()

res = table[["int_col", "double_col"]].fill_null(0).execute().reset_index(drop=True)
res = (
table.select("int_col", "double_col")
.fill_null(0)
.execute()
.reset_index(drop=True)
)
sol = pd_table[["int_col", "double_col"]].fillna(0).reset_index(drop=True)
backend.assert_frame_equal(res, sol, check_dtype=False)

res = table[["string_col"]].fill_null("missing").execute().reset_index(drop=True)
res = (
table.select("string_col")
.fill_null(ibis.literal("missing"))
.execute()
.reset_index(drop=True)
)
sol = pd_table[["string_col"]].fillna("missing").reset_index(drop=True)
backend.assert_frame_equal(res, sol, check_dtype=False)

t = table.select("int_col", "double_col")
sol = (
pd_table[["int_col", "double_col"]]
.fillna(pd_table.int_col)
.reset_index(drop=True)
)
res = t.fill_null(t.int_col).execute().reset_index(drop=True)
backend.assert_frame_equal(res, sol, check_dtype=False)
res = t.fill_null(ibis._.int_col).execute().reset_index(drop=True)
backend.assert_frame_equal(res, sol, check_dtype=False)
res = t.fill_null("int_col").execute().reset_index(drop=True)
backend.assert_frame_equal(res, sol, check_dtype=False)


@NAN_TREATED_AS_NULL
def test_table_fill_null_nans_are_untouched(con):
# Test that NaNs are not filled when using fill_null

def make_comparable(vals):
return {"nan" if (isinstance(x, float) and np.isnan(x)) else x for x in vals}

pa_table = pa.table({"f": pa.array([1.0, float("nan"), None])})

before = ibis.memtable(pa_table)
actual_before = make_comparable(con.to_pyarrow(before.f).to_pylist())
assert actual_before == {1.0, "nan", None}

after = before.fill_null(0.0)
actual_after = make_comparable(con.to_pyarrow(after.f).to_pylist())
assert actual_after == {1.0, "nan", 0.0}


def test_mutate_rename(alltypes):
table = alltypes.select(["bool_col", "string_col"])
Expand Down
2 changes: 1 addition & 1 deletion ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ def schema(self):
class FillNull(Simple):
"""Fill null values in the table."""

replacements: typing.Union[Value[dt.Numeric | dt.String], FrozenDict[str, Any]]
replacements: FrozenDict[str, Value]


@public
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
r0 := UnboundTable: t
i int64
f float64

FillNull[r0]
replacements:
i: r0.i
f: r0.i
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
r0 := UnboundTable: t
a int64
b string
i int64
f float64

FillNull[r0]
replacements:
a: 3
i: 3
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
r0 := UnboundTable: t
a int64
b string
i int64
f float64

r1 := Project[r0]
a: r0.a

FillNull[r1]
FillNull[r0]
replacements:
3
i: 3
f: 3

This file was deleted.

10 changes: 5 additions & 5 deletions ibis/expr/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,16 +294,16 @@ def test_window_group_by(snapshot):


def test_fill_null(snapshot):
t = ibis.table(dict(a="int64", b="string"), name="t")
t = ibis.table(dict(i="int64", f="float64"), name="t")

expr = t.fill_null({"a": 3})
expr = t.fill_null({"i": 3})
snapshot.assert_match(repr(expr), "fill_null_dict_repr.txt")

expr = t[["a"]].fill_null(3)
expr = t.fill_null(3)
snapshot.assert_match(repr(expr), "fill_null_int_repr.txt")

expr = t[["b"]].fill_null("foo")
snapshot.assert_match(repr(expr), "fill_null_str_repr.txt")
expr = t.fill_null(t.i)
snapshot.assert_match(repr(expr), "fill_null_col_repr.txt")


def test_asof_join(snapshot):
Expand Down
1 change: 1 addition & 0 deletions ibis/expr/types/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ def fill_null(self, fill_value: Scalar, /) -> Self:
[`Value.isnull()`](./expression-generic.qmd#ibis.expr.types.generic.Value.isnull)
[`FloatingValue.isnan()`](./expression-numeric.qmd#ibis.expr.types.numeric.FloatingValue.isnan)
[`FloatingValue.isinf()`](./expression-numeric.qmd#ibis.expr.types.numeric.FloatingValue.isinf)
[`Table.fill_null()`](./expression-tables.qmd#ibis.expr.types.relations.Table.fill_null)

Examples
--------
Expand Down
Loading
Loading