Spark expr replace strict #2254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
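
For context: this PR implements Expr.replace_strict for the Spark-like backend (PySpark and SQLFrame) and marks it not_implemented for the Dask and DuckDB backends. A minimal usage sketch, assuming the public narwhals API that the tests below exercise (the session setup is illustrative, not from the PR):

import narwhals as nw
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
native = spark.createDataFrame([(1,), (2,), (3,)], ["a"])

# PySpark frames enter narwhals as lazy frames; replace_strict maps each
# value of "a" through old -> new and fails if any non-null value is unmapped.
lf = nw.from_native(native)
result = lf.select(
    nw.col("a").replace_strict([1, 2, 3], ["one", "two", "three"], return_dtype=nw.String)
)
print(result.to_native().toPandas())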
narwhals/_compliant/expr.py (0 additions & 1 deletion)

@@ -886,7 +886,6 @@ class LazyExpr(  # type: ignore[misc]
     map_batches: not_implemented = not_implemented()
     ewm_mean: not_implemented = not_implemented()
     gather_every: not_implemented = not_implemented()
-    replace_strict: not_implemented = not_implemented()
     cat: not_implemented = not_implemented()  # pyright: ignore[reportAssignmentType]

     @classmethod
narwhals/_dask/expr.py (1 addition & 0 deletions)

@@ -688,3 +688,4 @@ def dt(self) -> DaskExprDateTimeNamespace:
     list = not_implemented()  # pyright: ignore[reportAssignmentType]
     struct = not_implemented()  # pyright: ignore[reportAssignmentType]
     rank = not_implemented()  # pyright: ignore[reportAssignmentType]
+    replace_strict = not_implemented()
narwhals/_duckdb/expr.py (1 addition & 0 deletions)

@@ -757,3 +757,4 @@ def struct(self) -> DuckDBExprStructNamespace:

     drop_nulls = not_implemented()
     unique = not_implemented()
+    replace_strict = not_implemented()
narwhals/_spark_like/expr.py (61 additions & 0 deletions)

@@ -1,10 +1,12 @@
 from __future__ import annotations

 import operator
+from itertools import chain
 from typing import TYPE_CHECKING
 from typing import Any
 from typing import Callable
 from typing import Literal
+from typing import Mapping
 from typing import Sequence
 from typing import cast

@@ -560,6 +562,65 @@ def _len(_input: Column) -> Column:

         return self._with_callable(_len)

+    def replace_strict(
+        self,
+        old: Sequence[Any] | Mapping[Any, Any],
+        new: Sequence[Any],
+        *,
+        return_dtype: DType | type[DType] | None,
+    ) -> Self:
+        mapping = old if isinstance(old, Mapping) else dict(zip(old, new))
+
+        mapping_keys = list(mapping.keys())
+
+        # Create an array of all valid keys for our IN check
+        # Note: None/null handling is special in Spark - we'll handle it separately
+        non_null_keys = [k for k in mapping_keys if k is not None]
+        has_null_key = None in mapping_keys
+
+        mapping_expr = self._F.create_map(
+            [self._F.lit(x) for x in chain(*mapping.items())]
+        )
+
+        def _replace_strict(_input: Column) -> Column:
+            validation_expr = (
+                self._F.when(
+                    _input.isNull() & self._F.lit(has_null_key),
+                    self._F.lit(True),  # noqa: FBT003
+                )
+                .when(_input.isNull() & ~self._F.lit(has_null_key), self._F.lit(False))  # noqa: FBT003
+                .otherwise(
+                    self._F.array_contains(
+                        self._F.array([self._F.lit(k) for k in non_null_keys]), _input
+                    )
+                )
+            )

Comment on lines +586 to +597 (Member):
I might need an extra look at this
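
A quick sketch of the null subtlety the validation handles (standalone PySpark example, not the PR's code): array_contains yields NULL rather than False when the probed value is NULL, so null inputs need the explicit has_null_key branches above.

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1,), (None,)], ["a"])

# a=1 -> true, a=NULL -> NULL (not false): a bare membership check would
# leave null rows in limbo instead of marking them valid or invalid.
df.select(
    F.array_contains(F.array(F.lit(1), F.lit(2)), F.col("a")).alias("is_valid_key")
).show()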


+
+            mapped_col = (
+                mapping_expr[_input]
+                if self._implementation.is_pyspark()
+                else mapping_expr.getItem(_input)
+            )

Comment on lines +599 to +603 (Member):
PySpark raises a warning for mapping_expr.getItem(_input), while SQLFrame raises an exception for mapping_expr[_input] (Column is not callable)
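
A hedged illustration of the two spellings this branch switches between (recent PySpark assumed; both look up _input as a key in the MAP column built by create_map):

from pyspark.sql import functions as F

mapping_expr = F.create_map(F.lit(1), F.lit("one"), F.lit(2), F.lit("two"))
key = F.col("a")

via_index = mapping_expr[key]            # preferred on PySpark
via_getitem = mapping_expr.getItem(key)  # needed on SQLFrame; warns on PySpark

Either spelling yields NULL for keys absent from the map, which is why strictness is enforced by the validation expression above rather than by the lookup itself.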


+
+            try:
+                result = self._F.when(validation_expr, mapped_col).otherwise(
+                    self._F.assert_true(self._F.lit(False))  # noqa: FBT003
+                )
+            except Exception as exc:
+                msg = "replace_strict did not replace all non-null values."
+                raise ValueError(msg) from exc

Comment on the assert_true call (Member):
SQLFrame does not have assert_true for the duckdb backend, that's why I am still xfailing it in tests
Comment on lines +609 to +611 (Member):
Also, I am not really able to capture any Spark exception at this level
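
That matches Spark's execution model; a standalone sketch (names assumed, not the PR's code) of why the try/except above only catches expression-build-time errors such as SQLFrame's missing assert_true, never the assertion itself:

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(9,)], ["a"])

# Constructing the failing expression raises nothing in Python...
expr = F.when(F.lit(False), F.col("a")).otherwise(F.assert_true(F.lit(False)))

try:
    out = df.select(expr)  # ...and select() is lazy, so no error here either
    out.collect()          # the assertion only fires once the plan executes
except Exception as exc:
    # Surfaces as a Spark/Py4J runtime error at collect() time, far from the
    # expression-building try/except in the diff above.
    print(type(exc).__name__)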


+            if return_dtype is not None:
+                result = result.cast(
+                    narwhals_to_native_dtype(
+                        return_dtype, self._version, self._native_dtypes
+                    )
+                )
+
+            return result
+
+        return self._with_callable(_replace_strict)

     def round(self, decimals: int) -> Self:
         def _round(_input: Column) -> Column:
             return self._F.round(_input, decimals)
tests/expr_and_series/replace_strict_test.py (3 additions & 3 deletions)

@@ -24,7 +24,7 @@ def test_replace_strict(
 ) -> None:
     if "dask" in str(constructor):
         request.applymarker(pytest.mark.xfail)
-    if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
+    if ("sqlframe" in str(constructor)) or "duckdb" in str(constructor):
         request.applymarker(pytest.mark.xfail)
     df = nw.from_native(constructor({"a": [1, 2, 3]}))
     result = df.select(
@@ -59,7 +59,7 @@ def test_replace_non_full(
 ) -> None:
     if "dask" in str(constructor):
         request.applymarker(pytest.mark.xfail)
-    if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
+    if "duckdb" in str(constructor):
         request.applymarker(pytest.mark.xfail)
     df = nw.from_native(constructor({"a": [1, 2, 3]}))
     if isinstance(df, nw.LazyFrame):
@@ -80,7 +80,7 @@ def test_replace_strict_mapping(
 ) -> None:
     if "dask" in str(constructor):
         request.applymarker(pytest.mark.xfail)
-    if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
+    if "duckdb" in str(constructor):
         request.applymarker(pytest.mark.xfail)

     df = nw.from_native(constructor({"a": [1, 2, 3]}))
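
For reference, a hedged sketch of the strictness contract these tests exercise (narwhals public API assumed; the exact exception type differs per backend, per the review comments above). A pandas frame stands in for any supported backend:

import narwhals as nw
import pandas as pd

df = nw.from_native(pd.DataFrame({"a": [1, 2, 3]}))

# Exhaustive mapping: every non-null value of "a" is covered, so this succeeds.
df.select(
    nw.col("a").replace_strict({1: "one", 2: "two", 3: "three"}, return_dtype=nw.String)
)

# Non-exhaustive: 3 has no replacement, so replace_strict is expected to raise.
try:
    df.select(nw.col("a").replace_strict([1, 2], ["one", "two"], return_dtype=nw.String))
except Exception as exc:
    print(exc)  # unreplaced non-null value -> error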