Skip to content

Commit 3dc9b67

Browse files
authored
Add standard entrypoints tests for Modin (#76)
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent 4296995 commit 3dc9b67

12 files changed

+79
-34
lines changed

dataframe_api_compat/modin_standard/__init__.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,7 @@ def map_standard_dtype_to_pandas_dtype(dtype: DType) -> Any:
168168
def convert_to_standard_compliant_column(
169169
ser: pd.Series[Any],
170170
api_version: str | None = None,
171-
) -> Column: # pragma: no cover
172-
# TODO: remove pragma after modin implements `__column_consortium_standard__`
171+
) -> Column:
173172
if ser.name is not None and not isinstance(ser.name, str):
174173
msg = f"Expected column with string name, got: {ser.name}"
175174
raise ValueError(msg)

dataframe_api_compat/modin_standard/dataframe_object.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,7 @@ def rename(self, mapping: Mapping[str, str]) -> DataFrame:
211211
self.dataframe.rename(columns=mapping),
212212
)
213213

214-
def get_column_names(self) -> list[str]: # pragma: no cover
215-
# TODO: add a test after modin implements `__dataframe_consortium_standard__`
214+
def get_column_names(self) -> list[str]:
216215
# DO NOT REMOVE
217216
# This one is used in upstream tests - even if deprecated,
218217
# just leave it in for backwards compatibility

requirements-dev-modin.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
covdefaults
2-
modin[ray]
2+
modin[ray]@git+https://github.com/modin-project/modin@main
33
pre-commit
44
pytest
55
pytest-cov

tests/column/comparisons_test.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def test_column_comparisons(
4141
other = df.col("b")
4242
result = df.assign(getattr(ser, comparison)(other).rename("result"))
4343
expected_ns_dtype = getattr(ns, expected_dtype)
44-
if comparison == "__pow__" and library.name in ("polars", "polars-lazy"):
44+
if comparison == "__pow__" and library.name == "polars-lazy":
4545
# TODO
4646
result = result.cast({"result": ns.Int64()})
4747
expected_ns_dtype = ns.Int64
@@ -79,7 +79,7 @@ def test_column_comparisons_scalar(
7979
other = 3
8080
result = df.assign(getattr(ser, comparison)(other).rename("result"))
8181
expected_ns_dtype = getattr(ns, expected_dtype)
82-
if comparison == "__pow__" and library.name in ("polars", "polars-lazy"):
82+
if comparison == "__pow__" and library.name == "polars-lazy":
8383
result = result.cast({"result": ns.Int64()})
8484
expected_ns_dtype = ns.Int64
8585
compare_column_with_reference(result.col("result"), expected_data, expected_ns_dtype)

tests/column/name_test.py

+19-9
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
from __future__ import annotations
22

3-
import pandas as pd
43
import pytest
54
from packaging.version import Version
6-
from packaging.version import parse
75

86
from tests.utils import BaseHandler
97
from tests.utils import convert_to_standard_compliant_dataframe
108
from tests.utils import integer_dataframe_1
9+
from tests.utils import pandas_version
1110

1211

1312
def test_name(library: BaseHandler) -> None:
@@ -17,15 +16,26 @@ def test_name(library: BaseHandler) -> None:
1716

1817

1918
def test_pandas_name_if_0_named_column() -> None:
19+
import pandas as pd
20+
2021
df = convert_to_standard_compliant_dataframe(pd.DataFrame({0: [1, 2, 3]}))
2122
assert df.column_names == [0] # type: ignore[comparison-overlap]
2223
assert [col.name for col in df.iter_columns()] == [0] # type: ignore[comparison-overlap]
2324

2425

25-
@pytest.mark.skipif(
26-
parse(pd.__version__) < Version("2.1.0"),
27-
reason="before consoritum standard",
28-
)
29-
def test_invalid_name_pandas() -> None:
30-
with pytest.raises(ValueError):
31-
pd.Series([1, 2, 3], name=0).__column_consortium_standard__()
26+
def test_invalid_column_name(library: BaseHandler) -> None:
27+
if library.name in ("pandas-numpy", "pandas-nullable"):
28+
import pandas as pd
29+
30+
if pandas_version() < Version("2.1.0"): # pragma: no cover
31+
pytest.skip(reason="before consoritum standard")
32+
with pytest.raises(ValueError):
33+
pd.Series([1, 2, 3], name=0).__column_consortium_standard__()
34+
elif library.name == "modin":
35+
import modin.pandas as pd
36+
37+
with pytest.raises(ValueError):
38+
pd.Series([1, 2, 3], name=0).__column_consortium_standard__()
39+
else: # pragma: no cover
40+
msg = f"Not supported library: {library}"
41+
raise AssertionError(msg)

tests/column/pow_test.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def test_int_powers_column(library: BaseHandler) -> None:
3333
ser = df.col("a")
3434
other = df.col("b") * 1
3535
result = df.assign(ser.__pow__(other).rename("result"))
36-
if library.name in ("polars", "polars-lazy"):
36+
if library.name == "polars-lazy":
3737
result = result.cast({name: ns.Int64() for name in ("a", "b", "result")})
3838
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]}
3939
expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")}
@@ -46,7 +46,7 @@ def test_int_powers_scalar_column(library: BaseHandler) -> None:
4646
ser = df.col("a")
4747
other = 1
4848
result = df.assign(ser.__pow__(other).rename("result"))
49-
if library.name in ("polars", "polars-lazy"):
49+
if library.name == "polars-lazy":
5050
result = result.cast({name: ns.Int64() for name in ("a", "b", "result")})
5151
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]}
5252
expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")}

tests/conftest.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,8 @@ def pytest_generate_tests(metafunc: Any) -> None:
6363
ci_skip_ids = [
6464
# polars does not allow creating a dataframe with non-unique column names
6565
"non_unique_column_names_test.py::test_repeated_columns[polars-lazy]",
66-
# TODO: enable after modin adds implementation for standard
67-
"scale_column_test.py::test_scale_column[modin]",
68-
"scale_column_test.py::test_scale_column_polars_from_persisted_df[modin]",
69-
"convert_to_standard_column_test.py::test_convert_to_std_column[modin]",
66+
# polars does not allow creating a series whose name is not a string
67+
"name_test.py::test_invalid_column_name[polars-lazy]",
7068
]
7169

7270

tests/integration/scale_column_test.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ def test_scale_column(library: BaseHandler) -> None:
2424

2525
s = pl.Series("a", [1, 2, 3])
2626
ser = s.__column_consortium_standard__()
27+
elif library.name == "modin":
28+
import modin.pandas as pd
29+
30+
s = pd.Series([1, 2, 3], name="a")
31+
ser = s.__column_consortium_standard__()
2732
else: # pragma: no cover
2833
msg = f"Not supported library: {library}"
2934
raise AssertionError(msg)
@@ -33,7 +38,7 @@ def test_scale_column(library: BaseHandler) -> None:
3338
compare_column_with_reference(ser, [-1, 0, 1.0], dtype=ns.Float64)
3439

3540

36-
def test_scale_column_polars_from_persisted_df(library: BaseHandler) -> None:
41+
def test_scale_column_from_persisted_df(library: BaseHandler) -> None:
3742
if library.name in ("pandas-numpy", "pandas-nullable"):
3843
if pandas_version() < Version("2.1.0"): # pragma: no cover
3944
pytest.skip(reason="pandas doesn't support 3.8")
@@ -48,6 +53,11 @@ def test_scale_column_polars_from_persisted_df(library: BaseHandler) -> None:
4853

4954
df = pl.DataFrame({"a": [1, 2, 3]})
5055
ser = df.__dataframe_consortium_standard__().col("a")
56+
elif library.name == "modin":
57+
import modin.pandas as pd
58+
59+
df = pd.DataFrame({"a": [1, 2, 3]})
60+
ser = df.__dataframe_consortium_standard__().col("a")
5161
else: # pragma: no cover
5262
msg = f"Not supported library: {library}"
5363
raise AssertionError(msg)

tests/integration/upstream_test.py

+20
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,23 @@ def test_pandas(self) -> None:
6363

6464
ser = pd.Series([1, 2, 3], name="a")
6565
assert ser.name == "a"
66+
67+
68+
class TestModin:
69+
def test_pandas(self) -> None:
70+
"""
71+
Test some basic methods of the dataframe consortium standard.
72+
73+
Full testing is done at https://github.com/data-apis/dataframe-api-compat,
74+
this is just to check that the entry point works as expected.
75+
"""
76+
pd = pytest.importorskip("modin.pandas")
77+
78+
df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
79+
df = df_pd.__dataframe_consortium_standard__()
80+
result_1 = df.get_column_names()
81+
expected_1 = ["a", "b"]
82+
assert result_1 == expected_1
83+
84+
ser = pd.Series([1, 2, 3], name="a")
85+
assert ser.name == "a"

tests/namespace/convert_to_standard_column_test.py

+18-8
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,29 @@ def test_convert_to_std_column(library: BaseHandler) -> None:
1414
pytest.skip(reason="before consortium standard in pandas")
1515
import pandas as pd
1616

17-
s = pd.Series([1, 2, 3]).__column_consortium_standard__()
18-
assert float(s.mean()) == 2
19-
s = pd.Series([1, 2, 3], name="alice").__column_consortium_standard__()
20-
assert float(s.mean()) == 2
17+
ser = pd.Series([1, 2, 3]).__column_consortium_standard__()
18+
ser_with_name = pd.Series(
19+
[1, 2, 3],
20+
name="alice",
21+
).__column_consortium_standard__()
2122
elif library.name == "polars-lazy":
2223
if polars_version() < Version("0.19.0"): # pragma: no cover
2324
pytest.skip(reason="before consortium standard in polars")
2425
import polars as pl
2526

26-
s = pl.Series([1, 2, 3]).__column_consortium_standard__()
27-
assert float(s.mean()) == 2
28-
s = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
29-
assert float(s.mean()) == 2
27+
ser = pl.Series([1, 2, 3]).__column_consortium_standard__()
28+
ser_with_name = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
29+
elif library.name == "modin":
30+
import modin.pandas as pd
31+
32+
ser = pd.Series([1, 2, 3]).__column_consortium_standard__()
33+
ser_with_name = pd.Series(
34+
[1, 2, 3],
35+
name="alice",
36+
).__column_consortium_standard__()
3037
else: # pragma: no cover
3138
msg = f"Not supported library: {library}"
3239
raise AssertionError(msg)
40+
41+
assert float(ser.mean()) == 2
42+
assert float(ser_with_name.mean()) == 2

tests/namespace/sorted_indices_test.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_column_sorted_indices_ascending(library: BaseHandler) -> None:
2020
"b": [4, 4, 3, 1, 2],
2121
"result": [3, 4, 2, 1, 0],
2222
}
23-
if library.name in ("polars", "polars-lazy"):
23+
if library.name == "polars-lazy":
2424
result = result.cast({"result": ns.Int64()})
2525
try:
2626
compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64)
@@ -44,7 +44,7 @@ def test_column_sorted_indices_descending(library: BaseHandler) -> None:
4444
"b": [4, 4, 3, 1, 2],
4545
"result": [0, 1, 2, 4, 3],
4646
}
47-
if library.name in ("polars", "polars-lazy"):
47+
if library.name == "polars-lazy":
4848
result = result.cast({"result": ns.Int64()})
4949
try:
5050
compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64)

tests/utils.py

-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ def convert_to_standard_compliant_dataframe(
150150
df: pd.DataFrame | pl.DataFrame,
151151
api_version: str | None = None,
152152
) -> DataFrame:
153-
# TODO: type return
154153
import pandas as pd
155154

156155
try:

0 commit comments

Comments
 (0)