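Fix more tests: normalize nulls to pd.NA in bigframes.testing helpers (values, index, series name), initialize ML metric Series as floats, assign confusion_matrix cells via .at, and switch tests to bigframes.testing assertions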

TrevorBergeron · TrevorBergeron · commit 3d4abc7038ae · 2026-02-19T22:58:52.000Z
diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py
@@ -214,7 +214,7 @@ def confusion_matrix(
         y_true = row["y_true"]
         y_pred = row["y_pred"]
         count = row["dummy"]
-        confusion_matrix[y_pred][y_true] = count
+        confusion_matrix.at[y_true, y_pred] = count
 
     return confusion_matrix
 
@@ -251,7 +251,7 @@ def recall_score(
         / is_accurate.groupby(y_true_series).count()
     ).to_pandas()
 
-    recall_score = pd.Series(0, index=index)
+    recall_score = pd.Series(0.0, index=index)
     for i in recall_score.index:
         recall_score.loc[i] = recall.loc[i]
 
@@ -321,7 +321,7 @@ def _precision_score_per_label(y_true: bpd.Series, y_pred: bpd.Series) -> pd.Ser
         is_accurate.groupby(y_pred).sum() / is_accurate.groupby(y_pred).count()
     ).to_pandas()
 
-    precision_score = pd.Series(0, index=index)
+    precision_score = pd.Series(0.0, index=index)
     for i in precision.index:
         precision_score.loc[i] = precision.loc[i]
 
@@ -366,7 +366,7 @@ def f1_score(
     recall = recall_score(y_true_series, y_pred_series, average=None)
     precision = precision_score(y_true_series, y_pred_series, average=None)
 
-    f1_score = pd.Series(0, index=recall.index)
+    f1_score = pd.Series(0.0, index=recall.index)
     for index in recall.index:
         if precision[index] + recall[index] != 0:
             f1_score[index] = (
diff --git a/bigframes/testing/utils.py b/bigframes/testing/utils.py
@@ -15,7 +15,7 @@
 import base64
 import decimal
 import re
-from typing import Iterable, Optional, Sequence, Set, Union
+from typing import Iterable, Optional, Sequence, Set, TypeVar, Union
 
 import geopandas as gpd  # type: ignore
 import google.api_core.operation
@@ -68,6 +68,8 @@
     "content",
 ]
 
+SeriesOrIndexT = TypeVar("SeriesOrIndexT", pd.Series, pd.Index)
+
 
 def pandas_major_version() -> int:
     match = re.search(r"^v?(\d+)", pd.__version__.strip())
@@ -90,15 +92,27 @@ def assert_series_equivalent(pd_series: pd.Series, bf_series: bpd.Series, **kwar
 
 def _normalize_all_nulls(col: pd.Series) -> pd.Series:
     # This over-normalizes probably, make more conservative later
-    if col.hasnans and (
-        pd_types.is_float_dtype(col.dtype) or pd_types.is_integer_dtype(col.dtype)
-    ):
-        col = col.astype("float64")
+    if col.hasnans and (pd_types.is_float_dtype(col.dtype)):
+        col = col.astype("float64").astype("Float64")
     if pd_types.is_object_dtype(col):
-        col = col.fillna(float("nan"))
+        col = col.fillna(pd.NA)
     return col
 
 
+def _normalize_index_nulls(idx: pd.Index) -> pd.Index:
+    if isinstance(idx, pd.MultiIndex):
+        new_levels = [
+            _normalize_index_nulls(idx.get_level_values(i)) for i in range(idx.nlevels)
+        ]
+        return pd.MultiIndex.from_arrays(new_levels, names=idx.names)
+    if idx.hasnans:
+        if pd_types.is_float_dtype(idx.dtype) or pd_types.is_integer_dtype(idx.dtype):
+            idx = idx.astype("float64").astype("Float64")
+        if pd_types.is_object_dtype(idx.dtype):
+            idx = idx.fillna(pd.NA)
+    return idx
+
+
 def assert_frame_equal(
     left: pd.DataFrame,
     right: pd.DataFrame,
@@ -123,6 +137,8 @@ def assert_frame_equal(
     if nulls_are_nan:
         left = left.apply(_normalize_all_nulls)
         right = right.apply(_normalize_all_nulls)
+        left.index = _normalize_index_nulls(left.index)
+        right.index = _normalize_index_nulls(right.index)
 
     pd.testing.assert_frame_equal(left, right, **kwargs)
 
@@ -155,6 +171,10 @@ def assert_series_equal(
     if nulls_are_nan:
         left = _normalize_all_nulls(left)
         right = _normalize_all_nulls(right)
+        left.index = _normalize_index_nulls(left.index)
+        right.index = _normalize_index_nulls(right.index)
+        left.name = pd.NA if pd.isna(left.name) else left.name  # type: ignore
+        right.name = pd.NA if pd.isna(right.name) else right.name  # type: ignore
 
     pd.testing.assert_series_equal(left, right, **kwargs)
 
diff --git a/tests/system/small/core/test_reshape.py b/tests/system/small/core/test_reshape.py
@@ -13,11 +13,11 @@
 # limitations under the License.
 
 import pandas as pd
-import pandas.testing
 import pytest
 
 from bigframes import session
 from bigframes.core.reshape import merge
+import bigframes.testing
 
 
 @pytest.mark.parametrize(
@@ -56,7 +56,7 @@ def test_join_with_index(
         how=how,
     )
 
-    pandas.testing.assert_frame_equal(
+    bigframes.testing.assert_frame_equal(
         bf_result, pd_result, check_dtype=False, check_index_type=False
     )
 
diff --git a/tests/system/small/ml/test_utils.py b/tests/system/small/ml/test_utils.py
@@ -13,10 +13,10 @@
 # limitations under the License.
 
 import pandas as pd
-import pandas.testing
 import pytest
 
 import bigframes.ml.utils as utils
+import bigframes.testing
 
 _DATA_FRAME = pd.DataFrame({"column": [1, 2, 3]})
 _SERIES = pd.Series([1, 2, 3], name="column")
@@ -31,7 +31,7 @@ def test_convert_to_dataframe(session, data):
 
     (actual_result,) = utils.batch_convert_to_dataframe(bf_data)
 
-    pandas.testing.assert_frame_equal(
+    bigframes.testing.assert_frame_equal(
         actual_result.to_pandas(),
         _DATA_FRAME,
         check_index_type=False,
@@ -46,7 +46,7 @@ def test_convert_to_dataframe(session, data):
 def test_convert_pandas_to_dataframe(data, session):
     (actual_result,) = utils.batch_convert_to_dataframe(data, session=session)
 
-    pandas.testing.assert_frame_equal(
+    bigframes.testing.assert_frame_equal(
         actual_result.to_pandas(),
         _DATA_FRAME,
         check_index_type=False,
@@ -63,7 +63,7 @@ def test_convert_to_series(session, data):
 
     (actual_result,) = utils.batch_convert_to_series(bf_data)
 
-    pandas.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         actual_result.to_pandas(), _SERIES, check_index_type=False, check_dtype=False
     )
 
@@ -75,6 +75,6 @@ def test_convert_to_series(session, data):
 def test_convert_pandas_to_series(data, session):
     (actual_result,) = utils.batch_convert_to_series(data, session=session)
 
-    pandas.testing.assert_series_equal(
+    bigframes.testing.assert_series_equal(
         actual_result.to_pandas(), _SERIES, check_index_type=False, check_dtype=False
     )
diff --git a/tests/system/small/operations/test_timedeltas.py b/tests/system/small/operations/test_timedeltas.py
@@ -25,6 +25,9 @@
 from bigframes import dtypes
 import bigframes.testing
 
+# Some methods/features used by this test don't exist in pandas 1.x
+pytest.importorskip("pandas", minversion="2.0.0")
+
 
 @pytest.fixture(scope="module")
 def temporal_dfs(session):
diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py