rapidsai · rapids-bot · Feb 10, 2025 · Feb 7, 2025 · Feb 8, 2025 · Feb 8, 2025
@@ -1643,12 +1643,25 @@ def _normalize_aggs(
         the keys. The aggs are applied to the corresponding column in the tuple.
         Each agg can be string or lambda functions.
         """
-
         aggs_per_column: Iterable[AggType | Iterable[AggType]]
         # TODO: Remove isinstance condition when the legacy dask_cudf API is removed.
         # See https://github.com/rapidsai/cudf/pull/16528#discussion_r1715482302 for information.
         if aggs or isinstance(aggs, dict):
             if isinstance(aggs, dict):
+                if any(
+                    is_list_like(values) and len(set(values)) != len(values)  # type: ignore[arg-type]
+                    for values in aggs.values()
+                ):
+                    if cudf.get_option("mode.pandas_compatible"):
+                        raise NotImplementedError(
+                            "Duplicate aggregations per column are currently not supported."
+                        )
+                    else:
+                        warnings.warn(
+                            "Duplicate aggregations per column found. "
+                            "The resulting duplicate columns will be dropped.",
+                            UserWarning,
+                        )
                 column_names, aggs_per_column = aggs.keys(), aggs.values()
                 columns = tuple(self.obj._data[col] for col in column_names)
             else:

@@ -4115,3 +4115,20 @@ def test_scan_int_null_pandas_compatible(op):
     with cudf.option_context("mode.pandas_compatible", True):
         result = getattr(df_cudf.groupby("b")["a"], op)()
     assert_eq(result, expected)
+
+
+def test_agg_duplicate_aggs_pandas_compat_raises():
+    agg = {"b": ["mean", "mean"]}
+    dfgb = cudf.DataFrame({"a": [1, 1, 2], "b": [4, 5, 6]}).groupby(["a"])
+    with cudf.option_context("mode.pandas_compatible", True):
+        with pytest.raises(NotImplementedError):
+            dfgb.agg(agg)
+
+    with pytest.warns(UserWarning):
+        result = dfgb.agg(agg)
+    expected = cudf.DataFrame(
+        [4.5, 6.0],
+        index=cudf.Index([1, 2], name="a"),
+        columns=pd.MultiIndex.from_tuples([("b", "mean")]),
+    )
+    assert_eq(result, expected)