Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,7 @@ Bug fixes
Categorical
^^^^^^^^^^^
- Bug in :meth:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
- Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
- Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def __init__(
if isinstance(values, Index):
arr = values._data._pa_array.combine_chunks()
else:
arr = values._pa_array.combine_chunks()
arr = extract_array(values)._pa_array.combine_chunks()
categories = arr.dictionary.to_pandas(types_mapper=ArrowDtype)
codes = arr.indices.to_numpy()
dtype = CategoricalDtype(categories, values.dtype.pyarrow_dtype.ordered)
Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import pandas as pd
from pandas import (
ArrowDtype,
Categorical,
DataFrame,
Grouper,
Expand Down Expand Up @@ -2851,3 +2852,31 @@ def test_pivot_margins_with_none_index(self):
),
)
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_pivot_with_pyarrow_categorical(self):
    # GH#53051
    # Pivoting on a column backed by a pyarrow dictionary dtype should
    # produce a regular pivoted frame.
    pa = pytest.importorskip("pyarrow")

    dictionary_dtype = ArrowDtype(pa.dictionary(pa.int32(), pa.string()))
    raw = DataFrame({"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]})
    frame = raw.astype(
        {"string_column": dictionary_dtype, "number_column": "float[pyarrow]"}
    )

    result = frame.pivot(columns=["string_column"], values=["number_column"])

    # One output column per distinct string value, all under "number_column".
    expected_columns = MultiIndex.from_arrays(
        [["number_column", "number_column", "number_column"], ["A", "B", "C"]],
        names=(None, "string_column"),
    )
    # Each row only has a value in the column matching its own string value.
    expected = DataFrame(
        [[1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]],
        columns=expected_columns,
    )

    # Only the values and labels are compared; dtypes and column index type
    # are deliberately excluded from the comparison.
    tm.assert_frame_equal(
        result, expected, check_dtype=False, check_column_type=False
    )
29 changes: 29 additions & 0 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pandas as pd
from pandas import (
ArrowDtype,
DataFrame,
MultiIndex,
Series,
Expand Down Expand Up @@ -318,6 +319,34 @@ def test_multiindex_dt_with_nan(self):
expected = Series(["a", "b", "c", "d"], name=("sub", np.nan))
tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_multiindex_with_pyarrow_categorical(self):
    # GH#53051
    # Building a MultiIndex via set_index from a column backed by a pyarrow
    # dictionary dtype should work like it does for any other column.
    pa = pytest.importorskip("pyarrow")

    dictionary_dtype = ArrowDtype(pa.dictionary(pa.int32(), pa.string()))
    raw = DataFrame({"string_column": ["A", "B", "C"], "number_column": [1, 2, 3]})
    frame = raw.astype(
        {"string_column": dictionary_dtype, "number_column": "float[pyarrow]"}
    )

    result = frame.set_index(["string_column", "number_column"])

    # Both columns move into the index, so the expected frame has no columns.
    expected_index = MultiIndex.from_arrays(
        [["A", "B", "C"], [1, 2, 3]],
        names=["string_column", "number_column"],
    )
    expected = DataFrame(index=expected_index)

    # Index and column types are deliberately excluded from the comparison.
    tm.assert_frame_equal(
        result,
        expected,
        check_index_type=False,
        check_column_type=False,
    )


class TestSorted:
"""everything you wanted to test about sorting"""
Expand Down