Skip to content

Commit 33f7b5c

Browse files
add explicit tests and fix insert for empty object dtype
1 parent 3deda60 commit 33f7b5c

File tree

5 files changed

+65
-7
lines changed

5 files changed

+65
-7
lines changed

pandas/core/indexes/base.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6242,6 +6242,7 @@ def _find_common_type_compat(self, target) -> DtypeObj:
62426242
# special case: if left or right is a zero-length RangeIndex or
62436243
# Index[object], those can be created by the default empty constructors
62446244
# -> for that case ignore this dtype and always return the other
6245+
# (https://github.com/pandas-dev/pandas/pull/60797)
62456246
from pandas.core.indexes.range import RangeIndex
62466247

62476248
if len(self) == 0 and (
@@ -6908,6 +6909,14 @@ def insert(self, loc: int, item) -> Index:
69086909

69096910
arr = self._values
69106911

6912+
if using_string_dtype and len(self) == 0 and self.dtype == np.object_:
6913+
# special case: if we are an empty object-dtype Index, also
6914+
# take into account the inserted item for the resulting dtype
6915+
# (https://github.com/pandas-dev/pandas/pull/60797)
6916+
dtype = self._find_common_type_compat(item)
6917+
if dtype != self.dtype:
6918+
return self.astype(dtype).insert(loc, item)
6919+
69116920
try:
69126921
if isinstance(arr, ExtensionArray):
69136922
res_values = arr.insert(loc, item)

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,9 @@ def test_setitem_different_dtype(self):
144144
)
145145
tm.assert_series_equal(result, expected)
146146

147-
def test_setitem_empty_columns(self):
148-
# GH 13522
147+
def test_setitem_overwrite_index(self):
148+
# GH 13522 - assign the index as a column and then overwrite the values
149+
# -> should not affect the index
149150
df = DataFrame(index=["A", "B", "C"])
150151
df["X"] = df.index
151152
df["X"] = ["x", "y", "z"]
@@ -154,6 +155,21 @@ def test_setitem_empty_columns(self):
154155
)
155156
tm.assert_frame_equal(df, exp)
156157

158+
def test_setitem_empty_columns(self):
159+
# Starting from an empty DataFrame and setting a column should result
160+
# in a default string dtype for the columns' Index
161+
# https://github.com/pandas-dev/pandas/issues/60338
162+
163+
df = DataFrame()
164+
df["foo"] = [1, 2, 3]
165+
expected = DataFrame({"foo": [1, 2, 3]})
166+
tm.assert_frame_equal(df, expected)
167+
168+
df = DataFrame(columns=Index([]))
169+
df["foo"] = [1, 2, 3]
170+
expected = DataFrame({"foo": [1, 2, 3]})
171+
tm.assert_frame_equal(df, expected)
172+
157173
def test_setitem_dt64_index_empty_columns(self):
158174
rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
159175
df = DataFrame(index=np.arange(len(rng)))

pandas/tests/frame/methods/test_reset_index.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,3 +778,34 @@ def test_reset_index_false_index_name():
778778
result_frame.reset_index()
779779
expected_frame = DataFrame(range(5, 10), RangeIndex(range(5), name=False))
780780
tm.assert_frame_equal(result_frame, expected_frame)
781+
782+
783+
@pytest.mark.parametrize("columns", [None, Index([])])
784+
def test_reset_index_with_empty_frame(columns):
785+
# Currently empty DataFrame has RangeIndex or object dtype Index, but when
786+
# resetting the index we still want to end up with the default string dtype
787+
# https://github.com/pandas-dev/pandas/issues/60338
788+
789+
index = Index([], name="foo")
790+
df = DataFrame(index=index, columns=columns)
791+
result = df.reset_index()
792+
expected = DataFrame(columns=["foo"])
793+
tm.assert_frame_equal(result, expected)
794+
795+
index = Index([1, 2, 3], name="foo")
796+
df = DataFrame(index=index, columns=columns)
797+
result = df.reset_index()
798+
expected = DataFrame({"foo": [1, 2, 3]})
799+
tm.assert_frame_equal(result, expected)
800+
801+
index = MultiIndex.from_tuples([], names=["foo", "bar"])
802+
df = DataFrame(index=index, columns=columns)
803+
result = df.reset_index()
804+
expected = DataFrame(columns=["foo", "bar"])
805+
tm.assert_frame_equal(result, expected)
806+
807+
index = MultiIndex.from_tuples([(1, 2), (2, 3)], names=["foo", "bar"])
808+
df = DataFrame(index=index, columns=columns)
809+
result = df.reset_index()
810+
expected = DataFrame({"foo": [1, 2], "bar": [2, 3]})
811+
tm.assert_frame_equal(result, expected)

pandas/tests/indexes/base_class/test_reshape.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def test_insert(self):
3434

3535
# test empty
3636
null_index = Index([])
37-
tm.assert_index_equal(Index(["a"], dtype=object), null_index.insert(0, "a"))
37+
tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))
3838

3939
def test_insert_missing(self, nulls_fixture, using_infer_string):
4040
# GH#22295

pandas/tests/indexes/test_old_base.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -454,10 +454,12 @@ def test_insert_out_of_bounds(self, index, using_infer_string):
454454
else:
455455
msg = "slice indices must be integers or None or have an __index__ method"
456456

457-
if using_infer_string and (
458-
index.dtype == "string" or index.dtype == "category"
459-
):
460-
msg = "loc must be an integer between"
457+
if using_infer_string:
458+
if index.dtype == "string" or index.dtype == "category":
459+
msg = "loc must be an integer between"
460+
elif index.dtype == "object" and len(index) == 0:
461+
msg = "loc must be an integer between"
462+
err = TypeError
461463

462464
with pytest.raises(err, match=msg):
463465
index.insert(0.5, "foo")

0 commit comments

Comments
 (0)