Skip to content

feat(index): append #1282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion pandas-stubs/core/indexes/base.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ from typing import (
ClassVar,
Literal,
TypeAlias,
TypeVar,
final,
overload,
)
Expand Down Expand Up @@ -43,6 +44,7 @@ from typing_extensions import (
from pandas._libs.interval import _OrderableT
from pandas._typing import (
S1,
S2,
AnyAll,
AxesData,
DropKeep,
Expand All @@ -64,6 +66,8 @@ from pandas._typing import (
type_t,
)

_T_INDEX = TypeVar("_T_INDEX", bound=Index) # ty: ignore[unresolved-reference]

class InvalidIndexError(Exception): ...

class Index(IndexOpsMixin[S1]):
Expand Down Expand Up @@ -401,7 +405,14 @@ class Index(IndexOpsMixin[S1]):
) -> Self: ...
@overload
def __getitem__(self, idx: int | tuple[np_ndarray_anyint, ...]) -> S1: ...
def append(self, other): ...
@overload
def append(self, other: Index[S1] | Sequence[Index[S1]]) -> Self: ...
@overload
def append(self, other: Index[S2] | Sequence[Index[S2]]) -> Index[S1 | S2]: ...
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that's right. I think the result would be `Index[S1] | Index[S2]`.

I'm not sure how the downstream Index stuff would work with the union type inside the generic.

@overload
def append(self, other: Sequence[_T_INDEX]) -> Self | _T_INDEX: ...
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't we use `Sequence[Index]` here without the `TypeVar`?

@overload
def append(self, other: Index | Sequence) -> Index: ...
def putmask(self, mask, value): ...
def equals(self, other) -> bool: ...
@final
Expand Down
2 changes: 1 addition & 1 deletion pandas-stubs/core/indexes/multi.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class MultiIndex(Index):
def take(
self, indices, axis: int = ..., allow_fill: bool = ..., fill_value=..., **kwargs
): ...
def append(self, other): ...
def append(self, other): ... # pyrefly: ignore
def argsort(self, *args, **kwargs): ...
def repeat(self, repeats, axis=...): ...
@final
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ mypy = "1.17.0"
pandas = "2.3.0"
pyarrow = ">=10.0.1"
pytest = ">=7.1.2"
pyright = ">=1.1.400"
pyright = ">=1.1.403"
ty = "^0.0.1a8"
pyrefly = "^0.21.0"
poethepoet = ">=0.16.5"
Expand Down
62 changes: 62 additions & 0 deletions tests/test_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import datetime as dt
from typing import (
TYPE_CHECKING,
Any,
Union,
cast,
)

import numpy as np
Expand Down Expand Up @@ -1028,6 +1030,66 @@ def test_getitem() -> None:
check(assert_type(i0[[0, 2]], "pd.Index[str]"), pd.Index, str)


def test_append_mix() -> None:
"""Test pd.Index.append that gives pd.Index[Any]"""
first = pd.Index([1])
second = pd.Index(["a"])
third = pd.Index([1, "a"])
check(assert_type(first.append(second), "pd.Index[int | str]"), pd.Index)
check(assert_type(first.append([second]), "pd.Index[int | str]"), pd.Index)

check(assert_type(first.append(third), "pd.Index[int | str]"), pd.Index) # type: ignore[assert-type]
check(assert_type(first.append([third]), "pd.Index[int | str]"), pd.Index) # type: ignore[assert-type]
check(
assert_type( # type: ignore[assert-type]
first.append([second, third]), # pyright: ignore[reportAssertTypeFailure]
"pd.Index[int | str]",
),
pd.Index,
)

check(assert_type(third.append([]), "pd.Index[int | str]"), pd.Index) # type: ignore[assert-type]
check(
assert_type(third.append(cast("list[Index[Any]]", [])), "pd.Index[int | str]"), # type: ignore[assert-type]
pd.Index,
)
check(assert_type(third.append([first]), "pd.Index[int | str]"), pd.Index) # type: ignore[assert-type]
check(
assert_type( # type: ignore[assert-type]
third.append([first, second]), # pyright: ignore[reportAssertTypeFailure]
"pd.Index[int | str]",
),
pd.Index,
Comment on lines +1041 to +1062
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests need to pass without any `# type: ignore` / `# pyright: ignore` comments in them, so you'll need to fix the types in the `append()` declarations to make that happen.

)


def test_append_int() -> None:
"""Test pd.Index[int].append"""
first = pd.Index([1])
second = pd.Index([2])
check(assert_type(first.append([]), "pd.Index[int]"), pd.Index, np.int64)
check(assert_type(first.append(second), "pd.Index[int]"), pd.Index, np.int64)
check(assert_type(first.append([second]), "pd.Index[int]"), pd.Index, np.int64)


def test_append_str() -> None:
"""Test pd.Index[str].append"""
first = pd.Index(["str"])
second = pd.Index(["rts"])
check(assert_type(first.append([]), "pd.Index[str]"), pd.Index, str)
check(assert_type(first.append(second), "pd.Index[str]"), pd.Index, str)
check(assert_type(first.append([second]), "pd.Index[str]"), pd.Index, str)


def test_append_list_str() -> None:
"""Test pd.Index[list[str]].append"""
first = pd.Index([["str", "rts"]])
second = pd.Index([["srt", "trs"]])
check(assert_type(first.append([]), "pd.Index[list[str]]"), pd.Index, list)
check(assert_type(first.append(second), "pd.Index[list[str]]"), pd.Index, list)
check(assert_type(first.append([second]), "pd.Index[list[str]]"), pd.Index, list)
Comment on lines +1085 to +1090
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should be supporting Index[list[str]] because list[str] is not hashable and labels in an Index should be hashable.

But this is a bug in pandas, I think. See pandas-dev/pandas#61937

So can you remove this test?

Then we will have to separate out list[str] from S1 and have an I1 that includes everything in S1 except list[str], while S1 includes everything.

So can you make that change as well?



def test_range_index_range() -> None:
"""Test that pd.RangeIndex can be initialized from range."""
iri = pd.RangeIndex(range(5))
Expand Down
Loading