Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Add support for pd.Series.select #1446

Merged
merged 8 commits into from
Feb 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## [Unreleased]

- [ENH] Added support for pd.Series.select - Issue #1394 @samukweku

## [v0.30.0] - 2024-12-04

## [v0.29.2] - 2024-09-28
Expand All @@ -28,7 +30,7 @@
- [ENH] Added a `clean_names` method for polars - it can be used to clean the column names, or clean column values. Issue #1343 @samukweku
- [ENH] Improved performance for non-equi joins when using numba - @samukweku PR #1341
- [ENH] pandas Index,Series, DataFrame now supported in the `complete` method. - PR #1369 @samukweku
- [ENH] Improve performance for `first/last` in \`conditional_join, when the join columns in the right dataframe are sorted. - PR #1382 @samukweku
- [ENH] Improve performance for `first/last` in `conditional_join`, when the join columns in the right dataframe are sorted. - PR #1382 @samukweku

## [v0.27.0] - 2024-03-21

Expand Down
27 changes: 18 additions & 9 deletions janitor/functions/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,24 +328,27 @@ def select_rows(


@pf.register_dataframe_method
@pf.register_series_method
@deprecated_alias(rows="index")
def select(
df: pd.DataFrame,
df: pd.DataFrame | pd.Series,
*args: tuple,
index: Any = None,
columns: Any = None,
axis: str = "columns",
invert: bool = False,
) -> pd.DataFrame:
"""Method-chainable selection of rows and columns.
) -> pd.DataFrame | pd.Series:
"""Method-chainable selection of rows and/or columns.

It accepts a string, shell-like glob strings `(*string*)`,
regex, slice, array-like object, or a list of the previous options.

Selection on a MultiIndex on a level, or multiple levels,
is possible with a dictionary.

This method does not mutate the original DataFrame.
This method does not mutate the original DataFrame or Series.

If the pandas object is a Series, selection is possible only on the index.

Selection can be inverted with the `DropLabel` class.

Expand All @@ -366,6 +369,8 @@ def select(
- 0.26.0
- Added variable `args`, `invert` and `axis` parameters.
- `rows` keyword deprecated in favour of `index`.
- 0.31.0
- Add support for pd.Series.

Examples:
>>> import pandas as pd
Expand Down Expand Up @@ -429,9 +434,8 @@ def select(
ValueError: If args and index/columns are provided.

Returns:
A pandas DataFrame with the specified rows and/or columns selected.
A pandas DataFrame or Series with the specified rows and/or columns selected.
""" # noqa: E501

if args:
check("invert", invert, [bool])
if (index is not None) or (columns is not None):
Expand Down Expand Up @@ -851,16 +855,19 @@ def _index_converter(arr, index):


def _select(
df: pd.DataFrame,
df: pd.DataFrame | pd.Series,
invert: bool = False,
rows=None,
columns=None,
) -> pd.DataFrame:
) -> pd.DataFrame | pd.Series:
"""
Index DataFrame on the index or columns.
If it is a Series, indexing is only on the index.

Returns a DataFrame.
Returns a DataFrame or Series.
"""
if (columns is not None) and isinstance(df, pd.Series):
raise ValueError("columns axis is not supported for pd.Series.select")
if rows is None:
row_indexer = slice(None)
else:
Expand All @@ -870,6 +877,8 @@ def _select(
row_indexer[outcome] = False
else:
row_indexer = outcome
if isinstance(df, pd.Series):
return df.iloc[row_indexer]
if columns is None:
column_indexer = slice(None)
else:
Expand Down
2 changes: 1 addition & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ plugins:
docstring_style: "google"
docstring_options:
trim_doctest_flags: true
show_if_no_docstring: false
show_if_no_docstring: false
show_root_toc_entry: false
show_root_heading: false
show_submodules: true
Expand Down
3 changes: 3 additions & 0 deletions mkdocs/api/biology.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Biology

::: janitor.biology
options:
filters:
- "!^_"
3 changes: 3 additions & 0 deletions mkdocs/api/chemistry.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Chemistry

::: janitor.chemistry
options:
filters:
- "!^_"
3 changes: 3 additions & 0 deletions mkdocs/api/engineering.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Engineering

::: janitor.engineering
options:
filters:
- "!^_"
3 changes: 3 additions & 0 deletions mkdocs/api/finance.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Finance

::: janitor.finance
options:
filters:
- "!^_"
2 changes: 2 additions & 0 deletions mkdocs/api/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

::: janitor.functions
options:
filters:
- "!^_"
members:
- add_columns
- also
Expand Down
3 changes: 3 additions & 0 deletions mkdocs/api/io.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Input/Output (io)

::: janitor.io
options:
filters:
- "!^_"
3 changes: 3 additions & 0 deletions mkdocs/api/math.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Math

::: janitor.math
options:
filters:
- "!^_"
3 changes: 3 additions & 0 deletions mkdocs/api/ml.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Machine Learning

::: janitor.ml
options:
filters:
- "!^_"
2 changes: 2 additions & 0 deletions mkdocs/api/polars.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

::: janitor.polars
options:
filters:
- "!^_"
members:
- clean_names
- complete
Expand Down
3 changes: 3 additions & 0 deletions mkdocs/api/timeseries.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Timeseries

::: janitor.timeseries
options:
filters:
- "!^_"
3 changes: 3 additions & 0 deletions mkdocs/api/xarray.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# XArray

::: janitor.xarray.functions
options:
filters:
- "!^_"
18 changes: 17 additions & 1 deletion tests/functions/test_select.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal
from pandas.testing import assert_frame_equal, assert_series_equal

from janitor.functions.select import DropLabel

Expand All @@ -22,6 +22,22 @@ def dataframe():
)


def test_series_axis(dataframe):
    """Selecting along the columns axis of a Series must raise a ValueError."""
    first_column = dataframe.iloc[:, 0]
    expected_message = "columns axis is not supported for pd.Series.select"
    # Only the call under test lives inside the `raises` context.
    with pytest.raises(ValueError, match=expected_message):
        first_column.select("bar", axis="columns")


def test_series_select(dataframe):
    """Selecting a label on a Series' index should match label-based `.loc`."""
    ser = dataframe.iloc[:, 0]
    # `select(index=...)` is compared against a one-element list lookup via `.loc`.
    assert_series_equal(ser.select(index="bar"), ser.loc[["bar"]])


def test_args_and_rows_and_columns(dataframe):
"""
Raise if args and rows/columns are provided.
Expand Down
Loading