Skip to content

Commit 2790420

Browse files
authored
Merge dev into pre-commit-ci-update-config
2 parents 9f97235 + 85b7af3 commit 2790420

File tree

3 files changed (+68 −30 lines changed)

.github/workflows/docs.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
- name: Build docs
3737
run: mkdocs build
3838

39-
- uses: actions/upload-artifact@v3
39+
- uses: actions/upload-artifact@v4
4040
with:
4141
name: website
4242
path: site/

janitor/functions/select.py

+46-29
Original file line numberDiff line numberDiff line change
@@ -152,13 +152,13 @@ def select_columns(
152152
153153
Exclude columns with the `DropLabel` class:
154154
>>> from janitor import DropLabel
155-
>>> df.select_columns(DropLabel(slice("name", "awake")), "conservation")
156-
brainwt bodywt conservation
157-
0 NaN 50.000 lc
158-
1 0.01550 0.480 NaN
159-
2 NaN 1.350 nt
160-
3 0.00029 0.019 lc
161-
4 0.42300 600.000 domesticated
155+
>>> df.select_columns(DropLabel(slice("name", "awake")))
156+
brainwt bodywt
157+
0 NaN 50.000
158+
1 0.01550 0.480
159+
2 NaN 1.350
160+
3 0.00029 0.019
161+
4 0.42300 600.000
162162
163163
Selection on MultiIndex columns:
164164
>>> d = {'num_legs': [4, 4, 2, 2],
@@ -673,7 +673,7 @@ def _index_dispatch(arg, df, axis): # noqa: F811
673673
674674
Returns an array of integers.
675675
"""
676-
level_label = {}
676+
677677
index = getattr(df, axis)
678678
if not isinstance(index, pd.MultiIndex):
679679
return _select_index(list(arg), df, axis)
@@ -687,6 +687,7 @@ def _index_dispatch(arg, df, axis): # noqa: F811
687687
"in the MultiIndex, and should either be all "
688688
"strings or integers."
689689
)
690+
level_label = {}
690691
for key, value in arg.items():
691692
if isinstance(value, dispatch_callable):
692693
indexer = index.get_level_values(key)
@@ -757,14 +758,14 @@ def _index_dispatch(arg, df, axis): # noqa: F811
757758
def _column_sel_dispatch(cols, df, axis): # noqa: F811
758759
"""
759760
Base function for selection on a Pandas Index object.
760-
Returns the inverse of the passed label(s).
761+
Returns the inverse of the passed label(s),
762+
or the set difference if it is part of a list of labels.
761763
762764
Returns an array of integers.
763765
"""
764766
arr = _select_index(cols.label, df, axis)
765767
index = np.arange(getattr(df, axis).size)
766-
arr = _index_converter(arr, index)
767-
return np.delete(index, arr)
768+
return _index_converter(arr, index)
768769

769770

770771
@_select_index.register(set)
@@ -797,27 +798,43 @@ def _index_dispatch(arg, df, axis): # noqa: F811
797798
indices = index.get_indexer_for(list(arg))
798799
if (indices != -1).all():
799800
return indices
800-
# treat multiple DropLabel instances as a single unit
801-
checks = (isinstance(entry, DropLabel) for entry in arg)
802-
if sum(checks) > 1:
803-
drop_labels = (entry for entry in arg if isinstance(entry, DropLabel))
804-
drop_labels = [entry.label for entry in drop_labels]
805-
drop_labels = DropLabel(drop_labels)
806-
arg = [entry for entry in arg if not isinstance(entry, DropLabel)]
807-
arg.append(drop_labels)
808-
indices = [_select_index(entry, df, axis) for entry in arg]
801+
802+
include = []
803+
exclude = []
804+
for entry in arg:
805+
if isinstance(entry, DropLabel):
806+
exclude.append(entry)
807+
else:
808+
outcome = _select_index(entry, df, axis)
809+
include.append(outcome)
810+
if exclude:
811+
if len(exclude) > 1:
812+
exclude = [entry.label for entry in exclude]
813+
exclude = DropLabel(exclude)
814+
else:
815+
exclude = exclude[0]
816+
exclude = _select_index(exclude, df, axis)
817+
len_exclude = len(exclude)
818+
if len_exclude and not include:
819+
index_arr = np.arange(getattr(df, axis).size)
820+
return np.delete(index_arr, exclude)
821+
if include and len_exclude:
822+
include = [_index_converter(arr, index) for arr in include]
823+
include = np.concatenate(include)
824+
mask = np.isin(include, exclude)
825+
return include[~mask]
809826
# single entry does not need to be combined
810827
# or materialized if possible;
811828
# this offers more performance
812-
if len(indices) == 1:
813-
if is_scalar(indices[0]):
814-
return indices
815-
indices = indices[0]
816-
if is_list_like(indices):
817-
indices = np.asanyarray(indices)
818-
return indices
819-
indices = [_index_converter(arr, index) for arr in indices]
820-
return np.concatenate(indices)
829+
if len(include) == 1:
830+
if is_scalar(include[0]):
831+
return include
832+
include = include[0]
833+
if is_list_like(include):
834+
include = np.asanyarray(include)
835+
return include
836+
include = [_index_converter(arr, index) for arr in include]
837+
return np.concatenate(include)
821838

822839

823840
def _index_converter(arr, index):

tests/functions/test_select_columns.py

+21
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,27 @@ def test_select_column_names_droplabel(dataframe, invert, expected):
4444
assert_frame_equal(df, dataframe[expected])
4545

4646

47+
def test_select_column_names_droplabel_mix():
48+
"Test DropLabel, mixed with other labels"
49+
data = {
50+
"rowid": [1, 2],
51+
"species": ["Adelie", "Adelie"],
52+
"island": ["Torgersen", "Torgersen"],
53+
"bill_length_mm": [39.1, 39.5],
54+
"bill_depth_mm": [18.7, 17.4],
55+
"flipper_length_mm": [181.0, 186.0],
56+
"body_mass_g": [3750.0, 3800.0],
57+
"sex": ["male", "female"],
58+
"year": [2007, 2007],
59+
}
60+
61+
df = pd.DataFrame(data)
62+
selectors = [pd.api.types.is_numeric_dtype, DropLabel(["year", "rowid"])]
63+
actual = df.select(selectors)
64+
expected = df.select_dtypes("number").drop(columns=["year", "rowid"])
65+
assert_frame_equal(actual, expected)
66+
67+
4768
@pytest.mark.functions
4869
def test_select_column_names_droplabel_multiple(dataframe):
4970
"Base DataFrame"

0 commit comments

Comments
 (0)