Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix] remove restrictions for mutate/summarise #1452

Merged
merged 1 commit into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions janitor/functions/mutate.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@ def mutate(
mutate creates new columns that are functions of existing columns.
It can also modify columns (if the name is the same as an existing column).

The argument provided to *args* should be either a dictionary, a tuple or a callable.
The argument provided to *args* should be
a dictionary, a callable, or a tuple; however,
any other object can be passed, as long as it can
be aligned with the original DataFrame.


- **dictionary argument**:
If the argument is a dictionary,
Expand Down Expand Up @@ -193,10 +197,6 @@ def mutate(

@singledispatch
def _mutator(arg, df, by):
if not callable(arg):
raise NotImplementedError(
f"janitor.mutate is not supported for {type(arg)}"
)
if by is None:
val = df
else:
Expand All @@ -212,7 +212,7 @@ def _mutator(arg, df, by):
df[column] = outcome[column]
return df
raise TypeError(
"The output from a callable should be a named Series or a DataFrame"
"The output from the mutation should be a named Series or a DataFrame"
)


Expand Down
19 changes: 9 additions & 10 deletions janitor/functions/summarise.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas as pd
import pandas_flavor as pf
from pandas.api.types import is_scalar
from pandas.core.common import apply_if_callable
from pandas.core.groupby.generic import DataFrameGroupBy

from janitor.functions.select import get_index_labels
Expand All @@ -33,7 +34,10 @@ def summarise(
the output will have a single row
summarising all observations in the input.

The argument provided to *args* should be either a dictionary or a tuple.
The argument provided to *args* should be
a dictionary, a callable, or a tuple; however,
any other object can be passed, as long as it fits
within pandas' aggregation semantics.

- **dictionary argument**:
If the argument is a dictionary,
Expand Down Expand Up @@ -187,15 +191,11 @@ def summarise(
values = map(is_scalar, dictionary.values())
if all(values):
return pd.Series(dictionary)
return pd.concat(dictionary, axis=1, sort=False, copy=False)
return pd.concat(dictionary, axis="columns", sort=False, copy=False)


@singledispatch
def _mutator(arg, df, by):
if not callable(arg):
raise NotImplementedError(
f"janitor.summarise is not supported for {type(arg)}"
)
if by is None:
val = df
else:
Expand All @@ -205,9 +205,8 @@ def _mutator(arg, df, by):
if not outcome.name:
raise ValueError("Ensure the pandas Series object has a name")
return {outcome.name: outcome}
# assumption: should return a DataFrame
outcome = {key: outcome[key] for key in outcome}
return outcome
# assumption: a mapping - DataFrame/dictionary/...
return {**outcome}


@_mutator.register(dict)
Expand Down Expand Up @@ -247,7 +246,7 @@ def _process_maybe_callable(func: callable, obj):
try:
column = obj.agg(func)
except: # noqa: E722
column = func(obj)
column = apply_if_callable(maybe_callable=func, obj=obj)
return column


Expand Down
5 changes: 3 additions & 2 deletions tests/functions/test_mutate.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_mutate_callable(df_mutate):
"Raise if output of callable is not a pandas Series/DataFrame"
with pytest.raises(
TypeError,
match="The output from a callable should be a named Series or a DataFrame",
match="The output from the mutation should be a named Series or a DataFrame",
):
df_mutate.mutate(lambda df: np.sum(df["avg_run"]))

Expand All @@ -64,7 +64,8 @@ def test_mutate_wrong_arg(df_mutate):
Raise if wrong arg is provided
"""
with pytest.raises(
NotImplementedError, match="janitor.mutate is not supported for.+"
TypeError,
match="The output from the mutation should be a named Series or a DataFrame",
):
df_mutate.mutate(1)

Expand Down
10 changes: 0 additions & 10 deletions tests/functions/test_summarise.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,6 @@ def df_summarise():
return pd.DataFrame(data)


def test_summarise_wrong_arg(df_summarise):
"""
Raise if wrong arg is provided
"""
with pytest.raises(
NotImplementedError, match="janitor.summarise is not supported for.+"
):
df_summarise.summarise(1)


def test_mutate_callable_series_unnamed(df_summarise):
"""Test output for callable"""
with pytest.raises(
Expand Down
Loading