Skip to content

Commit 1013934

Browse files
Feat: compatible with multiple pandas objects accessors (df, series, idx)
- Improved docstrings - Improved tests - Improved whatsnew entry Co-authored-by: Afonso Antunes <[email protected]>
1 parent 001c05a commit 1013934

File tree

3 files changed

+340
-75
lines changed

3 files changed

+340
-75
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ Other enhancements
8383
- Improved deprecation message for offset aliases (:issue:`60820`)
8484
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
8585
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
86-
- Support :class:`DataFrame` plugin accessor via entry points (:issue:`29076`)
86+
- Support :class:`DataFrame`, :class:`Series` and :class:`Index` plugin accessors via entry points (:issue:`29076`)
8787
- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
8888
- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
8989
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)

pandas/core/accessor.py

Lines changed: 83 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import functools
1111
from typing import (
1212
TYPE_CHECKING,
13-
Any,
1413
final,
1514
)
1615
import warnings
@@ -405,17 +404,31 @@ def accessor_entry_point_loader() -> None:
405404
"""
406405
Load and register pandas accessors declared via entry points.
407406
408-
This function scans the 'pandas.accessor' entry point group for accessors
409-
registered by third-party packages. Each entry point is expected to follow
410-
the format:
407+
This function scans the 'pandas.<pd_obj>.accessor' entry point group for
408+
accessors registered by third-party packages. Each entry point is expected
409+
to follow the format:
411410
412-
TODO
411+
# setup.py
412+
entry_points={
413+
'pandas.DataFrame.accessor': [ <name> = <module>:<AccessorClass>, ... ],
414+
'pandas.Series.accessor': [ <name> = <module>:<AccessorClass>, ... ],
415+
'pandas.Index.accessor': [ <name> = <module>:<AccessorClass>, ... ],
416+
}
413417
414-
For example:
418+
OR for pyproject.toml file:
415419
416-
TODO
417-
TODO
418-
TODO
420+
# pyproject.toml
421+
[project.entry-points."pandas.DataFrame.accessor"]
422+
<name> = "<module>:<AccessorClass>"
423+
424+
[project.entry-points."pandas.Series.accessor"]
425+
<name> = "<module>:<AccessorClass>"
426+
427+
[project.entry-points."pandas.Index.accessor"]
428+
<name> = "<module>:<AccessorClass>"
429+
430+
For more information about entry points, see:
431+
https://packaging.python.org/en/latest/guides/creating-and-discovering-plugins/#plugin-entry-points
419432
420433
421434
For each valid entry point:
@@ -428,6 +441,7 @@ def accessor_entry_point_loader() -> None:
428441
Notes
429442
-----
430443
- This function is only intended to be called at pandas startup.
444+
- For more information about accessors, see the pandas documentation on registering custom accessors.
431445
432446
Raises
433447
------
@@ -436,48 +450,67 @@ def accessor_entry_point_loader() -> None:
436450
437451
Examples
438452
--------
439-
df.myplugin.do_something() # Assuming such accessor was registered
440-
"""
441-
442-
ENTRY_POINT_GROUP: str = "pandas.accessor"
443-
444-
accessors: EntryPoints = entry_points(group=ENTRY_POINT_GROUP)
445-
accessor_package_dict: dict[str, str] = {}
446-
447-
for new_accessor in accessors:
448-
if new_accessor.dist is not None:
449-
# Try to get new_accessor.dist.name,
450-
# if that's not possible: new_pkg_name = 'Unknown'
451-
new_pkg_name: str = getattr(new_accessor.dist, "name", "Unknown")
452-
else:
453-
new_pkg_name: str = "Unknown"
453+
# setup.py
454+
entry_points={
455+
'pandas.DataFrame.accessor': [
456+
'myplugin = myplugin.accessor:MyPluginAccessor',
457+
],
458+
}
459+
# END setup.py
454460
455-
# Verifies duplicated accessor names
456-
if new_accessor.name in accessor_package_dict:
457-
loaded_pkg_name: str = accessor_package_dict.get(new_accessor.name)
461+
- That entry point would allow the following code:
458462
459-
if loaded_pkg_name is None:
460-
loaded_pkg_name = "Unknown"
463+
import pandas as pd
461464
462-
warnings.warn(
463-
"Warning: you have two accessors with the same name:"
464-
f" '{new_accessor.name}' has already been registered"
465-
f" by the package '{new_pkg_name}'. So the "
466-
f"'{new_accessor.name}' provided by the package "
467-
f"'{loaded_pkg_name}' is not being used. "
468-
"Uninstall the package you don't want"
469-
"to use if you want to get rid of this warning.\n",
470-
UserWarning,
471-
stacklevel=2,
472-
)
473-
474-
accessor_package_dict.update({new_accessor.name: new_pkg_name})
475-
476-
def make_accessor(ep):
477-
def accessor(self) -> Any:
478-
cls_ = ep.load()
479-
return cls_(self)
480-
481-
return accessor
465+
df = pd.DataFrame({"A": [1, 2, 3]})
466+
df.myplugin.do_something() # Calls MyPluginAccessor.do_something()
467+
"""
482468

483-
register_dataframe_accessor(new_accessor.name)(make_accessor(new_accessor))
469+
PD_OBJECTS_ENTRYPOINTS: list[str] = [
470+
"pandas.DataFrame.accessor",
471+
"pandas.Series.accessor",
472+
"pandas.Index.accessor",
473+
]
474+
475+
ACCESSOR_REGISTRY_FUNCTIONS: dict[str, Callable] = {
476+
"pandas.DataFrame.accessor": register_dataframe_accessor,
477+
"pandas.Series.accessor": register_series_accessor,
478+
"pandas.Index.accessor": register_index_accessor,
479+
}
480+
481+
for pd_obj_entrypoint in PD_OBJECTS_ENTRYPOINTS:
482+
accessors: EntryPoints = entry_points(group=pd_obj_entrypoint)
483+
accessor_package_dict: dict[str, str] = {}
484+
485+
for new_accessor in accessors:
486+
dist = getattr(new_accessor, "dist", None)
487+
new_pkg_name = getattr(dist, "name", "Unknown") if dist else "Unknown"
488+
489+
# Verifies duplicated accessor names
490+
if new_accessor.name in accessor_package_dict:
491+
loaded_pkg_name: str = accessor_package_dict.get(new_accessor.name)
492+
493+
if loaded_pkg_name is None:
494+
loaded_pkg_name: str = "Unknown"
495+
496+
warnings.warn(
497+
"Warning: you have two accessors with the same name:"
498+
f" '{new_accessor.name}' has already been registered"
499+
f" by the package '{new_pkg_name}'. The "
500+
f"'{new_accessor.name}' provided by the package "
501+
f"'{loaded_pkg_name}' is not being used. "
502+
"Uninstall the package you don't want"
503+
"to use if you want to get rid of this warning.\n",
504+
UserWarning,
505+
stacklevel=2,
506+
)
507+
508+
accessor_package_dict.update({new_accessor.name: new_pkg_name})
509+
510+
def make_accessor(ep):
511+
return lambda self, ep=ep: ep.load()(self)
512+
513+
register_fn = ACCESSOR_REGISTRY_FUNCTIONS.get(pd_obj_entrypoint)
514+
515+
if register_fn is not None:
516+
register_fn(new_accessor.name)(make_accessor(new_accessor))

0 commit comments

Comments
 (0)