Skip to content

Commit 1417e5f

Browse files
committed
Fix doc-strings and NestedDtype.__repr__
1 parent 0bc249f commit 1417e5f

File tree

6 files changed

+45
-23
lines changed

6 files changed

+45
-23
lines changed

src/nested_pandas/datasets/generation.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ def generate_data(n_base, n_layer, seed=None) -> NestedFrame:
2424
2525
Examples
2626
--------
27-
>>> nested_pandas.datasets.generate_data(10,100)
28-
>>> nested_pandas.datasets.generate_data(10, {"nested_a": 100, "nested_b": 200})
27+
>>> from nested_pandas.datasets import generate_data
28+
>>> nf1 = generate_data(10,100)
29+
>>> nf2 = generate_data(10, {"nested_a": 100, "nested_b": 200})
2930
"""
3031
# use provided seed, "None" acts as if no seed is provided
3132
randomstate = np.random.RandomState(seed=seed)

src/nested_pandas/nestedframe/core.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def add_nested(
278278
Examples
279279
--------
280280
281-
281+
>>> import nested_pandas as npd
282282
>>> nf = npd.NestedFrame({"a": [1, 2, 3], "b": [4, 5, 6]},
283283
... index=[0,1,2])
284284
>>> nf2 = npd.NestedFrame({"c":[1,2,3,4,5,6,7,8,9]},
@@ -320,11 +320,12 @@ def nest_lists(self, name: str, columns: list[str]) -> NestedFrame:
320320
Examples
321321
--------
322322
323+
>>> import nested_pandas as npd
323324
>>> nf = npd.NestedFrame({"c":[1,2,3], "d":[2,4,6],
324325
... "e":[[1,2,3], [4,5,6], [7,8,9]]},
325326
... index=[0,1,2])
326327
327-
>>> nf.nest_lists(columns=["c","d"], name="nested")
328+
>>> nf.nest_lists(columns=["e"], name="nested")
328329
c d nested
329330
0 1 2 [{e: 1}; …] (3 rows)
330331
1 2 4 [{e: 4}; …] (3 rows)
@@ -367,6 +368,7 @@ def from_flat(cls, df, base_columns, nested_columns=None, on: str | None = None,
367368
Examples
368369
--------
369370
371+
>>> import nested_pandas as npd
370372
>>> nf = npd.NestedFrame({"a":[1,1,1,2,2], "b":[2,2,2,4,4],
371373
... "c":[1,2,3,4,5], "d":[2,4,6,8,10]},
372374
... index=[0,0,0,1,1])
@@ -424,6 +426,7 @@ def from_lists(cls, df, base_columns=None, list_columns=None, name="nested"):
424426
Examples
425427
--------
426428
429+
>>> import nested_pandas as npd
427430
>>> nf = npd.NestedFrame({"c":[1,2,3], "d":[2,4,6],
428431
... "e":[[1,2,3], [4,5,6], [7,8,9]]},
429432
... index=[0,1,2])
@@ -605,7 +608,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> NestedFrame |
605608
606609
>>> nf = nf.query("nested.t > 10")
607610
>>> nf
608-
a b nested
611+
a b nested
609612
0 0.417022 0.184677 [{t: 13.40935, flux: 98.886109, band: 'g'}; …]...
610613
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
611614
2 0.000114 0.691121 [{t: 11.173797, flux: 28.044399, band: 'r'}; …...
@@ -619,9 +622,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> NestedFrame |
619622
with rows of that particular nested structure filtered. For example,
620623
querying the NestedFrame "df" with nested structure "mynested" as
621624
below will return all rows of df, but with mynested filtered by the
622-
condition:
623-
624-
>>> df.query("mynested.a > 2")
625+
condition: ``df.query("mynested.a > 2")``
625626
"""
626627
if not isinstance(expr, str):
627628
msg = f"expr must be a string to be evaluated, {type(expr)} given"
@@ -786,7 +787,7 @@ def dropna(
786787
>>> # this query empties several of the nested dataframes
787788
>>> nf = nf.query("nested.t > 19")
788789
>>> nf
789-
a b nested
790+
a b nested
790791
0 0.417022 0.184677 None
791792
1 0.720324 0.372520 [{t: 19.365232, flux: 90.85955, band: 'r'}]
792793
2 0.000114 0.691121 [{t: 19.157791, flux: 14.672857, band: 'r'}]
@@ -796,7 +797,7 @@ def dropna(
796797
797798
>>> # dropna removes rows with those emptied dataframes
798799
>>> nf.dropna(subset="nested")
799-
a b nested
800+
a b nested
800801
1 0.720324 0.372520 [{t: 19.365232, flux: 90.85955, band: 'r'}]
801802
2 0.000114 0.691121 [{t: 19.157791, flux: 14.672857, band: 'r'}]
802803
@@ -806,9 +807,20 @@ def dropna(
806807
>>> nf = generate_data(5,5, seed=1)
807808
>>> # Either on the whole dataframe
808809
>>> nf.dropna(on_nested="nested")
810+
a b nested
811+
0 0.417022 0.184677 [{t: 8.38389, flux: 31.551563, band: 'r'}; …] ...
812+
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
813+
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
814+
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
815+
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
809816
>>> # or on a specific nested column
810817
>>> nf.dropna(subset="nested.t")
811-
818+
a b nested
819+
0 0.417022 0.184677 [{t: 8.38389, flux: 31.551563, band: 'r'}; …] ...
820+
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
821+
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
822+
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
823+
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
812824
813825
Notes
814826
-----
@@ -909,7 +921,7 @@ def sort_values(
909921
910922
>>> # Sort nested values
911923
>>> nf.sort_values(by="nested.band")
912-
a b nested
924+
a b nested
913925
0 0.417022 0.184677 [{t: 13.40935, flux: 98.886109, band: 'g'}; …]...
914926
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
915927
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
@@ -1017,13 +1029,15 @@ def reduce(self, func, *args, infer_nesting=True, **kwargs) -> NestedFrame: # t
10171029
>>> from nested_pandas.datasets.generation import generate_data
10181030
>>> import numpy as np
10191031
>>> nf = generate_data(5,5, seed=1)
1020-
1032+
>>>
10211033
>>> # define a custom user function
1034+
>>> # reduce will return a NestedFrame with two columns
10221035
>>> def example_func(base_col, nested_col):
1023-
>>> '''reduce will return a NestedFrame with two columns'''
1024-
>>> return {"mean": np.mean(nested_col),
1025-
... "mean_minus_base": np.mean(nested_col) - base_col}
1026-
1036+
... return {
1037+
... "mean": np.mean(nested_col),
1038+
... "mean_minus_base": np.mean(nested_col) - base_col,
1039+
... }
1040+
>>>
10271041
>>> # apply the function
10281042
>>> nf.reduce(example_func, "a", "nested.t")
10291043
mean mean_minus_base
@@ -1038,8 +1052,8 @@ def reduce(self, func, *args, infer_nesting=True, **kwargs) -> NestedFrame: # t
10381052
10391053
>>> # define a custom user function that returns nested structure
10401054
>>> def example_func(base_col1, base_col2, nested_col):
1041-
>>> '''reduce will return a NestedFrame with nested structure'''
1042-
>>> return {"offsets.t_a": nested_col - base_col1,
1055+
... '''reduce will return a NestedFrame with nested structure'''
1056+
... return {"offsets.t_a": nested_col - base_col1,
10431057
... "offsets.t_b": nested_col - base_col2}
10441058
10451059
By giving both output columns the prefix "offsets.", we signal

src/nested_pandas/nestedframe/io.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,12 @@ def read_parquet(
6464
Simple loading example:
6565
6666
>>> import nested_pandas as npd
67-
>>> nf = npd.read_parquet("path/to/file.parquet")
67+
>>> nf = npd.read_parquet("path/to/file.parquet") # doctest: +SKIP
6868
6969
Partial loading:
7070
7171
>>> # Load only the "flux" sub-column of the "nested" column
72-
>>> nf = npd.read_parquet("path/to/file.parquet", columns=["a", "nested.flux"])
72+
>>> nf = npd.read_parquet("path/to/file.parquet", columns=["a", "nested.flux"]) # doctest: +SKIP
7373
"""
7474

7575
# Type convergence for reject_nesting

src/nested_pandas/series/accessor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ def with_list_field(self, field: str, value: ArrayLike) -> pd.Series:
258258
... [["g","g"],
259259
... ["r","r"]])
260260
>>> # Look at one row of the series
261-
>>> nested_with_avg[0]
261+
>>> nf_new_band[0]
262262
t flux band new_band
263263
0 2.935118 39.676747 g g
264264
1 3.725204 41.919451 r g

src/nested_pandas/series/dtype.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ def name(self) -> str:
5656
fields = ", ".join([f"{field}: [{dtype!s}]" for field, dtype in nice_dtypes.items()])
5757
return f"nested<{fields}>"
5858

59+
def __repr__(self) -> str:
60+
return self.name
61+
5962
@classmethod
6063
def construct_array_type(cls) -> Type[ExtensionArray]:
6164
"""Corresponding array type, always NestedExtensionArray"""

src/nested_pandas/utils/utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@ def count_nested(df, nested, by=None, join=True) -> NestedFrame:
2727
Examples
2828
--------
2929
30+
>>> import pandas as pd
31+
>>> # Show all columns
32+
>>> pd.set_option("display.width", 200)
33+
>>> pd.set_option("display.max_columns", None)
3034
>>> from nested_pandas.datasets.generation import generate_data
31-
>>> nf = generate_data(5,10,seed=1)
35+
>>> nf = generate_data(5, 10, seed=1)
3236
3337
>>> from nested_pandas.utils import count_nested
3438
>>> count_nested(nf, "nested")

0 commit comments

Comments
 (0)