Skip to content

Commit f10acc0

Browse files
authored
Merge pull request #243 from lincc-frameworks/ppt-2.0.6
PPT 2.0.6, doctest fixes, NestedFrame.__repr__
2 parents a96120e + 1417e5f commit f10acc0

File tree

10 files changed

+62
-28
lines changed

10 files changed

+62
-28
lines changed

.copier-answers.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Changes here will be overwritten by Copier
2-
_commit: v2.0.5
2+
_commit: v2.0.6
33
_src_path: gh:lincc-frameworks/python-project-template
44
author_email: [email protected]
55
author_name: LINCC Frameworks

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ repos:
9797
"-d", # Flag for cached environment and doctrees
9898
"./docs/_build/doctrees", # Directory
9999
"-D", # Flag to override settings in conf.py
100-
"exclude_patterns=notebooks/*", # Exclude our notebooks from pre-commit
100+
"exclude_patterns=notebooks/*,_build", # Exclude notebooks and build dir from pre-commit
101101
]
102102
# Run unit tests, verify that they pass. Note that coverage is run against
103103
# the ./src directory here because that is what will be committed. In the

.setup_dev.sh

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,19 @@
11
#!/usr/bin/env bash
22

3+
# Bash Unofficial strict mode (http://redsymbol.net/articles/unofficial-bash-strict-mode/)
4+
# and (https://disconnected.systems/blog/another-bash-strict-mode/)
5+
set -o nounset # Any uninitialized variable is an error
6+
set -o errexit # Exit the script as soon as any command fails
7+
set -o pipefail # Fail command pipelines on the failure of any individual step
8+
IFS=$'\n\t' #set internal field separator to avoid iteration errors
9+
# Trap all errors and report the failing line and command
10+
trap 's=$?; echo "$0: Error on line "$LINENO": $BASH_COMMAND"; exit $s' ERR
11+
312
# This script should be run by new developers to install this package in
413
# editable mode and configure their local environment
514

615
echo "Checking virtual environment"
7-
if [ -z "${VIRTUAL_ENV}" ] && [ -z "${CONDA_PREFIX}" ]; then
16+
if [ "${VIRTUAL_ENV:-missing}" = "missing" ] && [ "${CONDA_PREFIX:-missing}" = "missing" ]; then
817
echo 'No virtual environment detected: none of $VIRTUAL_ENV or $CONDA_PREFIX is set.'
918
echo
1019
echo "=== This script is going to install the project in the system python environment ==="
@@ -20,7 +29,7 @@ fi
2029

2130
echo "Checking pip version"
2231
MINIMUM_PIP_VERSION=22
23-
pipversion=( $(python -m pip --version | awk '{print $2}' | sed 's/\./ /g') )
32+
pipversion=( $(python -m pip --version | awk '{print $2}' | sed 's/\./\n\t/g') )
2433
if let "${pipversion[0]}<${MINIMUM_PIP_VERSION}"; then
2534
echo "Insufficient version of pip found. Requires at least version ${MINIMUM_PIP_VERSION}."
2635
echo "See https://lincc-ppt.readthedocs.io/ for details."
@@ -32,7 +41,7 @@ python -m pip install -e . > /dev/null
3241

3342
echo "Installing developer dependencies in local environment"
3443
python -m pip install -e .'[dev]' > /dev/null
35-
if [ -f docs/requirements.txt ]; then python -m pip install -r docs/requirements.txt; fi
44+
if [ -f docs/requirements.txt ]; then python -m pip install -r docs/requirements.txt > /dev/null; fi
3645

3746
echo "Installing pre-commit"
3847
pre-commit install > /dev/null

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,10 @@ write_to = "src/nested_pandas/_version.py"
5656
[tool.pytest.ini_options]
5757
testpaths = [
5858
"tests",
59+
"src",
60+
"docs",
5961
]
62+
addopts = "--doctest-modules --doctest-glob=*.rst"
6063

6164
[tool.black]
6265
line-length = 110

src/nested_pandas/datasets/generation.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ def generate_data(n_base, n_layer, seed=None) -> NestedFrame:
2424
2525
Examples
2626
--------
27-
>>> nested_pandas.datasets.generate_data(10,100)
28-
>>> nested_pandas.datasets.generate_data(10, {"nested_a": 100, "nested_b": 200})
27+
>>> from nested_pandas.datasets import generate_data
28+
>>> nf1 = generate_data(10,100)
29+
>>> nf2 = generate_data(10, {"nested_a": 100, "nested_b": 200})
2930
"""
3031
# use provided seed, "None" acts as if no seed is provided
3132
randomstate = np.random.RandomState(seed=seed)

src/nested_pandas/nestedframe/core.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def add_nested(
278278
Examples
279279
--------
280280
281-
281+
>>> import nested_pandas as npd
282282
>>> nf = npd.NestedFrame({"a": [1, 2, 3], "b": [4, 5, 6]},
283283
... index=[0,1,2])
284284
>>> nf2 = npd.NestedFrame({"c":[1,2,3,4,5,6,7,8,9]},
@@ -320,11 +320,12 @@ def nest_lists(self, name: str, columns: list[str]) -> NestedFrame:
320320
Examples
321321
--------
322322
323+
>>> import nested_pandas as npd
323324
>>> nf = npd.NestedFrame({"c":[1,2,3], "d":[2,4,6],
324325
... "e":[[1,2,3], [4,5,6], [7,8,9]]},
325326
... index=[0,1,2])
326327
327-
>>> nf.nest_lists(columns=["c","d"], name="nested")
328+
>>> nf.nest_lists(columns=["e"], name="nested")
328329
c d nested
329330
0 1 2 [{e: 1}; …] (3 rows)
330331
1 2 4 [{e: 4}; …] (3 rows)
@@ -367,6 +368,7 @@ def from_flat(cls, df, base_columns, nested_columns=None, on: str | None = None,
367368
Examples
368369
--------
369370
371+
>>> import nested_pandas as npd
370372
>>> nf = npd.NestedFrame({"a":[1,1,1,2,2], "b":[2,2,2,4,4],
371373
... "c":[1,2,3,4,5], "d":[2,4,6,8,10]},
372374
... index=[0,0,0,1,1])
@@ -424,6 +426,7 @@ def from_lists(cls, df, base_columns=None, list_columns=None, name="nested"):
424426
Examples
425427
--------
426428
429+
>>> import nested_pandas as npd
427430
>>> nf = npd.NestedFrame({"c":[1,2,3], "d":[2,4,6],
428431
... "e":[[1,2,3], [4,5,6], [7,8,9]]},
429432
... index=[0,1,2])
@@ -605,7 +608,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> NestedFrame |
605608
606609
>>> nf = nf.query("nested.t > 10")
607610
>>> nf
608-
a b nested
611+
a b nested
609612
0 0.417022 0.184677 [{t: 13.40935, flux: 98.886109, band: 'g'}; …]...
610613
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
611614
2 0.000114 0.691121 [{t: 11.173797, flux: 28.044399, band: 'r'}; …...
@@ -619,9 +622,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> NestedFrame |
619622
with rows of that particular nested structure filtered. For example,
620623
querying the NestedFrame "df" with nested structure "mynested" as
621624
below will return all rows of df, but with mynested filtered by the
622-
condition:
623-
624-
>>> df.query("mynested.a > 2")
625+
condition: `df.query("mynested.a > 2")`
625626
"""
626627
if not isinstance(expr, str):
627628
msg = f"expr must be a string to be evaluated, {type(expr)} given"
@@ -786,7 +787,7 @@ def dropna(
786787
>>> # this query empties several of the nested dataframes
787788
>>> nf = nf.query("nested.t > 19")
788789
>>> nf
789-
a b nested
790+
a b nested
790791
0 0.417022 0.184677 None
791792
1 0.720324 0.372520 [{t: 19.365232, flux: 90.85955, band: 'r'}]
792793
2 0.000114 0.691121 [{t: 19.157791, flux: 14.672857, band: 'r'}]
@@ -796,7 +797,7 @@ def dropna(
796797
797798
>>> # dropna removes rows with those emptied dataframes
798799
>>> nf.dropna(subset="nested")
799-
a b nested
800+
a b nested
800801
1 0.720324 0.372520 [{t: 19.365232, flux: 90.85955, band: 'r'}]
801802
2 0.000114 0.691121 [{t: 19.157791, flux: 14.672857, band: 'r'}]
802803
@@ -806,9 +807,20 @@ def dropna(
806807
>>> nf = generate_data(5,5, seed=1)
807808
>>> # Either on the whole dataframe
808809
>>> nf.dropna(on_nested="nested")
810+
a b nested
811+
0 0.417022 0.184677 [{t: 8.38389, flux: 31.551563, band: 'r'}; …] ...
812+
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
813+
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
814+
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
815+
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
809816
>>> # or on a specific nested column
810817
>>> nf.dropna(subset="nested.t")
811-
818+
a b nested
819+
0 0.417022 0.184677 [{t: 8.38389, flux: 31.551563, band: 'r'}; …] ...
820+
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
821+
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
822+
3 0.302333 0.793535 [{t: 17.562349, flux: 1.828828, band: 'g'}; …]...
823+
4 0.146756 1.077633 [{t: 0.547752, flux: 75.014431, band: 'g'}; …]...
812824
813825
Notes
814826
-----
@@ -909,7 +921,7 @@ def sort_values(
909921
910922
>>> # Sort nested values
911923
>>> nf.sort_values(by="nested.band")
912-
a b nested
924+
a b nested
913925
0 0.417022 0.184677 [{t: 13.40935, flux: 98.886109, band: 'g'}; …]...
914926
1 0.720324 0.372520 [{t: 13.70439, flux: 68.650093, band: 'g'}; …]...
915927
2 0.000114 0.691121 [{t: 4.089045, flux: 83.462567, band: 'g'}; …]...
@@ -1017,13 +1029,15 @@ def reduce(self, func, *args, infer_nesting=True, **kwargs) -> NestedFrame: # t
10171029
>>> from nested_pandas.datasets.generation import generate_data
10181030
>>> import numpy as np
10191031
>>> nf = generate_data(5,5, seed=1)
1020-
1032+
>>>
10211033
>>> # define a custom user function
1034+
>>> # reduce will return a NestedFrame with two columns
10221035
>>> def example_func(base_col, nested_col):
1023-
>>> '''reduce will return a NestedFrame with two columns'''
1024-
>>> return {"mean": np.mean(nested_col),
1025-
... "mean_minus_base": np.mean(nested_col) - base_col}
1026-
1036+
... return {
1037+
... "mean": np.mean(nested_col),
1038+
... "mean_minus_base": np.mean(nested_col) - base_col,
1039+
... }
1040+
>>>
10271041
>>> # apply the function
10281042
>>> nf.reduce(example_func, "a", "nested.t")
10291043
mean mean_minus_base
@@ -1038,8 +1052,8 @@ def reduce(self, func, *args, infer_nesting=True, **kwargs) -> NestedFrame: # t
10381052
10391053
>>> # define a custom user function that returns nested structure
10401054
>>> def example_func(base_col1, base_col2, nested_col):
1041-
>>> '''reduce will return a NestedFrame with nested structure'''
1042-
>>> return {"offsets.t_a": nested_col - base_col1,
1055+
... '''reduce will return a NestedFrame with nested structure'''
1056+
... return {"offsets.t_a": nested_col - base_col1,
10431057
... "offsets.t_b": nested_col - base_col2}
10441058
10451059
By giving both output columns the prefix "offsets.", we signal

src/nested_pandas/nestedframe/io.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,12 @@ def read_parquet(
6464
Simple loading example:
6565
6666
>>> import nested_pandas as npd
67-
>>> nf = npd.read_parquet("path/to/file.parquet")
67+
>>> nf = npd.read_parquet("path/to/file.parquet") # doctest: +SKIP
6868
6969
Partial loading:
7070
7171
>>> #Load only the "flux" sub-column of the "nested" column
72-
>>> nf = npd.read_parquet("path/to/file.parquet", columns=["a", "nested.flux"])
72+
>>> nf = npd.read_parquet("path/to/file.parquet", columns=["a", "nested.flux"]) # doctest: +SKIP
7373
"""
7474

7575
# Type convergence for reject_nesting

src/nested_pandas/series/accessor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ def with_list_field(self, field: str, value: ArrayLike) -> pd.Series:
258258
... [["g","g"],
259259
... ["r","r"]])
260260
>>> # Look at one row of the series
261-
>>> nested_with_avg[0]
261+
>>> nf_new_band[0]
262262
t flux band new_band
263263
0 2.935118 39.676747 g g
264264
1 3.725204 41.919451 r g

src/nested_pandas/series/dtype.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ def name(self) -> str:
5656
fields = ", ".join([f"{field}: [{dtype!s}]" for field, dtype in nice_dtypes.items()])
5757
return f"nested<{fields}>"
5858

59+
def __repr__(self) -> str:
60+
return self.name
61+
5962
@classmethod
6063
def construct_array_type(cls) -> Type[ExtensionArray]:
6164
"""Corresponded array type, always NestedExtensionArray"""

src/nested_pandas/utils/utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,12 @@ def count_nested(df, nested, by=None, join=True) -> NestedFrame:
2727
Examples
2828
--------
2929
30+
>>> import pandas as pd
31+
>>> # Show all columns
32+
>>> pd.set_option("display.width", 200)
33+
>>> pd.set_option("display.max_columns", None)
3034
>>> from nested_pandas.datasets.generation import generate_data
31-
>>> nf = generate_data(5,10,seed=1)
35+
>>> nf = generate_data(5, 10, seed=1)
3236
3337
>>> from nested_pandas.utils import count_nested
3438
>>> count_nested(nf, "nested")

0 commit comments

Comments
 (0)