Skip to content

Commit 0f9f790

Browse files
authored
Better default behavior of the Coordinates constructor (#8107)
* ``Coordinates.__init__`` creates default indexes ... for any input dimension coordinate, if ``indexes=None``. Also, if another ``Coordinates`` object is passed, extract its indexes and raise if ``indexes`` is not None (no align/merge supported here). * add docstring examples * fix doctests * fix tests * update what's new
1 parent afda88e commit 0f9f790

File tree

5 files changed

+131
-43
lines changed

5 files changed

+131
-43
lines changed

doc/whats-new.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ New Features
2626
Breaking changes
2727
~~~~~~~~~~~~~~~~
2828

29+
- The :py:class:`Coordinates` constructor now creates a (pandas) index by
30+
default for each dimension coordinate. To keep the previous behavior (no index
31+
created), pass an empty dictionary to ``indexes``. The constructor now also
32+
extracts and adds the indexes from another :py:class:`Coordinates` object
33+
passed via ``coords`` (:pull:`8107`).
34+
By `Benoît Bovy <https://github.com/benbovy>`_.
2935

3036
Deprecations
3137
~~~~~~~~~~~~

xarray/core/coordinates.py

Lines changed: 87 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from xarray.core.indexes import (
1818
Index,
1919
Indexes,
20+
PandasIndex,
2021
PandasMultiIndex,
2122
assert_no_index_corrupted,
2223
create_default_index_implicit,
@@ -192,22 +193,69 @@ class Coordinates(AbstractCoordinates):
192193
Coordinates are either:
193194
194195
- returned via the :py:attr:`Dataset.coords` and :py:attr:`DataArray.coords`
195-
properties.
196-
- built from index objects (e.g., :py:meth:`Coordinates.from_pandas_multiindex`).
197-
- built directly from coordinate data and index objects (beware that no consistency
198-
check is done on those inputs).
199-
200-
In the latter case, no default (pandas) index is created.
196+
properties
197+
- built from Pandas or other index objects
198+
(e.g., :py:meth:`Coordinates.from_pandas_multiindex`)
199+
- built directly from coordinate data and Xarray ``Index`` objects (beware that
200+
no consistency check is done on those inputs)
201201
202202
Parameters
203203
----------
204-
coords: dict-like
205-
Mapping where keys are coordinate names and values are objects that
206-
can be converted into a :py:class:`~xarray.Variable` object
207-
(see :py:func:`~xarray.as_variable`).
208-
indexes: dict-like
209-
Mapping of where keys are coordinate names and values are
210-
:py:class:`~xarray.indexes.Index` objects.
204+
coords: dict-like, optional
205+
Mapping where keys are coordinate names and values are objects that
206+
can be converted into a :py:class:`~xarray.Variable` object
207+
(see :py:func:`~xarray.as_variable`). If another
208+
:py:class:`~xarray.Coordinates` object is passed, its indexes
209+
will be added to the newly created object.
210+
indexes: dict-like, optional
211+
Mapping where keys are coordinate names and values are
212+
:py:class:`~xarray.indexes.Index` objects. If None (default),
213+
pandas indexes will be created for each dimension coordinate.
214+
Passing an empty dictionary will skip this default behavior.
215+
216+
Examples
217+
--------
218+
Create a dimension coordinate with a default (pandas) index:
219+
220+
>>> xr.Coordinates({"x": [1, 2]})
221+
Coordinates:
222+
* x (x) int64 1 2
223+
224+
Create a dimension coordinate with no index:
225+
226+
>>> xr.Coordinates(coords={"x": [1, 2]}, indexes={})
227+
Coordinates:
228+
x (x) int64 1 2
229+
230+
Create a new Coordinates object from existing dataset coordinates
231+
(indexes are passed):
232+
233+
>>> ds = xr.Dataset(coords={"x": [1, 2]})
234+
>>> xr.Coordinates(ds.coords)
235+
Coordinates:
236+
* x (x) int64 1 2
237+
238+
Create indexed coordinates from a ``pandas.MultiIndex`` object:
239+
240+
>>> midx = pd.MultiIndex.from_product([["a", "b"], [0, 1]])
241+
>>> xr.Coordinates.from_pandas_multiindex(midx, "x")
242+
Coordinates:
243+
* x (x) object MultiIndex
244+
* x_level_0 (x) object 'a' 'a' 'b' 'b'
245+
* x_level_1 (x) int64 0 1 0 1
246+
247+
Create a new Dataset object by passing a Coordinates object:
248+
249+
>>> midx_coords = xr.Coordinates.from_pandas_multiindex(midx, "x")
250+
>>> xr.Dataset(coords=midx_coords)
251+
<xarray.Dataset>
252+
Dimensions: (x: 4)
253+
Coordinates:
254+
* x (x) object MultiIndex
255+
* x_level_0 (x) object 'a' 'a' 'b' 'b'
256+
* x_level_1 (x) int64 0 1 0 1
257+
Data variables:
258+
*empty*
211259
212260
"""
213261

@@ -227,17 +275,40 @@ def __init__(
227275
from xarray.core.dataset import Dataset
228276

229277
if coords is None:
230-
variables = {}
231-
elif isinstance(coords, Coordinates):
278+
coords = {}
279+
280+
variables: dict[Hashable, Variable]
281+
default_indexes: dict[Hashable, PandasIndex] = {}
282+
coords_obj_indexes: dict[Hashable, Index] = {}
283+
284+
if isinstance(coords, Coordinates):
285+
if indexes is not None:
286+
raise ValueError(
287+
"passing both a ``Coordinates`` object and a mapping of indexes "
288+
"to ``Coordinates.__init__`` is not allowed "
289+
"(this constructor does not support merging them)"
290+
)
232291
variables = {k: v.copy() for k, v in coords.variables.items()}
292+
coords_obj_indexes = dict(coords.xindexes)
233293
else:
234-
variables = {k: as_variable(v) for k, v in coords.items()}
294+
variables = {}
295+
for name, data in coords.items():
296+
var = as_variable(data, name=name)
297+
if var.dims == (name,) and indexes is None:
298+
index, index_vars = create_default_index_implicit(var, list(coords))
299+
default_indexes.update({k: index for k in index_vars})
300+
variables.update(index_vars)
301+
else:
302+
variables[name] = var
235303

236304
if indexes is None:
237305
indexes = {}
238306
else:
239307
indexes = dict(indexes)
240308

309+
indexes.update(default_indexes)
310+
indexes.update(coords_obj_indexes)
311+
241312
no_coord_index = set(indexes) - set(variables)
242313
if no_coord_index:
243314
raise ValueError(

xarray/tests/test_coordinates.py

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,17 @@ def test_init_noindex(self) -> None:
1717
expected = Dataset(coords={"foo": ("x", [0, 1, 2])})
1818
assert_identical(coords.to_dataset(), expected)
1919

20+
def test_init_default_index(self) -> None:
21+
coords = Coordinates(coords={"x": [1, 2]})
22+
expected = Dataset(coords={"x": [1, 2]})
23+
assert_identical(coords.to_dataset(), expected)
24+
assert "x" in coords.xindexes
25+
26+
def test_init_no_default_index(self) -> None:
27+
# dimension coordinate with no default index (explicit)
28+
coords = Coordinates(coords={"x": [1, 2]}, indexes={})
29+
assert "x" not in coords.xindexes
30+
2031
def test_init_from_coords(self) -> None:
2132
expected = Dataset(coords={"foo": ("x", [0, 1, 2])})
2233
coords = Coordinates(coords=expected.coords)
@@ -25,10 +36,19 @@ def test_init_from_coords(self) -> None:
2536
# test variables copied
2637
assert coords.variables["foo"] is not expected.variables["foo"]
2738

28-
# default index
29-
expected = Dataset(coords={"x": ("x", [0, 1, 2])})
30-
coords = Coordinates(coords=expected.coords, indexes=expected.xindexes)
39+
# test indexes are extracted
40+
expected = Dataset(coords={"x": [0, 1, 2]})
41+
coords = Coordinates(coords=expected.coords)
3142
assert_identical(coords.to_dataset(), expected)
43+
assert expected.xindexes == coords.xindexes
44+
45+
# coords + indexes not supported
46+
with pytest.raises(
47+
ValueError, match="passing both.*Coordinates.*indexes.*not allowed"
48+
):
49+
coords = Coordinates(
50+
coords=expected.coords, indexes={"x": PandasIndex([0, 1, 2], "x")}
51+
)
3252

3353
def test_init_empty(self) -> None:
3454
coords = Coordinates()
@@ -60,37 +80,31 @@ def test_from_pandas_multiindex(self) -> None:
6080
assert_identical(expected[name], coords.variables[name])
6181

6282
def test_dims(self) -> None:
63-
_ds = Dataset(coords={"x": [0, 1, 2]})
64-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
83+
coords = Coordinates(coords={"x": [0, 1, 2]})
6584
assert coords.dims == {"x": 3}
6685

6786
def test_sizes(self) -> None:
68-
_ds = Dataset(coords={"x": [0, 1, 2]})
69-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
87+
coords = Coordinates(coords={"x": [0, 1, 2]})
7088
assert coords.sizes == {"x": 3}
7189

7290
def test_dtypes(self) -> None:
73-
_ds = Dataset(coords={"x": [0, 1, 2]})
74-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
91+
coords = Coordinates(coords={"x": [0, 1, 2]})
7592
assert coords.dtypes == {"x": int}
7693

7794
def test_getitem(self) -> None:
78-
_ds = Dataset(coords={"x": [0, 1, 2]})
79-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
95+
coords = Coordinates(coords={"x": [0, 1, 2]})
8096
assert_identical(
8197
coords["x"],
8298
DataArray([0, 1, 2], coords={"x": [0, 1, 2]}, name="x"),
8399
)
84100

85101
def test_delitem(self) -> None:
86-
_ds = Dataset(coords={"x": [0, 1, 2]})
87-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
102+
coords = Coordinates(coords={"x": [0, 1, 2]})
88103
del coords["x"]
89104
assert "x" not in coords
90105

91106
def test_update(self) -> None:
92-
_ds = Dataset(coords={"x": [0, 1, 2]})
93-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
107+
coords = Coordinates(coords={"x": [0, 1, 2]})
94108

95109
coords.update({"y": ("y", [4, 5, 6])})
96110
assert "y" in coords
@@ -99,18 +113,16 @@ def test_update(self) -> None:
99113
assert_identical(coords["y"], expected)
100114

101115
def test_equals(self):
102-
_ds = Dataset(coords={"x": [0, 1, 2]})
103-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
116+
coords = Coordinates(coords={"x": [0, 1, 2]})
104117

105118
assert coords.equals(coords)
106-
assert not coords.equals("no_a_coords")
119+
assert not coords.equals("not_a_coords")
107120

108121
def test_identical(self):
109-
_ds = Dataset(coords={"x": [0, 1, 2]})
110-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
122+
coords = Coordinates(coords={"x": [0, 1, 2]})
111123

112124
assert coords.identical(coords)
113-
assert not coords.identical("no_a_coords")
125+
assert not coords.identical("not_a_coords")
114126

115127
def test_copy(self) -> None:
116128
no_index_coords = Coordinates({"foo": ("x", [1, 2, 3])})
@@ -129,8 +141,7 @@ def test_copy(self) -> None:
129141
assert source_ndarray(v0.data) is not source_ndarray(v1.data)
130142

131143
def test_align(self) -> None:
132-
_ds = Dataset(coords={"x": [0, 1, 2]})
133-
coords = Coordinates(coords=_ds.coords, indexes=_ds.xindexes)
144+
coords = Coordinates(coords={"x": [0, 1, 2]})
134145

135146
left = coords
136147

xarray/tests/test_dataarray.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ def test_constructor_dask_coords(self) -> None:
489489

490490
def test_constructor_no_default_index(self) -> None:
491491
# explicitly passing a Coordinates object skips the creation of default index
492-
da = DataArray(range(3), coords=Coordinates({"x": ("x", [1, 2, 3])}))
492+
da = DataArray(range(3), coords=Coordinates({"x": [1, 2, 3]}, indexes={}))
493493
assert "x" in da.coords
494494
assert "x" not in da.xindexes
495495

@@ -1587,7 +1587,7 @@ class CustomIndex(Index):
15871587
assert isinstance(actual.xindexes["x"], CustomIndex)
15881588

15891589
def test_assign_coords_no_default_index(self) -> None:
1590-
coords = Coordinates({"y": ("y", [1, 2, 3])})
1590+
coords = Coordinates({"y": [1, 2, 3]}, indexes={})
15911591
da = DataArray([1, 2, 3], dims="y")
15921592
actual = da.assign_coords(coords)
15931593
assert_identical(actual.coords, coords, check_default_indexes=False)

xarray/tests/test_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ def test_constructor_with_coords(self) -> None:
636636

637637
def test_constructor_no_default_index(self) -> None:
638638
# explicitly passing a Coordinates object skips the creation of default index
639-
ds = Dataset(coords=Coordinates({"x": ("x", [1, 2, 3])}))
639+
ds = Dataset(coords=Coordinates({"x": [1, 2, 3]}, indexes={}))
640640
assert "x" in ds
641641
assert "x" not in ds.xindexes
642642

@@ -4356,7 +4356,7 @@ class CustomIndex(Index):
43564356
assert isinstance(actual.xindexes["x"], CustomIndex)
43574357

43584358
def test_assign_coords_no_default_index(self) -> None:
4359-
coords = Coordinates({"y": ("y", [1, 2, 3])})
4359+
coords = Coordinates({"y": [1, 2, 3]}, indexes={})
43604360
ds = Dataset()
43614361
actual = ds.assign_coords(coords)
43624362
expected = coords.to_dataset()

0 commit comments

Comments
 (0)