Skip to content

Commit 06db74b

Browse files
committed
get axes/coords use regex matching too
The regex-matching logic from cf-xarray's guess_coord_axis is now built into _get_axis_coord. It is applied only when no other matches have been found for the key, to help find reasonable matches.
1 parent 297621b commit 06db74b

File tree

4 files changed

+56
-4
lines changed

4 files changed

+56
-4
lines changed

cf_pandas/accessor.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import itertools
6+
from collections import ChainMap
67
from typing import (
78
Any,
89
Callable,
@@ -25,9 +26,14 @@
2526

2627
import cf_pandas as cfp
2728

28-
from .criteria import coordinate_criteria
29+
from .criteria import coordinate_criteria, guess_regex
2930
from .options import OPTIONS
30-
from .utils import always_iterable, match_criteria_key, set_up_criteria
31+
from .utils import (
32+
_is_datetime_like,
33+
always_iterable,
34+
match_criteria_key,
35+
set_up_criteria,
36+
)
3137
from .vocab import Vocab
3238

3339
#: `axis` names understood by cf_xarray
@@ -195,7 +201,6 @@ def axes(self) -> Dict[str, List[str]]:
195201
"""
196202
# vardict = {key: self.__getitem__(key) for key in _AXIS_NAMES}
197203
vardict = {key: _get_all(self._obj, key) for key in _AXIS_NAMES}
198-
199204
return {k: sorted(v) for k, v in vardict.items() if v}
200205

201206
@property
@@ -275,23 +280,28 @@ def standard_names(self):
275280

276281
def _get_axis_coord(obj: Union[DataFrame, Series], key: str) -> list:
277282
"""
278-
Translate from axis or coord name to variable name
283+
Translate from axis or coord name to variable name. After matching based on coordinate_criteria,
284+
if there are no matches for key, then guess_regex is used to search for matches.
285+
279286
Parameters
280287
----------
281288
obj : DataArray, Dataset
282289
DataArray belonging to the coordinate to be checked
283290
key : str, ["X", "Y", "Z", "T", "longitude", "latitude", "vertical", "time"]
284291
key to check for.
292+
285293
Returns
286294
-------
287295
List[str], Variable name(s) in parent xarray object that matches axis or coordinate `key`
296+
288297
Notes
289298
-----
290299
This functions checks for the following attributes in order
291300
- `standard_name` (CF option)
292301
- `_CoordinateAxisType` (from THREDDS)
293302
- `axis` (CF option)
294303
- `positive` (CF standard for non-pressure vertical coordinate)
304+
295305
References
296306
----------
297307
MetPy's parse_cf
@@ -340,6 +350,18 @@ def _get_axis_coord(obj: Union[DataFrame, Series], key: str) -> list:
340350
# units = getattr(col.data, "units", None)
341351
# if units in expected:
342352
# results.update((col,))
353+
354+
# also use the guess_regex approach by default, but only if no results so far
355+
# this takes the logic from cf-xarray guess_coord_axis
356+
if len(results) == 0:
357+
if obj[col].ndim == 1 and _is_datetime_like(obj[col]):
358+
results.update((col,))
359+
continue # prevent second detection
360+
361+
pattern = guess_regex[key]
362+
if pattern.match(col.lower()):
363+
results.update((col,))
364+
343365
return list(results)
344366

345367

cf_pandas/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
from collections import ChainMap
66
from typing import Any, Iterable, Optional, Union
77

8+
import numpy as np
89
import pandas as pd
910
import regex
11+
from pandas import Series
1012

1113
from .options import OPTIONS
1214

@@ -152,3 +154,12 @@ def standard_names():
152154
standard_names = [entry.get("id") for entry in soup.find_all("entry")]
153155

154156
return standard_names
157+
158+
159+
def _is_datetime_like(da: Series) -> bool:
160+
if np.issubdtype(da.dtype, np.datetime64) or np.issubdtype(
161+
da.dtype, np.timedelta64
162+
):
163+
return True
164+
165+
return False

tests/test_accessor.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,10 @@ def test_set_item():
121121
assert all(df.cf["temp"].values == np.arange(8))
122122
df.cf["longitude"] = np.arange(8)
123123
assert all(df.cf["longitude"].values == np.arange(8))
124+
125+
126+
def test_get_by_guess_regex():
    """Columns with no explicit criteria are still found via the regex guess."""
    frame = pd.DataFrame(columns=["lon", "lat", "min"])
    # each CF key should resolve to the column whose name matches its regex
    expected_by_key = (
        ("longitude", "lon"),
        ("latitude", "lat"),
        ("time", "min"),
    )
    for cf_key, column_name in expected_by_key:
        assert frame.cf[cf_key].name == column_name

tests/test_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from unittest import mock
44

5+
import pandas as pd
56
import requests
67

78
import cf_pandas as cfp
@@ -49,3 +50,14 @@ def test_standard_names(mock_requests):
4950
mock_requests.return_value = resp
5051
names = cfp.standard_names()
5152
assert "wind_speed" in names
53+
54+
55+
def test__is_datetime_like():
    """Datetime columns are detected; unparsed date strings are not."""
    is_datetime_like = cfp.utils._is_datetime_like

    # a real datetime64 column is datetime-like
    dated = pd.DataFrame()
    dated["time"] = pd.date_range(start="2001-1-1", end="2001-1-5", freq="1D")
    assert is_datetime_like(dated["time"])

    # date strings are not datetime-like until they are parsed
    texty = pd.DataFrame()
    texty["time"] = ["2001-1-1", "2001-1-2", "2001-1-3"]
    assert not is_datetime_like(texty["time"])
    parsed = pd.to_datetime(texty["time"])
    assert is_datetime_like(parsed)

0 commit comments

Comments
 (0)