Skip to content

Commit

Permalink
Add test and fix so that an axis held in the index (not only in the columns) can be guessed
Browse files: browse the repository at this point in the history
  • Loading branch information
kthyng committed Apr 28, 2023
1 parent 0b2d383 commit c5dec82
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
19 changes: 13 additions & 6 deletions cf_pandas/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ def _validate(self):
# verify that necessary keys are present. Z would also be nice but might be missing.
# but don't use the accessor to check
keys = ["T", "longitude", "latitude"]
missing_keys = [key for key in keys if len(_get_axis_coord(self._obj, key)) == 0]
missing_keys = [
key for key in keys if len(_get_axis_coord(self._obj, key)) == 0
]
if len(missing_keys) > 0:
raise AttributeError(
f'{"longitude", "latitude", "time"} must be identifiable in DataFrame but {missing_keys} are missing.'
Expand Down Expand Up @@ -368,14 +370,19 @@ def _get_axis_coord(obj: Union[DataFrame, Series], key: str) -> list:
# units = getattr(col.data, "units", None)
# if units in expected:
# results.update((col,))

# also use the guess_regex approach by default, but only if no results so far
# this takes the logic from cf-xarray guess_coord_axis
if len(results) == 0:
if key in ("T", "time") and _is_datetime_like(obj[col]):
results.update((col,))
continue # prevent second detection

if col in obj.columns:
if key in ("T", "time") and _is_datetime_like(obj[col]):
results.update((col,))
continue # prevent second detection
elif col in obj.index.names:
if key in ("T", "time") and _is_datetime_like(
obj.index.get_level_values(col)
):
results.update((col,))
continue # prevent second detection
pattern = guess_regex[key]
if pattern.match(col.lower()):
results.update((col,))
Expand Down
11 changes: 11 additions & 0 deletions tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,14 @@ def test_get_by_guess_regex():
assert df.cf["longitude"].name == "lon"
assert df.cf["latitude"].name == "lat"
assert df.cf["time"].name == "min"

df = pd.DataFrame(columns=["blah_lon", "table_lat"])
assert df.cf["longitude"].name == "blah_lon"
assert df.cf["latitude"].name == "table_lat"


def test_index():
    """Check that the "T" axis is found when its name is on the index.

    The frame is built with no columns and a single-entry index whose name
    is "m_time"; looking up "T" through the ``cf`` accessor must return an
    object carrying that name.
    """
    named_index = pd.Index(["m_time"], name="m_time")
    frame = pd.DataFrame(index=named_index)
    assert frame.cf["T"].name == "m_time"

0 comments on commit c5dec82

Please sign in to comment.