Skip to content

Commit 6a79ff3

Browse files
authored
Merge pull request #17 from kthyng/split
Split
2 parents f5a5e70 + aa5b6b9 commit 6a79ff3

File tree

4 files changed

+51
-10
lines changed

4 files changed

+51
-10
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ repos:
2121
- id: file-contents-sorter
2222
files: requirements-dev.txt
2323

24-
- repo: https://gitlab.com/pycqa/flake8
24+
- repo: https://github.com/pycqa/flake8
2525
rev: 3.7.9
2626
hooks:
2727
- id: flake8

cf_pandas/accessor.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@
77
import cf_pandas as cfp
88

99

10+
try:
11+
# delete any existing `cf` accessor to avoid a warning when it is registered again below
12+
del pd.DataFrame.cf
13+
except AttributeError:
14+
pass
15+
16+
1017
@pd.api.extensions.register_dataframe_accessor("cf")
1118
class CFAccessor:
1219
"""Dataframe accessor analogous to cf-xarray accessor."""

cf_pandas/utils.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def match_criteria_key(
6161
available_values: list,
6262
keys_to_match: Union[str, list],
6363
criteria: Optional[dict] = None,
64+
split: bool = False,
6465
) -> list:
6566
"""Use criteria to choose match to key from available_values.
6667
@@ -72,6 +73,8 @@ def match_criteria_key(
7273
Key(s) from criteria to match with available_values.
7374
criteria : dict, optional
7475
Criteria to use to map from variable to attributes describing the variable. If user has defined custom_criteria, this will be used by default.
76+
split : bool, optional
77+
If split is True, split the available_values by whitespace before performing matches. This is helpful e.g. when column headers have the form "standard_name (units)" and you want to match standard_name.
7578
7679
Returns
7780
-------
@@ -93,17 +96,32 @@ def match_criteria_key(
9396
# criterion is the attribute type — in this function we don't use it,
9497
# instead we use all the patterns available in criteria to match with available_values
9598
for criterion, patterns in custom_criteria[key].items():
96-
results.extend(
97-
list(
98-
set(
99-
[
100-
value
101-
for value in available_values
102-
if regex.match(patterns, value)
103-
]
99+
if split:
100+
results.extend(
101+
list(
102+
set(
103+
[
104+
value
105+
for value in available_values
106+
for value_part in value.split()
107+
if regex.match(patterns, value_part)
108+
]
109+
)
110+
)
111+
)
112+
113+
else:
114+
results.extend(
115+
list(
116+
set(
117+
[
118+
value
119+
for value in available_values
120+
if regex.match(patterns, value)
121+
]
122+
)
104123
)
105124
)
106-
)
107125

108126
# catch scenario that user input valid reader variable names
109127
else:

tests/test_utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,22 @@ def test_match_criteria_key():
2222
assert cfp.match_criteria_key(vals, ["wind_s"], criteria) == ["wind_speed"]
2323

2424

25+
def test_match_criteria_key_split():
26+
27+
vals = ["wind_speed (m/s)", "WIND_SPEED", "wind_speed_status"]
28+
29+
# test function with set_options criteria
30+
with cfp.set_options(custom_criteria=criteria):
31+
assert cfp.match_criteria_key(vals, ["wind_s"], split=True) == [
32+
"wind_speed (m/s)"
33+
]
34+
35+
# test function with input criteria
36+
assert cfp.match_criteria_key(vals, ["wind_s"], criteria, split=True) == [
37+
"wind_speed (m/s)"
38+
]
39+
40+
2541
def test_standard_names():
2642

2743
names = cfp.standard_names()

0 commit comments

Comments
 (0)