From b288d0ee46ba3a3845a93ddb0433898fe2212435 Mon Sep 17 00:00:00 2001
From: Kristen Thyng <kthyng@gmail.com>
Date: Fri, 28 Apr 2023 15:19:34 -0500
Subject: [PATCH 1/7] accessor changes:

* checks both column names and index names for matching with regular expressions
* for axis and coordinates as well as for custom names/vocabs
* Some improvements in the handling of guessing when matches are not found
---
 cf_pandas/accessor.py | 60 ++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/cf_pandas/accessor.py b/cf_pandas/accessor.py
index 1dabf01..1407c99 100644
--- a/cf_pandas/accessor.py
+++ b/cf_pandas/accessor.py
@@ -58,7 +58,8 @@ class CFAccessor:
     """Dataframe accessor analogous to cf-xarray accessor."""
 
     def __init__(self, pandas_obj):
-        self._validate(pandas_obj)
+        # don't automatically validate but can when needed
+        # self._validate(pandas_obj)
         self._obj = pandas_obj
 
     @staticmethod
@@ -110,9 +111,12 @@ def __getitem__(self, key: str) -> Union[pd.Series, pd.DataFrame]:
         else:
             col_names = _get_custom_criteria(self._obj, key)
 
-        # return series
-        if len(col_names) == 1:
+        # return series for column
+        if len(col_names) == 1 and col_names[0] in self._obj.columns:
             return self._obj[col_names[0]]
+        # return index 
+        elif len(col_names) == 1 and col_names[0] in self._obj.index.names:
+            return self._obj.index.get_level_values(col_names[0])
         # return DataFrame
         elif len(col_names) > 1:
             return self._obj[col_names]
@@ -247,6 +251,32 @@ def custom_keys(self):
         }
 
         return vardict
+    
+    @property
+    def axes_cols(self) -> List[str]:
+        """
+        Property that returns a list of column names from the axes mapping.
+
+        Returns
+        -------
+        list
+            Variable names that are the column names which represent axes.
+        """
+        
+        return list(itertools.chain(*[*self.axes.values()]))
+    
+    @property
+    def coordinates_cols(self) -> List[str]:
+        """
+        Property that returns a list of column names from the coordinates mapping.
+
+        Returns
+        -------
+        list
+            Variable names that are the column names which represent coordinates.
+        """
+        
+        return list(itertools.chain(*[*self.coordinates.values()]))
 
     @property
     def standard_names(self):
@@ -313,26 +343,14 @@ def _get_axis_coord(obj: Union[DataFrame, Series], key: str) -> list:
             f"cf_xarray did not understand key {key!r}. Expected one of {valid_keys!r}"
         )
 
-    # search_in = set()
-    # attrs_or_encoding = ChainMap(obj.attrs, obj.encoding)
-    # coordinates = attrs_or_encoding.get("coordinates", None)
-
-    # # Handles case where the coordinates attribute is None
-    # # This is used to tell xarray to not write a coordinates attribute
-    # if coordinates:
-    #     search_in.update(coordinates.split(" "))
-    # if not search_in:
-    #     search_in = set(obj.coords)
-
-    # # maybe only do this for key in _AXIS_NAMES?
-    # search_in.update(obj.indexes)
-
-    # search_in = search_in & set(obj.coords)
+    # loop over column names and index names
     results: set = set()
-    for col in obj.columns:
-        # var = obj.coords[coord]
+    cols_and_indices = list(obj.columns)
+    cols_and_indices += obj.index.names
+    # remove None if in names from index
+    cols_and_indices = [name for name in cols_and_indices if name is not None]
+    for col in cols_and_indices:
         if key in coordinate_criteria:
-            # import pdb; pdb.set_trace()
             for criterion, expected in coordinate_criteria[key].items():
                 # allow for the column header having a space in it that separate
                 # the name from the units, for example

From ec8abb7fa9b6df2ff6ac2beb7eaf4b3a8a90865d Mon Sep 17 00:00:00 2001
From: Kristen Thyng <kthyng@gmail.com>
Date: Fri, 28 Apr 2023 15:22:16 -0500
Subject: [PATCH 2/7] updates to guess_regex:

* for Z, "dbars" no longer has to be at the start of the string to match
* for time, "time" no longer has to be at the start of the string to match
* for latitude and longitude, "lat" and "lon", respectively, no longer have to be at the start of the string to match.
This should probably be a change for all such matches, but I want to discuss with cf-xarray people.
---
 cf_pandas/criteria.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/cf_pandas/criteria.py b/cf_pandas/criteria.py
index 5b30f35..be00bb7 100644
--- a/cf_pandas/criteria.py
+++ b/cf_pandas/criteria.py
@@ -103,15 +103,24 @@
 coordinate_criteria["X"]["long_name"] += ("cell index along first dimension",)
 coordinate_criteria["Y"]["long_name"] += ("cell index along second dimension",)
 
+# changes allow for the pattern string to not be at the start of the comparison string
+# like (?=.*lon)
 guess_regex = {
-    "time": re.compile("\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*"),
+    "time": re.compile("\\bt\\b|(?=.*time|min|hour|day|week|month|year)[0-9]*"),
+    # "time": re.compile("\\bt\\b|(time|min|hour|day|week|month|year)[0-9]*"),
     "Z": re.compile(
-        "(z|nav_lev|gdep|lv_|[o]*lev|bottom_top|sigma|h(ei)?ght|altitude|depth|"
+        "(z|nav_lev|gdep|lv_|[o]*lev|bottom_top|sigma|(?=.*dbars)|h(ei)?ght|altitude|depth|"
         "isobaric|pres|isotherm)[a-z_]*[0-9]*"
     ),
+    # "Z": re.compile(
+    #     "(z|nav_lev|gdep|lv_|[o]*lev|bottom_top|sigma|h(ei)?ght|altitude|depth|"
+    #     "isobaric|pres|isotherm)[a-z_]*[0-9]*"
+    # ),
     "Y": re.compile("y|j|nlat|nj"),
-    "latitude": re.compile("y?(nav_lat|lat|gphi)[a-z0-9]*"),
+    "latitude": re.compile("y?(nav_lat|(?=.*lat)|gphi)[a-z0-9]*"),
+    # "latitude": re.compile("(?i)y?(?=.*lat)[a-z0-9]*"),
     "X": re.compile("x|i|nlon|ni"),
-    "longitude": re.compile("x?(nav_lon|lon|glam)[a-z0-9]*"),
+    # "longitude": re.compile("(?i)x?(?=.*lon)[a-z0-9]*"),
+    "longitude": re.compile("x?(nav_lon|(?=.*lon)|glam)[a-z0-9]*"),
 }
 guess_regex["T"] = guess_regex["time"]

From efd2dcb8805f9a23a3e7c5ca285975fd8d3f0e71 Mon Sep 17 00:00:00 2001
From: Kristen Thyng <kthyng@gmail.com>
Date: Fri, 28 Apr 2023 15:22:41 -0500
Subject: [PATCH 3/7] removed some comments

---
 cf_pandas/criteria.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/cf_pandas/criteria.py b/cf_pandas/criteria.py
index be00bb7..bf8226a 100644
--- a/cf_pandas/criteria.py
+++ b/cf_pandas/criteria.py
@@ -118,9 +118,7 @@
     # ),
     "Y": re.compile("y|j|nlat|nj"),
     "latitude": re.compile("y?(nav_lat|(?=.*lat)|gphi)[a-z0-9]*"),
-    # "latitude": re.compile("(?i)y?(?=.*lat)[a-z0-9]*"),
     "X": re.compile("x|i|nlon|ni"),
-    # "longitude": re.compile("(?i)x?(?=.*lon)[a-z0-9]*"),
     "longitude": re.compile("x?(nav_lon|(?=.*lon)|glam)[a-z0-9]*"),
 }
 guess_regex["T"] = guess_regex["time"]

From 859f08440ff86dc8d5e7b21971d077d316c76399 Mon Sep 17 00:00:00 2001
From: Kristen Thyng <kthyng@gmail.com>
Date: Fri, 28 Apr 2023 15:23:53 -0500
Subject: [PATCH 4/7] linting

---
 cf_pandas/accessor.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cf_pandas/accessor.py b/cf_pandas/accessor.py
index 1407c99..da0190c 100644
--- a/cf_pandas/accessor.py
+++ b/cf_pandas/accessor.py
@@ -114,7 +114,7 @@ def __getitem__(self, key: str) -> Union[pd.Series, pd.DataFrame]:
         # return series for column
         if len(col_names) == 1 and col_names[0] in self._obj.columns:
             return self._obj[col_names[0]]
-        # return index 
+        # return index
         elif len(col_names) == 1 and col_names[0] in self._obj.index.names:
             return self._obj.index.get_level_values(col_names[0])
         # return DataFrame
@@ -251,7 +251,7 @@ def custom_keys(self):
         }
 
         return vardict
-    
+
     @property
     def axes_cols(self) -> List[str]:
         """
@@ -262,9 +262,9 @@ def axes_cols(self) -> List[str]:
         list
             Variable names that are the column names which represent axes.
         """
-        
+
         return list(itertools.chain(*[*self.axes.values()]))
-    
+
     @property
     def coordinates_cols(self) -> List[str]:
         """
@@ -275,7 +275,7 @@ def coordinates_cols(self) -> List[str]:
         list
             Variable names that are the column names which represent coordinates.
         """
-        
+
         return list(itertools.chain(*[*self.coordinates.values()]))
 
     @property

From 0b2d38344239a2d83904d1bb41b1e4765c5b75a0 Mon Sep 17 00:00:00 2001
From: Kristen Thyng <kthyng@gmail.com>
Date: Fri, 28 Apr 2023 15:30:52 -0500
Subject: [PATCH 5/7] updated test for validation

---
 cf_pandas/accessor.py  | 6 +++---
 tests/test_accessor.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cf_pandas/accessor.py b/cf_pandas/accessor.py
index da0190c..0577f39 100644
--- a/cf_pandas/accessor.py
+++ b/cf_pandas/accessor.py
@@ -62,14 +62,14 @@ def __init__(self, pandas_obj):
         # self._validate(pandas_obj)
         self._obj = pandas_obj
 
-    @staticmethod
-    def _validate(obj):
+    # @staticmethod
+    def _validate(self):
         """what is necessary for basic use."""
 
         # verify that necessary keys are present. Z would also be nice but might be missing.
         # but don't use the accessor to check
         keys = ["T", "longitude", "latitude"]
-        missing_keys = [key for key in keys if len(_get_axis_coord(obj, key)) == 0]
+        missing_keys = [key for key in keys if len(_get_axis_coord(self._obj, key)) == 0]
         if len(missing_keys) > 0:
             raise AttributeError(
                 f'{"longitude", "latitude", "time"} must be identifiable in DataFrame but {missing_keys} are missing.'
diff --git a/tests/test_accessor.py b/tests/test_accessor.py
index 6ce4f67..360b564 100644
--- a/tests/test_accessor.py
+++ b/tests/test_accessor.py
@@ -33,7 +33,7 @@ def test_validate():
         ]
     )
     with pytest.raises(AttributeError):
-        df.cf.keys()
+        df.cf._validate()
 
 
 def test_match_criteria_key_accessor():

From c5dec821a6a4ed477becb196d85e165e21cd6246 Mon Sep 17 00:00:00 2001
From: Kristen Thyng <kthyng@gmail.com>
Date: Fri, 28 Apr 2023 15:51:15 -0500
Subject: [PATCH 6/7] test and fix for index being guessed

---
 cf_pandas/accessor.py  | 19 +++++++++++++------
 tests/test_accessor.py | 11 +++++++++++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/cf_pandas/accessor.py b/cf_pandas/accessor.py
index 0577f39..d2cfdd8 100644
--- a/cf_pandas/accessor.py
+++ b/cf_pandas/accessor.py
@@ -69,7 +69,9 @@ def _validate(self):
         # verify that necessary keys are present. Z would also be nice but might be missing.
         # but don't use the accessor to check
         keys = ["T", "longitude", "latitude"]
-        missing_keys = [key for key in keys if len(_get_axis_coord(self._obj, key)) == 0]
+        missing_keys = [
+            key for key in keys if len(_get_axis_coord(self._obj, key)) == 0
+        ]
         if len(missing_keys) > 0:
             raise AttributeError(
                 f'{"longitude", "latitude", "time"} must be identifiable in DataFrame but {missing_keys} are missing.'
@@ -368,14 +370,19 @@ def _get_axis_coord(obj: Union[DataFrame, Series], key: str) -> list:
                     #     units = getattr(col.data, "units", None)
                     #     if units in expected:
                     #         results.update((col,))
-
         # also use the guess_regex approach by default, but only if no results so far
         # this takes the logic from cf-xarray guess_coord_axis
         if len(results) == 0:
-            if key in ("T", "time") and _is_datetime_like(obj[col]):
-                results.update((col,))
-                continue  # prevent second detection
-
+            if col in obj.columns:
+                if key in ("T", "time") and _is_datetime_like(obj[col]):
+                    results.update((col,))
+                    continue  # prevent second detection
+            elif col in obj.index.names:
+                if key in ("T", "time") and _is_datetime_like(
+                    obj.index.get_level_values(col)
+                ):
+                    results.update((col,))
+                    continue  # prevent second detection
             pattern = guess_regex[key]
             if pattern.match(col.lower()):
                 results.update((col,))
diff --git a/tests/test_accessor.py b/tests/test_accessor.py
index 360b564..4351b30 100644
--- a/tests/test_accessor.py
+++ b/tests/test_accessor.py
@@ -128,3 +128,14 @@ def test_get_by_guess_regex():
     assert df.cf["longitude"].name == "lon"
     assert df.cf["latitude"].name == "lat"
     assert df.cf["time"].name == "min"
+
+    df = pd.DataFrame(columns=["blah_lon", "table_lat"])
+    assert df.cf["longitude"].name == "blah_lon"
+    assert df.cf["latitude"].name == "table_lat"
+
+
+def test_index():
+    """Test when time is in index."""
+    df = pd.DataFrame(index=["m_time"])
+    df.index.rename("m_time", inplace=True)
+    assert df.cf["T"].name == "m_time"

From 0b991b44c5da62388d4b99e73385fd512e23ce06 Mon Sep 17 00:00:00 2001
From: Kristen Thyng <kthyng@gmail.com>
Date: Fri, 28 Apr 2023 15:56:56 -0500
Subject: [PATCH 7/7] added another test

---
 tests/test_accessor.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_accessor.py b/tests/test_accessor.py
index 4351b30..990db14 100644
--- a/tests/test_accessor.py
+++ b/tests/test_accessor.py
@@ -139,3 +139,9 @@ def test_index():
     df = pd.DataFrame(index=["m_time"])
     df.index.rename("m_time", inplace=True)
     assert df.cf["T"].name == "m_time"
+
+
+def test_cols():
+    df = pd.DataFrame(columns=["m_time", "lon", "lat", "temp"])
+    assert df.cf.axes_cols == ["m_time"]
+    assert sorted(df.cf.coordinates_cols) == ["lat", "lon", "m_time"]