Skip to content

Commit

Permalink
precommit
Browse files Browse the repository at this point in the history
  • Loading branch information
kthyng committed Jan 9, 2023
1 parent cd90cf8 commit 136df23
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 84 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ repos:
rev: v0.982
hooks:
- id: mypy
additional_dependencies: [types-setuptools]
additional_dependencies: [types-setuptools, types-PyYAML]
exclude: docs/source/conf.py
args: [--ignore-missing-imports]

Expand Down
22 changes: 11 additions & 11 deletions docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Make a catalog with known local or remote file(s).

#### Available options

omsa make_catalog --project_name PROJ_NAME --catalog_type local --catalog_name CATALOG_NAME --description "Catalog description" --kwargs filenames="[FILE1,FILE2]"
omsa make_catalog --project_name PROJ_NAME --catalog_type local --catalog_name CATALOG_NAME --description "Catalog description" --kwargs filenames="[FILE1,FILE2]"

* `project_name`: Will be used as the name of the directory where the catalog is saved. The directory is located in a user application cache directory, the address of which can be found for your setup with `omsa proj_path --project_name PROJ_NAME`.
* `catalog_type`: Type of catalog to make. Options are "erddap", "axds", or "local".
Expand All @@ -30,19 +30,19 @@ Make a catalog with known local or remote file(s).























Expand All @@ -52,7 +52,7 @@ Make a catalog with known local or remote file(s).
#### Examples

```{code-cell} ipython3
!omsa make_catalog --project_name test1 --catalog_type local --catalog_name example_local_catalog --description "Example local catalog description" --kwargs filenames="[https://researchworkspace.com/files/8114311/ecofoci_2011CHAOZ_CTD_Nutrient_mb1101.csv]"
!omsa make_catalog --project_name test1 --catalog_type local --catalog_name example_local_catalog --description "Example local catalog description" --kwargs filenames="[https://researchworkspace.com/files/8114311/ecofoci_2011CHAOZ_CTD_Nutrient_mb1101.csv]"
```

### ERDDAP Catalog
Expand All @@ -70,11 +70,11 @@ Make a catalog from datasets available from an ERDDAP server using `intake-erdda
* `metadata`: Metadata for catalog.
* `kwargs`: Keyword arguments to make the ERDDAP catalog. See `intake-erddap.erddap_cat()` for more details.
* `server`: ERDDAP server address, for example: "http://erddap.sensors.ioos.us/erddap"
* `category_search`:
* `erddap_client`:
* `use_source_constraints`:
* `protocol`:
* `metadata`:
* `category_search`:
* `erddap_client`:
* `use_source_constraints`:
* `protocol`:
* `metadata`:
* other keyword arguments can be passed into the intake `Catalog` class
* `kwargs_search`: Keyword arguments to input to search on the server before making the catalog.
* `min_lon`, `min_lat`, `max_lon`, `max_lat`: search for datasets within this spatial box
Expand Down
11 changes: 10 additions & 1 deletion ocean_model_skill_assessor/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,25 @@

import ocean_model_skill_assessor as omsa


def is_int(s):
    """Return True if ``s`` can be interpreted as an integer, else False."""
    try:
        int(s)
    except (ValueError, TypeError):
        # Not parseable as an int (bad string, None, etc.).
        return False
    return True


def is_float(s):
    """Return True if ``s`` can be interpreted as a float, else False."""
    try:
        float(s)
    except (ValueError, TypeError):
        # Not parseable as a float (bad string, None, etc.).
        return False
    return True


# https://sumit-ghosh.com/articles/parsing-dictionary-key-value-pairs-kwargs-argparse-python/
class ParseKwargs(argparse.Action):
"""With can user can input dicts on CLI."""
Expand Down Expand Up @@ -99,7 +105,10 @@ def main():
parser.add_argument(
"--key", help="Key from vocab representing the variable to compare."
)
parser.add_argument("--model_name", help="Name of catalog for model output, created in a `make_Catalog` command.")
parser.add_argument(
"--model_name",
help="Name of catalog for model output, created in a `make_Catalog` command.",
)
parser.add_argument(
"--ndatasets",
type=int,
Expand Down
61 changes: 37 additions & 24 deletions ocean_model_skill_assessor/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ def make_local_catalog(
kwargs_open: Optional[Dict] = None,
) -> Catalog:
"""Make an intake catalog from specified data files, including model output locations.
Pass keywords for xarray for model output into the catalog through kwargs_xarray.
kwargs_open and metadata must be the same for all filenames. If it is not, make multiple catalogs and you can input them individually into the run command.
Parameters
Expand All @@ -73,42 +73,50 @@ def make_local_catalog(
-------
Catalog
Intake catalog with an entry for each dataset represented by a filename.
Examples
--------
Make catalog to represent local or remote files with specific locations:
>>> make_local_catalog([filename1, filename2])
Make catalog to represent model output:
>>> make_local_catalog([model output location], skip_entry_metadata=True, kwargs_open={"drop_variables": "tau"})
"""

metadata = metadata or {}
metadata_catalog = metadata_catalog or {}

kwargs_open = kwargs_open or {}

# if any of kwargs_open came in with "None" instead of None because of CLI, change back to None
kwargs_open.update({key: None for key, val in kwargs_open.items() if val == "None"})

sources = []
for filename in filenames:
mtype = mimetypes.guess_type(filename)[0]
if (mtype is not None and ("csv" in mtype or "text" in mtype)) or "csv" in filename or "text" in filename:
if (
(mtype is not None and ("csv" in mtype or "text" in mtype))
or "csv" in filename
or "text" in filename
):
source = getattr(intake, "open_csv")(filename, csv_kwargs=kwargs_open)
elif "thredds" in filename and "dodsC" in filename:
# use netcdf4 engine if not input in kwargs_xarray
kwargs_open.setdefault("engine", "netcdf4")
source = getattr(intake, "open_opendap")(filename, **kwargs_open)
elif (mtype is not None and "netcdf" in mtype) or "netcdf" in filename or ".nc" in filename:
elif (
(mtype is not None and "netcdf" in mtype)
or "netcdf" in filename
or ".nc" in filename
):
source = getattr(intake, "open_netcdf")(filename, **kwargs_open)

# combine input metadata with source metadata
source.metadata.update(metadata)

sources.append(source)

# create dictionary of catalog entries
Expand Down Expand Up @@ -144,8 +152,8 @@ def make_local_catalog(
"minLatitude",
"maxLatitude",
}:
dd['longitude'] = cat[source].metadata["minLongitude"]
dd['latitude'] = cat[source].metadata["minLatitude"]
dd["longitude"] = cat[source].metadata["minLongitude"]
dd["latitude"] = cat[source].metadata["minLatitude"]
cat[source].metadata = {
"minLongitude": cat[source].metadata["minLongitude"],
"minLatitude": cat[source].metadata["minLatitude"],
Expand All @@ -171,10 +179,12 @@ def make_local_catalog(
)
dd.index = dd.index.tz_convert(None)
dd.cf["T"] = dd.index
metadata.update({
"minTime": str(dd.cf["T"].min()),
"maxTime": str(dd.cf["T"].max()),
})
metadata.update(
{
"minTime": str(dd.cf["T"].min()),
"maxTime": str(dd.cf["T"].max()),
}
)

cat[source].metadata.update(metadata)
cat[source]._entry._metadata.update(metadata)
Expand Down Expand Up @@ -347,7 +357,9 @@ def make_catalog(
if save_cat:
# save cat to file
cat.save(omsa.CAT_PATH(catalog_name, project_name))
print(f"Catalog saved to {omsa.CAT_PATH(catalog_name, project_name)} with {len(list(cat))} entries.")
print(
f"Catalog saved to {omsa.CAT_PATH(catalog_name, project_name)} with {len(list(cat))} entries."
)

if return_cat:
return cat
Expand Down Expand Up @@ -434,8 +446,10 @@ def run(
dam = dam.assign_coords(lon=(((dam[lkey] + 180) % 360) - 180))
# rotate arrays so that the locations and values are -180 to 180
# instead of 0 to 180 to -180 to 0
dam = dam.roll(lon=int((dam[lkey]<0).sum()), roll_coords=True)
print("Longitudes are being shifted because they look like they are not -180 to 180.")
dam = dam.roll(lon=int((dam[lkey] < 0).sum()), roll_coords=True)
print(
"Longitudes are being shifted because they look like they are not -180 to 180."
)

# loop over catalogs and sources to pull out lon/lat locations for plot
maps = []
Expand Down Expand Up @@ -525,7 +539,6 @@ def run(
model_var = dam.em.sel2dcf(**kwargs) # .to_dataset()

if model_var.size == 0:
import pdb; pdb.set_trace()
# model output isn't available to match data
# data must not be in the space/time range of model
maps.pop(-1)
Expand Down
6 changes: 5 additions & 1 deletion ocean_model_skill_assessor/plot/map.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@


def plot_map(
maps: np.array, figname: Union[str, PurePath], ds: Union[DataArray, Dataset], alpha: int = 5, dd: int = 2,
maps: np.array,
figname: Union[str, PurePath],
ds: Union[DataArray, Dataset],
alpha: int = 5,
dd: int = 2,
):
"""Plot and save to file map of model domain and data locations.
Expand Down
81 changes: 52 additions & 29 deletions ocean_model_skill_assessor/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
from typing import Tuple, Union

import numpy as np
import ocean_model_skill_assessor as omsa
import pandas as pd
import yaml


from pandas import DataFrame
from xarray import DataArray

import ocean_model_skill_assessor as omsa


def _align(
obs: Union[DataFrame, DataArray], model: Union[DataFrame, DataArray]
Expand Down Expand Up @@ -162,9 +162,16 @@ def compute_root_mean_square_error(
return float(np.sqrt(mse))


def compute_descriptive_statistics(model: DataFrame, ddof=0) -> Tuple:
def compute_descriptive_statistics(model: DataFrame, ddof=0) -> list:
    """Return descriptive statistics of the model signal.

    Parameters
    ----------
    model : DataFrame
        Model signal to describe.
    ddof : int, optional
        Delta degrees of freedom for the standard deviation; default 0
        (population standard deviation).

    Returns
    -------
    list
        ``[max, min, mean, std]`` of the model values, each cast to float.
    """
    # A list literal suffices; wrapping it in list() was redundant.
    return [
        float(model.max()),
        float(model.min()),
        float(model.mean()),
        float(model.std(ddof=ddof)),
    ]


def compute_stats(obs: DataFrame, model: DataFrame) -> dict:
Expand All @@ -185,32 +192,48 @@ def compute_stats(obs: DataFrame, model: DataFrame) -> dict:
"rmse": compute_root_mean_square_error(obs, model),
"descriptive": compute_descriptive_statistics(model),
}


def save_stats(source_name: str, stats: dict, project_name: str):
    """Save computed stats to a YAML file in the project directory.

    Each entry of ``stats`` is wrapped in place into a dict carrying the
    raw value plus short and long human-readable names, then the whole
    structure is dumped to ``stats_{source_name}.yaml`` under the project
    directory returned by ``omsa.PROJ_DIR``.

    Parameters
    ----------
    source_name : str
        Name of the data source; used in the output file name.
    stats : dict
        Statistics as produced by ``compute_stats``. NOTE: mutated in place.
    project_name : str
        Project name used to locate the project directory.
    """

    # Map each statistic key to its (short name, long name) labels so the
    # wrapping below is table-driven instead of seven copy-pasted blocks.
    labels = {
        "bias": ("Bias", "Bias or MSD"),
        "corr": (
            "Correlation Coefficient",
            "Pearson product-moment correlation coefficient",
        ),
        "ioa": ("Index of Agreement", "Index of Agreement (Willmott 1981)"),
        "mse": ("Mean Squared Error", "Mean Squared Error (MSE)"),
        "mss": ("Murphy Skill Score", "Murphy Skill Score (Murphy 1988)"),
        "rmse": ("RMSE", "Root Mean Square Error (RMSE)"),
        "descriptive": (
            "Descriptive Statistics",
            "Max, Min, Mean, Standard Deviation",
        ),
    }
    for key, (name, long_name) in labels.items():
        stats[key] = {
            "value": stats[key],
            "name": name,
            "long_name": long_name,
        }

    with open(
        omsa.PROJ_DIR(project_name) / f"stats_{source_name}.yaml", "w"
    ) as outfile:
        yaml.dump(stats, outfile, default_flow_style=False)
Loading

0 comments on commit 136df23

Please sign in to comment.