diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d291cb1..7642fae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -51,7 +51,7 @@ repos: rev: v0.982 hooks: - id: mypy - additional_dependencies: [types-setuptools] + additional_dependencies: [types-setuptools, types-PyYAML] exclude: docs/source/conf.py args: [--ignore-missing-imports] diff --git a/docs/cli.md b/docs/cli.md index 60bdfe3..cd4d64d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -15,7 +15,7 @@ Make a catalog with known local or remote file(s). #### Available options - omsa make_catalog --project_name PROJ_NAME --catalog_type local --catalog_name CATALOG_NAME --description "Catalog description" --kwargs filenames="[FILE1,FILE2]" + omsa make_catalog --project_name PROJ_NAME --catalog_type local --catalog_name CATALOG_NAME --description "Catalog description" --kwargs filenames="[FILE1,FILE2]" * `project_name`: Will be used as the name of the directory where the catalog is saved. The directory is located in a user application cache directory, the address of which can be found for your setup with `omsa proj_path --project_name PROJ_NAME`. * `catalog_type`: Type of catalog to make. Options are "erddap", "axds", or "local". @@ -30,19 +30,19 @@ Make a catalog with known local or remote file(s). - - - - + + + + @@ -52,7 +52,7 @@ Make a catalog with known local or remote file(s). #### Examples ```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type local --catalog_name example_local_catalog --description "Example local catalog description" --kwargs filenames="[https://researchworkspace.com/files/8114311/ecofoci_2011CHAOZ_CTD_Nutrient_mb1101.csv]" +!omsa make_catalog --project_name test1 --catalog_type local --catalog_name example_local_catalog --description "Example local catalog description" --kwargs filenames="[https://researchworkspace.com/files/8114311/ecofoci_2011CHAOZ_CTD_Nutrient_mb1101.csv]" ``` ### ERDDAP Catalog @@ -70,11 +70,11 @@ Make a catalog from datasets available from an ERDDAP server using `intake-erdda * `metadata`: Metadata for catalog. * `kwargs`: Keyword arguments to make the ERDDAP catalog. See `intake-erddap.erddap_cat()` for more details. * `server`: ERDDAP server address, for example: "http://erddap.sensors.ioos.us/erddap" - * `category_search`: - * `erddap_client`: - * `use_source_constraints`: - * `protocol`: - * `metadata`: + * `category_search`: + * `erddap_client`: + * `use_source_constraints`: + * `protocol`: + * `metadata`: * other keyword arguments can be passed into the intake `Catalog` class * `kwargs_search`: Keyword arguments to input to search on the server before making the catalog. * `min_lon`, `min_lat`, `max_lon`, `max_lat`: search for datasets within this spatial box diff --git a/ocean_model_skill_assessor/CLI.py b/ocean_model_skill_assessor/CLI.py index f33096f..119a5fe 100644 --- a/ocean_model_skill_assessor/CLI.py +++ b/ocean_model_skill_assessor/CLI.py @@ -6,19 +6,25 @@ import ocean_model_skill_assessor as omsa + def is_int(s): + """Check if string is actually int.""" try: int(s) return True except (ValueError, TypeError): return False + + def is_float(s): + """Check if string is actually float.""" try: float(s) return True except (ValueError, TypeError): return False + # https://sumit-ghosh.com/articles/parsing-dictionary-key-value-pairs-kwargs-argparse-python/ class ParseKwargs(argparse.Action): """With can user can input dicts on CLI.""" @@ -99,7 +105,10 @@ def main(): parser.add_argument( "--key", help="Key from vocab representing the variable to compare." ) - parser.add_argument("--model_name", help="Name of catalog for model output, created in a `make_Catalog` command.") + parser.add_argument( + "--model_name", + help="Name of catalog for model output, created in a `make_Catalog` command.", + ) parser.add_argument( "--ndatasets", type=int, diff --git a/ocean_model_skill_assessor/main.py b/ocean_model_skill_assessor/main.py index 8b29ac8..fdea3ae 100644 --- a/ocean_model_skill_assessor/main.py +++ b/ocean_model_skill_assessor/main.py @@ -47,9 +47,9 @@ def make_local_catalog( kwargs_open: Optional[Dict] = None, ) -> Catalog: """Make an intake catalog from specified data files, including model output locations. - + Pass keywords for xarray for model output into the catalog through kwargs_xarray. - + kwargs_open and metadata must be the same for all filenames. If it is not, make multiple catalogs and you can input them individually into the run command. Parameters @@ -73,42 +73,50 @@ def make_local_catalog( ------- Catalog Intake catalog with an entry for each dataset represented by a filename. - + Examples -------- - + Make catalog to represent local or remote files with specific locations: - + >>> make_local_catalog([filename1, filename2]) - + Make catalog to represent model output: - + >>> make_local_catalog([model output location], skip_entry_metadata=True, kwargs_open={"drop_variables": "tau"}) """ - + metadata = metadata or {} metadata_catalog = metadata_catalog or {} - + kwargs_open = kwargs_open or {} - + # if any of kwargs_open came in with "None" instead of None because of CLI, change back to None kwargs_open.update({key: None for key, val in kwargs_open.items() if val == "None"}) sources = [] for filename in filenames: mtype = mimetypes.guess_type(filename)[0] - if (mtype is not None and ("csv" in mtype or "text" in mtype)) or "csv" in filename or "text" in filename: + if ( + (mtype is not None and ("csv" in mtype or "text" in mtype)) + or "csv" in filename + or "text" in filename + ): source = getattr(intake, "open_csv")(filename, csv_kwargs=kwargs_open) elif "thredds" in filename and "dodsC" in filename: # use netcdf4 engine if not input in kwargs_xarray kwargs_open.setdefault("engine", "netcdf4") source = getattr(intake, "open_opendap")(filename, **kwargs_open) - elif (mtype is not None and "netcdf" in mtype) or "netcdf" in filename or ".nc" in filename: + elif ( + (mtype is not None and "netcdf" in mtype) + or "netcdf" in filename + or ".nc" in filename + ): source = getattr(intake, "open_netcdf")(filename, **kwargs_open) - + # combine input metadata with source metadata source.metadata.update(metadata) - + sources.append(source) # create dictionary of catalog entries @@ -144,8 +152,8 @@ def make_local_catalog( "minLatitude", "maxLatitude", }: - dd['longitude'] = cat[source].metadata["minLongitude"] - dd['latitude'] = cat[source].metadata["minLatitude"] + dd["longitude"] = cat[source].metadata["minLongitude"] + dd["latitude"] = cat[source].metadata["minLatitude"] cat[source].metadata = { "minLongitude": cat[source].metadata["minLongitude"], "minLatitude": cat[source].metadata["minLatitude"], @@ -171,10 +179,12 @@ def make_local_catalog( ) dd.index = dd.index.tz_convert(None) dd.cf["T"] = dd.index - metadata.update({ - "minTime": str(dd.cf["T"].min()), - "maxTime": str(dd.cf["T"].max()), - }) + metadata.update( + { + "minTime": str(dd.cf["T"].min()), + "maxTime": str(dd.cf["T"].max()), + } + ) cat[source].metadata.update(metadata) cat[source]._entry._metadata.update(metadata) @@ -347,7 +357,9 @@ def make_catalog( if save_cat: # save cat to file cat.save(omsa.CAT_PATH(catalog_name, project_name)) - print(f"Catalog saved to {omsa.CAT_PATH(catalog_name, project_name)} with {len(list(cat))} entries.") + print( + f"Catalog saved to {omsa.CAT_PATH(catalog_name, project_name)} with {len(list(cat))} entries." + ) if return_cat: return cat @@ -434,8 +446,10 @@ def run( dam = dam.assign_coords(lon=(((dam[lkey] + 180) % 360) - 180)) # rotate arrays so that the locations and values are -180 to 180 # instead of 0 to 180 to -180 to 0 - dam = dam.roll(lon=int((dam[lkey]<0).sum()), roll_coords=True) - print("Longitudes are being shifted because they look like they are not -180 to 180.") + dam = dam.roll(lon=int((dam[lkey] < 0).sum()), roll_coords=True) + print( + "Longitudes are being shifted because they look like they are not -180 to 180." + ) # loop over catalogs and sources to pull out lon/lat locations for plot maps = [] @@ -525,7 +539,6 @@ def run( model_var = dam.em.sel2dcf(**kwargs) # .to_dataset() if model_var.size == 0: - import pdb; pdb.set_trace() # model output isn't available to match data # data must not be in the space/time range of model maps.pop(-1) diff --git a/ocean_model_skill_assessor/plot/map.py b/ocean_model_skill_assessor/plot/map.py index f4abf08..c79fad6 100644 --- a/ocean_model_skill_assessor/plot/map.py +++ b/ocean_model_skill_assessor/plot/map.py @@ -17,7 +17,11 @@ def plot_map( - maps: np.array, figname: Union[str, PurePath], ds: Union[DataArray, Dataset], alpha: int = 5, dd: int = 2, + maps: np.array, + figname: Union[str, PurePath], + ds: Union[DataArray, Dataset], + alpha: int = 5, + dd: int = 2, ): """Plot and save to file map of model domain and data locations. diff --git a/ocean_model_skill_assessor/stats.py b/ocean_model_skill_assessor/stats.py index 9e043fc..1ea5b07 100644 --- a/ocean_model_skill_assessor/stats.py +++ b/ocean_model_skill_assessor/stats.py @@ -5,14 +5,14 @@ from typing import Tuple, Union import numpy as np -import ocean_model_skill_assessor as omsa import pandas as pd import yaml - from pandas import DataFrame from xarray import DataArray +import ocean_model_skill_assessor as omsa + def _align( obs: Union[DataFrame, DataArray], model: Union[DataFrame, DataArray] @@ -162,9 +162,16 @@ def compute_root_mean_square_error( return float(np.sqrt(mse)) -def compute_descriptive_statistics(model: DataFrame, ddof=0) -> Tuple: +def compute_descriptive_statistics(model: DataFrame, ddof=0) -> list: """Given obs and model signals, return the standard deviation""" - return list([float(model.max()), float(model.min()), float(model.mean()), float(model.std(ddof=ddof))]) + return list( + [ + float(model.max()), + float(model.min()), + float(model.mean()), + float(model.std(ddof=ddof)), + ] + ) def compute_stats(obs: DataFrame, model: DataFrame) -> dict: @@ -185,32 +192,48 @@ def compute_stats(obs: DataFrame, model: DataFrame) -> dict: "rmse": compute_root_mean_square_error(obs, model), "descriptive": compute_descriptive_statistics(model), } - + def save_stats(source_name: str, stats: dict, project_name: str): """Save computed stats to file.""" - - stats["bias"] = {'value': stats['bias'], - 'name': "Bias", - 'long_name': "Bias or MSD",} - stats["corr"] = {"value": stats["corr"], - "name": "Correlation Coefficient", - "long_name": "Pearson product-moment correlation coefficient",} - stats["ioa"] = {"value": stats["ioa"], - "name": "Index of Agreement", - "long_name": "Index of Agreement (Willmott 1981)",} - stats["mse"] = {"value": stats["mse"], - "name": "Mean Squared Error", - "long_name": "Mean Squared Error (MSE)",} - stats["mss"] = {"value": stats["mss"], - "name": "Murphy Skill Score", - "long_name": "Murphy Skill Score (Murphy 1988)",} - stats["rmse"] = {"value": stats["rmse"], - "name": "RMSE", - "long_name": "Root Mean Square Error (RMSE)",} - stats["descriptive"] = {"value": stats["descriptive"], - "name": "Descriptive Statistics", - "long_name": "Max, Min, Mean, Standard Deviation"} - - with open(omsa.PROJ_DIR(project_name) / f"stats_{source_name}.yaml", "w") as outfile: + + stats["bias"] = { + "value": stats["bias"], + "name": "Bias", + "long_name": "Bias or MSD", + } + stats["corr"] = { + "value": stats["corr"], + "name": "Correlation Coefficient", + "long_name": "Pearson product-moment correlation coefficient", + } + stats["ioa"] = { + "value": stats["ioa"], + "name": "Index of Agreement", + "long_name": "Index of Agreement (Willmott 1981)", + } + stats["mse"] = { + "value": stats["mse"], + "name": "Mean Squared Error", + "long_name": "Mean Squared Error (MSE)", + } + stats["mss"] = { + "value": stats["mss"], + "name": "Murphy Skill Score", + "long_name": "Murphy Skill Score (Murphy 1988)", + } + stats["rmse"] = { + "value": stats["rmse"], + "name": "RMSE", + "long_name": "Root Mean Square Error (RMSE)", + } + stats["descriptive"] = { + "value": stats["descriptive"], + "name": "Descriptive Statistics", + "long_name": "Max, Min, Mean, Standard Deviation", + } + + with open( + omsa.PROJ_DIR(project_name) / f"stats_{source_name}.yaml", "w" + ) as outfile: yaml.dump(stats, outfile, default_flow_style=False) diff --git a/ocean_model_skill_assessor/utils.py b/ocean_model_skill_assessor/utils.py index 0fb1893..5e2586c 100644 --- a/ocean_model_skill_assessor/utils.py +++ b/ocean_model_skill_assessor/utils.py @@ -96,7 +96,7 @@ def find_bbox(ds: xr.DataArray, dd: int = 1, alpha: int = 5) -> tuple: # this leads to a circular import error if read in at top level bc of other packages brought in. import alphashape - + lon, lat = lon[::dd], lat[::dd] pts = list(zip(lon, lat)) @@ -107,12 +107,12 @@ def find_bbox(ds: xr.DataArray, dd: int = 1, alpha: int = 5) -> tuple: # import pdb; pdb.set_trace() # pts = shapely.geometry.MultiPoint(list(zip(lon, lat))) p1 = alphashape.alphashape(pts, alpha) - + # else: # 2D coordinates - + # # this leads to a circular import error if read in at top level bc of other packages brought in. # import alphashape - + # lon, lat = lon.flatten()[::dd], lat.flatten()[::dd] # # need to calculate concave hull or alphashape of grid @@ -162,7 +162,9 @@ def kwargs_search_from_model(kwargs_search: Dict[str, Union[str, float]]) -> dic ) # read in model output - model_cat = intake.open_catalog(omsa.CAT_PATH(kwargs_search['model_name'], kwargs_search['project_name'])) + model_cat = intake.open_catalog( + omsa.CAT_PATH(kwargs_search["model_name"], kwargs_search["project_name"]) + ) dsm = model_cat[list(model_cat)[0]].to_dask() kwargs_search.pop("model_name") @@ -177,12 +179,12 @@ def kwargs_search_from_model(kwargs_search: Dict[str, Union[str, float]]) -> dic "max_lat", } ): - min_lon, max_lon = float(dsm[dsm.cf.coordinates["longitude"][0]].min()), float( - dsm[dsm.cf.coordinates["longitude"][0]].max() - ) - min_lat, max_lat = float(dsm[dsm.cf.coordinates["latitude"][0]].min()), float( - dsm[dsm.cf.coordinates["latitude"][0]].max() - ) + min_lon, max_lon = float( + dsm[dsm.cf.coordinates["longitude"][0]].min() + ), float(dsm[dsm.cf.coordinates["longitude"][0]].max()) + min_lat, max_lat = float( + dsm[dsm.cf.coordinates["latitude"][0]].min() + ), float(dsm[dsm.cf.coordinates["latitude"][0]].max()) if abs(min_lon) > 180 or abs(max_lon) > 180: min_lon -= 360 diff --git a/tests/test_main.py b/tests/test_main.py index 41600d7..35f7a69 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -19,7 +19,7 @@ @mock.patch("intake.open_catalog") def test_run_variable(mock_open_cat, mock_open_cat_model, mock_to_dask, mock_read): """Test running with variable that is not present in catalog dataset.""" - + # make model catalog entries = { "name": LocalCatalogEntry( @@ -78,7 +78,7 @@ def test_run_variable(mock_open_cat, mock_open_cat_model, mock_to_dask, mock_rea {"standard_name": "sea_water_temperature", "coordinates": "lon"}, ) mock_to_dask.return_value = ds - + mock_open_cat.return_value = cat mock_open_cat_model.return_value = catm diff --git a/tests/test_utils.py b/tests/test_utils.py index 8eaa82b..9935c7b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,8 +5,10 @@ import pytest import shapely.geometry import xarray as xr + from intake.catalog import Catalog from intake.catalog.local import LocalCatalogEntry + import ocean_model_skill_assessor as omsa @@ -52,7 +54,7 @@ def test_kwargs_search_from_model(mock_open_cat, mock_to_dask): ) mock_open_cat.return_value = cat - + mock_to_dask.return_value = ds kwargs_search = omsa.utils.kwargs_search_from_model(kwargs_search) @@ -66,7 +68,12 @@ def test_kwargs_search_from_model(mock_open_cat, mock_to_dask): } assert kwargs_search == output - kwargs_search = {"min_time": 1, "max_time": 2, "model_name": "path", "project_name": "test_project"} + kwargs_search = { + "min_time": 1, + "max_time": 2, + "model_name": "path", + "project_name": "test_project", + } kwargs_search = omsa.utils.kwargs_search_from_model(kwargs_search) output = { "min_lon": 0.0, @@ -84,7 +91,7 @@ def test_kwargs_search_from_model(mock_open_cat, mock_to_dask): "min_lat": 1, "max_lat": 2, "model_name": "path", - "project_name": "test_project" + "project_name": "test_project", } kwargs_search = omsa.utils.kwargs_search_from_model(kwargs_search) output = { @@ -105,7 +112,7 @@ def test_kwargs_search_from_model(mock_open_cat, mock_to_dask): "min_lat": "1", "max_lat": "2", "model_name": "path", - "project_name": "test_project" + "project_name": "test_project", } with pytest.raises(KeyError): kwargs_search = omsa.utils.kwargs_search_from_model(kwargs_search)