diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index a8d91b0..d366fc4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -23,7 +23,9 @@ jobs: - uses: conda-incubator/setup-miniconda@v2 with: # mamba-version: "*" # activate this to build with mamba. - # channels: conda-forge, defaults # These need to be specified to use mamba + python-version: ${{ matrix.python-version }} + miniforge-variant: Mambaforge + channels: conda-forge, defaults # These need to be specified to use mamba channel-priority: true environment-file: ci/environment-py${{ matrix.python-version }}.yml diff --git a/.gitignore b/.gitignore index 4e50bee..1e5887d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +_build __pycache__/ .ipynb_checkpoints/ .DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7642fae..b69047c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -51,7 +51,7 @@ repos: rev: v0.982 hooks: - id: mypy - additional_dependencies: [types-setuptools, types-PyYAML] + additional_dependencies: [types-setuptools, types-PyYAML, types-requests] exclude: docs/source/conf.py args: [--ignore-missing-imports] diff --git a/.readthedocs.yml b/.readthedocs.yml index 3c9c005..107cf20 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,5 +1,9 @@ version: 2 +# Build PDF only +formats: + - pdf + build: os: "ubuntu-20.04" tools: diff --git a/README.md b/README.md index 79313e2..52fd7c7 100644 --- a/README.md +++ b/README.md @@ -12,24 +12,21 @@ ocean-model-skill-assessor A package to fully run the comparison between data and model to assess model skill. --------- +You can run the analysis as a Python package or with a command-line interface. -
Project based on the cookiecutter science project template.
+There are three steps to follow for a set of model-data validation, which is for one variable: +1. Make a catalog for your model output. +2. Make a catalog for your data. +3. Run the comparison. +These steps will save files into a user application directory cache. See the demos for more details. -## Installation +-------- -### Install into existing environment +Project based on the cookiecutter science project template.
-From `conda-forge`: -``` base -$ conda install -c conda-forge ocean-model-skill-assessor -``` -From PyPI: -``` base -$ pip install ocean-model-skill-assessor -``` +## Installation ### Set up environment @@ -47,6 +44,32 @@ Activate your new Python environment to use it with $ conda activate omsa ``` +Also install `cartopy` to be able to plot maps: +``` base +$ conda install -c conda-forge cartopy +``` + + +### Install into existing environment + +From `conda-forge`: +``` base +$ conda install -c conda-forge ocean-model-skill-assessor +``` + +From PyPI: +``` base +$ pip install ocean-model-skill-assessor +``` + +To plot a map of the model domain with data locations, you'll need to additionally install `cartopy`. If you used `conda` above: +``` base +$ conda install -c conda-forge cartopy +``` + +If you installed from PyPI, check out the instructions for installing `cartopy` [here](https://scitools.org.uk/cartopy/docs/latest/installing.html#building-from-source). + + ### Extra packages for development To also develop this package, install additional packages with: diff --git a/docs/add_vocab.md b/docs/add_vocab.md new file mode 100644 index 0000000..e0d9352 --- /dev/null +++ b/docs/add_vocab.md @@ -0,0 +1,139 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.14.4 +kernelspec: + display_name: Python 3.10.8 ('omsa') + language: python + name: python3 +--- + +# How to make and work with vocabularies + +This page demonstrates the workflow of making a new vocabulary, saving it to the user application cache, and reading it back in to use it. The vocabulary created is the exact same as the "general" vocabulary that is saved with the OMSA package, though here it is given another name to demonstrate that you could be making any new vocabulary you want. 
+ +Here is the list of variables of interest (with "nickname"), aimed at a physical oceanographer, which are built into the vocabulary: + +* water temperature "temp" +* salinity "salt" +* sea surface height "ssh" +* u velocity "u" +* v velocity "v" +* w upward velocity "w" +* direction of water velocity "water_dir" +* magnitude of water velocity "water_speed" +* wind direction "wind_dir" +* wind speed "wind_speed" +* sea ice velocity u "sea_ice_u" +* sea ice velocity v "sea_ice_v" +* sea ice area fraction "sea_ice_area_fraction" + +```{code-cell} ipython3 +import cf_pandas as cfp +import ocean_model_skill_assessor as omsa +import pandas as pd +``` + +## Vocabulary workflow + +### Make vocabulary + +Here we show making the "general" vocabulary that is saved into the repository. This is a more general vocabulary to identify variables from sources that don't use exact CF standard_names. + +```{code-cell} ipython3 +nickname = "temp" +vocab = cfp.Vocab() + +# define a regular expression to represent your variable +reg = cfp.Reg(include_or=["temp","sst"], exclude=["air","qc","status","atmospheric","bottom"]) + +# Make an entry to add to your vocabulary +vocab.make_entry(nickname, reg.pattern(), attr="name") + +vocab.make_entry("salt", cfp.Reg(include_or=["sal","sss"], exclude=["soil","qc","status","bottom"]).pattern(), attr="name") +vocab.make_entry("ssh", cfp.Reg(include_or=["sea_surface_height","surface_elevation"], exclude=["qc","status"]).pattern(), attr="name") + +reg = cfp.Reg(include=["east", "vel"]) +vocab.make_entry("u", "u$", attr="name") +vocab.make_entry("u", reg.pattern(), attr="name") + +reg = cfp.Reg(include=["north", "vel"]) +vocab.make_entry("v", "v$", attr="name") +vocab.make_entry("v", reg.pattern(), attr="name") + +reg = cfp.Reg(include=["up", "vel"]) +vocab.make_entry("w", "w$", attr="name") +vocab.make_entry("w", reg.pattern(), attr="name") + +vocab.make_entry("water_dir", cfp.Reg(include=["dir","water"], 
exclude=["qc","status","air","wind"]).pattern(), attr="name") + +vocab.make_entry("water_speed", cfp.Reg(include=["speed","water"], exclude=["qc","status","air","wind"]).pattern(), attr="name") + +vocab.make_entry("wind_dir", cfp.Reg(include=["dir","wind"], exclude=["qc","status","water"]).pattern(), attr="name") + +vocab.make_entry("wind_speed", cfp.Reg(include=["speed","wind"], exclude=["qc","status","water"]).pattern(), attr="name") + +reg1 = cfp.Reg(include=["sea","ice","u"], exclude=["qc","status"]) +reg2 = cfp.Reg(include=["sea","ice","x","vel"], exclude=["qc","status"]) +reg3 = cfp.Reg(include=["sea","ice","east","vel"], exclude=["qc","status"]) +vocab.make_entry("sea_ice_u", reg1.pattern(), attr="name") +vocab.make_entry("sea_ice_u", reg2.pattern(), attr="name") +vocab.make_entry("sea_ice_u", reg3.pattern(), attr="name") + +reg1 = cfp.Reg(include=["sea","ice","v"], exclude=["qc","status"]) +reg2 = cfp.Reg(include=["sea","ice","y","vel"], exclude=["qc","status"]) +reg3 = cfp.Reg(include=["sea","ice","north","vel"], exclude=["qc","status"]) +vocab.make_entry("sea_ice_v", reg1.pattern(), attr="name") +vocab.make_entry("sea_ice_v", reg2.pattern(), attr="name") +vocab.make_entry("sea_ice_v", reg3.pattern(), attr="name") + +vocab.make_entry("sea_ice_area_fraction", cfp.Reg(include=["sea","ice","area","fraction"], exclude=["qc","status"]).pattern(), attr="name") + +vocab +``` + +### Save it + +This exact vocabulary was previously saved as "general" and is available under that name, but this page demonstrates saving a new vocabulary and so we use the name "general2" to differentiate. 
+ +```{code-cell} ipython3 +vocab.save(omsa.VOCAB_PATH("general2")) +``` + +```{code-cell} ipython3 +omsa.VOCAB_PATH("general2") +``` + +### Use it later + +Read the saved vocabulary back in to use it: + +```{code-cell} ipython3 +vocab = cfp.Vocab(omsa.VOCAB_PATH("general2")) + +df = pd.DataFrame(columns=["sst", "time", "lon", "lat"], data={"sst": [1,2,3]}) +with cfp.set_options(custom_criteria=vocab.vocab): + print(df.cf["temp"]) +``` + +## Combine vocabularies + +A user can add together vocabularies. For example, here we combine the built-in "standard_names" and "general" vocabularies. + +```{code-cell} ipython3 +v1 = cfp.Vocab(omsa.VOCAB_PATH("standard_names")) +v2 = cfp.Vocab(omsa.VOCAB_PATH("general")) + +v = v1 + v2 +v +``` + +## Using the `cf-pandas` widget + ++++ + +.. raw:: html + diff --git a/docs/api.rst b/docs/api.rst index 57791c9..20ebfaa 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -10,6 +10,7 @@ API main utils + paths accessor stats plot.map diff --git a/docs/cli.md b/docs/cli.md index 223487d..5bd71ac 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -1,17 +1,8 @@ - -```{code-cell} ipython3 -import ocean_model_skill_assessor as omsa -from IPython.display import Code, Image -import cf_pandas as cfp -``` - # Using OMSA through Command Line Interface (CLI) -Example commands will be run below and prefaced with `!` to run as a shell command instead of as Python code. In the terminal window, you should remove the `!` before running the command. - -This page is focused on explaining all the options more than demonstrating a workflow. For a more clear demonstration, check out the [demo](https://ocean-model-skill-assessor.readthedocs.io/en/latest/demo.html). +Example commands are shown (but not run) below. You can copy these commands directly to a terminal window or command prompt. -+++ +This page is focused on explaining all the command line options, not demonstrating a workflow. 
For a more clear demonstration, check out the [Python package demo](https://ocean-model-skill-assessor.readthedocs.io/en/latest/demo.html) or [CLI demo](https://ocean-model-skill-assessor.readthedocs.io/en/latest/demo_cli.html). ## Make catalog(s) for data and model @@ -21,11 +12,9 @@ There are 3 types of catalogs in OMSA: local, erddap, and axds. Make a catalog with known local or remote file(s). Also use a local catalog to represent your model output. -+++ - #### Available options - omsa make_catalog --project_name PROJ_NAME --catalog_type local --catalog_name CATALOG_NAME --description "Catalog description" --kwargs filenames="[FILE1,FILE2]" --kwargs_open KWARG=VALUE + omsa make_catalog --project_name PROJ_NAME --catalog_type local --catalog_name CATALOG_NAME --description "Catalog description" --kwargs filenames="[FILE1,FILE2]" --kwargs_open KWARG=VALUE --verbose --mode MODE * `project_name`: Will be used as the name of the directory where the catalog is saved. The directory is located in a user application cache directory, the address of which can be found for your setup with `omsa proj_path --project_name PROJ_NAME`. * `catalog_type`: Type of catalog to make. Options are "erddap", "axds", or "local". @@ -35,22 +24,14 @@ Make a catalog with known local or remote file(s). Also use a local catalog to r * `kwargs`: Some keyword arguments to make the local catalog. See `omsa.main.make_local_catalog()` for more details. * `filenames`: (Required) Where to find dataset(s) from which to make local catalog. * `kwargs_open`: Keyword arguments to pass on to the appropriate intake open_* call for model or dataset. - -+++ +* `verbose` Print useful runtime commands to stdout if True as well as save in log, otherwise silently save in log. Log is located in the project directory, which can be checked on the command line with `omsa proj_path --project_name PROJECT_NAME`. Default is True, to turn off use `--no-verbose`. +* `mode` mode for logging file. 
Default is to overwrite an existing logfile, but can be changed to other modes, e.g. "a" to instead append to an existing log file. #### Examples ##### Basic catalog for single dataset -```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type local --catalog_name example_local_catalog --description "Example local catalog description" --kwargs filenames="[https://erddap.sensors.axds.co/erddap/tabledap/aoos_204.csvp?time%2Clatitude%2Clongitude%2Cz%2Csea_water_temperature&time%3E=2022-01-01T00%3A00%3A00Z&time%3C=2022-01-06T00%3A00%3A00Z]" --kwargs_open blocksize=None -``` - -Show the catalog file: - -```{code-cell} ipython3 -Code(filename=omsa.CAT_PATH("example_local_catalog", "test1")) -``` + omsa make_catalog --project_name test1 --catalog_type local --catalog_name example_local_catalog --description "Example local catalog description" --kwargs filenames="[https://erddap.sensors.axds.co/erddap/tabledap/aoos_204.csvp?time%2Clatitude%2Clongitude%2Cz%2Csea_water_temperature&time%3E=2022-01-01T00%3A00%3A00Z&time%3C=2022-01-06T00%3A00%3A00Z]" --kwargs_open blocksize=None ##### Dataset with no lon/lat @@ -58,25 +39,13 @@ When a dataset does not contain location information, you can input it as metada Station page: https://tidesandcurrents.noaa.gov/stationhome.html?id=9455500 -```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type local --catalog_name example_local_catalog2 --kwargs filenames="[https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?product=water_temperature&application=NOS.COOPS.TAC.PHYSOCEAN&begin_date=20230109&end_date=20230109&station=9455500&time_zone=GMT&units=english&interval=6&format=csv]" --metadata minLongitude=-151.72 maxLongitude=-151.72 minLatitude=59.44 maxLatitude=59.44 -``` - -```{code-cell} ipython3 -Code(filename=omsa.CAT_PATH("example_local_catalog2", "test1")) -``` + omsa make_catalog --project_name test1 --catalog_type local --catalog_name example_local_catalog2 --kwargs 
filenames="[https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?product=water_temperature&application=NOS.COOPS.TAC.PHYSOCEAN&begin_date=20230109&end_date=20230109&station=9455500&time_zone=GMT&units=english&interval=6&format=csv]" --metadata minLongitude=-151.72 maxLongitude=-151.72 minLatitude=59.44 maxLatitude=59.44 ##### Set up model Use this approach to set up a catalog file for your model output, so that it can be used by OMSA. Use `skip_entry_metadata=True` when running for a model. -```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type local --catalog_name model --kwargs filenames=https://www.ncei.noaa.gov/thredds/dodsC/model-ciofs-agg/Aggregated_CIOFS_Fields_Forecast_best.ncd skip_entry_metadata=True --kwargs_open drop_variables=ocean_time -``` - -```{code-cell} ipython3 -Code(filename=omsa.CAT_PATH("model", "test1")) -``` + omsa make_catalog --project_name test1 --catalog_type local --catalog_name model --kwargs filenames=https://www.ncei.noaa.gov/thredds/dodsC/model-ciofs-agg/Aggregated_CIOFS_Fields_Forecast_best.ncd skip_entry_metadata=True --kwargs_open drop_variables=ocean_time ### ERDDAP Catalog @@ -84,7 +53,7 @@ Make a catalog from datasets available from an ERDDAP server using `intake-erdda #### Available options - omsa make_catalog --project_name PROJ_NAME --catalog_type erddap --catalog_name CATALOG_NAME --description "Catalog description" --kwargs server=SERVER --kwargs_search min_lon=MIN_LON min_lat=MIN_LAT max_lon=MAX_LON max_lat=MAX_LAT min_time=MIN_TIME max_time=MAX_TIME search_for=SEARCH_TEXT + omsa make_catalog --project_name PROJ_NAME --catalog_type erddap --catalog_name CATALOG_NAME --description "Catalog description" --kwargs server=SERVER --kwargs_search min_lon=MIN_LON min_lat=MIN_LAT max_lon=MAX_LON max_lat=MAX_LAT min_time=MIN_TIME max_time=MAX_TIME search_for=SEARCH_TEXT --verbose --mode MODE * `project_name`: Will be used as the name of the directory where the catalog is saved. 
The directory is located in a user application cache directory, the address of which can be found for your setup with `omsa proj_path --project_name PROJ_NAME`. * `catalog_type`: Type of catalog to make. Options are "erddap", "axds", or "local". @@ -104,6 +73,8 @@ Make a catalog from datasets available from an ERDDAP server using `intake-erdda * `min_time`, `max_time`: search for datasets with data within this time range * `model_name`: input a path to the model output to instead select the space and time search specifications based on the model. This input is specific to OMSA, not `intake-erddap`. * `search_for`: text-based search +* `verbose` Print useful runtime commands to stdout if True as well as save in log, otherwise silently save in log. Log is located in the project directory, which can be checked on the command line with `omsa proj_path --project_name PROJECT_NAME`. Default is True, to turn off use `--no-verbose`. +* `mode` mode for logging file. Default is to overwrite an existing logfile, but can be changed to other modes, e.g. "a" to instead append to an existing log file. 
#### Examples @@ -111,33 +82,25 @@ Make a catalog from datasets available from an ERDDAP server using `intake-erdda Select a spatial box and time range over which to search catalog: -```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name example_erddap_catalogA --description "Example ERDDAP catalog description" --kwargs server=https://erddap.sensors.ioos.us/erddap --kwargs_search min_lon=-170 min_lat=53 max_lon=-165 max_lat=56 min_time=2022-1-1 max_time=2022-1-6 -``` + omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name example_erddap_catalogA --description "Example ERDDAP catalog description" --kwargs server=https://erddap.sensors.ioos.us/erddap --kwargs_search min_lon=-170 min_lat=53 max_lon=-165 max_lat=56 min_time=2022-1-1 max_time=2022-1-6 ##### Narrow search with model output Input model output to use to create the space search range, but choose time search range. We use the model catalog created in a previous example: -```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name example_erddap_catalog --description "Example ERDDAP catalog description" --kwargs server=https://erddap.sensors.ioos.us/erddap --kwargs_search model_name=model min_time=2022-1-1 max_time=2022-1-6 -``` + omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name example_erddap_catalog --description "Example ERDDAP catalog description" --kwargs server=https://erddap.sensors.ioos.us/erddap --kwargs_search model_name=model min_time=2022-1-1 max_time=2022-1-6 ##### Narrow search also with `query_type` You can additionally narrow your search by a text term by adding the `search_for` and `query_type` keyword inputs. This example searches for datasets containing the varaible "sea_surface_temperature" and, somewhere in the dataset metadata, the term "Timeseries". If we had wanted datasets that contain one OR the other, we could use `query_type=union`. 
-```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name cat2 --kwargs server=https://erddap.sensors.ioos.us/erddap standard_names="[sea_surface_temperature]" search_for="[Timeseries]" query_type=intersection -``` + omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name cat2 --kwargs server=https://erddap.sensors.ioos.us/erddap standard_names="[sea_surface_temperature]" search_for="[Timeseries]" query_type=intersection ##### Variable selection by standard_name Narrow your search by variable. For `intake-erddap` you can filter by the CF `standard_name` of the variable directly with the following. -```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name cat1 --kwargs server=https://erddap.sensors.ioos.us/erddap standard_names="[sea_surface_temperature,sea_water_temperature]" -``` + omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name cat1 --kwargs server=https://erddap.sensors.ioos.us/erddap standard_names="[sea_surface_temperature,sea_water_temperature]" ##### Variable selection by pattern matching with vocab @@ -151,14 +114,11 @@ This is more complicated than simply defining the desired standard_names as show The example below uses the pre-defined vocabulary "standard_names" since we are using the IOOS ERDDAP server which uses standard_names as one of its search categories, and will search for matching variables by standard_name and matching the variable nickname "temp". The "standard_names" vocabulary is shown here and includes the standard_names from the previous example (it includes others too but they aren't present on the server). The regular expressions are set up to match exactly those standard_names. This is why we return the same results from either approach. 
-```{code-cell} ipython3 +``` vocab = cfp.Vocab(omsa.VOCAB_PATH("standard_names")) -vocab ``` -```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name cat3 --kwargs server=https://erddap.sensors.ioos.us/erddap category_search="[standard_name,temp]" --vocab_name standard_names -``` + omsa make_catalog --project_name test1 --catalog_type erddap --catalog_name cat3 --kwargs server=https://erddap.sensors.ioos.us/erddap category_search="[standard_name,temp]" --vocab_name standard_names ### Catalog for Axiom assets @@ -166,7 +126,7 @@ Make a catalog of Axiom Data Science-stored assets using `intake-axds`. #### Available options - omsa make_catalog --project_name PROJ_NAME --catalog_type axds --catalog_name CATALOG_NAME --description "Catalog description" --kwargs datatype="platform2 standard_names="[STANDARD_NAME1,STANDARD_NAME2]" page_size=PAGE_SIZE verbose=BOOL --kwargs_search min_lon=MIN_LON min_lat=MIN_LAT max_lon=MAX_LON max_lat=MAX_LAT min_time=MIN_TIME max_time=MAX_TIME search_for=SEARCH_TEXT + omsa make_catalog --project_name PROJ_NAME --catalog_type axds --catalog_name CATALOG_NAME --description "Catalog description" --kwargs datatype="platform2 standard_names="[STANDARD_NAME1,STANDARD_NAME2]" page_size=PAGE_SIZE verbose=BOOL --kwargs_search min_lon=MIN_LON min_lat=MIN_LAT max_lon=MAX_LON max_lat=MAX_LAT min_time=MIN_TIME max_time=MAX_TIME search_for=SEARCH_TEXT --verbose --mode MODE * `project_name`: Will be used as the name of the directory where the catalog is saved. The directory is located in a user application cache directory, the address of which can be found for your setup with `omsa proj_path --project_name PROJ_NAME`. * `catalog_type`: Type of catalog to make. Options are "erddap", "axds", or "local". @@ -186,6 +146,8 @@ Make a catalog of Axiom Data Science-stored assets using `intake-axds`. 
* `min_time`, `max_time`: search for datasets with data within this time range * `model_name`: input a path to the model output to instead select the space and time search specifications based on the model. This input is specific to OMSA, not `intake-axds`. * `search_for`: text-based search +* `verbose` Print useful runtime commands to stdout if True as well as save in log, otherwise silently save in log. Log is located in the project directory, which can be checked on the command line with `omsa proj_path --project_name PROJECT_NAME`. Default is True, to turn off use `--no-verbose`. +* `mode` mode for logging file. Default is to overwrite an existing logfile, but can be changed to other modes, e.g. "a" to instead append to an existing log file. #### Examples @@ -195,17 +157,13 @@ Many of the options available for an Axiom catalog are the same as for an ERDDAP Select a box and time range over which to search catalog along with standard_name selections, with `verbose=True`. -```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type axds --catalog_name example_axds_catalog1 --description "Example AXDS catalog description" --kwargs page_size=50000 standard_names='[sea_water_temperature]' verbose=True --kwargs_search min_lon=-170 min_lat=53 max_lon=-165 max_lat=56 min_time=2000-1-1 max_time=2002-1-1 -``` + omsa make_catalog --project_name test1 --catalog_type axds --catalog_name example_axds_catalog1 --description "Example AXDS catalog description" --kwargs page_size=50000 standard_names='[sea_water_temperature]' verbose=True --kwargs_search min_lon=-170 min_lat=53 max_lon=-165 max_lat=56 min_time=2000-1-1 max_time=2002-1-1 ##### Same but with vocab As in the ERDDAP catalog example above, we can instead get the same results by inputting a vocabulary to use, in this case "standard_names" which will map to variable names in Axiom systems, along with the variable nickname from the vocabulary to find: "temp". 
-```{code-cell} ipython3 -!omsa make_catalog --project_name test1 --catalog_type axds --catalog_name example_axds_catalog2 --description "Example AXDS catalog description" --vocab_name standard_names --kwargs page_size=50000 keys_to_match='[temp]' --kwargs_search min_lon=-170 min_lat=53 max_lon=-165 max_lat=56 min_time=2000-1-1 max_time=2002-1-1 -``` + omsa make_catalog --project_name test1 --catalog_type axds --catalog_name example_axds_catalog2 --description "Example AXDS catalog description" --vocab_name standard_names --kwargs page_size=50000 keys_to_match='[temp]' --kwargs_search min_lon=-170 min_lat=53 max_lon=-165 max_lat=56 min_time=2000-1-1 max_time=2002-1-1 ## Run model-data comparison @@ -217,7 +175,7 @@ The datasets need to all cover the same time periods. ### Available options - omsa run --project_name test1 --catalogs CATALOG_NAME1 CATALOG_NAME2 --vocab_names VOCAB1 VOCAB2 --key KEY --model_path PATH_TO_MODEL_OUTPUT --ndatasets NDATASETS + omsa run --project_name test1 --catalogs CATALOG_NAME1 CATALOG_NAME2 --vocab_names VOCAB1 VOCAB2 --key KEY --model_path PATH_TO_MODEL_OUTPUT --ndatasets NDATASETS --verbose --mode MODE * `project_name`: Subdirectory in cache dir to store files associated together. * `catalog_names`: Catalog name(s). Datasets will be accessed from catalog entries. @@ -225,16 +183,16 @@ The datasets need to all cover the same time periods. * `key`: Key in vocab(s) representing variable to compare between model and datasets. * `model_name`: name of the model catalog we previously created * `ndatasets`: Max number of datasets from each input catalog to use. +* `verbose` Print useful runtime commands to stdout if True as well as save in log, otherwise silently save in log. Log is located in the project directory, which can be checked on the command line with `omsa proj_path --project_name PROJECT_NAME`. Default is True, to turn off use `--no-verbose`. +* `mode` mode for logging file. 
Default is to overwrite an existing logfile, but can be changed to other modes, e.g. "a" to instead append to an existing log file. ### Example Run a model-data comparison for the first 3 datasets in each of the 3 catalogs that we created previously in this notebook. Use vocabularies `standard_names` and `general` for variable matching. Match on the temperature variable with variable nickname "temp". -This example doesn't fully work because the combination of datasets are at different time periods and of different types that don't make sense to compare with the model output. So, it is commented out but shown as a template. +This example doesn't fully work because the combination of datasets are at different time periods and of different types that don't make sense to compare with the model output. It is shown as a template. -```{code-cell} ipython3 -#!omsa run --project_name test1 --catalog_names example_local_catalog example_erddap_catalog example_axds_catalog1 --vocab_names standard_names general --key temp --model_name model --ndatasets 1 -``` + omsa run --project_name test1 --catalog_names example_local_catalog example_erddap_catalog example_axds_catalog1 --vocab_names standard_names general --key temp --model_name model --ndatasets 1 ## Utilities @@ -244,12 +202,14 @@ A few handy utilities. With this you can check all of the project-related files you've created. -```{code-cell} ipython3 -!omsa proj_path --project_name test1 -``` + omsa proj_path --project_name test1 ### Check available vocabularies -```{code-cell} ipython3 -!omsa vocabs -``` + omsa vocabs + +### Get information about a vocabulary + +Return the path to the vocab file and the nicknames of the variables in the file. + + omsa vocab_info --vocab_name general diff --git a/docs/conf.py b/docs/conf.py index 12ee150..24f0c91 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,7 +18,7 @@ # |version| and |release|, also used in various other places throughout the # built documents. 
# see https://pypi.org/project/setuptools-scm/ for details -from pkg_resources import get_distribution +from importlib.metadata import version as imversion print("python exec:", sys.executable) @@ -35,7 +35,7 @@ copyright = "2021-2023, Axiom Data Science" author = "Axiom Data Science" -release = get_distribution("ocean-model-skill-assessor").version +release = imversion("ocean-model-skill-assessor") # for example take major/minor version = ".".join(release.split(".")[:2]) @@ -82,9 +82,10 @@ ".DS_Store", "_old_docs", ".ipynb", + "notebooks", ] -html_extra_path = ["create_vocabs.html"] +html_extra_path = ["vocab_widget.html"] # -- Options for HTML output ------------------------------------------------- @@ -108,9 +109,11 @@ # had this message: # WARNING: 'execution_timeout' is deprecated for 'nb_execution_timeout' [mystnb.config] # WARNING: 'execution_allow_errors' is deprecated for 'nb_execution_allow_errors' [mystnb.config] -nb_execution_timeout = 300 # seconds. +nb_execution_timeout = 600 # seconds. nb_execution_allow_errors = False +# https://myst-nb.readthedocs.io/en/v0.9.0/use/execute.html +jupyter_execute_notebooks = "off" # -- nbsphinx specific options ---------------------------------------------- # this allows notebooks to be run even if they produce errors. diff --git a/docs/create_vocabs_wrapper.md b/docs/create_vocabs_wrapper.md deleted file mode 100644 index 44244e4..0000000 --- a/docs/create_vocabs_wrapper.md +++ /dev/null @@ -1,4 +0,0 @@ -# Creating vocabularies for known servers - -.. 
raw:: html - diff --git a/docs/demo.ipynb b/docs/demo.ipynb new file mode 100644 index 0000000..482f427 --- /dev/null +++ b/docs/demo.ipynb @@ -0,0 +1,292 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import ocean_model_skill_assessor as omsa\n", + "import cf_pandas as cfp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to use `ocean-model-skill-assessor`\n", + "\n", + "... as a Python package. Other notebooks describe its command line interface uses.\n", + "\n", + "But, this is written in parallel to the [CLI demo](https://ocean-model-skill-assessor.readthedocs.io/en/latest/demo_cli.html), but will be more brief.\n", + "\n", + "There are three steps to follow for a set of model-data validation, which is for one variable:\n", + "1. Make a catalog for your model output.\n", + "2. Make a catalog for your data.\n", + "3. Run the comparison.\n", + "\n", + "These steps will save files into a user application directory cache, along with a log. 
A project directory can be checked on the command line with `omsa proj_path --project_name PROJECT_NAME`.\n", + "\n", + "\n", + "## Make model catalog" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "cat_model = omsa.make_catalog(project_name=\"demo_local_package\", catalog_type=\"local\", catalog_name=\"model\", \n", + " kwargs=dict(filenames=\"https://www.ncei.noaa.gov/thredds/dodsC/model-ciofs-agg/Aggregated_CIOFS_Fields_Forecast_best.ncd\",\n", + " skip_entry_metadata=True),\n", + " kwargs_open=dict(drop_variables=\"ocean_time\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "application/yaml": "model:\n args:\n description: Catalog of type local.\n name: model\n description: Catalog of type local.\n driver: intake.catalog.base.Catalog\n metadata: {}\n", + "text/plain": [ + "model:\n", + " args:\n", + " description: Catalog of type local.\n", + " name: model\n", + " description: Catalog of type local.\n", + " driver: intake.catalog.base.Catalog\n", + " metadata: {}\n" + ] + }, + "metadata": { + "application/json": { + "root": "model" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "cat_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make data catalog \n", + "\n", + "Set up a catalog of the datasets with which you want to compare your model output. In this example, we use only known data file locations to create our catalog." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-01-27 12:46:02,753] {/Users/kthyng/projects/ocean-model-skill-assessor/ocean_model_skill_assessor/main.py:185} WARNING - Dataset noaa_nos_co_ops_9455500 had a timezone UTC which is being removed. 
Make sure the timezone matches the model output.\n" + ] + } + ], + "source": [ + "filenames = [\"https://erddap.sensors.axds.co/erddap/tabledap/noaa_nos_co_ops_9455500.csvp?time%2Clatitude%2Clongitude%2Cz%2Csea_water_temperature&time%3E=2022-01-01T00%3A00%3A00Z&time%3C=2022-01-06T00%3A00%3A00Z\",\n", + "]\n", + "\n", + "cat_data = omsa.make_catalog(project_name=\"demo_local_package\", catalog_type=\"local\", catalog_name=\"local\",\n", + " kwargs=dict(filenames=filenames), kwargs_open=dict(blocksize=None))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "application/yaml": "local:\n args:\n description: Catalog of type local.\n name: local\n description: Catalog of type local.\n driver: intake.catalog.base.Catalog\n metadata: {}\n", + "text/plain": [ + "local:\n", + " args:\n", + " description: Catalog of type local.\n", + " name: local\n", + " description: Catalog of type local.\n", + " driver: intake.catalog.base.Catalog\n", + " metadata: {}\n" + ] + }, + "metadata": { + "application/json": { + "root": "local" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "cat_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run comparison\n", + "\n", + "Now that the model output and dataset catalogs are prepared, we can run the comparison of the two.\n", + "\n", + "At this point we need to select a single variable to compare between the model and datasets, and this requires a little extra input. Because we don't know specifics about the format of any given input data file, variables will be interpreted with some flexibility in the form of a set of regular expressions. In the present case, we will compare the water temperature between the model and the datasets (the model output and datasets selected for our catalogs should contain the variable we want to compare). 
Several sets of regular expressions, called \"vocabularies\", are available with the package to be used for this purpose, and in this case we will use one called \"general\" which should match many commonly-used variable names. \"general\" is selected under `vocab_names`, and the particular key from the general vocabulary that we are comparing is selected with `key`.\n", + "\n", + "See the vocabulary here.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'temp': {'name': '(?i)^(?!.*(air|qc|status|atmospheric|bottom|dew)).*(temp|sst).*'}, 'salt': {'name': '(?i)^(?!.*(soil|qc|status|bottom)).*(sal|sss).*'}, 'ssh': {'name': '(?i)^(?!.*(qc|status)).*(sea_surface_height|surface_elevation).*'}, 'u': {'name': 'u$|(?i)(?=.*east)(?=.*vel)'}, 'v': {'name': 'v$|(?i)(?=.*north)(?=.*vel)'}, 'w': {'name': 'w$|(?i)(?=.*up)(?=.*vel)'}, 'water_dir': {'name': '(?i)^(?!.*(qc|status|air|wind))(?=.*dir)(?=.*water)'}, 'water_speed': {'name': '(?i)^(?!.*(qc|status|air|wind))(?=.*speed)(?=.*water)'}, 'wind_dir': {'name': '(?i)^(?!.*(qc|status|water))(?=.*dir)(?=.*wind)'}, 'wind_speed': {'name': '(?i)^(?!.*(qc|status|water))(?=.*speed)(?=.*wind)'}, 'sea_ice_u': {'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*u)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*x)(?=.*vel)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*east)(?=.*vel)'}, 'sea_ice_v': {'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*v)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*y)(?=.*vel)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*north)(?=.*vel)'}, 'sea_ice_area_fraction': {'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*area)(?=.*fraction)'}}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cfp.Vocab(omsa.VOCAB_PATH(\"general\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "[2023-01-27 12:46:13,702] {/Users/kthyng/projects/ocean-model-skill-assessor/ocean_model_skill_assessor/main.py:452} INFO - Note that there are 1 datasets to use. This might take awhile.\n", + "[2023-01-27 12:46:23,556] {/Users/kthyng/miniconda3/envs/omsa/lib/python3.10/warnings.py:109} WARNING - /Users/kthyng/miniconda3/envs/omsa/lib/python3.10/site-packages/xarray/conventions.py:523: SerializationWarning: variable 'u' has multiple fill values {0.0, 1e+37}, decoding all values to NaN.\n", + " new_vars[k] = decode_cf_variable(\n", + "\n", + "[2023-01-27 12:46:23,558] {/Users/kthyng/miniconda3/envs/omsa/lib/python3.10/warnings.py:109} WARNING - /Users/kthyng/miniconda3/envs/omsa/lib/python3.10/site-packages/xarray/conventions.py:523: SerializationWarning: variable 'v' has multiple fill values {0.0, 1e+37}, decoding all values to NaN.\n", + " new_vars[k] = decode_cf_variable(\n", + "\n", + "[2023-01-27 12:46:23,560] {/Users/kthyng/miniconda3/envs/omsa/lib/python3.10/warnings.py:109} WARNING - /Users/kthyng/miniconda3/envs/omsa/lib/python3.10/site-packages/xarray/conventions.py:523: SerializationWarning: variable 'w' has multiple fill values {0.0, 1e+37}, decoding all values to NaN.\n", + " new_vars[k] = decode_cf_variable(\n", + "\n", + "[2023-01-27 12:46:23,563] {/Users/kthyng/miniconda3/envs/omsa/lib/python3.10/warnings.py:109} WARNING - /Users/kthyng/miniconda3/envs/omsa/lib/python3.10/site-packages/xarray/conventions.py:523: SerializationWarning: variable 'temp' has multiple fill values {0.0, 1e+37}, decoding all values to NaN.\n", + " new_vars[k] = decode_cf_variable(\n", + "\n", + "[2023-01-27 12:46:23,568] {/Users/kthyng/miniconda3/envs/omsa/lib/python3.10/warnings.py:109} WARNING - /Users/kthyng/miniconda3/envs/omsa/lib/python3.10/site-packages/xarray/conventions.py:523: SerializationWarning: variable 'salt' has multiple fill values {0.0, 1e+37}, decoding all values to NaN.\n", + " new_vars[k] = decode_cf_variable(\n", + "\n", + 
"[2023-01-27 12:46:23,570] {/Users/kthyng/miniconda3/envs/omsa/lib/python3.10/warnings.py:109} WARNING - /Users/kthyng/miniconda3/envs/omsa/lib/python3.10/site-packages/xarray/conventions.py:523: SerializationWarning: variable 'Pair' has multiple fill values {0.0, 1e+37}, decoding all values to NaN.\n", + " new_vars[k] = decode_cf_variable(\n", + "\n", + "[2023-01-27 12:46:23,572] {/Users/kthyng/miniconda3/envs/omsa/lib/python3.10/warnings.py:109} WARNING - /Users/kthyng/miniconda3/envs/omsa/lib/python3.10/site-packages/xarray/conventions.py:523: SerializationWarning: variable 'Uwind' has multiple fill values {0.0, 1e+37}, decoding all values to NaN.\n", + " new_vars[k] = decode_cf_variable(\n", + "\n", + "[2023-01-27 12:46:23,574] {/Users/kthyng/miniconda3/envs/omsa/lib/python3.10/warnings.py:109} WARNING - /Users/kthyng/miniconda3/envs/omsa/lib/python3.10/site-packages/xarray/conventions.py:523: SerializationWarning: variable 'Vwind' has multiple fill values {0.0, 1e+37}, decoding all values to NaN.\n", + " new_vars[k] = decode_cf_variable(\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/1 [00:00, ?it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2023-01-27 12:47:26,230] {/Users/kthyng/projects/ocean-model-skill-assessor/ocean_model_skill_assessor/main.py:488} INFO - Catalogdescription: Catalog of type local.\n",
+ "metadata: {}\n",
+ "name: model\n",
+ "sources:\n",
+ " Aggregated_CIOFS_Fields_Forecast_best:\n",
+ " args:\n",
+ " drop_variables: ocean_time\n",
+ " engine: netcdf4\n",
+ " urlpath: https://www.ncei.noaa.gov/thredds/dodsC/model-ciofs-agg/Aggregated_CIOFS_Fields_Forecast_best.ncd\n",
+ " description: ''\n",
+ " direct_access: allow\n",
+ " driver: intake_xarray.opendap.OpenDapSource\n",
+ " metadata:\n",
+ " catalog_dir: ''\n",
+ " name: Aggregated_CIOFS_Fields_Forecast_best\n",
+ " parameters: {}\n",
+ "
description: Catalog of type local.\n",
+ "metadata: {}\n",
+ "name: local\n",
+ "sources:\n",
+ " aoos_204:\n",
+ " args:\n",
+ " csv_kwargs:\n",
+ " blocksize: null\n",
+ " urlpath: https://erddap.sensors.axds.co/erddap/tabledap/aoos_204.csvp?time%2Clatitude%2Clongitude%2Cz%2Csea_water_temperature&time%3E=2022-01-01T00%3A00%3A00Z&time%3C=2022-01-06T00%3A00%3A00Z\n",
+ " description: ''\n",
+ " direct_access: allow\n",
+ " driver: intake.source.csv.CSVSource\n",
+ " metadata:\n",
+ " catalog_dir: ''\n",
+ " maxLatitude: 59.5973\n",
+ " maxLongitude: -151.8291\n",
+ " maxTime: '2022-01-06 00:00:00'\n",
+ " minLatitude: 59.5973\n",
+ " minLongitude: -151.8291\n",
+ " minTime: '2022-01-01 00:00:00'\n",
+ " name: aoos_204\n",
+ " parameters: {}\n",
+ " noaa_nos_co_ops_9455500:\n",
+ " args:\n",
+ " csv_kwargs:\n",
+ " blocksize: null\n",
+ " urlpath: https://erddap.sensors.axds.co/erddap/tabledap/noaa_nos_co_ops_9455500.csvp?time%2Clatitude%2Clongitude%2Cz%2Csea_water_temperature&time%3E=2022-01-01T00%3A00%3A00Z&time%3C=2022-01-06T00%3A00%3A00Z\n",
+ " description: ''\n",
+ " direct_access: allow\n",
+ " driver: intake.source.csv.CSVSource\n",
+ " metadata:\n",
+ " catalog_dir: ''\n",
+ " maxLatitude: 59.440528\n",
+ " maxLongitude: -151.719944\n",
+ " maxTime: '2022-01-06 00:00:00'\n",
+ " minLatitude: 59.440528\n",
+ " minLongitude: -151.719944\n",
+ " minTime: '2022-01-01 00:00:00'\n",
+ " name: noaa_nos_co_ops_9455500\n",
+ " parameters: {}\n",
+ "
{"temp": {"name": "^(?!.*(air|qc|status|atmospheric|bottom))(?=.*temp)"}, "salt": {"name": "^(?!.*(soil|qc|status|bottom))(?=.*sal)"}, "ssh": {"name": "^(?!.*(qc|status))(?=.*sea_surface_height)(?=.*surface_elevation)"}}\n",
+ "
bias:\n",
+ " long_name: Bias or MSD\n",
+ " name: Bias\n",
+ " value: -0.8833044407247495\n",
+ "corr:\n",
+ " long_name: Pearson product-moment correlation coefficient\n",
+ " name: Correlation Coefficient\n",
+ " value: 0.9753704069600988\n",
+ "descriptive:\n",
+ " long_name: Max, Min, Mean, Standard Deviation\n",
+ " name: Descriptive Statistics\n",
+ " value:\n",
+ " - 4.201254844665527\n",
+ " - -0.03469964489340782\n",
+ " - 1.705991268157959\n",
+ " - 1.3660595417022705\n",
+ "ioa:\n",
+ " long_name: Index of Agreement (Willmott 1981)\n",
+ " name: Index of Agreement\n",
+ " value: 0.8235311833374434\n",
+ "mse:\n",
+ " long_name: Mean Square Error (MSE)\n",
+ " name: Mean Square Error\n",
+ " value: 1.041590395442294\n",
+ "mss:\n",
+ " long_name: Murphy Skill Score (Murphy 1988)\n",
+ " name: Murphy Skill Score\n",
+ " value: -0.25506379908872145\n",
+ "rmse:\n",
+ " long_name: Root Mean Square Error (RMSE)\n",
+ " name: RMSE\n",
+ " value: 1.0205833603593064\n",
+ "
bias:\n",
+ " long_name: Bias or MSD\n",
+ " name: Bias\n",
+ " value: -0.8878410455767599\n",
+ "corr:\n",
+ " long_name: Pearson product-moment correlation coefficient\n",
+ " name: Correlation Coefficient\n",
+ " value: 0.06920705857903918\n",
+ "descriptive:\n",
+ " long_name: Max, Min, Mean, Standard Deviation\n",
+ " name: Descriptive Statistics\n",
+ " value:\n",
+ " - 4.227032661437988\n",
+ " - 2.051501989364624\n",
+ " - 3.102297782897949\n",
+ " - 0.6421294808387756\n",
+ "ioa:\n",
+ " long_name: Index of Agreement (Willmott 1981)\n",
+ " name: Index of Agreement\n",
+ " value: 0.3597021219453842\n",
+ "mse:\n",
+ " long_name: Mean Square Error (MSE)\n",
+ " name: Mean Square Error\n",
+ " value: 1.2352567365192588\n",
+ "mss:\n",
+ " long_name: Murphy Skill Score (Murphy 1988)\n",
+ " name: Murphy Skill Score\n",
+ " value: -4.753303126266135\n",
+ "rmse:\n",
+ " long_name: Root Mean Square Error (RMSE)\n",
+ " name: RMSE\n",
+ " value: 1.11142104376301\n",
+ "
We may need a more general vocabulary to capture variables from other sources that don't use these exact names. Here we make a vocabulary for that purpose.
- -nickname = "temp"
-vocab = cfp.Vocab()
-
-# define a regular expression to represent your variable
-reg = cfp.Reg(include="temp", exclude=["air","qc","status","atmospheric","bottom"])
-
-# Make an entry to add to your vocabulary
-vocab.make_entry(nickname, reg.pattern(), attr="name")
-
-vocab.make_entry("salt", cfp.Reg(include="sal", exclude=["soil","qc","status","bottom"]).pattern(), attr="name")
-vocab.make_entry("ssh", cfp.Reg(include=["sea_surface_height","surface_elevation"], exclude=["qc","status"]).pattern(), attr="name")
-
-reg = cfp.Reg(include=["east", "vel"])
-vocab.make_entry("u", "u$", attr="name")
-vocab.make_entry("u", reg.pattern(), attr="name")
-
-reg = cfp.Reg(include=["north", "vel"])
-vocab.make_entry("v", "v$", attr="name")
-vocab.make_entry("v", reg.pattern(), attr="name")
-
-reg = cfp.Reg(include=["up", "vel"])
-vocab.make_entry("w", "w$", attr="name")
-vocab.make_entry("w", reg.pattern(), attr="name")
-
-vocab.make_entry("water_dir", cfp.Reg(include=["dir","water"], exclude=["qc","status","air","wind"]).pattern(), attr="name")
-
-vocab.make_entry("water_speed", cfp.Reg(include=["speed","water"], exclude=["qc","status","air","wind"]).pattern(), attr="name")
-
-vocab.make_entry("wind_dir", cfp.Reg(include=["dir","wind"], exclude=["qc","status","water"]).pattern(), attr="name")
-
-vocab.make_entry("wind_speed", cfp.Reg(include=["speed","wind"], exclude=["qc","status","water"]).pattern(), attr="name")
-
-reg1 = cfp.Reg(include=["sea","ice","u"], exclude=["qc","status"])
-reg2 = cfp.Reg(include=["sea","ice","x","vel"], exclude=["qc","status"])
-reg3 = cfp.Reg(include=["sea","ice","east","vel"], exclude=["qc","status"])
-vocab.make_entry("sea_ice_u", reg1.pattern(), attr="name")
-vocab.make_entry("sea_ice_u", reg2.pattern(), attr="name")
-vocab.make_entry("sea_ice_u", reg3.pattern(), attr="name")
-
-reg1 = cfp.Reg(include=["sea","ice","v"], exclude=["qc","status"])
-reg2 = cfp.Reg(include=["sea","ice","y","vel"], exclude=["qc","status"])
-reg3 = cfp.Reg(include=["sea","ice","north","vel"], exclude=["qc","status"])
-vocab.make_entry("sea_ice_v", reg1.pattern(), attr="name")
-vocab.make_entry("sea_ice_v", reg2.pattern(), attr="name")
-vocab.make_entry("sea_ice_v", reg3.pattern(), attr="name")
-
-vocab.make_entry("sea_ice_area_fraction", cfp.Reg(include=["sea","ice","area","fraction"], exclude=["qc","status"]).pattern(), attr="name")
-
-# vocab.save(omsa.VOCAB_PATH("general"))
-
-vocab
-
{'temp': {'name': '(?i)^(?!.*(air|qc|status|atmospheric|bottom))(?=.*temp)'}, 'salt': {'name': '(?i)^(?!.*(soil|qc|status|bottom))(?=.*sal)'}, 'ssh': {'name': '(?i)^(?!.*(qc|status))(?=.*sea_surface_height)(?=.*surface_elevation)'}, 'u': {'name': 'u$|(?i)(?=.*east)(?=.*vel)'}, 'v': {'name': 'v$|(?i)(?=.*north)(?=.*vel)'}, 'w': {'name': 'w$|(?i)(?=.*up)(?=.*vel)'}, 'water_dir': {'name': '(?i)^(?!.*(qc|status|air|wind))(?=.*dir)(?=.*water)'}, 'water_speed': {'name': '(?i)^(?!.*(qc|status|air|wind))(?=.*speed)(?=.*water)'}, 'wind_dir': {'name': '(?i)^(?!.*(qc|status|water))(?=.*dir)(?=.*wind)'}, 'wind_speed': {'name': '(?i)^(?!.*(qc|status|water))(?=.*speed)(?=.*wind)'}, 'sea_ice_u': {'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*u)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*x)(?=.*vel)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*east)(?=.*vel)'}, 'sea_ice_v': {'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*v)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*y)(?=.*vel)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*north)(?=.*vel)'}, 'sea_ice_area_fraction': {'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*area)(?=.*fraction)'}}-
A user can combine vocabularies by adding them together. The resulting vocabulary exactly matches all of the selections we made for the vocabularies above.
-For example:
- -v1 = cfp.Vocab(omsa.VOCAB_PATH("standard_names"))
-v2 = cfp.Vocab(omsa.VOCAB_PATH("general"))
-
v = v1 + v2
-v
-
{'v': {'standard_name': 'baroclinic_northward_sea_water_velocity$|barotropic_northward_sea_water_velocity$|barotropic_sea_water_y_velocity$|northward_sea_water_velocity$|northward_sea_water_velocity_assuming_no_tide$|northward_sea_water_velocity_due_to_tides$|sea_water_y_velocity$|surface_northward_sea_water_velocity$', 'name': 'v$|(?i)(?=.*north)(?=.*vel)'}, 'water_speed': {'standard_name': 'sea_water_speed$', 'name': '(?i)^(?!.*(qc|status|air|wind))(?=.*speed)(?=.*water)'}, 'ssh': {'standard_name': 'sea_surface_height_above_geoid$|sea_surface_height_above_geopotential_datum$|sea_surface_height_above_mean_sea_level$|sea_surface_height_above_reference_ellipsoid$|surface_height_above_geopotential_datum$|tidal_sea_surface_height_above_lowest_astronomical_tide$|tidal_sea_surface_height_above_mean_higher_high_water$|tidal_sea_surface_height_above_mean_lower_low_water$|tidal_sea_surface_height_above_mean_low_water_springs$|tidal_sea_surface_height_above_mean_sea_level$|water_surface_height_above_reference_datum$|water_surface_reference_datum_altitude$', 'name': '(?i)^(?!.*(qc|status))(?=.*sea_surface_height)(?=.*surface_elevation)'}, 'sea_ice_v': {'standard_name': 'northward_sea_ice_velocity$|sea_ice_y_velocity$', 'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*v)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*y)(?=.*vel)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*north)(?=.*vel)'}, 'salt': {'standard_name': 'sea_surface_salinity$|sea_water_absolute_salinity$|sea_water_practical_salinity$|sea_water_salinity$', 'name': '(?i)^(?!.*(soil|qc|status|bottom))(?=.*sal)'}, 'u': {'standard_name': 'baroclinic_eastward_sea_water_velocity$|barotropic_eastward_sea_water_velocity$|barotropic_sea_water_x_velocity$|eastward_sea_water_velocity$|eastward_sea_water_velocity_assuming_no_tide$|geostrophic_eastward_sea_water_velocity$|sea_water_x_velocity$|surface_eastward_sea_water_velocity$|surface_geostrophic_eastward_sea_water_velocity$|surface_geostrophic_sea_water_x_velocity$', 
'name': 'u$|(?i)(?=.*east)(?=.*vel)'}, 'temp': {'standard_name': 'sea_surface_temperature$|sea_water_potential_temperature$|sea_water_temperature$', 'name': '(?i)^(?!.*(air|qc|status|atmospheric|bottom))(?=.*temp)'}, 'water_dir': {'standard_name': 'sea_water_velocity_from_direction$|sea_water_velocity_to_direction$', 'name': '(?i)^(?!.*(qc|status|air|wind))(?=.*dir)(?=.*water)'}, 'wind_dir': {'standard_name': 'wind_from_direction$|wind_to_direction$', 'name': '(?i)^(?!.*(qc|status|water))(?=.*dir)(?=.*wind)'}, 'sea_ice_u': {'standard_name': 'eastward_sea_ice_velocity$|sea_ice_x_velocity$', 'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*u)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*x)(?=.*vel)|(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*east)(?=.*vel)'}, 'wind_speed': {'standard_name': 'wind_speed$', 'name': '(?i)^(?!.*(qc|status|water))(?=.*speed)(?=.*wind)'}, 'sea_ice_area_fraction': {'standard_name': 'sea_ice_area_fraction$', 'name': '(?i)^(?!.*(qc|status))(?=.*sea)(?=.*ice)(?=.*area)(?=.*fraction)'}, 'w': {'standard_name': 'upward_sea_water_velocity$', 'name': 'w$|(?i)(?=.*up)(?=.*vel)'}}-