Skip to content

Commit

Permalink
Add preprocessors distance_metrics and histogram (#2299)
Browse files Browse the repository at this point in the history
Co-authored-by: Axel Lauer <[email protected]>
  • Loading branch information
schlunma and axel-lauer authored May 8, 2024
1 parent 8276a62 commit cffb1e9
Show file tree
Hide file tree
Showing 20 changed files with 3,017 additions and 767 deletions.
298 changes: 274 additions & 24 deletions doc/recipe/preprocessor.rst

Large diffs are not rendered by default.

74 changes: 57 additions & 17 deletions esmvalcore/_recipe/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import os
import subprocess
from functools import partial
from pprint import pformat
from shutil import which
from typing import Any, Iterable
Expand Down Expand Up @@ -395,47 +396,86 @@ def differing_timeranges(timeranges, required_vars):
"Set `timerange` to a common value.")


def bias_type(settings: dict) -> None:
"""Check that bias_type for bias preprocessor is valid."""
if 'bias' not in settings:
def _check_literal(
settings: dict,
*,
step: str,
option: str,
allowed_values: tuple[str],
) -> None:
"""Check that an option for a preprocessor has a valid value."""
if step not in settings:
return
valid_options = ('absolute', 'relative')
user_bias_type = settings['bias'].get('bias_type', 'absolute')
if user_bias_type not in valid_options:
user_value = settings[step].get(option, allowed_values[0])
if user_value not in allowed_values:
raise RecipeError(
f"Expected one of {valid_options} for `bias_type`, got "
f"'{user_bias_type}'"
f"Expected one of {allowed_values} for `{option}`, got "
f"'{user_value}'"
)


def reference_for_bias_preproc(products):
"""Check that exactly one reference dataset for bias preproc is given."""
step = 'bias'
# Recipe check for the ``bias_type`` option of the ``bias`` preprocessor step.
# Binds the generic ``_check_literal`` validator to that step/option pair so
# that callers only need to pass the preprocessor settings dict.
bias_type = partial(
_check_literal,
step='bias',
option='bias_type',
# Accepted values for ``bias_type``.
allowed_values=('absolute', 'relative'),
)


# Recipe check for the ``metric`` option of the ``distance_metric``
# preprocessor step. Binds the generic ``_check_literal`` validator to that
# step/option pair so that callers only need to pass the settings dict.
metric_type = partial(
_check_literal,
step='distance_metric',
option='metric',
# Accepted values for ``metric``; every metric is listed together with a
# ``weighted_`` counterpart.
allowed_values=(
'rmse',
'weighted_rmse',
'pearsonr',
'weighted_pearsonr',
'emd',
'weighted_emd',
),
)


def _check_ref_attributes(products: set, *, step: str, attr_name: str) -> None:
"""Check that exactly one reference dataset is given."""
products = {p for p in products if step in p.settings}
if not products:
return

# Check that exactly one dataset contains the facet ``reference_for_bias:
# true``
# Check that exactly one dataset contains the specified facet
reference_products = []
for product in products:
if product.attributes.get('reference_for_bias', False):
if product.attributes.get(attr_name, False):
reference_products.append(product)
if len(reference_products) != 1:
products_str = [p.filename for p in products]
if not reference_products:
ref_products_str = ". "
else:
ref_products_str = [p.filename for p in reference_products]
ref_products_str = f":\n{pformat(ref_products_str)}.\n"
ref_products_str = (
f":\n{pformat([p.filename for p in reference_products])}.\n"
)
raise RecipeError(
f"Expected exactly 1 dataset with 'reference_for_bias: true' in "
f"Expected exactly 1 dataset with '{attr_name}: true' in "
f"products\n{pformat(products_str)},\nfound "
f"{len(reference_products):d}{ref_products_str}Please also "
f"ensure that the reference dataset is not excluded with the "
f"'exclude' option")


# Reference-dataset check for the ``bias`` preprocessor step: binds
# ``_check_ref_attributes`` to the ``bias`` step and the
# ``reference_for_bias`` attribute, so callers only pass the products.
reference_for_bias_preproc = partial(
_check_ref_attributes, step='bias', attr_name='reference_for_bias'
)


# Reference-dataset check for the ``distance_metric`` preprocessor step:
# binds ``_check_ref_attributes`` to the ``distance_metric`` step and the
# ``reference_for_metric`` attribute, so callers only pass the products.
reference_for_distance_metric_preproc = partial(
_check_ref_attributes,
step='distance_metric',
attr_name='reference_for_metric',
)


def statistics_preprocessors(settings: dict) -> None:
"""Check options of statistics preprocessors."""
mm_stats = (
Expand Down
4 changes: 3 additions & 1 deletion esmvalcore/_recipe/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@
)
from esmvalcore.preprocessor._area import _update_shapefile_path
from esmvalcore.preprocessor._multimodel import _get_stat_identifier
from esmvalcore.preprocessor._other import _group_products
from esmvalcore.preprocessor._regrid import (
_spec_to_latlonvals,
get_cmor_levels,
get_reference_levels,
parse_cell_spec,
)
from esmvalcore.preprocessor._shared import _group_products

from . import check
from .from_datasets import datasets_to_recipe
Expand Down Expand Up @@ -555,6 +555,7 @@ def _get_preprocessor_products(
f'{separator.join(sorted(missing_vars))}')

check.reference_for_bias_preproc(products)
check.reference_for_distance_metric_preproc(products)

_configure_multi_product_preprocessor(
products=products,
Expand Down Expand Up @@ -656,6 +657,7 @@ def _update_preproc_functions(settings, dataset, datasets, missing_vars):
check.statistics_preprocessors(settings)
check.regridding_schemes(settings)
check.bias_type(settings)
check.metric_type(settings)


def _get_preprocessor_task(datasets, profiles, task_name):
Expand Down
12 changes: 3 additions & 9 deletions esmvalcore/iris_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def rechunk_cube(
Input cube.
complete_coords:
(Names of) coordinates along which the output cubes should not be
chunked. The given coordinates must span exactly 1 dimension.
chunked.
remaining_dims:
Chunksize of the remaining dimensions.
Expand All @@ -248,17 +248,11 @@ def rechunk_cube(
"""
cube = cube.copy() # do not modify input cube

# Make sure that complete_coords span exactly 1 dimension
complete_dims = []
for coord in complete_coords:
coord = cube.coord(coord)
dims = cube.coord_dims(coord)
if len(dims) != 1:
raise CoordinateMultiDimError(
f"Complete coordinates must be 1D coordinates, got "
f"{len(dims):d}D coordinate '{coord.name()}'"
)
complete_dims.append(dims[0])
complete_dims.extend(cube.coord_dims(coord))
complete_dims = list(set(complete_dims))

# Rechunk data
if cube.has_lazy_data():
Expand Down
10 changes: 7 additions & 3 deletions esmvalcore/preprocessor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
meridional_statistics,
zonal_statistics,
)
from ._bias import bias
from ._compare_with_refs import bias, distance_metric
from ._cycles import amplitude
from ._derive import derive
from ._detrend import detrend
Expand All @@ -46,7 +46,7 @@
mask_outside_range,
)
from ._multimodel import ensemble_statistics, multi_model_statistics
from ._other import clip
from ._other import clip, histogram
from ._regrid import (
extract_coordinate_points,
extract_levels,
Expand Down Expand Up @@ -175,12 +175,15 @@
'linear_trend_stderr',
# Convert units
'convert_units',
# Histograms
'histogram',
# Ensemble statistics
'ensemble_statistics',
# Multi model statistics
'multi_model_statistics',
# Bias calculation
# Comparison with reference datasets
'bias',
'distance_metric',
# Remove supplementary variables from cube
'remove_supplementary_variables',
# Save to file
Expand Down Expand Up @@ -215,6 +218,7 @@

MULTI_MODEL_FUNCTIONS = {
'bias',
'distance_metric',
'ensemble_statistics',
'multi_model_statistics',
'mask_multimodel',
Expand Down
Loading

0 comments on commit cffb1e9

Please sign in to comment.