Add preprocessors distance_metrics and histogram (#2299)

Co-authored-by: Axel Lauer <[email protected]>
ESMValGroup · May 8, 2024 · cffb1e9
1 parent 8276a62
commit cffb1e9
Show file tree

Hide file tree

Showing 20 changed files with 3,017 additions and 767 deletions.
diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py
@@ -5,6 +5,7 @@
 import logging
 import os
 import subprocess
+from functools import partial
 from pprint import pformat
 from shutil import which
 from typing import Any, Iterable
@@ -395,47 +396,86 @@ def differing_timeranges(timeranges, required_vars):
             "Set `timerange` to a common value.")
 
 
-def bias_type(settings: dict) -> None:
-    """Check that bias_type for bias preprocessor is valid."""
-    if 'bias' not in settings:
+def _check_literal(
+    settings: dict,
+    *,
+    step: str,
+    option: str,
+    allowed_values: tuple[str],
+) -> None:
+    """Check that an option for a preprocessor has a valid value."""
+    if step not in settings:
         return
-    valid_options = ('absolute', 'relative')
-    user_bias_type = settings['bias'].get('bias_type', 'absolute')
-    if user_bias_type not in valid_options:
+    user_value = settings[step].get(option, allowed_values[0])
+    if user_value not in allowed_values:
         raise RecipeError(
-            f"Expected one of {valid_options} for `bias_type`, got "
-            f"'{user_bias_type}'"
+            f"Expected one of {allowed_values} for `{option}`, got "
+            f"'{user_value}'"
         )
 
 
-def reference_for_bias_preproc(products):
-    """Check that exactly one reference dataset for bias preproc is given."""
-    step = 'bias'
+bias_type = partial(
+    _check_literal,
+    step='bias',
+    option='bias_type',
+    allowed_values=('absolute', 'relative'),
+)
+
+
+metric_type = partial(
+    _check_literal,
+    step='distance_metric',
+    option='metric',
+    allowed_values=(
+        'rmse',
+        'weighted_rmse',
+        'pearsonr',
+        'weighted_pearsonr',
+        'emd',
+        'weighted_emd',
+    ),
+)
+
+
+def _check_ref_attributes(products: set, *, step: str, attr_name: str) -> None:
+    """Check that exactly one reference dataset is given."""
     products = {p for p in products if step in p.settings}
     if not products:
         return
 
-    # Check that exactly one dataset contains the facet ``reference_for_bias:
-    # true``
+    # Check that exactly one dataset contains the specified facet
     reference_products = []
     for product in products:
-        if product.attributes.get('reference_for_bias', False):
+        if product.attributes.get(attr_name, False):
             reference_products.append(product)
     if len(reference_products) != 1:
         products_str = [p.filename for p in products]
         if not reference_products:
             ref_products_str = ". "
         else:
-            ref_products_str = [p.filename for p in reference_products]
-            ref_products_str = f":\n{pformat(ref_products_str)}.\n"
+            ref_products_str = (
+                f":\n{pformat([p.filename for p in reference_products])}.\n"
+            )
         raise RecipeError(
-            f"Expected exactly 1 dataset with 'reference_for_bias: true' in "
+            f"Expected exactly 1 dataset with '{attr_name}: true' in "
             f"products\n{pformat(products_str)},\nfound "
             f"{len(reference_products):d}{ref_products_str}Please also "
             f"ensure that the reference dataset is not excluded with the "
             f"'exclude' option")
 
 
+reference_for_bias_preproc = partial(
+    _check_ref_attributes, step='bias', attr_name='reference_for_bias'
+)
+
+
+reference_for_distance_metric_preproc = partial(
+    _check_ref_attributes,
+    step='distance_metric',
+    attr_name='reference_for_metric',
+)
+
+
 def statistics_preprocessors(settings: dict) -> None:
     """Check options of statistics preprocessors."""
     mm_stats = (

diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py
@@ -37,13 +37,13 @@
 )
 from esmvalcore.preprocessor._area import _update_shapefile_path
 from esmvalcore.preprocessor._multimodel import _get_stat_identifier
-from esmvalcore.preprocessor._other import _group_products
 from esmvalcore.preprocessor._regrid import (
     _spec_to_latlonvals,
     get_cmor_levels,
     get_reference_levels,
     parse_cell_spec,
 )
+from esmvalcore.preprocessor._shared import _group_products
 
 from . import check
 from .from_datasets import datasets_to_recipe
@@ -555,6 +555,7 @@ def _get_preprocessor_products(
             f'{separator.join(sorted(missing_vars))}')
 
     check.reference_for_bias_preproc(products)
+    check.reference_for_distance_metric_preproc(products)
 
     _configure_multi_product_preprocessor(
         products=products,
@@ -656,6 +657,7 @@ def _update_preproc_functions(settings, dataset, datasets, missing_vars):
     check.statistics_preprocessors(settings)
     check.regridding_schemes(settings)
     check.bias_type(settings)
+    check.metric_type(settings)
 
 
 def _get_preprocessor_task(datasets, profiles, task_name):

diff --git a/esmvalcore/iris_helpers.py b/esmvalcore/iris_helpers.py
@@ -236,7 +236,7 @@ def rechunk_cube(
         Input cube.
     complete_coords:
         (Names of) coordinates along which the output cubes should not be
-        chunked. The given coordinates must span exactly 1 dimension.
+        chunked.
     remaining_dims:
         Chunksize of the remaining dimensions.
 
@@ -248,17 +248,11 @@ def rechunk_cube(
     """
     cube = cube.copy()  # do not modify input cube
 
-    # Make sure that complete_coords span exactly 1 dimension
     complete_dims = []
     for coord in complete_coords:
         coord = cube.coord(coord)
-        dims = cube.coord_dims(coord)
-        if len(dims) != 1:
-            raise CoordinateMultiDimError(
-                f"Complete coordinates must be 1D coordinates, got "
-                f"{len(dims):d}D coordinate '{coord.name()}'"
-            )
-        complete_dims.append(dims[0])
+        complete_dims.extend(cube.coord_dims(coord))
+    complete_dims = list(set(complete_dims))
 
     # Rechunk data
     if cube.has_lazy_data():

diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py
@@ -22,7 +22,7 @@
     meridional_statistics,
     zonal_statistics,
 )
-from ._bias import bias
+from ._compare_with_refs import bias, distance_metric
 from ._cycles import amplitude
 from ._derive import derive
 from ._detrend import detrend
@@ -46,7 +46,7 @@
     mask_outside_range,
 )
 from ._multimodel import ensemble_statistics, multi_model_statistics
-from ._other import clip
+from ._other import clip, histogram
 from ._regrid import (
     extract_coordinate_points,
     extract_levels,
@@ -175,12 +175,15 @@
     'linear_trend_stderr',
     # Convert units
     'convert_units',
+    # Histograms
+    'histogram',
     # Ensemble statistics
     'ensemble_statistics',
     # Multi model statistics
     'multi_model_statistics',
-    # Bias calculation
+    # Comparison with reference datasets
     'bias',
+    'distance_metric',
     # Remove supplementary variables from cube
     'remove_supplementary_variables',
     # Save to file
@@ -215,6 +218,7 @@
 
 MULTI_MODEL_FUNCTIONS = {
     'bias',
+    'distance_metric',
     'ensemble_statistics',
     'multi_model_statistics',
     'mask_multimodel',