Allow cubes as input for bias preprocessor (#2183)

Co-authored-by: Valeriu Predoi <[email protected]>
ESMValGroup · Jan 23, 2024 · 989839f · 989839f
1 parent 0668223
commit 989839f
Show file tree

Hide file tree

Showing 6 changed files with 464 additions and 198 deletions.
diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
@@ -2386,9 +2386,9 @@ The bias module contains the following preprocessor functions:
 ``bias``
 --------
 
-This function calculates biases with respect to a given reference dataset. For
-this, exactly one input dataset needs to be declared as ``reference_for_bias:
-true`` in the recipe, e.g.,
+This function calculates biases with respect to a given reference dataset.
+For this, exactly one input dataset needs to be declared as
+``reference_for_bias: true`` in the recipe, e.g.,
 
 .. code-block:: yaml
 
@@ -2400,34 +2400,37 @@ true`` in the recipe, e.g.,
        reference_for_bias: true}
 
 In the example above, ERA-Interim is used as reference dataset for the bias
-calculation. For this preprocessor, all input datasets need to have identical
-dimensional coordinates. This can for example be ensured with the preprocessors
-:func:`esmvalcore.preprocessor.regrid` and/or
-:func:`esmvalcore.preprocessor.regrid_time`.
-
-The ``bias`` preprocessor supports 4 optional arguments:
-
-   * ``bias_type`` (:obj:`str`, default: ``'absolute'``): Bias type that is
-     calculated. Can be ``'absolute'`` (i.e., calculate bias for dataset
-     :math:`X` and reference :math:`R` as :math:`X - R`) or ``relative`` (i.e,
-     calculate bias as :math:`\frac{X - R}{R}`).
-   * ``denominator_mask_threshold`` (:obj:`float`, default: ``1e-3``):
-     Threshold to mask values close to zero in the denominator (i.e., the
-     reference dataset) during the calculation of relative biases. All values
-     in the reference dataset with absolute value less than the given threshold
-     are masked out. This setting is ignored when ``bias_type`` is set to
-     ``'absolute'``. Please note that for some variables with very small
-     absolute values (e.g., carbon cycle fluxes, which are usually :math:`<
-     10^{-6}` kg m :math:`^{-2}` s :math:`^{-1}`) it is absolutely essential to
-     change the default value in order to get reasonable results.
-   * ``keep_reference_dataset`` (:obj:`bool`, default: ``False``): If
-     ``True``, keep the reference dataset in the output. If ``False``, drop the
-     reference dataset.
-   * ``exclude`` (:obj:`list` of :obj:`str`): Exclude specific datasets from
-     this preprocessor. Note that this option is only available in the recipe,
-     not when using :func:`esmvalcore.preprocessor.bias` directly (e.g., in
-     another python script). If the reference dataset has been excluded, an
-     error is raised.
+calculation.
+The reference dataset needs to be broadcastable to all other datasets.
+This supports `iris' rich broadcasting abilities
+<https://scitools-iris.readthedocs.io/en/stable/userguide/cube_maths.
+html#calculating-a-cube-anomaly>`__.
+To ensure this, the preprocessors :func:`esmvalcore.preprocessor.regrid` and/or
+:func:`esmvalcore.preprocessor.regrid_time` might be helpful.
+
+The ``bias`` preprocessor supports 4 optional arguments in the recipe:
+
+* ``bias_type`` (:obj:`str`, default: ``'absolute'``): Bias type that is
+  calculated. Can be ``'absolute'`` (i.e., calculate bias for dataset
+  :math:`X` and reference :math:`R` as :math:`X - R`) or ``relative`` (i.e.,
+  calculate bias as :math:`\frac{X - R}{R}`).
+* ``denominator_mask_threshold`` (:obj:`float`, default: ``1e-3``):
+  Threshold to mask values close to zero in the denominator (i.e., the
+  reference dataset) during the calculation of relative biases. All values
+  in the reference dataset with absolute value less than the given threshold
+  are masked out. This setting is ignored when ``bias_type`` is set to
+  ``'absolute'``. Please note that for some variables with very small
+  absolute values (e.g., carbon cycle fluxes, which are usually :math:`<
+  10^{-6}` kg m :math:`^{-2}` s :math:`^{-1}`) it is absolutely essential to
+  change the default value in order to get reasonable results.
+* ``keep_reference_dataset`` (:obj:`bool`, default: ``False``): If
+  ``True``, keep the reference dataset in the output. If ``False``, drop the
+  reference dataset.
+* ``exclude`` (:obj:`list` of :obj:`str`): Exclude specific datasets from
+  this preprocessor. Note that this option is only available in the recipe,
+  not when using :func:`esmvalcore.preprocessor.bias` directly (e.g., in
+  another python script). If the reference dataset has been excluded, an
+  error is raised.
 
 Example:
 

diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py
@@ -385,6 +385,19 @@ def differing_timeranges(timeranges, required_vars):
             "Set `timerange` to a common value.")
 
 
+def bias_type(settings: dict) -> None:
+    """Check that bias_type for bias preprocessor is valid."""
+    if 'bias' not in settings:
+        return
+    valid_options = ('absolute', 'relative')
+    user_bias_type = settings['bias'].get('bias_type', 'absolute')
+    if user_bias_type not in valid_options:
+        raise RecipeError(
+            f"Expected one of {valid_options} for `bias_type`, got "
+            f"'{user_bias_type}'"
+        )
+
+
 def reference_for_bias_preproc(products):
     """Check that exactly one reference dataset for bias preproc is given."""
     step = 'bias'

diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py
@@ -656,6 +656,7 @@ def _update_preproc_functions(settings, dataset, datasets, missing_vars):
     if dataset.facets.get('frequency') == 'fx':
         check.check_for_temporal_preprocs(settings)
     check.statistics_preprocessors(settings)
+    check.bias_type(settings)
 
 
 def _get_preprocessor_task(datasets, profiles, task_name):

diff --git a/esmvalcore/preprocessor/_bias.py b/esmvalcore/preprocessor/_bias.py
@@ -1,36 +1,62 @@
 """Preprocessor functions to calculate biases from data."""
+from __future__ import annotations
+
 import logging
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Literal, Optional
 
 import dask.array as da
-import iris.cube
+from iris.cube import Cube, CubeList
 
 from ._io import concatenate
 
+if TYPE_CHECKING:
+    from esmvalcore.preprocessor import PreprocessorFile
+
 logger = logging.getLogger(__name__)
 
 
-def bias(products, bias_type='absolute', denominator_mask_threshold=1e-3,
-         keep_reference_dataset=False):
-    """Calculate biases.
+BiasType = Literal['absolute', 'relative']
+
+
+def bias(
+    products: set[PreprocessorFile] | Iterable[Cube],
+    ref_cube: Optional[Cube] = None,
+    bias_type: BiasType = 'absolute',
+    denominator_mask_threshold: float = 1e-3,
+    keep_reference_dataset: bool = False,
+) -> set[PreprocessorFile] | CubeList:
+    """Calculate biases relative to a reference dataset.
+
+    The reference dataset needs to be broadcastable to all input `products`.
+    This supports `iris' rich broadcasting abilities
+    <https://scitools-iris.readthedocs.io/en/stable/userguide/cube_maths.
+    html#calculating-a-cube-anomaly>`__. To ensure this, the preprocessors
+    :func:`esmvalcore.preprocessor.regrid` and/or
+    :func:`esmvalcore.preprocessor.regrid_time` might be helpful.
 
     Notes
     -----
-    This preprocessor requires a reference dataset. For this, exactly one input
-    dataset needs to have the facet ``reference_for_bias: true`` defined in the
-    recipe. In addition, all input datasets need to have identical dimensional
-    coordinates. This can for example be ensured with the preprocessors
-    :func:`esmvalcore.preprocessor.regrid` and/or
-    :func:`esmvalcore.preprocessor.regrid_time`.
+    The reference dataset can be specified with the `ref_cube` argument. If
+    `ref_cube` is ``None``, exactly one input dataset in the `products` set
+    needs to have the facet ``reference_for_bias: true`` defined in the recipe.
+    Please do **not** specify the option `ref_cube` when using this
+    preprocessor function in a recipe.
 
     Parameters
     ----------
-    products: set of esmvalcore.preprocessor.PreprocessorFile
-        Input datasets. Exactly one datasets needs the facet
-        ``reference_for_bias: true``.
-    bias_type: str, optional (default: 'absolute')
+    products:
+        Input datasets/cubes for which the bias is calculated relative to a
+        reference dataset/cube.
+    ref_cube:
+        Cube which is used as reference for the bias calculation. If ``None``,
+        `products` needs to be a :obj:`set` of
+        `~esmvalcore.preprocessor.PreprocessorFile` objects and exactly one
+        dataset in `products` needs the facet ``reference_for_bias: true``.
+    bias_type:
         Bias type that is calculated. Must be one of ``'absolute'`` (dataset -
         ref) or ``'relative'`` ((dataset - ref) / ref).
-    denominator_mask_threshold: float, optional (default: 1e-3)
+    denominator_mask_threshold:
         Threshold to mask values close to zero in the denominator (i.e., the
         reference dataset) during the calculation of relative biases. All
         values in the reference dataset with absolute value less than the given
@@ -40,79 +66,123 @@ def bias(products, bias_type='absolute', denominator_mask_threshold=1e-3,
         :math:`< 10^{-6}` kg m :math:`^{-2}` s :math:`^{-1}`) it is absolutely
         essential to change the default value in order to get reasonable
         results.
-    keep_reference_dataset: bool, optional (default: False)
+    keep_reference_dataset:
         If ``True``, keep the reference dataset in the output. If ``False``,
-        drop the reference dataset.
+        drop the reference dataset. Ignored if `ref_cube` is given.
 
     Returns
     -------
-    set of esmvalcore.preprocessor.PreprocessorFile
-        Output datasets.
+    set[PreprocessorFile] | CubeList
+        Output datasets/cubes. Will be a :obj:`set` of
+        :class:`~esmvalcore.preprocessor.PreprocessorFile` objects if
+        `products` is also one, a :class:`~iris.cube.CubeList` otherwise.
 
     Raises
     ------
     ValueError
         Not exactly one input datasets contains the facet
-        ``reference_for_bias: true``; ``bias_type`` is not one of
+        ``reference_for_bias: true`` if ``ref_cube=None``; ``ref_cube=None``
+        and the input products are given as iterable of
+        :class:`~iris.cube.Cube` objects; ``bias_type`` is not one of
         ``'absolute'`` or ``'relative'``.
 
     """
-    # Get reference product
-    reference_product = []
-    for product in products:
-        if product.attributes.get('reference_for_bias', False):
-            reference_product.append(product)
-    if len(reference_product) != 1:
-        raise ValueError(
-            f"Expected exactly 1 dataset with 'reference_for_bias: true', "
-            f"found {len(reference_product):d}")
-    reference_product = reference_product[0]
+    ref_product = None
+    all_cubes_given = all(isinstance(p, Cube) for p in products)
 
-    # Extract reference cube
-    # Note: For technical reasons, product objects contain the member
-    # ``cubes``, which is a list of cubes. However, this is expected to be a
-    # list with exactly one element due to the call of concatenate earlier in
-    # the preprocessing chain of ESMValTool. To make sure that this
-    # preprocessor can also be used outside the ESMValTool preprocessing chain,
-    # an additional concatenate call is added here.
-    ref_cube = concatenate(reference_product.cubes)
+    # Get reference cube if not explicitly given
+    if ref_cube is None:
+        if all_cubes_given:
+            raise ValueError(
+                "A list of Cubes is given to this preprocessor; please "
+                "specify a `ref_cube`"
+            )
+        (ref_cube, ref_product) = _get_ref(products, 'reference_for_bias')
+    else:
+        ref_product = None
+
+    # Mask reference cube appropriately for relative biases
     if bias_type == 'relative':
         ref_cube = ref_cube.copy()
-        ref_cube.data = da.ma.masked_inside(ref_cube.core_data(),
-                                            -denominator_mask_threshold,
-                                            denominator_mask_threshold)
-
-    # Iterate over all input datasets and calculate bias
+        ref_cube.data = da.ma.masked_inside(
+            ref_cube.core_data(),
+            -denominator_mask_threshold,
+            denominator_mask_threshold,
+        )
+
+    # If input is an Iterable of Cube objects, calculate bias for each element
+    if all_cubes_given:
+        cubes = [_calculate_bias(c, ref_cube, bias_type) for c in products]
+        return CubeList(cubes)
+
+    # Otherwise, iterate over all input products, calculate bias and adapt
+    # metadata and provenance information accordingly
     output_products = set()
     for product in products:
-        if product == reference_product:
+        if product == ref_product:
             continue
         cube = concatenate(product.cubes)
-        cube_metadata = cube.metadata
 
         # Calculate bias
-        if bias_type == 'absolute':
-            cube = cube - ref_cube
-            new_units = str(cube.units)
-        elif bias_type == 'relative':
-            cube = (cube - ref_cube) / ref_cube
-            new_units = '1'
-        else:
-            raise ValueError(
-                f"Expected one of ['absolute', 'relative'] for bias_type, got "
-                f"'{bias_type}'")
+        cube = _calculate_bias(cube, ref_cube, bias_type)
 
-        # Adapt cube metadata and provenance information
-        cube.metadata = cube_metadata
-        cube.units = new_units
-        product.attributes['units'] = new_units
-        product.wasderivedfrom(reference_product)
+        # Adapt metadata and provenance information
+        product.attributes['units'] = str(cube.units)
+        if ref_product is not None:
+            product.wasderivedfrom(ref_product)
 
-        product.cubes = iris.cube.CubeList([cube])
+        product.cubes = CubeList([cube])
         output_products.add(product)
 
     # Add reference dataset to output if desired
-    if keep_reference_dataset:
-        output_products.add(reference_product)
+    if keep_reference_dataset and ref_product is not None:
+        output_products.add(ref_product)
 
     return output_products
+
+
+def _get_ref(products, ref_tag: str) -> tuple[Cube, PreprocessorFile]:
+    """Get reference cube and product."""
+    ref_products = []
+    for product in products:
+        if product.attributes.get(ref_tag, False):
+            ref_products.append(product)
+    if len(ref_products) != 1:
+        raise ValueError(
+            f"Expected exactly 1 dataset with '{ref_tag}: true', found "
+            f"{len(ref_products):d}"
+        )
+    ref_product = ref_products[0]
+
+    # Extract reference cube
+    # Note: For technical reasons, product objects contain the member
+    # ``cubes``, which is a list of cubes. However, this is expected to be a
+    # list with exactly one element due to the call of concatenate earlier in
+    # the preprocessing chain of ESMValTool. To make sure that this
+    # preprocessor can also be used outside the ESMValTool preprocessing chain,
+    # an additional concatenate call is added here.
+    ref_cube = concatenate(ref_product.cubes)
+
+    return (ref_cube, ref_product)
+
+
+def _calculate_bias(cube: Cube, ref_cube: Cube, bias_type: BiasType) -> Cube:
+    """Calculate bias for a single cube relative to a reference cube."""
+    cube_metadata = cube.metadata
+
+    if bias_type == 'absolute':
+        cube = cube - ref_cube
+        new_units = cube.units
+    elif bias_type == 'relative':
+        cube = (cube - ref_cube) / ref_cube
+        new_units = '1'
+    else:
+        raise ValueError(
+            f"Expected one of ['absolute', 'relative'] for bias_type, got "
+            f"'{bias_type}'"
+        )
+
+    cube.metadata = cube_metadata
+    cube.units = new_units
+
+    return cube