Allow using output from multi_model_statistics or ensemble_statistics as reference for bias or distance_metric (#2652)

schlunma · web-flow · commit 7cf47f14ae7a · 2025-02-04T16:54:47.000+01:00
diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
@@ -2602,6 +2602,35 @@ For this, exactly one input dataset needs to be declared as
 
 In the example above, ERA-Interim is used as reference dataset for the bias
 calculation.
+
+It is also possible to use the output from the :ref:`multi-model statistics` or
+:ref:`ensemble statistics` preprocessor as reference dataset.
+In this case, make sure to use ``reference_for_bias: true`` for each dataset
+that will be used to create the reference dataset and use the option
+``keep_input_datasets: false`` for the multi-dataset preprocessor.
+For example:
+
+.. code-block:: yaml
+
+  datasets:
+    - {dataset: CanESM5, group: ref, reference_for_bias: true}
+    - {dataset: CESM2,   group: ref, reference_for_bias: true}
+    - {dataset: MIROC6,  group: notref}
+
+  preprocessors:
+    calculate_bias:
+      custom_order: true
+      multi_model_statistics:
+        statistics: [mean]
+        span: overlap
+        groupby: [group]
+        keep_input_datasets: false
+      bias:
+        bias_type: relative
+
+Here, the bias of MIROC6 is calculated relative to the multi-model mean from
+the models CanESM5 and CESM2.
+
 The reference dataset needs to be broadcastable to all other datasets.
 This supports `iris' rich broadcasting abilities
 <https://scitools-iris.readthedocs.io/en/stable/userguide/cube_maths.
@@ -2668,6 +2697,35 @@ For this, exactly one input dataset needs to be declared as
 
 In the example above, ERA-Interim is used as reference dataset for the distance
 metric calculation.
+
+It is also possible to use the output from the :ref:`multi-model statistics` or
+:ref:`ensemble statistics` preprocessor as reference dataset.
+In this case, make sure to use ``reference_for_metric: true`` for each dataset
+that will be used to create the reference dataset and use the option
+``keep_input_datasets: false`` for the multi-dataset preprocessor.
+For example:
+
+.. code-block:: yaml
+
+  datasets:
+    - {dataset: CanESM5, group: ref, reference_for_metric: true}
+    - {dataset: CESM2,   group: ref, reference_for_metric: true}
+    - {dataset: MIROC6,  group: notref}
+
+  preprocessors:
+    calculate_distance_metric:
+      custom_order: true
+      multi_model_statistics:
+        statistics: [mean]
+        span: overlap
+        groupby: [group]
+        keep_input_datasets: false
+      distance_metric:
+        metric: emd
+
+Here, the EMD metric of MIROC6 is calculated relative to the multi-model
+mean from the models CanESM5 and CESM2.
+
 All datasets need to have the same shape and coordinates.
 To ensure this, the preprocessors :func:`esmvalcore.preprocessor.regrid` and/or
 :func:`esmvalcore.preprocessor.regrid_time` might be helpful.
diff --git a/esmvalcore/_recipe/check.py b/esmvalcore/_recipe/check.py
@@ -501,6 +501,16 @@ def _check_ref_attributes(products: set, *, step: str, attr_name: str) -> None:
     if not products:
         return
 
+    # It is fine to have multiple references when preprocessors are used that
+    # combine datasets
+    multi_dataset_preprocs = (
+        "multi_model_statistics",
+        "ensemble_statistics",
+    )
+    for preproc in multi_dataset_preprocs:
+        if any(preproc in p.settings for p in products):
+            return
+
     # Check that exactly one dataset contains the specified facet
     reference_products = []
     for product in products:
diff --git a/esmvalcore/preprocessor/_compare_with_refs.py b/esmvalcore/preprocessor/_compare_with_refs.py
@@ -335,25 +335,9 @@ def distance_metric(
                 "A list of Cubes is given to this preprocessor; please "
                 "specify a `reference`"
             )
-        reference_products = []
-        for product in products:
-            if product.attributes.get("reference_for_metric", False):
-                reference_products.append(product)
-        if len(reference_products) != 1:
-            raise ValueError(
-                f"Expected exactly 1 dataset with 'reference_for_metric: "
-                f"true', found {len(reference_products):d}"
-            )
-        reference_product = reference_products[0]
-
-        # Extract reference cube
-        # Note: For technical reasons, product objects contain the member
-        # ``cubes``, which is a list of cubes. However, this is expected to be
-        # a list with exactly one element due to the call of concatenate
-        # earlier in the preprocessing chain of ESMValTool. To make sure that
-        # this preprocessor can also be used outside the ESMValTool
-        # preprocessing chain, an additional concatenate call is added here.
-        reference = concatenate(reference_product.cubes)
+        reference, reference_product = _get_ref(
+            products, "reference_for_metric"
+        )
 
     # If input is an Iterable of Cube objects, calculate distance metric for
     # each element
diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py
@@ -3052,6 +3052,45 @@ def test_bias_two_refs(tmp_path, patched_datafinder, session):
     assert "found 2" in exc.value.failed_tasks[0].message
 
 
+def test_bias_two_refs_with_mmm(tmp_path, patched_datafinder, session):
+    content = dedent("""
+        preprocessors:
+          test_bias:
+            custom_order: true
+            multi_model_statistics:
+              statistics: [mean]
+              span: overlap
+              groupby: [group]
+              keep_input_datasets: false
+            bias:
+              bias_type: relative
+              denominator_mask_threshold: 5
+
+        diagnostics:
+          diagnostic_name:
+            variables:
+              ta:
+                preprocessor: test_bias
+                project: CMIP6
+                mip: Amon
+                exp: historical
+                timerange: '20000101/20001231'
+                ensemble: r1i1p1f1
+                grid: gn
+                additional_datasets:
+                  - {dataset: CanESM5,    group: ref, reference_for_bias: true}
+                  - {dataset: CESM2,      group: ref, reference_for_bias: true}
+                  - {dataset: MPI-ESM-LR, group: notref}
+
+            scripts: null
+        """)
+    recipe = get_recipe(tmp_path, content, session)
+
+    assert len(recipe.tasks) == 1
+    task = recipe.tasks.pop()
+    assert len(task.products) == 3
+
+
 def test_invalid_bias_type(tmp_path, patched_datafinder, session):
     content = dedent("""
         preprocessors:
@@ -3322,6 +3361,44 @@ def test_distance_metric_two_refs(tmp_path, patched_datafinder, session):
     assert "found 2" in exc.value.failed_tasks[0].message
 
 
+def test_distance_metrics_two_refs_with_mmm(
+    tmp_path, patched_datafinder, session
+):
+    content = dedent("""
+        preprocessors:
+          test_distance_metric:
+            custom_order: true
+            ensemble_statistics:
+              statistics: [mean]
+              span: overlap
+            distance_metric:
+              metric: emd
+
+        diagnostics:
+          diagnostic_name:
+            variables:
+              ta:
+                preprocessor: test_distance_metric
+                project: CMIP6
+                mip: Amon
+                exp: historical
+                timerange: '20000101/20001231'
+                ensemble: r1i1p1f1
+                grid: gn
+                additional_datasets:
+                  - {dataset: CESM2, ensemble: r1i1p1f1, reference_for_metric: true}
+                  - {dataset: CESM2, ensemble: r2i1p1f1, reference_for_metric: true}
+                  - {dataset: MPI-ESM-LR}
+
+            scripts: null
+        """)
+    recipe = get_recipe(tmp_path, content, session)
+
+    assert len(recipe.tasks) == 1
+    task = recipe.tasks.pop()
+    assert len(task.products) == 3
+
+
 def test_invalid_metric(tmp_path, patched_datafinder, session):
     content = dedent("""
         preprocessors: