Skip to content

Commit 7cf47f1

Browse files
authored
Allow using output from multi_model_statistics or ensemble_statistics as reference for bias or distance_metric (#2652)
1 parent e3eb9ff commit 7cf47f1

File tree

4 files changed

+148
-19
lines changed

4 files changed

+148
-19
lines changed

doc/recipe/preprocessor.rst

+58
Original file line numberDiff line numberDiff line change
@@ -2602,6 +2602,35 @@ For this, exactly one input dataset needs to be declared as
26022602
26032603
In the example above, ERA-Interim is used as reference dataset for the bias
26042604
calculation.
2605+
2606+
It is also possible to use the output from the :ref:`multi-model statistics` or
2607+
:ref:`ensemble statistics` preprocessor as reference dataset.
2608+
In this case, make sure to use ``reference_for_bias: true`` for each dataset
2609+
that will be used to create the reference dataset and use the option
2610+
``keep_input_datasets: false`` for the multi-dataset preprocessor.
2611+
For example:
2612+
2613+
.. code-block:: yaml
2614+
2615+
datasets:
2616+
- {dataset: CanESM5, group: ref, reference_for_bias: true}
2617+
- {dataset: CESM2, group: ref, reference_for_bias: true}
2618+
- {dataset: MIROC6, group: notref}
2619+
2620+
preprocessors:
2621+
calculate_bias:
2622+
custom_order: true
2623+
multi_model_statistics:
2624+
statistics: [mean]
2625+
span: overlap
2626+
groupby: [group]
2627+
keep_input_datasets: false
2628+
bias:
2629+
bias_type: relative
2630+
2631+
Here, the bias of MIROC6 is calculated relative to the multi-model mean from
2632+
the models CanESM5 and CESM2.
2633+
26052634
The reference dataset needs to be broadcastable to all other datasets.
26062635
This supports `iris' rich broadcasting abilities
26072636
<https://scitools-iris.readthedocs.io/en/stable/userguide/cube_maths.
@@ -2668,6 +2697,35 @@ For this, exactly one input dataset needs to be declared as
26682697
26692698
In the example above, ERA-Interim is used as reference dataset for the distance
26702699
metric calculation.
2700+
2701+
It is also possible to use the output from the :ref:`multi-model statistics` or
2702+
:ref:`ensemble statistics` preprocessor as reference dataset.
2703+
In this case, make sure to use ``reference_for_metric: true`` for each dataset
2704+
that will be used to create the reference dataset and use the option
2705+
``keep_input_datasets: false`` for the multi-dataset preprocessor.
2706+
For example:
2707+
2708+
.. code-block:: yaml
2709+
2710+
datasets:
2711+
- {dataset: CanESM5, group: ref, reference_for_metric: true}
2712+
- {dataset: CESM2, group: ref, reference_for_metric: true}
2713+
- {dataset: MIROC6, group: notref}
2714+
2715+
preprocessors:
2716+
calculate_distance_metric:
2717+
custom_order: true
2718+
multi_model_statistics:
2719+
statistics: [mean]
2720+
span: overlap
2721+
groupby: [group]
2722+
keep_input_datasets: false
2723+
distance_metric:
2724+
metric: emd
2725+
2726+
Here, the EMD metric of MIROC6 is calculated relative to the multi-model
2727+
mean from the models CanESM5 and CESM2.
2728+
26712729
All datasets need to have the same shape and coordinates.
26722730
To ensure this, the preprocessors :func:`esmvalcore.preprocessor.regrid` and/or
26732731
:func:`esmvalcore.preprocessor.regrid_time` might be helpful.

esmvalcore/_recipe/check.py

+10
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,16 @@ def _check_ref_attributes(products: set, *, step: str, attr_name: str) -> None:
501501
if not products:
502502
return
503503

504+
# It is fine to have multiple references when preprocessors are used that
505+
# combine datasets
506+
multi_dataset_preprocs = (
507+
"multi_model_statistics",
508+
"ensemble_statistics",
509+
)
510+
for preproc in multi_dataset_preprocs:
511+
if any(preproc in p.settings for p in products):
512+
return
513+
504514
# Check that exactly one dataset contains the specified facet
505515
reference_products = []
506516
for product in products:

esmvalcore/preprocessor/_compare_with_refs.py

+3-19
Original file line numberDiff line numberDiff line change
@@ -335,25 +335,9 @@ def distance_metric(
335335
"A list of Cubes is given to this preprocessor; please "
336336
"specify a `reference`"
337337
)
338-
reference_products = []
339-
for product in products:
340-
if product.attributes.get("reference_for_metric", False):
341-
reference_products.append(product)
342-
if len(reference_products) != 1:
343-
raise ValueError(
344-
f"Expected exactly 1 dataset with 'reference_for_metric: "
345-
f"true', found {len(reference_products):d}"
346-
)
347-
reference_product = reference_products[0]
348-
349-
# Extract reference cube
350-
# Note: For technical reasons, product objects contain the member
351-
# ``cubes``, which is a list of cubes. However, this is expected to be
352-
# a list with exactly one element due to the call of concatenate
353-
# earlier in the preprocessing chain of ESMValTool. To make sure that
354-
# this preprocessor can also be used outside the ESMValTool
355-
# preprocessing chain, an additional concatenate call is added here.
356-
reference = concatenate(reference_product.cubes)
338+
reference, reference_product = _get_ref(
339+
products, "reference_for_metric"
340+
)
357341

358342
# If input is an Iterable of Cube objects, calculate distance metric for
359343
# each element

tests/integration/recipe/test_recipe.py

+77
Original file line numberDiff line numberDiff line change
@@ -3052,6 +3052,45 @@ def test_bias_two_refs(tmp_path, patched_datafinder, session):
30523052
assert "found 2" in exc.value.failed_tasks[0].message
30533053

30543054

3055+
def test_bias_two_refs_with_mmm(tmp_path, patched_datafinder, session):
3056+
content = dedent("""
3057+
preprocessors:
3058+
test_bias:
3059+
custom_order: true
3060+
multi_model_statistics:
3061+
statistics: [mean]
3062+
span: overlap
3063+
groupby: [group]
3064+
keep_input_datasets: false
3065+
bias:
3066+
bias_type: relative
3067+
denominator_mask_threshold: 5
3068+
3069+
diagnostics:
3070+
diagnostic_name:
3071+
variables:
3072+
ta:
3073+
preprocessor: test_bias
3074+
project: CMIP6
3075+
mip: Amon
3076+
exp: historical
3077+
timerange: '20000101/20001231'
3078+
ensemble: r1i1p1f1
3079+
grid: gn
3080+
additional_datasets:
3081+
- {dataset: CanESM5, group: ref, reference_for_bias: true}
3082+
- {dataset: CESM2, group: ref, reference_for_bias: true}
3083+
- {dataset: MPI-ESM-LR, group: notref}
3084+
3085+
scripts: null
3086+
""")
3087+
recipe = get_recipe(tmp_path, content, session)
3088+
3089+
assert len(recipe.tasks) == 1
3090+
task = recipe.tasks.pop()
3091+
assert len(task.products) == 3
3092+
3093+
30553094
def test_invalid_bias_type(tmp_path, patched_datafinder, session):
30563095
content = dedent("""
30573096
preprocessors:
@@ -3322,6 +3361,44 @@ def test_distance_metric_two_refs(tmp_path, patched_datafinder, session):
33223361
assert "found 2" in exc.value.failed_tasks[0].message
33233362

33243363

3364+
def test_distance_metrics_two_refs_with_mmm(
3365+
tmp_path, patched_datafinder, session
3366+
):
3367+
content = dedent("""
3368+
preprocessors:
3369+
test_distance_metric:
3370+
custom_order: true
3371+
ensemble_statistics:
3372+
statistics: [mean]
3373+
span: overlap
3374+
distance_metric:
3375+
metric: emd
3376+
3377+
diagnostics:
3378+
diagnostic_name:
3379+
variables:
3380+
ta:
3381+
preprocessor: test_distance_metric
3382+
project: CMIP6
3383+
mip: Amon
3384+
exp: historical
3385+
timerange: '20000101/20001231'
3386+
ensemble: r1i1p1f1
3387+
grid: gn
3388+
additional_datasets:
3389+
- {dataset: CESM2, ensemble: r1i1p1f1, reference_for_metric: true}
3390+
- {dataset: CESM2, ensemble: r2i1p1f1, reference_for_metric: true}
3391+
- {dataset: MPI-ESM-LR}
3392+
3393+
scripts: null
3394+
""")
3395+
recipe = get_recipe(tmp_path, content, session)
3396+
3397+
assert len(recipe.tasks) == 1
3398+
task = recipe.tasks.pop()
3399+
assert len(task.products) == 3
3400+
3401+
33253402
def test_invalid_metric(tmp_path, patched_datafinder, session):
33263403
content = dedent("""
33273404
preprocessors:

0 commit comments

Comments
 (0)