Moved extraction of reference datasets for (horizontal and vertical) regridding into preprocessor functions #1455

Status: Closed (wants to merge 4 commits)
44 changes: 32 additions & 12 deletions esmvalcore/_recipe.py
@@ -53,7 +53,6 @@
from .preprocessor._regrid import (
_spec_to_latlonvals,
get_cmor_levels,
get_reference_levels,
parse_cell_spec,
)

@@ -153,9 +152,15 @@ def _update_target_levels(variable, variables, settings, config_user):
if not isinstance(levels, dict):
return

# If a CMOR table and a coordinate are specified, extract the target levels
# from the CMOR tables
if 'cmor_table' in levels and 'coordinate' in levels:
settings['extract_levels']['levels'] = get_cmor_levels(
levels['cmor_table'], levels['coordinate'])

# If a reference dataset is specified, add information to the levels dict
# (this is handled in the preprocessor itself). If the current dataset is
# the reference dataset, remove this preprocessing step.
elif 'dataset' in levels:
dataset = levels['dataset']
if variable['dataset'] == dataset:
@@ -164,15 +169,15 @@ def _update_target_levels(variable, variables, settings, config_user):
variable_data = _get_dataset_info(dataset, variables)
filename = _dataset_to_file(variable_data, config_user)
fix_dir = f"{os.path.splitext(variable_data['filename'])[0]}_fixed"
settings['extract_levels']['levels'] = get_reference_levels(
filename=filename,
project=variable_data['project'],
dataset=dataset,
short_name=variable_data['short_name'],
mip=variable_data['mip'],
frequency=variable_data['frequency'],
fix_dir=fix_dir,
)
settings['extract_levels']['levels'].update({
'filename': filename,
'project': variable_data['project'],
'dataset': dataset,
'short_name': variable_data['short_name'],
'mip': variable_data['mip'],
'frequency': variable_data['frequency'],
'fix_dir': fix_dir,
})


def _update_target_grid(variable, variables, settings, config_user):
@@ -183,11 +188,26 @@

grid = _special_name_to_dataset(variable, grid)

# If a reference dataset is specified, add information to the target_grid
# dict (this is handled in the preprocessor itself). If the current dataset
# is the reference dataset, remove this preprocessing step.
if variable['dataset'] == grid:
del settings['regrid']
elif any(grid == v['dataset'] for v in variables):
settings['regrid']['target_grid'] = _dataset_to_file(
_get_dataset_info(grid, variables), config_user)
variable_data = _get_dataset_info(grid, variables)
filename = _dataset_to_file(variable_data, config_user)
fix_dir = f"{os.path.splitext(variable_data['filename'])[0]}_fixed"
settings['regrid']['target_grid'] = {
'filename': filename,
'project': variable_data['project'],
'dataset': grid,
'short_name': variable_data['short_name'],
'mip': variable_data['mip'],
'frequency': variable_data['frequency'],
'fix_dir': fix_dir,
}

# target_grid is either an MxN grid spec or a grid spec described by a dict
else:
# Check that MxN grid spec is correct
target_grid = settings['regrid']['target_grid']
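For orientation (not part of the diff): the recipe code now only records the reference dataset's metadata and defers loading it to the preprocessor. A rough, illustrative sketch of the resulting settings entry, using a hypothetical reference dataset named REFERENCE and placeholder paths:

# Illustrative sketch: what settings['extract_levels']['levels'] may look
# like after _update_target_levels() when the recipe requests the levels of
# a (hypothetical) reference dataset called REFERENCE.
settings = {'extract_levels': {'levels': {'dataset': 'REFERENCE'},
                               'scheme': 'linear'}}
settings['extract_levels']['levels'].update({
    'filename': '/path/to/REFERENCE_ta.nc',    # placeholder path
    'project': 'OBS',                          # illustrative metadata
    'dataset': 'REFERENCE',
    'short_name': 'ta',
    'mip': 'Amon',
    'frequency': 'mon',
    'fix_dir': '/path/to/REFERENCE_ta_fixed',  # placeholder directory
})
# The preprocessor later resolves this dict via _get_reference_levels().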
144 changes: 101 additions & 43 deletions esmvalcore/preprocessor/_regrid.py
@@ -451,10 +451,10 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):

Note that the target grid can be a cube (:py:class:`~iris.cube.Cube`),
path to a cube (``str``), a grid spec (``str``) in the form
of `MxN`, or a ``dict`` specifying the target grid.
of `MxN`, a ``dict`` specifying the target grid, or a ``dict`` specifying a
reference dataset.

For the latter, the ``target_grid`` should be a ``dict`` with the
following keys:
To specify a target grid with a ``dict``, the following keys are necessary:

- ``start_longitude``: longitude at the center of the first grid cell.
- ``end_longitude``: longitude at the center of the last grid cell.
@@ -464,6 +464,26 @@
- ``end_latitude``: latitude at the center of the last grid cell.
- ``step_latitude``: constant latitude distance between grid cell centers.

To specify a reference dataset with a ``dict``, the following keys are
necessary:

- ``filename``: path to the file that contains the reference grid.
- ``project``: project of the reference dataset.
- ``dataset``: name of the reference dataset.
- ``short_name``: name of the variable in the reference dataset.
- ``mip``: name of the MIP table used.
- ``frequency``: temporal frequency of the reference dataset.
- ``fix_dir``: path to the directory where the new file is saved in case
fixes are necessary.

Note
----
If ``target_grid`` is a ``dict`` and it does not contain the key ``filename``,
it is automatically assumed that you specified the target grid with
``start_longitude``, ``end_longitude``, etc. If a valid reference dataset
is specified, fixes are applied to the reference dataset prior to loading
the data.

Review comment (Contributor): wait what? A grid is specified by an MxN specification, not by defining a box, am confusado here

Review comment (Contributor): ah nevermind, it's the _spec_to_latlonvals() stuff - can you add a pointer to that func here maybe, that confused me, prob gonna confuse somebody else too

Parameters
----------
cube : :py:class:`~iris.cube.Cube`
@@ -476,9 +496,9 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):
of the form ``MxN``, which specifies the extent of the cell, longitude
by latitude (degrees) for a global, regular target grid.

Alternatively, a dictionary with a regional target grid may
be specified (see above).

Alternatively, a dictionary with a regional target grid or a
dictionary describing the reference dataset may be provided (see
above).
scheme : str or dict
The regridding scheme to perform. If both source and target grid are
structured (regular or irregular), can be one of the built-in schemes
@@ -499,6 +519,13 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):
:py:class:`~iris.cube.Cube`
Regridded cube.

Raises
------
KeyError
If ``target_grid`` is a :obj:`dict` describing a reference dataset
(i.e., it contains a ``filename``) and at least one of the other
necessary keys is missing.

See Also
--------
extract_levels : Perform vertical regridding.
@@ -589,8 +616,13 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):
ycoord.coord_system = src_cs

elif isinstance(target_grid, dict):
# Generate a target grid from the provided specification,
target_grid = _regional_stock_cube(target_grid)
# Use reference dataset to extract grid
if 'filename' in target_grid:
target_grid = _get_reference_target_grid(target_grid)

# Generate a target grid from the provided specification
else:
target_grid = _regional_stock_cube(target_grid)

if not isinstance(target_grid, iris.cube.Cube):
raise ValueError('Expecting a cube, got {}.'.format(target_grid))
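Illustrative usage of the target_grid forms accepted after this change; cube stands for an already-loaded iris cube, and the reference-file path and metadata below are placeholders:

from esmvalcore.preprocessor import regrid

# Global MxN grid spec (unchanged behaviour).
regridded = regrid(cube, target_grid='2.5x2.5', scheme='linear')

# Regional grid spec, resolved internally via _regional_stock_cube().
regridded = regrid(
    cube,
    target_grid={
        'start_longitude': 0, 'end_longitude': 355, 'step_longitude': 5,
        'start_latitude': -85, 'end_latitude': 85, 'step_latitude': 5,
    },
    scheme='linear',
)

# New in this PR: a reference-dataset dict; the presence of 'filename'
# selects this branch, and fixes are applied before the grid is read.
regridded = regrid(
    cube,
    target_grid={
        'filename': '/path/to/reference.nc',   # placeholder path
        'project': 'CMIP6',                    # illustrative metadata
        'dataset': 'REFERENCE',
        'short_name': 'tas',
        'mip': 'Amon',
        'frequency': 'mon',
        'fix_dir': '/path/to/fixed',           # placeholder directory
    },
    scheme='linear',
)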
@@ -894,12 +926,21 @@ def extract_levels(cube,
----------
cube : iris.cube.Cube
The source cube to be vertically interpolated.
levels : ArrayLike
levels : ArrayLike or dict
One or more target levels for the vertical interpolation. Assumed
to be in the same S.I. units of the source cube vertical dimension
coordinate. If the requested levels are sufficiently close to the
levels of the cube, cube slicing will take place instead of
interpolation.
interpolation. If a :obj:`dict` is given, a reference dataset is used
to infer the target levels. The :obj:`dict` needs to include the
following keys: ``filename`` (path to the file that contains the
reference levels), ``project`` (project of the reference dataset),
``dataset`` (name of the reference dataset), ``short_name`` (name of
the variable in the reference dataset), ``mip`` (name of the MIP table
used), ``frequency`` (temporal frequency of the reference dataset), and
``fix_dir`` (path to the directory where the new file is saved in case
fixes are necessary). Fixes are applied to the reference dataset prior
to loading it.
scheme : str
The vertical interpolation scheme to use. Choose from
'linear',
@@ -929,13 +970,22 @@
iris.cube.Cube
A cube with the requested vertical levels.

Raises
------
KeyError
If ``levels`` is a :obj:`dict` and at least one of the necessary keys
is missing.

See Also
--------
regrid : Perform horizontal regridding.
"""
interpolation, extrapolation = parse_vertical_scheme(scheme)

# If a reference dataset is specified, load it and extract the levels
if isinstance(levels, dict):
levels = _get_reference_levels(levels)

# Ensure we have a non-scalar array of levels.
levels = np.array(levels, ndmin=1)

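Illustrative usage of the two forms of levels accepted after this change; cube stands for an already-loaded iris cube, and the reference-dataset entries are placeholders:

from esmvalcore.preprocessor import extract_levels

# Explicit target levels (unchanged behaviour), in the units of the cube's
# vertical coordinate.
interpolated = extract_levels(cube, levels=[100000.0, 85000.0, 50000.0],
                              scheme='linear')

# New in this PR: infer the target levels from a reference dataset.
interpolated = extract_levels(
    cube,
    levels={
        'filename': '/path/to/reference.nc',   # placeholder path
        'project': 'CMIP6',                    # illustrative metadata
        'dataset': 'REFERENCE',
        'short_name': 'ta',
        'mip': 'Amon',
        'frequency': 'mon',
        'fix_dir': '/path/to/fixed',           # placeholder directory
    },
    scheme='linear',
)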
@@ -1032,37 +1082,34 @@ def get_cmor_levels(cmor_table, coordinate):
coordinate, cmor_table))


def get_reference_levels(filename, project, dataset, short_name, mip,
frequency, fix_dir):
"""Get level definition from a reference dataset.

Parameters
----------
filename: str
Path to the reference file
project : str
Name of the project
dataset : str
Name of the dataset
short_name : str
Name of the variable
mip : str
Name of the mip table
frequency : str
Time frequency
fix_dir : str
Output directory for fixed data

Returns
-------
list[float]

Raises
------
ValueError:
If the dataset is not defined, the coordinate does not specify any
levels or the string is badly formatted.
"""
def _get_fixed_cube(dataset_dict):
"""Get fixed cube that can be used as reference for regridding."""
necessary_keys = [
'filename',
'project',
'dataset',
'short_name',
'mip',
'frequency',
'fix_dir',
]
for key in necessary_keys:
if key not in dataset_dict:
raise KeyError(
f"Necessary key '{key}' is missing in description of "
f"reference dataset. If the regridding target grid/levels is "
f"specified with a dict describing a reference dataset, the "
f"following keys are necessary: {necessary_keys}. Got "
f"{dataset_dict}")
filename = dataset_dict['filename']
project = dataset_dict['project']
dataset = dataset_dict['dataset']
short_name = dataset_dict['short_name']
mip = dataset_dict['mip']
frequency = dataset_dict['frequency']
fix_dir = dataset_dict['fix_dir']

# Load dataset, apply fixes and extract target levels
filename = fix_file(
file=filename,
short_name=short_name,
@@ -1080,9 +1127,20 @@ def get_reference_levels(filename, project, dataset, short_name, mip,
mip=mip,
frequency=frequency,
)
cube = cubes[0]
return cubes[0]


def _get_reference_levels(levels_dict):
"""Get level definition from a reference dataset."""
cube = _get_fixed_cube(levels_dict)
try:
coord = cube.coord(axis='Z')
except iris.exceptions.CoordinateNotFoundError:
raise ValueError('z-coord not available in {}'.format(filename))
raise ValueError(
f"z-coord not available in {levels_dict['filename']}")
return coord.points.tolist()


def _get_reference_target_grid(grid_dict):
"""Get target grid for horizontal regridding from a reference dataset."""
return _get_fixed_cube(grid_dict)
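A hypothetical illustration of the key validation added in _get_fixed_cube (assuming this branch is installed; note that _get_fixed_cube is a private helper):

from esmvalcore.preprocessor._regrid import _get_fixed_cube

# Incomplete reference-dataset description: 'project', 'short_name', 'mip',
# 'frequency' and 'fix_dir' are missing, so a KeyError listing all
# necessary keys is raised.
try:
    _get_fixed_cube({'filename': '/path/to/reference.nc',
                     'dataset': 'REFERENCE'})
except KeyError as exc:
    print(exc)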