Moved extraction of reference datasets for (horizontal and vertical) regridding into preprocessor functions #1455

Status: Closed (wants to merge 4 commits)
44 changes: 32 additions & 12 deletions esmvalcore/_recipe.py
@@ -53,7 +53,6 @@
from .preprocessor._regrid import (
_spec_to_latlonvals,
get_cmor_levels,
get_reference_levels,
parse_cell_spec,
)

@@ -153,9 +152,15 @@ def _update_target_levels(variable, variables, settings, config_user):
if not isinstance(levels, dict):
return

# If a CMOR table and a coordinate are specified, extract the target levels
# from the CMOR tables
if 'cmor_table' in levels and 'coordinate' in levels:
settings['extract_levels']['levels'] = get_cmor_levels(
levels['cmor_table'], levels['coordinate'])

# If a reference dataset is specified, add information to the levels dict
# (this is handled in the preprocessor itself). If the current dataset is
# the reference dataset, remove this preprocessing step.
elif 'dataset' in levels:
dataset = levels['dataset']
if variable['dataset'] == dataset:
@@ -164,15 +169,15 @@ def _update_target_levels(variable, variables, settings, config_user):
variable_data = _get_dataset_info(dataset, variables)
filename = _dataset_to_file(variable_data, config_user)
fix_dir = f"{os.path.splitext(variable_data['filename'])[0]}_fixed"
settings['extract_levels']['levels'] = get_reference_levels(
filename=filename,
project=variable_data['project'],
dataset=dataset,
short_name=variable_data['short_name'],
mip=variable_data['mip'],
frequency=variable_data['frequency'],
fix_dir=fix_dir,
)
settings['extract_levels']['levels'].update({
'filename': filename,
'project': variable_data['project'],
'dataset': dataset,
'short_name': variable_data['short_name'],
'mip': variable_data['mip'],
'frequency': variable_data['frequency'],
'fix_dir': fix_dir,
})


def _update_target_grid(variable, variables, settings, config_user):
@@ -183,11 +188,26 @@

grid = _special_name_to_dataset(variable, grid)

# If a reference dataset is specified, add information to the target_grid
# dict (this is handled in the preprocessor itself). If the current dataset
# is the reference dataset, remove this preprocessing step.
if variable['dataset'] == grid:
del settings['regrid']
elif any(grid == v['dataset'] for v in variables):
settings['regrid']['target_grid'] = _dataset_to_file(
_get_dataset_info(grid, variables), config_user)
variable_data = _get_dataset_info(grid, variables)
filename = _dataset_to_file(variable_data, config_user)
fix_dir = f"{os.path.splitext(variable_data['filename'])[0]}_fixed"
settings['regrid']['target_grid'] = {
'filename': filename,
'project': variable_data['project'],
'dataset': grid,
'short_name': variable_data['short_name'],
'mip': variable_data['mip'],
'frequency': variable_data['frequency'],
'fix_dir': fix_dir,
}

# target_grid is either an MxN grid spec or a grid spec described by a dict
else:
# Check that MxN grid spec is correct
target_grid = settings['regrid']['target_grid']
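For orientation (not part of the diff): the recipe code now only records the reference dataset's metadata and defers loading it to the preprocessor. A rough, illustrative sketch of the resulting settings entry, using a hypothetical reference dataset named REFERENCE and placeholder paths:

# Illustrative sketch: what settings['extract_levels']['levels'] may look
# like after _update_target_levels() when the recipe requests the levels of
# a (hypothetical) reference dataset called REFERENCE.
settings = {'extract_levels': {'levels': {'dataset': 'REFERENCE'},
                               'scheme': 'linear'}}
settings['extract_levels']['levels'].update({
    'filename': '/path/to/REFERENCE_ta.nc',    # placeholder path
    'project': 'OBS',                          # illustrative metadata
    'dataset': 'REFERENCE',
    'short_name': 'ta',
    'mip': 'Amon',
    'frequency': 'mon',
    'fix_dir': '/path/to/REFERENCE_ta_fixed',  # placeholder directory
})
# The preprocessor later resolves this dict via _get_reference_levels().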
144 changes: 101 additions & 43 deletions esmvalcore/preprocessor/_regrid.py
@@ -451,10 +451,10 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):

Note that the target grid can be a cube (:py:class:`~iris.cube.Cube`),
path to a cube (``str``), a grid spec (``str``) in the form
of `MxN`, or a ``dict`` specifying the target grid.
of `MxN`, a ``dict`` specifying the target grid, or a ``dict`` specifying a
reference dataset.

For the latter, the ``target_grid`` should be a ``dict`` with the
following keys:
To specify a target grid with a ``dict``, the following keys are necessary:

- ``start_longitude``: longitude at the center of the first grid cell.
- ``end_longitude``: longitude at the center of the last grid cell.
@@ -464,6 +464,26 @@
- ``end_latitude``: latitude at the center of the last grid cell.
- ``step_latitude``: constant latitude distance between grid cell centers.

To specify a reference dataset with a ``dict``, the following keys are
necessary:

- ``filename``: path to the file that contains the reference grid.
- ``project``: project of the reference dataset.
- ``dataset``: name of the reference dataset.
- ``short_name``: name of the variable in the reference dataset.
- ``mip``: name of the MIP table used.
- ``frequency``: temporal frequency of the reference dataset.
- ``fix_dir``: path to the directory where the new file is saved in case
fixes are necessary.

Note
----
If ``target_grid`` is a ``dict`` and it does not contain the key ``filename``,
it is automatically assumed that you specified the target grid with
``start_longitude``, ``end_longitude``, etc. If a valid reference dataset
is specified, fixes are applied to the reference dataset prior to loading
the data.

Review comment (Contributor): wait what? A grid is specified by an MxN specification, not by defining a box, am confusado here

Review comment (Contributor): ah nevermind, it's the _spec_to_latlonvals() stuff - can you add a pointer to that func here maybe, that confused me, prob gonna confuse somebody else too

Parameters
----------
cube : :py:class:`~iris.cube.Cube`
@@ -476,9 +496,9 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):
of the form ``MxN``, which specifies the extent of the cell, longitude
by latitude (degrees) for a global, regular target grid.

Alternatively, a dictionary with a regional target grid may
be specified (see above).

Alternatively, a dictionary with a regional target grid or a
dictionary describing the reference dataset may be provided (see
above).
scheme : str or dict
The regridding scheme to perform. If both source and target grid are
structured (regular or irregular), can be one of the built-in schemes
@@ -499,6 +519,13 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):
:py:class:`~iris.cube.Cube`
Regridded cube.

Raises
------
KeyError
If ``target_grid`` is a :obj:`dict` describing a reference dataset
(i.e., it contains a ``filename``) and at least one of the other
necessary keys is missing.

See Also
--------
extract_levels : Perform vertical regridding.
@@ -589,8 +616,13 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):
ycoord.coord_system = src_cs

elif isinstance(target_grid, dict):
# Generate a target grid from the provided specification,
target_grid = _regional_stock_cube(target_grid)
# Use reference dataset to extract grid
if 'filename' in target_grid:
target_grid = _get_reference_target_grid(target_grid)

# Generate a target grid from the provided specification
else:
target_grid = _regional_stock_cube(target_grid)

if not isinstance(target_grid, iris.cube.Cube):
raise ValueError('Expecting a cube, got {}.'.format(target_grid))
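Illustrative usage of the target_grid forms accepted after this change; cube stands for an already-loaded iris cube, and the reference-file path and metadata below are placeholders:

from esmvalcore.preprocessor import regrid

# Global MxN grid spec (unchanged behaviour).
regridded = regrid(cube, target_grid='2.5x2.5', scheme='linear')

# Regional grid spec, resolved internally via _regional_stock_cube().
regridded = regrid(
    cube,
    target_grid={
        'start_longitude': 0, 'end_longitude': 355, 'step_longitude': 5,
        'start_latitude': -85, 'end_latitude': 85, 'step_latitude': 5,
    },
    scheme='linear',
)

# New in this PR: a reference-dataset dict; the presence of 'filename'
# selects this branch, and fixes are applied before the grid is read.
regridded = regrid(
    cube,
    target_grid={
        'filename': '/path/to/reference.nc',   # placeholder path
        'project': 'CMIP6',                    # illustrative metadata
        'dataset': 'REFERENCE',
        'short_name': 'tas',
        'mip': 'Amon',
        'frequency': 'mon',
        'fix_dir': '/path/to/fixed',           # placeholder directory
    },
    scheme='linear',
)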
@@ -894,12 +926,21 @@ def extract_levels(cube,
----------
cube : iris.cube.Cube
The source cube to be vertically interpolated.
levels : ArrayLike
levels : ArrayLike or dict
One or more target levels for the vertical interpolation. Assumed
to be in the same S.I. units of the source cube vertical dimension
coordinate. If the requested levels are sufficiently close to the
levels of the cube, cube slicing will take place instead of
interpolation.
interpolation. If a :obj:`dict` is given, a reference dataset is used
to infer the target levels. The :obj:`dict` needs to include the
following keys: ``filename`` (path to the file that contains the
reference levels), ``project`` (project of the reference dataset),
``dataset`` (name of the reference dataset), ``short_name`` (name of
the variable in the reference dataset), ``mip`` (name of the MIP table
used), ``frequency`` (temporal frequency of the reference dataset), and
``fix_dir`` (path to the directory where the new file is saved in case
fixes are necessary). Fixes are applied to the reference dataset prior
to loading it.
scheme : str
The vertical interpolation scheme to use. Choose from
'linear',
@@ -929,13 +970,22 @@
iris.cube.Cube
A cube with the requested vertical levels.

Raises
------
KeyError
If ``levels`` is a :obj:`dict` and at least one of the necessary keys
is missing.

See Also
--------
regrid : Perform horizontal regridding.
"""
interpolation, extrapolation = parse_vertical_scheme(scheme)

# If a reference dataset is specified, load it and extract the levels
if isinstance(levels, dict):
levels = _get_reference_levels(levels)

# Ensure we have a non-scalar array of levels.
levels = np.array(levels, ndmin=1)

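Illustrative usage of the two forms of levels accepted after this change; cube stands for an already-loaded iris cube, and the reference-dataset entries are placeholders:

from esmvalcore.preprocessor import extract_levels

# Explicit target levels (unchanged behaviour), in the units of the cube's
# vertical coordinate.
interpolated = extract_levels(cube, levels=[100000.0, 85000.0, 50000.0],
                              scheme='linear')

# New in this PR: infer the target levels from a reference dataset.
interpolated = extract_levels(
    cube,
    levels={
        'filename': '/path/to/reference.nc',   # placeholder path
        'project': 'CMIP6',                    # illustrative metadata
        'dataset': 'REFERENCE',
        'short_name': 'ta',
        'mip': 'Amon',
        'frequency': 'mon',
        'fix_dir': '/path/to/fixed',           # placeholder directory
    },
    scheme='linear',
)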
@@ -1032,37 +1082,34 @@ def get_cmor_levels(cmor_table, coordinate):
coordinate, cmor_table))


def get_reference_levels(filename, project, dataset, short_name, mip,
frequency, fix_dir):
"""Get level definition from a reference dataset.

Parameters
----------
filename: str
Path to the reference file
project : str
Name of the project
dataset : str
Name of the dataset
short_name : str
Name of the variable
mip : str
Name of the mip table
frequency : str
Time frequency
fix_dir : str
Output directory for fixed data

Returns
-------
list[float]

Raises
------
ValueError:
If the dataset is not defined, the coordinate does not specify any
levels or the string is badly formatted.
"""
def _get_fixed_cube(dataset_dict):
"""Get fixed cube that can be used as reference for regridding."""
necessary_keys = [
'filename',
'project',
'dataset',
'short_name',
'mip',
'frequency',
'fix_dir',
]
for key in necessary_keys:
if key not in dataset_dict:
raise KeyError(
f"Necessary key '{key}' is missing in description of "
f"reference dataset. If the regridding target grid/levels is "
f"specified with a dict describing a reference dataset, the "
f"following keys are necessary: {necessary_keys}. Got "
f"{dataset_dict}")
filename = dataset_dict['filename']
project = dataset_dict['project']
dataset = dataset_dict['dataset']
short_name = dataset_dict['short_name']
mip = dataset_dict['mip']
frequency = dataset_dict['frequency']
fix_dir = dataset_dict['fix_dir']

# Load dataset, apply fixes and extract target levels
filename = fix_file(
file=filename,
short_name=short_name,
@@ -1080,9 +1127,20 @@ def get_reference_levels(filename, project, dataset, short_name, mip,
mip=mip,
frequency=frequency,
)
cube = cubes[0]
return cubes[0]


def _get_reference_levels(levels_dict):
"""Get level definition from a reference dataset."""
cube = _get_fixed_cube(levels_dict)
try:
coord = cube.coord(axis='Z')
except iris.exceptions.CoordinateNotFoundError:
raise ValueError('z-coord not available in {}'.format(filename))
raise ValueError(
f"z-coord not available in {levels_dict['filename']}")
return coord.points.tolist()


def _get_reference_target_grid(grid_dict):
"""Get target grid for horizontal regridding from a reference dataset."""
return _get_fixed_cube(grid_dict)
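A hypothetical illustration of the key validation added in _get_fixed_cube (assuming this branch is installed; note that _get_fixed_cube is a private helper):

from esmvalcore.preprocessor._regrid import _get_fixed_cube

# Incomplete reference-dataset description: 'project', 'short_name', 'mip',
# 'frequency' and 'fix_dir' are missing, so a KeyError listing all
# necessary keys is raised.
try:
    _get_fixed_cube({'filename': '/path/to/reference.nc',
                     'dataset': 'REFERENCE'})
except KeyError as exc:
    print(exc)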