From 02128bf78f9a856fd46db4e5354ec5d744ee2def Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Fri, 3 Mar 2023 10:57:05 +0100 Subject: [PATCH] Fixed race condition that may result in errors in `cleanup` and deprecate `cleanup` (#1949) Co-authored-by: Bouwe Andela --- esmvalcore/_main.py | 26 ++- esmvalcore/_recipe/recipe.py | 11 - esmvalcore/cmor/_fixes/cmip6/cesm2.py | 23 +- esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py | 14 +- esmvalcore/cmor/_fixes/emac/emac.py | 6 +- esmvalcore/cmor/_fixes/fix.py | 78 +++++-- esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py | 8 +- esmvalcore/cmor/fix.py | 9 +- esmvalcore/config/_config_object.py | 6 + esmvalcore/config/_config_validators.py | 2 + esmvalcore/dataset.py | 61 +++-- esmvalcore/preprocessor/__init__.py | 9 +- esmvalcore/preprocessor/_io.py | 30 ++- .../cmor/_fixes/cmip6/test_cesm2.py | 4 +- .../cmor/_fixes/cmip6/test_cesm2_waccm.py | 6 +- tests/integration/cmor/_fixes/test_fix.py | 214 ++++++++++-------- .../preprocessor/_io/test_cleanup.py | 9 + tests/integration/recipe/test_recipe.py | 20 +- tests/unit/main/test_esmvaltool.py | 16 ++ .../preprocessor/test_preprocessor_file.py | 35 +++ tests/unit/recipe/test_recipe.py | 1 - tests/unit/test_dataset.py | 36 ++- 22 files changed, 429 insertions(+), 195 deletions(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index c941e46977..0a0f563ff5 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -456,10 +456,28 @@ def _run(self, recipe: Path, session) -> None: def _clean_preproc(session): import shutil - if session["remove_preproc_dir"] and session.preproc_dir.exists(): - logger.info("Removing preproc containing preprocessed data") - logger.info("If this data is further needed, then") - logger.info("set remove_preproc_dir to false in config-user.yml") + if (not session['save_intermediary_cubes'] and + session._fixed_file_dir.exists()): + logger.debug( + "Removing `preproc/fixed_files` directory containing fixed " + "data" + ) + logger.debug( + "If this data is further needed, then set " + "`save_intermediary_cubes` to `true` and `remove_preproc_dir` " + "to `false` in your user configuration file" + ) + shutil.rmtree(session._fixed_file_dir) + + if session['remove_preproc_dir'] and session.preproc_dir.exists(): + logger.info( + "Removing `preproc` directory containing preprocessed data" + ) + logger.info( + "If this data is further needed, then set " + "`remove_preproc_dir` to `false` in your user configuration " + "file" + ) shutil.rmtree(session.preproc_dir) @staticmethod diff --git a/esmvalcore/_recipe/recipe.py b/esmvalcore/_recipe/recipe.py index 4f2a45c9c4..4644ef524b 100644 --- a/esmvalcore/_recipe/recipe.py +++ b/esmvalcore/_recipe/recipe.py @@ -223,17 +223,6 @@ def _get_default_settings(dataset): 'units': facets['units'], } - # Clean up fixed files - if not session['save_intermediary_cubes']: - fix_dirs = [] - for item in [dataset] + dataset.supplementaries: - output_file = _get_output_file(item.facets, session.preproc_dir) - fix_dir = f"{output_file.with_suffix('')}_fixed" - fix_dirs.append(fix_dir) - settings['cleanup'] = { - 'remove': fix_dirs, - } - # Strip supplementary variables before saving settings['remove_supplementary_variables'] = {} diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2.py index 51bc04685e..134f68b29c 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2.py @@ -18,9 +18,16 @@ class Cl(Fix): """Fixes for ``cl``.""" - def _fix_formula_terms(self, filepath, output_dir): + def _fix_formula_terms( + self, + filepath, + output_dir, + add_unique_suffix=False, + ): """Fix ``formula_terms`` attribute.""" - new_path = self.get_fixed_filepath(output_dir, filepath) + new_path = self.get_fixed_filepath( + output_dir, filepath, add_unique_suffix=add_unique_suffix + ) copyfile(filepath, new_path) dataset = Dataset(new_path, mode='a') dataset.variables['lev'].formula_terms = 'p0: p0 a: a b: b ps: ps' @@ -29,7 +36,7 @@ def _fix_formula_terms(self, filepath, output_dir): dataset.close() return new_path - def fix_file(self, filepath, output_dir): + def fix_file(self, filepath, output_dir, add_unique_suffix=False): """Fix hybrid pressure coordinate. Adds missing ``formula_terms`` attribute to file. @@ -45,8 +52,10 @@ def fix_file(self, filepath, output_dir): ---------- filepath : str Path to the original file. - output_dir : str - Path of the directory where the fixed file is saved to. + output_dir: Path + Output directory for fixed files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. Returns ------- @@ -54,7 +63,9 @@ def fix_file(self, filepath, output_dir): Path to the fixed file. """ - new_path = self._fix_formula_terms(filepath, output_dir) + new_path = self._fix_formula_terms( + filepath, output_dir, add_unique_suffix=add_unique_suffix + ) dataset = Dataset(new_path, mode='a') dataset.variables['a_bnds'][:] = dataset.variables['a_bnds'][::-1, :] dataset.variables['b_bnds'][:] = dataset.variables['b_bnds'][::-1, :] diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py index 200505f12b..f7263a00dd 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py @@ -1,17 +1,17 @@ """Fixes for CESM2-WACCM model.""" from netCDF4 import Dataset +from ..common import SiconcFixScalarCoord from .cesm2 import Cl as BaseCl from .cesm2 import Fgco2 as BaseFgco2 from .cesm2 import Omon as BaseOmon from .cesm2 import Tas as BaseTas -from ..common import SiconcFixScalarCoord class Cl(BaseCl): """Fixes for cl.""" - def fix_file(self, filepath, output_dir): + def fix_file(self, filepath, output_dir, add_unique_suffix=False): """Fix hybrid pressure coordinate. Adds missing ``formula_terms`` attribute to file. @@ -27,8 +27,10 @@ def fix_file(self, filepath, output_dir): ---------- filepath : str Path to the original file. - output_dir : str - Path of the directory where the fixed file is saved to. + output_dir: Path + Output directory for fixed files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. Returns ------- @@ -36,7 +38,9 @@ def fix_file(self, filepath, output_dir): Path to the fixed file. """ - new_path = self._fix_formula_terms(filepath, output_dir) + new_path = self._fix_formula_terms( + filepath, output_dir, add_unique_suffix=add_unique_suffix + ) dataset = Dataset(new_path, mode='a') dataset.variables['a_bnds'][:] = dataset.variables['a_bnds'][:, ::-1] dataset.variables['b_bnds'][:] = dataset.variables['b_bnds'][:, ::-1] diff --git a/esmvalcore/cmor/_fixes/emac/emac.py b/esmvalcore/cmor/_fixes/emac/emac.py index a65a0a21fa..958efa6204 100644 --- a/esmvalcore/cmor/_fixes/emac/emac.py +++ b/esmvalcore/cmor/_fixes/emac/emac.py @@ -37,7 +37,7 @@ class AllVars(EmacFix): 'kg/m**2s': 'kg m-2 s-1', } - def fix_file(self, filepath, output_dir): + def fix_file(self, filepath, output_dir, add_unique_suffix=False): """Fix file. Fixes hybrid pressure level coordinate. @@ -51,7 +51,9 @@ def fix_file(self, filepath, output_dir): """ if 'alevel' not in self.vardef.dimensions: return filepath - new_path = self.get_fixed_filepath(output_dir, filepath) + new_path = self.get_fixed_filepath( + output_dir, filepath, add_unique_suffix=add_unique_suffix + ) copyfile(filepath, new_path) with Dataset(new_path, mode='a') as dataset: if 'formula_terms' in dataset.variables['lev'].ncattrs(): diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index db41dc3ff8..96a1c709c1 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -1,7 +1,9 @@ """Contains the base class for dataset fixes.""" +from __future__ import annotations + import importlib import inspect -import os +import tempfile from pathlib import Path from ..table import CMOR_TABLES @@ -16,36 +18,45 @@ def __init__(self, vardef, extra_facets=None): Parameters ---------- vardef: str - CMOR table entry + CMOR table entry. extra_facets: dict, optional Extra facets are mainly used for data outside of the big projects like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. + """ self.vardef = vardef if extra_facets is None: extra_facets = {} self.extra_facets = extra_facets - def fix_file(self, filepath: Path, output_dir: Path) -> Path: + def fix_file( + self, + filepath: Path, + output_dir: Path, + add_unique_suffix: bool = False, + ) -> Path: """Apply fixes to the files prior to creating the cube. - Should be used only to fix errors that prevent loading or can - not be fixed in the cube (i.e. those related with missing_value - and _FillValue) + Should be used only to fix errors that prevent loading or cannot be + fixed in the cube (e.g., those related to `missing_value` or + `_FillValue`). Parameters ---------- filepath: Path - file to fix + File to fix. output_dir: Path - path to the folder to store the fixed files, if required + Output directory for fixed files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. Returns ------- Path Path to the corrected file. It can be different from the original filepath if a fix has been applied, but if not it should be the - original filepath + original filepath. + """ return filepath @@ -59,12 +70,13 @@ def fix_metadata(self, cubes): Parameters ---------- cubes: iris.cube.CubeList - Cubes to fix + Cubes to fix. Returns ------- iris.cube.CubeList Fixed cubes. They can be different instances. + """ return cubes @@ -74,19 +86,21 @@ def get_cube_from_list(self, cubes, short_name=None): Parameters ---------- cubes : iris.cube.CubeList - List of cubes to search - short_name : str - Cube's variable short name. If None, short name is the class name + List of cubes to search. + short_name : str or None + Cube's variable short name. If `None`, `short name` is the class + name. Raises ------ Exception - If no cube is found + If no cube is found. Returns ------- iris.Cube Variable's cube + """ if short_name is None: short_name = self.vardef.short_name @@ -103,12 +117,13 @@ def fix_data(self, cube): Parameters ---------- cube: iris.cube.Cube - Cube to fix + Cube to fix. Returns ------- iris.cube.Cube Fixed cube. It can be a difference instance. + """ return cube @@ -151,8 +166,9 @@ def get_fixes(project, dataset, mip, short_name, extra_facets=None): Returns ------- - list(Fix) + list[Fix] Fixes to apply for the given data. + """ cmor_table = CMOR_TABLES[project] vardef = cmor_table.get_variable(mip, short_name) @@ -197,21 +213,35 @@ def get_fixes(project, dataset, mip, short_name, extra_facets=None): return fixes @staticmethod - def get_fixed_filepath(output_dir, filepath): + def get_fixed_filepath( + output_dir: str | Path, + filepath: str | Path, + add_unique_suffix: bool = False, + ) -> Path: """Get the filepath for the fixed file. Parameters ---------- - output_dir: str - Output directory. - filepath: str + output_dir: Path + Output directory for fixed files. Will be created if it does not + exist yet. + filepath: str or Path Original path. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. Returns ------- - str + Path Path to the fixed file. + """ - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - return os.path.join(output_dir, os.path.basename(filepath)) + output_dir = Path(output_dir) + if add_unique_suffix: + parent_dir = output_dir.parent + parent_dir.mkdir(parents=True, exist_ok=True) + prefix = output_dir.name + output_dir = Path(tempfile.mkdtemp(prefix=prefix, dir=parent_dir)) + else: + output_dir.mkdir(parents=True, exist_ok=True) + return output_dir / Path(filepath).name diff --git a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py index d590daf7af..362d950f6c 100644 --- a/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py +++ b/esmvalcore/cmor/_fixes/ipslcm/ipsl_cm6.py @@ -16,7 +16,7 @@ class AllVars(Fix): """Fixes for all IPSLCM variables.""" - def fix_file(self, filepath, output_dir): + def fix_file(self, filepath, output_dir, add_unique_suffix=False): """Select IPSLCM variable in filepath. This is done only if input file is a multi-variable one. This @@ -48,10 +48,12 @@ def fix_file(self, filepath, output_dir): # Proceed with CDO selvar varname = self.extra_facets.get(VARNAME_KEY, self.vardef.short_name) alt_filepath = str(filepath).replace(".nc", "_cdo_selected.nc") - outfile = self.get_fixed_filepath(output_dir, alt_filepath) + outfile = self.get_fixed_filepath( + output_dir, alt_filepath, add_unique_suffix=add_unique_suffix + ) tim1 = time.time() logger.debug("Using CDO for selecting %s in %s", varname, filepath) - command = ["cdo", "-selvar,%s" % varname, str(filepath), outfile] + command = ["cdo", "-selvar,%s" % varname, str(filepath), str(outfile)] subprocess.run(command, check=True) logger.debug("CDO selection done in %.2f seconds", time.time() - tim1) return outfile diff --git a/esmvalcore/cmor/fix.py b/esmvalcore/cmor/fix.py index ba10e28680..7afab44938 100644 --- a/esmvalcore/cmor/fix.py +++ b/esmvalcore/cmor/fix.py @@ -23,6 +23,7 @@ def fix_file( dataset: str, mip: str, output_dir: Path, + add_unique_suffix: bool = False, **extra_facets, ) -> Path: """Fix files before ESMValTool can load them. @@ -46,6 +47,8 @@ def fix_file( Variable's MIP. output_dir: Path Output directory for fixed files. + add_unique_suffix: bool, optional (default: False) + Adds a unique suffix to `output_dir` for thread safety. **extra_facets: dict, optional Extra facets are mainly used for data outside of the big projects like CMIP, CORDEX, obs4MIPs. For details, see :ref:`extra_facets`. @@ -55,8 +58,6 @@ def fix_file( Path: Path to the fixed file. """ - if not output_dir.exists(): - output_dir.mkdir(parents=True, exist_ok=True) # Update extra_facets with variable information given as regular arguments # to this function extra_facets.update({ @@ -71,7 +72,9 @@ def fix_file( mip=mip, short_name=short_name, extra_facets=extra_facets): - file = fix.fix_file(file, output_dir) + file = fix.fix_file( + file, output_dir, add_unique_suffix=add_unique_suffix + ) return file diff --git a/esmvalcore/config/_config_object.py b/esmvalcore/config/_config_object.py index c8b1f50470..425788b81a 100644 --- a/esmvalcore/config/_config_object.py +++ b/esmvalcore/config/_config_object.py @@ -159,6 +159,7 @@ class Session(ValidatedConfig): relative_run_dir = Path('run') relative_main_log = Path('run', 'main_log.txt') relative_main_log_debug = Path('run', 'main_log_debug.txt') + _relative_fixed_file_dir = Path('preproc', 'fixed_files') def __init__(self, config: dict, name: str = 'session'): super().__init__(config) @@ -214,6 +215,11 @@ def main_log_debug(self): """Return main log debug file.""" return self.session_dir / self.relative_main_log_debug + @property + def _fixed_file_dir(self): + """Return fixed file directory.""" + return self.session_dir / self._relative_fixed_file_dir + def to_config_user(self) -> dict: """Turn the `Session` object into a recipe-compatible dict. diff --git a/esmvalcore/config/_config_validators.py b/esmvalcore/config/_config_validators.py index cc65369c0c..736a6ba689 100644 --- a/esmvalcore/config/_config_validators.py +++ b/esmvalcore/config/_config_validators.py @@ -355,6 +355,7 @@ def deprecate_offline( Raw input value for ``offline`` option. validated_value: Any Validated value for ``offline`` option. + """ option = 'offline' deprecated_version = '2.8.0' @@ -386,6 +387,7 @@ def deprecate_use_legacy_supplementaries( Raw input value for ``use_legacy_supplementaries`` option. validated_value: Any Validated value for ``use_legacy_supplementaries`` option. + """ option = 'use_legacy_supplementaries' deprecated_version = '2.8.0' diff --git a/esmvalcore/dataset.py b/esmvalcore/dataset.py index 3b88e2bc2e..9293c42107 100644 --- a/esmvalcore/dataset.py +++ b/esmvalcore/dataset.py @@ -96,6 +96,22 @@ class Dataset: Facets describing the dataset. """ + _SUMMARY_FACETS = ( + 'short_name', + 'mip', + 'project', + 'dataset', + 'rcm_version', + 'driver', + 'domain', + 'activity', + 'exp', + 'ensemble', + 'grid', + 'version', + ) + """Facets used to create a summary of a Dataset instance.""" + def __init__(self, **facets: FacetValue): self.facets: Facets = {} @@ -433,6 +449,24 @@ def facets2str(facets): txt.append(f"session: '{self.session.session_name}'") return "\n".join(txt) + def _get_joined_summary_facets( + self, + separator: str, + join_lists: bool = False, + ) -> str: + """Get string consisting of joined summary facets.""" + summary_facets_vals = [] + for key in self._SUMMARY_FACETS: + if key not in self.facets: + continue + val = self.facets[key] + if join_lists and isinstance(val, (tuple, list)): + val = '-'.join(str(elem) for elem in val) + else: + val = str(val) + summary_facets_vals.append(val) + return separator.join(summary_facets_vals) + def summary(self, shorten: bool = False) -> str: """Summarize the content of dataset. @@ -449,28 +483,12 @@ def summary(self, shorten: bool = False) -> str: if not shorten: return repr(self) - keys = ( - 'short_name', - 'mip', - 'project', - 'dataset', - 'rcm_version', - 'driver', - 'domain', - 'activity', - 'exp', - 'ensemble', - 'grid', - 'version', - ) title = self.__class__.__name__ - txt = ( - f"{title}: " + - ", ".join(str(self.facets[k]) for k in keys if k in self.facets)) + txt = f"{title}: " + self._get_joined_summary_facets(', ') def supplementary_summary(dataset): return ", ".join( - str(dataset.facets[k]) for k in keys + str(dataset.facets[k]) for k in self._SUMMARY_FACETS if k in dataset.facets and dataset[k] != self.facets.get(k)) if self.supplementaries: @@ -699,10 +717,15 @@ def _load(self, callback) -> Cube: raise InputFilesNotFound(msg) output_file = _get_output_file(self.facets, self.session.preproc_dir) + fix_dir_prefix = Path( + self.session._fixed_file_dir, + self._get_joined_summary_facets('_', join_lists=True) + '_', + ) settings: dict[str, dict[str, Any]] = {} settings['fix_file'] = { - 'output_dir': Path(f"{output_file.with_suffix('')}_fixed"), + 'output_dir': fix_dir_prefix, + 'add_unique_suffix': True, **self.facets, } settings['load'] = {'callback': callback} diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 34be8289fb..a57fb50373 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -493,10 +493,11 @@ def save(self): 'save', input_files=self._input_files, **self.settings['save']) - preprocess([], - 'cleanup', - input_files=self._input_files, - **self.settings.get('cleanup', {})) + if 'cleanup' in self.settings: + preprocess([], + 'cleanup', + input_files=self._input_files, + **self.settings['cleanup']) def close(self): """Close the file.""" diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index ba134968f8..564ec89fe3 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -347,7 +347,35 @@ def _get_debug_filename(filename, step): def cleanup(files, remove=None): - """Clean up after running the preprocessor.""" + """Clean up after running the preprocessor. + + Warning + ------- + .. deprecated:: 2.8.0 + This function is no longer used and has been deprecated since + ESMValCore version 2.8.0. It is scheduled for removal in version + 2.10.0. + + Parameters + ---------- + files: list of Path + Preprocessor output files (will not be removed if not in `removed`). + remove: list of Path or None, optional (default: None) + Files or directories to remove. + + Returns + ------- + list of Path + Preprocessor output files. + + """ + deprecation_msg = ( + "The preprocessor function `cleanup` has been deprecated in " + "ESMValCore version 2.8.0 and is scheduled for removal in version " + "2.10.0." + ) + warnings.warn(deprecation_msg, ESMValCoreDeprecationWarning) + if remove is None: remove = [] diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py index 212902dfed..55442a323a 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py @@ -90,7 +90,9 @@ def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): 'fixed_cesm2_cl.nc') fix = Cl(None) fixed_file = fix.fix_file(nc_path, tmp_path) - mock_get_filepath.assert_called_once_with(tmp_path, nc_path) + mock_get_filepath.assert_called_once_with( + tmp_path, nc_path, add_unique_suffix=False + ) fixed_cubes = iris.load(fixed_file) assert len(fixed_cubes) == 2 var_names = [cube.var_name for cube in fixed_cubes] diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py index 36acb3c5a8..b9075b6716 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py @@ -45,8 +45,10 @@ def test_cl_fix_file(mock_get_filepath, tmp_path, test_data_path): mock_get_filepath.return_value = os.path.join(tmp_path, 'fixed_cesm2_waccm_cl.nc') fix = Cl(None) - fixed_file = fix.fix_file(str(nc_path), tmp_path) - mock_get_filepath.assert_called_once_with(tmp_path, str(nc_path)) + fixed_file = fix.fix_file(nc_path, tmp_path) + mock_get_filepath.assert_called_once_with( + tmp_path, nc_path, add_unique_suffix=False + ) fixed_cube = iris.load_cube(fixed_file) lev_coord = fixed_cube.coord(var_name='lev') a_coord = fixed_cube.coord(var_name='a') diff --git a/tests/integration/cmor/_fixes/test_fix.py b/tests/integration/cmor/_fixes/test_fix.py index 16629549fe..1bf38d1425 100644 --- a/tests/integration/cmor/_fixes/test_fix.py +++ b/tests/integration/cmor/_fixes/test_fix.py @@ -1,9 +1,7 @@ """Integration tests for fixes.""" import os -import shutil -import tempfile -import unittest +from pathlib import Path import pytest from iris.cube import Cube @@ -13,100 +11,136 @@ from esmvalcore.cmor._fixes.cmip5.cesm1_bgc import Gpp from esmvalcore.cmor._fixes.cmip6.cesm2 import Omon, Tos from esmvalcore.cmor._fixes.cordex.cnrm_cerfacs_cnrm_cm5.cnrm_aladin63 import ( - Tas) + Tas, +) from esmvalcore.cmor._fixes.cordex.cordex_fixes import AllVars from esmvalcore.cmor.fix import Fix -class TestFix(unittest.TestCase): - def setUp(self): - """Set up temp folder.""" - self.temp_folder = tempfile.mkdtemp() - - def tearDown(self): - """Remove temp folder.""" - shutil.rmtree(self.temp_folder) - - def test_get_fix(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgco2'), [FgCo2(None)]) - - def test_get_fix_case_insensitive(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgCo2'), [FgCo2(None)]) - - def test_get_fix_cordex(self): - self.assertListEqual( - Fix.get_fixes( - 'CORDEX', - 'CNRM-ALADIN63', - 'Amon', - 'tas', - extra_facets={'driver': 'CNRM-CERFACS-CNRM-CM5'}), - [Tas(None), AllVars(None)]) - - def test_get_grid_fix_cordex(self): - self.assertListEqual( - Fix.get_fixes( - 'CORDEX', - 'CNRM-ALADIN53', - 'Amon', - 'tas', - extra_facets={'driver': 'CNRM-CERFACS-CNRM-CM5'}), - [AllVars(None)]) - - def test_get_fixes_with_replace(self): - self.assertListEqual(Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'ch4'), - [Ch4(None)]) - - def test_get_fixes_with_generic(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'CESM1-BGC', 'Amon', 'gpp'), [Gpp(None)]) - - def test_get_fix_no_project(self): - with pytest.raises(KeyError): - Fix.get_fixes('BAD_PROJECT', 'BNU-ESM', 'Amon', 'ch4') - - def test_get_fix_no_model(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'BAD_MODEL', 'Amon', 'ch4'), []) - - def test_get_fix_no_var(self): - self.assertListEqual( - Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'BAD_VAR'), []) - - def test_get_fix_only_mip(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'thetao'), [Omon(None)]) - - def test_get_fix_only_mip_case_insensitive(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'omOn', 'thetao'), [Omon(None)]) - - def test_get_fix_mip_and_var(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'tos'), - [Tos(None), Omon(None)]) +def test_get_fix(): + assert Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgco2') == [FgCo2(None)] + + +def test_get_fix_case_insensitive(): + assert Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgCo2'), [FgCo2(None)] + + +def test_get_fix_cordex(): + fix = Fix.get_fixes( + 'CORDEX', + 'CNRM-ALADIN63', + 'Amon', + 'tas', + extra_facets={'driver': 'CNRM-CERFACS-CNRM-CM5'}, + ) + assert fix == [Tas(None), AllVars(None)] + + +def test_get_grid_fix_cordex(): + fix = Fix.get_fixes( + 'CORDEX', + 'CNRM-ALADIN53', + 'Amon', + 'tas', + extra_facets={'driver': 'CNRM-CERFACS-CNRM-CM5'}, + ) + assert fix == [AllVars(None)] + + +def test_get_fixes_with_replace(): + assert Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'ch4') == [Ch4(None)] + - def test_fix_metadata(self): - cube = Cube([0]) - reference = Cube([0]) +def test_get_fixes_with_generic(): + assert Fix.get_fixes('CMIP5', 'CESM1-BGC', 'Amon', 'gpp') == [Gpp(None)] - self.assertEqual(Fix(None).fix_metadata(cube), reference) - def test_fix_data(self): - cube = Cube([0]) - reference = Cube([0]) +def test_get_fix_no_project(): + with pytest.raises(KeyError): + Fix.get_fixes('BAD_PROJECT', 'BNU-ESM', 'Amon', 'ch4') - self.assertEqual(Fix(None).fix_data(cube), reference) - def test_fix_file(self): - filepath = 'sample_filepath' - self.assertEqual(Fix(None).fix_file(filepath, 'preproc'), filepath) +def test_get_fix_no_model(): + assert Fix.get_fixes('CMIP5', 'BAD_MODEL', 'Amon', 'ch4') == [] + + +def test_get_fix_no_var(): + assert Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'BAD_VAR') == [] + + +def test_get_fix_only_mip(): + assert Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'thetao') == [Omon(None)] + + +def test_get_fix_only_mip_case_insensitive(): + assert Fix.get_fixes('CMIP6', 'CESM2', 'omOn', 'thetao') == [Omon(None)] + + +def test_get_fix_mip_and_var(): + assert (Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'tos') == + [Tos(None), Omon(None)]) + - def test_fixed_filenam(self): - filepath = os.path.join(self.temp_folder, 'file.nc') - output_dir = os.path.join(self.temp_folder, 'fixed') - os.makedirs(output_dir) - fixed_filepath = Fix(None).get_fixed_filepath(output_dir, filepath) - self.assertTrue(fixed_filepath, os.path.join(output_dir, 'file.nc')) +def test_fix_metadata(): + cube = Cube([0]) + reference = Cube([0]) + assert Fix(None).fix_metadata(cube) == reference + + +def test_fix_data(): + cube = Cube([0]) + reference = Cube([0]) + assert Fix(None).fix_data(cube) == reference + + +def test_fix_file(): + filepath = 'sample_filepath' + assert Fix(None).fix_file(filepath, 'preproc') == filepath + + +def test_get_fixed_filepath_paths(tmp_path): + output_dir = tmp_path / 'fixed' + filepath = Path('this', 'is', 'a', 'file.nc') + assert not output_dir.is_dir() + fixed_path = Fix(None).get_fixed_filepath(output_dir, filepath) + assert output_dir.is_dir() + assert isinstance(fixed_path, Path) + assert fixed_path == tmp_path / 'fixed' / 'file.nc' + + +def test_get_fixed_filepath_temporary_paths(tmp_path): + output_dir = tmp_path / 'fixed' / 'prefix_1_' + filepath = Path('this', 'is', 'a', 'file.nc') + assert not output_dir.parent.is_dir() + fixed_path = Fix(None).get_fixed_filepath( + output_dir, filepath, add_unique_suffix=True + ) + assert fixed_path.parent.is_dir() + assert isinstance(fixed_path, Path) + assert fixed_path != tmp_path / 'fixed' / 'prefix_1_' / 'file.nc' + assert fixed_path.parent.name.startswith('prefix_1_') + assert fixed_path.name == 'file.nc' + + +def test_get_fixed_filepath_strs(tmp_path): + output_dir = os.path.join(str(tmp_path), 'fixed') + filepath = os.path.join('this', 'is', 'a', 'file.nc') + assert not Path(output_dir).is_dir() + fixed_path = Fix(None).get_fixed_filepath(output_dir, filepath) + assert Path(output_dir).is_dir() + assert isinstance(fixed_path, Path) + assert fixed_path == tmp_path / 'fixed' / 'file.nc' + + +def test_get_fixed_filepath_temporary_strs(tmp_path): + output_dir = os.path.join(str(tmp_path), 'fixed', 'prefix_1_') + filepath = os.path.join('this', 'is', 'a', 'file.nc') + assert not Path(output_dir).parent.is_dir() + fixed_path = Fix(None).get_fixed_filepath( + output_dir, filepath, add_unique_suffix=True + ) + assert fixed_path.parent.is_dir() + assert isinstance(fixed_path, Path) + assert fixed_path != tmp_path / 'fixed' / 'prefix_1_' / 'file.nc' + assert fixed_path.parent.name.startswith('prefix_1_') + assert fixed_path.name == 'file.nc' diff --git a/tests/integration/preprocessor/_io/test_cleanup.py b/tests/integration/preprocessor/_io/test_cleanup.py index e8bfdd6156..3ef98b8574 100644 --- a/tests/integration/preprocessor/_io/test_cleanup.py +++ b/tests/integration/preprocessor/_io/test_cleanup.py @@ -4,6 +4,9 @@ import tempfile import unittest +import pytest + +from esmvalcore.exceptions import ESMValCoreDeprecationWarning from esmvalcore.preprocessor import _io @@ -37,3 +40,9 @@ def test_cleanup_when_files_removed(self): _io.cleanup([], self.temp_paths) for path in self.temp_paths: self.assertFalse(os.path.exists(path)) + + def test_deprecation(self): + """Test that deprecation warning is properly raised.""" + msg = "cleanup" + with pytest.warns(ESMValCoreDeprecationWarning, match=msg): + _io.cleanup([], []) diff --git a/tests/integration/recipe/test_recipe.py b/tests/integration/recipe/test_recipe.py index 7ea3bd4f4a..b63a4bd7f7 100644 --- a/tests/integration/recipe/test_recipe.py +++ b/tests/integration/recipe/test_recipe.py @@ -83,7 +83,6 @@ DEFAULT_PREPROCESSOR_STEPS = ( 'load', - 'cleanup', 'remove_supplementary_variables', 'save', ) @@ -104,16 +103,13 @@ def create_test_file(filename, tracking_id=None): iris.save(cube, filename) -def _get_default_settings_for_chl(fix_dir, save_filename): +def _get_default_settings_for_chl(save_filename): """Get default preprocessor settings for chl.""" defaults = { 'load': { 'callback': 'default' }, 'remove_supplementary_variables': {}, - 'cleanup': { - 'remove': [fix_dir] - }, 'save': { 'compress': False, 'filename': save_filename, @@ -432,9 +428,7 @@ def test_default_preprocessor(tmp_path, patched_datafinder, session): preproc_dir = os.path.dirname(product.filename) assert preproc_dir.startswith(str(tmp_path)) - fix_dir = os.path.join( - preproc_dir, 'CMIP5_CanESM2_Oyr_historical_r1i1p1_chl_2000-2005_fixed') - defaults = _get_default_settings_for_chl(fix_dir, product.filename) + defaults = _get_default_settings_for_chl(product.filename) assert product.settings == defaults @@ -472,9 +466,7 @@ def test_default_preprocessor_custom_order(tmp_path, patched_datafinder, preproc_dir = os.path.dirname(product.filename) assert preproc_dir.startswith(str(tmp_path)) - fix_dir = os.path.join( - preproc_dir, 'CMIP5_CanESM2_Oyr_historical_r1i1p1_chl_2000-2005_fixed') - defaults = _get_default_settings_for_chl(fix_dir, product.filename) + defaults = _get_default_settings_for_chl(product.filename) assert product.settings == defaults @@ -564,17 +556,11 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, session): preproc_dir = os.path.dirname(product.filename) assert preproc_dir.startswith(str(tmp_path)) - fix_dir = os.path.join(preproc_dir, - 'CMIP5_CanESM2_fx_historical_r0i0p0_sftlf_fixed') - defaults = { 'load': { 'callback': 'default' }, 'remove_supplementary_variables': {}, - 'cleanup': { - 'remove': [fix_dir] - }, 'save': { 'compress': False, 'filename': product.filename, diff --git a/tests/unit/main/test_esmvaltool.py b/tests/unit/main/test_esmvaltool.py index cff43e6785..0bbe1b850d 100644 --- a/tests/unit/main/test_esmvaltool.py +++ b/tests/unit/main/test_esmvaltool.py @@ -30,6 +30,7 @@ def cfg(mocker, tmp_path): session.session_dir = output_dir / 'recipe_test' session.run_dir = session.session_dir / 'run_dir' session.preproc_dir = session.session_dir / 'preproc_dir' + session._fixed_file_dir = session.preproc_dir / 'fixed_files' cfg = mocker.Mock() cfg.start_session.return_value = session @@ -82,6 +83,7 @@ def test_run(mocker, session, search_esgf): session['log_level'] = 'default' session['config_file'] = '/path/to/config-user.yml' session['remove_preproc_dir'] = True + session['save_intermediary_cubes'] = False recipe = Path('/recipe_dir/recipe_test.yml') @@ -156,10 +158,24 @@ def test_run_session_dir_exists_alternative_fails(mocker, session): def test_clean_preproc_dir(session): session.preproc_dir.mkdir(parents=True) + session._fixed_file_dir.mkdir(parents=True) session['remove_preproc_dir'] = True + session['save_intermediary_cubes'] = False program = ESMValTool() program._clean_preproc(session) assert not session.preproc_dir.exists() + assert not session._fixed_file_dir.exists() + + +def test_do_not_clean_preproc_dir(session): + session.preproc_dir.mkdir(parents=True) + session._fixed_file_dir.mkdir(parents=True) + session['remove_preproc_dir'] = False + session['save_intermediary_cubes'] = True + program = ESMValTool() + program._clean_preproc(session) + assert session.preproc_dir.exists() + assert session._fixed_file_dir.exists() @mock.patch('esmvalcore._main.iter_entry_points') diff --git a/tests/unit/preprocessor/test_preprocessor_file.py b/tests/unit/preprocessor/test_preprocessor_file.py index df731e5655..3ebb3385d6 100644 --- a/tests/unit/preprocessor/test_preprocessor_file.py +++ b/tests/unit/preprocessor/test_preprocessor_file.py @@ -145,3 +145,38 @@ def test_close(): product.save.assert_called_once_with() product.save_provenance.assert_called_once_with() assert product._cubes is None + + +@mock.patch('esmvalcore.preprocessor.preprocess', autospec=True) +def test_save_no_cleanup(mock_preprocess): + """Test ``save``.""" + product = mock.create_autospec(PreprocessorFile, instance=True) + product.settings = {'save': {}} + product._cubes = mock.sentinel.cubes + product._input_files = mock.sentinel.input_files + + PreprocessorFile.save(product) + + assert mock_preprocess.mock_calls == [ + mock.call( + mock.sentinel.cubes, 'save', input_files=mock.sentinel.input_files + ), + ] + + +@mock.patch('esmvalcore.preprocessor.preprocess', autospec=True) +def test_save_cleanup(mock_preprocess): + """Test ``save``.""" + product = mock.create_autospec(PreprocessorFile, instance=True) + product.settings = {'save': {}, 'cleanup': {}} + product._cubes = mock.sentinel.cubes + product._input_files = mock.sentinel.input_files + + PreprocessorFile.save(product) + + assert mock_preprocess.mock_calls == [ + mock.call( + mock.sentinel.cubes, 'save', input_files=mock.sentinel.input_files + ), + mock.call([], 'cleanup', input_files=mock.sentinel.input_files), + ] diff --git a/tests/unit/recipe/test_recipe.py b/tests/unit/recipe/test_recipe.py index 603ab47ae7..b830351318 100644 --- a/tests/unit/recipe/test_recipe.py +++ b/tests/unit/recipe/test_recipe.py @@ -571,7 +571,6 @@ def test_get_default_settings(mocker): 'load': {'callback': 'default'}, 'remove_supplementary_variables': {}, 'save': {'compress': False, 'alias': 'sic'}, - 'cleanup': {'remove': ['/path/to/file_fixed']}, } diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 94101cc800..b8ebc209c8 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -47,6 +47,33 @@ def test_repr_supplementary(): """).strip() +@pytest.mark.parametrize( + "separator,join_lists,output", + [ + ('_', False, "1_d_dom_a_('e1', 'e2')_['ens2', 'ens1']_g1_v1"), + ('_', True, "1_d_dom_a_e1-e2_ens2-ens1_g1_v1"), + (' ', False, "1 d dom a ('e1', 'e2') ['ens2', 'ens1'] g1 v1"), + (' ', True, "1 d dom a e1-e2 ens2-ens1 g1 v1"), + ] +) +def test_get_joined_summary_facet(separator, join_lists, output): + ds = Dataset( + test='this should not appear', + rcm_version='1', + driver='d', + domain='dom', + activity='a', + exp=('e1', 'e2'), + ensemble=['ens2', 'ens1'], + grid='g1', + version='v1', + ) + joined_str = ds._get_joined_summary_facets( + separator, join_lists=join_lists + ) + assert joined_str == output + + def test_short_summary(): ds = Dataset( project='CMIP6', @@ -1613,7 +1640,11 @@ def test_load(mocker, session): ) dataset.session = session output_file = Path('/path/to/output.nc') - fix_dir = Path('/path/to/output_fixed') + fix_dir_prefix = Path( + session.preproc_dir, + 'fixed_files', + 'chl_Oyr_CMIP5_CanESM2_historical_r1i1p1_', + ) _get_output_file = mocker.patch.object( esmvalcore.dataset, '_get_output_file', @@ -1656,12 +1687,13 @@ def mock_preprocess(items, step, input_files, output_file, debug, 'callback': 'default' }, 'fix_file': { + 'add_unique_suffix': True, 'dataset': 'CanESM2', 'ensemble': 'r1i1p1', 'exp': 'historical', 'frequency': 'yr', 'mip': 'Oyr', - 'output_dir': fix_dir, + 'output_dir': fix_dir_prefix, 'project': 'CMIP5', 'short_name': 'chl', 'timerange': '2000/2005',