From 1ed9807c815f590b721e8fd906cf1e76fc3d261a Mon Sep 17 00:00:00 2001 From: Manuel Schlund <32543114+schlunma@users.noreply.github.com> Date: Thu, 15 Feb 2024 19:05:16 +0100 Subject: [PATCH] Do not read `~/.esmvaltool/config-user.yml` if `--config-file` is used (#2309) --- esmvalcore/_main.py | 14 +- esmvalcore/config/__init__.py | 6 +- esmvalcore/config/_config_object.py | 232 ++++++++++++++++------ tests/unit/config/test_config.py | 3 + tests/unit/config/test_config_object.py | 246 +++++++++++++++++++++++- 5 files changed, 429 insertions(+), 72 deletions(-) diff --git a/esmvalcore/_main.py b/esmvalcore/_main.py index afb0cc0476..1fa4e80ce5 100755 --- a/esmvalcore/_main.py +++ b/esmvalcore/_main.py @@ -353,8 +353,10 @@ def run(self, Recipe to run, as either the name of an installed recipe or the path to a non-installed one. config_file: str, optional - Configuration file to use. If not provided the file - ${HOME}/.esmvaltool/config-user.yml will be used. + Configuration file to use. Can be given as absolute or relative + path. In the latter case, search in the current working directory + and `${HOME}/.esmvaltool` (in that order). If not provided, the + file `${HOME}/.esmvaltool/config-user.yml` will be used. resume_from: list(str), optional Resume one or more previous runs by using preprocessor output files from these output directories. @@ -383,9 +385,15 @@ def run(self, """ from .config import CFG + # At this point, --config_file is already parsed if a valid file has + # been given (see + # https://github.com/ESMValGroup/ESMValCore/issues/2280), but no error + # has been raised if the file does not exist. Thus, reload the file + # here with `load_from_file` to make sure a proper error is raised. + CFG.load_from_file(config_file) + recipe = self._get_recipe(recipe) - CFG.load_from_file(config_file) session = CFG.start_session(recipe.stem) if check_level is not None: session['check_level'] = check_level diff --git a/esmvalcore/config/__init__.py b/esmvalcore/config/__init__.py index 74bcc486df..7c2b8e379c 100644 --- a/esmvalcore/config/__init__.py +++ b/esmvalcore/config/__init__.py @@ -4,8 +4,10 @@ ESMValCore configuration. - By default this will be loaded from the file - ``~/.esmvaltool/config-user.yml``. + By default, this will be loaded from the file + ``~/.esmvaltool/config-user.yml``. If used within the ``esmvaltool`` + program, this will respect the ``--config_file`` argument. + """ from ._config_object import CFG, Config, Session diff --git a/esmvalcore/config/_config_object.py b/esmvalcore/config/_config_object.py index 0b9c596177..92bd409a0b 100644 --- a/esmvalcore/config/_config_object.py +++ b/esmvalcore/config/_config_object.py @@ -1,15 +1,18 @@ """Importable config object.""" +from __future__ import annotations import os +import sys from datetime import datetime from pathlib import Path from types import MappingProxyType -from typing import Optional, Union +from typing import Optional import yaml import esmvalcore from esmvalcore.cmor.check import CheckLevels +from esmvalcore.exceptions import InvalidConfigParameter from ._config_validators import ( _deprecated_options_defaults, @@ -27,7 +30,9 @@ class Config(ValidatedConfig): Do not instantiate this class directly, but use :obj:`esmvalcore.config.CFG` instead. + """ + _DEFAULT_USER_CONFIG_DIR = Path.home() / '.esmvaltool' _validate = _validators _deprecate = _deprecators @@ -38,49 +43,65 @@ class Config(ValidatedConfig): ) @classmethod - def _load_user_config(cls, - filename: Union[os.PathLike, str], - raise_exception: bool = True): + def _load_user_config( + cls, + filename: Optional[os.PathLike | str] = None, + raise_exception: bool = True, + ): """Load user configuration from the given file. The config is cleared and updated in-place. Parameters ---------- - filename: pathlike - Name of the config file, must be yaml format + filename: + Name of the user configuration file (must be YAML format). If + `None`, use the rules given in `Config._get_config_user_path` to + determine the path. raise_exception : bool - Raise an exception if `filename` can not be found (default). - Otherwise, silently pass and use the default configuration. This - setting is necessary for the case where - `.esmvaltool/config-user.yml` has not been defined (i.e. first - start). + If ``True``, raise an exception if `filename` cannot be found. If + ``False``, silently pass and use the default configuration. This + setting is necessary during the loading of this module when no + configuration file is given (relevant if used within a script or + notebook). """ new = cls() + new.update(CFG_DEFAULT) + + config_user_path = cls._get_config_user_path(filename) try: - mapping = _read_config_file(filename) - mapping['config_file'] = filename - except IOError: + mapping = cls._read_config_file(config_user_path) + mapping['config_file'] = config_user_path + except FileNotFoundError: if raise_exception: raise mapping = {} - new.update(CFG_DEFAULT) - new.update(mapping) - new.check_missing() + try: + new.update(mapping) + new.check_missing() + except InvalidConfigParameter as exc: + raise InvalidConfigParameter( + f"Failed to parse user configuration file {config_user_path}: " + f"{str(exc)}" + ) from exc return new @classmethod - def _load_default_config(cls, filename: Union[os.PathLike, str]): + def _load_default_config(cls): """Load the default configuration.""" new = cls() - mapping = _read_config_file(filename) + package_config_user_path = Path( + esmvalcore.__file__ + ).parent / 'config-user.yml' + mapping = cls._read_config_file(package_config_user_path) + # Add defaults that are not available in esmvalcore/config-user.yml mapping['check_level'] = CheckLevels.DEFAULT - mapping['config_file'] = filename + mapping['config_file'] = package_config_user_path mapping['diagnostics'] = None mapping['extra_facets_dir'] = tuple() mapping['max_datasets'] = None @@ -93,29 +114,142 @@ def _load_default_config(cls, filename: Union[os.PathLike, str]): return new + @staticmethod + def _read_config_file(config_user_path: Path) -> dict: + """Read configuration file and store settings in a dictionary.""" + if not config_user_path.is_file(): + raise FileNotFoundError( + f"Config file '{config_user_path}' does not exist" + ) + + with open(config_user_path, 'r', encoding='utf-8') as file: + cfg = yaml.safe_load(file) + + return cfg + + @staticmethod + def _get_config_user_path( + filename: Optional[os.PathLike | str] = None + ) -> Path: + """Get path to user configuration file. + + `filename` can be given as absolute or relative path. In the latter + case, search in the current working directory and `~/.esmvaltool` (in + that order). + + If `filename` is not given, try to get user configuration file from the + following locations (sorted by descending priority): + + 1. Internal `_ESMVALTOOL_USER_CONFIG_FILE_` environment variable + (this ensures that any subprocess spawned by the esmvaltool program + will use the correct user configuration file). + 2. Command line arguments `--config-file` or `--config_file` (both + variants are allowed by the fire module), but only if script name is + `esmvaltool`. + 3. `config-user.yml` within default ESMValTool configuration directory + `~/.esmvaltool`. + + Note + ---- + This will NOT check if the returned file actually exists to allow + loading the module without any configuration file (this is relevant if + the module is used within a script or notebook). To check if the file + actually exists, use the method `load_from_file` (this is done when + using the `esmvaltool` CLI). + + If used within the esmvaltool program, set the + _ESMVALTOOL_USER_CONFIG_FILE_ at the end of this method to make sure + that subsequent calls of this method (also in suprocesses) use the + correct user configuration file. + + """ + # (1) Try to get user configuration file from `filename` argument + config_user = filename + + # (2) Try to get user configuration file from internal + # _ESMVALTOOL_USER_CONFIG_FILE_ environment variable + if ( + config_user is None and + '_ESMVALTOOL_USER_CONFIG_FILE_' in os.environ + ): + config_user = os.environ['_ESMVALTOOL_USER_CONFIG_FILE_'] + + # (3) Try to get user configuration file from CLI arguments + if config_user is None: + config_user = Config._get_config_path_from_cli() + + # (4) Default location + if config_user is None: + config_user = Config._DEFAULT_USER_CONFIG_DIR / 'config-user.yml' + + config_user = Path(config_user).expanduser() + + # Also search path relative to ~/.esmvaltool if necessary + if not (config_user.is_file() or config_user.is_absolute()): + config_user = Config._DEFAULT_USER_CONFIG_DIR / config_user + config_user = config_user.absolute() + + # If used within the esmvaltool program, make sure that subsequent + # calls of this method (also in suprocesses) use the correct user + # configuration file + if Path(sys.argv[0]).name == 'esmvaltool': + os.environ['_ESMVALTOOL_USER_CONFIG_FILE_'] = str(config_user) + + return config_user + + @staticmethod + def _get_config_path_from_cli() -> None | str: + """Try to get configuration path from CLI arguments. + + The hack of directly parsing the CLI arguments here (instead of using + the fire or argparser module) ensures that the correct user + configuration file is used. This will always work, regardless of when + this module has been imported in the code. + + Note + ---- + This only works if the script name is `esmvaltool`. Does not check if + file exists. + + """ + if Path(sys.argv[0]).name != 'esmvaltool': + return None + + for arg in sys.argv: + for opt in ('--config-file', '--config_file'): + if opt in arg: + # Parse '--config-file=/file.yml' or + # '--config_file=/file.yml' + partition = arg.partition('=') + if partition[2]: + return partition[2] + + # Parse '--config-file /file.yml' or + # '--config_file /file.yml' + config_idx = sys.argv.index(opt) + if config_idx == len(sys.argv) - 1: # no file given + return None + return sys.argv[config_idx + 1] + + return None + def load_from_file( self, - filename: Optional[Union[os.PathLike, str]] = None, + filename: Optional[os.PathLike | str] = None, ) -> None: """Load user configuration from the given file.""" - if filename is None: - filename = USER_CONFIG - path = Path(filename).expanduser() - if not path.exists(): - try_path = USER_CONFIG_DIR / filename - if try_path.exists(): - path = try_path - else: - raise FileNotFoundError(f'Cannot find: `{filename}`' - f'locally or in `{try_path}`') - self.clear() - self.update(Config._load_user_config(path)) + self.update(Config._load_user_config(filename)) def reload(self): """Reload the config file.""" - filename = self.get('config_file', DEFAULT_CONFIG) - self.load_from_file(filename) + if 'config_file' not in self: + raise ValueError( + "Cannot reload configuration, option 'config_file' is " + "missing; make sure to only use the `CFG` object from the " + "`esmvalcore.config` module" + ) + self.load_from_file(self['config_file']) def start_session(self, name: str): """Start a new session from this configuration object. @@ -161,7 +295,7 @@ class Session(ValidatedConfig): def __init__(self, config: dict, name: str = 'session'): super().__init__(config) - self.session_name: Union[str, None] = None + self.session_name: str | None = None self.set_session_name(name) def set_session_name(self, name: str = 'session'): @@ -201,7 +335,7 @@ def run_dir(self): @property def config_dir(self): """Return user config directory.""" - return USER_CONFIG_DIR + return Path(self['config_file']).parent @property def main_log(self): @@ -219,24 +353,6 @@ def _fixed_file_dir(self): return self.session_dir / self._relative_fixed_file_dir -def _read_config_file(config_file): - """Read config user file and store settings in a dictionary.""" - config_file = Path(config_file) - if not config_file.exists(): - raise IOError(f'Config file `{config_file}` does not exist.') - - with open(config_file, 'r', encoding='utf-8') as file: - cfg = yaml.safe_load(file) - - return cfg - - -DEFAULT_CONFIG_DIR = Path(esmvalcore.__file__).parent -DEFAULT_CONFIG = DEFAULT_CONFIG_DIR / 'config-user.yml' - -USER_CONFIG_DIR = Path.home() / '.esmvaltool' -USER_CONFIG = USER_CONFIG_DIR / 'config-user.yml' - -# initialize placeholders -CFG_DEFAULT = MappingProxyType(Config._load_default_config(DEFAULT_CONFIG)) -CFG = Config._load_user_config(USER_CONFIG, raise_exception=False) +# Initialize configuration objects +CFG_DEFAULT = MappingProxyType(Config._load_default_config()) +CFG = Config._load_user_config(raise_exception=False) diff --git a/tests/unit/config/test_config.py b/tests/unit/config/test_config.py index 6372a1c041..c840027796 100644 --- a/tests/unit/config/test_config.py +++ b/tests/unit/config/test_config.py @@ -4,6 +4,7 @@ import pytest import yaml +import esmvalcore from esmvalcore.cmor.check import CheckLevels from esmvalcore.config import CFG, _config from esmvalcore.config._config import ( @@ -193,6 +194,7 @@ def test_load_default_config(monkeypatch, default_config): 'preproc_dir', 'run_dir', 'work_dir', + 'config_dir', } # Check that only allowed keys are in it assert set(default_cfg) == set(cfg) @@ -210,6 +212,7 @@ def test_load_default_config(monkeypatch, default_config): for path in ('preproc', 'work', 'run'): assert getattr(cfg, path + '_dir') == cfg.session_dir / path assert cfg.plot_dir == cfg.session_dir / 'plots' + assert cfg.config_dir == Path(esmvalcore.__file__).parent # Check that projects were configured assert project_cfg diff --git a/tests/unit/config/test_config_object.py b/tests/unit/config/test_config_object.py index 9e916ff950..16084effc4 100644 --- a/tests/unit/config/test_config_object.py +++ b/tests/unit/config/test_config_object.py @@ -1,10 +1,26 @@ +import contextlib +import os +import sys from collections.abc import MutableMapping +from copy import deepcopy from pathlib import Path import pytest -from esmvalcore.config import Config, Session, _config_object +import esmvalcore +import esmvalcore.config._config_object +from esmvalcore.config import Config, Session from esmvalcore.exceptions import InvalidConfigParameter +from tests.integration.test_main import arguments + + +@contextlib.contextmanager +def environment(**kwargs): + """Temporary environment variables.""" + backup = deepcopy(os.environ) + os.environ = kwargs + yield + os.environ = backup def test_config_class(): @@ -57,25 +73,66 @@ def test_config_init(): def test_load_from_file(monkeypatch): - default_config_user_file = Path.home() / '.esmvaltool' / 'config-user.yml' - assert _config_object.USER_CONFIG == default_config_user_file - monkeypatch.setattr( - _config_object, - 'USER_CONFIG', - _config_object.DEFAULT_CONFIG, - ) + default_config_file = Path(esmvalcore.__file__).parent / 'config-user.yml' config = Config() assert not config - config.load_from_file() + config.load_from_file(default_config_file) assert config +def test_load_from_file_filenotfound(monkeypatch): + """Test `Config.load_from_file`.""" + config = Config() + assert not config + + expected_path = Path.home() / '.esmvaltool' / 'not_existent_file.yml' + msg = f"Config file '{expected_path}' does not exist" + with pytest.raises(FileNotFoundError, match=msg): + config.load_from_file('not_existent_file.yml') + + +def test_load_from_file_invalidconfigparameter(monkeypatch, tmp_path): + """Test `Config.load_from_file`.""" + monkeypatch.chdir(tmp_path) + cfg_path = tmp_path / 'test.yml' + cfg_path.write_text('invalid_param: 42') + + config = Config() + assert not config + + msg = ( + f"Failed to parse user configuration file {cfg_path}: `invalid_param` " + f"is not a valid config parameter." + ) + with pytest.raises(InvalidConfigParameter, match=msg): + config.load_from_file(cfg_path) + + def test_config_key_error(): config = Config() with pytest.raises(KeyError): config['invalid_key'] +def test_reload(): + """Test `Config.reload`.""" + cfg_path = Path(esmvalcore.__file__).parent / 'config-user.yml' + config = Config(config_file=cfg_path) + config.reload() + assert config['config_file'] == cfg_path + + +def test_reload_fail(): + """Test `Config.reload`.""" + config = Config() + msg = ( + "Cannot reload configuration, option 'config_file' is missing; make " + "sure to only use the `CFG` object from the `esmvalcore.config` module" + ) + with pytest.raises(ValueError, match=msg): + config.reload() + + def test_session(): config = Config({'output_dir': 'config'}) @@ -90,3 +147,174 @@ def test_session_key_error(): session = Session({}) with pytest.raises(KeyError): session['invalid_key'] + + +TEST_GET_CFG_PATH = [ + (None, None, None, '~/.esmvaltool/config-user.yml', False), + ( + None, + None, + ('any_other_module', '--config_file=cli.yml'), + '~/.esmvaltool/config-user.yml', + False, + ), + ( + None, + None, + ('esmvaltool', 'run', '--max-parallel-tasks=4'), + '~/.esmvaltool/config-user.yml', + True, + ), + ( + None, + None, + ('esmvaltool', '--config_file'), + '~/.esmvaltool/config-user.yml', + True, + ), + ( + None, + None, + ('esmvaltool', 'run', '--config_file=/cli.yml'), + '/cli.yml', + True, + ), + ( + None, + None, + ('esmvaltool', 'run', '--config_file=/cli.yml'), + '/cli.yml', + True, + ), + ( + None, + None, + ('esmvaltool', 'run', '--config-file', '/cli.yml'), + '/cli.yml', + True, + ), + ( + None, + None, + ('esmvaltool', 'run', '--config-file=/cli.yml'), + '/cli.yml', + True, + ), + ( + None, + None, + ('esmvaltool', 'run', '--config-file=relative_cli.yml'), + '~/.esmvaltool/relative_cli.yml', + True, + ), + ( + None, + None, + ('esmvaltool', 'run', '--config-file=existing_cfg.yml'), + 'existing_cfg.yml', + True, + ), + ( + None, + {'_ESMVALTOOL_USER_CONFIG_FILE_': '/env.yml'}, + ('esmvaltool', 'run', '--config-file=/cli.yml'), + '/env.yml', + True, + ), + ( + None, + {'_ESMVALTOOL_USER_CONFIG_FILE_': '/env.yml'}, + None, + '/env.yml', + True, + ), + ( + None, + {'_ESMVALTOOL_USER_CONFIG_FILE_': 'existing_cfg.yml'}, + ('esmvaltool', 'run', '--config-file=/cli.yml'), + 'existing_cfg.yml', + True, + ), + ( + '/filename.yml', + {'_ESMVALTOOL_USER_CONFIG_FILE_': '/env.yml'}, + ('esmvaltool', 'run', '--config-file=/cli.yml'), + '/filename.yml', + True, + ), + ( + '/filename.yml', + None, + ('esmvaltool', 'run', '--config-file=/cli.yml'), + '/filename.yml', + True, + ), + ('/filename.yml', None, None, '/filename.yml', False), + ( + 'filename.yml', + None, + None, + '~/.esmvaltool/filename.yml', + False, + ), + ( + 'existing_cfg.yml', + {'_ESMVALTOOL_USER_CONFIG_FILE_': '/env.yml'}, + ('esmvaltool', 'run', '--config-file=/cli.yml'), + 'existing_cfg.yml', + True, + ), +] + + +@pytest.mark.parametrize( + 'filename,env,cli_args,output,env_var_set', TEST_GET_CFG_PATH +) +def test_get_config_user_path( + filename, env, cli_args, output, env_var_set, monkeypatch, tmp_path +): + """Test `Config._get_config_user_path`.""" + # Create empty test file + monkeypatch.chdir(tmp_path) + (tmp_path / 'existing_cfg.yml').write_text('') + + if env is None: + env = {} + if cli_args is None: + cli_args = sys.argv + + if output == 'existing_cfg.yml': + output = tmp_path / 'existing_cfg.yml' + else: + output = Path(output).expanduser() + + with environment(**env), arguments(*cli_args): + config_path = Config._get_config_user_path(filename) + if env_var_set: + assert os.environ['_ESMVALTOOL_USER_CONFIG_FILE_'] == str(output) + else: + assert '_ESMVALTOOL_USER_CONFIG_FILE_' not in os.environ + assert isinstance(config_path, Path) + assert config_path == output + + +def test_load_user_config_filenotfound(): + """Test `Config._load_user_config`.""" + expected_path = Path.home() / '.esmvaltool' / 'not_existent_file.yml' + msg = f"Config file '{expected_path}' does not exist" + with pytest.raises(FileNotFoundError, match=msg): + Config._load_user_config('not_existent_file.yml') + + +def test_load_user_config_invalidconfigparameter(monkeypatch, tmp_path): + """Test `Config._load_user_config`.""" + monkeypatch.chdir(tmp_path) + cfg_path = tmp_path / 'test.yml' + cfg_path.write_text('invalid_param: 42') + + msg = ( + f"Failed to parse user configuration file {cfg_path}: `invalid_param` " + f"is not a valid config parameter." + ) + with pytest.raises(InvalidConfigParameter, match=msg): + Config._load_user_config(cfg_path)