diff --git a/.codacy.yml b/.codacy.yml index 06a0ea342f..afe979f5c7 100644 --- a/.codacy.yml +++ b/.codacy.yml @@ -21,5 +21,6 @@ engines: exclude_paths: [ 'doc/sphinx/**', 'esmvaltool/cmor/tables/**', - 'tests/**' + 'tests/**', + 'esmvaltool/utils/recipe_test_workflow/app/configure/bin/test_configure.py' ] diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 2086d60173..3478d469b4 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,2 +1,3 @@ esmvaltool/cmorizers @ESMValGroup/obs-maintainers .github/workflows @valeriupredoi +esmvaltool/utils/recipe_test_workflow/ @alistairsellar @ehogan diff --git a/.github/workflows/check-rtw.yml b/.github/workflows/check-rtw.yml new file mode 100644 index 0000000000..611601dfd7 --- /dev/null +++ b/.github/workflows/check-rtw.yml @@ -0,0 +1,84 @@ +# This workflow performs various validation steps for Cylc and Rose. +name: Check Recipe Test Workflow (RTW) + +# Controls when the action will run +on: + # Triggers the workflow on push events + push: + paths: +# - esmvaltool/utils/recipe_test_workflow/** + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Common variables are defined here +env: + RTW_ROOT_DIR: esmvaltool/utils/recipe_test_workflow + +# Required shell entrypoint to have properly configured bash shell +defaults: + run: + shell: bash -l {0} + +# A workflow run is made up of one or more jobs that can run +# sequentially or in parallel +jobs: + # This workflow contains a single job called "check-rtw" + check-rtw: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part + # of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job + # can access it + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: "latest" + miniforge-variant: Miniforge3 + use-mamba: true + conda-remove-defaults: "true" + + # Quote the version spec so that bash does not parse '>' as a redirect + - name: 
Install Cylc and Rose + run: conda install "cylc-flow>=8.2" cylc-rose metomi-rose + + - name: Check current environment + run: conda list + + - name: Validate Cylc workflow + run: | + cd ${RTW_ROOT_DIR} + cylc validate . -O metoffice + + - name: Run Cylc configuration linter + run: | + cd ${RTW_ROOT_DIR} + cylc lint + + - name: Validate format of Rose configuration files + run: | + cd ${RTW_ROOT_DIR} + output="$(rose config-dump)" + msg="Run 'rose config-dump' to re-dump the Rose configuration files" + msg="${msg} in the common format, then commit the changes." + # The '-z' option returns true if 'output' is empty. + if [[ -z "${output}" ]]; then true; else echo "${msg}" && exit 1; fi + + - name: Validate Rose configuration metadata + run: | + cd ${RTW_ROOT_DIR} + rose metadata-check -C meta/ + + - name: Run Rose configuration validation macros + run: | + cd ${RTW_ROOT_DIR} + rose macro -V + + - name: Lint shell scripts + run: | + cd ${RTW_ROOT_DIR} + output=$(find . -name "*.sh" -exec shellcheck {} \;) + if [ "$output" ]; then echo "${output}" && exit 1; fi diff --git a/.zenodo.json b/.zenodo.json index c087c4ae21..be799a9dc1 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -81,13 +81,17 @@ "name": "Berg, Peter", "orcid": "0000-0002-1469-2568" }, + { + "affiliation": "Met Office, UK", + "name": "Billows, Chris" + }, { "affiliation": "DLR, Germany", "name": "Bock, Lisa", "orcid": "0000-0001-7058-5938" }, { - "affiliation": "MetOffice, UK", + "affiliation": "Met Office, UK", "name": "Bodas-Salcedo, Alejandro", "orcid": "0000-0002-7890-2536" }, @@ -142,7 +146,7 @@ "name": "Docquier, David" }, { - "affiliation": "MetOffice, UK", + "affiliation": "Met Office, UK", "name": "Dreyer, Laura" }, { @@ -150,13 +154,21 @@ "name": "Ehbrecht, Carsten" }, { - "affiliation": "MetOffice, UK", + "affiliation": "Met Office, UK", "name": "Earnshaw, Paul" }, + { + "affiliation": "Met Office, UK", + "name": "Geddes, Theo" + }, { "affiliation": "University of Bremen, Germany", "name": 
"Gier, Bettina" }, + { + "affiliation": "Met Office, UK", + "name": "Gillett, Ed" + }, { "affiliation": "BSC, Spain", "name": "Gonzalez-Reviriego, Nube", @@ -191,6 +203,10 @@ "name": "Heuer, Helge", "orcid": "0000-0003-2411-7150" }, + { + "affiliation": "Met Office, UK", + "name": "Hogan, Emma" + }, { "affiliation": "BSC, Spain", "name": "Hunter, Alasdair", @@ -227,7 +243,7 @@ "orcid": "0000-0001-6085-5914" }, { - "affiliation": "MetOffice, UK", + "affiliation": "Met Office, UK", "name": "Little, Bill" }, { @@ -279,7 +295,7 @@ "name": "Sandstad, Marit" }, { - "affiliation": "MetOffice, UK", + "affiliation": "Met Office, UK", "name": "Sellar, Alistair" }, { @@ -305,6 +321,10 @@ "name": "Swaminathan, Ranjini", "orcid": "0000-0001-5853-2673" }, + { + "affiliation": "Met Office, UK", + "name": "Tomkins, Katherine" + }, { "affiliation": "BSC, Spain", "name": "Torralba, Verónica" @@ -387,7 +407,7 @@ "orcid": "0000-0003-3780-0784" }, { - "affiliation": "MetOffice, UK", + "affiliation": "Met Office, UK", "name": "Munday, Gregory", "orcid": "0000-0003-4750-9923" } diff --git a/CITATION.cff b/CITATION.cff index 1934c36ef1..ab158d2436 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -85,13 +85,17 @@ authors: family-names: Berg given-names: Peter orcid: "https://orcid.org/0000-0002-1469-2568" + - + affiliation: "Met Office, UK" + family-names: Billows + given-names: Chris - affiliation: "DLR, Germany" family-names: Bock given-names: Lisa orcid: "https://orcid.org/0000-0001-7058-5938" - - affiliation: "MetOffice, UK" + affiliation: "Met Office, UK" family-names: Bodas-Salcedo given-names: Alejandro orcid: "https://orcid.org/0000-0002-7890-2536" @@ -146,7 +150,7 @@ authors: family-names: Docquier given-names: David - - affiliation: "MetOffice, UK" + affiliation: "Met Office, UK" family-names: Dreyer given-names: Laura - @@ -154,13 +158,21 @@ authors: family-names: Ehbrecht given-names: Carsten - - affiliation: "MetOffice, UK" + affiliation: "Met Office, UK" family-names: Earnshaw 
given-names: Paul + - + affiliation: "Met Office, UK" + family-names: Geddes + given-names: Theo - affiliation: "University of Bremen, Germany" family-names: Gier given-names: Bettina + - + affiliation: "Met Office, UK" + family-names: Gillett + given-names: Ed - affiliation: "BSC, Spain" family-names: Gonzalez-Reviriego @@ -196,6 +208,10 @@ authors: family-names: Heuer given-names: Helge orcid: "https://orcid.org/0000-0003-2411-7150" + - + affiliation: "Met Office, UK" + family-names: Hogan + given-names: Emma - affiliation: "BSC, Spain" family-names: Hunter @@ -232,7 +248,7 @@ authors: given-names: Valerio orcid: "https://orcid.org/0000-0001-6085-5914" - - affiliation: "MetOffice, UK" + affiliation: "Met Office, UK" family-names: Little given-names: Bill - @@ -289,7 +305,7 @@ authors: family-names: Sandstad given-names: Marit - - affiliation: "MetOffice, UK" + affiliation: "Met Office, UK" family-names: Sellar given-names: Alistair - @@ -315,6 +331,10 @@ authors: family-names: Swaminathan given-names: Ranjini orcid: "https://orcid.org/0000-0001-5853-2673" + - + affiliation: "Met Office, UK" + family-names: Tomkins + given-names: Katherine - affiliation: "BSC, Spain" family-names: Torralba @@ -396,8 +416,8 @@ authors: family-names: Bonnet given-names: Pauline orcid: "https://orcid.org/0000-0003-3780-0784" - - - affiliation: "MetOffice, UK" + - + affiliation: "Met Office, UK" family-names: Munday given-names: Gregory orcid: "https://orcid.org/0000-0003-4750-9923" diff --git a/doc/sphinx/source/gensidebar.py b/doc/sphinx/source/gensidebar.py index 970722ff0a..f8b766ab7d 100644 --- a/doc/sphinx/source/gensidebar.py +++ b/doc/sphinx/source/gensidebar.py @@ -65,7 +65,7 @@ def _header(project, text): _write("esmvaltool", "Obtaining input data", "input") _write("esmvaltool", "Making a recipe or diagnostic", "develop/index") _write("esmvaltool", "Contributing to the community", "community/index") - _write("esmvaltool", "Utilities", "utils") + _write("esmvaltool", 
"Utilities", "utils/utils") _write("esmvaltool", "Diagnostics API Reference", "api/esmvaltool") _write("esmvaltool", "Frequently Asked Questions", "faq") _write("esmvaltool", "Changelog", "changelog") diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index fbc16b45ec..f9bcfafc3e 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -112,6 +112,21 @@ ESMValTool currently supports two ways to perform this reformatting (aka checks and fixes'). Details on this second method are given at the :ref:`end of this chapter `. +Tiers +----- + +All observational datasets are grouped into in three tiers: + +* **Tier 1**: obs4mips and ana4mips datasets. These datasets are publicly and freely available without any license restrictions. These datasets do not need any reformatting and can be used as is with ESMValTool. +* **Tier 2** other freely available datasets that are not obs4mips. There are no license restrictions. These datasets need to be reformatted to be used with ESMValTool ('CMORization', see above). +* **Tier 3** restricted datasets. Datasets which require registration to be downloaded or that can only be obtained upon request from the respective authors. License restrictions do not allow us to redistribute Tier 3 datasets. The data have to be obtained and reformatted by the user ('CMORization', see above). + +[!NOTE] +.. _tier3_note: +For some of the Tier 3 datasets, we obtained permission from the dataset providers to share the data among ESMValTool users on HPC systems. These Tier 3 datasets are marked with an asterisk in the table in section :ref:`supported datasets below`. + +An overview of the Tier 2 and Tier 3 datasets for which a CMORizing script is available in ESMValTool v2.0 is given in section :ref:`supported datasets below`. + A collection of readily CMORized OBS and OBS6 datasets can be accessed directly on CEDA/JASMIN and DKRZ. 
At CEDA/JASMIN OBS and OBS6 data is stored in the `esmeval` Group Workspace (GWS), and to be granted read (and execute) permissions to the GWS, one must apply at https://accounts.jasmin.ac.uk/services/group_workspaces/esmeval/ ; after permission has been granted, the user @@ -246,7 +261,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | CALIPSO-GOCCP | clcalipso (cfMon) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| CALIPSO-ICECLOUD | cli (AMon) | 3 | NCL | +| CALIPSO-ICECLOUD* [#t3]_ | cli (AMon) | 3 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | CDS-SATELLITE-ALBEDO | bdalb (Lmon), bhalb (Lmon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ @@ -330,7 +345,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESRL | co2s (Amon) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| FLUXCOM | gpp (Lmon) | 3 | Python | +| FLUXCOM* [#t3]_ | gpp (Lmon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | GCP2018 | fgco2 (Omon [#note3]_), nbp (Lmon [#note3]_) | 2 | 
Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ @@ -380,17 +395,17 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | Landschuetzer2020 | spco2 (Omon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| MAC-LWP | lwp, lwpStderr (Amon) | 3 | NCL | +| MAC-LWP* [#t3]_ | lwp, lwpStderr (Amon) | 3 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | MERRA | cli, clivi, clt, clw, clwvi, hur, hus, lwp, pr, prw, ps, psl, rlut, rlutcs, rsdt, rsut, rsutcs, ta, | 3 | NCL | | | tas, ts, ua, va, wap, zg (Amon) | | | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| MERRA2 | sm (Lmon) | 3 | Python | +| MERRA2* [#t3]_ | sm (Lmon) | 3 | Python | | | clt, pr, evspsbl, hfss, hfls, huss, prc, prsn, prw, ps, psl, rlds, rldscs, rlus, rlut, rlutcs, rsds, | | | | | rsdscs, rsdt, tas, tasmin, tasmax, tauu, tauv, ts, uas, vas, rsus, rsuscs, rsut, rsutcs, ta, ua, va, | | | | | tro3, zg, hus, wap, hur, cl, clw, cli, clwvi, clivi (Amon) | | | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| MLS-AURA | hur, hurStderr (day) | 3 | Python | +| MLS-AURA* [#t3]_ | hur, hurStderr (day) | 3 | Python | 
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | MOBO-DIC_MPIM | dissic (Omon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ @@ -400,7 +415,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | MSWEP [#note1]_ | pr | 3 | n/a | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| MTE | gpp, gppStderr (Lmon) | 3 | Python | +| MTE* [#t3]_ | gpp, gppStderr (Lmon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NCEP-NCAR-R1 | clt, hur, hurs, hus, pr, prw, psl, rlut, rlutcs, rsut, rsutcs, sfcWind, ta, tas, | 2 | Python | | | tasmax, tasmin, ts, ua, va, wap, zg (Amon) | | | @@ -410,7 +425,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NDP | cVeg (Lmon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| NIWA-BS | toz, tozStderr (Amon) | 3 | NCL | +| NIWA-BS* [#t3]_ | toz, tozStderr (Amon) | 3 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NOAA-CIRES-20CR-V2 | 
clt, clwvi, hus, prw, rlut, rsut, pr, tauu, tauv (Amon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ @@ -448,7 +463,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | TCOM-N2O | n2o (Amon [#note3]_) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| UWisc | clwvi, lwpStderr (Amon) | 3 | NCL | +| UWisc* [#t3]_ | clwvi, lwpStderr (Amon) | 3 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | WFDE5 | tas, pr (Amon, day) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ @@ -456,6 +471,9 @@ A list of the datasets for which a CMORizers is available is provided in the fol | | no3, o2, po4, si (Oyr) | | | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +.. [#t3] We obtained permission from the dataset provider to share this dataset + among ESMValTool users on HPC systems. + .. [#note1] CMORization is built into ESMValTool through the native6 project, so there is no separate CMORizer script. diff --git a/doc/sphinx/source/utils/RTW/about.rst b/doc/sphinx/source/utils/RTW/about.rst new file mode 100644 index 0000000000..62883fe2e1 --- /dev/null +++ b/doc/sphinx/source/utils/RTW/about.rst @@ -0,0 +1,14 @@ +***** +About +***** + +.. 
include:: common.txt + +The Recipe Test Workflow (|RTW|) is a workflow that is used to regularly run +recipes so issues can be discovered during the development process sooner +rather than later. + +|Cylc| v8 and |Rose| v2 are used as the workflow engine and application +configuration system for the |RTW|, respectively. |Cylc| and |Rose| are not +included in the ESMValTool environment as they are typically already centrally +installed at sites e.g. JASMIN and the Met Office. diff --git a/doc/sphinx/source/utils/RTW/add_a_recipe.rst b/doc/sphinx/source/utils/RTW/add_a_recipe.rst new file mode 100644 index 0000000000..6e495e1f1c --- /dev/null +++ b/doc/sphinx/source/utils/RTW/add_a_recipe.rst @@ -0,0 +1,118 @@ +How to add a recipe to the |RTW| +================================ + +.. include:: common.txt + +.. note:: + Before you follow these steps to add your recipe, you must be able to + successfully run the recipe with the latest version of ESMValTool on the + compute server you use at your site, as detailed by the ``platform`` option + in the ``[[COMPUTE]]`` section in the site-specific ``.cylc`` file in the + ``esmvaltool/utils/recipe_test_workflow/site/`` directory. + +#. Open a `new ESMValTool issue`_ on GitHub, assign yourself to the issue, and + add the ``Recipe Test Workflow (RTW)`` label to the issue, see + `ESMValTool issue #3663`_ for an example. + +#. Create a branch. + +#. Obtain the duration and memory usage of the recipe from the messages printed + to screen, or at the end of the ``run/main_log.txt`` file in the recipe + output directory after running your recipe on the compute cluster you use at + your site; these messages will look something like:: + + YYYY-MM-DD HH:MM:SS:sss UTC [12345] INFO Time for running the recipe was: 0:02:13.334742 + YYYY-MM-DD HH:MM:SS:sss UTC [12345] INFO Maximum memory used (estimate): 2.4 GB + [...] + YYYY-MM-DD HH:MM:SS:sss UTC [12345] INFO Run was successful + +#. 
Add the recipe to the ``[task parameters]`` section in the + ``esmvaltool/utils/recipe_test_workflow/flow.cylc`` file. + + .. hint:: + If the recipe takes less than 10 minutes to run then it should be added + to the ``fast`` option. Recipes that take longer than 10 minutes should + be added to the ``medium`` option. + + .. hint:: + The line added should follow the format of ``recipe_new_recipe, \``, + unless the line is the last one in the list, in which case the line added + should follow the format of ``recipe_new_recipe``. + +#. If the duration of the recipe is larger than the value specified by the + ``execution time limit`` option in the ``[[COMPUTE]]`` section in the + aforementioned site-specific ``.cylc`` file, and / or the memory usage of + the recipe is larger than the value specified by the ``--mem`` option in the + ``[[[directives]]]`` section in the ``[[COMPUTE]]`` section, add a section + (in alphabetical order) to this file as shown below (round the duration to + the nearest second):: + + [[process]] + # Actual: 0m31s, 2.5 GB on 2024-04-08. + execution time limit = PT2M + [[[directives]]] + --mem = 3G + + .. hint:: + The ``fast`` key in the example task definition above + (``[[process]]``) should match the name of the + option the recipe was added to in the ``[task parameters]`` section in + the ``esmvaltool/utils/recipe_test_workflow/flow.cylc`` file. + + .. hint:: + Set the ``execution time limit`` to 10-20% more than the actual duration. + For actual durations of up to ``1m45s``, set the ``execution time limit`` + to ``PT2M`` (2 minutes). + + .. hint:: + Try not to regularly waste more than 500 MiB in memory usage. Typically, + rounding the actual memory usage up to the nearest integer is acceptable. + +#. Stop any running ``recipe_test_workflow`` workflows:: + + cylc stop recipe_test_workflow/* + +#. Run the |RTW|, as detailed in the :ref:`quick_start_guide`; it is expected + that the ``compare`` task will fail. + +#. 
Update the Known Good Outputs (|KGOs|): + + * Recursively copy the recipe output directory (i.e. + ``recipe___