From 8e4df3eac2583d609800d8f4c77ea94815ea1d04 Mon Sep 17 00:00:00 2001
From: Katja Weigel
Date: Fri, 22 Nov 2024 13:47:15 +0100
Subject: [PATCH] Cleaning up provenance and ruff changes.

---
 .../diag_scripts/ipcc_ar6/santer21jclimfig.py | 911 ++++++++++--------
 1 file changed, 501 insertions(+), 410 deletions(-)

diff --git a/esmvaltool/diag_scripts/ipcc_ar6/santer21jclimfig.py b/esmvaltool/diag_scripts/ipcc_ar6/santer21jclimfig.py
index e9d9a7467d..fe39e47e55 100755
--- a/esmvaltool/diag_scripts/ipcc_ar6/santer21jclimfig.py
+++ b/esmvaltool/diag_scripts/ipcc_ar6/santer21jclimfig.py
@@ -4,15 +4,15 @@
 """Diagnostic script to plot figure 3.12 of IPCC AR 6 chapter 3 (based on
 Santer et al. (2007) and Santer et al. (2021)).
-###############################################################################
+##############################################################################
 ipcc_ar6/santer21jclimfig.py
 Author: Katja Weigel (IUP, Uni Bremen, Germany)
         EVal4CMIP ans 4C project
-###############################################################################
+##############################################################################
 Description
 -----------
-    Water Vapor Path trends compared to observational and reanalysis data
+    Trends in CMIP5 and CMIP6 compared to observational and reanalysis data
     based on methods used in Santer et al. (2007) and Santer et al. (2021).
     https://www.pnas.org/doi/10.1073/pnas.0702872104
     https://doi.org/10.1175/JCLI-D-20-0768.1
@@ -24,7 +24,7 @@
     grid as the filter. The filter must cover at least the time period used
     for the data.

-###############################################################################
+##############################################################################

"""

@@ -38,36 +38,37 @@
 import matplotlib.pyplot as plt
 import numpy as np
 from scipy import stats
-# from scipy.stats import norm

-from esmvaltool.diag_scripts.shared import (ProvenanceLogger,
-                                            plot, get_diagnostic_filename,
-                                            get_plot_filename,
-                                            group_metadata,
-                                            select_metadata, run_diagnostic,
-                                            variables_available,
-                                            extract_variables)
+from esmvaltool.diag_scripts.shared import (
+    ProvenanceLogger,
+    plot,
+    get_diagnostic_filename,
+    get_plot_filename,
+    group_metadata,
+    select_metadata,
+    run_diagnostic,
+    variables_available,
+    extract_variables,
+)

 logger = logging.getLogger(os.path.basename(__file__))


 def _apply_filter(cfg, cube):
     """Apply filter from RSS Anomalies to all data and calculates mean."""
-    if 'sample_obs' in cfg:
-        filt = iris.load(os.path.join(cfg['auxiliary_data_dir'],
-                                      cfg['sample_obs']))[0]
+    if "sample_obs" in cfg:
+        filt = iris.load(os.path.join(cfg["auxiliary_data_dir"], cfg["sample_obs"]))[0]

-        for iii, dim_str in enumerate(['time', 'latitude', 'longitude']):
+        for iii, dim_str in enumerate(["time", "latitude", "longitude"]):
             filt = _fit_dim(filt, cube, dim_str, iii)

         cube.data = cube.data * filt.data

-    coords = ('longitude', 'latitude')
+    coords = ("longitude", "latitude")
     for coord in coords:
         if not cube.coord(coord).has_bounds():
             cube.coord(coord).guess_bounds()
     cube_grid_areas = iris.analysis.cartography.area_weights(cube)
-    cube = (cube.collapsed(coords, iris.analysis.MEAN,
-                           weights=cube_grid_areas))
+    cube = cube.collapsed(coords, iris.analysis.MEAN, weights=cube_grid_areas)

     return cube


@@ -76,7 +77,7 @@ def _fit_dim(filt, cube, dim_str, dim_nr):
     """Adjust array for given dimension."""
     dimfil = filt.coord(dim_str)
     dimcu = cube.coord(dim_str)
-    if dim_str == 'time':
+    if dim_str == "time":
         start = dimcu.units.num2date(dimcu.points[0])
         end = dimcu.units.num2date(dimcu.points[-1])
         sdim = dimfil.nearest_neighbour_index(dimfil.units.date2num(start))
@@ -88,70 +89,78 @@ def _fit_dim(filt, cube, dim_str, dim_nr):
         edim = dimfil.nearest_neighbour_index(end)

     if dim_nr == 0:
-        filt = filt[sdim:edim + 1, :, :]
+        filt = filt[sdim : edim + 1, :, :]
     elif dim_nr == 1:
-        filt = filt[:, sdim:edim + 1, :]
+        filt = filt[:, sdim : edim + 1, :]
     elif dim_nr == 2:
-        filt = filt[:, :, sdim:edim + 1]
+        filt = filt[:, :, sdim : edim + 1]

     return filt


 def _calc_trend(cube_anom):
     """Calculate trends."""
-    reg_var = stats.linregress(np.linspace(0, len(cube_anom.data) - 1,
-                                           len(cube_anom.data)),
-                               cube_anom.data)
+    reg_var = stats.linregress(
+        np.linspace(0, len(cube_anom.data) - 1, len(cube_anom.data)),
+        cube_anom.data
+    )

     return reg_var.slope


 def _calculate_anomalies(cube):
     """Remove annual cycle from time series."""
-    c_n = cube.aggregated_by('month_number', iris.analysis.MEAN)
-    month_number = cube.coord('month_number').points
+    c_n = cube.aggregated_by("month_number", iris.analysis.MEAN)
+    month_number = cube.coord("month_number").points
     startind = np.where(month_number == 1)
     endind = np.where(month_number == 12)
     data_in = cube.data
     data_new = np.full(data_in.shape, 0.5)
     for iii in range((startind[0])[0], (endind[0])[-1], 12):
-        data_new[iii:iii + 12] = ((cube.data[iii:iii + 12] - c_n.data) /
-                                  c_n.data) * 100.0 * 120.0
+        data_new[iii : iii + 12] = (
+            ((cube.data[iii : iii + 12] - c_n.data) / c_n.data)
+            * 100.0 * 120.0
+        )

     cube.data = data_new
-    cube.units = Unit('percent')
+    cube.units = Unit("percent")

     return cube


 def _check_full_data(dataset_path, cube):
     """Check if cube covers time series from start year to end year."""
-    # cstart_month = cube.coord('month_number').points[0]
-    # cend_month = cube.coord('month_number').points[1]
-    cstart_year = cube.coord('year').points[0]
-    cend_year = cube.coord('year').points[-1]
-    start_year = int(dataset_path['start_year'])
-    end_year = int(dataset_path['end_year'])
+    cstart_year = cube.coord("year").points[0]
+    cend_year = cube.coord("year").points[-1]
+    start_year = int(dataset_path["start_year"])
+    end_year = int(dataset_path["end_year"])
     check = 0
     if start_year == cstart_year and end_year == cend_year:
         check = 1
-        print('Full time series:')
-        print(start_year, cstart_year, end_year, cend_year)
     else:
-        print('FAILED:')
-        print(start_year, cstart_year, end_year, cend_year)
+        logger.info(
+            "Time series incomplete: "
+            "Start(Recipe, Data): %i, %i; "
+            "End(Recipe, Data): %i, %i. "
+            "Dataset %s excluded",
+            start_year,
+            cstart_year,
+            end_year,
+            cend_year,
+            dataset_path,
+        )

     return check


 def _get_hem_letter(lat):
     """Get S or N from Latitude."""
-    shem = ''
+    shem = ""
     if lat < 0:
-        shem = 'S'
+        shem = "S"
     if lat > 0:
-        shem = 'N'
+        shem = "N"

     return shem


@@ -159,124 +168,147 @@ def _get_hem_letter(lat):
 def _get_valid_datasets(input_data):
     """Get valid datasets list and number for each model."""
     number_of_subdata = OrderedDict()
-    available_dataset = list(group_metadata(input_data, 'dataset'))
+    available_dataset = list(group_metadata(input_data, "dataset"))
     valid_datasets = []
     period = {}
-    period['start_year'] = []
-    period['end_year'] = []
-    period['span'] = []
-    period['slat'] = []
-    period['elat'] = []
+    period["start_year"] = []
+    period["end_year"] = []
+    period["span"] = []
+    period["slat"] = []
+    period["elat"] = []
     for dataset in available_dataset:
         meta = select_metadata(input_data, dataset=dataset)
         number_of_subdata[dataset] = float(len(meta))
         for dataset_path in meta:
-            cube =
iris.load(dataset_path['filename'])[0] - cat.add_year(cube, 'time', name='year') + cube = iris.load(dataset_path["filename"])[0] + cat.add_year(cube, "time", name="year") if not _check_full_data(dataset_path, cube): number_of_subdata[dataset] = number_of_subdata[dataset] - 1 else: - valid_datasets.append(dataset_path['filename']) - period['start_year'].append(int(dataset_path['start_year'])) - period['end_year'].append(int(dataset_path['end_year'])) - period['span'].append(int(dataset_path['end_year']) - - int(dataset_path['start_year']) + 1) - period['slat'].append(round(cube.coord('latitude').points[0])) - period['elat'].append(round(cube.coord('latitude').points[-1])) - if min(period['span']) == max(period['span']): - period['common_span'] = str(min(period['span'])) - if min(period['start_year']) == max(period['start_year']) and\ - min(period['end_year']) == max(period['end_year']): - period['common_period'] = str(min(period['start_year'])) + ' - ' +\ - str(min(period['end_year'])) - - if min(period['slat']) == max(period['slat']) and\ - min(period['elat']) == max(period['elat']): - shem = _get_hem_letter(min(period['slat'])) - ehem = _get_hem_letter(min(period['elat'])) - - period['common_lats'] = str(abs(min(period['slat']))) + '°' + shem +\ - ' - ' + str(abs(min(period['elat']))) + '°' + ehem + valid_datasets.append(dataset_path["filename"]) + period["start_year"].append(int(dataset_path["start_year"])) + period["end_year"].append(int(dataset_path["end_year"])) + period["span"].append( + int(dataset_path["end_year"]) + - int(dataset_path["start_year"]) + 1 + ) + period["slat"].append(round(cube.coord("latitude").points[0])) + period["elat"].append(round(cube.coord("latitude").points[-1])) + if min(period["span"]) == max(period["span"]): + period["common_span"] = str(min(period["span"])) + if min(period["start_year"]) == max(period["start_year"]) and min( + period["end_year"] + ) == max(period["end_year"]): + period["common_period"] = ( + str(min(period["start_year"])) + " - " + str(min(period["end_year"])) + ) + + if min(period["slat"]) == max(period["slat"]) and min(period["elat"]) == max( + period["elat"] + ): + shem = _get_hem_letter(min(period["slat"])) + ehem = _get_hem_letter(min(period["elat"])) + + period["common_lats"] = ( + str(abs(min(period["slat"]))) + + "°" + + shem + + " - " + + str(abs(min(period["elat"]))) + + "°" + + ehem + ) return valid_datasets, number_of_subdata, period def _plot_extratrends(cfg, extratrends, trends, period, axx_lim): """Plot trends for ensembles.""" - res_ar = {'artrend': {}, 'kde1': {}} - # res_ar = {'xhist': np.linspace(axx_lim['histmin'], - # axx_lim['histmax'], 41), - # 'artrend': {}, - # 'kde1': {}} - res_ar['xval'] = axx_lim['xval'] - res_ar['xhist'] = axx_lim['xhist'] + res_ar = {"artrend": {}, "kde1": {}} + res_ar["xval"] = axx_lim["xval"] + res_ar["xhist"] = axx_lim["xhist"] fig, axx = plt.subplots(figsize=(8, 6)) valid_datasets = [] - for xtrmdl in cfg['add_model_dist']: + for xtrmdl in cfg["add_model_dist"]: alias = list(extratrends[xtrmdl].keys())[0] - valid_datasets.append(select_metadata(cfg['input_data'].values(), - alias=alias)[0]['filename']) - if alias in trends['cmip6'].keys(): - style = plot.get_dataset_style(xtrmdl, style_file='cmip6') - elif alias in trends['cmip5'].keys(): - style = plot.get_dataset_style(xtrmdl, style_file='cmip5') + valid_datasets.append( + select_metadata(cfg["input_data"].values(), alias=alias)[0]["filename"] + ) + if alias in trends["cmip6"].keys(): + style = plot.get_dataset_style(xtrmdl, 
style_file="cmip6") + elif alias in trends["cmip5"].keys(): + style = plot.get_dataset_style(xtrmdl, style_file="cmip5") else: - style = {'facecolor': (0, 0, 1, 0.2), - 'color': (0, 0, 1, 1.0)} - - res_ar['artrend'][xtrmdl] = np.fromiter(extratrends[xtrmdl].values(), - dtype=float) - res_ar['kde1'][xtrmdl] = stats.gaussian_kde(res_ar['artrend'][xtrmdl], - bw_method='scott') - hbin, bins1, patches = axx.hist(res_ar['artrend'][xtrmdl], - bins=res_ar['xhist'], - density=True, - edgecolor=style['color'], - facecolor=style['facecolor']) + style = {"facecolor": (0, 0, 1, 0.2), "color": (0, 0, 1, 1.0)} + + res_ar["artrend"][xtrmdl] = np.fromiter( + extratrends[xtrmdl].values(), dtype=float + ) + res_ar["kde1"][xtrmdl] = stats.gaussian_kde( + res_ar["artrend"][xtrmdl], bw_method="scott" + ) + hbin, bins1, patches = axx.hist( + res_ar["artrend"][xtrmdl], + bins=res_ar["xhist"], + density=True, + edgecolor=style["color"], + facecolor=style["facecolor"], + ) del bins1, patches - axx.plot(res_ar['xval'], - res_ar['kde1'][xtrmdl](res_ar['xval']), - color=style['color'], - linewidth=3, - label=xtrmdl) - axx_lim['maxh'] = (1.0 + 0.5 * axx_lim['factor']) * np.max(hbin) + axx.plot( + res_ar["xval"], + res_ar["kde1"][xtrmdl](res_ar["xval"]), + color=style["color"], + linewidth=3, + label=xtrmdl, + ) + axx_lim["maxh"] = (1.0 + 0.5 * axx_lim["factor"]) * np.max(hbin) caption = _plot_obs(trends, axx, axx_lim) - caption = _plot_settings(cfg, axx, fig, 'fig2', period, axx_lim) + caption + caption = _plot_settings(cfg, axx, fig, "fig2", period, axx_lim) + caption plt.close() - caption = 'Probability density function of the decadal trend in ' + \ - 'the Water Vapor Path' + caption - - provenance_record = get_provenance_record(valid_datasets, - caption, ['trend', 'other'], - ['reg']) - - diagnostic_file = get_diagnostic_filename('fig2', cfg) - - logger.info('Saving analysis results to %s', diagnostic_file) - - iris.save(cube_to_save_vars(_write_list_dict(cfg, - trends, - res_ar)), - target=diagnostic_file) - - logger.info('Recording provenance of %s:\n%s', diagnostic_file, - pformat(provenance_record)) + vars = list(extract_variables(cfg).keys()) + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] + caption = ( + "Probability density function of the decadal trend in " + + "the " + + print_long_name + + caption + ) + + provenance_record = get_provenance_record( + valid_datasets, caption, ["trend", "other"], ["reg"] + ) + + diagnostic_file = get_diagnostic_filename("fig2", cfg) + + logger.info("Saving analysis results to %s", diagnostic_file) + + iris.save( + cube_to_save_vars(_write_list_dict(cfg, trends, res_ar)), + target=diagnostic_file + ) + + logger.info( + "Recording provenance of %s:\n%s", diagnostic_file, + pformat(provenance_record) + ) with ProvenanceLogger(cfg) as provenance_logger: provenance_logger.log(diagnostic_file, provenance_record) def _plot_obs(trends, axx, axx_lim): """Plot observational or reanalysis data as vertical line.""" - obs_str = '' + obs_str = "" # IPCC colors for obs from # https://github.com/IPCC-WG1/colormaps/blob/master/ # categorical_colors_rgb_0-255/dark_cat.txt - if trends['obs']: - obs_str = ' Vertical lines show the trend for' - for iii, obsname in enumerate(trends['obs'].keys()): + if trends["obs"]: + obs_str = " Vertical lines show the trend for" + for iii, obsname in enumerate(trends["obs"].keys()): obscoli = float(iii) if iii > 4: obscoli = float(iii) - 4.5 @@ -284,78 +316,97 @@ def _plot_obs(trends, axx, axx_lim): obscoli = float(iii) - 
8.75 if iii > 12: obscoli = float(iii) - 12.25 - plotobscol = (1.0 - 0.25 * obscoli, 0.25 * obscoli, - 0.5 + obscoli * 0.1) - if obsname == 'RSS': - plotobscol = (221 / 255.0, 84 / 255.0, 46 / 255.0,) + plotobscol = (1.0 - 0.25 * obscoli, 0.25 * obscoli, 0.5 + obscoli * 0.1) + if obsname == "RSS": + plotobscol = ( + 221 / 255.0, + 84 / 255.0, + 46 / 255.0, + ) # obsnamep = 'RSS' - if obsname == 'CRU': + if obsname == "CRU": plotobscol = (0.8, 0.4, 0.1, 1) - if obsname == 'ERA5': + if obsname == "ERA5": # obscol = [(0.8, 0.4, 0.1, 1), (1, 0, 0.2, 1), # (0.8, 0.1, 0.4, 1), (1, 0.6, 0, 1)] # plotobscol = (1, 0, 0.2, 1) - plotobscol = (128 / 255.0, 54 / 255.0, 168 / 255.0,) + plotobscol = ( + 128 / 255.0, + 54 / 255.0, + 168 / 255.0, + ) # obsnamep = 'ERA5.1' - if obsname == 'MERRA2': + if obsname == "MERRA2": plotobscol = (0.8, 0.1, 0.4, 1) - if obsname == 'NCEP-NCAR-R1': + if obsname == "NCEP-NCAR-R1": plotobscol = (1, 0.6, 0, 1) - axx.vlines(trends['obs'][obsname], 0, axx_lim['maxh'], - colors=plotobscol, - linewidth=3, - label=obsname) - obs_str = obs_str + '.' + axx.vlines( + trends["obs"][obsname], + 0, + axx_lim["maxh"], + colors=plotobscol, + linewidth=3, + label=obsname, + ) + obs_str = obs_str + "." return obs_str def _get_ax_limits(cfg, trends): - """ Automatically find plot bounds, if 'histmin' and 'histmax' not given. + """Automatically find plot bounds, if 'histmin' and 'histmax' not given. - If no values are given, the limits for the x-axis are then determined - by the width of the histograms (plus offset). + If no values are given, the limits for the x-axis are then determined + by the width of the histograms (plus offset). """ - axx_lim = {'factor': 0.3} - # factor determines offset between limits of histogram and x-axis - # depending on the histogram width + axx_lim = {"factor": 0.3} - if 'histmin' in cfg.keys(): - histmin = cfg['histmin'] + if "histmin" in cfg.keys(): + histmin = cfg["histmin"] xmin = histmin else: histmin = 1000 - for label in ['cmip5', 'cmip6', 'obs']: + for label in ["cmip5", "cmip6", "obs"]: if trends[label]: - histmin = np.min(np.array([histmin, - np.min(np.fromiter( - trends[label].values(), - dtype=float))])) - - if 'histmax' in cfg.keys(): - histmax = cfg['histmax'] + histmin = np.min( + np.array( + [ + histmin, + np.min(np.fromiter(trends[label].values(), + dtype=float)), + ] + ) + ) + + if "histmax" in cfg.keys(): + histmax = cfg["histmax"] xmax = histmax else: histmax = -1000 - for label in ['cmip5', 'cmip6', 'obs']: + for label in ["cmip5", "cmip6", "obs"]: if trends[label]: - histmax = np.max(np.array([histmax, - np.max(np.fromiter( - trends[label].values(), - dtype=float))])) - - if 'histmin' not in cfg.keys(): - xmin = histmin - axx_lim['factor'] * abs(abs(histmax) - abs(histmin)) - - if 'histmax' not in cfg.keys(): - xmax = histmax + axx_lim['factor'] * abs(abs(histmax) - abs(histmin)) + histmax = np.max( + np.array( + [ + histmax, + np.max(np.fromiter(trends[label].values(), + dtype=float)), + ] + ) + ) + + if "histmin" not in cfg.keys(): + xmin = histmin - axx_lim["factor"] * abs(abs(histmax) - abs(histmin)) + + if "histmax" not in cfg.keys(): + xmax = histmax + axx_lim["factor"] * abs(abs(histmax) - abs(histmin)) # Saving values in dictionary axx_lim: - axx_lim['xmin'] = xmin - axx_lim['xmax'] = xmax - axx_lim['histmin'] = histmin - axx_lim['histmax'] = histmax - axx_lim['xhist'] = np.linspace(histmin, histmax, 41) - axx_lim['xval'] = np.linspace(xmin, xmax, 41) + axx_lim["xmin"] = xmin + axx_lim["xmax"] = xmax + axx_lim["histmin"] = 
histmin + axx_lim["histmax"] = histmax + axx_lim["xhist"] = np.linspace(histmin, histmax, 41) + axx_lim["xval"] = np.linspace(xmin, xmax, 41) return axx_lim @@ -363,13 +414,13 @@ def _get_ax_limits(cfg, trends): def _plot_trends(cfg, trends, valid_datasets, period, axx_lim): """Plot probability density function of trends. - The probability density function is estimated by using Gaussian kernel - density estimation. The models are weighted by their respective number - of realisations. Trends are depicted in a histogram, the probability - density function as a curve. + The probability density function is estimated by using Gaussian kernel + density estimation. The models are weighted by their respective number + of realisations. Trends are depicted in a histogram, the probability + density function as a curve. """ - res_ar = {'xval': axx_lim['xval']} - res_ar['xhist'] = axx_lim['xhist'] + res_ar = {"xval": axx_lim["xval"]} + res_ar["xhist"] = axx_lim["xhist"] fig, axx = plt.subplots(figsize=(8, 6)) maxh = -1.0 @@ -377,154 +428,194 @@ def _plot_trends(cfg, trends, valid_datasets, period, axx_lim): # https://github.com/IPCC-WG1/colormaps/blob/master/ # categorical_colors_rgb_0-255/cmip_cat.txt # CMIP5 - if trends['cmip5']: - res_ar['artrend_c5'] = np.fromiter(trends['cmip5'].values(), - dtype=float) - res_ar['weights_c5'] = np.fromiter(trends['cmip5weights'].values(), + if trends["cmip5"]: + res_ar["artrend_c5"] = np.fromiter(trends["cmip5"].values(), dtype=float) + res_ar["weights_c5"] = np.fromiter(trends["cmip5weights"].values(), dtype=float) - # yyy_c5, xxx_c5 = np.histogram(artrend_c5, bins=xhist) - res_ar['kde1_c5'] = stats.gaussian_kde(res_ar['artrend_c5'], - weights=res_ar['weights_c5'], - bw_method='scott') - hbinx1, bins1, patches = axx.hist(res_ar['artrend_c5'], - bins=res_ar['xhist'], - density=True, - weights=res_ar['weights_c5'], - edgecolor=(37 / 250.0, 81 / - 255.0, 204 / 255.0, 1.0), - facecolor=(37 / 250.0, 81 / - 255.0, 204 / 255.0, 0.2)) + res_ar["kde1_c5"] = stats.gaussian_kde( + res_ar["artrend_c5"], weights=res_ar["weights_c5"], + bw_method="scott" + ) + hbinx1, bins1, patches = axx.hist( + res_ar["artrend_c5"], + bins=res_ar["xhist"], + density=True, + weights=res_ar["weights_c5"], + edgecolor=(37 / 250.0, 81 / 255.0, 204 / 255.0, 1.0), + facecolor=(37 / 250.0, 81 / 255.0, 204 / 255.0, 0.2), + ) del bins1, patches - axx.plot(res_ar['xval'], res_ar['kde1_c5'](res_ar['xval']), - color=(37 / 250.0, 81 / 255.0, 204 / 255.0, 1), - linewidth=3, - label='CMIP5') + axx.plot( + res_ar["xval"], + res_ar["kde1_c5"](res_ar["xval"]), + color=(37 / 250.0, 81 / 255.0, 204 / 255.0, 1), + linewidth=3, + label="CMIP5", + ) maxh = np.max([maxh, np.max(hbinx1)]) - # maxh = np.max(kde1_c5(xhist)) # CMIP6 - if trends['cmip6']: - res_ar['artrend_c6'] = np.fromiter(trends['cmip6'].values(), + if trends["cmip6"]: + res_ar["artrend_c6"] = np.fromiter(trends["cmip6"].values(), dtype=float) + res_ar["weights_c6"] = np.fromiter(trends["cmip6weights"].values(), dtype=float) - res_ar['weights_c6'] = np.fromiter(trends['cmip6weights'].values(), - dtype=float) - # yyy_c6, xxx_c6 = np.histogram(artrend_c6, bins=xhist) - res_ar['kde1_c6'] = stats.gaussian_kde(res_ar['artrend_c6'], - weights=res_ar['weights_c6'], - bw_method='scott') - hbinx2, bins1, patches = axx.hist(res_ar['artrend_c6'], - bins=res_ar['xhist'], - density=True, - weights=res_ar['weights_c6'], - edgecolor=(204 / 250.0, 35 / - 255.0, 35 / 255.0, 1.0), - facecolor=(204 / 250.0, 35 / - 255.0, 35 / 255.0, 0.2)) + res_ar["kde1_c6"] = 
stats.gaussian_kde( + res_ar["artrend_c6"], weights=res_ar["weights_c6"], bw_method="scott" + ) + hbinx2, bins1, patches = axx.hist( + res_ar["artrend_c6"], + bins=res_ar["xhist"], + density=True, + weights=res_ar["weights_c6"], + edgecolor=(204 / 250.0, 35 / 255.0, 35 / 255.0, 1.0), + facecolor=(204 / 250.0, 35 / 255.0, 35 / 255.0, 0.2), + ) del bins1, patches - axx.plot(res_ar['xval'], res_ar['kde1_c6'](res_ar['xval']), - color=(204 / 250.0, 35 / 255.0, 35 / 255.0, 1), - linewidth=3, - label='CMIP6') + axx.plot( + res_ar["xval"], + res_ar["kde1_c6"](res_ar["xval"]), + color=(204 / 250.0, 35 / 255.0, 35 / 255.0, 1), + linewidth=3, + label="CMIP6", + ) maxh = np.max([maxh, np.max(hbinx2)]) - # maxh = np.max(kde1_c6(xhist)) # Find the highest bin in the histograms for the y-axis limit - axx_lim['maxh'] = maxh * (1.0 + 0.5 * axx_lim['factor']) + axx_lim["maxh"] = maxh * (1.0 + 0.5 * axx_lim["factor"]) caption = _plot_obs(trends, axx, axx_lim) - caption = _plot_settings(cfg, axx, fig, 'fig1', period, axx_lim) + caption + caption = _plot_settings(cfg, axx, fig, "fig1", period, axx_lim) + caption plt.close() - caption = 'Probability density function of the decadal trend in ' + \ - 'the Water Vapor Path' + caption + vars = list(extract_variables(cfg).keys()) + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] + caption = ( + "Probability density function of the decadal trend in " + + "the " + + print_long_name + + caption + ) - provenance_record = get_provenance_record(valid_datasets, - caption, ['trend', 'other'], - ['reg']) + provenance_record = get_provenance_record( + valid_datasets, caption, ["trend", "other"], ["reg"] + ) - diagnostic_file = get_diagnostic_filename('fig1', cfg) + diagnostic_file = get_diagnostic_filename("fig1", cfg) - logger.info('Saving analysis results to %s', diagnostic_file) + logger.info("Saving analysis results to %s", diagnostic_file) + + vars = list(extract_variables(cfg).keys()) + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] list_dict = {} - list_dict['data'] = [res_ar['xhist']] - list_dict['name'] = [{'var_name': 'prw_trends_bins', - 'long_name': 'Water Vapor Path Trend bins', - 'units': 'percent'}] - if trends['cmip5']: - list_dict['data'].append(res_ar['artrend_c5']) - list_dict['name'].append({'var_name': 'prw_trends_cmip5', - 'long_name': 'Water Vapor Path Trends CMIP5', - 'units': 'percent'}) - list_dict['data'].append(res_ar['weights_c5']) - list_dict['name'].append({'var_name': 'data_set_weights', - 'long_name': 'Weights for each data set.', - 'units': '1'}) - list_dict['data'].append(res_ar['kde1_c5'](res_ar['xhist'])) - list_dict['name'].append({'var_name': 'prw_trend_distribution_cmip5', - 'long_name': 'Water Vapor Path Trends ' + - 'distribution CMIP5', - 'units': '1'}) - if trends['cmip6']: - list_dict['data'].append(res_ar['artrend_c6']) - list_dict['name'].append({'var_name': 'prw_trends_cmip6', - 'long_name': 'Water Vapor Path Trends CMIP6', - 'units': 'percent'}) - list_dict['data'].append(res_ar['weights_c6']) - list_dict['name'].append({'var_name': 'data_set_weights', - 'long_name': 'Weights for each data set.', - 'units': '1'}) - list_dict['data'].append(res_ar['kde1_c6'](res_ar['xhist'])) - list_dict['name'].append({'var_name': 'prw_trend_distribution_cmip6', - 'long_name': 'Water Vapor Path Trends ' + - 'distribution CMIP6', - 'units': '1'}) - - if trends['obs']: - for obsname in trends['obs'].keys(): - list_dict['data'].append(trends['obs'][obsname]) - 
list_dict['name'].append({'var_name': 'prw_trend_' + obsname, - 'long_name': 'Water Vapor Path Trend ' + - obsname, - 'units': 'percent'}) + list_dict["data"] = [res_ar["xhist"]] + list_dict["name"] = [ + { + "var_name": var + "_trends_bins", + "long_name": print_long_name + " Trend bins", + "units": "percent", + } + ] + if trends["cmip5"]: + list_dict["data"].append(res_ar["artrend_c5"]) + list_dict["name"].append( + { + "var_name": var + "_trends_cmip5", + "long_name": print_long_name + " Trends CMIP5", + "units": "percent", + } + ) + list_dict["data"].append(res_ar["weights_c5"]) + list_dict["name"].append( + { + "var_name": "data_set_weights", + "long_name": "Weights for each data set.", + "units": "1", + } + ) + list_dict["data"].append(res_ar["kde1_c5"](res_ar["xhist"])) + list_dict["name"].append( + { + "var_name": var + "_trend_distribution_cmip5", + "long_name": print_long_name + " Trends " + "distribution CMIP5", + "units": "1", + } + ) + if trends["cmip6"]: + list_dict["data"].append(res_ar["artrend_c6"]) + list_dict["name"].append( + { + "var_name": var + "_trends_cmip6", + "long_name": print_long_name + " Trends CMIP6", + "units": "percent", + } + ) + list_dict["data"].append(res_ar["weights_c6"]) + list_dict["name"].append( + { + "var_name": "data_set_weights", + "long_name": "Weights for each data set.", + "units": "1", + } + ) + list_dict["data"].append(res_ar["kde1_c6"](res_ar["xhist"])) + list_dict["name"].append( + { + "var_name": var + "_trend_distribution_cmip6", + "long_name": print_long_name + " Trends " + "distribution CMIP6", + "units": "1", + } + ) + + if trends["obs"]: + for obsname in trends["obs"].keys(): + list_dict["data"].append(trends["obs"][obsname]) + list_dict["name"].append( + { + "var_name": var + "_trend_" + obsname, + "long_name": print_long_name + " Trend " + obsname, + "units": "percent", + } + ) iris.save(cube_to_save_vars(list_dict), target=diagnostic_file) - logger.info('Recording provenance of %s:\n%s', diagnostic_file, - pformat(provenance_record)) + logger.info( + "Recording provenance of %s:\n%s", diagnostic_file, + pformat(provenance_record) + ) with ProvenanceLogger(cfg) as provenance_logger: provenance_logger.log(diagnostic_file, provenance_record) def _plot_settings(cfg, axx, fig, figname, period, axx_lim): """Define Settings for pdf figures.""" - for spine in ['top', 'right']: + for spine in ["top", "right"]: axx.spines[spine].set_visible(False) axx.legend(loc=1) - if 'ymax' in cfg.keys(): - ymax = cfg['ymax'] + if "ymax" in cfg.keys(): + ymax = cfg["ymax"] else: - ymax = axx_lim['maxh'] + ymax = axx_lim["maxh"] axx.set_ylim([0, ymax]) - axx.set_ylabel('Probability density') - add = '.' - if 'common_period' in period.keys(): - add = ' for ' + period['common_period'] + '.' - elif 'common_span' in period.keys(): - add = ' for ' + period['common_span'] + ' years.' - if 'common_lats' in period.keys(): - add = ' between ' + period['common_lats'] + add + axx.set_ylabel("Probability density") + add = "." + if "common_period" in period.keys(): + add = " for " + period["common_period"] + "." + elif "common_span" in period.keys(): + add = " for " + period["common_span"] + " years." 
+ if "common_lats" in period.keys(): + add = " between " + period["common_lats"] + add vars = list(extract_variables(cfg).keys()) - var = ' '.join(vars) - if var == 'prw': - var = ' Water Vapour Path' - else: - var = ' ' + var - axx.set_title('Trends in' + var + add) - axx.set_xlabel('Trend (%/decade)') + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] + axx.set_title("Trends in" + print_long_name + add) + axx.set_xlabel("Trend (%/decade)") - axx.set_xlim(axx_lim['xmin'], axx_lim['xmax']) + axx.set_xlim(axx_lim["xmin"], axx_lim["xmax"]) fig.tight_layout() fig.savefig(get_plot_filename(figname, cfg), dpi=300) @@ -534,186 +625,186 @@ def _plot_settings(cfg, axx, fig, figname, period, axx_lim): def _set_extratrends_dict(cfg): """Set dictionary to plot pdf over model ensembles.""" extratrends = {} - for extramodel in cfg['add_model_dist']: + for extramodel in cfg["add_model_dist"]: extratrends[extramodel] = OrderedDict() return extratrends def _write_list_dict(cfg, trends, res_ar): """Collect data for provenance.""" + + vars = list(extract_variables(cfg).keys()) + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] + list_dict = {} - list_dict['data'] = [res_ar['xhist']] - list_dict['name'] = [{'var_name': 'prw_trends_bins', - 'long_name': 'Water Vapor Path Trend bins', - 'units': 'percent'}] - - for extramodel in cfg['add_model_dist']: - list_dict['data'].append(res_ar['artrend'][extramodel]) - list_dict['name'].append({'var_name': 'prw_trends_' + extramodel, - 'long_name': 'Water Vapor Path Trends ' + - extramodel, - 'units': 'percent'}) - list_dict['data'].append(res_ar['kde1'][extramodel](res_ar['xhist'])) - list_dict['name'].append({'var_name': 'prw_trend_distribution_' + - extramodel, - 'long_name': 'Water Vapor Path Trends ' + - 'distribution ' + extramodel, - 'units': '1'}) - - if trends['obs']: - for obsname in trends['obs'].keys(): - list_dict['data'].append(trends['obs'][obsname]) - list_dict['name'].append({'var_name': 'prw_trend_' + obsname, - 'long_name': 'Water Vapor Path Trend ' + - obsname, - 'units': 'percent'}) + list_dict["data"] = [res_ar["xhist"]] + list_dict["name"] = [ + { + "var_name": var + "_trends_bins", + "long_name": print_long_name + " Trend bins", + "units": "percent", + } + ] + + for extramodel in cfg["add_model_dist"]: + list_dict["data"].append(res_ar["artrend"][extramodel]) + list_dict["name"].append( + { + "var_name": var + "_trends_" + extramodel, + "long_name": print_long_name + " Trends " + extramodel, + "units": "percent", + } + ) + list_dict["data"].append(res_ar["kde1"][extramodel](res_ar["xhist"])) + list_dict["name"].append( + { + "var_name": var + "_trend_distribution_" + extramodel, + "long_name": print_long_name + + " Trends " + + "distribution " + + extramodel, + "units": "1", + } + ) + + if trends["obs"]: + for obsname in trends["obs"].keys(): + list_dict["data"].append(trends["obs"][obsname]) + list_dict["name"].append( + { + "var_name": var + "_trend_" + obsname, + "long_name": print_long_name + " Trend " + obsname, + "units": "percent", + } + ) return list_dict def cube_to_save_vars(list_dict): """Create cubes to prepare bar plot data for saving to netCDF.""" - # cubes = iris.cube.CubeList() - for iii, var in enumerate(list_dict['data']): + for iii, var in enumerate(list_dict["data"]): if iii == 0: - cubes = iris.cube.CubeList([ - iris.cube.Cube(var, - var_name=list_dict['name'][iii]['var_name'], - long_name=list_dict['name'][iii]['long_name'], - 
units=list_dict['name'][iii]['units'])]) + cubes = iris.cube.CubeList( + [ + iris.cube.Cube( + var, + var_name=list_dict["name"][iii]["var_name"], + long_name=list_dict["name"][iii]["long_name"], + units=list_dict["name"][iii]["units"], + ) + ] + ) else: cubes.append( - iris.cube.Cube(var, - var_name=list_dict['name'][iii]['var_name'], - long_name=list_dict['name'][iii]['long_name'], - units=list_dict['name'][iii]['units'])) + iris.cube.Cube( + var, + var_name=list_dict["name"][iii]["var_name"], + long_name=list_dict["name"][iii]["long_name"], + units=list_dict["name"][iii]["units"], + ) + ) return cubes -def get_provenance_record(ancestor_files, caption, statistics, - domains, plot_type='probability'): +def get_provenance_record( + ancestor_files, caption, statistics, domains, plot_type="probability" +): """Get Provenance record.""" record = { - 'caption': caption, - 'statistics': statistics, - 'domains': domains, - 'plot_type': plot_type, - 'realms': ['atmos'], - 'themes': ['atmDyn'], - 'authors': [ - 'weigel_katja', + "caption": caption, + "statistics": statistics, + "domains": domains, + "plot_type": plot_type, + "realms": ["atmos"], + "themes": ["atmDyn"], + "authors": [ + "weigel_katja", ], - 'references': [ - 'santer07jclim', - 'santer21jclim', - 'eyring21ipcc', + "references": [ + "santer07jclim", + "santer21jclim", + "eyring21ipcc", ], - 'ancestors': ancestor_files, + "ancestors": ancestor_files, } return record -############################################################################### +############################################################################## # Setup diagnostic -############################################################################### +############################################################################## def main(cfg): """Run the diagnostic.""" - ########################################################################### + ########################################################################## # Read recipe data - ########################################################################### + ########################################################################## # Dataset data containers - input_data = cfg['input_data'].values() + input_data = cfg["input_data"].values() - if not variables_available(cfg, ['prw']): - logger.warning('This diagnostic was written and tested only for prw.') + if not variables_available(cfg, ["prw"]): + logger.warning("This diagnostic was written and tested only for prw.") - ########################################################################### + ########################################################################## # Read data - ########################################################################### + ########################################################################## # Create iris cube for each dataset and save annual means trends = {} - trends['cmip5'] = OrderedDict() - trends['cmip6'] = OrderedDict() - trends['cmip5weights'] = OrderedDict() - trends['cmip6weights'] = OrderedDict() - trends['obs'] = {} - # f_c5 = open(cfg['work_dir'] + "/cmip5_trends.txt", "a") - # f_c5.write("Model Alias Trend Weight Mean Median " + - # "Maximum Mnimum Standard deviation \n") - # f_c6 = open(cfg['work_dir'] + "/cmip6_trends.txt", "a") - # f_c6.write("Model Alias Trend Weight Mean Median " + - # "Maximum Mnimum Standard deviation \n") - - if 'add_model_dist' in cfg: + trends["cmip5"] = OrderedDict() + trends["cmip6"] = OrderedDict() + trends["cmip5weights"] = OrderedDict() + trends["cmip6weights"] = 
OrderedDict() + trends["obs"] = {} + + if "add_model_dist" in cfg: extratrends = _set_extratrends_dict(cfg) valid_datasets, number_of_subdata, period = _get_valid_datasets(input_data) for dataset_path in input_data: - project = dataset_path['project'] - cube_load = iris.load(dataset_path['filename'])[0] + project = dataset_path["project"] + cube_load = iris.load(dataset_path["filename"])[0] cube = _apply_filter(cfg, cube_load) - cat.add_month_number(cube, 'time', name='month_number') - cat.add_year(cube, 'time', name='year') - alias = dataset_path['alias'] - dataset = dataset_path['dataset'] - # selection = select_metadata(input_data, dataset=dataset) - # number_of_subdata = float(len(select_metadata(input_data, - # dataset=dataset))) + cat.add_month_number(cube, "time", name="month_number") + cat.add_year(cube, "time", name="year") + alias = dataset_path["alias"] + dataset = dataset_path["dataset"] + if not _check_full_data(dataset_path, cube): continue cube_anom = _calculate_anomalies(cube) - if project == 'CMIP6': + if project == "CMIP6": trend = _calc_trend(cube_anom) - trends['cmip6'][alias] = trend - trends['cmip6weights'][alias] = 1. / number_of_subdata[dataset] - # f_c6.write(dataset + ' ' + - # alias + ' ' + - # str(round(trend, 4)) + ' ' + - # str(round(1. / number_of_subdata[dataset], 4)) + ' ' + - # str(round(np.mean(cube_only_filt.data), 4)) + ' ' + - # str(round(np.median(cube_only_filt.data), 4)) + ' ' + - # str(round(np.max(cube_only_filt.data), 4)) + ' ' + - # str(round(np.min(cube_only_filt.data), 4)) + ' ' + - # str(round(np.std(cube_only_filt.data), 4)) + '\n') - elif project == 'CMIP5': + trends["cmip6"][alias] = trend + trends["cmip6weights"][alias] = 1.0 / number_of_subdata[dataset] + elif project == "CMIP5": trend = _calc_trend(cube_anom) - trends['cmip5'][alias] = trend - trends['cmip5weights'][alias] = 1. / number_of_subdata[dataset] - # f_c5.write(dataset + ' ' + - # alias + ' ' + - # str(round(trend, 4)) + ' ' + - # str(round(1. / number_of_subdata[dataset], 4)) + ' ' + - # str(round(np.mean(cube_only_filt.data), 4)) + ' ' + - # str(round(np.median(cube_only_filt.data), 4)) + ' ' + - # str(round(np.max(cube_only_filt.data), 4)) + ' ' + - # str(round(np.min(cube_only_filt.data), 4)) + ' ' + - # str(round(np.std(cube_only_filt.data), 4)) + '\n') + trends["cmip5"][alias] = trend + trends["cmip5weights"][alias] = 1.0 / number_of_subdata[dataset] else: trend = _calc_trend(cube_anom) - trends['obs'][dataset] = trend + trends["obs"][dataset] = trend - if 'add_model_dist' in cfg: - if dataset in cfg['add_model_dist']: + if "add_model_dist" in cfg: + if dataset in cfg["add_model_dist"]: extratrends[dataset][alias] = trend - # f_c5.close() - # f_c6.close() axx_lim = _get_ax_limits(cfg, trends) _plot_trends(cfg, trends, valid_datasets, period, axx_lim) - if 'add_model_dist' in cfg: + if "add_model_dist" in cfg: _plot_extratrends(cfg, extratrends, trends, period, axx_lim) - ########################################################################### + ########################################################################## # Process data - ########################################################################### + ########################################################################## -if __name__ == '__main__': +if __name__ == "__main__": with run_diagnostic() as config: main(config)
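The cleanup above does not change the trend computation itself. As a reference for reviewers, the following is a minimal, illustrative sketch (not part of the diff) of what _calculate_anomalies and _calc_trend do together: monthly values are turned into percent anomalies relative to the mean annual cycle, scaled by 120 months so that the per-month regression slope reads as percent per decade, and fitted with scipy.stats.linregress. The synthetic input series and helper name below are assumptions made only for this example.

import numpy as np
from scipy import stats


def percent_per_decade_trend(monthly, months_per_year=12):
    """Illustrative restatement of _calculate_anomalies + _calc_trend.

    Assumes a 1-D monthly series covering whole years, starting in January.
    """
    # Mean annual cycle, one value per calendar month.
    climatology = monthly.reshape(-1, months_per_year).mean(axis=0)
    # Relative anomalies in percent; the factor 120 (months per decade)
    # makes the per-month regression slope read as percent per decade.
    anomalies = (
        (monthly.reshape(-1, months_per_year) - climatology) / climatology
    ).ravel() * 100.0 * 120.0
    # Least-squares trend against the month index.
    return stats.linregress(np.arange(anomalies.size), anomalies).slope


# Synthetic example: 30 years of a seasonal cycle around a mean of 25
# plus a weak drift, giving a trend of roughly 1 percent per decade.
rng = np.random.default_rng(0)
months = np.arange(30 * 12)
series = (
    25.0
    + 3.0 * np.sin(2.0 * np.pi * months / 12.0)
    + 0.002 * months
    + rng.normal(0.0, 0.1, months.size)
)
print(round(percent_per_decade_trend(series), 2))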