From 8e4df3eac2583d609800d8f4c77ea94815ea1d04 Mon Sep 17 00:00:00 2001
From: Katja Weigel
Date: Fri, 22 Nov 2024 13:47:15 +0100
Subject: [PATCH] Cleaning up provenance and ruff changes.

---
 .../diag_scripts/ipcc_ar6/santer21jclimfig.py | 911 ++++++++++--------
 1 file changed, 501 insertions(+), 410 deletions(-)

diff --git a/esmvaltool/diag_scripts/ipcc_ar6/santer21jclimfig.py b/esmvaltool/diag_scripts/ipcc_ar6/santer21jclimfig.py
index e9d9a7467d..fe39e47e55 100755
--- a/esmvaltool/diag_scripts/ipcc_ar6/santer21jclimfig.py
+++ b/esmvaltool/diag_scripts/ipcc_ar6/santer21jclimfig.py
@@ -4,15 +4,15 @@
 """Diagnostic script to plot figure 3.12 of IPCC AR 6 chapter 3 (based on
 Santer et al. (2007) and Santer et al. (2021)).
-###############################################################################
+##############################################################################
 ipcc_ar6/santer21jclimfig.py
 Author: Katja Weigel (IUP, Uni Bremen, Germany)
         EVal4CMIP ans 4C project
-###############################################################################
+##############################################################################
 Description
 -----------
-    Water Vapor Path trends compared to observational and reanalysis data
+    Trends in CMIP5 and CMIP6 compared to observational and reanalysis data
     based on methods used in Santer et al. (2007) and Santer et al. (2021).
     https://www.pnas.org/doi/10.1073/pnas.0702872104
     https://doi.org/10.1175/JCLI-D-20-0768.1
@@ -24,7 +24,7 @@
     grid as the filter. The filter must cover at least the time period used
     for the data.

-###############################################################################
+##############################################################################

"""

@@ -38,36 +38,37 @@
 import matplotlib.pyplot as plt
 import numpy as np
 from scipy import stats
-# from scipy.stats import norm

-from esmvaltool.diag_scripts.shared import (ProvenanceLogger,
-                                            plot, get_diagnostic_filename,
-                                            get_plot_filename,
-                                            group_metadata,
-                                            select_metadata, run_diagnostic,
-                                            variables_available,
-                                            extract_variables)
+from esmvaltool.diag_scripts.shared import (
+    ProvenanceLogger,
+    plot,
+    get_diagnostic_filename,
+    get_plot_filename,
+    group_metadata,
+    select_metadata,
+    run_diagnostic,
+    variables_available,
+    extract_variables,
+)

 logger = logging.getLogger(os.path.basename(__file__))


 def _apply_filter(cfg, cube):
     """Apply filter from RSS Anomalies to all data and calculates mean."""
-    if 'sample_obs' in cfg:
-        filt = iris.load(os.path.join(cfg['auxiliary_data_dir'],
-                                      cfg['sample_obs']))[0]
+    if "sample_obs" in cfg:
+        filt = iris.load(os.path.join(cfg["auxiliary_data_dir"], cfg["sample_obs"]))[0]

-        for iii, dim_str in enumerate(['time', 'latitude', 'longitude']):
+        for iii, dim_str in enumerate(["time", "latitude", "longitude"]):
             filt = _fit_dim(filt, cube, dim_str, iii)

         cube.data = cube.data * filt.data

-    coords = ('longitude', 'latitude')
+    coords = ("longitude", "latitude")
     for coord in coords:
         if not cube.coord(coord).has_bounds():
             cube.coord(coord).guess_bounds()
     cube_grid_areas = iris.analysis.cartography.area_weights(cube)
-    cube = (cube.collapsed(coords, iris.analysis.MEAN,
-                           weights=cube_grid_areas))
+    cube = cube.collapsed(coords, iris.analysis.MEAN, weights=cube_grid_areas)

     return cube


@@ -76,7 +77,7 @@ def _fit_dim(filt, cube, dim_str, dim_nr):
     """Adjust array for given dimension."""
     dimfil = filt.coord(dim_str)
     dimcu = cube.coord(dim_str)
-    if dim_str == 'time':
+    if dim_str == "time":
         start = dimcu.units.num2date(dimcu.points[0])
         end = dimcu.units.num2date(dimcu.points[-1])
         sdim = dimfil.nearest_neighbour_index(dimfil.units.date2num(start))
@@ -88,70 +89,78 @@ def _fit_dim(filt, cube, dim_str, dim_nr):
         edim = dimfil.nearest_neighbour_index(end)

     if dim_nr == 0:
-        filt = filt[sdim:edim + 1, :, :]
+        filt = filt[sdim : edim + 1, :, :]
     elif dim_nr == 1:
-        filt = filt[:, sdim:edim + 1, :]
+        filt = filt[:, sdim : edim + 1, :]
     elif dim_nr == 2:
-        filt = filt[:, :, sdim:edim + 1]
+        filt = filt[:, :, sdim : edim + 1]

     return filt


 def _calc_trend(cube_anom):
     """Calculate trends."""
-    reg_var = stats.linregress(np.linspace(0, len(cube_anom.data) - 1,
-                                           len(cube_anom.data)),
-                               cube_anom.data)
+    reg_var = stats.linregress(
+        np.linspace(0, len(cube_anom.data) - 1, len(cube_anom.data)),
+        cube_anom.data
+    )

     return reg_var.slope


 def _calculate_anomalies(cube):
     """Remove annual cycle from time series."""
-    c_n = cube.aggregated_by('month_number', iris.analysis.MEAN)
-    month_number = cube.coord('month_number').points
+    c_n = cube.aggregated_by("month_number", iris.analysis.MEAN)
+    month_number = cube.coord("month_number").points
     startind = np.where(month_number == 1)
     endind = np.where(month_number == 12)
     data_in = cube.data
     data_new = np.full(data_in.shape, 0.5)
     for iii in range((startind[0])[0], (endind[0])[-1], 12):
-        data_new[iii:iii + 12] = ((cube.data[iii:iii + 12] - c_n.data) /
-                                  c_n.data) * 100.0 * 120.0
+        data_new[iii : iii + 12] = (
+            ((cube.data[iii : iii + 12] - c_n.data) / c_n.data)
+            * 100.0 * 120.0
+        )

     cube.data = data_new
-    cube.units = Unit('percent')
+    cube.units = Unit("percent")

     return cube


 def _check_full_data(dataset_path, cube):
     """Check if cube covers time series from start year to end year."""
-    # cstart_month = cube.coord('month_number').points[0]
-    # cend_month = cube.coord('month_number').points[1]
-    cstart_year = cube.coord('year').points[0]
-    cend_year = cube.coord('year').points[-1]
-    start_year = int(dataset_path['start_year'])
-    end_year = int(dataset_path['end_year'])
+    cstart_year = cube.coord("year").points[0]
+    cend_year = cube.coord("year").points[-1]
+    start_year = int(dataset_path["start_year"])
+    end_year = int(dataset_path["end_year"])
     check = 0
     if start_year == cstart_year and end_year == cend_year:
         check = 1
-        print('Full time series:')
-        print(start_year, cstart_year, end_year, cend_year)
     else:
-        print('FAILED:')
-        print(start_year, cstart_year, end_year, cend_year)
+        logger.info(
+            "Time series incomplete: "
+            "Start(Recipe, Data): %i, %i; "
+            "End(Recipe, Data): %i, %i. "
+            "Dataset %s excluded",
+            start_year,
+            cstart_year,
+            end_year,
+            cend_year,
+            dataset_path,
+        )

     return check


 def _get_hem_letter(lat):
     """Get S or N from Latitude."""
-    shem = ''
+    shem = ""
     if lat < 0:
-        shem = 'S'
+        shem = "S"
     if lat > 0:
-        shem = 'N'
+        shem = "N"

     return shem


@@ -159,124 +168,147 @@ def _get_hem_letter(lat):
 def _get_valid_datasets(input_data):
     """Get valid datasets list and number for each model."""
     number_of_subdata = OrderedDict()
-    available_dataset = list(group_metadata(input_data, 'dataset'))
+    available_dataset = list(group_metadata(input_data, "dataset"))
     valid_datasets = []
     period = {}
-    period['start_year'] = []
-    period['end_year'] = []
-    period['span'] = []
-    period['slat'] = []
-    period['elat'] = []
+    period["start_year"] = []
+    period["end_year"] = []
+    period["span"] = []
+    period["slat"] = []
+    period["elat"] = []
     for dataset in available_dataset:
         meta = select_metadata(input_data, dataset=dataset)
         number_of_subdata[dataset] = float(len(meta))
         for dataset_path in meta:
-            cube =
iris.load(dataset_path['filename'])[0] - cat.add_year(cube, 'time', name='year') + cube = iris.load(dataset_path["filename"])[0] + cat.add_year(cube, "time", name="year") if not _check_full_data(dataset_path, cube): number_of_subdata[dataset] = number_of_subdata[dataset] - 1 else: - valid_datasets.append(dataset_path['filename']) - period['start_year'].append(int(dataset_path['start_year'])) - period['end_year'].append(int(dataset_path['end_year'])) - period['span'].append(int(dataset_path['end_year']) - - int(dataset_path['start_year']) + 1) - period['slat'].append(round(cube.coord('latitude').points[0])) - period['elat'].append(round(cube.coord('latitude').points[-1])) - if min(period['span']) == max(period['span']): - period['common_span'] = str(min(period['span'])) - if min(period['start_year']) == max(period['start_year']) and\ - min(period['end_year']) == max(period['end_year']): - period['common_period'] = str(min(period['start_year'])) + ' - ' +\ - str(min(period['end_year'])) - - if min(period['slat']) == max(period['slat']) and\ - min(period['elat']) == max(period['elat']): - shem = _get_hem_letter(min(period['slat'])) - ehem = _get_hem_letter(min(period['elat'])) - - period['common_lats'] = str(abs(min(period['slat']))) + '°' + shem +\ - ' - ' + str(abs(min(period['elat']))) + '°' + ehem + valid_datasets.append(dataset_path["filename"]) + period["start_year"].append(int(dataset_path["start_year"])) + period["end_year"].append(int(dataset_path["end_year"])) + period["span"].append( + int(dataset_path["end_year"]) + - int(dataset_path["start_year"]) + 1 + ) + period["slat"].append(round(cube.coord("latitude").points[0])) + period["elat"].append(round(cube.coord("latitude").points[-1])) + if min(period["span"]) == max(period["span"]): + period["common_span"] = str(min(period["span"])) + if min(period["start_year"]) == max(period["start_year"]) and min( + period["end_year"] + ) == max(period["end_year"]): + period["common_period"] = ( + str(min(period["start_year"])) + " - " + str(min(period["end_year"])) + ) + + if min(period["slat"]) == max(period["slat"]) and min(period["elat"]) == max( + period["elat"] + ): + shem = _get_hem_letter(min(period["slat"])) + ehem = _get_hem_letter(min(period["elat"])) + + period["common_lats"] = ( + str(abs(min(period["slat"]))) + + "°" + + shem + + " - " + + str(abs(min(period["elat"]))) + + "°" + + ehem + ) return valid_datasets, number_of_subdata, period def _plot_extratrends(cfg, extratrends, trends, period, axx_lim): """Plot trends for ensembles.""" - res_ar = {'artrend': {}, 'kde1': {}} - # res_ar = {'xhist': np.linspace(axx_lim['histmin'], - # axx_lim['histmax'], 41), - # 'artrend': {}, - # 'kde1': {}} - res_ar['xval'] = axx_lim['xval'] - res_ar['xhist'] = axx_lim['xhist'] + res_ar = {"artrend": {}, "kde1": {}} + res_ar["xval"] = axx_lim["xval"] + res_ar["xhist"] = axx_lim["xhist"] fig, axx = plt.subplots(figsize=(8, 6)) valid_datasets = [] - for xtrmdl in cfg['add_model_dist']: + for xtrmdl in cfg["add_model_dist"]: alias = list(extratrends[xtrmdl].keys())[0] - valid_datasets.append(select_metadata(cfg['input_data'].values(), - alias=alias)[0]['filename']) - if alias in trends['cmip6'].keys(): - style = plot.get_dataset_style(xtrmdl, style_file='cmip6') - elif alias in trends['cmip5'].keys(): - style = plot.get_dataset_style(xtrmdl, style_file='cmip5') + valid_datasets.append( + select_metadata(cfg["input_data"].values(), alias=alias)[0]["filename"] + ) + if alias in trends["cmip6"].keys(): + style = plot.get_dataset_style(xtrmdl, 
style_file="cmip6") + elif alias in trends["cmip5"].keys(): + style = plot.get_dataset_style(xtrmdl, style_file="cmip5") else: - style = {'facecolor': (0, 0, 1, 0.2), - 'color': (0, 0, 1, 1.0)} - - res_ar['artrend'][xtrmdl] = np.fromiter(extratrends[xtrmdl].values(), - dtype=float) - res_ar['kde1'][xtrmdl] = stats.gaussian_kde(res_ar['artrend'][xtrmdl], - bw_method='scott') - hbin, bins1, patches = axx.hist(res_ar['artrend'][xtrmdl], - bins=res_ar['xhist'], - density=True, - edgecolor=style['color'], - facecolor=style['facecolor']) + style = {"facecolor": (0, 0, 1, 0.2), "color": (0, 0, 1, 1.0)} + + res_ar["artrend"][xtrmdl] = np.fromiter( + extratrends[xtrmdl].values(), dtype=float + ) + res_ar["kde1"][xtrmdl] = stats.gaussian_kde( + res_ar["artrend"][xtrmdl], bw_method="scott" + ) + hbin, bins1, patches = axx.hist( + res_ar["artrend"][xtrmdl], + bins=res_ar["xhist"], + density=True, + edgecolor=style["color"], + facecolor=style["facecolor"], + ) del bins1, patches - axx.plot(res_ar['xval'], - res_ar['kde1'][xtrmdl](res_ar['xval']), - color=style['color'], - linewidth=3, - label=xtrmdl) - axx_lim['maxh'] = (1.0 + 0.5 * axx_lim['factor']) * np.max(hbin) + axx.plot( + res_ar["xval"], + res_ar["kde1"][xtrmdl](res_ar["xval"]), + color=style["color"], + linewidth=3, + label=xtrmdl, + ) + axx_lim["maxh"] = (1.0 + 0.5 * axx_lim["factor"]) * np.max(hbin) caption = _plot_obs(trends, axx, axx_lim) - caption = _plot_settings(cfg, axx, fig, 'fig2', period, axx_lim) + caption + caption = _plot_settings(cfg, axx, fig, "fig2", period, axx_lim) + caption plt.close() - caption = 'Probability density function of the decadal trend in ' + \ - 'the Water Vapor Path' + caption - - provenance_record = get_provenance_record(valid_datasets, - caption, ['trend', 'other'], - ['reg']) - - diagnostic_file = get_diagnostic_filename('fig2', cfg) - - logger.info('Saving analysis results to %s', diagnostic_file) - - iris.save(cube_to_save_vars(_write_list_dict(cfg, - trends, - res_ar)), - target=diagnostic_file) - - logger.info('Recording provenance of %s:\n%s', diagnostic_file, - pformat(provenance_record)) + vars = list(extract_variables(cfg).keys()) + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] + caption = ( + "Probability density function of the decadal trend in " + + "the " + + print_long_name + + caption + ) + + provenance_record = get_provenance_record( + valid_datasets, caption, ["trend", "other"], ["reg"] + ) + + diagnostic_file = get_diagnostic_filename("fig2", cfg) + + logger.info("Saving analysis results to %s", diagnostic_file) + + iris.save( + cube_to_save_vars(_write_list_dict(cfg, trends, res_ar)), + target=diagnostic_file + ) + + logger.info( + "Recording provenance of %s:\n%s", diagnostic_file, + pformat(provenance_record) + ) with ProvenanceLogger(cfg) as provenance_logger: provenance_logger.log(diagnostic_file, provenance_record) def _plot_obs(trends, axx, axx_lim): """Plot observational or reanalysis data as vertical line.""" - obs_str = '' + obs_str = "" # IPCC colors for obs from # https://github.com/IPCC-WG1/colormaps/blob/master/ # categorical_colors_rgb_0-255/dark_cat.txt - if trends['obs']: - obs_str = ' Vertical lines show the trend for' - for iii, obsname in enumerate(trends['obs'].keys()): + if trends["obs"]: + obs_str = " Vertical lines show the trend for" + for iii, obsname in enumerate(trends["obs"].keys()): obscoli = float(iii) if iii > 4: obscoli = float(iii) - 4.5 @@ -284,78 +316,97 @@ def _plot_obs(trends, axx, axx_lim): obscoli = float(iii) - 
8.75 if iii > 12: obscoli = float(iii) - 12.25 - plotobscol = (1.0 - 0.25 * obscoli, 0.25 * obscoli, - 0.5 + obscoli * 0.1) - if obsname == 'RSS': - plotobscol = (221 / 255.0, 84 / 255.0, 46 / 255.0,) + plotobscol = (1.0 - 0.25 * obscoli, 0.25 * obscoli, 0.5 + obscoli * 0.1) + if obsname == "RSS": + plotobscol = ( + 221 / 255.0, + 84 / 255.0, + 46 / 255.0, + ) # obsnamep = 'RSS' - if obsname == 'CRU': + if obsname == "CRU": plotobscol = (0.8, 0.4, 0.1, 1) - if obsname == 'ERA5': + if obsname == "ERA5": # obscol = [(0.8, 0.4, 0.1, 1), (1, 0, 0.2, 1), # (0.8, 0.1, 0.4, 1), (1, 0.6, 0, 1)] # plotobscol = (1, 0, 0.2, 1) - plotobscol = (128 / 255.0, 54 / 255.0, 168 / 255.0,) + plotobscol = ( + 128 / 255.0, + 54 / 255.0, + 168 / 255.0, + ) # obsnamep = 'ERA5.1' - if obsname == 'MERRA2': + if obsname == "MERRA2": plotobscol = (0.8, 0.1, 0.4, 1) - if obsname == 'NCEP-NCAR-R1': + if obsname == "NCEP-NCAR-R1": plotobscol = (1, 0.6, 0, 1) - axx.vlines(trends['obs'][obsname], 0, axx_lim['maxh'], - colors=plotobscol, - linewidth=3, - label=obsname) - obs_str = obs_str + '.' + axx.vlines( + trends["obs"][obsname], + 0, + axx_lim["maxh"], + colors=plotobscol, + linewidth=3, + label=obsname, + ) + obs_str = obs_str + "." return obs_str def _get_ax_limits(cfg, trends): - """ Automatically find plot bounds, if 'histmin' and 'histmax' not given. + """Automatically find plot bounds, if 'histmin' and 'histmax' not given. - If no values are given, the limits for the x-axis are then determined - by the width of the histograms (plus offset). + If no values are given, the limits for the x-axis are then determined + by the width of the histograms (plus offset). """ - axx_lim = {'factor': 0.3} - # factor determines offset between limits of histogram and x-axis - # depending on the histogram width + axx_lim = {"factor": 0.3} - if 'histmin' in cfg.keys(): - histmin = cfg['histmin'] + if "histmin" in cfg.keys(): + histmin = cfg["histmin"] xmin = histmin else: histmin = 1000 - for label in ['cmip5', 'cmip6', 'obs']: + for label in ["cmip5", "cmip6", "obs"]: if trends[label]: - histmin = np.min(np.array([histmin, - np.min(np.fromiter( - trends[label].values(), - dtype=float))])) - - if 'histmax' in cfg.keys(): - histmax = cfg['histmax'] + histmin = np.min( + np.array( + [ + histmin, + np.min(np.fromiter(trends[label].values(), + dtype=float)), + ] + ) + ) + + if "histmax" in cfg.keys(): + histmax = cfg["histmax"] xmax = histmax else: histmax = -1000 - for label in ['cmip5', 'cmip6', 'obs']: + for label in ["cmip5", "cmip6", "obs"]: if trends[label]: - histmax = np.max(np.array([histmax, - np.max(np.fromiter( - trends[label].values(), - dtype=float))])) - - if 'histmin' not in cfg.keys(): - xmin = histmin - axx_lim['factor'] * abs(abs(histmax) - abs(histmin)) - - if 'histmax' not in cfg.keys(): - xmax = histmax + axx_lim['factor'] * abs(abs(histmax) - abs(histmin)) + histmax = np.max( + np.array( + [ + histmax, + np.max(np.fromiter(trends[label].values(), + dtype=float)), + ] + ) + ) + + if "histmin" not in cfg.keys(): + xmin = histmin - axx_lim["factor"] * abs(abs(histmax) - abs(histmin)) + + if "histmax" not in cfg.keys(): + xmax = histmax + axx_lim["factor"] * abs(abs(histmax) - abs(histmin)) # Saving values in dictionary axx_lim: - axx_lim['xmin'] = xmin - axx_lim['xmax'] = xmax - axx_lim['histmin'] = histmin - axx_lim['histmax'] = histmax - axx_lim['xhist'] = np.linspace(histmin, histmax, 41) - axx_lim['xval'] = np.linspace(xmin, xmax, 41) + axx_lim["xmin"] = xmin + axx_lim["xmax"] = xmax + axx_lim["histmin"] = 
histmin + axx_lim["histmax"] = histmax + axx_lim["xhist"] = np.linspace(histmin, histmax, 41) + axx_lim["xval"] = np.linspace(xmin, xmax, 41) return axx_lim @@ -363,13 +414,13 @@ def _get_ax_limits(cfg, trends): def _plot_trends(cfg, trends, valid_datasets, period, axx_lim): """Plot probability density function of trends. - The probability density function is estimated by using Gaussian kernel - density estimation. The models are weighted by their respective number - of realisations. Trends are depicted in a histogram, the probability - density function as a curve. + The probability density function is estimated by using Gaussian kernel + density estimation. The models are weighted by their respective number + of realisations. Trends are depicted in a histogram, the probability + density function as a curve. """ - res_ar = {'xval': axx_lim['xval']} - res_ar['xhist'] = axx_lim['xhist'] + res_ar = {"xval": axx_lim["xval"]} + res_ar["xhist"] = axx_lim["xhist"] fig, axx = plt.subplots(figsize=(8, 6)) maxh = -1.0 @@ -377,154 +428,194 @@ def _plot_trends(cfg, trends, valid_datasets, period, axx_lim): # https://github.com/IPCC-WG1/colormaps/blob/master/ # categorical_colors_rgb_0-255/cmip_cat.txt # CMIP5 - if trends['cmip5']: - res_ar['artrend_c5'] = np.fromiter(trends['cmip5'].values(), - dtype=float) - res_ar['weights_c5'] = np.fromiter(trends['cmip5weights'].values(), + if trends["cmip5"]: + res_ar["artrend_c5"] = np.fromiter(trends["cmip5"].values(), dtype=float) + res_ar["weights_c5"] = np.fromiter(trends["cmip5weights"].values(), dtype=float) - # yyy_c5, xxx_c5 = np.histogram(artrend_c5, bins=xhist) - res_ar['kde1_c5'] = stats.gaussian_kde(res_ar['artrend_c5'], - weights=res_ar['weights_c5'], - bw_method='scott') - hbinx1, bins1, patches = axx.hist(res_ar['artrend_c5'], - bins=res_ar['xhist'], - density=True, - weights=res_ar['weights_c5'], - edgecolor=(37 / 250.0, 81 / - 255.0, 204 / 255.0, 1.0), - facecolor=(37 / 250.0, 81 / - 255.0, 204 / 255.0, 0.2)) + res_ar["kde1_c5"] = stats.gaussian_kde( + res_ar["artrend_c5"], weights=res_ar["weights_c5"], + bw_method="scott" + ) + hbinx1, bins1, patches = axx.hist( + res_ar["artrend_c5"], + bins=res_ar["xhist"], + density=True, + weights=res_ar["weights_c5"], + edgecolor=(37 / 250.0, 81 / 255.0, 204 / 255.0, 1.0), + facecolor=(37 / 250.0, 81 / 255.0, 204 / 255.0, 0.2), + ) del bins1, patches - axx.plot(res_ar['xval'], res_ar['kde1_c5'](res_ar['xval']), - color=(37 / 250.0, 81 / 255.0, 204 / 255.0, 1), - linewidth=3, - label='CMIP5') + axx.plot( + res_ar["xval"], + res_ar["kde1_c5"](res_ar["xval"]), + color=(37 / 250.0, 81 / 255.0, 204 / 255.0, 1), + linewidth=3, + label="CMIP5", + ) maxh = np.max([maxh, np.max(hbinx1)]) - # maxh = np.max(kde1_c5(xhist)) # CMIP6 - if trends['cmip6']: - res_ar['artrend_c6'] = np.fromiter(trends['cmip6'].values(), + if trends["cmip6"]: + res_ar["artrend_c6"] = np.fromiter(trends["cmip6"].values(), dtype=float) + res_ar["weights_c6"] = np.fromiter(trends["cmip6weights"].values(), dtype=float) - res_ar['weights_c6'] = np.fromiter(trends['cmip6weights'].values(), - dtype=float) - # yyy_c6, xxx_c6 = np.histogram(artrend_c6, bins=xhist) - res_ar['kde1_c6'] = stats.gaussian_kde(res_ar['artrend_c6'], - weights=res_ar['weights_c6'], - bw_method='scott') - hbinx2, bins1, patches = axx.hist(res_ar['artrend_c6'], - bins=res_ar['xhist'], - density=True, - weights=res_ar['weights_c6'], - edgecolor=(204 / 250.0, 35 / - 255.0, 35 / 255.0, 1.0), - facecolor=(204 / 250.0, 35 / - 255.0, 35 / 255.0, 0.2)) + res_ar["kde1_c6"] = 
stats.gaussian_kde( + res_ar["artrend_c6"], weights=res_ar["weights_c6"], bw_method="scott" + ) + hbinx2, bins1, patches = axx.hist( + res_ar["artrend_c6"], + bins=res_ar["xhist"], + density=True, + weights=res_ar["weights_c6"], + edgecolor=(204 / 250.0, 35 / 255.0, 35 / 255.0, 1.0), + facecolor=(204 / 250.0, 35 / 255.0, 35 / 255.0, 0.2), + ) del bins1, patches - axx.plot(res_ar['xval'], res_ar['kde1_c6'](res_ar['xval']), - color=(204 / 250.0, 35 / 255.0, 35 / 255.0, 1), - linewidth=3, - label='CMIP6') + axx.plot( + res_ar["xval"], + res_ar["kde1_c6"](res_ar["xval"]), + color=(204 / 250.0, 35 / 255.0, 35 / 255.0, 1), + linewidth=3, + label="CMIP6", + ) maxh = np.max([maxh, np.max(hbinx2)]) - # maxh = np.max(kde1_c6(xhist)) # Find the highest bin in the histograms for the y-axis limit - axx_lim['maxh'] = maxh * (1.0 + 0.5 * axx_lim['factor']) + axx_lim["maxh"] = maxh * (1.0 + 0.5 * axx_lim["factor"]) caption = _plot_obs(trends, axx, axx_lim) - caption = _plot_settings(cfg, axx, fig, 'fig1', period, axx_lim) + caption + caption = _plot_settings(cfg, axx, fig, "fig1", period, axx_lim) + caption plt.close() - caption = 'Probability density function of the decadal trend in ' + \ - 'the Water Vapor Path' + caption + vars = list(extract_variables(cfg).keys()) + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] + caption = ( + "Probability density function of the decadal trend in " + + "the " + + print_long_name + + caption + ) - provenance_record = get_provenance_record(valid_datasets, - caption, ['trend', 'other'], - ['reg']) + provenance_record = get_provenance_record( + valid_datasets, caption, ["trend", "other"], ["reg"] + ) - diagnostic_file = get_diagnostic_filename('fig1', cfg) + diagnostic_file = get_diagnostic_filename("fig1", cfg) - logger.info('Saving analysis results to %s', diagnostic_file) + logger.info("Saving analysis results to %s", diagnostic_file) + + vars = list(extract_variables(cfg).keys()) + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] list_dict = {} - list_dict['data'] = [res_ar['xhist']] - list_dict['name'] = [{'var_name': 'prw_trends_bins', - 'long_name': 'Water Vapor Path Trend bins', - 'units': 'percent'}] - if trends['cmip5']: - list_dict['data'].append(res_ar['artrend_c5']) - list_dict['name'].append({'var_name': 'prw_trends_cmip5', - 'long_name': 'Water Vapor Path Trends CMIP5', - 'units': 'percent'}) - list_dict['data'].append(res_ar['weights_c5']) - list_dict['name'].append({'var_name': 'data_set_weights', - 'long_name': 'Weights for each data set.', - 'units': '1'}) - list_dict['data'].append(res_ar['kde1_c5'](res_ar['xhist'])) - list_dict['name'].append({'var_name': 'prw_trend_distribution_cmip5', - 'long_name': 'Water Vapor Path Trends ' + - 'distribution CMIP5', - 'units': '1'}) - if trends['cmip6']: - list_dict['data'].append(res_ar['artrend_c6']) - list_dict['name'].append({'var_name': 'prw_trends_cmip6', - 'long_name': 'Water Vapor Path Trends CMIP6', - 'units': 'percent'}) - list_dict['data'].append(res_ar['weights_c6']) - list_dict['name'].append({'var_name': 'data_set_weights', - 'long_name': 'Weights for each data set.', - 'units': '1'}) - list_dict['data'].append(res_ar['kde1_c6'](res_ar['xhist'])) - list_dict['name'].append({'var_name': 'prw_trend_distribution_cmip6', - 'long_name': 'Water Vapor Path Trends ' + - 'distribution CMIP6', - 'units': '1'}) - - if trends['obs']: - for obsname in trends['obs'].keys(): - list_dict['data'].append(trends['obs'][obsname]) - 
list_dict['name'].append({'var_name': 'prw_trend_' + obsname, - 'long_name': 'Water Vapor Path Trend ' + - obsname, - 'units': 'percent'}) + list_dict["data"] = [res_ar["xhist"]] + list_dict["name"] = [ + { + "var_name": var + "_trends_bins", + "long_name": print_long_name + " Trend bins", + "units": "percent", + } + ] + if trends["cmip5"]: + list_dict["data"].append(res_ar["artrend_c5"]) + list_dict["name"].append( + { + "var_name": var + "_trends_cmip5", + "long_name": print_long_name + " Trends CMIP5", + "units": "percent", + } + ) + list_dict["data"].append(res_ar["weights_c5"]) + list_dict["name"].append( + { + "var_name": "data_set_weights", + "long_name": "Weights for each data set.", + "units": "1", + } + ) + list_dict["data"].append(res_ar["kde1_c5"](res_ar["xhist"])) + list_dict["name"].append( + { + "var_name": var + "_trend_distribution_cmip5", + "long_name": print_long_name + " Trends " + "distribution CMIP5", + "units": "1", + } + ) + if trends["cmip6"]: + list_dict["data"].append(res_ar["artrend_c6"]) + list_dict["name"].append( + { + "var_name": var + "_trends_cmip6", + "long_name": print_long_name + " Trends CMIP6", + "units": "percent", + } + ) + list_dict["data"].append(res_ar["weights_c6"]) + list_dict["name"].append( + { + "var_name": "data_set_weights", + "long_name": "Weights for each data set.", + "units": "1", + } + ) + list_dict["data"].append(res_ar["kde1_c6"](res_ar["xhist"])) + list_dict["name"].append( + { + "var_name": var + "_trend_distribution_cmip6", + "long_name": print_long_name + " Trends " + "distribution CMIP6", + "units": "1", + } + ) + + if trends["obs"]: + for obsname in trends["obs"].keys(): + list_dict["data"].append(trends["obs"][obsname]) + list_dict["name"].append( + { + "var_name": var + "_trend_" + obsname, + "long_name": print_long_name + " Trend " + obsname, + "units": "percent", + } + ) iris.save(cube_to_save_vars(list_dict), target=diagnostic_file) - logger.info('Recording provenance of %s:\n%s', diagnostic_file, - pformat(provenance_record)) + logger.info( + "Recording provenance of %s:\n%s", diagnostic_file, + pformat(provenance_record) + ) with ProvenanceLogger(cfg) as provenance_logger: provenance_logger.log(diagnostic_file, provenance_record) def _plot_settings(cfg, axx, fig, figname, period, axx_lim): """Define Settings for pdf figures.""" - for spine in ['top', 'right']: + for spine in ["top", "right"]: axx.spines[spine].set_visible(False) axx.legend(loc=1) - if 'ymax' in cfg.keys(): - ymax = cfg['ymax'] + if "ymax" in cfg.keys(): + ymax = cfg["ymax"] else: - ymax = axx_lim['maxh'] + ymax = axx_lim["maxh"] axx.set_ylim([0, ymax]) - axx.set_ylabel('Probability density') - add = '.' - if 'common_period' in period.keys(): - add = ' for ' + period['common_period'] + '.' - elif 'common_span' in period.keys(): - add = ' for ' + period['common_span'] + ' years.' - if 'common_lats' in period.keys(): - add = ' between ' + period['common_lats'] + add + axx.set_ylabel("Probability density") + add = "." + if "common_period" in period.keys(): + add = " for " + period["common_period"] + "." + elif "common_span" in period.keys(): + add = " for " + period["common_span"] + " years." 
+ if "common_lats" in period.keys(): + add = " between " + period["common_lats"] + add vars = list(extract_variables(cfg).keys()) - var = ' '.join(vars) - if var == 'prw': - var = ' Water Vapour Path' - else: - var = ' ' + var - axx.set_title('Trends in' + var + add) - axx.set_xlabel('Trend (%/decade)') + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] + axx.set_title("Trends in" + print_long_name + add) + axx.set_xlabel("Trend (%/decade)") - axx.set_xlim(axx_lim['xmin'], axx_lim['xmax']) + axx.set_xlim(axx_lim["xmin"], axx_lim["xmax"]) fig.tight_layout() fig.savefig(get_plot_filename(figname, cfg), dpi=300) @@ -534,186 +625,186 @@ def _plot_settings(cfg, axx, fig, figname, period, axx_lim): def _set_extratrends_dict(cfg): """Set dictionary to plot pdf over model ensembles.""" extratrends = {} - for extramodel in cfg['add_model_dist']: + for extramodel in cfg["add_model_dist"]: extratrends[extramodel] = OrderedDict() return extratrends def _write_list_dict(cfg, trends, res_ar): """Collect data for provenance.""" + + vars = list(extract_variables(cfg).keys()) + var = " ".join(vars) + print_long_name = extract_variables(cfg)[var]["long_name"] + list_dict = {} - list_dict['data'] = [res_ar['xhist']] - list_dict['name'] = [{'var_name': 'prw_trends_bins', - 'long_name': 'Water Vapor Path Trend bins', - 'units': 'percent'}] - - for extramodel in cfg['add_model_dist']: - list_dict['data'].append(res_ar['artrend'][extramodel]) - list_dict['name'].append({'var_name': 'prw_trends_' + extramodel, - 'long_name': 'Water Vapor Path Trends ' + - extramodel, - 'units': 'percent'}) - list_dict['data'].append(res_ar['kde1'][extramodel](res_ar['xhist'])) - list_dict['name'].append({'var_name': 'prw_trend_distribution_' + - extramodel, - 'long_name': 'Water Vapor Path Trends ' + - 'distribution ' + extramodel, - 'units': '1'}) - - if trends['obs']: - for obsname in trends['obs'].keys(): - list_dict['data'].append(trends['obs'][obsname]) - list_dict['name'].append({'var_name': 'prw_trend_' + obsname, - 'long_name': 'Water Vapor Path Trend ' + - obsname, - 'units': 'percent'}) + list_dict["data"] = [res_ar["xhist"]] + list_dict["name"] = [ + { + "var_name": var + "_trends_bins", + "long_name": print_long_name + " Trend bins", + "units": "percent", + } + ] + + for extramodel in cfg["add_model_dist"]: + list_dict["data"].append(res_ar["artrend"][extramodel]) + list_dict["name"].append( + { + "var_name": var + "_trends_" + extramodel, + "long_name": print_long_name + " Trends " + extramodel, + "units": "percent", + } + ) + list_dict["data"].append(res_ar["kde1"][extramodel](res_ar["xhist"])) + list_dict["name"].append( + { + "var_name": var + "_trend_distribution_" + extramodel, + "long_name": print_long_name + + " Trends " + + "distribution " + + extramodel, + "units": "1", + } + ) + + if trends["obs"]: + for obsname in trends["obs"].keys(): + list_dict["data"].append(trends["obs"][obsname]) + list_dict["name"].append( + { + "var_name": var + "_trend_" + obsname, + "long_name": print_long_name + " Trend " + obsname, + "units": "percent", + } + ) return list_dict def cube_to_save_vars(list_dict): """Create cubes to prepare bar plot data for saving to netCDF.""" - # cubes = iris.cube.CubeList() - for iii, var in enumerate(list_dict['data']): + for iii, var in enumerate(list_dict["data"]): if iii == 0: - cubes = iris.cube.CubeList([ - iris.cube.Cube(var, - var_name=list_dict['name'][iii]['var_name'], - long_name=list_dict['name'][iii]['long_name'], - 
units=list_dict['name'][iii]['units'])]) + cubes = iris.cube.CubeList( + [ + iris.cube.Cube( + var, + var_name=list_dict["name"][iii]["var_name"], + long_name=list_dict["name"][iii]["long_name"], + units=list_dict["name"][iii]["units"], + ) + ] + ) else: cubes.append( - iris.cube.Cube(var, - var_name=list_dict['name'][iii]['var_name'], - long_name=list_dict['name'][iii]['long_name'], - units=list_dict['name'][iii]['units'])) + iris.cube.Cube( + var, + var_name=list_dict["name"][iii]["var_name"], + long_name=list_dict["name"][iii]["long_name"], + units=list_dict["name"][iii]["units"], + ) + ) return cubes -def get_provenance_record(ancestor_files, caption, statistics, - domains, plot_type='probability'): +def get_provenance_record( + ancestor_files, caption, statistics, domains, plot_type="probability" +): """Get Provenance record.""" record = { - 'caption': caption, - 'statistics': statistics, - 'domains': domains, - 'plot_type': plot_type, - 'realms': ['atmos'], - 'themes': ['atmDyn'], - 'authors': [ - 'weigel_katja', + "caption": caption, + "statistics": statistics, + "domains": domains, + "plot_type": plot_type, + "realms": ["atmos"], + "themes": ["atmDyn"], + "authors": [ + "weigel_katja", ], - 'references': [ - 'santer07jclim', - 'santer21jclim', - 'eyring21ipcc', + "references": [ + "santer07jclim", + "santer21jclim", + "eyring21ipcc", ], - 'ancestors': ancestor_files, + "ancestors": ancestor_files, } return record -############################################################################### +############################################################################## # Setup diagnostic -############################################################################### +############################################################################## def main(cfg): """Run the diagnostic.""" - ########################################################################### + ########################################################################## # Read recipe data - ########################################################################### + ########################################################################## # Dataset data containers - input_data = cfg['input_data'].values() + input_data = cfg["input_data"].values() - if not variables_available(cfg, ['prw']): - logger.warning('This diagnostic was written and tested only for prw.') + if not variables_available(cfg, ["prw"]): + logger.warning("This diagnostic was written and tested only for prw.") - ########################################################################### + ########################################################################## # Read data - ########################################################################### + ########################################################################## # Create iris cube for each dataset and save annual means trends = {} - trends['cmip5'] = OrderedDict() - trends['cmip6'] = OrderedDict() - trends['cmip5weights'] = OrderedDict() - trends['cmip6weights'] = OrderedDict() - trends['obs'] = {} - # f_c5 = open(cfg['work_dir'] + "/cmip5_trends.txt", "a") - # f_c5.write("Model Alias Trend Weight Mean Median " + - # "Maximum Mnimum Standard deviation \n") - # f_c6 = open(cfg['work_dir'] + "/cmip6_trends.txt", "a") - # f_c6.write("Model Alias Trend Weight Mean Median " + - # "Maximum Mnimum Standard deviation \n") - - if 'add_model_dist' in cfg: + trends["cmip5"] = OrderedDict() + trends["cmip6"] = OrderedDict() + trends["cmip5weights"] = OrderedDict() + trends["cmip6weights"] = 
OrderedDict() + trends["obs"] = {} + + if "add_model_dist" in cfg: extratrends = _set_extratrends_dict(cfg) valid_datasets, number_of_subdata, period = _get_valid_datasets(input_data) for dataset_path in input_data: - project = dataset_path['project'] - cube_load = iris.load(dataset_path['filename'])[0] + project = dataset_path["project"] + cube_load = iris.load(dataset_path["filename"])[0] cube = _apply_filter(cfg, cube_load) - cat.add_month_number(cube, 'time', name='month_number') - cat.add_year(cube, 'time', name='year') - alias = dataset_path['alias'] - dataset = dataset_path['dataset'] - # selection = select_metadata(input_data, dataset=dataset) - # number_of_subdata = float(len(select_metadata(input_data, - # dataset=dataset))) + cat.add_month_number(cube, "time", name="month_number") + cat.add_year(cube, "time", name="year") + alias = dataset_path["alias"] + dataset = dataset_path["dataset"] + if not _check_full_data(dataset_path, cube): continue cube_anom = _calculate_anomalies(cube) - if project == 'CMIP6': + if project == "CMIP6": trend = _calc_trend(cube_anom) - trends['cmip6'][alias] = trend - trends['cmip6weights'][alias] = 1. / number_of_subdata[dataset] - # f_c6.write(dataset + ' ' + - # alias + ' ' + - # str(round(trend, 4)) + ' ' + - # str(round(1. / number_of_subdata[dataset], 4)) + ' ' + - # str(round(np.mean(cube_only_filt.data), 4)) + ' ' + - # str(round(np.median(cube_only_filt.data), 4)) + ' ' + - # str(round(np.max(cube_only_filt.data), 4)) + ' ' + - # str(round(np.min(cube_only_filt.data), 4)) + ' ' + - # str(round(np.std(cube_only_filt.data), 4)) + '\n') - elif project == 'CMIP5': + trends["cmip6"][alias] = trend + trends["cmip6weights"][alias] = 1.0 / number_of_subdata[dataset] + elif project == "CMIP5": trend = _calc_trend(cube_anom) - trends['cmip5'][alias] = trend - trends['cmip5weights'][alias] = 1. / number_of_subdata[dataset] - # f_c5.write(dataset + ' ' + - # alias + ' ' + - # str(round(trend, 4)) + ' ' + - # str(round(1. / number_of_subdata[dataset], 4)) + ' ' + - # str(round(np.mean(cube_only_filt.data), 4)) + ' ' + - # str(round(np.median(cube_only_filt.data), 4)) + ' ' + - # str(round(np.max(cube_only_filt.data), 4)) + ' ' + - # str(round(np.min(cube_only_filt.data), 4)) + ' ' + - # str(round(np.std(cube_only_filt.data), 4)) + '\n') + trends["cmip5"][alias] = trend + trends["cmip5weights"][alias] = 1.0 / number_of_subdata[dataset] else: trend = _calc_trend(cube_anom) - trends['obs'][dataset] = trend + trends["obs"][dataset] = trend - if 'add_model_dist' in cfg: - if dataset in cfg['add_model_dist']: + if "add_model_dist" in cfg: + if dataset in cfg["add_model_dist"]: extratrends[dataset][alias] = trend - # f_c5.close() - # f_c6.close() axx_lim = _get_ax_limits(cfg, trends) _plot_trends(cfg, trends, valid_datasets, period, axx_lim) - if 'add_model_dist' in cfg: + if "add_model_dist" in cfg: _plot_extratrends(cfg, extratrends, trends, period, axx_lim) - ########################################################################### + ########################################################################## # Process data - ########################################################################### + ########################################################################## -if __name__ == '__main__': +if __name__ == "__main__": with run_diagnostic() as config: main(config)
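The cleanup above does not change the trend computation itself. As a reference for reviewers, the following is a minimal, illustrative sketch (not part of the diff) of what _calculate_anomalies and _calc_trend do together: monthly values are turned into percent anomalies relative to the mean annual cycle, scaled by 120 months so that the per-month regression slope reads as percent per decade, and fitted with scipy.stats.linregress. The synthetic input series and helper name below are assumptions made only for this example.

import numpy as np
from scipy import stats


def percent_per_decade_trend(monthly, months_per_year=12):
    """Illustrative restatement of _calculate_anomalies + _calc_trend.

    Assumes a 1-D monthly series covering whole years, starting in January.
    """
    # Mean annual cycle, one value per calendar month.
    climatology = monthly.reshape(-1, months_per_year).mean(axis=0)
    # Relative anomalies in percent; the factor 120 (months per decade)
    # makes the per-month regression slope read as percent per decade.
    anomalies = (
        (monthly.reshape(-1, months_per_year) - climatology) / climatology
    ).ravel() * 100.0 * 120.0
    # Least-squares trend against the month index.
    return stats.linregress(np.arange(anomalies.size), anomalies).slope


# Synthetic example: 30 years of a seasonal cycle around a mean of 25
# plus a weak drift, giving a trend of roughly 1 percent per decade.
rng = np.random.default_rng(0)
months = np.arange(30 * 12)
series = (
    25.0
    + 3.0 * np.sin(2.0 * np.pi * months / 12.0)
    + 0.002 * months
    + rng.normal(0.0, 0.1, months.size)
)
print(round(percent_per_decade_trend(series), 2))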