Skip to content

Commit

Permalink
adding provenance
Browse files Browse the repository at this point in the history
  • Loading branch information
Elizaveta Malinina committed Nov 29, 2024
1 parent 2a81683 commit 5774e5a
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 51 deletions.
37 changes: 24 additions & 13 deletions esmvaltool/diag_scripts/eccc_extremes/simple_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,13 @@ def obtain_reference(data_group: list):
maxs = list()

for dataset_f in ref_fnames.keys():
dataset_n = ref_fnames[dataset_f][0]['dataset']
if len(group_metadata(data_group, 'dataset')[dataset_n])>1:
key = ref_fnames[dataset_f][0]['alias']
else:
key = dataset_n
ref_cb = iris.load_cube(dataset_f)
reference_dic[dataset_f] = ref_cb
reference_dic[key] = ref_cb
# Key by dataset name when the dataset has a single entry; otherwise key by alias.
mins.append(ref_cb.collapsed('time', iris.analysis.MIN).data)
maxs.append(ref_cb.collapsed('time', iris.analysis.MAX).data)
Expand All @@ -55,7 +60,7 @@ def obtain_reference(data_group: list):
def create_provenance(caption: str):
'''Creates provenance dictionary'''

provenance_dic = {'authors': ['malinina24'],
provenance_dic = {'authors': ['malinina_elizaveta'],
'caption': caption,
'references': ['malinina24']}

Expand Down Expand Up @@ -93,41 +98,43 @@ def plot_timeseries(data_dic: dict, reference_dic: dict, cfg: dict,
model_var_data = list()
weights = list()

for i in range(0,len(data_dic[dataset]['var_data'])):
var_cb = data_dic[dataset]['var_data'][i]
for i in range(0,len(data_dic[dataset])):
var_cb = data_dic[dataset][i]
model_var_data.append(var_cb.data)
weights.append(var_cb.attributes['ensemble_weight']*
var_cb[i].attributes['reverse_dtsts_n'])
var_cb.attributes['reverse_dtsts_n'])
weights = np.array(weights)
model_var_data = np.array(model_var_data)
mean_var_arr = np.average(model_var_data, axis=0)
min_var_arr = np.min(model_var_data, axis=0)
max_var_arr = np.max(model_var_data, axis=0)

years = [var_cb.coord('time').cell(i).year for i in range(len(var_cb.coord('time')))]
years = [var_cb.coord('time').cell(i).point.year for i in range(var_cb.coord('time').shape[0])]

color_st = eplot.get_dataset_style(dataset, cfg.get('color_style'))
ax_ts.plot(years, mean_var_arr, c=color_st['color'], zorder=3, label=dataset)
if len(data_dic[dataset]['var_data'])>1:
if len(data_dic[dataset])>1:
ax_ts.fill_between(years, min_var_arr, max_var_arr, color=color_st['color'], lw=0, alpha=0.25)

for ref_data in reference_dic.keys():
ref_cb = reference_dic.get(ref_data)
ref_color_st = eplot.get_dataset_style(ref_data, cfg.get('color_style'))
ref_years = [ref_cb.coord('time').cell(i).year for i in range(len(ref_cb.coord('time')))]
ref_years = [ref_cb.coord('time').cell(i).point.year for i in range(ref_cb.coord('time').shape[0])]
ax_ts.plot(ref_years,ref_cb.data, label=ref_data, c=ref_color_st['color'], zorder=3)

ax_ts.axhline(color='grey', zorder=1)
ax_ts.set_ylim(b_min, b_max)
ax_ts.legend(loc=0, ncols=4, fancybox=False, frameon=False)

variable = cfg['var_label'] if cfg.get('var_label') else var_cb.var_name
exp_variable = variable.replace('_', ' ')
units = cfg['units'] if cfg.get('units') else var_cb.units
region = cfg['region'] if cfg.get('units') else 'region'

ax_ts.set_ylabel(f'{variable.replace('_', ' ')} , {units}')
ax_ts.set_ylabel(f'{exp_variable}, {units}')
ax_ts.set_xlabel('year')

default_caption = f'Timeseries of {variable} in {region}'
default_caption = f'Timeseries of {exp_variable} in {region}'

caption = cfg['figure_caption'] if cfg.get('figure_caption') else default_caption

Expand Down Expand Up @@ -160,7 +167,11 @@ def main(cfg):
maxs.append(ref_max)
mins.append(ref_min)

datasets = group_metadata(groups, 'dataset')
remaining_metadata = []
for k in groups.keys():
remaining_metadata.extend(groups[k])

datasets = group_metadata(remaining_metadata, 'dataset')
ens_var_cubelist = iris.cube.CubeList()
for dataset in datasets.keys():
filepaths = list(group_metadata(datasets[dataset], 'filename').keys())
Expand All @@ -175,8 +186,8 @@ def main(cfg):
mod_var_cubelist.append(mod_cb)
mins.append(mod_cb.collapsed('time', iris.analysis.MIN).data)
maxs.append(mod_cb.collapsed('time', iris.analysis.MAX).data)
data_dic[dataset] = {'var_data': mod_var_cubelist}
data_dic['Multi-Model-Mean'] = {'var_data' : ens_var_cubelist}
data_dic[dataset] = mod_var_cubelist
data_dic['Multi-Model'] = ens_var_cubelist

plot_timeseries(data_dic, reference_dic, cfg,
min_val=np.asarray(mins).min(),
Expand Down
48 changes: 28 additions & 20 deletions esmvaltool/diag_scripts/eccc_extremes/variability_evaluation.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
import csv
import iris
import climextremes as cex
import iris.cube
import pandas as pd
import logging
import numpy as np
import matplotlib.pyplot as plt
import os
from scipy.stats import genextreme as gev

# import internal esmvaltool modules here
from esmvaltool.diag_scripts.shared import run_diagnostic, select_metadata, group_metadata, save_figure
from esmvaltool.diag_scripts.shared import run_diagnostic, group_metadata, save_figure
import esmvaltool.diag_scripts.shared.plot as eplot
from esmvaltool.diag_scripts.ocean import diagnostic_tools as diagtools
# # This part sends debug statements to stdout
logger = logging.getLogger(os.path.basename(__file__))
# logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
Expand All @@ -38,17 +33,21 @@ def obtain_reference(data_group: list):
ref_fnames = group_metadata(data_group, 'filename')

for dataset_f in ref_fnames.keys():
dataset_n = ref_fnames[dataset_f][0]['dataset']
if len(group_metadata(data_group, 'dataset')[dataset_n])>1:
key = ref_fnames[dataset_f][0]['alias']
else:
key = dataset_n
ref_cb = iris.load_cube(dataset_f)
reference_dic[dataset_f] = ref_cb.data.std()
# Key by dataset name when the dataset has a single entry; otherwise key by alias.
reference_dic[key] = ref_cb.data.std()

return reference_dic


def create_provenance(caption: str):
'''Creates provenance dictionary'''

provenance_dic = {'authors': ['malinina24'],
provenance_dic = {'authors': ['malinina_elizaveta'],
'caption': caption,
'references': ['malinina24']}

Expand Down Expand Up @@ -109,9 +108,6 @@ def plot_stdevs(data_dic, reference_dic, cfg):
mpl_st_file = eplot.get_path_to_mpl_style(cfg.get('mpl_style'))
plt.style.use(mpl_st_file)

col_mod = (25/255, 14/255, 79/255)
col_obs = 'indianred'

fig_stds, ax_stds = plt.subplots(nrows=1, ncols=1)

fig_stds.set_size_inches(8., 9.)
Expand All @@ -124,35 +120,43 @@ def plot_stdevs(data_dic, reference_dic, cfg):
color_st = eplot.get_dataset_style(model, cfg.get('color_style'))
if model != 'Multi-Model':
for i in range(0, len(data_dic[model])):
ax_stds.scatter(data_dic[model][i], nm+1, marker='o',
c=color_st['color'], zorder=2, label='individual member')
single_dot = ax_stds.scatter(data_dic[model][i], nm+1, marker='o',
c=color_st['color'], zorder=2,
label='individual member')
y_labs[nm+1] = model
else:
ax_stds.scatter(data_dic[model], 0, c=color_st['color'], s=70, marker='s', zorder=3)
square = ax_stds.scatter(data_dic[model], 0, c=color_st['color'], s=70,
marker='s', zorder=3, label='full ensemble')
y_labs[0] = 'ALL'

legend_handles = [square, single_dot]

for ref_dataset in reference_dic.keys():
ref_color_st = eplot.get_dataset_style(ref_dataset, cfg.get('color_style'))
ax_stds.axvline(reference_dic[ref_dataset], c=ref_color_st['color'], zorder=2,
ref_line = ax_stds.axvline(reference_dic[ref_dataset], c=ref_color_st['color'], zorder=2,
label=ref_dataset)
legend_handles.append(ref_line)
ax_stds.set_ylim(len(data_dic.keys()) -0.8, -0.2)

ax_stds.set_yticks(y_ticks, labels=y_labs)
ax_stds.grid(which='both', c='silver', zorder=1)

ax_stds.set_xlabel('StD of '+cfg['var_label'] + ' anomalies, C')
ax_stds.text(0.02, 0.97, cfg.get('litera')+' '+cfg['region'], fontsize='xx-large', transform=ax_stds.transAxes)

variable = cfg.get('var_label')
exp_variable = variable.replace('_', ' ')
units = cfg.get('units')
region = cfg['region'] if cfg.get('units') else 'region'

ax_stds.set_xlabel(f'StD of {exp_variable}, {units}')

default_caption = f'{variable} varibility in {region}'

caption = cfg['figure_caption'] if cfg.get('figure_caption') else default_caption
fig_stds.suptitle(caption)

prov_dic = create_provenance(caption)

plt.legend(handles=legend_handles, bbox_to_anchor =(0.5,-0.1),
loc='lower center', ncols=4, fancybox=False, frameon=False)

plt.tight_layout()

Expand All @@ -171,9 +175,13 @@ def main(cfg):

reference_dic = obtain_reference(groups.pop('reference'))

remaining_metadata = []
for k in groups.keys():
remaining_metadata.extend(groups[k])

data_dic = {}

datasets = group_metadata(groups, 'dataset')
datasets = group_metadata(remaining_metadata, 'dataset')
ens_var_cubelist = iris.cube.CubeList()
for dataset in datasets.keys():
filepaths = list(group_metadata(datasets[dataset], 'filename').keys())
Expand Down
38 changes: 20 additions & 18 deletions esmvaltool/recipes/eccc_extremes/recipe_region_tnn_analytics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,20 @@ preprocessors:
- Alberta
area_statistics:
operator: mean
sesonal_statistics:
seasons: JASONDJFMAMJ
seasonal_statistics:
seasons: [SONDJFMAMJ, JA]
operator: min
annual_statistics:
operator: min
anomalies:
period: full
reference:
reference:
start_year: 1980
start_month: 7
start_year: 1981
start_month: 1
start_day: 1
end_year: 2010
end_month: 6
end_day: 30
end_month: 12
end_day: 31

preproc_era:
custom_order: True
Expand All @@ -52,19 +53,20 @@ preprocessors:
- Alberta
area_statistics:
operator: mean
sesonal_statistics:
seasons: JASONDJFMAMJ
seasonal_statistics:
seasons: [SONDJFMAMJ, JA]
operator: min
annual_statistics:
operator: min
anomalies:
period: full
reference:
start_year: 1980
start_month: 7
start_year: 1981
start_month: 1
start_day: 1
end_year: 2010
end_month: 6
end_day: 30

end_month: 12
end_day: 31

datasets_cmip: &datasets_cmip
- {dataset: ACCESS-CM2, institute: CSIRO-ARCCSS, ensemble: r(1:10)i1p1f1, grid: gn}
Expand Down Expand Up @@ -121,16 +123,16 @@ diagnostics:
txnx_all:
short_name: tasmin
exp: [historical, ssp245]
timerange: '19410701/20230630'
timerange: '19410901/20230630'
mip: day
project: CMIP6
preprocessor: preproc_txnx_flat
preprocessor: preproc_model
additional_datasets: *datasets_cmip
reference:
short_name: tasmin
timerange: '19410701/20230630'
timerange: '19410901/20230630'
mip: day
preprocessor: preproc_era_txnx
preprocessor: preproc_era
additional_datasets: *dataset_era
scripts:
timeseries:
Expand Down

0 comments on commit 5774e5a

Please sign in to comment.