From e3280ac7128e7ca38a16d73d516ceea351ca21e6 Mon Sep 17 00:00:00 2001 From: Jiwoo Lee Date: Tue, 27 Apr 2021 14:51:26 -0700 Subject: [PATCH 1/6] remove pmp driver to move it to pmp --- pmp_driver/PMPdriver_EnsoMetrics.py | 397 -------------------- pmp_driver/PMPdriver_EnsoMetrics_ObsOnly.py | 213 ----------- pmp_driver/PMPdriver_lib.py | 187 --------- pmp_driver/PMPdriver_plot.py | 165 -------- pmp_driver/README.md | 16 - pmp_driver/my_Param_ENSO.py | 95 ----- pmp_driver/my_Param_ENSO_obs2obs.py | 84 ----- pmp_driver/parallel_driver.py | 185 --------- pmp_driver/parallel_driver_plot.py | 162 -------- pmp_driver/post_process_merge_jsons.py | 100 ----- pmp_driver/run_pmp.sh | 29 -- pmp_driver/run_pmp_parallel.sh | 50 --- pmp_driver/run_pmp_parallel_obs2obs.sh | 26 -- pmp_driver/run_pmp_plot_parallel.sh | 37 -- 14 files changed, 1746 deletions(-) delete mode 100644 pmp_driver/PMPdriver_EnsoMetrics.py delete mode 100644 pmp_driver/PMPdriver_EnsoMetrics_ObsOnly.py delete mode 100644 pmp_driver/PMPdriver_lib.py delete mode 100644 pmp_driver/PMPdriver_plot.py delete mode 100644 pmp_driver/README.md delete mode 100644 pmp_driver/my_Param_ENSO.py delete mode 100644 pmp_driver/my_Param_ENSO_obs2obs.py delete mode 100644 pmp_driver/parallel_driver.py delete mode 100644 pmp_driver/parallel_driver_plot.py delete mode 100644 pmp_driver/post_process_merge_jsons.py delete mode 100755 pmp_driver/run_pmp.sh delete mode 100755 pmp_driver/run_pmp_parallel.sh delete mode 100644 pmp_driver/run_pmp_parallel_obs2obs.sh delete mode 100755 pmp_driver/run_pmp_plot_parallel.sh diff --git a/pmp_driver/PMPdriver_EnsoMetrics.py b/pmp_driver/PMPdriver_EnsoMetrics.py deleted file mode 100644 index ee24d7e1..00000000 --- a/pmp_driver/PMPdriver_EnsoMetrics.py +++ /dev/null @@ -1,397 +0,0 @@ -#!/usr/bin/env python -# ================================================= -# Dependencies -# ------------------------------------------------- -from __future__ import print_function - -import cdms2 -import glob -import json -import os -import pkg_resources -import sys - -from genutil import StringConstructor -from PMPdriver_lib import AddParserArgument -from PMPdriver_lib import metrics_to_json -from PMPdriver_lib import sort_human -from PMPdriver_lib import find_realm, get_file -from EnsoMetrics.EnsoCollectionsLib import CmipVariables, defCollection, ReferenceObservations -from EnsoMetrics.EnsoComputeMetricsLib import ComputeCollection - -# To avoid below error when using multi cores -# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable -os.environ['OPENBLAS_NUM_THREADS'] = '1' - -# ================================================= -# Collect user defined options -# ------------------------------------------------- -param = AddParserArgument() - -# Pre-defined options -mip = param.mip -exp = param.exp -print('mip:', mip) -print('exp:', exp) - -# Path to model data as string template -modpath = param.process_templated_argument("modpath") -modpath_lf = param.process_templated_argument("modpath_lf") - -# Check given model option -models = param.modnames - -# Include all models if conditioned -if ('all' in [m.lower() for m in models]) or (models == 'all'): - model_index_path = param.modpath.split('/')[-1].split('.').index("%(model)") - models = ([p.split('/')[-1].split('.')[model_index_path] for p in glob.glob(modpath( - mip=mip, exp=exp, model='*', realization='*', variable='ts'))]) - # remove duplicates - models = sorted(list(dict.fromkeys(models)), key=lambda s: s.lower()) - 
-print('models:', models) - -# Realizations -realization = param.realization -print('realization: ', realization) - -# Metrics Collection -mc_name = param.metricsCollection -dict_mc = defCollection(mc_name) -list_metric = sorted(dict_mc['metrics_list'].keys()) -print('mc_name:', mc_name) - -# case id -case_id = param.case_id - -# Output -outdir_template = param.process_templated_argument("results_dir") -outdir = StringConstructor(str(outdir_template( - output_type='%(output_type)', - mip=mip, exp=exp, metricsCollection=mc_name, case_id=case_id))) -netcdf_path = outdir(output_type='diagnostic_results') -json_name_template = param.process_templated_argument("json_name") -netcdf_name_template = param.process_templated_argument("netcdf_name") - -print('outdir:', str(outdir_template( - output_type='%(output_type)', - mip=mip, exp=exp, metricsCollection=mc_name))) -print('netcdf_path:', netcdf_path) - -# Switches -debug = param.debug -print('debug:', debug) - -# ================================================= -# Prepare loop iteration -# ------------------------------------------------- -# Environmental setup -try: - egg_pth = pkg_resources.resource_filename( - pkg_resources.Requirement.parse("pcmdi_metrics"), "share/pmp") -except Exception: - egg_pth = os.path.join(sys.prefix, "share", "pmp") -print('egg_pth:', egg_pth) - -# Create output directory -for output_type in ['graphics', 'diagnostic_results', 'metrics_results']: - if not os.path.exists(outdir(output_type=output_type)): - os.makedirs(outdir(output_type=output_type)) - print(outdir(output_type=output_type)) - -# list of variables -list_variables = list() -for metric in list_metric: - listvar = dict_mc['metrics_list'][metric]['variables'] - for var in listvar: - if var not in list_variables: - list_variables.append(var) -list_variables = sorted(list_variables) -print(list_variables) - -# list of observations -list_obs = list() -for metric in list_metric: - dict_var_obs = dict_mc['metrics_list'][metric]['obs_name'] - for var in dict_var_obs.keys(): - for obs in dict_var_obs[var]: - if obs not in list_obs: - list_obs.append(obs) -list_obs = sorted(list_obs) - -# -# finding file and variable name in file for each observations dataset -# -dict_obs = dict() - -for obs in list_obs: - # be sure to add your datasets to EnsoCollectionsLib.ReferenceObservations if needed - dict_var = ReferenceObservations(obs)['variable_name_in_file'] - dict_obs[obs] = dict() - for var in list_variables: - # - # finding variable name in file - # - try: var_in_file = dict_var[var]['var_name'] - except: - print('\033[95m' + str(var) + " is not available for " + str(obs) + " or unscripted" + '\033[0m') - else: - if isinstance(var_in_file, list): - var0 = var_in_file[0] - else: - var0 = var_in_file - - try: - # finding file for 'obs', 'var' - file_name = param.reference_data_path[obs].replace('VAR', var0) - file_areacell = None ## temporary for now - try: - file_landmask = param.reference_data_lf_path[obs] - except: - file_landmask = None - try: - areacell_in_file = dict_var['areacell']['var_name'] - except: - areacell_in_file = None - try: - landmask_in_file = dict_var['landmask']['var_name'] - except: - landmask_in_file = None - # if var_in_file is a list (like for thf) all variables should be read from the same realm - if isinstance(var_in_file, list): - list_files = list() - list_files = [param.reference_data_path[obs].replace('VAR', var1) for var1 in var_in_file] - list_areacell = [file_areacell for var1 in var_in_file] - list_name_area = [areacell_in_file for 
var1 in var_in_file] - try: - list_landmask = [param.reference_data_lf_path[obs] for var1 in var_in_file] - except: - list_landmask = None - list_name_land = [landmask_in_file for var1 in var_in_file] - else: - list_files = file_name - list_areacell = file_areacell - list_name_area = areacell_in_file - list_landmask = file_landmask - list_name_land = landmask_in_file - dict_obs[obs][var] = {'path + filename': list_files, 'varname': var_in_file, - 'path + filename_area': list_areacell, 'areaname': list_name_area, - 'path + filename_landmask': list_landmask, 'landmaskname': list_name_land} - except: - print('\033[95m' + 'Observation dataset' + str(obs) + " is not given for variable " + str(var) + '\033[0m') - -print('PMPdriver: dict_obs readin end') - -# ================================================= -# Loop for Models -# ------------------------------------------------- -# finding file and variable name in file for each observations dataset -dict_metric, dict_dive = dict(), dict() -dict_var = CmipVariables()['variable_name_in_file'] - -print('models:', models) - -for mod in models: - print(' ----- model: ', mod, ' ---------------------') - print('PMPdriver: var loop start for model ', mod) - dict_mod = {mod: {}} - dict_metric[mod], dict_dive[mod] = dict(), dict() - - model_path_list = glob.glob( - modpath(mip=mip, exp=exp, realm='atmos', model=mod, realization='*', variable='ts')) - - model_path_list = sort_human(model_path_list) - if debug: - print('model_path_list:', model_path_list) - - # Find where run can be gripped from given filename template for modpath - print('realization:', realization) - run_in_modpath = modpath(mip=mip, exp=exp, realm='atmos', model=mod, realization=realization, - variable='ts').split('/')[-1].split('.').index(realization) - print('run_in_modpath:', run_in_modpath) - # Collect all available runs - runs_list = [model_path.split('/')[-1].split('.')[run_in_modpath] for model_path in model_path_list] - - # Adjust realization to be included - if realization in ["all" ,"*"]: - pass - elif realization in ["first"]: - runs_list = runs_list[:1] - else: - runs_list = [realization] - - if debug: - print('runs_list:', runs_list) - - # ================================================= - # Loop for Realizations - # ------------------------------------------------- - for run in runs_list: - - print(' --- run: ', run, ' ---') - mod_run = '_'.join([mod, run]) - dict_mod = {mod_run: {}} - - if debug: - print('list_variables:', list_variables) - - try: - for var in list_variables: - print(' --- var: ', var, ' ---') - # finding variable name in file - var_in_file = dict_var[var]['var_name'] - print('var_in_file:', var_in_file) - if isinstance(var_in_file, list): - var0 = var_in_file[0] - else: - var0 = var_in_file - # finding variable type (atmos or ocean) - areacell_in_file, realm = find_realm(var0) - if realm == 'Amon': - realm2 = 'atmos' - elif realm == 'Omon': - realm2 = 'ocean' - else: - realm2 = realm - print('var, areacell_in_file, realm:', var, areacell_in_file, realm) - # - # finding file for 'mod', 'var' - # - file_name = get_file(modpath(mip=mip, realm=realm, exp=exp, model=mod, realization=run, variable=var0)) - file_areacell = get_file(modpath_lf(mip=mip, realm=realm2, model=mod, variable=areacell_in_file)) - if not os.path.isfile(file_areacell): - file_areacell = None - file_landmask = get_file(modpath_lf(mip=mip, realm=realm2, model=mod, variable=dict_var['landmask']['var_name'])) - # -- TEMPORARY -- - if mip == 'cmip6': - if mod in ['IPSL-CM6A-LR', 
'CNRM-CM6-1']: - file_landmask = '/work/lee1043/ESGF/CMIP6/CMIP/'+mod+'/sftlf_fx_'+mod+'_historical_r1i1p1f1_gr.nc' - elif mod in ['GFDL-ESM4']: - file_landmask = modpath_lf(mip=mip, realm="atmos", model='GFDL-CM4', variable=dict_var['landmask']['var_name']) - if mip == 'cmip5': - if mod == "BNU-ESM": - # Incorrect latitude in original sftlf fixed - file_landmask = "/work/lee1043/ESGF/CMIP5/BNU-ESM/sftlf_fx_BNU-ESM_historical_r0i0p0.nc" - elif mod == "HadCM3": - # Inconsistent lat/lon between sftlf and other variables - file_landmask = None - # Inconsistent grid between areacella and tauu (probably staggering grid system) - file_areacell = None - # -- TEMPORARY END -- - """ - try: - areacell_in_file = dict_var['areacell']['var_name'] - except: - areacell_in_file = None - """ - try: - landmask_in_file = dict_var['landmask']['var_name'] - except: - landmask_in_file = None - - if isinstance(var_in_file, list): - list_areacell, list_files, list_landmask, list_name_area, list_name_land = \ - list(), list(), list(), list(), list() - for var1 in var_in_file: - areacell_in_file, realm = find_realm(var1) - modpath_tmp = get_file(modpath(mip=mip, exp=exp, realm=realm, model=mod, realization=realization, variable=var1)) - #modpath_lf_tmp = get_file(modpath_lf(mip=mip, realm=realm2, model=mod, variable=dict_var['landmask']['var_name'])) - if not os.path.isfile(modpath_tmp): - modpath_tmp = None - #if not os.path.isfile(modpath_lf_tmp): - # modpath_lf_tmp = None - file_areacell_tmp = get_file(modpath_lf(mip=mip, realm=realm2, model=mod, variable=areacell_in_file)) - print("file_areacell_tmp:", file_areacell_tmp) - if not os.path.isfile(file_areacell_tmp): - file_areacell_tmp = None - list_files.append(modpath_tmp) - list_areacell.append(file_areacell_tmp) - list_name_area.append(areacell_in_file) - #list_landmask.append(modpath_lf_tmp) - list_landmask.append(file_landmask) - list_name_land.append(landmask_in_file) - else: - if not os.path.isfile(file_name): - file_name = None - if file_landmask is not None: - if not os.path.isfile(file_landmask): - file_landmask = None - list_files = file_name - list_areacell = file_areacell - list_name_area = areacell_in_file - list_landmask = file_landmask - list_name_land = landmask_in_file - - # Variable from ocean grid - if var in ['ssh']: - list_landmask = None - # Temporay control of areacello for models with zos on gr instead on gn - if mod in ['BCC-ESM1', 'CESM2', 'CESM2-FV2', 'CESM2-WACCM', 'CESM2-WACCM-FV2', - 'GFDL-CM4', 'GFDL-ESM4', 'MRI-ESM2-0', # cmip6 - #'BCC-CSM1-1', 'BCC-CSM1-1-M', 'EC-EARTH', 'GFDL-CM3', 'GISS-E2-R', - 'BCC-CSM1-1', 'BCC-CSM1-1-M', 'GFDL-CM3', 'GISS-E2-R', - 'MRI-CGCM3']: # cmip5 - list_areacell = None - - dict_mod[mod_run][var] = { - 'path + filename': list_files, 'varname': var_in_file, - 'path + filename_area': list_areacell, 'areaname': list_name_area, - 'path + filename_landmask': list_landmask, 'landmaskname': list_name_land} - - print('PMPdriver: var loop end') - - # dictionary needed by EnsoMetrics.ComputeMetricsLib.ComputeCollection - dictDatasets = {'model': dict_mod, 'observations': dict_obs} - print('dictDatasets:') - print(json.dumps(dictDatasets, indent=4, sort_keys=True)) - - # regridding dictionary (only if you want to specify the regridding) - dict_regrid = {} - """ - # Usage of dict_regrid (select option as below): - dict_regrid = { - 'regridding': { - 'model_orand_obs': 2, 'regridder': 'cdms', 'regridTool': 'esmf', 'regridMethod': 'linear', - 'newgrid_name': 'generic 1x1deg'}, - } - """ - - # Prepare netcdf file 
setup - json_name = json_name_template(mip=mip, exp=exp, metricsCollection=mc_name, case_id=case_id, model=mod, realization=run) - netcdf_name = netcdf_name_template(mip=mip, exp=exp, metricsCollection=mc_name, case_id=case_id, model=mod, realization=run) - netcdf = os.path.join(netcdf_path, netcdf_name) - - if debug: - print('file_name:', file_name) - print('list_files:', list_files) - print('netcdf_name:', netcdf_name) - print('json_name:', json_name) - - # Computes the metric collection - print("\n### Compute the metric collection ###\n") - cdms2.setAutoBounds('on') - dict_metric[mod][run], dict_dive[mod][run] = ComputeCollection(mc_name, dictDatasets, mod_run, netcdf=param.nc_out, - netcdf_name=netcdf, debug=debug) - if debug: - print('file_name:', file_name) - print('list_files:', list_files) - print('netcdf_name:', netcdf_name) - print('dict_metric:') - print(json.dumps(dict_metric, indent=4, sort_keys=True)) - - # OUTPUT METRICS TO JSON FILE (per simulation) - metrics_to_json(mc_name, dict_obs, dict_metric, dict_dive, egg_pth, outdir, json_name, mod=mod, run=run) - - except Exception as e: - print('failed for ', mod, run) - print(e) - if not debug: - pass - -print('PMPdriver: model loop end') - -# ================================================= -# OUTPUT METRICS TO JSON FILE (for all simulations) -# ------------------------------------------------- -#json_name = json_name_template(mip=mip, exp=exp, metricsCollection=mc_name, model='all', realization='all') -#metrics_to_json(mc_name, dict_obs, dict_metric, dict_dive, egg_pth, outdir, json_name) diff --git a/pmp_driver/PMPdriver_EnsoMetrics_ObsOnly.py b/pmp_driver/PMPdriver_EnsoMetrics_ObsOnly.py deleted file mode 100644 index 957fcd62..00000000 --- a/pmp_driver/PMPdriver_EnsoMetrics_ObsOnly.py +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/bin/env python -# ================================================= -# Dependencies -# ------------------------------------------------- -from __future__ import print_function - -import cdms2 -import glob -import json -import os -import pkg_resources -import sys - -from genutil import StringConstructor -from PMPdriver_lib import AddParserArgument -from PMPdriver_lib import metrics_to_json -from PMPdriver_lib import sort_human -from PMPdriver_lib import find_realm, get_file -from EnsoMetrics.EnsoCollectionsLib import CmipVariables, defCollection, ReferenceObservations -from EnsoMetrics.EnsoComputeMetricsLib import ComputeCollection, ComputeCollection_ObsOnly - -# To avoid below error when using multi cores -# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable -os.environ['OPENBLAS_NUM_THREADS'] = '1' - -# ================================================= -# Collect user defined options -# ------------------------------------------------- -param = AddParserArgument() - -# Pre-defined options -mip = param.mip -exp = param.exp -print('mip:', mip) -print('exp:', exp) - -# Path to model data as string template -modpath = param.process_templated_argument("modpath") -modpath_lf = param.process_templated_argument("modpath_lf") - -# Check given model option -models = param.modnames - -# Include all models if conditioned -if ('all' in [m.lower() for m in models]) or (models == 'all'): - model_index_path = param.modpath.split('/')[-1].split('.').index("%(model)") - models = ([p.split('/')[-1].split('.')[model_index_path] for p in glob.glob(modpath( - mip=mip, exp=exp, model='*', realization='*', variable='ts'))]) - # remove duplicates - models = 
sorted(list(dict.fromkeys(models)), key=lambda s: s.lower()) - -print('models:', models) - -# Realizations -realization = param.realization -print('realization: ', realization) - -# Metrics Collection -mc_name = param.metricsCollection -dict_mc = defCollection(mc_name) -list_metric = sorted(dict_mc['metrics_list'].keys()) -print('mc_name:', mc_name) - -# case id -case_id = param.case_id - -# Output -outdir_template = param.process_templated_argument("results_dir") -outdir = StringConstructor(str(outdir_template( - output_type='%(output_type)', - mip=mip, exp=exp, metricsCollection=mc_name, case_id=case_id))) -netcdf_path = outdir(output_type='diagnostic_results') -json_name_template = param.process_templated_argument("json_name") -netcdf_name_template = param.process_templated_argument("netcdf_name") - -print('outdir:', str(outdir_template( - output_type='%(output_type)', - mip=mip, exp=exp, metricsCollection=mc_name))) -print('netcdf_path:', netcdf_path) - -# Switches -debug = param.debug -print('debug:', debug) - -# ================================================= -# Prepare loop iteration -# ------------------------------------------------- -# Environmental setup -try: - egg_pth = pkg_resources.resource_filename( - pkg_resources.Requirement.parse("pcmdi_metrics"), "share/pmp") -except Exception: - egg_pth = os.path.join(sys.prefix, "share", "pmp") -print('egg_pth:', egg_pth) - -# Create output directory -for output_type in ['graphics', 'diagnostic_results', 'metrics_results']: - if not os.path.exists(outdir(output_type=output_type)): - os.makedirs(outdir(output_type=output_type)) - print(outdir(output_type=output_type)) - -# list of variables -list_variables = list() -for metric in list_metric: - listvar = dict_mc['metrics_list'][metric]['variables'] - for var in listvar: - if var not in list_variables: - list_variables.append(var) -list_variables = sorted(list_variables) -print(list_variables) - -# list of observations -list_obs = list() -for metric in list_metric: - dict_var_obs = dict_mc['metrics_list'][metric]['obs_name'] - for var in dict_var_obs.keys(): - for obs in dict_var_obs[var]: - if obs not in list_obs: - list_obs.append(obs) -list_obs = sorted(list_obs) - -# -# finding file and variable name in file for each observations dataset -# -dict_obs = dict() - -for obs in list_obs: - # be sure to add your datasets to EnsoCollectionsLib.ReferenceObservations if needed - dict_var = ReferenceObservations(obs)['variable_name_in_file'] - dict_obs[obs] = dict() - for var in list_variables: - # - # finding variable name in file - # - try: var_in_file = dict_var[var]['var_name'] - except: - print('\033[95m' + str(var) + " is not available for " + str(obs) + " or unscripted" + '\033[0m') - else: - if isinstance(var_in_file, list): - var0 = var_in_file[0] - else: - var0 = var_in_file - - try: - # finding file for 'obs', 'var' - file_name = param.reference_data_path[obs].replace('VAR',var0) - file_areacell = None ## temporary for now - try: - file_landmask = param.reference_data_lf_path[obs] - except: - file_landmask = None - try: - areacell_in_file = dict_var['areacell']['var_name'] - except: - areacell_in_file = None - try: - landmask_in_file = dict_var['landmask']['var_name'] - except: - landmask_in_file = None - # if var_in_file is a list (like for thf) all variables should be read from the same realm - if isinstance(var_in_file, list): - list_files = list() - list_files = [param.reference_data_path[obs].replace('VAR',var1) for var1 in var_in_file] - list_areacell = [file_areacell for 
var1 in var_in_file] - list_name_area = [areacell_in_file for var1 in var_in_file] - try: - list_landmask = [param.reference_data_lf_path[obs] for var1 in var_in_file] - except: - list_landmask = None - list_name_land = [landmask_in_file for var1 in var_in_file] - else: - list_files = file_name - list_areacell = file_areacell - list_name_area = areacell_in_file - list_landmask = file_landmask - list_name_land = landmask_in_file - dict_obs[obs][var] = {'path + filename': list_files, 'varname': var_in_file, - 'path + filename_area': list_areacell, 'areaname': list_name_area, - 'path + filename_landmask': list_landmask, 'landmaskname': list_name_land} - except: - print('\033[95m' + 'Observation dataset ' + str(obs) + " is not given for variable " + str(var) + '\033[0m') - -print('PMPdriver: dict_obs readin end') - -# Prepare computing the metric collection (OBS to OBS) -dictDatasets = {'observations': dict_obs} -netcdf_path = "/work/lee1043/imsi/result_test/enso_metric/test_obs2obs_yann" -netcdf_name = 'YANN_PLANTON_' + mc_name + "_OBSNAME" -netcdf = os.path.join(netcdf_path, netcdf_name) -if debug: - print('file_name:', file_name) - print('list_files:', list_files) - print('netcdf_name:', netcdf_name) - print('dict_obs:') - print(json.dumps(dict_obs, indent=4, sort_keys=True)) - with open("dict_obs_" + mc_name + ".json", "w") as f_dict_obs: - json.dump(dict_obs, f_dict_obs, indent=4, sort_keys=True) - -sys.exit("TEST") - -# Compute the metric collection (OBS to OBS) -dict_metric, dict_dive = ComputeCollection_ObsOnly(mc_name, dictDatasets, debug=True, netcdf=True, netcdf_name=netcdf) -if debug: - print('dict_metric:') - print(json.dumps(dict_metric, indent=4, sort_keys=True)) - -# OUTPUT METRICS TO JSON FILE (per simulation) -outdir = netcdf_path -json_name = netcdf_name -metrics_to_json(mc_name, dict_obs, dict_metric, dict_dive, egg_pth, outdir, json_name, mod=None, run=None) - -sys.exit("TEST") diff --git a/pmp_driver/PMPdriver_lib.py b/pmp_driver/PMPdriver_lib.py deleted file mode 100644 index fb10aac9..00000000 --- a/pmp_driver/PMPdriver_lib.py +++ /dev/null @@ -1,187 +0,0 @@ -from __future__ import print_function -from collections import defaultdict -from pcmdi_metrics.driver.pmp_parser import PMPParser - -import copy -import collections -import datetime -import glob -import os -import sys -import pcmdi_metrics -import re - - -def AddParserArgument(): - - P = PMPParser() # Includes all default options - - #P.use("--mip") - #P.use("--exp") - - P.add_argument("--mip", - type=str, - default="cmip5", - help="A WCRP MIP project such as CMIP3 and CMIP5") - P.add_argument("--exp", - type=str, - default="historical", - help="An experiment such as AMIP, historical or pi-control") - P.use("--modpath") - P.add_argument("--modpath_lf", - type=str, - dest='modpath_lf', - help="Directory path to model land fraction field") - P.add_argument("--modnames", - type=str, - nargs='+', - default=None, - help="List of models") - P.add_argument("-r", "--realization", - type=str, - default="r1i1p1", - help="Consider all accessible realizations as individual\n" - "- r1i1p1: default, consider only 'r1i1p1' member\n" - " Or, specify realization, e.g., 'r3i1p1'\n" - "- *: consider all available realizations") - P.use("--reference_data_path") - P.add_argument("--reference_data_lf_path", - type=str, - dest='reference_data_lf_path', - help="Data path to land fraction of reference dataset") - P.add_argument("--metricsCollection", - type=str, - dest='metricsCollection', - default="ENSO_perf", - help="Metrics Collection
e.g. ENSO_perf, ENSO_tel, or ENSO_proc") - P.add_argument("--json_name", - type=str, - dest='json_name', - help="File name for output JSON") - P.add_argument("--netcdf_name", - type=str, - dest='netcdf_name', - help="File name for output NetCDF") - P.use("--results_dir") - P.add_argument("--case_id", - type=str, - dest="case_id", - default="{:v%Y%m%d}".format(datetime.datetime.now()), - help="version as date, e.g., v20191116 (vYYYYMMDD)") - # Switches - P.add_argument("-d", "--debug", nargs='?', - const=True, default=False, - type=bool, - help="Option for debug: True / False (default)") - P.add_argument("--nc_out", nargs='?', - const=True, default=True, - type=bool, - help="Option to generate netCDF file output: True (default) / False") - - param = P.get_parameter() - - return param - - -def sort_human(input_list): - tmp_list = copy.copy(input_list) - convert = lambda text: float(text) if text.isdigit() else text - alphanum = lambda key: [convert(c) for c in re.split('([-+]?[0-9]*\.?[0-9]*)', key)] - tmp_list.sort(key=alphanum) - return tmp_list - - -# Dictionary to save result -def tree(): return defaultdict(tree) - - -# Prepare outputting metrics to JSON file -def metrics_to_json(mc_name, dict_obs, dict_metric, dict_dive, egg_pth, outdir, json_name, mod=None, run=None): - # disclaimer and reference for JSON header - disclaimer = open( - os.path.join( - egg_pth, - "disclaimer.txt")).read() - - if mc_name == 'MC1': - reference = "The statistics in this file are based on Bellenger, H et al. Clim Dyn (2014) 42:1999-2018. doi:10.1007/s00382-013-1783-z" - elif mc_name == 'ENSO_perf': - reference = "MC for ENSO Performance..." - elif mc_name == 'ENSO_tel': - reference = "MC for ENSO Teleconnection..." - elif mc_name == 'ENSO_proc': - reference = "MC for ENSO Process..."
- else: - reference = mc_name - - enso_stat_dic = tree() # Use tree dictionary to avoid declaring it every time - - # First JSON for metrics results - enso_stat_dic['obs'] = dict_obs - if mod is not None and run is not None: - enso_stat_dic['model'][mod][run] = dict_metric[mod][run] - else: - enso_stat_dic['model'] = dict_metric - metrics_dictionary = collections.OrderedDict() - metrics_dictionary["DISCLAIMER"] = disclaimer - metrics_dictionary["REFERENCE"] = reference - metrics_dictionary["RESULTS"] = enso_stat_dic - - OUT = pcmdi_metrics.io.base.Base(outdir(output_type='metrics_results'), json_name+'.json') - OUT.write( - metrics_dictionary, - json_structure=["type", "data", "metric", "item", "value or description"], - indent=4, - separators=( - ',', - ': '), - sort_keys=True) - - # Second JSON for dive down information - diveDown_dictionary = collections.OrderedDict() - diveDown_dictionary["DISCLAIMER"] = disclaimer - diveDown_dictionary["REFERENCE"] = reference - diveDown_dictionary["RESULTS"] = {} - if mod is not None and run is not None: - diveDown_dictionary["RESULTS"]["model"] = {} - diveDown_dictionary["RESULTS"]["model"][mod] = {} - diveDown_dictionary["RESULTS"]["model"][mod][run] = {} - diveDown_dictionary["RESULTS"]["model"][mod][run] = dict_dive[mod][run] - else: - diveDown_dictionary["RESULTS"]["model"] = dict_dive - - OUT2 = pcmdi_metrics.io.base.Base(outdir(output_type='metrics_results'), json_name+'_diveDown.json') - OUT2.write( - diveDown_dictionary, - json_structure=["type", "data", "metric", "item", "value or description"], - indent=4, - separators=( - ',', - ': '), - sort_keys=True) - - -def find_realm(varname): - if varname in ["tos", "tauuo", "zos", "areacello", "SSH", "ssh"]: - realm = "ocean" - #realm = "Omon" - areacell_in_file = "areacello" - else: - realm = "atmos" - #realm = "Amon" - areacell_in_file = "areacella" - return areacell_in_file, realm - - -def get_file(path): - file_list = glob.glob(path) - print("path: ", path) - print("file_list: ", file_list) - if len(file_list) > 1: - print("Multiple files detected in get_file function.
file_list: ", file_list) - path_to_return = sorted(file_list)[0] - elif len(file_list) == 1: - path_to_return = file_list[0] - elif len(file_list) == 0: - path_to_return = path - return path_to_return diff --git a/pmp_driver/PMPdriver_plot.py b/pmp_driver/PMPdriver_plot.py deleted file mode 100644 index 7989e00d..00000000 --- a/pmp_driver/PMPdriver_plot.py +++ /dev/null @@ -1,165 +0,0 @@ -# -*- coding:UTF-8 -*- -# ---------------------------------------------------# -# Aim of the program: -# Create plots for ENSO_metrics -# ---------------------------------------------------# - - -# ---------------------------------------------------# -# Import the right packages -# ---------------------------------------------------# -from __future__ import print_function - -# Run matplotlib background to prevent -# display localhost error after console disconnected -# and to speed up -import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as plt -plt.ioff() - -# Import other libs -from glob import iglob as GLOBiglob -import json -from os import makedirs as OS__makedirs -from os.path import exists as OSpath__exists -from os.path import join as OSpath__join -# ENSO_metrics functions -#from EnsoCollectionsLib import defCollection -from EnsoMetrics.EnsoCollectionsLib import defCollection -from EnsoMetricPlot import main_plotter -import sys - -from PMPdriver_lib import AddParserArgument - -# ---------------------------------------------------# -# Arguments -# ---------------------------------------------------# -param = AddParserArgument() - -# Metrics Collection -metric_collection = param.metricsCollection - -# Pre-defined options -mip = param.mip -exp = param.exp - -# model -if param.modnames is None: - model = "IPSL-CM5A-LR" -else: - model = param.modnames[0] - -# Realizations -run = param.realization - -# case id -case_id = param.case_id - -# Switches -debug = param.debug - -""" -metric_collection = "ENSO_perf" -#metric_collection = "ENSO_tel" -#metric_collection = "ENSO_proc" - -mip = "cmip5" -exp = "historical" -model = "IPSL-CM5A-LR" -run = "r1i1p1" - -case_id = "v20200305" -debug = True -""" - -# ---------------------------------------------------# -# Check Arguments -# ---------------------------------------------------# -print("metric_collection:", metric_collection) -print("mip:", mip) -print("exp:", exp) -print("model:", model) -print("run:", run) -print("case_id:", case_id) -print("debug:", debug) -# ---------------------------------------------------# - -path_main = "/p/user_pub/pmp/pmp_results/pmp_v1.1.2" -path_in_json = OSpath__join(path_main, "metrics_results", "enso_metric", mip, exp, case_id, metric_collection) -path_in_nc = OSpath__join(path_main, "diagnostic_results", "enso_metric", mip, exp, case_id, metric_collection) - -if debug: - path_main = "/work/lee1043/imsi/result_test" -path_out = OSpath__join(path_main, "graphics", "enso_metric", mip, exp, case_id, metric_collection) - -if not OSpath__exists(path_out): - try: - OS__makedirs(path_out) - print("path_out:", path_out) - except: - pass - -pattern = "_".join([mip, exp, metric_collection, case_id]) - -# ---------------------------------------------------# -# Main -# ---------------------------------------------------# -# read json file -filename_js = OSpath__join(path_in_json, pattern + "_allModels_allRuns.json") -print('filename_js:', filename_js) -with open(filename_js) as ff: - data_json = json.load(ff)['RESULTS']['model'][model][run] -ff.close() -del ff, filename_js -# loop on metrics -metrics = 
sorted(defCollection(metric_collection)['metrics_list'].keys(), key=lambda v: v.upper()) -for met in metrics: - try: - print('met:', met) - # get NetCDF file name - filename_nc = OSpath__join(path_in_nc, pattern + "_" + model + "_" + run + "_" + met + ".nc") - print("filename_nc:", filename_nc) - # get diagnostic values for the given model and observations - if metric_collection == "ENSO_tel" and "Map" in met: - dict_dia = data_json["value"][met+"Corr"]["diagnostic"] - diagnostic_values = dict((key1, None) for key1 in dict_dia.keys()) - diagnostic_units = "" - else: - dict_dia = data_json["value"][met]["diagnostic"] - diagnostic_values = dict((key1, dict_dia[key1]["value"]) for key1 in dict_dia.keys()) - diagnostic_units = data_json["metadata"]["metrics"][met]["diagnostic"]["units"] - # get metric values computed with the given model and observations - if metric_collection == "ENSO_tel" and "Map" in met: - list1, list2 = [met+"Corr", met+"Rmse"], ["diagnostic", "metric"] - dict_met = data_json["value"] - metric_values = dict((key1, {model: [dict_met[su][ty][key1]["value"] for su, ty in zip(list1, list2)]}) - for key1 in dict_met[list1[0]]["metric"].keys()) - metric_units = [data_json["metadata"]["metrics"][su]["metric"]["units"] for su in list1] - else: - dict_met = data_json["value"][met]["metric"] - metric_values = dict((key1, {model: dict_met[key1]["value"]}) for key1 in dict_met.keys()) - metric_units = data_json["metadata"]["metrics"][met]["metric"]["units"] - # figure name - figure_name = "_".join([mip, exp, metric_collection, model, run, met]) - # this function needs: - # - the name of the metric collection: metric_collection - # - the name of the metric: metric - # - the name of the model: modname (!!!!! this must be the name given when computed because it is the name used - # in the netCDF files and in the json file !!!!!) - # - name of the exp: exp - # - name of the netCDF file name and path: filename_nc - # - a dictionary containing the diagnostic values: diagnostic_values (e.g., {"ERA-Interim": 1, "Tropflux": 1.1, - # modname: 1.5}) - # - the diagnostic units: diagnostic_units - # - a dictionary containing the metric values: metric_values (e.g., {"ERA-Interim": {modname: 1.5}, - # "Tropflux": {modname: 1.36}}) - # - the metric units: metric_units - # - (optional) the path where to save the plots: path_out - # - (optional) the name of the plots: name_png - main_plotter(metric_collection, met, model, exp, filename_nc, diagnostic_values, - diagnostic_units, metric_values, metric_units, member=run, path_png=path_out, - name_png=figure_name) - except Exception as e: - print("## ERROR:", e) - pass diff --git a/pmp_driver/README.md b/pmp_driver/README.md deleted file mode 100644 index db8b1853..00000000 --- a/pmp_driver/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Scripts for PCMDI Metrics Package - -- `run_pmp.sh`: Compute metrics using a single CPU. - - `PMPdriver_EnsoMetrics.py` -- `run_pmp_parallel.sh`: Compute metrics using multiple CPUs. - - `parallel_driver.py` - - `PMPdriver_EnsoMetrics.py` - - Input parameter file: `my_Param_ENSO.py` or `my_Param_ENSO_obs2obs.py` -- `run_pmp_parallel_obs2obs.sh`: Compute metrics using multiple CPUs, but for observation-to-observation comparison. - - `parallel_driver.py` - - `PMPdriver_EnsoMetrics.py` -- `run_pmp_plot_parallel.sh`: Generate dive down plots using multiple CPUs.
- - `parallel_driver_plot.py` - - `PMPdriver_plot.py` - -**NOTE**: *More clean up needed for obs2obs task* diff --git a/pmp_driver/my_Param_ENSO.py b/pmp_driver/my_Param_ENSO.py deleted file mode 100644 index 1d76b499..00000000 --- a/pmp_driver/my_Param_ENSO.py +++ /dev/null @@ -1,95 +0,0 @@ -import datetime -import glob -import os - - -def find_latest(path): - dir_list = [p for p in glob.glob(path+"/v????????")] - return sorted(dir_list)[-1] - - -# ================================================= -# Background Information -# ------------------------------------------------- -mip = 'cmip6' # cmip5, cmip6 -exp = 'historical' # historical, piControl - -#================================================= -# Miscellaneous -#------------------------------------------------- -debug = False -#debug = True -nc_out = True - -#================================================= -# Observation -#------------------------------------------------- -reference_data_path = { - 'ERA-Interim': '/p/user_pub/PCMDIobs/PCMDIobs2/atmos/mon/VAR/ERA-INT/gn/v20200707/VAR_mon_ERA-INT_BE_gn_v20200707_197901-201903.nc', - 'HadISST': '/work/lee1043/DATA/HadISSTv1.1/HadISSTv1.1.xml', - 'OISST': '/work/lee1043/DATA/OISST/xmls/OISST_tos_mo.xml', - 'Tropflux': '/work/lee1043/DATA/TropFlux/monthly/xmls/Tropflux_VAR_mo.xml', - #'Tropflux': '/p/user_pub/PCMDIobs/PCMDIobs2.0/atmos/mon/VAR/TropFlux-1-0/gn/v20190912/VAR_mon_TropFlux-1-0_BE_gn_197901-201707.nc', - #'OAFlux': '/work/lee1043/DATA/OAFlux/xmls/OAFlux_VAR_mo.xml', - 'GPCPv2.3': '/p/user_pub/pmp/pmp_obs_preparation/orig/data/GPCP_v2.3_mon_jwl/precip.mon.mean.nc', - #'GPCPv2.3': '/p/user_pub/PCMDIobs/PCMDIobs2.0/atmos/mon/pr/GPCP-2-3/gn/v20200117/pr_mon_GPCP-2-3_BE_gn_197901-201907.nc', - #'AVISO': '/p/user_pub/PCMDIobs/PCMDIobs2.1/ocean/mon/zos/AVISO-1-0/gn/v20190912/zos_mon_AVISO-1-0_BE_gn_199210-201012.nc', - 'AVISO': '/work/lee1043/DATA/AVISO/sla_aviso_199301-201812.xml', -} - -reference_data_lf_path = { - 'GPCPv2.3': '/work/lee1043/DATA/GPCP/gpcp_25_lsmask.nc' -} -#================================================= -# Models -#------------------------------------------------- -modpath = os.path.join( - find_latest('/p/user_pub/pmp/pmp_results/pmp_v1.1.2/additional_xmls/latest'), - '%(mip)/%(exp)/%(realm)/mon/%(variable)', - '%(mip).%(exp).%(model).%(realization).mon.%(variable).xml') - -modpath_lf = os.path.join( - find_latest('/p/user_pub/pmp/pmp_results/pmp_v1.1.2/additional_xmls/latest'), - '%(mip)/historical/%(realm)/fx/%(variable)', - '%(mip).historical.%(model).r0i0p0.fx.%(variable).xml') - -modnames = ['all'] - -""" -realization: -- specific [i.e., "r1i1p1" (cmip5) or "r1i1p1f1" (cmip6)] -- "*" or "all" for all -- "first" for only first realization -""" -realization = 'first' -#realization = '*' - -if debug: - modnames = ['IPSL-CM6A-LR'] - #realization = 'r1i1p1f1' - realization = 'first' - - -#================================================= -# Metrics Collection -#------------------------------------------------- -metricsCollection = 'ENSO_perf' # ENSO_perf, ENSO_tel, ENSO_proc -#metricsCollection = 'ENSO_tel' # ENSO_perf, ENSO_tel, ENSO_proc - -#================================================= -# Output -#------------------------------------------------- -case_id = "{:v%Y%m%d}".format(datetime.datetime.now()) -pmprdir = '/p/user_pub/pmp/pmp_results/pmp_v1.1.2' - -if debug: - case_id = "{:v%Y%m%d-%H%M}".format(datetime.datetime.now()) - pmprdir = '/work/lee1043/temporary/result_test' - -results_dir = os.path.join( - pmprdir, - '%(output_type)', 
'enso_metric', - '%(mip)', '%(exp)', '%(case_id)', '%(metricsCollection)') - -json_name = '%(mip)_%(exp)_%(metricsCollection)_%(case_id)_%(model)_%(realization)' -netcdf_name = json_name diff --git a/pmp_driver/my_Param_ENSO_obs2obs.py b/pmp_driver/my_Param_ENSO_obs2obs.py deleted file mode 100644 index 97e9fe8a..00000000 --- a/pmp_driver/my_Param_ENSO_obs2obs.py +++ /dev/null @@ -1,84 +0,0 @@ -import datetime -import glob -import os - - -def find_latest(path): - dir_list = [p for p in glob.glob(path+"/v????????")] - return sorted(dir_list)[-1] - - -# ================================================= -# Background Information -# ------------------------------------------------- -mip = 'obs2obs' # cmip5, cmip6 -exp = 'historical' # historical, piControl - -#================================================= -# Miscellaneous -#------------------------------------------------- -debug = False -nc_out = True - -#================================================= -# Observation -#------------------------------------------------- -reference_data_path = { - 'ERA-Interim': '/p/user_pub/PCMDIobs/PCMDIobs2/atmos/mon/VAR/ERA-INT/gn/v20200402/VAR_mon_ERA-INT_BE_gn_v20200402_197901-201903.nc', - 'HadISST': '/work/lee1043/DATA/HadISSTv1.1/HadISSTv1.1.xml', - 'OISST': '/work/lee1043/DATA/OISST/xmls/OISST_tos_mo.xml', - 'Tropflux': '/work/lee1043/DATA/TropFlux/monthly/xmls/Tropflux_VAR_mo.xml', - #'Tropflux': '/p/user_pub/PCMDIobs/PCMDIobs2.0/atmos/mon/VAR/TropFlux-1-0/gn/v20190912/VAR_mon_TropFlux-1-0_BE_gn_197901-201707.nc', - #'OAFlux': '/work/lee1043/DATA/OAFlux/xmls/OAFlux_VAR_mo.xml', - 'GPCPv2.3': '/p/user_pub/pmp/pmp_obs_preparation/orig/data/GPCP_v2.3_mon_jwl/precip.mon.mean.nc', - #'GPCPv2.3': '/p/user_pub/PCMDIobs/PCMDIobs2.0/atmos/mon/pr/GPCP-2-3/gn/v20200117/pr_mon_GPCP-2-3_BE_gn_197901-201907.nc', - #'AVISO': '/p/user_pub/PCMDIobs/PCMDIobs2.1/ocean/mon/zos/AVISO-1-0/gn/v20190912/zos_mon_AVISO-1-0_BE_gn_199210-201012.nc', - 'AVISO': '/work/lee1043/DATA/AVISO/sla_aviso_199301-201812.xml', -} - -reference_data_lf_path = { - 'GPCPv2.3': '/work/lee1043/DATA/GPCP/gpcp_25_lsmask.nc' -} -#================================================= -# Models -#------------------------------------------------- -modpath = os.path.join( - '/p/user_pub/PCMDIobs/PCMDIobs2/%(realm)/mon/%(variable)', - '%(model)/gn/', - 'v????????', - '%(variable)_mon_%(model)_BE_gn_v????????_??????-??????.nc') - -modpath_lf = os.path.join( - find_latest('/p/user_pub/pmp/pmp_results/pmp_v1.1.2/additional_xmls/latest'), - '%(mip)/historical/%(realm)/fx/%(variable)', - '%(mip).historical.%(model).r0i0p0.fx.%(variable).xml') - -modnames = ['20CR', 'ERA-20C', 'ERA-INT', 'TropFlux-1-0', 'CMAP-V1902', 'GPCP-2-3', 'TRMM-3B43v-7', 'ERA-5', 'CERES-EBAF-4-0', 'CERES-EBAF-4-1', 'AVISO-1-0'] - -if debug: - modnames = ['ERA-INT'] - -realization = 'r1i1p1f1' # r1i1p1 (cmip5), r1i1p1f1 (cmip6), * (all) -#realization = '*' - -#================================================= -# Metrics Collection -#------------------------------------------------- -metricsCollection = 'ENSO_perf' # ENSO_perf, ENSO_tel, ENSO_proc - -#================================================= -# Output -#------------------------------------------------- -case_id = "{:v%Y%m%d}".format(datetime.datetime.now()) -pmprdir = '/p/user_pub/pmp/pmp_results/pmp_v1.1.2' - -if debug: - pmprdir = '/work/lee1043/imsi/result_test' - -results_dir = os.path.join( - pmprdir, - '%(output_type)', 'enso_metric', - '%(mip)', '%(exp)', '%(case_id)', '%(metricsCollection)') - -json_name = 
'%(mip)_%(exp)_%(metricsCollection)_%(case_id)_%(model)_%(realization)' -netcdf_name = json_name diff --git a/pmp_driver/parallel_driver.py b/pmp_driver/parallel_driver.py deleted file mode 100644 index 937a0edf..00000000 --- a/pmp_driver/parallel_driver.py +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python - -""" -Usage example: -1. First realization per model -./parallel_driver.py -p my_Param_ENSO.py --mip cmip6 --modnames all --realization r1i1p1f1 --metricsCollection ENSO_perf -2. All realizations of individual models -./parallel_driver.py -p my_Param_ENSO.py --mip cmip6 --modnames all --realization all --metricsCollection ENSO_perf -""" - -from __future__ import print_function -from argparse import RawTextHelpFormatter -from genutil import StringConstructor -from subprocess import Popen - -from PMPdriver_lib import AddParserArgument -from PMPdriver_lib import sort_human - -import datetime -import glob -import os -import pcmdi_metrics -import sys -import time - -# To avoid below error -# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable -os.environ['OPENBLAS_NUM_THREADS'] = '1' - -# Must be done before any CDAT library is called. -# https://github.com/CDAT/cdat/issues/2213 -if 'UVCDAT_ANONYMOUS_LOG' not in os.environ: - os.environ['UVCDAT_ANONYMOUS_LOG'] = 'no' - -# ================================================= -# Collect user defined options -# ------------------------------------------------- -param = AddParserArgument() - -# Pre-defined options -mip = param.mip -exp = param.exp -print('mip:', mip) -print('exp:', exp) - -# Path to model data as string template -modpath = param.process_templated_argument("modpath") - -# Check given model option -models = param.modnames -print('models:', models) - -# Include all models if conditioned -if ('all' in [m.lower() for m in models]) or (models == 'all'): - model_index_path = param.modpath.split('/')[-1].split('.').index("%(model)") - models = ([p.split('/')[-1].split('.')[model_index_path] for p in glob.glob(modpath( - mip=mip, exp=exp, model='*', realization='*', variable='ts'))]) - # remove duplicates - models = sorted(list(dict.fromkeys(models)), key=lambda s: s.lower()) - -print('models:', models) -print('number of models:', len(models)) - -# Realizations -realization = param.realization -if ('all' in [r.lower() for r in realization]) or (realization == 'all'): - realization = '*' -print('realization: ', realization) - -# Metrics Collection -mc_name = param.metricsCollection - -# case id -case_id = param.case_id -print('case_id:', case_id) - -# Output -outdir_template = param.process_templated_argument("results_dir") -outdir = StringConstructor(str(outdir_template( - output_type='%(output_type)', - mip=mip, exp=exp, metricsCollection=mc_name, case_id=case_id))) - -# Debug -debug = param.debug -print('debug:', debug) - -# ================================================= -# Create output directories -# ------------------------------------------------- -for output_type in ['graphics', 'diagnostic_results', 'metrics_results']: - if not os.path.exists(outdir(output_type=output_type)): - os.makedirs(outdir(output_type=output_type)) - print(outdir(output_type=output_type)) - -# ================================================= -# Generates list of command -# ------------------------------------------------- -if mip == "obs2obs": - param_file = './my_Param_ENSO_obs2obs.py' -else: - param_file = './my_Param_ENSO.py' - -cmds_list = [] -for model in models: - print(' ----- model: ', 
model, ' ---------------------') - # Find all xmls for the given model - model_path_list = glob.glob( - modpath(mip=mip, exp=exp, model=model, realization="*", variable='ts')) - # sort in a nice way - model_path_list = sort_human(model_path_list) - if debug: - print('model_path_list:', model_path_list) - - # Find where run can be extracted from the given filename template for modpath - print('realization:', realization) - run_in_modpath = modpath(mip=mip, exp=exp, realm='atmos', model=model, realization=realization, - variable='ts').split('/')[-1].split('.').index(realization) - print('run_in_modpath:', run_in_modpath) - # Collect all available runs - runs_list = [model_path.split('/')[-1].split('.')[run_in_modpath] for model_path in model_path_list] - - # Adjust realization to be included - if realization in ["all", "*"]: - pass - elif realization in ["first"]: - runs_list = runs_list[:1] - else: - runs_list = [realization] - - if debug: - print('runs_list (all):', runs_list) - - # Generate commands - for run in runs_list: - cmd = ['python', 'PMPdriver_EnsoMetrics.py', - '-p', param_file, - '--mip', mip, '--metricsCollection', mc_name, - '--case_id', case_id, - '--modnames', model, - '--realization', run] - cmds_list.append(cmd) - -if debug: - for cmd in cmds_list: - print(' '.join(cmd)) - -# ================================================= -# Run subprocesses in parallel -# ------------------------------------------------- -# log dir -log_dir = os.path.join("log", case_id, mc_name) - -if not os.path.exists(log_dir): - os.makedirs(log_dir) - -# number of tasks to submit at the same time -num_workers = 7 -#num_workers = 10 -#num_workers = 30 -#num_workers = 25 - -print("Start : %s" % time.ctime()) - -# submit tasks and wait for subset of tasks to complete -procs_list = [] -for p, cmd in enumerate(cmds_list): - timenow = time.ctime() - print(timenow, p, ' '.join(cmd)) - model = cmd[-3] - run = cmd[-1] - log_filename = '_'.join(['log_enso', mc_name, mip, exp, model, run, case_id]) - log_file = os.path.join(log_dir, log_filename) - with open(log_file+"_stdout.txt", "wb") as out, open(log_file+"_stderr.txt", "wb") as err: - procs_list.append(Popen(cmd, stdout=out, stderr=err)) - time.sleep(1) - if ((p > 0 and p % num_workers == 0) or (p == len(cmds_list)-1)): - print('wait...') - for proc in procs_list: - proc.wait() - print("Tasks end : %s" % time.ctime()) - procs_list = [] - -# tasks done -print("End : %s" % time.ctime()) -sys.exit('DONE') diff --git a/pmp_driver/parallel_driver_plot.py b/pmp_driver/parallel_driver_plot.py deleted file mode 100644 index 1cb96bd3..00000000 --- a/pmp_driver/parallel_driver_plot.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python - -""" -Usage example: -1. First realization per model -./parallel_driver_plot.py --mip cmip6 --exp historical --case_id v20200305 --modnames all --realization r1i1p1f1 --metricsCollection ENSO_perf -2.
All realizations of individual models -./parallel_driver_plot.py --mip cmip6 --exp historical --case_id v20200305 --modnames all --realization all --metricsCollection ENSO_perf - -""" - -from __future__ import print_function -from subprocess import Popen - -from PMPdriver_lib import AddParserArgument -from PMPdriver_lib import sort_human - -import datetime -import json -import os -import sys -import time - -from os import makedirs as OS__makedirs -from os.path import exists as OSpath__exists -from os.path import join as OSpath__join - -# To avoid below error -# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable -os.environ['OPENBLAS_NUM_THREADS'] = '1' - -# Must be done before any CDAT library is called. -# https://github.com/CDAT/cdat/issues/2213 -if 'UVCDAT_ANONYMOUS_LOG' not in os.environ: - os.environ['UVCDAT_ANONYMOUS_LOG'] = 'no' - -# ================================================= -# Collect user defined options -# ------------------------------------------------- -param = AddParserArgument() - -# Metrics Collection -metric_collection = param.metricsCollection - -# Pre-defined options -mip = param.mip -exp = param.exp -print('mip:', mip) -print('exp:', exp) - -# Check given model option -models = param.modnames -print('models:', models) - -# Realizations -realization = param.realization -if ('all' in [r.lower() for r in realization]) or (realization == 'all'): - realization = 'all' -print('realization: ', realization) - -# case id -case_id = param.case_id -print('case_id:', case_id) - -path_main = "/p/user_pub/pmp/pmp_results/pmp_v1.1.2" -path_in_json = OSpath__join(path_main, "metrics_results", "enso_metric", mip, exp, case_id, metric_collection) -path_out = OSpath__join(path_main, "graphics", "enso_metric", mip, exp, case_id, metric_collection) - -pattern = "_".join([mip, exp, metric_collection, case_id]) - -# ---------------------------------------------------# -# Adjust model list if "ALL" given -# ---------------------------------------------------# -# read json file -filename_js = OSpath__join(path_in_json, pattern + "_allModels_allRuns.json") -print('filename_js:', filename_js) -with open(filename_js) as ff: - data_json = json.load(ff)['RESULTS']['model'] - -# Include all models if conditioned -if ('all' in [m.lower() for m in models]) or (models == 'all'): - models = sort_human(list(data_json.keys())) - -print('models:', models) -print('number of models:', len(models)) - -# Debug -debug = param.debug -print('debug:', debug) - -# ================================================= -# Create output directories -# ------------------------------------------------- -print("path_out:", path_out) -if not OSpath__exists(path_out): - try: - OS__makedirs(path_out) - except: - pass - -# ================================================= -# Generate list of commands -# e.g.: python PMPdriver_plot.py --mip cmip5 --exp historical --metricsCollection ENSO_perf --modnames IPSL-CM5A-LR --realization r1i1p1 --case_id v20200305 -# ------------------------------------------------- -cmds_list = [] -for model in models: - print(' ----- model: ', model, ' ---------------------') - if realization == "all": - runs_list = sort_human(list(data_json[model].keys())) - print('runs_list (all):', runs_list) - else: - runs_list = [realization] - for run in runs_list: - cmd = ['python', 'PMPdriver_plot.py', - '--mip', mip, '--exp', exp, '--metricsCollection', metric_collection, - '--case_id', case_id, - '--modnames', model, - '--realization', run] -
cmds_list.append(cmd) - -for i, cmd in enumerate(cmds_list): - print(i+1, ' '.join(cmd)) - -# ================================================= -# Run subprocesses in parallel -# ------------------------------------------------- -# log dir -log_dir = os.path.join("log", case_id, metric_collection) - -if not os.path.exists(log_dir): - os.makedirs(log_dir) - -# number of tasks to submit at the same time -#num_workers = 8 -num_workers = 10 -#num_workers = 30 -#num_workers = 25 - -print("Start : %s" % time.ctime()) - -# submit tasks and wait for subset of tasks to complete -procs_list = [] -for p, cmd in enumerate(cmds_list): - timenow = time.ctime() - print(timenow, p, ' '.join(cmd)) - model = cmd[-3] - run = cmd[-1] - log_filename = '_'.join(['log_ensoPlot', metric_collection, mip, exp, model, run, case_id]) - log_file = os.path.join(log_dir, log_filename) - with open(log_file+"_stdout.txt", "wb") as out, open(log_file+"_stderr.txt", "wb") as err: - procs_list.append(Popen(cmd, stdout=out, stderr=err)) - time.sleep(1) - if ((p > 0 and p % num_workers == 0) or (p == len(cmds_list)-1)): - print('wait...') - for proc in procs_list: - proc.wait() - print("Tasks end : %s" % time.ctime()) - procs_list = [] - -# tasks done -print("End : %s" % time.ctime()) -sys.exit('DONE') diff --git a/pmp_driver/post_process_merge_jsons.py b/pmp_driver/post_process_merge_jsons.py deleted file mode 100644 index a360a87e..00000000 --- a/pmp_driver/post_process_merge_jsons.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function -from genutil import StringConstructor -from pcmdi_metrics.variability_mode.lib import dict_merge - -import copy -import glob -import json -import os - - -def main(): - mips = ["cmip5", "cmip6"] - #mips = ["cmip5"] - #mips = ["cmip6"] - #mips = ["obs2obs"] - - exps = ["historical"] - - #MCs = ["ENSO_perf", "ENSO_tel", "ENSO_proc", "test_tel"] - MCs = ["ENSO_perf", "ENSO_tel", "ENSO_proc"] - #MCs = ["ENSO_tel"] - #MCs = ["test_tel"] - - pmprdir = '/p/user_pub/pmp/pmp_results/pmp_v1.1.2' - #pmprdir = "/work/lee1043/imsi/result_test" - - for mip in mips: - for exp in exps: - for MC in MCs: - case_id = find_latest(pmprdir, mip, exp, MC) - print("mip, exp, MC, case_id:", mip, exp, MC, case_id) - merge_jsons(mip, exp, case_id, MC, pmprdir) - - -def merge_jsons(mip, exp, case_id, metricsCollection, pmprdir): - json_file_dir_template = os.path.join( - pmprdir, - '%(output_type)', 'enso_metric', - '%(mip)', '%(exp)', '%(case_id)', '%(metricsCollection)') - json_file_dir_template = StringConstructor(json_file_dir_template) - json_file_dir = json_file_dir_template( - output_type='metrics_results', mip=mip, exp=exp, case_id=case_id, metricsCollection=metricsCollection) - - json_file_template = '_'.join(['%(mip)_%(exp)_%(metricsCollection)', '%(case_id)', '%(model)', '%(realization)']) - json_file_template = '%(mip)_%(exp)_%(metricsCollection)_%(case_id)_%(model)_%(realization)' - json_file_template = StringConstructor(json_file_template) - - # Search for individual JSONs - json_files = sorted(glob.glob( - os.path.join( - json_file_dir, - json_file_template( - mip=mip, exp=exp, metricsCollection=metricsCollection, case_id=case_id, model='*', realization='*')+'.json'))) - - # Remove diveDown JSONs and previously generated merged JSONs if included - json_files_revised = copy.copy(json_files) - for j, json_file in enumerate(json_files): - filename_component = json_file.split('/')[-1].split('.')[0].split('_') - if 'diveDown' in filename_component: - 
json_files_revised.remove(json_file) - elif 'allModels' in filename_component: - json_files_revised.remove(json_file) - elif 'allRuns' in filename_component: - json_files_revised.remove(json_file) - - # Load individual JSON and merge to one big dictionary - for j, json_file in enumerate(json_files_revised): - print(j, json_file) - f = open(json_file) - dict_tmp = json.loads(f.read()) - if j == 0: - dict_final = dict_tmp.copy() - else: - dict_merge(dict_final, dict_tmp) - f.close() - - # Dump final dictionary to JSON - final_json_filename = json_file_template( - mip=mip, exp=exp, metricsCollection=metricsCollection, case_id=case_id, - model='allModels', realization='allRuns')+'.json' - final_json_file = os.path.join(json_file_dir, final_json_filename) - - with open(final_json_file, 'w') as fp: - json.dump(dict_final, fp, sort_keys=True, indent=4) - - print("Done: check ", final_json_file) - - -def find_latest(pmprdir, mip, exp, MC): - versions = sorted([r.split('/')[-2] for r in glob.glob(os.path.join( - pmprdir, "metrics_results", "enso_metric", - mip, exp, "v????????", MC))]) - latest_version = versions[-1] - return latest_version - - -if __name__ == "__main__": - main() diff --git a/pmp_driver/run_pmp.sh b/pmp_driver/run_pmp.sh deleted file mode 100755 index 57814c5d..00000000 --- a/pmp_driver/run_pmp.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh -set -a - -# Working conda env in Crunchy: pmp_nightly_20180830 - -ver=`date +"%Y%m%d-%H%M"` - -#mips='cmip5 cmip6' -#mips='cmip5' -mips='obs2obs' - -MCs='ENSO_perf ENSO_tel ENSO_proc' -#MCs='ENSO_perf' -#MCs='ENSO_tel' -#MCs='ENSO_proc' - -#param_file='my_Param_ENSO.py' -#param_file='my_Param_ENSO_obs2obs.py' -param_file='my_Param_ENSO_obs2obs_combinedDataSource.py' - -mkdir -p log - -for mip in $mips; do - for MC in $MCs; do - echo $mip $MC - python PMPdriver_EnsoMetrics.py -p $param_file --mip ${mip} --metricsCollection ${MC} >& log/log.${mip}.${MC}.all.v${ver}.txt & - disown - done -done diff --git a/pmp_driver/run_pmp_parallel.sh b/pmp_driver/run_pmp_parallel.sh deleted file mode 100755 index d7caecf6..00000000 --- a/pmp_driver/run_pmp_parallel.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/sh -set -a - -# To avoid below error -# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable -export OMP_NUM_THREADS=1 - -# Working conda env in gates: cdat82_20191107_py27 - -case_id="v"`date +"%Y%m%d"` -#case_id="v20200224" - -mips='cmip5 cmip6' -#mips='cmip5' -#mips='cmip6' -#mips='obs2obs' - -MCs='ENSO_perf ENSO_tel ENSO_proc' -#MCs='ENSO_perf' -#MCs='ENSO_tel' -#MCs='ENSO_proc' - -modnames='all' -#modnames='IPSL-CM5A-LR' - -#realization='all' -realization='first' - -mkdir -p log/$case_id - -for mip in $mips; do - if [ $mip == 'cmip5' ]; then - #realization='r1i1p1' - #modnames="BNU-ESM HadCM3" - param_file='my_Param_ENSO.py' - elif [ $mip == 'cmip6' ]; then - #realization='r1i1p1f1' - #modnames="BCC-ESM1 CESM2 CESM2-FV2 CESM2-WACCM CESM2-WACCM-FV2 GFDL-CM4 GFDL-ESM4 MRI-ESM2-0" - param_file='my_Param_ENSO.py' - elif [ $mip == 'obs2obs' ]; then - param_file='my_Param_ENSO_obs2obs.py' - fi - - for MC in $MCs; do - echo $mip $MC $realization $case_id - python -u ./parallel_driver.py -p $param_file --mip $mip --case_id=$case_id --modnames $modnames --metricsCollection $MC --realization $realization >& log/$case_id/log_parallel.${mip}.${MC}.all.${case_id}.txt & - disown - sleep 1 - done -done diff --git a/pmp_driver/run_pmp_parallel_obs2obs.sh b/pmp_driver/run_pmp_parallel_obs2obs.sh deleted file mode 100644 
diff --git a/pmp_driver/run_pmp.sh b/pmp_driver/run_pmp.sh
deleted file mode 100755
index 57814c5d..00000000
--- a/pmp_driver/run_pmp.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/sh
-set -a
-
-# Working conda env in Crunchy: pmp_nightly_20180830
-
-ver=`date +"%Y%m%d-%H%M"`
-
-#mips='cmip5 cmip6'
-#mips='cmip5'
-mips='obs2obs'
-
-MCs='ENSO_perf ENSO_tel ENSO_proc'
-#MCs='ENSO_perf'
-#MCs='ENSO_tel'
-#MCs='ENSO_proc'
-
-#param_file='my_Param_ENSO.py'
-#param_file='my_Param_ENSO_obs2obs.py'
-param_file='my_Param_ENSO_obs2obs_combinedDataSource.py'
-
-mkdir -p log
-
-for mip in $mips; do
-    for MC in $MCs; do
-        echo $mip $MC
-        python PMPdriver_EnsoMetrics.py -p $param_file --mip ${mip} --metricsCollection ${MC} >& log/log.${mip}.${MC}.all.v${ver}.txt &
-        disown
-    done
-done
diff --git a/pmp_driver/run_pmp_parallel.sh b/pmp_driver/run_pmp_parallel.sh
deleted file mode 100755
index d7caecf6..00000000
--- a/pmp_driver/run_pmp_parallel.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/sh
-set -a
-
-# To avoid below error
-# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable
-export OMP_NUM_THREADS=1
-
-# Working conda env in gates: cdat82_20191107_py27
-
-case_id="v"`date +"%Y%m%d"`
-#case_id="v20200224"
-
-mips='cmip5 cmip6'
-#mips='cmip5'
-#mips='cmip6'
-#mips='obs2obs'
-
-MCs='ENSO_perf ENSO_tel ENSO_proc'
-#MCs='ENSO_perf'
-#MCs='ENSO_tel'
-#MCs='ENSO_proc'
-
-modnames='all'
-#modnames='IPSL-CM5A-LR'
-
-#realization='all'
-realization='first'
-
-mkdir -p log/$case_id
-
-for mip in $mips; do
-    if [ $mip == 'cmip5' ]; then
-        #realization='r1i1p1'
-        #modnames="BNU-ESM HadCM3"
-        param_file='my_Param_ENSO.py'
-    elif [ $mip == 'cmip6' ]; then
-        #realization='r1i1p1f1'
-        #modnames="BCC-ESM1 CESM2 CESM2-FV2 CESM2-WACCM CESM2-WACCM-FV2 GFDL-CM4 GFDL-ESM4 MRI-ESM2-0"
-        param_file='my_Param_ENSO.py'
-    elif [ $mip == 'obs2obs' ]; then
-        param_file='my_Param_ENSO_obs2obs.py'
-    fi
-
-    for MC in $MCs; do
-        echo $mip $MC $realization $case_id
-        python -u ./parallel_driver.py -p $param_file --mip $mip --case_id=$case_id --modnames $modnames --metricsCollection $MC --realization $realization >& log/$case_id/log_parallel.${mip}.${MC}.all.${case_id}.txt &
-        disown
-        sleep 1
-    done
-done
diff --git a/pmp_driver/run_pmp_parallel_obs2obs.sh b/pmp_driver/run_pmp_parallel_obs2obs.sh
deleted file mode 100644
index 80efe5d4..00000000
--- a/pmp_driver/run_pmp_parallel_obs2obs.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/sh
-set -a
-
-# To avoid below error
-# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable
-export OMP_NUM_THREADS=1
-
-# Working conda env in gates: cdat82_20191107_py27, cdat82_20200128_py27
-
-case_id="v"`date +"%Y%m%d"`
-
-mips='obs2obs'
-
-MCs='ENSO_perf ENSO_tel ENSO_proc'
-modnames='20CR ERA-20C ERA-INT TropFlux-1-0 CMAP-V1902 GPCP-2-3 TRMM-3B43v-7 ERA-5 CERES-EBAF-4-0 CERES-EBAF-4-1 AVISO-1-0'
-
-mkdir -p log/$case_id
-
-for mip in $mips; do
-    for MC in $MCs; do
-        echo $mip $MC $realization $case_id
-        python -u ./parallel_driver.py -p my_Param_ENSO_obs2obs.py --mip $mip --case_id=$case_id --modnames $modnames --metricsCollection $MC >& log/$case_id/log_parallel.${mip}.${MC}.all.${case_id}.txt &
-        disown
-        sleep 1
-    done
-done
diff --git a/pmp_driver/run_pmp_plot_parallel.sh b/pmp_driver/run_pmp_plot_parallel.sh
deleted file mode 100755
index 25446565..00000000
--- a/pmp_driver/run_pmp_plot_parallel.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/sh
-set -a
-
-# To avoid below error
-# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable
-export OMP_NUM_THREADS=1
-
-# Working conda env in gates: cdat82_20191107_py27
-
-case_id="v20200305"
-
-mips='cmip5 cmip6'
-#mips='cmip5'
-#mips='cmip6'
-
-MCs='ENSO_perf ENSO_tel ENSO_proc'
-#MCs='ENSO_perf'
-#MCs='ENSO_tel ENSO_proc'
-#MCs='ENSO_tel'
-#MCs='ENSO_proc'
-
-modnames='all'
-#modnames='IPSL-CM5A-LR'
-#modnames='CanESM5 FGOALS-g3'
-
-realization='all'
-
-mkdir -p log/$case_id
-
-for mip in $mips; do
-    for MC in $MCs; do
-        echo $mip $MC $realization $case_id
-        python -u ./parallel_driver.py --mip $mip --exp historical --case_id=$case_id --modnames $modnames --metricsCollection $MC --realization $realization >& log/$case_id/log_parallel.${mip}.${MC}.all.${case_id}.txt &
-        disown
-        sleep 1
-    done
-done

From fb5b804d3761ffb90a96e53d18d71a1c9abef95d Mon Sep 17 00:00:00 2001
From: Jiwoo Lee
Date: Thu, 30 Nov 2023 16:34:18 -0800
Subject: [PATCH 2/6] remove pmp driver as it has been migrated to the PMP
 side

---
 pmp_driver/PMPdriver_EnsoMetrics.py         | 397 --------------------
 pmp_driver/PMPdriver_EnsoMetrics_ObsOnly.py | 213 -----------
 pmp_driver/PMPdriver_lib.py                 | 187 ---------
 pmp_driver/PMPdriver_plot.py                | 165 --------
 pmp_driver/README.md                        |  16 -
 pmp_driver/my_Param_ENSO.py                 |  95 -----
 pmp_driver/my_Param_ENSO_obs2obs.py         |  84 -----
 pmp_driver/parallel_driver.py               | 185 ---------
 pmp_driver/parallel_driver_plot.py          | 162 --------
 pmp_driver/post_process_merge_jsons.py      | 100 -----
 pmp_driver/run_pmp.sh                       |  29 --
 pmp_driver/run_pmp_parallel.sh              |  50 ---
 pmp_driver/run_pmp_parallel_obs2obs.sh      |  26 --
 pmp_driver/run_pmp_plot_parallel.sh         |  37 --
 14 files changed, 1746 deletions(-)
 delete mode 100644 pmp_driver/PMPdriver_EnsoMetrics.py
 delete mode 100644 pmp_driver/PMPdriver_EnsoMetrics_ObsOnly.py
 delete mode 100644 pmp_driver/PMPdriver_lib.py
 delete mode 100644 pmp_driver/PMPdriver_plot.py
 delete mode 100644 pmp_driver/README.md
 delete mode 100644 pmp_driver/my_Param_ENSO.py
 delete mode 100644 pmp_driver/my_Param_ENSO_obs2obs.py
 delete mode 100644 pmp_driver/parallel_driver.py
 delete mode 100644 pmp_driver/parallel_driver_plot.py
 delete mode 100644 pmp_driver/post_process_merge_jsons.py
 delete mode 100755 pmp_driver/run_pmp.sh
 delete mode 100755 pmp_driver/run_pmp_parallel.sh
 delete mode 100644 pmp_driver/run_pmp_parallel_obs2obs.sh
 delete mode 100755
 pmp_driver/run_pmp_plot_parallel.sh

From a203f592c3823767f97a1124f7842e92fe834916 Mon Sep 17 00:00:00 2001
From: Jiwoo Lee
Date: Fri, 22 Nov 2024 18:56:18 -0800
Subject: [PATCH 3/6] update version, fix to work with higher matplotlib
 version

---
 lib/version.py            |  4 +--
 plots/EnsoPlotTemplate.py | 52 +++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/lib/version.py b/lib/version.py
index 701389ef..5ee345f7 100644
--- a/lib/version.py
+++ b/lib/version.py
@@ -1,3 +1,3 @@
-__version__ = '1.0-2020'
+__version__ = '1.1.3'
 __git_tag_describe__ = '1.0-2020'
-__git_sha1__ = b'8045270291eef94222f6002371775ebb667cbde9'
+__git_sha1__ = b'e8ba34e7dac9630f65e1a959de0e155454cea522'
diff --git a/plots/EnsoPlotTemplate.py b/plots/EnsoPlotTemplate.py
index ba680664..270deee3 100644
--- a/plots/EnsoPlotTemplate.py
+++ b/plots/EnsoPlotTemplate.py
@@ -97,8 +97,8 @@ def cmip_boxplot(dict_param, dict_values, units, reference, val_type, my_text, f
     ax.set_yticks(tick_labels)
     ax.set_ylim(ymin=mini, ymax=maxi)
     ax.set_ylabel(yname, fontsize=15)
-    for tick in ax.yaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_yticklabels():
+        axis_tick_label.set_fontsize(12)
     # boxplots
     for ii, (cc, tab) in enumerate(zip(colors, vall)):
         boxproperties = {
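
[Editorial note on the hunk above: matplotlib removed the `Tick.label` alias (deprecated since 3.1) in version 3.8, which is what breaks the old `tick.label.set_fontsize(12)` pattern on newer installations. A small standalone illustration of the version-stable replacement, not taken from the patch itself:

    import matplotlib
    matplotlib.use("Agg")  # headless backend, as the plotting code above uses
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.plot([0, 1, 2], [0, 1, 4])
    # ax.get_yticklabels() returns the tick Text objects directly and works
    # across old and new matplotlib, unlike Tick.label (removed in 3.8)
    for label in ax.get_yticklabels():
        label.set_fontsize(12)
    fig.savefig("example.png")
]
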
diff --git a/pmp_driver/run_pmp.sh b/pmp_driver/run_pmp.sh
deleted file mode 100755
index 57814c5d..00000000
--- a/pmp_driver/run_pmp.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/sh
-set -a
-
-# Working conda env in Crunchy: pmp_nightly_20180830
-
-ver=`date +"%Y%m%d-%H%M"`
-
-#mips='cmip5 cmip6'
-#mips='cmip5'
-mips='obs2obs'
-
-MCs='ENSO_perf ENSO_tel ENSO_proc'
-#MCs='ENSO_perf'
-#MCs='ENSO_tel'
-#MCs='ENSO_proc'
-
-#param_file='my_Param_ENSO.py'
-#param_file='my_Param_ENSO_obs2obs.py'
-param_file='my_Param_ENSO_obs2obs_combinedDataSource.py'
-
-mkdir -p log
-
-for mip in $mips; do
-    for MC in $MCs; do
-        echo $mip $MC
-        python PMPdriver_EnsoMetrics.py -p $param_file --mip ${mip} --metricsCollection ${MC} >& log/log.${mip}.${MC}.all.v${ver}.txt &
-        disown
-    done
-done
diff --git a/pmp_driver/run_pmp_parallel.sh b/pmp_driver/run_pmp_parallel.sh
deleted file mode 100755
index d7caecf6..00000000
--- a/pmp_driver/run_pmp_parallel.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/sh
-set -a
-
-# To avoid below error
-# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable
-export OMP_NUM_THREADS=1
-
-# Working conda env in gates: cdat82_20191107_py27
-
-case_id="v"`date +"%Y%m%d"`
-#case_id="v20200224"
-
-mips='cmip5 cmip6'
-#mips='cmip5'
-#mips='cmip6'
-#mips='obs2obs'
-
-MCs='ENSO_perf ENSO_tel ENSO_proc'
-#MCs='ENSO_perf'
-#MCs='ENSO_tel'
-#MCs='ENSO_proc'
-
-modnames='all'
-#modnames='IPSL-CM5A-LR'
-
-#realization='all'
-realization='first'
-
-mkdir -p log/$case_id
-
-for mip in $mips; do
-    if [ $mip == 'cmip5' ]; then
-        #realization='r1i1p1'
-        #modnames="BNU-ESM HadCM3"
-        param_file='my_Param_ENSO.py'
-    elif [ $mip == 'cmip6' ]; then
-        #realization='r1i1p1f1'
-        #modnames="BCC-ESM1 CESM2 CESM2-FV2 CESM2-WACCM CESM2-WACCM-FV2 GFDL-CM4 GFDL-ESM4 MRI-ESM2-0"
-        param_file='my_Param_ENSO.py'
-    elif [ $mip == 'obs2obs' ]; then
-        param_file='my_Param_ENSO_obs2obs.py'
-    fi
-
-    for MC in $MCs; do
-        echo $mip $MC $realization $case_id
-        python -u ./parallel_driver.py -p $param_file --mip $mip --case_id=$case_id --modnames $modnames --metricsCollection $MC --realization $realization >& log/$case_id/log_parallel.${mip}.${MC}.all.${case_id}.txt &
-        disown
-        sleep 1
-    done
-done
diff --git a/pmp_driver/run_pmp_parallel_obs2obs.sh b/pmp_driver/run_pmp_parallel_obs2obs.sh
deleted file mode 100644
index 80efe5d4..00000000
--- a/pmp_driver/run_pmp_parallel_obs2obs.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/sh
-set -a
-
-# To avoid below error
-# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable
-export OMP_NUM_THREADS=1
-
-# Working conda env in gates: cdat82_20191107_py27, cdat82_20200128_py27
-
-case_id="v"`date +"%Y%m%d"`
-
-mips='obs2obs'
-
-MCs='ENSO_perf ENSO_tel ENSO_proc'
-modnames='20CR ERA-20C ERA-INT TropFlux-1-0 CMAP-V1902 GPCP-2-3 TRMM-3B43v-7 ERA-5 CERES-EBAF-4-0 CERES-EBAF-4-1 AVISO-1-0'
-
-mkdir -p log/$case_id
-
-for mip in $mips; do
-    for MC in $MCs; do
-        echo $mip $MC $realization $case_id
-        python -u ./parallel_driver.py -p my_Param_ENSO_obs2obs.py --mip $mip --case_id=$case_id --modnames $modnames --metricsCollection $MC >& log/$case_id/log_parallel.${mip}.${MC}.all.${case_id}.txt &
-        disown
-        sleep 1
-    done
-done
diff --git a/pmp_driver/run_pmp_plot_parallel.sh b/pmp_driver/run_pmp_plot_parallel.sh
deleted file mode 100755
index 25446565..00000000
--- a/pmp_driver/run_pmp_plot_parallel.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/sh
-set -a
-
-# To avoid below error
-# OpenBLAS blas_thread_init: pthread_create failed for thread XX of 96: Resource temporarily unavailable
-export OMP_NUM_THREADS=1
-
-# Working conda env in gates: cdat82_20191107_py27
-
-case_id="v20200305"
-
-mips='cmip5 cmip6'
-#mips='cmip5'
-#mips='cmip6'
-
-MCs='ENSO_perf ENSO_tel ENSO_proc'
-#MCs='ENSO_perf'
-#MCs='ENSO_tel ENSO_proc'
-#MCs='ENSO_tel'
-#MCs='ENSO_proc'
-
-modnames='all'
-#modnames='IPSL-CM5A-LR'
-#modnames='CanESM5 FGOALS-g3'
-
-realization='all'
-
-mkdir -p log/$case_id
-
-for mip in $mips; do
-    for MC in $MCs; do
-        echo $mip $MC $realization $case_id
-        python -u ./parallel_driver.py --mip $mip --exp historical --case_id=$case_id --modnames $modnames --metricsCollection $MC --realization $realization >& log/$case_id/log_parallel.${mip}.${MC}.all.${case_id}.txt &
-        disown
-        sleep 1
-    done
-done
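Note on the OMP_NUM_THREADS=1 export that these launch scripts share: with tens of concurrent workers on one node, each process must be kept from spawning a full set of BLAS/OpenMP threads, which is exactly the pthread_create failure quoted in the comments. The Python drivers apply the same guard through OPENBLAS_NUM_THREADS. A minimal sketch, assuming numpy is linked against OpenBLAS; the matrix product is just a placeholder workload:

# Sketch: pin BLAS/OpenMP to one thread per worker process. The environment
# variables must be set before numpy first loads OpenBLAS, or they are ignored.
import os

os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['OMP_NUM_THREADS'] = '1'

import numpy as np  # imported after the guard, so it stays single-threaded

a = np.ones((200, 200))
print(np.dot(a, a).shape)  # placeholder workload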
From a203f592c3823767f97a1124f7842e92fe834916 Mon Sep 17 00:00:00 2001
From: Jiwoo Lee
Date: Fri, 22 Nov 2024 18:56:18 -0800
Subject: [PATCH 3/6] update version, fix to work with higher matplotlib version

---
 lib/version.py            |  4 +--
 plots/EnsoPlotTemplate.py | 52 +++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/lib/version.py b/lib/version.py
index 701389ef..5ee345f7 100644
--- a/lib/version.py
+++ b/lib/version.py
@@ -1,3 +1,3 @@
-__version__ = '1.0-2020'
+__version__ = '1.1.3'
 __git_tag_describe__ = '1.0-2020'
-__git_sha1__ = b'8045270291eef94222f6002371775ebb667cbde9'
+__git_sha1__ = b'e8ba34e7dac9630f65e1a959de0e155454cea522'
diff --git a/plots/EnsoPlotTemplate.py b/plots/EnsoPlotTemplate.py
index ba680664..270deee3 100644
--- a/plots/EnsoPlotTemplate.py
+++ b/plots/EnsoPlotTemplate.py
@@ -97,8 +97,8 @@ def cmip_boxplot(dict_param, dict_values, units, reference, val_type, my_text, f
     ax.set_yticks(tick_labels)
     ax.set_ylim(ymin=mini, ymax=maxi)
     ax.set_ylabel(yname, fontsize=15)
-    for tick in ax.yaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_yticklabels():
+        axis_tick_label.set_fontsize(12)
     # boxplots
     for ii, (cc, tab) in enumerate(zip(colors, vall)):
         boxproperties = {
@@ -282,8 +282,8 @@ def my_boxplot(model, filename_nc, dict_param, reference, metric_variables, figu
     # title
     ax.set_title(title[ii], fontsize=15, y=1.01, loc="left")
     # x axis
-    for tick in ax.xaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_xticklabels():
+        axis_tick_label.set_fontsize(12)
     # y axis
     ax.set_yticks(tick_labels)
     if custom_label is not None:
@@ -294,8 +294,8 @@ def my_boxplot(model, filename_nc, dict_param, reference, metric_variables, figu
     if units != "":
         ylabel = ylabel + " (" + units + ")"
     ax.set_ylabel(ylabel, fontsize=15)
-    for tick in ax.yaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_yticklabels():
+        axis_tick_label.set_fontsize(12)
     # boxplots
     boxproperties = {
         "boxprops": dict(linestyle="-", linewidth=2, color=legco[0]),
@@ -565,15 +565,15 @@ def my_dotplot(model, filename_nc, dict_param, reference, metric_variables, figu
         label = [""] * len(label_ticks)
     plt.xticks(label_ticks, label)
     plt.xlim(min(label_ticks), max(label_ticks))
-    for tick in ax.xaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_xticklabels():
+        axis_tick_label.set_fontsize(12)
     # y axis
     tick_labels = minmax_plot(tab, metric=plot_metric)
     plt.yticks(tick_labels, tick_labels)
     plt.ylim(min(tick_labels), max(tick_labels))
     ax.set_ylabel(yname, fontsize=15)
-    for tick in ax.yaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_yticklabels():
+        axis_tick_label.set_fontsize(12)
     if min(tick_labels) < 0 and max(tick_labels) > 0:
         ax.axhline(0, color='k', linestyle='-', linewidth=2)
     # dots
@@ -651,8 +651,8 @@ def my_dot_to_box(model, filename_nc, dict_param, reference, metric_variables, f
         tab = diag_mod
     lines = [Line2D([0], [0], marker="o", c="w", markerfacecolor=cc, markersize=12) for cc in mcolors]
     # x axis
-    for tick in ax.xaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_xticklabels():
+        axis_tick_label.set_fontsize(12)
     # y axis
     tmp = [diag_obs] + [min(my_mask(tt, remove_masked=True)) for tt in tab] +\
         [max(my_mask(tt, remove_masked=True)) for tt in tab]
@@ -660,8 +660,8 @@ def my_dot_to_box(model, filename_nc, dict_param, reference, metric_variables, f
     ax.set_yticks(tick_labels)
     ax.set_ylim(ymin=min(tick_labels), ymax=max(tick_labels))
     ax.set_ylabel(yname, fontsize=15)
-    for tick in ax.yaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_yticklabels():
+        axis_tick_label.set_fontsize(12)
     # plot
     ax.axhline(diag_obs, color=mcolors[0], linestyle='-', linewidth=2)
     for ii in range(len(tab)):
@@ -844,16 +844,16 @@ def my_hovmoeller(model, filename_nc, dict_param, reference, metric_variables, f
         ax.set_xticks(xlabel_ticks)
         ax.set_xticklabels(xlabel)
         ax.set_xlabel(xname, fontsize=15)
-        for tick in ax.xaxis.get_major_ticks():
-            tick.label.set_fontsize(12)
+        for axis_tick_label in ax.get_xticklabels():
+            axis_tick_label.set_fontsize(12)
     # y axis
     ax.set_ylim(ymin=min(tim), ymax=max(tim))
     if ii % nbrc == 0:
         ax.set_yticks(ylabel_ticks)
         ax.set_yticklabels(ylabel)
         ax.set_ylabel(yname, fontsize=15)
-        for tick in ax.yaxis.get_major_ticks():
-            tick.label.set_fontsize(12)
+        for axis_tick_label in ax.get_yticklabels():
+            axis_tick_label.set_fontsize(12)
     # hovmoeller
     levels = create_levels(labelbar)
     xx, yy = NUMPYmeshgrid(lon, tim)
@@ -1490,8 +1490,8 @@ def my_scatterplot(model, filename_nc, dict_param, reference, metric_variables,
     if units != "":
         xlabel = xlabel + " (" + units + ")"
     ax.set_xlabel(xlabel, fontsize=15)
-    for tick in ax.xaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_xticklabels():
+        axis_tick_label.set_fontsize(12)
     # y axis
     if "EnsoFbSshSst" in figure_name and article_fig is True:
         ax.set_yticks([-6, -3, 0, 3, 6], minor=False)
@@ -1528,8 +1528,8 @@ def my_scatterplot(model, filename_nc, dict_param, reference, metric_variables,
     if units != "":
         ylabel = ylabel + " (" + units + ")"
     ax.set_ylabel(ylabel, fontsize=15)
-    for tick in ax.yaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_yticklabels():
+        axis_tick_label.set_fontsize(12)
     # scatterplots and slopes
     x1, x2 = ax.get_xlim()
     dx = (x2 - x1) / 100.
@@ -1682,8 +1682,8 @@ def plot_curve(tab_mod, tab_obs, ax, title, axis, xname, yname, ytick_labels, li
     ax.set_xlim([min(axis), max(axis)])
     # ax.set_xlim([-13, 13])
     ax.set_xlabel(xname, fontsize=15)
-    for tick in ax.xaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_xticklabels():
+        axis_tick_label.set_fontsize(12)
     # y axis
     ax.set_yticks(ytick_labels)
     ax.set_yticklabels(ytick_labels)
@@ -1702,8 +1702,8 @@ def plot_curve(tab_mod, tab_obs, ax, title, axis, xname, yname, ytick_labels, li
     # ax.text(34, 0.1, "duration", fontsize=18, color="orange", horizontalalignment='center',
     #         verticalalignment='center')
     ax.set_ylabel(yname, fontsize=15)
-    for tick in ax.yaxis.get_major_ticks():
-        tick.label.set_fontsize(12)
+    for axis_tick_label in ax.get_yticklabels():
+        axis_tick_label.set_fontsize(12)
     if min(ytick_labels) < 0 and max(ytick_labels) > 0:
         ax.axhline(0, color='k', linestyle='-', linewidth=2)
     # plot curves
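Note on the recurring hunk above: newer matplotlib releases removed the Tick.label attribute (its survivor is Tick.label1), so looping over ax.xaxis.get_major_ticks() and touching tick.label raises AttributeError. Iterating ax.get_xticklabels() / ax.get_yticklabels() yields the tick Text objects directly and works on both old and new releases. A small runnable sketch of the replacement pattern; the Agg backend and throwaway figure only keep the example self-contained:

# Old pattern, removed in newer matplotlib (Tick.label no longer exists):
#     for tick in ax.yaxis.get_major_ticks():
#         tick.label.set_fontsize(12)
# Replacement used throughout EnsoPlotTemplate.py in this patch:
import matplotlib
matplotlib.use("Agg")  # headless backend so the sketch runs anywhere
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
for axis_tick_label in ax.get_yticklabels():
    axis_tick_label.set_fontsize(12)  # Text objects are returned directly
fig.savefig("tick_label_sketch.png")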
From d877b5c4ce7cbc41bddf43120ea50db604b81142 Mon Sep 17 00:00:00 2001
From: Jiwoo Lee
Date: Fri, 22 Nov 2024 19:01:32 -0800
Subject: [PATCH 4/6] update version info

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 754efcc4..756276dd 100755
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 import subprocess
 import glob
 
-Version = "1.0-2020"
+Version = "1.1.3"
 
 p = subprocess.Popen(
     ("git",
From 165ec6e61731c7c3a8b3713b21a7cee68f06f5ea Mon Sep 17 00:00:00 2001
From: Jiwoo Lee
Date: Fri, 22 Nov 2024 19:06:14 -0800
Subject: [PATCH 5/6] clean up

---
 lib/version.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/version.py b/lib/version.py
index 5ee345f7..82b66090 100644
--- a/lib/version.py
+++ b/lib/version.py
@@ -1,3 +1,3 @@
 __version__ = '1.1.3'
-__git_tag_describe__ = '1.0-2020'
-__git_sha1__ = b'e8ba34e7dac9630f65e1a959de0e155454cea522'
+__git_tag_describe__ = '1.1.3'
+__git_sha1__ = b'd877b5c4ce7cbc41bddf43120ea50db604b81142'
From 0377ad7c1868679cd65ffbf70cf01e9c2e95404f Mon Sep 17 00:00:00 2001
From: Jiwoo Lee
Date: Fri, 22 Nov 2024 19:40:17 -0800
Subject: [PATCH 6/6] Update meta.yaml

---
 conda/meta.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/conda/meta.yaml b/conda/meta.yaml
index 95314f2a..17bb4717 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -1,5 +1,5 @@
 {% set name = "enso_metrics" %}
-{% set version = "1.1.1" %}
+{% set version = "1.1.3" %}
 
 package:
   name: {{ name|lower }}
@@ -26,6 +26,7 @@ requirements:
     - numpy
     - scipy
     - udunits2
+    - matplotlib
 
 test:
   imports:
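Note on patches 4 through 6: the release version is declared in three places (setup.py, lib/version.py, conda/meta.yaml) and has to be bumped in lockstep, and patch 5 cleans up exactly this kind of drift in the git tag and sha fields. A hypothetical sanity check, sketched here with this repo's real file paths but with regular expressions that are only assumptions about each file's layout:

# Hypothetical helper: check that the three version declarations agree.
# The paths are this repository's; the regexes are assumptions about layout.
import re

def read_version(path, pattern):
    with open(path) as f:
        return re.search(pattern, f.read()).group(1)

versions = {
    "setup.py": read_version("setup.py", r'Version = "([^"]+)"'),
    "lib/version.py": read_version("lib/version.py", r"__version__ = '([^']+)'"),
    "conda/meta.yaml": read_version("conda/meta.yaml", r'{% set version = "([^"]+)" %}'),
}
assert len(set(versions.values())) == 1, "version strings diverge: %r" % versions
print("all version strings agree:", versions)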