diff --git a/esmvaltool/cmorizers/data/cmor_config/RSS.yml b/esmvaltool/cmorizers/data/cmor_config/RSS.yml new file mode 100755 index 0000000000..1743e187ff --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/RSS.yml @@ -0,0 +1,27 @@ +--- +# Global attributes of NetCDF file +attributes: + dataset_id: RSS + project_id: OBS6 + tier: 2 + version: 'v07r01' + modeling_realm: reanaly + source: '' + reference: 'rss' + comment: | + 'Remote Sensing Systems. + Monthly Mean Total Precipitable Water Data Set + on a 1 degree grid made from Remote Sensing + Systems Version-7 Microwave Radiometer Data, + Date created: 20201110T185551Z + [accessed on 2020-11-19]. Santa Rosa, CA, USA. + Available at www.remss.com' + +# Variables to CMORize +variables: + # monthly frequency + prw: + short_name: prw + mip: Amon + raw: precipitable_water + file: 'tpw_v07r01_198801_202010.nc4.nc' \ No newline at end of file diff --git a/esmvaltool/cmorizers/data/formatters/datasets/rss.ncl b/esmvaltool/cmorizers/data/formatters/datasets/rss.ncl deleted file mode 100644 index 46436a9571..0000000000 --- a/esmvaltool/cmorizers/data/formatters/datasets/rss.ncl +++ /dev/null @@ -1,234 +0,0 @@ -; ############################################################################# -; REFORMAT SCRIPT FOR RSS prw REANALYISIS DATA -; ############################################################################# -; -; Tier -; Tier 2: other freely available dataset. -; -; Source -; ftp://ftp.remss.com/vapor/monthly_1deg/ -; -; Last access -; 2020-11-19 -; -; Download and processing instructions -; A registration is required for downloading the data, but no licence -; agreement necessary. Download vapor, here ncdf3 file is used. -; -; Caveats -; -; Modification history -; 20170419-A_gier_bettina: written. -; 20201201-A_weigel_katja: portet to ESMValTool v2. -; 20231214-A_weigel_katja: update to current ESMValTool version. -; -; ############################################################################# - -loadscript(getenv("esmvaltool_root") + \ - "/data/formatters/interface.ncl") -loadscript(getenv("esmvaltool_root") + \ - "/data/formatters/utilities.ncl") - -begin - - ; Source name - OBSNAME = "RSS" - - DIAG_SCRIPT = "rss.ncl" - - ; Tier - TIER = 2 - - ; Input dir (raw data) - INDIR = getenv("ESMValTool_RAWOBSPATH") + "/Tier" + \ - TIER + "/" + OBSNAME + "/" - - ; Output dir (CMOR-ized data) - OUTDIR = getenv("ESMValTool_OBSPATH") + "/Tier" + \ - TIER + "/" + OBSNAME + "/" - - ; Period - YEAR1 = 1988 - YEAR2 = 2019 - - ; Selected variable (standard name) - VAR = "prw" - - ; Name in the raw data - NAME = "precipitable_water" - NAMEANO = "precipitable_water_anomaly" - CLIMNAME = "precipitable_water_climatology" - - ; Initialize global variable - FIELD = "T2Ms" - - ; MIP - MIP = "Amon" - - ; Frequency - FREQ = "mon" - - ; CMOR table - CMOR_TABLE = getenv("cmor_tables") + "/cmip5/Tables/CMIP5_" + MIP - ; + ".json" - - ; Type - TYPE = "reanaly" - - ; Version - VERSION = "v07r01" - - ; Global attributes - SOURCE = "ftp://ftp.remss.com/vapor/monthly_1deg/" - REF = "Remote Sensing Systems. " \ - + "Monthly Mean Total Precipitable Water Data Set " \ - + "on a 1 degree grid made from Remote Sensing " \ - + "Systems Version-7 Microwave Radiometer Data, " \ - + "Date created: 20201110T185551Z " \ - + "[accessed on 2020-11-19]. Santa Rosa, CA, USA. " \ - + "Available at www.remss.com" - COMMENT = "" - -end - -begin - ; verbosity = stringtointeger(getenv("ESMValTool_verbosity")) - log_info("Processing " + VAR + " (" + MIP + ")") - - fname = input_dir_path + "tpw_" + VERSION + "_198801_202010.nc4.nc" - - f = addfile(fname, "r") - output = (/f->$NAME$/) - outputano = (/f->$NAMEANO$/) - clim = (/f->$CLIMNAME$/) - lat = (/f->latitude/) - lon = (/f->longitude/) - - print(dimsizes(output)) - ; Cut off the first 10 months of 2020 - output_new = output(:dimsizes(output(:, 0, 0))-11, :, :) - delete(output) - output = output_new - outputano_new = outputano(:dimsizes(outputano(:, 0, 0))-11, :, :) - delete(outputano) - outputano = outputano_new - - n = 0 - do iii = 0, dimsizes(outputano(:, 0, 0)) - 1, 12 - do nnn = 0, 11 - outputano_new(iii + nnn, :, :) = outputano(iii + nnn, :, :) + \ - clim(nnn, :, :) - end do - end do - - outputano_new = where(outputano.eq.(-500), outputano@_FillValue, outputano_new) - delete(outputano) - outputano = outputano_new - - ; Format coordinates - output!0 = "time" - output!1 = "lat" - output!2 = "lon" - output&lat = lat - output&lon = lon - outputano!0 = "time" - outputano!1 = "lat" - outputano!2 = "lon" - outputano&lat = lat - outputano&lon = lon - ; Format latitude coordinate - if (isMonotonic(output&lat) .eq. 0) then - error_msg("f", diag_script, "", \ - "non-monotonic latitude coordinate") - end if - if (isMonotonic(output&lat) .eq. -1) then - output = output(:, ::-1, :) - end if - if (isMonotonic(outputano&lat) .eq. 0) then - error_msg("f", diag_script, "", \ - "non-monotonic latitude coordinate") - end if - if (isMonotonic(outputano&lat) .eq. -1) then - outputano = outputano(:, ::-1, :) - end if - - ; Format longitude coordinate - if (isMonotonic(output&lon) .eq. 0) then - error_msg("f", diag_script, "", \ - "non-monotonic longitude coordinate") - end if - if (any(output&lon.lt.0.)) then - output = lonFlip(output) - end if - if (isMonotonic(outputano&lon) .eq. 0) then - error_msg("f", diag_script, "", \ - "non-monotonic longitude coordinate") - end if - if (any(outputano&lon.lt.0.)) then - outputano = lonFlip(outputano) - end if - - output&time = create_timec(YEAR1, YEAR2) - outputano&time = create_timec(YEAR1, YEAR2) - - format_coords(output, YEAR1 + "0101", YEAR2 + "1231", FREQ) - format_coords(outputano, YEAR1 + "0101", YEAR2 + "1231", FREQ) - - ; Calculate days per month - date = cd_calendar(output&time, 0) - dpm = days_in_month(toint(date(:, 0)), toint(date(:, 1))) - dpmc = conform(output, dpm, 0) - ; Check time range - if (dimsizes(date(:, 0)).ne.12 * (YEAR2 - YEAR1 + 1)) then - error_msg("f", diag_script, "", "incorrect number of timesteps") - end if - - date = cd_calendar(outputano&time, 0) - dpm = days_in_month(toint(date(:, 0)), toint(date(:, 1))) - dpmc = conform(outputano, dpm, 0) - ; Check time range - if (dimsizes(date(:, 0)).ne.12 * (YEAR2 - YEAR1 + 1)) then - error_msg("f", diag_script, "", "incorrect number of timesteps") - end if - - ; Values of -500 are ice -> set these to missing values - output = where(output.eq.(-500), output@_FillValue, output) - outputano = where(outputano.eq.(-500), outputano@_FillValue, outputano) - - ; Set variable attributes - tmp = format_variable(output, VAR, CMOR_TABLE) - delete(output) - output = tmp - delete(tmp) - - tmp = format_variable(outputano, VAR, CMOR_TABLE) - delete(outputano) - outputano = tmp - delete(tmp) - - ; Set global attributes - gAtt = set_global_atts(OBSNAME, TIER, SOURCE, REF, COMMENT) - gAtt@period = YEAR1 + "-" + YEAR2 - - bounds = guess_coord_bounds(output, FREQ) - boundsano = guess_coord_bounds(outputano, FREQ) - - filter = outputano - filter = where(outputano.ne.(outputano@_FillValue), 1, filter) - - ; Outfile - DATESTR = YEAR1 + "01-" + YEAR2 + "12" - fout = output_dir_path + \ - str_join((/"OBS", OBSNAME, TYPE, VERSION, \ - MIP, VAR, DATESTR/), "_") + ".nc" - - filtout = output_dir_path + \ - str_join((/"OBS", OBSNAME, "filter", VERSION, \ - MIP, VAR, DATESTR/), "_") + ".nc" - write_nc(fout, VAR, output, bounds, gAtt) - write_nc(filtout, VAR, filter, boundsano, gAtt) - delete(gAtt) - delete(output) - delete(bounds) - -end diff --git a/esmvaltool/cmorizers/data/formatters/datasets/rss.py b/esmvaltool/cmorizers/data/formatters/datasets/rss.py new file mode 100755 index 0000000000..bde83939da --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/rss.py @@ -0,0 +1,156 @@ +"""ESMValTool CMORizer for RSS data. + +Tier + Tier 2: other freely available dataset. + +Source + ftp://ftp.remss.com/vapor/monthly_1deg/ + +Last access + 20201119 + +Download and processing instructions + A registration is required for downloading the data, but no licence + agreement necessary. Download vapor, here ncdf3 file is used. + +Modification history + 20170419-A_gier_bettina: written. + 20201201-A_weigel_katja: portet to ESMValTool v2. + 20231214-A_weigel_katja: update to current ESMValTool version. + 20241122-A_weigel_katja: changed to python. + +""" +import glob +import logging +import os +import xarray as xr +from copy import deepcopy + +from dask import array as da +from esmvalcore.cmor.table import CMOR_TABLES + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + +def _load_cube(in_files, var): + + dataset = xr.open_mfdataset(in_files, engine="netcdf4") + da_tmp = dataset[var['raw']] + cube = da_tmp.to_iris() + + cube.data = da.ma.masked_equal(cube.core_data(), -500) + + return cube + + +def _fix_coordinates(cube, definition): + """Fix coordinates.""" + axis2def = {'T': 'time', 'X': 'longitude', 'Y': 'latitude'} + axes = ['T', 'X', 'Y'] + for axis in axes: + coord_def = definition.coordinates.get(axis2def[axis]) + if coord_def: + coord = cube.coord(axis=axis) + + coord.standard_name = coord_def.standard_name + coord.var_name = coord_def.out_name + coord.long_name = coord_def.long_name + coord.points = coord.core_points().astype('float64') + + if len(coord.points) > 1: + coord.guess_bounds() + else: + raise logger.error("Bounds for coordinate %s " + "cannot be guessed", coord) + + return cube + + +def _extract_variable(in_files, var, cfg, out_dir): + logger.info("CMORizing variable '%s' from input files '%s'", + var['short_name'], ', '.join(in_files)) + attributes = deepcopy(cfg['attributes']) + attributes['mip'] = var['mip'] + attributes['raw'] = var['raw'] + + cmor_table = CMOR_TABLES[attributes['project_id']] + definition = cmor_table.get_variable(var['mip'], var['short_name']) + + cube = _load_cube(in_files, var) + + # keep the following raw cube attributes + attrs_to_keep = [ + "institution", "Institution", + "institute_id", "VersionID", + "experiment_id", + "source", "Source", # overrides empty string default + "model_id", "ModelID", + "contact", "Contact", + "references", + "tracking_id", + "mip_specs", # described by "mip" already + "source_id", "SourceID", + "product", "Product", + "frequency", "Frequency", + "creation_date", + "project_id", "ProjectID", + "table_id", "TableID", + "title", "Title", + "modeling_realm", + "doi", + "VersionID", # described by "version" already + ] + + attrs_to_keep_exist = [ + att for att in cube.attributes if att in attrs_to_keep + ] + for att in attrs_to_keep_exist: + attributes[att] = cube.attributes[att] + + utils.set_global_atts(cube, attributes) + + # Set correct names + cube.var_name = definition.short_name + cube.long_name = definition.long_name + + # Fix data type + cube.data = cube.core_data().astype('float32') + + # Roll longitude + cube.coord('longitude').points = cube.coord('longitude').points + 180. + nlon = len(cube.coord('longitude').points) + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=-1) + + # Fix coordinates + cube = _fix_coordinates(cube, definition) + + cube.coord('latitude').attributes = None + cube.coord('longitude').attributes = None + + logger.debug("Saving cube\n%s", cube) + logger.debug("Setting time dimension to UNLIMITED while saving!") + utils.save_variable(cube, cube.var_name, + out_dir, attributes, + unlimited_dimensions=['time']) + logger.info("Finished CMORizing %s", ', '.join(in_files)) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Run CMORizer for RSS.""" + cfg.pop('cmor_table') + + for short_name, var in cfg['variables'].items(): + if 'short_name' not in var: + var['short_name'] = short_name + logger.info("short_name, var KW") + logger.info(short_name, var) + logger.info(var['file']) + logger.info(var['file'].format()) + # Now get list of files + filepattern = os.path.join(in_dir, var['file'].format()) + in_files = glob.glob(filepattern) + if not in_files: + logger.warning('%s does not exist', filepattern) + continue + _extract_variable(in_files, var, cfg, out_dir)