-
Notifications
You must be signed in to change notification settings - Fork 129
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add cmorizer scripts for NOAA-ERSST. (#1799)
Co-authored-by: rbeucher <[email protected]> Co-authored-by: Felicity Chun <[email protected]> Co-authored-by: Lisa Bock <[email protected]> Co-authored-by: Felicity Chun <[email protected]> Co-authored-by: Bouwe Andela <[email protected]>
- Loading branch information
1 parent
bed6408
commit 7e9eecd
Showing
11 changed files
with
395 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
--- | ||
# Filename | ||
filename: 'ersst.*.nc' | ||
|
||
# Common global attributes for Cmorizer output | ||
attributes: | ||
project_id: OBS6 | ||
dataset_id: NOAA-ERSSTv3b | ||
version: 'v3b' | ||
tier: 2 | ||
modeling_realm: reanaly | ||
source: 'https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v3b/netcdf/' | ||
reference: 'ersstv3b' | ||
comment: '' | ||
|
||
# Variables to cmorize | ||
variables: | ||
tos: | ||
mip: Omon | ||
raw: sst |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
--- | ||
# Filename | ||
filename: 'ersst.v5.*.nc' | ||
|
||
# Common global attributes for Cmorizer output | ||
attributes: | ||
project_id: OBS6 | ||
dataset_id: NOAA-ERSSTv5 | ||
version: 'v5' | ||
tier: 2 | ||
modeling_realm: reanaly | ||
source: 'https://doi.org/10.7289/V5T72FNM' | ||
reference: 'ersstv5' | ||
comment: '' | ||
|
||
# Variables to cmorize | ||
variables: | ||
tos: | ||
mip: Omon | ||
raw: sst |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
49 changes: 49 additions & 0 deletions
49
esmvaltool/cmorizers/data/downloaders/datasets/noaa_ersstv3b.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""Script to download NOAA-ERSST-v3b.""" | ||
import logging | ||
from datetime import datetime | ||
from dateutil import relativedelta | ||
|
||
from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def download_dataset(config, dataset, dataset_info, start_date, end_date,
                     overwrite):
    """Download the monthly NOAA-ERSST v3b files.

    One file per calendar month is requested, from the month containing
    ``start_date`` up to and including the month containing ``end_date``.

    Parameters
    ----------
    config : dict
        ESMValTool's user configuration
    dataset : str
        Name of the dataset
    dataset_info : dict
        Dataset information from the datasets.yml file
    start_date : datetime
        Start of the interval to download; January 1854 is used when
        ``None``
    end_date : datetime
        End of the interval to download; January 2020 is used when
        ``None``
    overwrite : bool
        Overwrite already downloaded files
    """
    if start_date is None:
        start_date = datetime(1854, 1, 1)
    if end_date is None:
        end_date = datetime(2020, 1, 1)

    downloader = WGetDownloader(
        config=config,
        dataset=dataset,
        dataset_info=dataset_info,
        overwrite=overwrite,
    )
    base_path = ("https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v3b/netcdf"
                 "/ersst.{year}{month:02d}.nc")

    # Files are monthly, so compare (year, month) only.  Comparing the
    # full datetimes would drop the last month whenever the day or time
    # of ``start_date`` is later than that of ``end_date``.
    last_month = (end_date.year, end_date.month)
    loop_date = start_date
    while (loop_date.year, loop_date.month) <= last_month:
        downloader.download_folder(
            base_path.format(year=loop_date.year, month=loop_date.month), [])
        loop_date += relativedelta.relativedelta(months=1)
49 changes: 49 additions & 0 deletions
49
esmvaltool/cmorizers/data/downloaders/datasets/noaa_ersstv5.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
"""Script to download NOAA-ERSST-V5.""" | ||
import logging | ||
from datetime import datetime | ||
from dateutil import relativedelta | ||
|
||
from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def download_dataset(config, dataset, dataset_info, start_date, end_date,
                     overwrite):
    """Download the monthly NOAA-ERSST v5 files.

    One file per calendar month is requested, from the month containing
    ``start_date`` up to and including the month containing ``end_date``.

    Parameters
    ----------
    config : dict
        ESMValTool's user configuration
    dataset : str
        Name of the dataset
    dataset_info : dict
        Dataset information from the datasets.yml file
    start_date : datetime
        Start of the interval to download; January 1854 is used when
        ``None``
    end_date : datetime
        End of the interval to download; January 2020 is used when
        ``None``
    overwrite : bool
        Overwrite already downloaded files
    """
    if start_date is None:
        start_date = datetime(1854, 1, 1)
    if end_date is None:
        end_date = datetime(2020, 1, 1)

    downloader = WGetDownloader(
        config=config,
        dataset=dataset,
        dataset_info=dataset_info,
        overwrite=overwrite,
    )

    base_path = ("https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v5/netcdf/"
                 "ersst.v5.{year}{month:02d}.nc")

    # Files are monthly, so compare (year, month) only.  Comparing the
    # full datetimes would drop the last month whenever the day or time
    # of ``start_date`` is later than that of ``end_date``.
    last_month = (end_date.year, end_date.month)
    loop_date = start_date
    while (loop_date.year, loop_date.month) <= last_month:
        downloader.download_folder(
            base_path.format(year=loop_date.year, month=loop_date.month), [])
        loop_date += relativedelta.relativedelta(months=1)
89 changes: 89 additions & 0 deletions
89
esmvaltool/cmorizers/data/formatters/datasets/noaa_ersstv3b.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
"""ESMValTool CMORizer for NOAA ERSST data, version 3b. | ||
This is the CMORizer script for the NOAA Extended Reconstructed | ||
Sea Surface Temperature (ERSST) in its version 3b. | ||
Tier | ||
Tier 2: open dataset. | ||
Source | ||
https://doi.org/10.1175/1520-0442-16.10.1495 | ||
Last access | ||
20200520 | ||
Download and processing instructions | ||
The data is provided by NOAA at: | ||
https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v3b/netcdf/ | ||
""" | ||
|
||
import logging | ||
import os | ||
import re | ||
|
||
import iris | ||
from cf_units import Unit | ||
|
||
from esmvaltool.cmorizers.data import utilities as utils | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def _get_filepaths(in_dir, basename): | ||
"""Find correct name of file (extend basename with timestamp).""" | ||
regex = re.compile(basename) | ||
return_files = [] | ||
for files in os.listdir(in_dir): | ||
|
||
if regex.match(files): | ||
return_files.append(os.path.join(in_dir, files)) | ||
|
||
return return_files | ||
|
||
|
||
def _fix_time_coord(cube, _field, _filename):
    """Move every time point of ``cube`` to the 15th of its month.

    The time coordinate is converted in place to
    'days since 1850-01-01 00:00:00' on the standard calendar before the
    points are rewritten.  The two trailing arguments are ignored.
    """
    target_unit = Unit('days since 1850-01-01 00:00:00', calendar='standard')
    coord = cube.coord('time')
    coord.convert_units(target_unit)
    mid_month = [
        date.replace(day=15) for date in target_unit.num2date(coord.points)
    ]
    coord.points = target_unit.date2num(mid_month)
|
||
|
||
def _extract_variable(raw_var, cmor_info, attrs, filepath, out_dir):
    """Load ``raw_var`` from the input files, concatenate and save it.

    The loaded cubes get their time points fixed on load, are joined into
    a single cube, and are written to ``out_dir`` under the CMOR short
    name with an unlimited time dimension.
    """
    short_name = cmor_info.short_name

    loaded = iris.load(filepath, raw_var, _fix_time_coord)
    iris.util.equalise_attributes(loaded)
    joined = iris.util.squeeze(loaded.concatenate_cube())

    utils.fix_var_metadata(joined, cmor_info)
    utils.set_global_atts(joined, attrs)
    utils.save_variable(
        joined,
        short_name,
        out_dir,
        attrs,
        unlimited_dimensions=['time'],
    )
|
||
|
||
def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """CMORize every variable listed in ``cfg['variables']``.

    Input files are the entries of ``in_dir`` matching ``cfg['filename']``.
    ``cfg_user``, ``start_date`` and ``end_date`` are not used here.
    """
    glob_attrs = cfg['attributes']
    cmor_table = cfg['cmor_table']

    input_files = _get_filepaths(in_dir, cfg['filename'])

    if not input_files:
        logger.info("No files found, basename: %s", cfg['filename'])
    else:
        logger.info("Found %d input files in '%s'", len(input_files), in_dir)

    for short_name, settings in cfg['variables'].items():
        logger.info("CMORizing variable '%s'", short_name)
        # The MIP table name is stored in the shared global attributes.
        glob_attrs['mip'] = settings['mip']
        cmor_info = cmor_table.get_variable(settings['mip'], short_name)
        # Fall back to the CMOR name when no raw name is configured.
        raw_name = settings.get('raw', short_name)
        _extract_variable(
            raw_name, cmor_info, glob_attrs, input_files, out_dir)
105 changes: 105 additions & 0 deletions
105
esmvaltool/cmorizers/data/formatters/datasets/noaa_ersstv5.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
"""ESMValTool CMORizer for NOAA ERSST data, version 5. | ||
This is the CMORizer script for the NOAA Extended Reconstructed Sea Surface | ||
Temperature (ERSST) data of version 5. | ||
Tier | ||
Tier 2: open dataset. | ||
Source | ||
https://doi.org/10.7289/V5T72FNM | ||
Last access | ||
20200520 | ||
Download and processing instructions | ||
The data is provided by NOAA at: | ||
https://www1.ncdc.noaa.gov/pub/data/cmb/ersst/v5/netcdf/ | ||
""" | ||
|
||
import logging | ||
import os | ||
import re | ||
|
||
import iris | ||
import cf_units | ||
|
||
from esmvaltool.cmorizers.data import utilities as utils | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def _get_filepaths(in_dir, basename): | ||
"""Find correct name of file (extend basename with timestamp).""" | ||
regex = re.compile(basename) | ||
return_files = [] | ||
return_files_gr08 = [] | ||
for file in os.listdir(in_dir): | ||
|
||
if regex.match(file): | ||
year = file.split('.')[2][:4] # ersst.v5.$yr$nm.nc | ||
# return 2 lists as files differ from 2008 | ||
if int(year) < 2008: | ||
return_files.append(os.path.join(in_dir, file)) | ||
else: | ||
return_files_gr08.append(os.path.join(in_dir, file)) | ||
|
||
return return_files, return_files_gr08 | ||
|
||
|
||
def _fix_time_coord(cube, _, _filename):
    """Centre time points on the 15th and standardise the time units.

    The time coordinate of ``cube`` is modified in place: points are moved
    to day 15 of their month and stored as float64, the unit is rebuilt
    from its origin with the standard calendar, and the long name is set
    to 'Time'.  The two trailing arguments are ignored.
    """
    coord = cube.coord('time')
    source_unit = coord.units
    dates = source_unit.num2date(coord.points)
    centred = [date.replace(day=15) for date in dates]
    coord.points = source_unit.date2num(centred).astype('float64')
    coord.units = cf_units.Unit(coord.units.origin, calendar='standard')
    coord.long_name = 'Time'
|
||
|
||
def _extract_variable(raw_var, cmor_info, attrs, filepaths, out_dir):
    """Load ``raw_var`` from the monthly files, concatenate and save it.

    The loaded cubes get their time points fixed on load, have attributes
    and time units unified, and are joined into a single cube that is
    written to ``out_dir`` with an unlimited time dimension.
    """
    short_name = cmor_info.short_name

    monthly = iris.load(filepaths, raw_var, _fix_time_coord)
    iris.util.equalise_attributes(monthly)
    iris.util.unify_time_units(monthly)
    joined = iris.util.squeeze(monthly.concatenate_cube())

    utils.fix_var_metadata(joined, cmor_info)
    utils.fix_coords(joined)

    utils.set_global_atts(joined, attrs)
    utils.save_variable(
        joined,
        short_name,
        out_dir,
        attrs,
        unlimited_dimensions=['time'],
    )
|
||
|
||
def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """CMORize every variable listed in ``cfg['variables']``.

    Input files are split into two groups (before 2008, and 2008 onwards)
    because the files differ between the two periods; each non-empty
    group is processed separately.

    Parameters
    ----------
    in_dir : str
        Directory containing the downloaded input files
    out_dir : str
        Directory the CMORized output is written to
    cfg : dict
        Cmorizer configuration; the keys 'attributes', 'cmor_table',
        'filename' and 'variables' are read
    cfg_user : dict
        Not used by this function
    start_date : datetime
        Not used by this function
    end_date : datetime
        Not used by this function
    """
    glob_attrs = cfg['attributes']
    cmor_table = cfg['cmor_table']

    files_before_2008, files_from_2008 = _get_filepaths(
        in_dir, cfg['filename'])

    if not files_before_2008 and not files_from_2008:
        # Nothing to process: stop here instead of failing later on an
        # empty cube list.
        logger.warning("No files found, basename: %s", cfg['filename'])
        return

    totalfiles = len(files_before_2008) + len(files_from_2008)
    logger.info("%d files before 2008", len(files_before_2008))
    logger.info("Found %d input files in '%s'", totalfiles, in_dir)

    # Run the cmorization
    for (var, var_info) in cfg['variables'].items():
        logger.info("CMORizing variable '%s'", var)
        glob_attrs['mip'] = var_info['mip']
        cmor_info = cmor_table.get_variable(var_info['mip'], var)
        raw_var = var_info.get('raw', var)
        for group in (files_before_2008, files_from_2008):
            # A partial download may cover only one of the two periods;
            # skip the period without any file.
            if not group:
                continue
            _extract_variable(raw_var, cmor_info, glob_attrs,
                              group, out_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.