Skip to content

Commit

Permalink
Update esacci-soilmoisture(v08.1) downloader and CMORizer (Python ver…
Browse files Browse the repository at this point in the history
…sion) (#3676)

Co-authored-by: Manuel Schlund <[email protected]>
  • Loading branch information
diegokam and schlunma authored Jul 9, 2024
1 parent 64c371e commit 9633fbe
Show file tree
Hide file tree
Showing 9 changed files with 312 additions and 207 deletions.
2 changes: 1 addition & 1 deletion doc/sphinx/source/input.rst
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| ESACCI-SEA-SURFACE-SALINITY | sos (Omon) | 2 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| ESACCI-SOILMOISTURE | dos, dosStderr, sm, smStderr (Lmon) | 2 | NCL |
| ESACCI-SOILMOISTURE | sm (Eday, Lmon), smStderr (Eday) | 2 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| ESACCI-SST | ts, tsStderr (Amon) | 2 | NCL |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
Expand Down
21 changes: 21 additions & 0 deletions esmvaltool/cmorizers/data/cmor_config/ESACCI-SOILMOISTURE.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
attributes:
project_id: 'OBS'
dataset_id: 'ESACCI-SOILMOISTURE'
tier: 2
modeling_realm: sat
institution: 'TU Wien (AUT); VanderSat B.V. (NL); Planet Labs (NL); CESBIO (FR), EODC Gmbh (AUT)'
reference: 'esacci-soilmoisture'
source: 'ftp://anon-ftp.ceda.ac.uk/neodc/esacci/soil_moisture/data/'
title: 'ESA CCI Soil Moisture'
version: 'L3S-SSMV-COMBINED-v08.1'
comment: ''
variables:
sm:
mip: Eday
raw: sm
filename: ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-{year}????000000-fv08.1.nc
smStderr:
mip: Eday
raw: sm_uncertainty
filename: ESACCI-SOILMOISTURE-L3S-SSMV-COMBINED-{year}????000000-fv08.1.nc

6 changes: 3 additions & 3 deletions esmvaltool/cmorizers/data/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -531,11 +531,11 @@ datasets:
ESACCI-SOILMOISTURE:
tier: 2
source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/soil_moisture/data/
last_access: 2019-02-01
last_access: 2024-06-19
info: |
Download the data from:
daily_files/COMBINED/v04.2/
ancillary/v04.2/
daily_files/COMBINED/v08.1/
ancillary/v08.1/
Put all files under a single directory (no subdirectories with years).
ESACCI-SEA-SURFACE-SALINITY:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
Overwrite already downloaded files
"""
if start_date is None:
start_date = datetime(1979, 1, 1)
start_date = datetime(1978, 11, 1)
if end_date is None:
end_date = datetime(2016, 1, 1)
end_date = datetime(2022, 12, 31)

loop_date = start_date

Expand All @@ -40,9 +40,9 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
)
downloader.ftp_name = 'soil_moisture'
downloader.connect()
downloader.set_cwd('ancillary/v04.2/')
downloader.set_cwd('ancillary/v08.1/')
downloader.download_folder('.')
downloader.set_cwd('daily_files/COMBINED/v04.2/')
downloader.set_cwd('daily_files/COMBINED/v08.1/')
while loop_date <= end_date:
year = loop_date.year
downloader.download_year(f'{year}')
Expand Down
174 changes: 0 additions & 174 deletions esmvaltool/cmorizers/data/formatters/datasets/esacci_soilmoisture.ncl

This file was deleted.

149 changes: 149 additions & 0 deletions esmvaltool/cmorizers/data/formatters/datasets/esacci_soilmoisture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""ESMValTool CMORizer for ESACCI-SOILMOISTURE data.
Tier
Tier 2: other freely-available dataset.
Source
ftp://anon-ftp.ceda.ac.uk/neodc/esacci/soil_moisture/data/
Last access
20240626
Download and processing instructions
Download the data from:
daily_files/COMBINED/v08.1/
ancillary/v08.1/
Put all files under a single directory (no subdirectories with years)
in ${RAWOBS}/Tier2/ESACCI-SOILMOISTURE.
"""

import glob
import logging
import os
from datetime import datetime
import iris
from esmvalcore.preprocessor import concatenate, monthly_statistics
from cf_units import Unit

from ...utilities import (
fix_var_metadata,
fix_dim_coordnames,
fix_bounds,
save_variable,
set_global_atts
)

logger = logging.getLogger(__name__)


def fix_coords(cube):
    """Bring the coordinates of a cube in line with CMOR conventions.

    The following steps are applied, in this order:

    1. ``fix_dim_coordnames`` is called on the cube (intended to repair
       missing coordinate variable names).
    2. The cube is intersected with the longitude range 0...360 degrees.
    3. The ``time`` coordinate is converted to
       ``days since 1970-01-01T00:00:00+00:00`` on a proleptic Gregorian
       calendar.
    4. The cube is reversed along the ``lat`` coordinate.
    5. ``fix_bounds`` is called for every coordinate of the cube.

    Parameters
    ----------
    cube: iris.cube.Cube
        data cube with coordinates to be fixed.

    Returns
    -------
    iris.cube.Cube
        data cube with fixed coordinates. This is not the object that was
        passed in: the name is rebound to the results of the longitude
        intersection and of the latitude reversal.
    """
    cmor_time_unit = Unit('days since 1970-01-01T00:00:00+00:00',
                          calendar='proleptic_gregorian')

    # Missing coordinate variable names have to be sorted out first, the
    # loop below dispatches on ``var_name``.
    fix_dim_coordnames(cube)

    # Longitude: -180...180 -> 0...360
    cube = cube.intersection(longitude=(0.0, 360.0))

    for coord in cube.coords():
        coord_var_name = coord.var_name

        if coord_var_name == 'time':
            logger.info("Fixing time...")
            cube.coord('time').convert_units(cmor_time_unit)
        elif coord_var_name == 'lat':
            logger.info("Fixing latitude...")
            # NOTE(review): presumably the raw files store latitude from
            # north to south - confirm against the input data.
            cube = iris.util.reverse(cube, coord)

        # Bounds are fixed for every coordinate, not only time and latitude
        fix_bounds(cube, coord)

    return cube


def extract_variable(raw_info):
    """Load one raw variable from a single input file.

    Parameters
    ----------
    raw_info: dict
        Must contain the keys ``'name'`` (name of the raw variable) and
        ``'file'`` (path of the file to load it from).

    Returns
    -------
    iris.cube.Cube
        Cube holding the requested variable, with all of its ancillary
        variables removed.
    """
    raw_name = raw_info['name']
    raw_file = raw_info['file']

    if raw_name != 'sm_uncertainty':
        cube = iris.load_cube(raw_file, iris.Constraint(name=raw_name))
    else:
        # The uncertainty is not loaded as a cube of its own: it is taken
        # from the ancillary variable attached to the ``sm`` cube, whose
        # data then replace the soil moisture values in a copy of that cube.
        parent_cube = iris.load_cube(raw_file,
                                     iris.NameConstraint(var_name='sm'))
        uncertainty = parent_cube.ancillary_variable(
            'Volumetric Soil Moisture Uncertainty'
        )
        cube = parent_cube.copy(uncertainty.core_data())

    # Remove dysfunctional ancillary data without standard names
    for leftover in cube.ancillary_variables():
        cube.remove_ancillary_variable(leftover)

    return cube


def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
    """Cmorize data.

    For every variable listed in ``cfg['variables']`` all matching input
    files of the requested years are loaded, concatenated into one cube,
    fixed (variable metadata, coordinates, global attributes) and saved.
    For ``sm`` the monthly means are saved in addition, with the ``mip``
    attribute set to ``Lmon``.

    Parameters
    ----------
    in_dir: str
        Directory containing the raw input files.
    out_dir: str
        Directory passed on to ``save_variable`` for the output.
    cfg: dict
        CMORizer configuration. The keys ``'attributes'``, ``'variables'``
        and ``'cmor_table'`` are read. ``cfg['attributes']`` is modified in
        place (its ``'mip'`` entry is set for every variable).
    cfg_user:
        Not used by this function.
    start_date: datetime.datetime or None
        First date to process; only the year is used. Falsy values select
        the default (1978).
    end_date: datetime.datetime or None
        Last date to process; only the year is used. Falsy values select
        the default (2022).

    Raises
    ------
    ValueError
        If the configuration of a variable is not a dictionary, or if no
        input file is found for a variable in the requested years.
    """
    glob_attrs = cfg['attributes']
    if not start_date:
        start_date = datetime(1978, 1, 1)
    if not end_date:
        end_date = datetime(2022, 12, 31)
    # Input files are selected by year only
    years = range(start_date.year, end_date.year + 1)

    # run the cmorization
    for var_name, vals in cfg['variables'].items():
        if not isinstance(vals, dict):  # Ensure vals is a dictionary
            raise ValueError(
                f"Invalid format for variable {var_name}: {type(vals)}"
            )
        var_info = cfg['cmor_table'].get_variable(vals['mip'], var_name)
        glob_attrs['mip'] = vals['mip']
        raw_info = {'name': vals['raw']}
        inpfile_pattern = os.path.join(in_dir, vals['filename'])
        logger.info("CMORizing var %s from file type %s",
                    var_name, inpfile_pattern)

        all_data_cubes = []
        for year in years:
            year_inpfile_pattern = inpfile_pattern.format(year=year)
            for inpfile in sorted(glob.glob(year_inpfile_pattern)):
                raw_info['file'] = inpfile
                all_data_cubes.append(extract_variable(raw_info))

        # Fail early with a clear message instead of handing an empty list
        # to the concatenation and metadata fixes below.
        if not all_data_cubes:
            raise ValueError(
                f"No input files found for variable {var_name} matching "
                f"{inpfile_pattern} for years {start_date.year}-"
                f"{end_date.year}"
            )

        final_cube = concatenate(all_data_cubes)
        fix_var_metadata(final_cube, var_info)
        final_cube = fix_coords(final_cube)
        set_global_atts(final_cube, glob_attrs)

        save_variable(final_cube, var_name, out_dir, glob_attrs,
                      unlimited_dimensions=['time'])

        # For sm, also save monthly means
        if var_name == 'sm':
            monthly_mean_cube = monthly_statistics(final_cube, 'mean')
            # 'mip' is set again at the top of the next loop iteration
            glob_attrs['mip'] = 'Lmon'
            monthly_mean_cube.attributes.update(glob_attrs)
            save_variable(monthly_mean_cube, var_name, out_dir, glob_attrs,
                          unlimited_dimensions=['time'])
Loading

0 comments on commit 9633fbe

Please sign in to comment.