2 changes: 2 additions & 0 deletions ci/azure/conda_linux.yml
@@ -40,6 +40,8 @@ jobs:
export NREL_API_KEY=$(nrelApiKey)
export BSRN_FTP_USERNAME=$(BSRN_FTP_USERNAME)
export BSRN_FTP_PASSWORD=$(BSRN_FTP_PASSWORD)
export MERRA2_USERNAME=$(MERRA2_USERNAME)
export MERRA2_PASSWORD=$(MERRA2_PASSWORD)
pytest pvlib --remote-data --junitxml=junit/test-results.xml --cov --cov-report=xml --cov-report=html
displayName: 'pytest'
- task: PublishTestResults@2
5 changes: 5 additions & 0 deletions ci/requirements-py36.yml
@@ -3,9 +3,12 @@ channels:
- defaults
- conda-forge
dependencies:
- cftime
- coveralls
- cython
- dask
- ephem
- lxml
- netcdf4
- nose
- numba
@@ -27,7 +30,9 @@ dependencies:
- shapely # pvfactors dependency
- siphon # conda-forge
- statsmodels
- xarray
- pip:
- dataclasses
- nrel-pysam>=2.0
- pvfactors==1.4.1
- git+https://github.com/pydap/pydap#egg=pydap
5 changes: 5 additions & 0 deletions ci/requirements-py37.yml
@@ -3,9 +3,12 @@ channels:
- defaults
- conda-forge
dependencies:
- cftime
- coveralls
- cython
- dask
- ephem
- lxml
- netcdf4
- nose
- numba
@@ -27,6 +30,8 @@ dependencies:
- shapely # pvfactors dependency
- siphon # conda-forge
- statsmodels
- xarray
- pip:
- nrel-pysam>=2.0
- pvfactors==1.4.1
- git+https://github.com/pydap/pydap#egg=pydap
5 changes: 5 additions & 0 deletions ci/requirements-py38.yml
@@ -3,9 +3,12 @@ channels:
- defaults
- conda-forge
dependencies:
- cftime
- coveralls
- cython
- dask
- ephem
- lxml
- netcdf4
- nose
- numba
@@ -27,6 +30,8 @@ dependencies:
- shapely # pvfactors dependency
- siphon # conda-forge
- statsmodels
- xarray
- pip:
- nrel-pysam>=2.0
- pvfactors==1.4.1
- git+https://github.com/pydap/pydap#egg=pydap
5 changes: 5 additions & 0 deletions ci/requirements-py39.yml
@@ -3,9 +3,12 @@ channels:
- defaults
- conda-forge
dependencies:
- cftime
- coveralls
- cython
- dask
- ephem
- lxml
# - netcdf4 # pulls in a different version of numpy with ImportError
- nose
# - numba # python 3.9 compat in early 2021
@@ -27,6 +30,8 @@ dependencies:
- shapely # pvfactors dependency
# - siphon # conda-forge
- statsmodels
- xarray
- pip:
# - nrel-pysam>=2.0 # install error on windows
- pvfactors==1.4.1
- git+https://github.com/pydap/pydap#egg=pydap
2 changes: 2 additions & 0 deletions docs/sphinx/source/api.rst
@@ -497,6 +497,8 @@ of sources and file formats relevant to solar energy modeling.
iotools.get_cams
iotools.read_cams
iotools.parse_cams
iotools.get_merra2
iotools.read_merra2

A :py:class:`~pvlib.location.Location` object may be created from metadata
in some files.
8 changes: 6 additions & 2 deletions docs/sphinx/source/whatsnew/v0.9.0.rst
@@ -107,8 +107,11 @@ Deprecations

Enhancements
~~~~~~~~~~~~
* Added :func:`~pvlib.iotools.read_pvgis_hourly` and
:func:`~pvlib.iotools.get_pvgis_hourly` for reading and retrieving hourly
* Added :func:`~pvlib.iotools.get_merra2` and
:func:`~pvlib.iotools.read_merra2` for retrieving and reading hourly
reanalysis data from MERRA-2. (:pull:`1247`)
* Added :func:`~pvlib.iotools.get_pvgis_hourly` and
:func:`~pvlib.iotools.read_pvgis_hourly` for retrieving and reading hourly
solar radiation data and PV power output from PVGIS. (:pull:`1186`,
:issue:`849`)
* Add :func:`~pvlib.iotools.get_bsrn` and :func:`~pvlib.iotools.read_bsrn`
@@ -210,6 +213,7 @@ Documentation
Requirements
~~~~~~~~~~~~
* ``dataclasses`` is required for python 3.6
* xarray, dask, and pydap are now optional requirements. (:pull:`1264`, :pull:`1274`)

Contributors
~~~~~~~~~~~~
Binary files not shown (new MERRA-2 netCDF test data files, e.g. MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc)
2 changes: 2 additions & 0 deletions pvlib/iotools/__init__.py
@@ -21,3 +21,5 @@
from pvlib.iotools.sodapro import get_cams # noqa: F401
from pvlib.iotools.sodapro import read_cams # noqa: F401
from pvlib.iotools.sodapro import parse_cams # noqa: F401
from pvlib.iotools.merra2 import get_merra2 # noqa: F401
from pvlib.iotools.merra2 import read_merra2 # noqa: F401
246 changes: 246 additions & 0 deletions pvlib/iotools/merra2.py
@@ -0,0 +1,246 @@
"""Functions to read and retrieve MERRA-2 reanalysis data from NASA.
.. codeauthor:: Adam R. Jensen<[email protected]>
"""
from pvlib.tools import (_extract_metadata_from_dataset,
_convert_K_to_C_in_dataset)

try:
import xarray as xr
except ImportError:
xr = None

try:
from pydap.cas.urs import setup_session
except ImportError:
setup_session = None

try:
import cftime
except ImportError:
cftime = None

MERRA2_VARIABLE_MAP = {
# Variables from M2T1NXRAD - radiation diagnostics
'LWGEM': 'lwu', # longwave flux emitted from surface [W/m^2]
'SWGDN': 'ghi', # surface incoming shortwave flux [W/m^2]
'SWGDNCLR': 'ghi_clear', # SWGDN assuming clear sky [W/m^2]
'SWTDN': 'toa', # toa incoming shortwave flux [W/m^2]
# Variables from M2T1NXSLV - single-level diagnostics
'PS': 'pressure', # surface pressure [Pa]
'T2M': 'temp_air', # 2-meter air temperature [K converted to C]
'T2MDEW': 'temp_dew', # dew point temperature at 2 m [K converted to C]
}

# goldsmr4 contains the single-level 2D hourly MERRA-2 data files
MERRA2_BASE_URL = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/dods'


def get_merra2(latitude, longitude, start, end, dataset, variables, username,
password, save_path=None, output_format=None,
map_variables=True):
"""
Retrieve MERRA-2 reanalysis data from the NASA GES DISC repository.

The function supports downloading of MERRA-2 [1]_ hourly 2-dimensional
time-averaged variables. A list of the available datasets and parameters
is given in [2]_.

* Temporal coverage: 1980 to present (latency of 2-7 weeks)
* Temporal resolution: hourly
* Spatial coverage: global
* Spatial resolution: 0.5° latitude by 0.625° longitude

Parameters
----------
latitude: float or list
in decimal degrees, between -90 and 90, north is positive (ISO 19115).
If latitude is a list, it should have the format [S, N], and
latitudes within the range are selected according to the grid.
longitude: float or list
in decimal degrees, between -180 and 180, east is positive (ISO 19115).
If longitude is a list, it should have the format [W, E], and
longitudes within the range are selected according to the grid.
start: datetime-like
First day of the requested period.
end: datetime-like
Last day of the requested period.
dataset: str
Name of the dataset to retrieve the variables from, e.g., 'M2T1NXRAD'
for radiation parameters and 'M2T1NXAER' for aerosol parameters.
variables: list
List of variables to retrieve, e.g., ['TAUHGH', 'SWGNT'].
username: str
NASA EarthData username.
password: str
NASA EarthData password.
output_format: {'dataframe', 'dataset'}, optional
Type of data object to return. Default is to return a pandas DataFrame
if data for a single location is requested and otherwise return an
xarray Dataset.
map_variables: bool, default: True
When true, renames columns to pvlib variable names where applicable.
See variable MERRA2_VARIABLE_MAP.

Returns
-------
data: DataFrame
MERRA-2 time-series data, fields depend on the requested data. The
returned object is either a pandas DataFrame or an xarray dataset,
depending on the output_format parameter.
metadata: dict
Metadata extracted from the netCDF files.

Notes
-----
To obtain MERRA-2 data, it is necessary to register for an EarthData
account and link it to the GES DISC as described in [3]_.

MERRA-2 contains 14 single-level 2D datasets with an hourly resolution. The
most important ones are 'M2T1NXAER', which contains aerosol data,
'M2T1NXRAD', which contains radiation-related parameters, and 'M2T1NXSLV',
which contains general variables (e.g., temperature and wind speed).

Warning
-------
There is a known error in the MERRA-2 radiation calculations, so it is
strongly advised not to rely on radiation data from MERRA-2. Users
interested in radiation from reanalysis datasets are referred to
:func:`pvlib.iotools.get_era5`.

See Also
--------
pvlib.iotools.read_merra2, pvlib.iotools.get_era5

References
----------
.. [1] `NASA MERRA-2 Project overview
<https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/>`_
.. [2] `MERRA-2 File specification
<https://gmao.gsfc.nasa.gov/pubs/docs/Bosilovich785.pdf>`_
.. [3] `Account registration and data access to NASA's GES DISC
<https://disc.gsfc.nasa.gov/data-access>`_
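
Examples
--------
A minimal sketch of retrieving two days of 2-meter air temperature and
surface pressure for a single site; the credentials below are placeholders
and variable availability should be checked against [2]_:

>>> import datetime as dt
>>> data, meta = get_merra2(  # doctest: +SKIP
...     latitude=55, longitude=15,
...     start=dt.datetime(2020, 1, 1), end=dt.datetime(2020, 1, 2),
...     dataset='M2T1NXSLV', variables=['T2M', 'PS'],
...     username='your_earthdata_username', password='your_password')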
""" # noqa: E501
if xr is None:
raise ImportError('Retrieving MERRA-2 data requires xarray')
if setup_session is None:
raise ImportError('Retrieving MERRA-2 data requires PyDap')
if cftime is None:
raise ImportError('Retrieving MERRA-2 data requires cftime')

url = MERRA2_BASE_URL + '/' + dataset

session = setup_session(username, password, check_url=url)
store = xr.backends.PydapDataStore.open(url, session=session)

start_float = cftime.date2num(start, units='days since 1-1-1 00:00:0.0')
end_float = cftime.date2num(end, units='days since 1-1-1 00:00:0.0')

try:
sel_dict = {
'lat': slice(latitude[0], latitude[1]),
'lon': slice(longitude[0], longitude[1]),
'time': slice(start_float, end_float)}
except TypeError:
sel_dict = {
'lat': latitude,
'lon': longitude,
'time': slice(start_float, end_float)}

# Setting decode_times=False can reduce the load time by several minutes
ds = xr.open_dataset(store, decode_times=False).sel(sel_dict)

ds = xr.decode_cf(ds) # Decode timestamps

variables = [v.lower() for v in variables] # Make all variables lower-case

ds = ds[variables] # select sub-set of variables

if map_variables:
# Renaming of xarray datasets throws an error if keys are missing
ds = ds.rename_vars(
{k: v for k, v in MERRA2_VARIABLE_MAP.items() if k in list(ds)})

ds = _convert_K_to_C_in_dataset(ds)
metadata = _extract_metadata_from_dataset(ds)

if (output_format == 'dataframe') or (
(output_format is None) & (ds['lat'].size == 1) &
(ds['lon'].size == 1)):
data = ds.to_dataframe()
# Localize timezone to UTC
if data.index.nlevels > 1: # if dataframe has a multi-index
data.index = data.index.set_levels(data.index.get_level_values('time').tz_localize('utc'), level='time') # noqa: E501
else: # for single location dataframes (only time as index)
data.index = data.index.tz_localize('UTC')
data = data.drop(columns=['lat', 'lon'])
return data, metadata
else:
return ds, metadata


def read_merra2(filename, output_format=None, map_variables=True):
"""Reading a MERRA-2 file into a pandas dataframe.
MERRA-2 is described in [1]_ and a list of variables can be found in [2]_.
Parameters
----------
filename: str or path-like or list
Filename of a netCDF file containing MERRA-2 data or a list of
filenames.
output_format: {'dataframe', 'dataset'}, optional
Type of data object to return. Default is to return a pandas DataFrame
if data for a single location is requested and otherwise return an
xarray Dataset.
map_variables: bool, default: True
When true, renames columns to pvlib variable names where applicable.
See variable MERRA2_VARIABLE_MAP.

Returns
-------
data: DataFrame
MERRA-2 time-series data, fields depend on the requested data. The
returned object is either a pandas DataFrame or an xarray dataset,
depending on the output_format parameter.
metadata: dict
Metadata extracted from the netCDF files.

See Also
--------
pvlib.iotools.get_merra2, pvlib.iotools.get_era5

References
----------
.. [1] `NASA MERRA-2 Project overview
<https://gmao.gsfc.nasa.gov/reanalysis/MERRA-2/>`_
.. [2] `MERRA-2 File specification
<https://gmao.gsfc.nasa.gov/pubs/docs/Bosilovich785.pdf>`_
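
Examples
--------
A minimal sketch of reading two daily files into a single DataFrame; the
file names below are the MERRA-2 subset files used in the pvlib test suite
and are only illustrative:

>>> filenames = ['MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc',
...              'MERRA2_400.tavg1_2d_rad_Nx.20200102.SUB.nc']
>>> data, meta = read_merra2(filenames, map_variables=True)  # doctest: +SKIP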
"""
if xr is None:
raise ImportError('Reading MERRA-2 data requires xarray to be installed.') # noqa: E501

# opening multiple files (open_mfdataset) requires dask
if isinstance(filename, (list, tuple)):
ds = xr.open_mfdataset(filename)
else:
ds = xr.open_dataset(filename)

if map_variables:
# Renaming of xarray datasets throws an error if keys are missing
ds = ds.rename_vars(
{k: v for k, v in MERRA2_VARIABLE_MAP.items() if k in list(ds)})

ds = _convert_K_to_C_in_dataset(ds)
metadata = _extract_metadata_from_dataset(ds)

if (output_format == 'dataframe') or (
(output_format is None) & (ds['lat'].size == 1) &
(ds['lon'].size == 1)):
data = ds.to_dataframe()
# Remove lat and lon from multi-index
if (ds['lat'].size == 1) & (ds['lon'].size == 1):
data = data.droplevel(['lat', 'lon'])
# Localize timezone to UTC
if data.index.nlevels > 1: # if dataframe has a multi-index
data.index = data.index.set_levels(data.index.get_level_values('time').tz_localize('utc'), level='time') # noqa: E501
else: # for single location dataframes (only time as index)
data.index = data.index.tz_localize('UTC')
return data, metadata
else:
return ds, metadata
23 changes: 23 additions & 0 deletions pvlib/tests/conftest.py
@@ -95,6 +95,19 @@ def assert_frame_equal(left, right, **kwargs):
not has_bsrn_credentials, reason='requires bsrn credentials')


try:
# Attempt to load NASA EarthData login credentials used for testing
# pvlib.iotools.get_merra2
MERRA2_USERNAME = os.environ["MERRA2_USERNAME"]
MERRA2_PASSWORD = os.environ["MERRA2_PASSWORD"]
has_merra2_credentials = True
except KeyError:
has_merra2_credentials = False

requires_merra2_credentials = pytest.mark.skipif(
not has_merra2_credentials, reason='requires merra2 credentials')


try:
import statsmodels # noqa: F401
has_statsmodels = True
@@ -105,6 +118,16 @@ def assert_frame_equal(left, right, **kwargs):
not has_statsmodels, reason='requires statsmodels')


try:
import xarray as xr # noqa: F401
has_xarray = True
except ImportError:
has_xarray = False

requires_xarray = pytest.mark.skipif(
not has_xarray, reason='requires xarray')


try:
import tables
has_tables = True
80 changes: 80 additions & 0 deletions pvlib/tests/iotools/test_merra2.py
@@ -0,0 +1,80 @@
"""
tests for :mod:`pvlib.iotools.merra2`
"""

import pandas as pd
import numpy as np
import datetime as dt
import pytest
import os
from pvlib.iotools import read_merra2, get_merra2
from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal,
requires_merra2_credentials, requires_xarray)


@pytest.fixture(scope="module")
def merra2_credentials():
"""Supplies pvlib-python's EarthData login credentials.
Users should obtain their own credentials as described in the `get_merra2`
documentation."""
return (os.environ["MERRA2_USERNAME"], os.environ["MERRA2_PASSWORD"])


@pytest.fixture
def expected_index():
index = pd.date_range('2020-1-1-00:30', periods=24*2, freq='1h', tz='UTC')
index.name = 'time'
return index


@requires_xarray
def test_read_merra2(expected_index):
filenames = [DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc',
DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200102.SUB.nc']

data, meta = read_merra2(filenames, map_variables=False)
assert_index_equal(data.index, expected_index)
assert meta['lat'] == {'name': 'lat', 'long_name': 'latitude',
'units': 'degrees_north'}
assert np.isclose(data.loc['2020-01-01 12:30:00+00:00', 'SWGDN'], 130.4375)


@requires_xarray
def test_read_merra2_dataset(expected_index):
filenames = [DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc',
DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200102.SUB.nc']

data, meta = read_merra2(filenames, output_format='dataset',
map_variables=False)
import xarray as xr
assert isinstance(data, xr.Dataset)
assert meta['lat'] == {'name': 'lat', 'long_name': 'latitude',
'units': 'degrees_north'}
assert np.all([v in ['time', 'lon', 'lat', 'ALBEDO', 'EMIS', 'SWGDN',
'SWGDNCLR', 'SWTDN'] for v in list(data.variables)])


@requires_xarray
def test_read_merra2_map_variables():
filename = DATA_DIR / 'MERRA2_400.tavg1_2d_rad_Nx.20200101.SUB.nc'
data, meta = read_merra2(filename, map_variables=True)
assert meta['ghi'] == {
'name': 'ghi', 'long_name': 'surface_incoming_shortwave_flux',
'units': 'W m-2'}


@requires_xarray
@requires_merra2_credentials
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_merra2(merra2_credentials, expected_index):
username, password = merra2_credentials
data, meta = get_merra2(
latitude=55, longitude=15,
start=dt.datetime(2020, 1, 1), end=dt.datetime(2020, 1, 2),
dataset='M2T1NXRAD', variables=['TAUHGH', 'SWGNT'],
username=username, password=password, map_variables=True)
assert_index_equal(data.index, expected_index)
assert meta['lat'] == {'name': 'lat', 'long_name': 'latitude',
'units': 'degrees_north'}
assert np.all([v in ['tauhgh', 'swgnt'] for v in data.columns])
45 changes: 45 additions & 0 deletions pvlib/tools.py
@@ -344,3 +344,48 @@ def _golden_sect_DataFrame(params, VL, VH, func):
raise Exception("EXCEPTION:iterations exceeded maximum (50)")

return func(df, 'V1'), df['V1']


def _extract_metadata_from_dataset(ds):
"""
Generate a dictionary of metadata from an xarray Dataset.

Parameters
----------
ds : xarray.Dataset
Dataset containing time series data.

Returns
-------
metadata : dict
Dictionary containing metadata.
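
Examples
--------
A small illustrative Dataset (the attribute names mirror those in the
netCDF files read by :func:`pvlib.iotools.read_merra2`; the values are
made up):

>>> import xarray as xr
>>> ds = xr.Dataset(
...     {'ghi': ('time', [100.0],
...              {'long_name': 'surface_incoming_shortwave_flux',
...               'units': 'W m-2'})},
...     coords={'time': ('time', [0], {'long_name': 'time'})})
>>> metadata = _extract_metadata_from_dataset(ds)
>>> metadata['ghi']['units']
'W m-2'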
"""
metadata = {}
for v in list(ds.variables):
metadata[v] = {
'name': ds[v].name,
'long_name': ds[v].long_name}
if 'units' in ds[v].attrs:
metadata[v]['units'] = ds[v].units
metadata['dims'] = dict(ds.dims)
metadata.update(ds.attrs) # add arbitrary metadata
return metadata


def _convert_K_to_C_in_dataset(ds):
"""
Convert all variables in an xarray Dataset that have the unit Kelvin (K)
to degrees Celsius (C).

Parameters
----------
ds : xarray.Dataset
Dataset containing time series data.

Returns
-------
ds : xarray.Dataset
Dataset in which temperature variables have been converted from Kelvin
to degrees Celsius.
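
Examples
--------
A minimal sketch; only variables whose ``units`` attribute is exactly 'K'
are converted:

>>> import xarray as xr
>>> ds = xr.Dataset({'temp_air': ('time', [293.15],
...                               {'long_name': '2-meter_air_temperature',
...                                'units': 'K'})})
>>> ds = _convert_K_to_C_in_dataset(ds)
>>> float(ds['temp_air'][0]), ds['temp_air'].attrs['units']
(20.0, 'C')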
"""
for v in list(ds.variables):
if 'units' in ds[v].attrs:
if 'K' == ds[v].attrs['units']:
ds[v].data = ds[v].data - 273.15
ds[v].attrs['units'] = 'C'
return ds
2 changes: 1 addition & 1 deletion setup.py
@@ -54,7 +54,7 @@
EXTRAS_REQUIRE = {
'optional': ['cython', 'ephem', 'netcdf4', 'nrel-pysam', 'numba',
'pvfactors', 'siphon', 'statsmodels', 'tables',
'cftime >= 1.1.1'],
'cftime >= 1.1.1', 'xarray', 'dask'],
'doc': ['ipython', 'matplotlib', 'sphinx == 3.1.2',
'sphinx_rtd_theme==0.5.0', 'sphinx-gallery', 'docutils == 0.15.2',
'pillow', 'netcdf4', 'siphon', 'tables',