Skip to content

Standardize pvgis_tmy to return (data,meta) #2470

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ them.
Usage
-----

With some exceptions, the :py:mod:`pvlib.iotools` functions
The :py:mod:`pvlib.iotools` functions
provide a uniform interface for accessing data across many formats.
Specifically, :py:mod:`pvlib.iotools` functions usually return two objects:
a :py:class:`pandas.DataFrame` of the actual dataset, plus a metadata
Expand All @@ -89,8 +89,7 @@ Typical usage looks something like this:

.. code-block:: python

# get_pvgis_tmy returns two additional values besides df and metadata
df, _, _, metadata = pvlib.iotools.get_pvgis_tmy(latitude, longitude, map_variables=True)
df, metadata = pvlib.iotools.get_pvgis_tmy(latitude, longitude, map_variables=True)

This code will fetch a Typical Meteorological Year (TMY) dataset from PVGIS,
returning a :py:class:`pandas.DataFrame` containing the hourly weather data
Expand Down
4 changes: 4 additions & 0 deletions docs/sphinx/source/whatsnew/v0.12.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Breaking Changes
following the iotools convention instead of ``(data,inputs,meta)``.
The ``inputs`` dictionary is now included in ``meta``, which
has changed structure to accommodate it. (:pull:`2462`)
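* The functions :py:func:`~pvlib.iotools.read_pvgis_tmy` and
:py:func:`~pvlib.iotools.get_pvgis_tmy` now return ``(data,meta)``
following the iotools convention instead of ``(data,months_selected,inputs,meta)``.
The ``inputs`` dictionary and the ``months_selected`` list are now included
in ``meta``, which has changed structure to accommodate them.
(:pull:`2470`)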
* Remove ``outputformat='basic'`` option in :py:func:`~pvlib.iotools.get_pvgis_tmy`.
(:pull:`2416`)

Expand Down
75 changes: 43 additions & 32 deletions pvlib/iotools/pvgis.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def _parse_pvgis_hourly_json(src, map_variables):

def _parse_pvgis_hourly_csv(src, map_variables):
# The first 4 rows are latitude, longitude, elevation, radiation database
metadata = {'inputs': {}}
metadata = {'inputs': {}, 'descriptions': {}}
# 'location' metadata
# 'Latitude (decimal degrees): 45.000\r\n'
metadata['inputs']['latitude'] = float(src.readline().split(':')[1])
Expand Down Expand Up @@ -440,6 +440,13 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,

For more information see the PVGIS [1]_ TMY tool documentation [2]_.

.. versionchanged:: 0.13.0
The function now returns two items ``(data,meta)``. Previous
versions of this function returned four elements
``(data,months_selected,inputs,meta)``. The ``inputs`` dictionary
and ``months_selected`` are now included in ``meta``, which has
changed structure to accommodate them.

Parameters
----------
latitude : float
Expand Down Expand Up @@ -478,10 +485,6 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
-------
data : pandas.DataFrame
the weather data
months_selected : list
TMY year for each month, ``None`` for EPW
inputs : dict
the inputs, ``None`` for EPW
metadata : dict
file metadata

Expand Down Expand Up @@ -527,17 +530,16 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
else:
raise requests.HTTPError(err_msg['message'])
# initialize data to None in case API fails to respond to bad outputformat
data = None, None, None, None
data = None, None
if outputformat == 'json':
src = res.json()
data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src)
data, meta = _parse_pvgis_tmy_json(src)
elif outputformat == 'csv':
with io.BytesIO(res.content) as src:
data, months_selected, inputs, meta = _parse_pvgis_tmy_csv(src)
data, meta = _parse_pvgis_tmy_csv(src)
elif outputformat == 'epw':
with io.StringIO(res.content.decode('utf-8')) as src:
data, meta = read_epw(src)
months_selected, inputs = None, None
elif outputformat == 'basic':
err_msg = ("outputformat='basic' is no longer supported by pvlib, "
"please use outputformat='csv' instead.")
Expand All @@ -551,34 +553,37 @@ def get_pvgis_tmy(latitude, longitude, outputformat='json', usehorizon=True,
coerce_year = coerce_year or 1990
data = _coerce_and_roll_tmy(data, roll_utc_offset, coerce_year)

return data, months_selected, inputs, meta
return data, meta


def _parse_pvgis_tmy_json(src):
inputs = src['inputs']
meta = src['meta']
months_selected = src['outputs']['months_selected']
meta = src['meta'].copy()
# Override the "inputs" in metadata
meta['inputs'] = src['inputs']
# Re-add the inputs in metadata one-layer down
meta['inputs']['descriptions'] = src['meta']['inputs']
meta['months_selected'] = src['outputs']['months_selected']
data = pd.DataFrame(src['outputs']['tmy_hourly'])
data.index = pd.to_datetime(
data['time(UTC)'], format='%Y%m%d:%H%M', utc=True)
data = data.drop('time(UTC)', axis=1)
return data, months_selected, inputs, meta
return data, meta


def _parse_pvgis_tmy_csv(src):
# the first 3 rows are latitude, longitude, elevation
inputs = {}
meta = {'inputs': {}, 'descriptions': {}}
# 'Latitude (decimal degrees): 45.000\r\n'
inputs['latitude'] = float(src.readline().split(b':')[1])
meta['inputs']['latitude'] = float(src.readline().split(b':')[1])
# 'Longitude (decimal degrees): 8.000\r\n'
inputs['longitude'] = float(src.readline().split(b':')[1])
meta['inputs']['longitude'] = float(src.readline().split(b':')[1])
# Elevation (m): 1389.0\r\n
inputs['elevation'] = float(src.readline().split(b':')[1])
meta['inputs']['elevation'] = float(src.readline().split(b':')[1])

# TMY has an extra line here: Irradiance Time Offset (h): 0.1761\r\n
line = src.readline()
if line.startswith(b'Irradiance Time Offset'):
inputs['irradiance time offset'] = float(line.split(b':')[1])
meta['inputs']['irradiance time offset'] = float(line.split(b':')[1])
src.readline() # skip over the "month,year\r\n"
else:
# `line` is already the "month,year\r\n" line, so nothing to do
Expand All @@ -589,6 +594,7 @@ def _parse_pvgis_tmy_csv(src):
for month in range(12):
months_selected.append(
{'month': month+1, 'year': int(src.readline().split(b',')[1])})
meta['months_selected'] = months_selected
# then there's the TMY (typical meteorological year) data
# first there's a header row:
# time(UTC),T2m,RH,G(h),Gb(n),Gd(h),IR(h),WS10m,WD10m,SP
Expand All @@ -601,14 +607,26 @@ def _parse_pvgis_tmy_csv(src):
data = pd.DataFrame(data, dtype=float)
data.index = dtidx
# finally there's some meta data
meta = [line.decode('utf-8').strip() for line in src.readlines()]
return data, months_selected, inputs, meta
meta['descriptions'] = {}
for line in src.readlines():
line = line.decode('utf-8').strip()
if ':' in line:
meta['descriptions'][line.split(':')[0]] = \
line.split(':')[1].strip()
return data, meta


def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
"""
Read a TMY file downloaded from PVGIS.

.. versionchanged:: 0.13.0
The function now returns two items ``(data,meta)``. Previous
versions of this function returned four elements
``(data,months_selected,inputs,meta)``. The ``inputs`` dictionary
and ``months_selected`` are now included in ``meta``, which has
changed structure to accommodate them.

Parameters
----------
filename : str, pathlib.Path, or file-like buffer
Expand All @@ -629,10 +647,6 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
-------
data : pandas.DataFrame
the weather data
months_selected : list
TMY year for each month, ``None`` for EPW
inputs : dict
the inputs, ``None`` for EPW
metadata : dict
file metadata

Expand Down Expand Up @@ -662,7 +676,6 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
# EPW: use the EPW parser from the pvlib.iotools epw.py module
if outputformat == 'epw':
data, meta = read_epw(filename)
months_selected, inputs = None, None

# NOTE: json and csv output formats have parsers defined as private
# functions in this module
Expand All @@ -676,16 +689,14 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
except AttributeError: # str/path has no .read() attribute
with open(str(filename), 'r') as fbuf:
src = json.load(fbuf)
data, months_selected, inputs, meta = _parse_pvgis_tmy_json(src)
data, meta = _parse_pvgis_tmy_json(src)

elif outputformat == 'csv':
try:
data, months_selected, inputs, meta = \
_parse_pvgis_tmy_csv(filename)
data, meta = _parse_pvgis_tmy_csv(filename)
except AttributeError: # str/path has no .read() attribute
with open(str(filename), 'rb') as fbuf:
data, months_selected, inputs, meta = \
_parse_pvgis_tmy_csv(fbuf)
data, meta = _parse_pvgis_tmy_csv(fbuf)

elif outputformat == 'basic':
err_msg = "outputformat='basic' is no longer supported, please use " \
Expand All @@ -702,7 +713,7 @@ def read_pvgis_tmy(filename, pvgis_format=None, map_variables=True):
if map_variables:
data = data.rename(columns=VARIABLE_MAP)

return data, months_selected, inputs, meta
return data, meta


def get_pvgis_horizon(latitude, longitude, url=URL, **kwargs):
Expand Down
65 changes: 38 additions & 27 deletions tests/iotools/test_pvgis.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,9 +371,7 @@ def meta_expected():

@pytest.fixture
def csv_meta(meta_expected):
return [
f"{k}: {v['description']} ({v['units']})" for k, v
in meta_expected['outputs']['tmy_hourly']['variables'].items()]
return meta_expected['outputs']['tmy_hourly']['variables']


@pytest.fixture
Expand All @@ -393,7 +391,15 @@ def test_get_pvgis_tmy(expected, month_year_expected, inputs_expected,

def _compare_pvgis_tmy_json(expected, month_year_expected, inputs_expected,
meta_expected, pvgis_data):
data, months_selected, inputs, meta = pvgis_data
data, meta = pvgis_data

# Re-create original outputs (prior to #2470)
months_selected = meta['months_selected']
inputs = meta['inputs'].copy()
del inputs['descriptions']
meta['inputs'] = meta['inputs']['descriptions']
del meta['months_selected']
Comment on lines +396 to +401
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems a little hacky to me. Ok if it's just to get the API change in for this release, but let's at least open an issue to update the expected values to match the new format.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done in #2472


# check each column of output separately
for outvar in meta_expected['outputs']['tmy_hourly']['variables'].keys():
assert np.allclose(data[outvar], expected[outvar])
Expand All @@ -419,28 +425,27 @@ def _compare_pvgis_tmy_json(expected, month_year_expected, inputs_expected,
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_pvgis_tmy_kwargs(userhorizon_expected):
_, _, inputs, _ = get_pvgis_tmy(45, 8, usehorizon=False,
map_variables=False)
assert inputs['meteo_data']['use_horizon'] is False
data, _, _, _ = get_pvgis_tmy(
_, meta = get_pvgis_tmy(45, 8, usehorizon=False, map_variables=False)
assert meta['inputs']['meteo_data']['use_horizon'] is False
data, _ = get_pvgis_tmy(
45, 8, userhorizon=[0, 10, 20, 30, 40, 15, 25, 5], map_variables=False)
assert np.allclose(
data['G(h)'], userhorizon_expected['G(h)'].values)
assert np.allclose(
data['Gb(n)'], userhorizon_expected['Gb(n)'].values)
assert np.allclose(
data['Gd(h)'], userhorizon_expected['Gd(h)'].values)
_, _, inputs, _ = get_pvgis_tmy(45, 8, startyear=2005, map_variables=False)
assert inputs['meteo_data']['year_min'] == 2005
_, _, inputs, _ = get_pvgis_tmy(45, 8, endyear=2016, map_variables=False)
assert inputs['meteo_data']['year_max'] == 2016
_, meta = get_pvgis_tmy(45, 8, startyear=2005, map_variables=False)
assert meta['inputs']['meteo_data']['year_min'] == 2005
_, meta = get_pvgis_tmy(45, 8, endyear=2016, map_variables=False)
assert meta['inputs']['meteo_data']['year_max'] == 2016


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_pvgis_tmy_coerce_year():
"""test utc_offset and coerce_year work as expected"""
base_case, _, _, _ = get_pvgis_tmy(45, 8) # Turin
base_case, _ = get_pvgis_tmy(45, 8) # Turin
assert str(base_case.index.tz) == 'UTC'
assert base_case.index.name == 'time(UTC)'
noon_test_data = [
Expand All @@ -449,9 +454,9 @@ def test_get_pvgis_tmy_coerce_year():
cet_tz = 1 # Turin time is CET
cet_name = 'Etc/GMT-1'
# check indices of rolled data after converting timezone
pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz)
jan1_midnight = pd.Timestamp('1990-01-01 00:00:00', tz=cet_name)
dec31_midnight = pd.Timestamp('1990-12-31 23:00:00', tz=cet_name)
pvgis_data, _ = get_pvgis_tmy(45, 8, roll_utc_offset=cet_tz)
jan1_midnight = pd.Timestamp('1990-01-01 00', tz=cet_name)
dec31_midnight = pd.Timestamp('1990-12-31 23', tz=cet_name)
assert pvgis_data.index[0] == jan1_midnight
assert pvgis_data.index[-1] == dec31_midnight
assert pvgis_data.index.name == f'time({cet_name})'
Expand All @@ -461,20 +466,20 @@ def test_get_pvgis_tmy_coerce_year():
assert all(test_case == expected)
# repeat tests with year coerced
test_yr = 2021
pvgis_data, _, _, _ = get_pvgis_tmy(
pvgis_data, _ = get_pvgis_tmy(
45, 8, roll_utc_offset=cet_tz, coerce_year=test_yr)
jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz=cet_name)
dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz=cet_name)
jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00', tz=cet_name)
dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23', tz=cet_name)
assert pvgis_data.index[0] == jan1_midnight
assert pvgis_data.index[-1] == dec31_midnight
assert pvgis_data.index.name == f'time({cet_name})'
for m, test_case in enumerate(noon_test_data):
expected = pvgis_data[pvgis_data.index.month == m+1].iloc[12+cet_tz]
assert all(test_case == expected)
# repeat tests with year coerced but utc offset none or zero
pvgis_data, _, _, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr)
jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00:00:00', tz='UTC')
dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23:00:00', tz='UTC')
pvgis_data, _ = get_pvgis_tmy(45, 8, coerce_year=test_yr)
jan1_midnight = pd.Timestamp(f'{test_yr}-01-01 00', tz='UTC')
dec31_midnight = pd.Timestamp(f'{test_yr}-12-31 23', tz='UTC')
assert pvgis_data.index[0] == jan1_midnight
assert pvgis_data.index[-1] == dec31_midnight
assert pvgis_data.index.name == 'time(UTC)'
Expand All @@ -494,7 +499,13 @@ def test_get_pvgis_tmy_csv(expected, month_year_expected, inputs_expected,

def _compare_pvgis_tmy_csv(expected, month_year_expected, inputs_expected,
meta_expected, csv_meta, pvgis_data):
data, months_selected, inputs, meta = pvgis_data
data, meta = pvgis_data

# Re-create original outputs (prior to #2470)
months_selected = meta['months_selected']
inputs = meta['inputs'].copy()
meta = meta['descriptions']

# check each column of output separately
for outvar in meta_expected['outputs']['tmy_hourly']['variables'].keys():
assert np.allclose(data[outvar], expected[outvar])
Expand Down Expand Up @@ -526,7 +537,7 @@ def test_get_pvgis_tmy_epw(expected, epw_meta):


def _compare_pvgis_tmy_epw(expected, epw_meta, pvgis_data):
data, _, _, meta = pvgis_data
data, meta = pvgis_data
assert np.allclose(data.ghi, expected['G(h)'])
assert np.allclose(data.dni, expected['Gb(n)'])
assert np.allclose(data.dhi, expected['Gd(h)'])
Expand Down Expand Up @@ -556,8 +567,8 @@ def test_get_pvgis_tmy_basic():

@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_pvgis_map_variables(pvgis_tmy_mapped_columns):
actual, _, _, _ = get_pvgis_tmy(45, 8, map_variables=True)
def test_get_pvgis_tmy_map_variables(pvgis_tmy_mapped_columns):
actual, _ = get_pvgis_tmy(45, 8, map_variables=True)
assert all(c in pvgis_tmy_mapped_columns for c in actual.columns)


Expand All @@ -580,7 +591,7 @@ def test_read_pvgis_horizon_invalid_coords():

def test_read_pvgis_tmy_map_variables(pvgis_tmy_mapped_columns):
fn = TESTS_DATA_DIR / 'tmy_45.000_8.000_2005_2023.json'
actual, _, _, _ = read_pvgis_tmy(fn, map_variables=True)
actual, _ = read_pvgis_tmy(fn, map_variables=True)
assert all(c in pvgis_tmy_mapped_columns for c in actual.columns)


Expand Down
Loading