Skip to content

Commit 1ac19c4

Browse files
cftime: Fix resampling when time bins straddle "0001-01-01" (#9116)
* Add test for #9108 * Update xarray/tests/test_groupby.py Co-authored-by: Spencer Clark <[email protected]> * Take has_year_zero into account in offset arithmetic * Fix test * Add whats-new * Update doc/whats-new.rst * Add more detail to what's new entry * Modify wording slightly, since this does not always happen when the time coordinate includes "0001-01-01". --------- Co-authored-by: Spencer Clark <[email protected]>
1 parent c7cb9f7 commit 1ac19c4

File tree

4 files changed

+130
-58
lines changed

4 files changed

+130
-58
lines changed

doc/whats-new.rst

+6
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ Bug fixes
3737

3838
- Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`).
3939
By `Tom Nicholas <https://github.com/TomNicholas>`_.
40+
- Fix resampling error with monthly, quarterly, or yearly frequencies with
41+
cftime when the time bins straddle the date "0001-01-01". For example, this
42+
can happen in certain circumstances when the time coordinate contains the
43+
date "0001-01-01". (:issue:`9108`, :pull:`9116`) By `Spencer Clark
44+
<https://github.com/spencerkclark>`_ and `Deepak Cherian
45+
<https://github.com/dcherian>`_.
4046

4147
Documentation
4248
~~~~~~~~~~~~~

xarray/coding/cftime_offsets.py

+27-23
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from __future__ import annotations
4444

4545
import re
46+
import warnings
4647
from collections.abc import Mapping
4748
from datetime import datetime, timedelta
4849
from functools import partial
@@ -257,24 +258,15 @@ def _get_day_of_month(other, day_option: DayOption) -> int:
257258

258259
if day_option == "start":
259260
return 1
260-
if day_option == "end":
261-
return _days_in_month(other)
262-
if day_option is None:
261+
elif day_option == "end":
262+
return other.daysinmonth
263+
elif day_option is None:
263264
# Note: unlike `_shift_month`, _get_day_of_month does not
264265
# allow day_option = None
265266
raise NotImplementedError()
266267
raise ValueError(day_option)
267268

268269

269-
def _days_in_month(date):
270-
"""The number of days in the month of the given date"""
271-
if date.month == 12:
272-
reference = type(date)(date.year + 1, 1, 1)
273-
else:
274-
reference = type(date)(date.year, date.month + 1, 1)
275-
return (reference - timedelta(days=1)).day
276-
277-
278270
def _adjust_n_months(other_day, n, reference_day):
279271
"""Adjust the number of times a monthly offset is applied based
280272
on the day of a given date, and the reference day provided.
@@ -303,22 +295,34 @@ def _shift_month(date, months, day_option: DayOption = "start"):
303295
if cftime is None:
304296
raise ModuleNotFoundError("No module named 'cftime'")
305297

298+
has_year_zero = date.has_year_zero
306299
delta_year = (date.month + months) // 12
307300
month = (date.month + months) % 12
308301

309302
if month == 0:
310303
month = 12
311304
delta_year = delta_year - 1
305+
306+
if not has_year_zero:
307+
if date.year < 0 and date.year + delta_year >= 0:
308+
delta_year = delta_year + 1
309+
elif date.year > 0 and date.year + delta_year <= 0:
310+
delta_year = delta_year - 1
311+
312312
year = date.year + delta_year
313313

314-
if day_option == "start":
315-
day = 1
316-
elif day_option == "end":
317-
reference = type(date)(year, month, 1)
318-
day = _days_in_month(reference)
319-
else:
320-
raise ValueError(day_option)
321-
return date.replace(year=year, month=month, day=day)
314+
# Silence warnings associated with generating dates with years < 1.
315+
with warnings.catch_warnings():
316+
warnings.filterwarnings("ignore", message="this date/calendar/year zero")
317+
318+
if day_option == "start":
319+
day = 1
320+
elif day_option == "end":
321+
reference = type(date)(year, month, 1, has_year_zero=has_year_zero)
322+
day = reference.daysinmonth
323+
else:
324+
raise ValueError(day_option)
325+
return date.replace(year=year, month=month, day=day)
322326

323327

324328
def roll_qtrday(
@@ -398,13 +402,13 @@ class MonthEnd(BaseCFTimeOffset):
398402
_freq = "ME"
399403

400404
def __apply__(self, other):
401-
n = _adjust_n_months(other.day, self.n, _days_in_month(other))
405+
n = _adjust_n_months(other.day, self.n, other.daysinmonth)
402406
return _shift_month(other, n, "end")
403407

404408
def onOffset(self, date) -> bool:
405409
"""Check if the given date is in the set of possible dates created
406410
using a length-one version of this offset class."""
407-
return date.day == _days_in_month(date)
411+
return date.day == date.daysinmonth
408412

409413

410414
_MONTH_ABBREVIATIONS = {
@@ -594,7 +598,7 @@ class YearEnd(YearOffset):
594598
def onOffset(self, date) -> bool:
595599
"""Check if the given date is in the set of possible dates created
596600
using a length-one version of this offset class."""
597-
return date.day == _days_in_month(date) and date.month == self.month
601+
return date.day == date.daysinmonth and date.month == self.month
598602

599603
def rollforward(self, date):
600604
"""Roll date forward to nearest end of year"""

xarray/tests/test_cftime_offsets.py

+79-35
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import warnings
34
from itertools import product
45
from typing import Callable, Literal
56

@@ -24,7 +25,6 @@
2425
Tick,
2526
YearBegin,
2627
YearEnd,
27-
_days_in_month,
2828
_legacy_to_new_freq,
2929
_new_to_legacy_freq,
3030
cftime_range,
@@ -589,22 +589,6 @@ def test_minus_offset_error(a, b):
589589
b - a
590590

591591

592-
def test_days_in_month_non_december(calendar):
593-
date_type = get_date_type(calendar)
594-
reference = date_type(1, 4, 1)
595-
assert _days_in_month(reference) == 30
596-
597-
598-
def test_days_in_month_december(calendar):
599-
if calendar == "360_day":
600-
expected = 30
601-
else:
602-
expected = 31
603-
date_type = get_date_type(calendar)
604-
reference = date_type(1, 12, 5)
605-
assert _days_in_month(reference) == expected
606-
607-
608592
@pytest.mark.parametrize(
609593
("initial_date_args", "offset", "expected_date_args"),
610594
[
@@ -657,7 +641,7 @@ def test_add_month_end(
657641

658642
# Here the days at the end of each month vary based on the calendar used
659643
expected_date_args = (
660-
expected_year_month + (_days_in_month(reference),) + expected_sub_day
644+
expected_year_month + (reference.daysinmonth,) + expected_sub_day
661645
)
662646
expected = date_type(*expected_date_args)
663647
assert result == expected
@@ -694,17 +678,15 @@ def test_add_month_end_onOffset(
694678
date_type = get_date_type(calendar)
695679
reference_args = initial_year_month + (1,)
696680
reference = date_type(*reference_args)
697-
initial_date_args = (
698-
initial_year_month + (_days_in_month(reference),) + initial_sub_day
699-
)
681+
initial_date_args = initial_year_month + (reference.daysinmonth,) + initial_sub_day
700682
initial = date_type(*initial_date_args)
701683
result = initial + offset
702684
reference_args = expected_year_month + (1,)
703685
reference = date_type(*reference_args)
704686

705687
# Here the days at the end of each month vary based on the calendar used
706688
expected_date_args = (
707-
expected_year_month + (_days_in_month(reference),) + expected_sub_day
689+
expected_year_month + (reference.daysinmonth,) + expected_sub_day
708690
)
709691
expected = date_type(*expected_date_args)
710692
assert result == expected
@@ -756,7 +738,7 @@ def test_add_year_end(
756738

757739
# Here the days at the end of each month vary based on the calendar used
758740
expected_date_args = (
759-
expected_year_month + (_days_in_month(reference),) + expected_sub_day
741+
expected_year_month + (reference.daysinmonth,) + expected_sub_day
760742
)
761743
expected = date_type(*expected_date_args)
762744
assert result == expected
@@ -792,17 +774,15 @@ def test_add_year_end_onOffset(
792774
date_type = get_date_type(calendar)
793775
reference_args = initial_year_month + (1,)
794776
reference = date_type(*reference_args)
795-
initial_date_args = (
796-
initial_year_month + (_days_in_month(reference),) + initial_sub_day
797-
)
777+
initial_date_args = initial_year_month + (reference.daysinmonth,) + initial_sub_day
798778
initial = date_type(*initial_date_args)
799779
result = initial + offset
800780
reference_args = expected_year_month + (1,)
801781
reference = date_type(*reference_args)
802782

803783
# Here the days at the end of each month vary based on the calendar used
804784
expected_date_args = (
805-
expected_year_month + (_days_in_month(reference),) + expected_sub_day
785+
expected_year_month + (reference.daysinmonth,) + expected_sub_day
806786
)
807787
expected = date_type(*expected_date_args)
808788
assert result == expected
@@ -854,7 +834,7 @@ def test_add_quarter_end(
854834

855835
# Here the days at the end of each month vary based on the calendar used
856836
expected_date_args = (
857-
expected_year_month + (_days_in_month(reference),) + expected_sub_day
837+
expected_year_month + (reference.daysinmonth,) + expected_sub_day
858838
)
859839
expected = date_type(*expected_date_args)
860840
assert result == expected
@@ -890,17 +870,15 @@ def test_add_quarter_end_onOffset(
890870
date_type = get_date_type(calendar)
891871
reference_args = initial_year_month + (1,)
892872
reference = date_type(*reference_args)
893-
initial_date_args = (
894-
initial_year_month + (_days_in_month(reference),) + initial_sub_day
895-
)
873+
initial_date_args = initial_year_month + (reference.daysinmonth,) + initial_sub_day
896874
initial = date_type(*initial_date_args)
897875
result = initial + offset
898876
reference_args = expected_year_month + (1,)
899877
reference = date_type(*reference_args)
900878

901879
# Here the days at the end of each month vary based on the calendar used
902880
expected_date_args = (
903-
expected_year_month + (_days_in_month(reference),) + expected_sub_day
881+
expected_year_month + (reference.daysinmonth,) + expected_sub_day
904882
)
905883
expected = date_type(*expected_date_args)
906884
assert result == expected
@@ -957,7 +935,7 @@ def test_onOffset_month_or_quarter_or_year_end(
957935
date_type = get_date_type(calendar)
958936
reference_args = year_month_args + (1,)
959937
reference = date_type(*reference_args)
960-
date_args = year_month_args + (_days_in_month(reference),) + sub_day_args
938+
date_args = year_month_args + (reference.daysinmonth,) + sub_day_args
961939
date = date_type(*date_args)
962940
result = offset.onOffset(date)
963941
assert result
@@ -1005,7 +983,7 @@ def test_rollforward(calendar, offset, initial_date_args, partial_expected_date_
1005983
elif isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)):
1006984
reference_args = partial_expected_date_args + (1,)
1007985
reference = date_type(*reference_args)
1008-
expected_date_args = partial_expected_date_args + (_days_in_month(reference),)
986+
expected_date_args = partial_expected_date_args + (reference.daysinmonth,)
1009987
else:
1010988
expected_date_args = partial_expected_date_args
1011989
expected = date_type(*expected_date_args)
@@ -1056,7 +1034,7 @@ def test_rollback(calendar, offset, initial_date_args, partial_expected_date_arg
10561034
elif isinstance(offset, (MonthEnd, QuarterEnd, YearEnd)):
10571035
reference_args = partial_expected_date_args + (1,)
10581036
reference = date_type(*reference_args)
1059-
expected_date_args = partial_expected_date_args + (_days_in_month(reference),)
1037+
expected_date_args = partial_expected_date_args + (reference.daysinmonth,)
10601038
else:
10611039
expected_date_args = partial_expected_date_args
10621040
expected = date_type(*expected_date_args)
@@ -1787,3 +1765,69 @@ def test_date_range_no_freq(start, end, periods):
17871765
expected = pd.date_range(start=start, end=end, periods=periods)
17881766

17891767
np.testing.assert_array_equal(result, expected)
1768+
1769+
1770+
@pytest.mark.parametrize(
1771+
"offset",
1772+
[
1773+
MonthBegin(n=1),
1774+
MonthEnd(n=1),
1775+
QuarterBegin(n=1),
1776+
QuarterEnd(n=1),
1777+
YearBegin(n=1),
1778+
YearEnd(n=1),
1779+
],
1780+
ids=lambda x: f"{x}",
1781+
)
1782+
@pytest.mark.parametrize("has_year_zero", [False, True])
1783+
def test_offset_addition_preserves_has_year_zero(offset, has_year_zero):
1784+
1785+
with warnings.catch_warnings():
1786+
warnings.filterwarnings("ignore", message="this date/calendar/year zero")
1787+
datetime = cftime.DatetimeGregorian(-1, 12, 31, has_year_zero=has_year_zero)
1788+
1789+
result = datetime + offset
1790+
assert result.has_year_zero == datetime.has_year_zero
1791+
if has_year_zero:
1792+
assert result.year == 0
1793+
else:
1794+
assert result.year == 1
1795+
1796+
1797+
@pytest.mark.parametrize(
1798+
"offset",
1799+
[
1800+
MonthBegin(n=1),
1801+
MonthEnd(n=1),
1802+
QuarterBegin(n=1),
1803+
QuarterEnd(n=1),
1804+
YearBegin(n=1),
1805+
YearEnd(n=1),
1806+
],
1807+
ids=lambda x: f"{x}",
1808+
)
1809+
@pytest.mark.parametrize("has_year_zero", [False, True])
1810+
def test_offset_subtraction_preserves_has_year_zero(offset, has_year_zero):
1811+
datetime = cftime.DatetimeGregorian(1, 1, 1, has_year_zero=has_year_zero)
1812+
result = datetime - offset
1813+
assert result.has_year_zero == datetime.has_year_zero
1814+
if has_year_zero:
1815+
assert result.year == 0
1816+
else:
1817+
assert result.year == -1
1818+
1819+
1820+
@pytest.mark.parametrize("has_year_zero", [False, True])
1821+
def test_offset_day_option_end_accounts_for_has_year_zero(has_year_zero):
1822+
offset = MonthEnd(n=1)
1823+
1824+
with warnings.catch_warnings():
1825+
warnings.filterwarnings("ignore", message="this date/calendar/year zero")
1826+
datetime = cftime.DatetimeGregorian(-1, 1, 31, has_year_zero=has_year_zero)
1827+
1828+
result = datetime + offset
1829+
assert result.has_year_zero == datetime.has_year_zero
1830+
if has_year_zero:
1831+
assert result.day == 28
1832+
else:
1833+
assert result.day == 29

xarray/tests/test_groupby.py

+18
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
create_test_data,
2323
has_cftime,
2424
has_flox,
25+
requires_cftime,
2526
requires_dask,
2627
requires_flox,
2728
requires_scipy,
@@ -2503,6 +2504,23 @@ def test_default_flox_method() -> None:
25032504
assert "method" not in kwargs
25042505

25052506

2507+
@requires_cftime
2508+
@pytest.mark.filterwarnings("ignore")
2509+
def test_cftime_resample_gh_9108():
2510+
import cftime
2511+
2512+
ds = Dataset(
2513+
{"pr": ("time", np.random.random((10,)))},
2514+
coords={"time": xr.date_range("0001-01-01", periods=10, freq="D")},
2515+
)
2516+
actual = ds.resample(time="ME").mean()
2517+
expected = ds.mean("time").expand_dims(
2518+
time=[cftime.DatetimeGregorian(1, 1, 31, 0, 0, 0, 0, has_year_zero=False)]
2519+
)
2520+
assert actual.time.data[0].has_year_zero == ds.time.data[0].has_year_zero
2521+
assert_equal(actual, expected)
2522+
2523+
25062524
def test_custom_grouper() -> None:
25072525
class YearGrouper(Grouper):
25082526
"""

0 commit comments

Comments
 (0)