Skip to content

Commit

Permalink
set default resolution to "s", which actually means, use pandas lowest resolution, fix code and tests to allow this
Browse files Browse the repository at this point in the history
  • Loading branch information
kmuehlbauer committed Oct 14, 2024
1 parent 1d03a43 commit ca5050d
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 29 deletions.
11 changes: 10 additions & 1 deletion xarray/coding/times.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like
from xarray.core.duck_array_ops import asarray, ravel, reshape
from xarray.core.formatting import first_n_items, format_timestamp, last_item
from xarray.core.options import _get_datetime_resolution
from xarray.core.pdcompat import _timestamp_as_unit, default_precision_timestamp
from xarray.core.utils import emit_user_level_warning
from xarray.core.variable import Variable
Expand Down Expand Up @@ -98,6 +99,13 @@ def _is_numpy_compatible_time_range(times):
tmin = times.min()
tmax = times.max()
try:
# Before the nanosecond constraint was relaxed,
# this raised OutOfBoundsDatetime for
# times < 1678 and times > 2262.
# That is no longer the case for other resolutions like "s";
# instead, we now raise for dates before 1582-10-15.
_check_date_is_after_shift(tmin, "standard")
_check_date_is_after_shift(tmax, "standard")
convert_time_or_go_back(tmin, pd.Timestamp)
convert_time_or_go_back(tmax, pd.Timestamp)
except pd.errors.OutOfBoundsDatetime:
Expand Down Expand Up @@ -290,7 +298,7 @@ def _check_date_is_after_shift(date: pd.Timestamp, calendar: str) -> None:
# proleptic_gregorian and standard/gregorian are only equivalent
# if reference date and date range is >= 1582-10-15
if calendar != "proleptic_gregorian":
if date < pd.Timestamp("1582-10-15"):
if date < type(date)(1582, 10, 15):
raise OutOfBoundsDatetime(
f"Dates before 1582-10-15 cannot be decoded "
f"with pandas using {calendar!r} calendar."
Expand Down Expand Up @@ -318,6 +326,7 @@ def _decode_datetime_with_pandas(
try:
time_unit, ref_date = _unpack_time_unit_and_ref_date(units)
ref_date = _align_reference_date_and_unit(ref_date, time_unit)
ref_date = _align_reference_date_and_unit(ref_date, _get_datetime_resolution())
except ValueError as err:
# ValueError is raised by pd.Timestamp for non-ISO timestamp
# strings, in which case we fall back to using cftime
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ class set_options:
warn_for_unclosed_files : bool, default: False
Whether or not to issue a warning when unclosed files are
deallocated. This is mostly useful for debugging.
time_resolution : {"s", "ms", "us", "ns"}, default: "ns"
time_resolution : {"s", "ms", "us", "ns"}, default: "s"
Time resolution used for CF encoding/decoding.
Examples
Expand Down
7 changes: 5 additions & 2 deletions xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from xarray import Dataset
from xarray.core.duck_array_ops import allclose_or_equiv # noqa: F401
from xarray.core.extension_array import PandasExtensionArray
from xarray.core.options import set_options
from xarray.core.options import _get_datetime_resolution, set_options
from xarray.core.variable import IndexVariable
from xarray.testing import ( # noqa: F401
assert_chunks_equal,
Expand Down Expand Up @@ -323,7 +323,10 @@ def create_test_data(
f'Not enough letters for filling this dimension size ({_dims["dim3"]})'
)
obj["dim3"] = ("dim3", list(string.ascii_lowercase[0 : _dims["dim3"]]))
obj["time"] = ("time", pd.date_range("2000-01-01", periods=20, unit="s"))
obj["time"] = (
"time",
pd.date_range("2000-01-01", periods=20, unit=f"{_get_datetime_resolution()}"),
)
for v, dims in sorted(_vars.items()):
data = rs.normal(size=tuple(_dims[d] for d in dims))
obj[v] = (dims, data)
Expand Down
7 changes: 4 additions & 3 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
from xarray.coding.variables import SerializationWarning
from xarray.conventions import encode_dataset_coordinates
from xarray.core import indexing
from xarray.core.options import set_options
from xarray.core.options import _get_datetime_resolution, set_options
from xarray.core.utils import module_available
from xarray.namedarray.pycompat import array_type
from xarray.tests import (
Expand Down Expand Up @@ -1590,8 +1590,9 @@ def test_open_encodings(self) -> None:

expected = Dataset()

# todo: check, if specifying "s" is enough
time = pd.date_range("1999-01-05", periods=10, unit="s")
time = pd.date_range(
"1999-01-05", periods=10, unit=f"{_get_datetime_resolution()}"
)
encoding = {"units": units, "dtype": np.dtype("int32")}
expected["time"] = ("time", time, {}, encoding)

Expand Down
44 changes: 31 additions & 13 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from xarray.coding.variables import SerializationWarning
from xarray.conventions import _update_bounds_attributes, cf_encoder
from xarray.core.common import contains_cftime_datetimes
from xarray.core.options import _get_datetime_resolution
from xarray.core.utils import is_duck_dask_array
from xarray.testing import assert_equal, assert_identical
from xarray.tests import (
Expand Down Expand Up @@ -134,7 +135,9 @@ def test_cf_datetime(num_dates, units, calendar) -> None:
max_y = np.ravel(np.atleast_1d(expected))[np.nanargmax(num_dates)] # .year
typ = type(min_y)
border = typ(1582, 10, 15)
if calendar == "proleptic_gregorian" or (min_y >= border and max_y >= border):
if (calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns") or (
min_y >= border and max_y >= border
):
expected = cftime_to_nptime(expected)

with warnings.catch_warnings():
Expand Down Expand Up @@ -214,12 +217,15 @@ def test_decode_standard_calendar_inside_timestamp_range(calendar) -> None:
import cftime

units = "days since 0001-01-01"
unit = cast(Literal["s", "ms", "us", "ns"], "us")
unit = cast(Literal["s", "ms", "us", "ns"], _get_datetime_resolution())
times = pd.date_range("2001-04-01-00", end="2001-04-30-23", unit=unit, freq="h")
# to_pydatetime() will return microsecond
time = cftime.date2num(times.to_pydatetime(), units, calendar=calendar)
expected = times.values
if calendar == "proleptic_gregorian":
unit = "s"
# for cftime we get "us" resolution
# ns resolution is handled by cftime, too (OutOfBounds)
if calendar != "proleptic_gregorian" or _get_datetime_resolution() == "ns":
unit = "us"
expected_dtype = np.dtype(f"M8[{unit}]")
actual = decode_cf_datetime(time, units, calendar=calendar)
assert actual.dtype == expected_dtype
Expand Down Expand Up @@ -268,7 +274,7 @@ def test_decode_dates_outside_timestamp_range(calendar) -> None:
time, units, calendar=calendar, only_use_cftime_datetimes=True
)
# special case proleptic_gregorian
if calendar == "proleptic_gregorian":
if calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns":
expected = expected.astype("=M8[us]")
expected_date_type = type(expected[0])

Expand All @@ -289,7 +295,11 @@ def test_decode_standard_calendar_single_element_inside_timestamp_range(
calendar,
) -> None:
units = "days since 0001-01-01"
unit = "s" if calendar == "proleptic_gregorian" else "us"
unit = (
_get_datetime_resolution()
if (calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns")
else "us"
)
for num_time in [735368, [735368], [[735368]]]:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", "Unable to decode time axis")
Expand Down Expand Up @@ -337,7 +347,11 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range(
import cftime

units = "days since 0001-01-01"
unit = "s" if calendar == "proleptic_gregorian" else "us"
unit = (
_get_datetime_resolution()
if (calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns")
else "us"
)
times1 = pd.date_range("2001-04-01", end="2001-04-05", freq="D")
times2 = pd.date_range("2001-05-01", end="2001-05-05", freq="D")
time1 = cftime.date2num(times1.to_pydatetime(), units, calendar=calendar)
Expand Down Expand Up @@ -426,8 +440,8 @@ def test_decode_multidim_time_outside_timestamp_range(calendar) -> None:
actual = decode_cf_datetime(mdim_time, units, calendar=calendar)

dtype: np.dtype
if calendar == "proleptic_gregorian":
dtype = np.dtype("=M8[s]")
if calendar == "proleptic_gregorian" and _get_datetime_resolution() != "ns":
dtype = np.dtype(f"=M8[{_get_datetime_resolution()}]")
expected1 = expected1.astype(dtype)
expected2 = expected2.astype(dtype)
else:
Expand Down Expand Up @@ -528,7 +542,7 @@ def test_decoded_cf_datetime_array_2d() -> None:
("x", "y"), np.array([[0, 1], [2, 3]]), {"units": "days since 2000-01-01"}
)
result = CFDatetimeCoder().decode(variable)
assert result.dtype == "datetime64[s]"
assert result.dtype == f"datetime64[{_get_datetime_resolution()}]"
expected = pd.date_range("2000-01-01", periods=4).values.reshape(2, 2)
assert_array_equal(np.asarray(result), expected)

Expand Down Expand Up @@ -697,7 +711,7 @@ def test_decode_cf(calendar) -> None:
if calendar not in _STANDARD_CALENDARS:
assert ds.test.dtype == np.dtype("O")
else:
assert ds.test.dtype == np.dtype("M8[s]")
assert ds.test.dtype == np.dtype(f"M8[{_get_datetime_resolution()}]")


def test_decode_cf_time_bounds() -> None:
Expand All @@ -722,7 +736,7 @@ def test_decode_cf_time_bounds() -> None:
"calendar": "standard",
}
dsc = decode_cf(ds)
assert dsc.time_bnds.dtype == np.dtype("M8[s]")
assert dsc.time_bnds.dtype == np.dtype(f"M8[{_get_datetime_resolution()}]")
dsc = decode_cf(ds, decode_times=False)
assert dsc.time_bnds.dtype == np.dtype("int64")

Expand Down Expand Up @@ -1299,7 +1313,11 @@ def test_roundtrip_datetime64_nanosecond_precision(
assert encoded_var.data.dtype == dtype

decoded_var = conventions.decode_cf_variable("foo", encoded_var)
assert decoded_var.dtype == np.dtype(f"=M8[{timeunit}]")
if _get_datetime_resolution() == "ns":
dtypeunit = "ns"
else:
dtypeunit = timeunit
assert decoded_var.dtype == np.dtype(f"=M8[{dtypeunit}]")
assert (
decoded_var.encoding["units"]
== f"{_numpy_to_netcdf_timeunit(timeunit)} since 1970-01-01 00:00:00"
Expand Down
7 changes: 4 additions & 3 deletions xarray/tests/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from xarray.backends.common import WritableCFDataStore
from xarray.backends.memory import InMemoryDataStore
from xarray.conventions import decode_cf
from xarray.core.options import _get_datetime_resolution
from xarray.testing import assert_identical
from xarray.tests import (
assert_array_equal,
Expand Down Expand Up @@ -364,7 +365,7 @@ def test_dataset_repr_with_netcdf4_datetimes(self) -> None:

attrs = {"units": "days since 1900-01-01"}
ds = decode_cf(Dataset({"time": ("time", [0, 1], attrs)}))
assert "(time) datetime64[s]" in repr(ds)
assert f"(time) datetime64[{_get_datetime_resolution()}]" in repr(ds)

@requires_cftime
def test_decode_cf_datetime_transition_to_invalid(self) -> None:
Expand Down Expand Up @@ -447,13 +448,13 @@ def test_decode_cf_time_kwargs(self) -> None:

dsc = conventions.decode_cf(ds)
assert dsc.timedelta.dtype == np.dtype("m8[ns]")
assert dsc.time.dtype == np.dtype("M8[s]")
assert dsc.time.dtype == np.dtype(f"M8[{_get_datetime_resolution()}]")
dsc = conventions.decode_cf(ds, decode_times=False)
assert dsc.timedelta.dtype == np.dtype("int64")
assert dsc.time.dtype == np.dtype("int64")
dsc = conventions.decode_cf(ds, decode_times=True, decode_timedelta=False)
assert dsc.timedelta.dtype == np.dtype("int64")
assert dsc.time.dtype == np.dtype("M8[s]")
assert dsc.time.dtype == np.dtype(f"M8[{_get_datetime_resolution()}]")
dsc = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True)
assert dsc.timedelta.dtype == np.dtype("m8[ns]")
assert dsc.time.dtype == np.dtype("int64")
Expand Down
14 changes: 8 additions & 6 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from xarray.core.common import duck_array_ops, full_like
from xarray.core.coordinates import Coordinates, DatasetCoordinates
from xarray.core.indexes import Index, PandasIndex
from xarray.core.options import _get_datetime_resolution
from xarray.core.types import ArrayLike
from xarray.core.utils import is_scalar
from xarray.groupers import TimeResampler
Expand Down Expand Up @@ -290,7 +291,7 @@ def test_repr(self) -> None:
Coordinates:
* dim2 (dim2) float64 72B 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0
* dim3 (dim3) {} 40B 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j'
* time (time) datetime64[s] 160B 2000-01-01 2000-01-02 ... 2000-01-20
* time (time) datetime64[{}] 160B 2000-01-01 2000-01-02 ... 2000-01-20
numbers (dim3) int64 80B 0 1 2 0 0 1 1 2 2 3
Dimensions without coordinates: dim1
Data variables:
Expand All @@ -299,7 +300,8 @@ def test_repr(self) -> None:
var3 (dim3, dim1) float64 640B 0.5565 -0.2121 0.4563 ... -0.2452 -0.3616
Attributes:
foo: bar""".format(
data["dim3"].dtype
data["dim3"].dtype,
_get_datetime_resolution(),
)
)
actual = "\n".join(x.rstrip() for x in repr(data).split("\n"))
Expand Down Expand Up @@ -442,8 +444,8 @@ def test_info(self) -> None:
ds.info(buf=buf)

expected = dedent(
"""\
xarray.Dataset {
f"""\
xarray.Dataset {{
dimensions:
\tdim2 = 9 ;
\ttime = 20 ;
Expand All @@ -452,7 +454,7 @@ def test_info(self) -> None:
variables:
\tfloat64 dim2(dim2) ;
\tdatetime64[s] time(time) ;
\tdatetime64[{_get_datetime_resolution()}] time(time) ;
\tfloat64 var1(dim1, dim2) ;
\t\tvar1:foo = variable ;
\tfloat64 var2(dim1, dim2) ;
Expand All @@ -464,7 +466,7 @@ def test_info(self) -> None:
// global attributes:
\t:unicode_attr = ba® ;
\t:string_attr = bar ;
}"""
}}"""
)
actual = buf.getvalue()
assert expected == actual
Expand Down

0 comments on commit ca5050d

Please sign in to comment.