Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interval logic improvements: type/range validation, intersection method, contains/in functionality #114

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
2 changes: 1 addition & 1 deletion .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python: ["3.10", "3.11", "3.12", "3.13"]
defaults:
run:
working-directory: .
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "undate"
description = "library for working with uncertain, fuzzy, or partially unknown dates and date intervals"
readme = "README.md"
license = { text = "Apache-2" }
requires-python = ">= 3.9"
requires-python = ">= 3.10"
dynamic = ["version"]
dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"]
authors = [
Expand All @@ -31,7 +31,6 @@ keywords = [
classifiers = [
"Development Status :: 2 - Pre-Alpha",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
Expand Down
95 changes: 77 additions & 18 deletions src/undate/interval.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import datetime

# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
from typing import Optional, Union

Expand All @@ -25,31 +23,30 @@ class UndateInterval:
latest: Union[Undate, None]
label: Union[str, None]

# TODO: let's think about adding an optional precision / length /size field
# using DatePrecision
# TODO: think about adding an optional precision / length /size field
# using DatePrecision for intervals of any standard duration (decade, century)

def __init__(
self,
earliest: Optional[Undate] = None,
latest: Optional[Undate] = None,
label: Optional[str] = None,
):
# for now, assume takes two undate objects;
# support conversion from datetime
if earliest and not isinstance(earliest, Undate):
# NOTE: some overlap with Undate._comparison_type method
# maybe support conversion from other formats later
if isinstance(earliest, datetime.date):
earliest = Undate.from_datetime_date(earliest)
else:
# takes two undate objects; allows conversion from supported types
if earliest:
try:
earliest = Undate.to_undate(earliest)
except TypeError as err:
raise ValueError(
f"earliest date {earliest} cannot be converted to Undate"
)
if latest and not isinstance(latest, Undate):
if isinstance(latest, datetime.date):
latest = Undate.from_datetime_date(latest)
else:
raise ValueError(f"latest date {latest} cannot be converted to Undate")
) from err
if latest:
try:
latest = Undate.to_undate(latest)
except TypeError as err:
raise ValueError(
f"latest date {latest} cannot be converted to Undate"
) from err

# check that the interval is valid
if latest and earliest and latest <= earliest:
Expand Down Expand Up @@ -78,6 +75,9 @@ def __repr__(self) -> str:
return "<UndateInterval %s>" % self

def __eq__(self, other) -> bool:
    """Compare this interval with another :class:`UndateInterval`.

    Two intervals are equal when their ``earliest`` values are equal and
    their ``latest`` values are equal. For any other type this returns
    ``NotImplemented`` so Python can fall back to the other operand.
    """
    if isinstance(other, UndateInterval):
        same_start = self.earliest == other.earliest
        return same_start and self.latest == other.latest
    # comparison with other types is not currently supported
    return NotImplemented

Expand Down Expand Up @@ -122,3 +122,62 @@ def duration(self) -> Timedelta:
# is there any meaningful way to calculate duration
# if one year is known and the other is not?
raise NotImplementedError

def __contains__(self, other: object) -> bool:
    """Determine if another interval or date falls within this interval.

    ``other`` may be an :class:`UndateInterval` or any value that
    :meth:`Undate.to_undate` can convert. An interval that is equal to
    this one is not considered to be contained by it.

    Raises :class:`TypeError` (propagated from :meth:`Undate.to_undate`)
    when ``other`` is not an interval and cannot be converted.
    """
    if isinstance(other, UndateInterval):
        # if two intervals are strictly equal, don't consider
        # either one as containing the other
        if self == other:
            return False
        # otherwise compare based on earliest/latest bounds
        other_earliest = other.earliest
        other_latest = other.latest
    else:
        # a single date acts as both bounds of the comparison; if the
        # conversion fails, the TypeError reaches the caller unchanged
        other_earliest = other_latest = Undate.to_undate(other)

    # If a bound of this interval is None, that side is open and needs no
    # check. Otherwise the other value must have the matching bound set
    # and it must fall inside this interval's bound.
    return (
        self.earliest is None
        or (other_earliest is not None and other_earliest >= self.earliest)
    ) and (
        self.latest is None
        or (other_latest is not None and other_latest <= self.latest)
    )

def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]:
    """Return the overlap between this interval and ``other``.

    The result is a new :class:`UndateInterval` built from the inner
    bounds of the two intervals. Returns None when building that
    interval raises :class:`ValueError`, i.e. there is no overlap.
    """
    try:
        # lower bound: the later of the two when both are set,
        # otherwise whichever one is set (or None)
        if self.earliest and other.earliest:
            start = max(self.earliest, other.earliest)
        else:
            start = self.earliest or other.earliest

        # upper bound: the earlier of the two when both are set,
        # otherwise whichever one is set (or None)
        if self.latest and other.latest:
            end = min(self.latest, other.latest)
        else:
            end = self.latest or other.latest

        # if this results in an invalid interval, initialization
        # will throw an exception
        return UndateInterval(start, end)
    except ValueError:
        return None
49 changes: 33 additions & 16 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import datetime
from enum import auto

import re
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from undate.interval import UndateInterval

try:
# StrEnum was only added in python 3.11
from enum import StrEnum
Expand Down Expand Up @@ -72,6 +74,10 @@ def __init__(
label: Optional[str] = None,
calendar: Optional[Union[str, Calendar]] = None,
):
# everything is optional but something is required
if all([val is None for val in [year, month, day]]):
raise ValueError("At least one of year, month, or day must be specified")

# keep track of initial values and which values are known
# TODO: add validation: if str, must be expected length
self.initial_values: Dict[str, Optional[Union[int, str]]] = {
Expand Down Expand Up @@ -242,23 +248,19 @@ def format(self, format) -> str:

raise ValueError(f"Unsupported format '{format}'")

def _comparison_type(self, other: object) -> "Undate":
@classmethod
def _comparison_type(cls, other: object) -> "Undate":
"""Common logic for type handling in comparison methods.
Converts to Undate object if possible, otherwise raises
NotImplemented error. Currently only supports conversion
from :class:`datetime.date`
NotImplementedError exception. Uses :meth:`to_undate` for conversion.
"""

# support datetime.date by converting to undate
if isinstance(other, datetime.date):
other = Undate.from_datetime_date(other)

# recommended to support comparison with arbitrary objects
if not isinstance(other, Undate):
# convert if possible; return NotImplemented if not
try:
return cls.to_undate(other)
except TypeError:
# recommended to support comparison with arbitrary objects
return NotImplemented

return other

def __eq__(self, other: object) -> bool:
# Note: assumes label differences don't matter for comparing dates

Expand All @@ -268,6 +270,8 @@ def __eq__(self, other: object) -> bool:

other = self._comparison_type(other)
if other is NotImplemented:
# return NotImplemented to indicate comparison is not supported
# with this type
return NotImplemented

# if both dates are fully known, then earliest/latest check
Expand Down Expand Up @@ -359,10 +363,23 @@ def __contains__(self, other: object) -> bool:
]
)

@staticmethod
def from_datetime_date(dt_date: datetime.date):
    """Build an :class:`Undate` from the year, month, and day of a
    :class:`datetime.date`."""
    year, month, day = dt_date.year, dt_date.month, dt_date.day
    return Undate(year, month, day)
@classmethod
def to_undate(cls, other: object) -> "Undate":
    """Convert an arbitrary object to :class:`Undate`, if possible.

    Currently supports:

    - :class:`Undate` (returned unchanged)
    - :class:`datetime.date` or :class:`datetime.datetime`

    Raises :class:`TypeError` if conversion is not possible.
    """
    # already the right type: hand the same object back
    if isinstance(other, Undate):
        return other

    # date-like objects: carry over year, month, and day
    if isinstance(other, (datetime.date, datetime.datetime)):
        return Undate(other.year, other.month, other.day)

    raise TypeError(f"Conversion from {type(other)} is not supported")

@property
def known_year(self) -> bool:
Expand Down
9 changes: 4 additions & 5 deletions tests/test_converters/test_edtf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pytest
from undate.converters.edtf import EDTFDateConverter
from undate.date import DatePrecision
from undate import Undate, UndateInterval


Expand Down Expand Up @@ -64,8 +63,8 @@ def test_to_string(self):

# if converter can't generate a string for the date,
# it should return a value error
empty_undate = Undate()
empty_undate.precision = DatePrecision.DECADE
with pytest.raises(ValueError):
EDTFDateConverter().to_string(empty_undate)
# empty_undate = Undate() # undate with no date information no longer supported
# empty_undate.precision = DatePrecision.DECADE
# with pytest.raises(ValueError):
# EDTFDateConverter().to_string(empty_undate)
# TODO: override missing digit and confirm replacement
86 changes: 86 additions & 0 deletions tests/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ def test_eq(self):
)
assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5))

def test_eq_type_check(self):
    """Equality with an unsupported type returns ``NotImplemented``."""
    # doesn't currently support comparison with anything else
    interval = UndateInterval(Undate(900))
    # returns NotImplemented if comparison with this type is not supported;
    # NotImplemented is a singleton, so check identity rather than equality
    assert interval.__eq__("foo") is NotImplemented

def test_not_eq(self):
assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval(
Undate(2022), Undate(2024)
Expand Down Expand Up @@ -143,3 +149,83 @@ def test_duration(self):
# one year set and the other not currently raises not implemented error
with pytest.raises(NotImplementedError):
UndateInterval(Undate(2000), Undate(month=10)).duration()

def test_intersection(self):
    """Exercise ``UndateInterval.intersection`` with disjoint, nested,
    partially overlapping, and half-open intervals."""
    c11 = UndateInterval(Undate(1001), Undate(1100))
    c20 = UndateInterval(Undate(1901), Undate(2000))
    # disjoint intervals have no intersection, whichever side is asked
    assert c11.intersection(c20) is None
    assert c20.intersection(c11) is None

    # an interval completely inside another intersects to an interval
    # equivalent to the smaller one
    nineties = UndateInterval(Undate(1990), Undate(1999))
    assert c20.intersection(nineties) == nineties
    assert nineties.intersection(c20) == nineties

    # partial overlap is trimmed to the shared span
    nineties_oughts = UndateInterval(Undate(1990), Undate(2009))
    shared_span = UndateInterval(Undate(1990), Undate(2000))
    assert c20.intersection(nineties_oughts) == shared_span

    # half-open interval with no upper bound
    from_1001 = UndateInterval(Undate(1001), None)
    for bounded in (c20, nineties):
        assert from_1001.intersection(bounded) == bounded

    # half-open interval with no lower bound
    until_1901 = UndateInterval(None, Undate(1901))
    assert until_1901.intersection(nineties) is None
    assert until_1901.intersection(c11) == c11
    both_open = UndateInterval(Undate(1001), Undate(1901))
    assert until_1901.intersection(from_1001) == both_open

def test_contains(self):
    """Exercise ``in`` / ``not in`` on :class:`UndateInterval` with
    intervals, single dates, unsupported types, and open bounds."""
    c11 = UndateInterval(Undate(1001), Undate(1100))
    c20 = UndateInterval(Undate(1901), Undate(2000))
    nineties = UndateInterval(Undate(1990), Undate(1999))

    # an interval doesn't contain itself
    for span in (c11, c20, nineties):
        assert span not in span

    # interval within interval
    assert nineties in c20
    assert nineties not in c11
    assert c11 not in nineties
    assert c20 not in nineties

    # a specific date can be contained by an interval
    year_2000 = Undate(2000)
    assert year_2000 in c20
    assert year_2000 not in c11

    # partially known date should work too
    some_april = Undate("198X", 4)
    assert some_april in c20
    assert some_april not in c11

    # conversion from datetime.date also works
    assert datetime.date(1922, 5, 1) in c20

    # unsupported types result in a type error
    with pytest.raises(TypeError):
        "nineteen-eighty-four" in c20

    # contains check with half-open intervals
    from_1001 = UndateInterval(Undate(1001), None)
    until_1901 = UndateInterval(None, Undate(1901))
    # neither of them contains the other
    assert from_1001 not in until_1901
    assert until_1901 not in from_1001
    # nor are they contained by a smaller range
    assert from_1001 not in nineties
    assert until_1901 not in nineties

    # all of the earlier test dates are in the 1900s,
    # so they are after the 11th century and not before the 20th
    for item in (nineties, year_2000, some_april):
        assert item in from_1001
        assert item not in until_1901

    # fully open interval - is this even meaningful?
    unbounded = UndateInterval(None, None)
    assert nineties in unbounded
    assert unbounded not in unbounded
Loading