Skip to content

Commit

Permalink
Relaxed datetime/time parsing
Browse files Browse the repository at this point in the history
Previously we strictly followed the RFC3339 format when parsing datetime
and time objects from strings. We now support a few common ISO8601
compatible relaxations:

- A `:` isn't required as part of the timezone component in both
  datetime and time strings (`2022-01-02T03:04:05.678+0102` and
  `2022-01-02T03:04:05.678+01:02` are treated the same).

- A ` ` may be used instead of `T`/`t` as a separator between date and
  time components when parsing datetime strings.

When encoding datetime/time objects we still strictly follow RFC3339.
Relaxing only the parsing side eases integrating msgspec with other
systems that don't strictly follow RFC3339.
  • Loading branch information
jcrist committed Aug 26, 2023
1 parent d40bc43 commit d95b33c
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 10 deletions.
23 changes: 13 additions & 10 deletions msgspec/_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -10078,11 +10078,12 @@ ms_decode_time(const char *buf, Py_ssize_t size, TypeNode *type, PathNode *path)
goto invalid;
}

/* Explicit offset requires exactly 5 bytes left */
if (buf_end - buf != 5) goto invalid;

if (buf_end - buf < 3) goto invalid;
if ((buf = ms_read_fixint(buf, 2, &offset_hour)) == NULL) goto invalid;
if (*buf++ != ':') goto invalid;
/* RFC3339 requires a ':' separator, ISO8601 doesn't. We support
* either */
if (*buf == ':') buf++;
if (buf_end - buf != 2) goto invalid;
if ((buf = ms_read_fixint(buf, 2, &offset_min)) == NULL) goto invalid;
if (offset_hour > 23 || offset_min > 59) goto invalid;
offset *= (offset_hour * 60 + offset_min);
Expand Down Expand Up @@ -10178,9 +10179,10 @@ ms_decode_datetime_from_str(
if (*buf++ != '-') goto invalid;
if ((buf = ms_read_fixint(buf, 2, &day)) == NULL) goto invalid;

/* Date/time separator can be T or t */
/* RFC3339 date/time separator can be T or t. We also support ' ', which is
* ISO8601 compatible. */
c = *buf++;
if (!(c == 'T' || c == 't')) goto invalid;
if (!(c == 'T' || c == 't' || c == ' ')) goto invalid;

/* Parse time */
if ((buf = ms_read_fixint(buf, 2, &hour)) == NULL) goto invalid;
Expand Down Expand Up @@ -10251,11 +10253,12 @@ ms_decode_datetime_from_str(
goto invalid;
}

/* Explicit offset requires exactly 5 bytes left */
if (buf_end - buf != 5) goto invalid;

if (buf_end - buf < 3) goto invalid;
if ((buf = ms_read_fixint(buf, 2, &offset_hour)) == NULL) goto invalid;
if (*buf++ != ':') goto invalid;
/* RFC3339 requires a ':' separator, ISO8601 doesn't. We support
* either */
if (*buf == ':') buf++;
if (buf_end - buf != 2) goto invalid;
if ((buf = ms_read_fixint(buf, 2, &offset_min)) == NULL) goto invalid;
if (offset_hour > 23 || offset_min > 59) goto invalid;
offset *= (offset_hour * 60 + offset_min);
Expand Down
19 changes: 19 additions & 0 deletions tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -3229,6 +3229,20 @@ def test_decode_time_not_case_sensitive(self, proto, z):
res = proto.decode(proto.encode(f"04:05:06.000007{z}"), type=datetime.time)
assert res == sol

@pytest.mark.parametrize(
    "lax, strict",
    [
        ("03:04:05+0102", "03:04:05+01:02"),
        ("03:04:05-0102", "03:04:05-01:02"),
    ],
)
def test_decode_time_rfc3339_relaxed(self, lax, strict, proto):
    """A UTC offset written without ':' decodes like its strict RFC3339 form.

    ``lax`` is the relaxed spelling fed through the protocol under test;
    ``strict`` is the equivalent RFC3339 string used to build the expected
    ``datetime.time`` value.
    """
    expected = datetime.time.fromisoformat(strict)
    # Round-trip the relaxed string through the protocol, decoding as a time.
    decoded = proto.decode(proto.encode(lax), type=datetime.time)
    assert decoded == expected

@pytest.mark.parametrize(
"t, sol",
[
Expand Down Expand Up @@ -3276,13 +3290,16 @@ def test_decode_time_nanos(self, proto, t, sol):
"01:02:3.0000004Z",
"01:02:03.0000004+5:06",
"01:02:03.0000004+05:6",
"01:02:03.0000004+056",
"01:02:03.0000004+05600",
# Trailing data
"01:02:030",
"01:02:03a",
"01:02:03.a",
"01:02:03.0a",
"01:02:03.0000004a",
"01:02:03.0000004+00:000",
"01:02:03.0000004+00000",
"01:02:03.0000004Z0",
# Truncated
"01:02:3",
Expand All @@ -3298,6 +3315,8 @@ def test_decode_time_nanos(self, proto, t, sol):
"01:02:03.00a+05:06",
"01:02:03.004+0a:06",
"01:02:03.004+05:0a",
"01:02:03.004+0a06",
"01:02:03.004+050a",
# Hour out of range
"24:02:03.004",
# Minute out of range
Expand Down
19 changes: 19 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,21 @@ def test_decode_datetime_nanos(self, msg, sol):
res = msgspec.json.decode(msg, type=datetime.datetime)
assert res == sol

@pytest.mark.parametrize(
    "lax, strict",
    [
        ("2022-01-02T03:04:05+0102", "2022-01-02T03:04:05+01:02"),
        ("2022-01-02T03:04:05-0102", "2022-01-02T03:04:05-01:02"),
        ("2022-01-02 03:04:05", "2022-01-02T03:04:05"),
    ],
)
def test_decode_datetime_rfc3339_relaxed(self, lax, strict):
    """Relaxed datetime spellings decode like their strict RFC3339 forms.

    Covers a UTC offset written without ':' and a space used in place of
    the 'T' date/time separator. ``strict`` is the equivalent RFC3339
    string used to build the expected ``datetime.datetime`` value.
    """
    expected = datetime.datetime.fromisoformat(strict)
    # Encode the relaxed string as JSON, then decode it as a datetime.
    payload = msgspec.json.encode(lax)
    assert msgspec.json.decode(payload, type=datetime.datetime) == expected

@pytest.mark.parametrize(
"s",
[
Expand All @@ -996,8 +1011,10 @@ def test_decode_datetime_nanos(self, msg, sol):
b'"0001-02-03T04:05:6.000007Z"',
b'"0001-02-03T04:05:06.000007+0:00"',
b'"0001-02-03T04:05:06.000007+00:0"',
b'"0001-02-03T04:05:06.000007+000"',
# Trailing data
b'"0001-02-03T04:05:06.000007+00:000"',
b'"0001-02-03T04:05:06.000007+00000"',
b'"0001-02-03T04:05:06.000007Z0"',
b'"0001-02-03T04:05:06a"',
b'"0001-02-03T04:05:06.000007a"',
Expand All @@ -1019,6 +1036,8 @@ def test_decode_datetime_nanos(self, msg, sol):
b'"0001-02-03T04:05:06.000007a"',
b'"0001-02-03T04:05:06.000007+0a:00"',
b'"0001-02-03T04:05:06.000007+00:0a"',
b'"0001-02-03T04:05:06.000007+0a00"',
b'"0001-02-03T04:05:06.000007+000a"',
# Year out of range
b'"0000-02-03T04:05:06.000007Z"',
# Month out of range
Expand Down

0 comments on commit d95b33c

Please sign in to comment.