Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit d4e0aed

Browse files
committedSep 19, 2022
msgpack: support tzindex in datetime
Support non-zero tzindex in datetime extended type. If both tzoffset and tzindex are specified, tzindex takes priority (same as in Tarantool [1]). Use `tz` parameter to set up timezone name: ``` dt = tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, sec=54, nsec=308543321, tz='Europe/Moscow') ``` You may use `tz` property to get timezone name of a datetime object. pytz is used to build timezone info. The Tarantool index to Olson name map and the inverted one are built with the gen_timezones.sh script based on the tarantool/go-tarantool script [2]. All Tarantool unique and alias timezones are present in the pytz.all_timezones list. Only the following abbreviated timezones from Tarantool are present in pytz.all_timezones (version 2022.2.1): - CET - EET - EST - GMT - HST - MST - UTC - WET pytz does not natively support work with abbreviated timezones due to their possibly ambiguous nature [3-5]. Tarantool itself does not support work with ambiguous abbreviated timezones: ``` Tarantool 2.10.1-0-g482d91c66 tarantool> datetime.new({tz = 'BST'}) --- - error: 'builtin/datetime.lua:477: could not parse ''BST'' - ambiguous timezone' ... ``` If an ambiguous timezone is specified, an exception is raised. Tarantool header timezones.h [6] provides a map for all abbreviated timezones with category info (all ambiguous timezones are marked with TZ_AMBIGUOUS flag) and offset info. We parse this info to build a pytz.FixedOffset() timezone for each Tarantool abbreviated timezone not supported natively by pytz. 1. https://www.tarantool.io/en/doc/latest/reference/reference_lua/datetime/new/ 2. https://github.com/tarantool/go-tarantool/blob/5801dc6f5ce69db7c8bc0c0d0fe4fb6042d5ecbc/datetime/gen-timezones.sh 3. https://stackoverflow.com/questions/37109945/how-to-use-abbreviated-timezone-namepst-ist-in-pytz 4. https://stackoverflow.com/questions/27531718/datetime-timezone-conversion-using-pytz 5. https://stackoverflow.com/questions/30315485/pytz-return-olson-timezone-name-from-only-a-gmt-offset 6. 
https://github.com/tarantool/tarantool/blob/9ee45289e01232b8df1413efea11db170ae3b3b4/src/lib/tzcode/timezones.h Closes #204
1 parent e729ac8 commit d4e0aed

File tree

7 files changed

+2020
-11
lines changed

7 files changed

+2020
-11
lines changed
 

‎CHANGELOG.md

+14
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5353
You may use `tzoffset` property to get timezone offset of a datetime
5454
object.
5555

56+
- Timezone support in datetime type (#204).
57+
58+
Use `tz` parameter to set up timezone name:
59+
60+
```python
61+
dt = tarantool.Datetime(year=2022, month=8, day=31,
62+
hour=18, minute=7, sec=54,
63+
nsec=308543321, tz='Europe/Moscow')
64+
```
65+
66+
If both `tz` and `tzoffset` are specified, `tz` is used.
67+
68+
You may use `tz` property to get timezone name of a datetime object.
69+
5670
### Changed
5771
- Bump msgpack requirement to 1.0.4 (PR #223).
5872
The only reason of this bump is various vulnerability fixes,

‎tarantool/msgpack_ext/types/datetime.py

+49-11
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import pandas
44
import pytz
55

6+
import tarantool.msgpack_ext.types.timezones as tt_timezones
7+
from tarantool.error import MsgpackError
8+
69
# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type
710
#
811
# The datetime MessagePack representation looks like this:
@@ -63,6 +66,17 @@ def compute_offset(timestamp):
6366
# There is no precision loss since offset is in minutes
6467
return int(utc_offset.total_seconds()) // SEC_IN_MIN
6568

69+
def get_python_tzinfo(tz, error_class):
    """
    Build a pytz timezone object for a Tarantool timezone name.

    :param tz: Tarantool timezone name (a name known to pytz or a
        Tarantool timezone abbreviation).
    :param error_class: Exception class to raise when the timezone
        cannot be used; callers pass the class matching their context.
    :return: ``pytz.timezone(tz)`` when pytz knows the name, otherwise a
        ``pytz.FixedOffset`` built from the ``offset`` recorded in
        ``tt_timezones.timezoneAbbrevInfo``.
    :raise error_class: The timezone is flagged as ambiguous or is
        present neither in pytz nor in the abbreviation table.
    """
    # all_timezones is a list; the set variant holds the same names and
    # gives constant-time membership checks.
    if tz in pytz.all_timezones_set:
        return pytz.timezone(tz)

    # Names missing from pytz are expected to be abbreviations (checked
    # with timezones/validate_timezones.py), but the installed pytz may
    # differ from the validated one, so report a miss with the caller's
    # error class instead of leaking a bare KeyError.
    tt_tzinfo = tt_timezones.timezoneAbbrevInfo.get(tz)
    if tt_tzinfo is None:
        raise error_class(f'Failed to create datetime with unknown timezone "{tz}"')

    if (tt_tzinfo['category'] & tt_timezones.TZ_AMBIGUOUS) != 0:
        raise error_class(f'Failed to create datetime with ambiguous timezone "{tz}"')

    return pytz.FixedOffset(tt_tzinfo['offset'])
79+
6680
def msgpack_decode(data):
6781
cursor = 0
6882
seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)
@@ -84,23 +98,29 @@ def msgpack_decode(data):
8498
datetime = pandas.to_datetime(total_nsec, unit='ns')
8599

86100
if tzindex != 0:
87-
raise NotImplementedError
101+
if tzindex not in tt_timezones.indexToTimezone:
102+
raise MsgpackError(f'Failed to decode datetime with unknown tzindex "{tzindex}"')
103+
tz = tt_timezones.indexToTimezone[tzindex]
104+
tzinfo = get_python_tzinfo(tz, MsgpackError)
105+
return datetime.replace(tzinfo=pytz.UTC).tz_convert(tzinfo), tz
88106
elif tzoffset != 0:
89107
tzinfo = pytz.FixedOffset(tzoffset)
90-
return datetime.replace(tzinfo=pytz.UTC).tz_convert(tzinfo)
108+
return datetime.replace(tzinfo=pytz.UTC).tz_convert(tzinfo), ''
91109
else:
92-
return datetime
110+
return datetime, ''
93111

94112
class Datetime():
95113
def __init__(self, data=None, *, timestamp=None, year=None, month=None,
96114
day=None, hour=None, minute=None, sec=None, nsec=None,
97-
tzoffset=0):
115+
tzoffset=0, tz=''):
98116
if data is not None:
99117
if not isinstance(data, bytes):
100118
raise ValueError('data argument (first positional argument) ' +
101119
'expected to be a "bytes" instance')
102120

103-
self._datetime = msgpack_decode(data)
121+
datetime, tz = msgpack_decode(data)
122+
self._datetime = datetime
123+
self._tz = tz
104124
return
105125

106126
# The logic is same as in Tarantool, refer to datetime API.
@@ -133,11 +153,20 @@ def __init__(self, data=None, *, timestamp=None, year=None, month=None,
133153
microsecond=microsecond,
134154
nanosecond=nanosecond)
135155

136-
if tzoffset != 0:
137-
tzinfo = pytz.FixedOffset(tzoffset)
138-
datetime = datetime.replace(tzinfo=tzinfo)
156+
if tz != '':
157+
if tz not in tt_timezones.timezoneToIndex:
158+
raise ValueError(f'Unknown Tarantool timezone "{tz}"')
139159

140-
self._datetime = datetime
160+
tzinfo = get_python_tzinfo(tz, ValueError)
161+
self._datetime = datetime.replace(tzinfo=tzinfo)
162+
self._tz = tz
163+
elif tzoffset != 0:
164+
tzinfo = pytz.FixedOffset(tzoffset)
165+
self._datetime = datetime.replace(tzinfo=tzinfo)
166+
self._tz = ''
167+
else:
168+
self._datetime = datetime
169+
self._tz = ''
141170

142171
def __eq__(self, other):
143172
if isinstance(other, Datetime):
@@ -151,7 +180,7 @@ def __str__(self):
151180
return self._datetime.__str__()
152181

153182
def __repr__(self):
154-
return f'datetime: {self._datetime.__repr__()}'
183+
return f'datetime: {self._datetime.__repr__()}, tz: "{self.tz}"'
155184

156185
def __copy__(self):
157186
cls = self.__class__
@@ -206,11 +235,20 @@ def tzoffset(self):
206235
return compute_offset(self._datetime)
207236
return 0
208237

238+
@property
239+
def tz(self):
240+
return self._tz
241+
209242
def msgpack_encode(self):
210243
seconds = self._datetime.value // NSEC_IN_SEC
211244
nsec = self.nsec
212245
tzoffset = self.tzoffset
213-
tzindex = 0
246+
247+
tz = self.tz
248+
if tz != '':
249+
tzindex = tt_timezones.timezoneToIndex[tz]
250+
else:
251+
tzindex = 0
214252

215253
buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)
216254

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from tarantool.msgpack_ext.types.timezones.timezones import (
2+
TZ_AMBIGUOUS,
3+
indexToTimezone,
4+
timezoneToIndex,
5+
timezoneAbbrevInfo,
6+
)
7+
8+
__all__ = ['TZ_AMBIGUOUS', 'indexToTimezone', 'timezoneToIndex',
9+
'timezoneAbbrevInfo']
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env bash
# Generate timezones.py (Tarantool timezone index <-> name maps and
# abbreviated timezone info) from Tarantool's timezones.h header, then
# run validate_timezones.py against the generated file.
set -xeuo pipefail

SRC_COMMIT="9ee45289e01232b8df1413efea11db170ae3b3b4"
SRC_FILE=timezones.h
DST_FILE=timezones.py

# Print the arguments of every line of the header mentioning the given
# macro as whitespace-separated fields: the "MACRO(" prefix, closing
# parentheses and commas are stripped.
zone_fields() {
    local macro="$1"
    grep "${macro}" "${SRC_FILE}" | sed "s/${macro}( *//g" | sed "s/[),]//g"
}

# Always start from a freshly downloaded header.
rm -f "${SRC_FILE}"
wget -O "${SRC_FILE}" \
    "https://raw.githubusercontent.com/tarantool/tarantool/${SRC_COMMIT}/src/lib/tzcode/timezones.h"

# We don't need aliases in indexToTimezone because Tarantool always
# replaces them:
#
# tarantool> T = date.parse '2022-01-01T00:00 Pacific/Enderbury'
# ---
# ...
# tarantool> T
# ---
# - 2022-01-01T00:00:00 Pacific/Kanton
# ...
#
# So we can do the same and don't worry, be happy.

cat <<EOF > "${DST_FILE}"
# Automatically generated by gen-timezones.sh

TZ_UTC = 0x01
TZ_RFC = 0x02
TZ_MILITARY = 0x04
TZ_AMBIGUOUS = 0x08
TZ_NYI = 0x10
TZ_OLSON = 0x20
TZ_ALIAS = 0x40
TZ_DST = 0x80

indexToTimezone = {
EOF

zone_fields ZONE_ABBREV \
    | awk '{printf("\t%s : %s,\n", $1, $3)}' >> "${DST_FILE}"
zone_fields ZONE_UNIQUE \
    | awk '{printf("\t%s : %s,\n", $1, $2)}' >> "${DST_FILE}"

cat <<EOF >> "${DST_FILE}"
}

timezoneToIndex = {
EOF

zone_fields ZONE_ABBREV \
    | awk '{printf("\t%s : %s,\n", $3, $1)}' >> "${DST_FILE}"
zone_fields ZONE_UNIQUE \
    | awk '{printf("\t%s : %s,\n", $2, $1)}' >> "${DST_FILE}"
zone_fields ZONE_ALIAS \
    | awk '{printf("\t%s : %s,\n", $2, $1)}' >> "${DST_FILE}"

cat <<EOF >> "${DST_FILE}"
}

timezoneAbbrevInfo = {
EOF

zone_fields ZONE_ABBREV \
    | awk '{printf("\t%s : {\"offset\" : %d, \"category\" : %s},\n", $3, $2, $4)}' >> "${DST_FILE}"
echo "}" >> "${DST_FILE}"

# The downloaded header is only an intermediate artifact.
rm "${SRC_FILE}"

python validate_timezones.py

‎tarantool/msgpack_ext/types/timezones/timezones.py

+1,784
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import pytz
2+
from timezones import timezoneToIndex, timezoneAbbrevInfo
3+
4+
# This file is a standalone check executed as a script; it is not meant
# to be imported as a module.
if __name__ != '__main__':
    # Raise a real exception class: the bare name ``Error`` is not
    # defined in this script and would surface as a NameError instead.
    raise ImportError('Import not expected')

# Every Tarantool timezone name must either be known to pytz or be
# described in the abbreviation table; anything else cannot be turned
# into a Python tzinfo.
for timezone in timezoneToIndex:
    if timezone in pytz.all_timezones_set:
        continue

    if timezone not in timezoneAbbrevInfo:
        raise Exception(f'Unknown Tarantool timezone {timezone}')

‎test/suites/test_datetime.py

+83
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,23 @@ def test_Datetime_class_API(self):
6565
# Both Tarantool and pandas prone to precision loss for timestamp() floats
6666
self.assertEqual(dt.timestamp, 1661958474.308543)
6767
self.assertEqual(dt.tzoffset, 180)
68+
self.assertEqual(dt.tz, '')
69+
70+
def test_Datetime_class_API_wth_tz(self):
71+
dt = tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, sec=54,
72+
nsec=308543321, tzoffset=123, tz='Europe/Moscow')
73+
74+
self.assertEqual(dt.year, 2022)
75+
self.assertEqual(dt.month, 8)
76+
self.assertEqual(dt.day, 31)
77+
self.assertEqual(dt.hour, 18)
78+
self.assertEqual(dt.minute, 7)
79+
self.assertEqual(dt.sec, 54)
80+
self.assertEqual(dt.nsec, 308543321)
81+
# Both Tarantool and pandas prone to precision loss for timestamp() floats
82+
self.assertEqual(dt.timestamp, 1661958474.308543)
83+
self.assertEqual(dt.tzoffset, 180)
84+
self.assertEqual(dt.tz, 'Europe/Moscow')
6885

6986

7087
datetime_class_invalid_init_cases = {
@@ -92,6 +109,18 @@ def test_Datetime_class_API(self):
92109
'type': ValueError,
93110
'msg': 'timestamp must be int if nsec provided'
94111
},
112+
'unknown_tz': {
113+
'args': [],
114+
'kwargs': {'year': 2022, 'month': 8, 'day': 31, 'tz': 'Moskva'},
115+
'type': ValueError,
116+
'msg': 'Unknown Tarantool timezone "Moskva"'
117+
},
118+
'abbrev_tz': {
119+
'args': [],
120+
'kwargs': {'year': 2022, 'month': 8, 'day': 31, 'tz': 'AET'},
121+
'type': ValueError,
122+
'msg': 'Failed to create datetime with ambiguous timezone "AET"'
123+
},
95124
}
96125

97126
def test_Datetime_class_invalid_init(self):
@@ -182,6 +211,49 @@ def test_Datetime_class_invalid_init(self):
182211
'msgpack': (b'\x8a\xb1\x0f\x63\x00\x00\x00\x00\x00\x00\x00\x00\xc4\xff\x00\x00'),
183212
'tarantool': r"datetime.new({timestamp=1661969274, tzoffset=-60})",
184213
},
214+
'date_with_utc_tz': {
215+
'python': tarantool.Datetime(year=1970, month=1, day=1, tz='UTC'),
216+
'msgpack': (b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x01'),
217+
'tarantool': r"datetime.new({year=1970, month=1, day=1, tz='UTC'})",
218+
},
219+
'date_with_tz': {
220+
'python': tarantool.Datetime(year=2022, month=8, day=31, tz='Europe/Moscow'),
221+
'msgpack': (b'\x50\x7a\x0e\x63\x00\x00\x00\x00\x00\x00\x00\x00\xb4\x00\xb3\x03'),
222+
'tarantool': r"datetime.new({year=2022, month=8, day=31, tz='Europe/Moscow'})",
223+
},
224+
'datetime_with_tz': {
225+
'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, sec=54,
226+
nsec=308543321, tz='Europe/Moscow'),
227+
'msgpack': (b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\xb3\x03'),
228+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
229+
r"nsec=308543321, tz='Europe/Moscow'})",
230+
},
231+
'datetime_with_tz_winter_time': {
232+
'python': tarantool.Datetime(year=2008, month=8, day=1, tz='Europe/Moscow'),
233+
'msgpack': (b'\xc0\x19\x92\x48\x00\x00\x00\x00\x00\x00\x00\x00\xf0\x00\xb3\x03'),
234+
'tarantool': r"datetime.new({year=2008, month=8, day=1, tz='Europe/Moscow'})",
235+
},
236+
'datetime_with_tz_and_offset': {
237+
'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, sec=54,
238+
nsec=308543321, tz='Europe/Moscow', tzoffset=123),
239+
'msgpack': (b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\xb3\x03'),
240+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
241+
r"nsec=308543321, tz='Europe/Moscow', tzoffset=123})",
242+
},
243+
'datetime_with_abbrev_tz': {
244+
'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, sec=54,
245+
nsec=308543321, tz='MSK'),
246+
'msgpack': (b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\xee\x00'),
247+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
248+
r"nsec=308543321, tz='MSK'})",
249+
},
250+
'datetime_with_abbrev_tz_and_zero_offset': {
251+
'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, sec=54,
252+
nsec=308543321, tz='AZODT'),
253+
'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x12\x02'),
254+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
255+
r"nsec=308543321, tz='AZODT'})",
256+
},
185257
}
186258

187259
def test_msgpack_decode(self):
@@ -235,6 +307,17 @@ def test_tarantool_encode(self):
235307

236308
self.assertSequenceEqual(self.adm(lua_eval), [True])
237309

310+
def test_msgpack_decode_unknown_tzindex(self):
311+
case = b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\xff\xff'
312+
self.assertRaisesRegex(
313+
MsgpackError, 'Failed to decode datetime with unknown tzindex "-1"',
314+
lambda: unpacker_ext_hook(4, case))
315+
316+
def test_msgpack_decode_ambiguous_tzindex(self):
317+
case = b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x82\x00'
318+
self.assertRaisesRegex(
319+
MsgpackError, 'Failed to create datetime with ambiguous timezone "AET"',
320+
lambda: unpacker_ext_hook(4, case))
238321

239322
@classmethod
240323
def tearDownClass(self):

0 commit comments

Comments
 (0)
Please sign in to comment.