Skip to content

Commit 73feb8f

Browse files
authored
Merge pull request #21 from alimanfoo/doc-msgpack-pickle
Review msgpack pickle. Resolves #6, resolves #8.
2 parents ba8d58b + b496bec commit 73feb8f

File tree

12 files changed

+110
-111
lines changed

12 files changed

+110
-111
lines changed

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ def __getattr__(cls, name):
2828
return Mock()
2929

3030

31-
MOCK_MODULES = []
31+
MOCK_MODULES = ['msgpack']
3232
if PY2:
3333
MOCK_MODULES.append('lzma')
3434

3535

3636
sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES)
37-
37+
3838

3939
# If extensions (or modules to document with autodoc) are in another directory,
4040
# add these directories to sys.path here. If the directory is relative to the

docs/index.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ Contents
5959
packbits
6060
categorize
6161
checksum32
62+
pickles
63+
msgpacks
6264
release
6365

6466
Acknowledgments

docs/msgpacks.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
MsgPack
2+
=======
3+
.. automodule:: numcodecs.msgpacks
4+
5+
.. autoclass:: MsgPack
6+
7+
.. autoattribute:: codec_id

docs/pickles.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Pickle
2+
======
3+
.. automodule:: numcodecs.pickles
4+
5+
.. autoclass:: Pickle
6+
7+
.. autoattribute:: codec_id

numcodecs/msgpacks.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,12 @@
66

77

88
from numcodecs.abc import Codec
9-
from numcodecs.compat import ndarray_from_buffer, buffer_copy
109
import msgpack
1110

1211

1312
class MsgPack(Codec):
14-
"""Codec to encode data as msgpacked bytes. Useful for encoding python
15-
strings
16-
17-
Raises
18-
------
19-
encoding a non-object dtyped ndarray will raise ValueError
13+
"""Codec to encode data as msgpacked bytes. Useful for encoding an array of Python string
14+
objects.
2015
2116
Examples
2217
--------
@@ -27,26 +22,39 @@ class MsgPack(Codec):
2722
>>> f.decode(f.encode(x))
2823
array(['foo', 'bar', 'baz'], dtype=object)
2924
25+
See Also
26+
--------
27+
:class:`numcodecs.pickles.Pickle`
28+
29+
Notes
30+
-----
31+
Requires `msgpack-python <https://pypi.python.org/pypi/msgpack-python>`_ to be installed.
32+
3033
""" # flake8: noqa
3134

3235
codec_id = 'msgpack'
3336

37+
def __init__(self, encoding='utf-8'):
38+
self.encoding = encoding
39+
3440
def encode(self, buf):
35-
if hasattr(buf, 'dtype') and buf.dtype != 'object':
36-
raise ValueError("cannot encode non-object ndarrays, %s "
37-
"dtype was passed" % buf.dtype)
38-
return msgpack.packb(buf.tolist(), encoding='utf-8')
41+
buf = np.asarray(buf)
42+
l = buf.tolist()
43+
l.append(buf.dtype.str)
44+
return msgpack.packb(l, encoding=self.encoding)
3945

4046
def decode(self, buf, out=None):
41-
dec = np.array(msgpack.unpackb(buf, encoding='utf-8'), dtype='object')
47+
l = msgpack.unpackb(buf, encoding=self.encoding)
48+
dec = np.array(l[:-1], dtype=l[-1])
4249
if out is not None:
4350
np.copyto(out, dec)
4451
return out
4552
else:
4653
return dec
4754

4855
def get_config(self):
49-
return dict(id=self.codec_id)
56+
return dict(id=self.codec_id,
57+
encoding=self.encoding)
5058

5159
def __repr__(self):
52-
return 'MsgPack()'
60+
return 'MsgPack(encoding=%r)' % self.encoding

numcodecs/pickles.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,13 @@
1414

1515

1616
class Pickle(Codec):
17-
"""Codec to encode data as as pickled bytes. Useful for encoding python
18-
strings.
17+
"""Codec to encode data as pickled bytes. Useful for encoding an array of Python string
18+
objects.
1919
2020
Parameters
2121
----------
2222
protocol : int, defaults to pickle.HIGHEST_PROTOCOL
23-
the protocol used to pickle data
24-
25-
Raises
26-
------
27-
encoding a non-object dtyped ndarray will raise ValueError
23+
The protocol used to pickle data.
2824
2925
Examples
3026
--------
@@ -35,6 +31,10 @@ class Pickle(Codec):
3531
>>> f.decode(f.encode(x))
3632
array(['foo', 'bar', 'baz'], dtype=object)
3733
34+
See Also
35+
--------
36+
:class:`numcodecs.msgpacks.MsgPack`
37+
3838
""" # flake8: noqa
3939

4040
codec_id = 'pickle'
@@ -43,9 +43,6 @@ def __init__(self, protocol=pickle.HIGHEST_PROTOCOL):
4343
self.protocol = protocol
4444

4545
def encode(self, buf):
46-
if hasattr(buf, 'dtype') and buf.dtype != 'object':
47-
raise ValueError("cannot encode non-object ndarrays, %s "
48-
"dtype was passed" % buf.dtype)
4946
return pickle.dumps(buf, protocol=self.protocol)
5047

5148
def decode(self, buf, out=None):

numcodecs/tests/common.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,13 @@ def compare(res):
9191
compare(out)
9292

9393

94-
def check_encode_decode_objects(arr, codec):
95-
96-
# this is a more specific test that check_encode_decode
97-
# as these require actual objects (and not bytes only)
94+
def check_encode_decode_array(arr, codec):
9895

9996
def compare(res, arr=arr):
10097

10198
assert_true(isinstance(res, np.ndarray))
10299
assert_true(res.shape == arr.shape)
103-
assert_true(res.dtype == 'object')
100+
assert_true(res.dtype == arr.dtype)
104101

105102
# numpy asserts don't compare object arrays
106103
# properly; assert that we have the same nans

numcodecs/tests/test_lzma.py

Lines changed: 43 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,49 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, print_function, division
3+
import itertools
34

45

5-
_lzma = None
6+
import nose
7+
import numpy as np
8+
69
try:
7-
import lzma as _lzma
10+
from numcodecs.lzma import LZMA, _lzma
811
except ImportError: # pragma: no cover
9-
try:
10-
from backports import lzma as _lzma
11-
except ImportError:
12-
pass
13-
14-
15-
if _lzma:
16-
17-
import itertools
18-
import numpy as np
19-
from numcodecs.lzma import LZMA
20-
from numcodecs.tests.common import check_encode_decode, check_config, \
21-
check_repr
22-
23-
codecs = [
24-
LZMA(),
25-
LZMA(preset=1),
26-
LZMA(preset=5),
27-
LZMA(preset=9),
28-
LZMA(format=_lzma.FORMAT_RAW,
29-
filters=[dict(id=_lzma.FILTER_LZMA2, preset=1)])
30-
]
31-
32-
# mix of dtypes: integer, float, bool, string
33-
# mix of shapes: 1D, 2D, 3D
34-
# mix of orders: C, F
35-
arrays = [
36-
np.arange(1000, dtype='i4'),
37-
np.linspace(1000, 1001, 1000, dtype='f8'),
38-
np.random.normal(loc=1000, scale=1, size=(100, 10)),
39-
np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10,
40-
order='F'),
41-
np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10)
42-
]
43-
44-
def test_encode_decode():
45-
for arr, codec in itertools.product(arrays, codecs):
46-
check_encode_decode(arr, codec)
47-
48-
def test_config():
49-
codec = LZMA(preset=1, format=_lzma.FORMAT_XZ,
50-
check=_lzma.CHECK_NONE, filters=None)
51-
check_config(codec)
52-
53-
def test_repr():
54-
check_repr('LZMA(format=1, check=0, preset=1, filters=None)')
12+
raise nose.SkipTest("LZMA not available")
13+
14+
from numcodecs.tests.common import check_encode_decode, check_config, check_repr
15+
16+
17+
codecs = [
18+
LZMA(),
19+
LZMA(preset=1),
20+
LZMA(preset=5),
21+
LZMA(preset=9),
22+
LZMA(format=_lzma.FORMAT_RAW, filters=[dict(id=_lzma.FILTER_LZMA2, preset=1)])
23+
]
24+
25+
26+
# mix of dtypes: integer, float, bool, string
27+
# mix of shapes: 1D, 2D, 3D
28+
# mix of orders: C, F
29+
arrays = [
30+
np.arange(1000, dtype='i4'),
31+
np.linspace(1000, 1001, 1000, dtype='f8'),
32+
np.random.normal(loc=1000, scale=1, size=(100, 10)),
33+
np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
34+
np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10)
35+
]
36+
37+
38+
def test_encode_decode():
39+
for arr, codec in itertools.product(arrays, codecs):
40+
check_encode_decode(arr, codec)
41+
42+
43+
def test_config():
44+
codec = LZMA(preset=1, format=_lzma.FORMAT_XZ, check=_lzma.CHECK_NONE, filters=None)
45+
check_config(codec)
46+
47+
48+
def test_repr():
49+
check_repr('LZMA(format=1, check=0, preset=1, filters=None)')

numcodecs/tests/test_msgpacks.py

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@
33

44

55
import numpy as np
6-
from numpy.testing import assert_raises
7-
from numcodecs.msgpacks import MsgPack
8-
from numcodecs.tests.common import (check_config, check_repr,
9-
check_encode_decode_objects)
6+
import nose
7+
8+
try:
9+
from numcodecs.msgpacks import MsgPack
10+
except ImportError: # pragma: no cover
11+
raise nose.SkipTest("msgpack-python not available")
12+
13+
from numcodecs.tests.common import check_config, check_repr, check_encode_decode_array
1014

1115

1216
# object array with strings
@@ -16,25 +20,15 @@
1620
np.array(['foo', 'bar', 'baz'] * 300, dtype=object),
1721
np.array([['foo', 'bar', np.nan]] * 300, dtype=object),
1822
np.array(['foo', 1.0, 2] * 300, dtype=object),
19-
]
20-
21-
# non-object ndarrays
22-
arrays_incompat = [
2323
np.arange(1000, dtype='i4'),
2424
np.array(['foo', 'bar', 'baz'] * 300),
2525
]
2626

2727

28-
def test_encode_errors():
29-
for arr in arrays_incompat:
30-
codec = MsgPack()
31-
assert_raises(ValueError, codec.encode, arr)
32-
33-
3428
def test_encode_decode():
3529
for arr in arrays:
3630
codec = MsgPack()
37-
check_encode_decode_objects(arr, codec)
31+
check_encode_decode_array(arr, codec)
3832

3933

4034
def test_config():
@@ -43,4 +37,5 @@ def test_config():
4337

4438

4539
def test_repr():
46-
check_repr("MsgPack()")
40+
check_repr("MsgPack(encoding='utf-8')")
41+
check_repr("MsgPack(encoding='ascii')")

numcodecs/tests/test_pickle.py renamed to numcodecs/tests/test_pickles.py

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,10 @@
33

44

55
import numpy as np
6-
from numpy.testing import assert_raises
76

87

98
from numcodecs.pickles import Pickle
10-
from numcodecs.tests.common import (check_config, check_repr,
11-
check_encode_decode_objects)
9+
from numcodecs.tests.common import check_config, check_repr, check_encode_decode_array
1210

1311

1412
# object array with strings
@@ -18,25 +16,15 @@
1816
np.array(['foo', 'bar', 'baz'] * 300, dtype=object),
1917
np.array([['foo', 'bar', np.nan]] * 300, dtype=object),
2018
np.array(['foo', 1.0, 2] * 300, dtype=object),
21-
]
22-
23-
# non-object ndarrays
24-
arrays_incompat = [
2519
np.arange(1000, dtype='i4'),
2620
np.array(['foo', 'bar', 'baz'] * 300),
2721
]
2822

2923

30-
def test_encode_errors():
31-
for arr in arrays_incompat:
32-
codec = Pickle()
33-
assert_raises(ValueError, codec.encode, arr)
34-
35-
3624
def test_encode_decode():
25+
codec = Pickle()
3726
for arr in arrays:
38-
codec = Pickle()
39-
check_encode_decode_objects(arr, codec)
27+
check_encode_decode_array(arr, codec)
4028

4129

4230
def test_config():

0 commit comments

Comments
 (0)