Skip to content

Commit e16ca16

Browse files
committed
Start work on blosc v2
1 parent 2a7bf9c commit e16ca16

File tree

2 files changed

+466
-0
lines changed

2 files changed

+466
-0
lines changed

numcodecs/blosc_v2.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
"""
2+
An attempt at replacing the bundled version of blosc with
3+
the blosc-python package, which provides pre-built wheels.
4+
5+
List of functions to deprecate:
6+
[
7+
'destroy',
8+
'init',
9+
'compname_to_compcode',
10+
'cbuffer_sizes',
11+
'cbuffer_metainfo',
12+
'err_bad_cname',
13+
'decompress_partial'
14+
]
15+
16+
List of behaviour to deprecate:
17+
- Passing cname as bytes
18+
19+
"""
20+
21+
from numcodecs.abc import Codec
22+
import numpy as np
23+
24+
import blosc
25+
from blosc import (
26+
BITSHUFFLE,
27+
SHUFFLE,
28+
NOSHUFFLE,
29+
MAX_BUFFERSIZE,
30+
MAX_THREADS,
31+
MAX_TYPESIZE,
32+
VERSION_STRING,
33+
VERSION_DATE,
34+
)
35+
36+
# Public API of this module. Every public name defined or re-exported here is
# listed so that a star-import exposes the codec class and the helper
# functions, not only the blosc constants.
__all__ = [
    "BITSHUFFLE",
    "SHUFFLE",
    "NOSHUFFLE",
    "MAX_BUFFERSIZE",
    "MAX_THREADS",
    "MAX_TYPESIZE",
    "VERSION_STRING",
    "VERSION_DATE",
    "list_compressors",
    "get_nthreads",
    # Names below were previously defined in this module but not exported.
    "set_nthreads",
    "cbuffer_complib",
    "compress",
    "decompress",
    "Blosc",
    "AUTOBLOCKS",
    "AUTOSHUFFLE",
]
48+
49+
# Blocksize value meaning "use an automatic blocksize".
AUTOBLOCKS = 0
# Shuffle value meaning "pick the shuffle filter from the buffer's itemsize".
AUTOSHUFFLE = -1
# Display names for the shuffle modes, looked up as ``_shuffle_repr[shuffle + 1]``
# so that AUTOSHUFFLE (-1) maps to index 0.
_shuffle_repr = ["AUTOSHUFFLE", "NOSHUFFLE", "SHUFFLE", "BITSHUFFLE"]
52+
53+
54+
def list_compressors() -> list[str]:
    """Return whatever ``blosc.compressor_list()`` reports as available compressors."""
    available = blosc.compressor_list()
    return available
56+
57+
58+
def get_nthreads() -> int:
    """Return the number of threads blosc is currently set to use.

    The value is obtained by calling ``blosc.set_nthreads(1)`` and treating its
    return value as the previous setting, which is then written straight back.
    The global thread count is therefore briefly changed to 1 during the call.
    """
    current = blosc.set_nthreads(1)
    # Undo the probe above so the setting ends up as it was found.
    blosc.set_nthreads(current)
    return current
62+
63+
64+
def set_nthreads(nthreads: int) -> None:
    """Forward *nthreads* to ``blosc.set_nthreads``.

    Whatever the blosc call returns is discarded; this function returns ``None``.
    """
    blosc.set_nthreads(nthreads)
    return None
66+
67+
68+
def cbuffer_complib(source):
    """Return the result of ``blosc.get_clib`` for the compressed buffer *source*."""
    complib = blosc.get_clib(source)
    return complib
70+
71+
72+
def _check_not_object_array(arr):
    """Raise ``TypeError`` when *arr* has object dtype; otherwise return ``None``."""
    if arr.dtype != object:
        return
    raise TypeError("object arrays are not supported")
75+
76+
77+
def _check_buffer_size(buf, max_buffer_size):
    """Raise ``ValueError`` if *buf* holds more than *max_buffer_size* bytes.

    Parameters
    ----------
    buf : numpy.ndarray, buffer-protocol object, or sized object
        The data whose size is checked.
    max_buffer_size : int
        Largest accepted size, in bytes.

    Raises
    ------
    ValueError
        If the byte size of *buf* exceeds *max_buffer_size*.
    """
    if isinstance(buf, np.ndarray):
        size = buf.nbytes
    else:
        try:
            # len() counts elements, not bytes, for buffers whose itemsize is
            # greater than 1 (e.g. a cast memoryview or array.array), so read
            # the byte count through the buffer protocol instead.
            size = memoryview(buf).nbytes
        except TypeError:
            # Not a buffer-protocol object: keep the previous len()-based check.
            size = len(buf)

    if size > max_buffer_size:
        msg = f"Codec does not support buffers of > {max_buffer_size} bytes"
        raise ValueError(msg)
86+
87+
88+
def compress(source, cname: str, clevel: int, shuffle: int = SHUFFLE, blocksize=AUTOBLOCKS):
    """Compress *source* with blosc and return the result of the blosc call.

    Parameters
    ----------
    source : numpy.ndarray or buffer-protocol object
        Data to compress. Arrays are handed to ``blosc.compress_ptr`` by raw
        address; anything else is passed to ``blosc.compress``.
    cname : str
        Compressor name forwarded to blosc.
    clevel : int
        Compression level forwarded to blosc.
    shuffle : int, optional
        NOSHUFFLE, SHUFFLE, BITSHUFFLE or AUTOSHUFFLE. With AUTOSHUFFLE,
        BITSHUFFLE is used when the itemsize is 1 and SHUFFLE otherwise.
    blocksize : int, optional
        Value passed to ``blosc.set_blocksize`` for the duration of the call.

    Raises
    ------
    TypeError
        If *source* is an object-dtype array.
    """
    is_array = isinstance(source, np.ndarray)
    if is_array:
        # Reject object arrays before any global blosc state is modified.
        _check_not_object_array(source)

    if shuffle == AUTOSHUFFLE:
        # bytes and bytearray have no ``itemsize`` attribute, so for non-array
        # input the itemsize is read through the buffer protocol.
        itemsize = source.dtype.itemsize if is_array else memoryview(source).itemsize
        shuffle = BITSHUFFLE if itemsize == 1 else SHUFFLE

    blosc.set_blocksize(blocksize)
    try:
        if is_array:
            # NOTE(review): compress_ptr is given a raw address plus an item
            # count, which presumably requires contiguous memory; confirm
            # that callers only pass contiguous arrays.
            return blosc.compress_ptr(
                source.ctypes.data,
                source.size,
                source.dtype.itemsize,
                cname=cname,
                clevel=clevel,
                shuffle=shuffle,
            )
        return blosc.compress(source, cname=cname, clevel=clevel, shuffle=shuffle)
    finally:
        # Put the global blocksize back to automatic even if compression raised,
        # so a failed call cannot leak its blocksize into later calls.
        blosc.set_blocksize(AUTOBLOCKS)
109+
110+
111+
def decompress(source, dest: np.ndarray | bytearray | None = None):
    """Decompress the blosc buffer *source*.

    Parameters
    ----------
    source : buffer-protocol object
        Compressed data, as produced by blosc.
    dest : numpy.ndarray, bytearray or None, optional
        Destination for the decompressed data. If ``None``, the result of
        ``blosc.decompress`` is returned. An array is filled in place through
        its raw address; any other destination is filled by slice assignment.

    Returns
    -------
    The object returned by ``blosc.decompress`` when *dest* is ``None``,
    otherwise *dest* itself (previously ``None`` was returned in that case).

    Raises
    ------
    TypeError
        If *dest* is an object-dtype array.
    """
    if dest is None:
        return blosc.decompress(source)

    if isinstance(dest, np.ndarray):
        _check_not_object_array(dest)
        # NOTE(review): decompress_ptr writes through a raw address and nothing
        # here verifies that dest is large enough or contiguous; confirm
        # that callers guarantee both.
        blosc.decompress_ptr(source, dest.ctypes.data)
    else:
        dest[:] = blosc.decompress(source)
    # Hand the filled destination back so callers always receive the decoded data.
    return dest
119+
120+
121+
class Blosc(Codec):
    """Codec providing compression using the Blosc meta-compressor.

    Parameters
    ----------
    cname : string, optional
        A string naming one of the compression algorithms available within blosc, e.g.,
        'zstd', 'blosclz', 'lz4', 'lz4hc', 'zlib' or 'snappy'.
    clevel : integer, optional
        An integer between 0 and 9 specifying the compression level.
    shuffle : integer, optional
        Either NOSHUFFLE (0), SHUFFLE (1), BITSHUFFLE (2) or AUTOSHUFFLE (-1). If AUTOSHUFFLE,
        bit-shuffle will be used for buffers with itemsize 1, and byte-shuffle will
        be used otherwise. The default is `SHUFFLE`.
    blocksize : int
        The requested size of the compressed blocks. If 0 (default), an automatic
        blocksize will be used.

    See Also
    --------
    numcodecs.zstd.Zstd, numcodecs.lz4.LZ4

    """

    codec_id = 'blosc'
    # Shuffle constants re-exposed on the class for convenience.
    NOSHUFFLE = NOSHUFFLE
    SHUFFLE = SHUFFLE
    BITSHUFFLE = BITSHUFFLE
    AUTOSHUFFLE = AUTOSHUFFLE
    # Largest input, in bytes, that encode() and decode() accept.
    max_buffer_size = 2**31 - 1

    def __init__(self, cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=AUTOBLOCKS):
        """Store the codec configuration; nothing is validated here."""
        self.cname = cname
        # Keep a bytes form of the name alongside the value as given.
        if isinstance(cname, str):
            self._cname_bytes = cname.encode('ascii')
        else:
            self._cname_bytes = cname
        self.clevel = clevel
        self.shuffle = shuffle
        self.blocksize = blocksize

    def encode(self, buf):
        """Compress *buf* with this codec's settings and return the result.

        Raises
        ------
        ValueError
            If *buf* is larger than ``max_buffer_size`` bytes.
        """
        _check_buffer_size(buf, self.max_buffer_size)
        # compress() is declared to take the compressor name as str, but this
        # codec may have been constructed with a bytes name.
        cname = self.cname
        if isinstance(cname, bytes):
            cname = cname.decode('ascii')
        return compress(
            buf, cname, clevel=self.clevel, shuffle=self.shuffle, blocksize=self.blocksize
        )

    def decode(self, buf, out=None):
        """Decompress *buf*, optionally into *out*, and return the decoded data.

        Returns *out* when it is supplied, otherwise the newly decompressed
        object.

        Raises
        ------
        ValueError
            If *buf* is larger than ``max_buffer_size`` bytes.
        """
        _check_buffer_size(buf, self.max_buffer_size)
        decoded = decompress(buf, out)
        # When a destination is supplied, return it so that callers always get
        # the decoded data rather than None.
        return decoded if out is None else out

    def __repr__(self):
        """Return a constructor-style description with the shuffle mode shown by name."""
        # AUTOSHUFFLE is -1, hence the +1 offset into the name table.
        shuffle_name = _shuffle_repr[self.shuffle + 1]
        return (
            f'{type(self).__name__}(cname={self.cname!r}, clevel={self.clevel!r}, '
            f'shuffle={shuffle_name}, blocksize={self.blocksize})'
        )

0 commit comments

Comments
 (0)