Skip to content

Commit a15bb01

Browse files
authored
Merge pull request #1444 from pauldmccarthy/rf/zstd
Use `compression.zstd` or `backports.zstd` in preference to `pyzstd`
2 parents 7cda525 + 4d2755f commit a15bb01

15 files changed

+309
-241
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ repos:
3131
- types-Pillow
3232
- pydicom
3333
- numpy
34-
- pyzstd
34+
- backports.zstd
3535
- importlib_resources
3636
args: ["nibabel"]
3737
pass_filenames: false

nibabel/_compression.py

Lines changed: 152 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,42 @@
1212

1313
import bz2
1414
import gzip
15+
import io
1516
import typing as ty
1617

18+
try:
19+
from compression import zstd # type: ignore[import-not-found]
20+
21+
HAVE_ZSTD = True
22+
except ImportError: # PY313
23+
HAVE_ZSTD = False
24+
25+
from .deprecated import alert_future_error
1726
from .optpkg import optional_package
1827

1928
if ty.TYPE_CHECKING:
20-
import io
21-
2229
import indexed_gzip # type: ignore[import]
23-
import pyzstd
30+
31+
if not HAVE_ZSTD: # PY313
32+
from backports import zstd # type: ignore[import]
33+
34+
HAVE_ZSTD = True
2435

2536
HAVE_INDEXED_GZIP = True
2637
HAVE_ZSTD = True
38+
39+
ModeRT = ty.Literal['r', 'rt']
40+
ModeRB = ty.Literal['rb']
41+
ModeWT = ty.Literal['w', 'wt']
42+
ModeWB = ty.Literal['wb']
43+
ModeR = ty.Union[ModeRT, ModeRB]
44+
ModeW = ty.Union[ModeWT, ModeWB]
45+
Mode = ty.Union[ModeR, ModeW]
46+
2747
else:
2848
indexed_gzip, HAVE_INDEXED_GZIP, _ = optional_package('indexed_gzip')
29-
pyzstd, HAVE_ZSTD, _ = optional_package('pyzstd')
30-
49+
if not HAVE_ZSTD: # PY313
50+
zstd, HAVE_ZSTD, _ = optional_package('backports.zstd')
3151

3252
# Collections of types for isinstance or exception matching
3353
COMPRESSED_FILE_LIKES: tuple[type[io.IOBase], ...] = (
@@ -47,5 +67,130 @@
4767
IndexedGzipFile = gzip.GzipFile
4868

4969
if HAVE_ZSTD:
50-
COMPRESSED_FILE_LIKES += (pyzstd.ZstdFile,)
51-
COMPRESSION_ERRORS += (pyzstd.ZstdError,)
70+
COMPRESSED_FILE_LIKES += (zstd.ZstdFile,)
71+
COMPRESSION_ERRORS += (zstd.ZstdError,)
72+
73+
74+
class DeterministicGzipFile(gzip.GzipFile):
75+
"""Deterministic variant of GzipFile
76+
77+
This writer does not add filename information to the header, and defaults
78+
to a modification time (``mtime``) of 0 seconds.
79+
"""
80+
81+
def __init__(
82+
self,
83+
filename: str | None = None,
84+
mode: Mode | None = None,
85+
compresslevel: int = 9,
86+
fileobj: io.FileIO | None = None,
87+
mtime: int = 0,
88+
):
89+
if mode is None:
90+
mode = 'rb'
91+
modestr: str = mode
92+
93+
# These two guards are adapted from
94+
# https://github.com/python/cpython/blob/6ab65c6/Lib/gzip.py#L171-L174
95+
if 'b' not in modestr:
96+
modestr = f'{mode}b'
97+
if fileobj is None:
98+
if filename is None:
99+
raise TypeError('Must define either fileobj or filename')
100+
# Cast because GzipFile.myfileobj has type io.FileIO while open returns ty.IO
101+
fileobj = self.myfileobj = ty.cast('io.FileIO', open(filename, modestr))
102+
super().__init__(
103+
filename='',
104+
mode=modestr,
105+
compresslevel=compresslevel,
106+
fileobj=fileobj,
107+
mtime=mtime,
108+
)
109+
110+
111+
def gzip_open(
112+
filename: str,
113+
mode: Mode = 'rb',
114+
compresslevel: int = 9,
115+
mtime: int = 0,
116+
keep_open: bool = False,
117+
) -> gzip.GzipFile:
118+
"""Open a gzip file for reading or writing.
119+
120+
If opening a file for reading, and ``indexed_gzip`` is available,
121+
an ``IndexedGzipFile`` is returned.
122+
123+
Otherwise (opening for writing, or ``indexed_gzip`` not available),
124+
a ``DeterministicGzipFile`` is returned.
125+
126+
Parameters
127+
----------
128+
129+
filename : str
130+
Path of file to open.
131+
mode : str
132+
Opening mode - either ``rb`` or ``wb``.
133+
compresslevel: int
134+
Compression level when writing.
135+
mtime: int
136+
Modification time used when writing a file - passed to the
137+
``DeterministicGzipFile``. Ignored when reading.
138+
keep_open: bool
139+
Whether to keep the file handle open between reads. Ignored when writing,
140+
or when ``indexed_gzip`` is not present.
141+
"""
142+
if not HAVE_INDEXED_GZIP or mode != 'rb':
143+
gzip_file = DeterministicGzipFile(filename, mode, compresslevel, mtime=mtime)
144+
145+
# use indexed_gzip if possible for faster read access. If keep_open ==
146+
# True, we tell IndexedGzipFile to keep the file handle open. Otherwise
147+
# the IndexedGzipFile will close/open the file on each read.
148+
else:
149+
gzip_file = IndexedGzipFile(filename, drop_handles=not keep_open)
150+
151+
return gzip_file
152+
153+
154+
def zstd_open(
155+
filename: str,
156+
mode: Mode = 'r',
157+
*,
158+
level: int | None = None,
159+
options: dict | None = None,
160+
zstd_dict: zstd.ZstdDict | None = None,
161+
level_or_option: int | dict | None = None,
162+
) -> zstd.ZstdFile:
163+
"""Open a zstd file for reading or writing.
164+
165+
The specific object returned will be a ``compression.zstd.ZstdFile`` or
166+
a ``backports.zstd.ZstdFile``.
167+
168+
Parameters
169+
----------
170+
171+
filename : str
172+
Path of file to open.
173+
mode : str
174+
Opening mode.
175+
zstd_dict : ZstdDict
176+
Dictionary used for compression/decompression.
177+
level : int
178+
Compression level when writing.
179+
options : dict
180+
Dictionary of compression/decompression options.
181+
"""
182+
if level_or_option is not None:
183+
alert_future_error(
184+
'The level_or_option parameter will be removed in a future version of nibabel',
185+
'7.0',
186+
warning_rec='This warning can be silenced by using the separate level/option parameters',
187+
error_rec='Future errors can be avoided by using the separate level/option parameters',
188+
error_class=TypeError,
189+
)
190+
if level is not None or options is not None:
191+
raise ValueError('Only one of level_or_option, level or options may be specified')
192+
if isinstance(level_or_option, int):
193+
level = level_or_option
194+
else:
195+
options = level_or_option
196+
return zstd.ZstdFile(filename, mode, level=level, options=options, zstd_dict=zstd_dict)

nibabel/arrayproxy.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
import numpy as np
3737

38-
from . import openers
38+
from . import _compression, openers
3939
from .fileslice import canonical_slicers, fileslice
4040
from .volumeutils import apply_read_scaling, array_from_file
4141

@@ -263,7 +263,7 @@ def _should_keep_file_open(self, keep_file_open):
263263
- whether ``self.file_like`` is an open file handle, or a path to a
264264
``'.gz'`` file, or a path to a non-gzip file.
265265
- whether ``indexed_gzip`` is present (see
266-
:attr:`.openers.HAVE_INDEXED_GZIP`).
266+
:attr:`._compression.HAVE_INDEXED_GZIP`).
267267
268268
An ``ArrayProxy`` object uses two internal flags to manage
269269
``ImageOpener`` instances and underlying file handles.
@@ -330,7 +330,7 @@ def _should_keep_file_open(self, keep_file_open):
330330
if self._has_fh():
331331
return False, False
332332
# if the file is a gzip file, and we have_indexed_gzip,
333-
have_igzip = openers.HAVE_INDEXED_GZIP and self.file_like.endswith('.gz')
333+
have_igzip = _compression.HAVE_INDEXED_GZIP and self.file_like.endswith('.gz')
334334

335335
persist_opener = keep_file_open or have_igzip
336336
return keep_file_open, persist_opener

nibabel/benchmarks/bench_arrayproxy_slicing.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import numpy as np
1919

2020
import nibabel as nib
21-
from nibabel.openers import HAVE_INDEXED_GZIP
21+
from nibabel._compression import HAVE_INDEXED_GZIP
2222
from nibabel.tmpdirs import InTemporaryDirectory
2323

2424
from ..rstutils import rst_table
@@ -135,14 +135,14 @@ def fmt_sliceobj(sliceobj):
135135
# load uncompressed and compressed versions of the image
136136
img = nib.load(testfile, keep_file_open=keep_open)
137137

138-
with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', have_igzip):
138+
with mock.patch('nibabel._compression.HAVE_INDEXED_GZIP', have_igzip):
139139
imggz = nib.load(testfilegz, keep_file_open=keep_open)
140140

141141
def basefunc():
142142
img.dataobj[fix_sliceobj(sliceobj)]
143143

144144
def testfunc():
145-
with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', have_igzip):
145+
with mock.patch('nibabel._compression.HAVE_INDEXED_GZIP', have_igzip):
146146
imggz.dataobj[fix_sliceobj(sliceobj)]
147147

148148
# make sure nothing is floating around from the previous test

nibabel/benchmarks/bench_fileslice.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,15 @@
1414

1515
import numpy as np
1616

17+
from .._compression import HAVE_ZSTD
1718
from ..fileslice import fileslice
1819
from ..openers import ImageOpener
19-
from ..optpkg import optional_package
2020
from ..rstutils import rst_table
2121
from ..tmpdirs import InTemporaryDirectory
2222

2323
SHAPE = (64, 64, 32, 100)
2424
ROW_NAMES = [f'axis {i}, len {dim}' for i, dim in enumerate(SHAPE)]
2525
COL_NAMES = ['mid int', 'step 1', 'half step 1', 'step mid int']
26-
HAVE_ZSTD = optional_package('pyzstd')[1]
2726

2827

2928
def _slices_for_len(L):

0 commit comments

Comments
 (0)