Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
e9188e8
TEST: Adjust existing tests/benchmarks to import zstd from _compression,
pauldmccarthy Nov 26, 2025
20df2cf
RF: Try to import zstd from compression.zstd, backports.zstd, or pyzs…
pauldmccarthy Nov 26, 2025
cbb1a89
RF: Move gzip/zstd specific opening logic from openers into _compress…
pauldmccarthy Nov 26, 2025
6cb1755
TEST: Update mocks in unit test for openers->_compression move
pauldmccarthy Nov 26, 2025
3886275
RF: Move type hints, add docs for gzip_open/zstd_open
pauldmccarthy Nov 26, 2025
f728911
RF: Adjust zstd_open function to accept level/option parameters, and to
pauldmccarthy Nov 26, 2025
922be60
MNT: Use backports.zstd instead of pyzstd as primary dependency
pauldmccarthy Nov 26, 2025
10aeeb5
MNT: Update lockfile
pauldmccarthy Nov 26, 2025
e9b87f0
RF: Drop support for pyzstd - depend on compression.zstd, falling bac…
pauldmccarthy Nov 26, 2025
c9fe6fc
RF: Remove (HAVE_INDEXED_GZIP, IndexedGzipFile, DeterministicGzipFile…
pauldmccarthy Nov 26, 2025
efdb160
TEST: Fix imports, update uses of HAVE_INDEXED_GZIP, IndexedGzipFile
pauldmccarthy Nov 26, 2025
44745ac
STY: Address style warnings
pauldmccarthy Nov 26, 2025
b410d92
RF: Import _compression module rather than individual symbols to ease…
pauldmccarthy Nov 27, 2025
d5c28b2
TEST: It appears that run_tests should be a set
pauldmccarthy Nov 27, 2025
24f8b52
STY: Style fixes
pauldmccarthy Nov 27, 2025
2d0040d
STY: More style fixes
pauldmccarthy Nov 27, 2025
4d2755f
RF: Simplify logic
pauldmccarthy Dec 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ repos:
- types-Pillow
- pydicom
- numpy
- pyzstd
- backports.zstd
- importlib_resources
args: ["nibabel"]
pass_filenames: false
Expand Down
159 changes: 152 additions & 7 deletions nibabel/_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,42 @@

import bz2
import gzip
import io
import typing as ty

try:
from compression import zstd # type: ignore[import-not-found]

HAVE_ZSTD = True
except ImportError: # PY313
HAVE_ZSTD = False

from .deprecated import alert_future_error
from .optpkg import optional_package

if ty.TYPE_CHECKING:
import io

import indexed_gzip # type: ignore[import]
import pyzstd

if not HAVE_ZSTD: # PY313
from backports import zstd # type: ignore[import]

HAVE_ZSTD = True

HAVE_INDEXED_GZIP = True
HAVE_ZSTD = True

ModeRT = ty.Literal['r', 'rt']
ModeRB = ty.Literal['rb']
ModeWT = ty.Literal['w', 'wt']
ModeWB = ty.Literal['wb']
ModeR = ty.Union[ModeRT, ModeRB]
ModeW = ty.Union[ModeWT, ModeWB]
Mode = ty.Union[ModeR, ModeW]

else:
indexed_gzip, HAVE_INDEXED_GZIP, _ = optional_package('indexed_gzip')
pyzstd, HAVE_ZSTD, _ = optional_package('pyzstd')

if not HAVE_ZSTD: # PY313
zstd, HAVE_ZSTD, _ = optional_package('backports.zstd')

# Collections of types for isinstance or exception matching
COMPRESSED_FILE_LIKES: tuple[type[io.IOBase], ...] = (
Expand All @@ -47,5 +67,130 @@
IndexedGzipFile = gzip.GzipFile

if HAVE_ZSTD:
COMPRESSED_FILE_LIKES += (pyzstd.ZstdFile,)
COMPRESSION_ERRORS += (pyzstd.ZstdError,)
COMPRESSED_FILE_LIKES += (zstd.ZstdFile,)
COMPRESSION_ERRORS += (zstd.ZstdError,)


class DeterministicGzipFile(gzip.GzipFile):
    """``GzipFile`` subclass intended to produce reproducible output.

    The underlying ``GzipFile`` is always initialised with an empty
    ``filename``, so no filename information is added to the header, and
    ``mtime`` defaults to 0 seconds instead of the current time.
    """

    def __init__(
        self,
        filename: str | None = None,
        mode: Mode | None = None,
        compresslevel: int = 9,
        fileobj: io.FileIO | None = None,
        mtime: int = 0,
    ):
        # The normalisation below mirrors the guards in CPython's own GzipFile:
        # https://github.com/python/cpython/blob/6ab65c6/Lib/gzip.py#L171-L174
        binary_mode: str = 'rb' if mode is None else mode
        if 'b' not in binary_mode:
            binary_mode = f'{binary_mode}b'

        if fileobj is None:
            if filename is None:
                raise TypeError('Must define either fileobj or filename')
            # We open the file ourselves so that the parent class can be given
            # an empty filename. The cast is needed because
            # GzipFile.myfileobj is typed as io.FileIO whereas open() returns
            # ty.IO.
            opened = ty.cast('io.FileIO', open(filename, binary_mode))
            self.myfileobj = opened
            fileobj = opened

        super().__init__(
            filename='',
            mode=binary_mode,
            compresslevel=compresslevel,
            fileobj=fileobj,
            mtime=mtime,
        )


def gzip_open(
    filename: str,
    mode: Mode = 'rb',
    compresslevel: int = 9,
    mtime: int = 0,
    keep_open: bool = False,
) -> gzip.GzipFile:
    """Open a gzip file for reading or writing.

    When ``mode`` is exactly ``'rb'`` and ``HAVE_INDEXED_GZIP`` is true, an
    ``IndexedGzipFile`` is returned. In every other case (any other mode, or
    ``indexed_gzip`` not available) a ``DeterministicGzipFile`` is returned.

    Parameters
    ----------
    filename : str
        Path of file to open.
    mode : str
        Opening mode - either ``rb`` or ``wb``.
    compresslevel : int
        Compression level, passed to the ``DeterministicGzipFile``. Not used
        when an ``IndexedGzipFile`` is returned.
    mtime : int
        Modification time, passed to the ``DeterministicGzipFile``. Not used
        when an ``IndexedGzipFile`` is returned.
    keep_open : bool
        Whether to keep the file handle open between reads. Only used when
        an ``IndexedGzipFile`` is returned.

    Returns
    -------
    gzip.GzipFile
        The opened file object.
    """
    # Use indexed_gzip where possible for faster read access. With
    # keep_open=True the IndexedGzipFile is told not to drop its file handle;
    # otherwise it will close/open the file on each read.
    if HAVE_INDEXED_GZIP and mode == 'rb':
        return IndexedGzipFile(filename, drop_handles=not keep_open)

    return DeterministicGzipFile(filename, mode, compresslevel, mtime=mtime)


def zstd_open(
    filename: str,
    mode: Mode = 'r',
    *,
    level: int | None = None,
    options: dict | None = None,
    zstd_dict: zstd.ZstdDict | None = None,
    level_or_option: int | dict | None = None,
) -> zstd.ZstdFile:
    """Open a zstd file for reading or writing.

    The specific object returned will be a ``compression.zstd.ZstdFile`` or
    a ``backports.zstd.ZstdFile``.

    Parameters
    ----------
    filename : str
        Path of file to open.
    mode : str
        Opening mode.
    level : int
        Compression level when writing.
    options : dict
        Dictionary of compression/decompression options.
    zstd_dict : ZstdDict
        Dictionary used for compression/decompression.
    level_or_option : int or dict
        Legacy combined parameter, slated for removal. Passing it is reported
        through ``alert_future_error``. An ``int`` is used as ``level``;
        anything else is used as ``options``.

    Raises
    ------
    ValueError
        If ``level_or_option`` is given together with ``level`` or
        ``options``.
    """
    # Common case: the legacy argument is unused, so pass everything through.
    if level_or_option is None:
        return zstd.ZstdFile(filename, mode, level=level, options=options, zstd_dict=zstd_dict)

    alert_future_error(
        'The level_or_option parameter will be removed in a future version of nibabel',
        '7.0',
        warning_rec='This warning can be silenced by using the separate level/option parameters',
        error_rec='Future errors can be avoided by using the separate level/option parameters',
        error_class=TypeError,
    )

    # The legacy argument may not be mixed with either of its replacements.
    if not (level is None and options is None):
        raise ValueError('Only one of level_or_option, level or options may be specified')

    # Route the legacy value to whichever new-style parameter it stands for.
    if isinstance(level_or_option, int):
        level = level_or_option
    else:
        options = level_or_option

    return zstd.ZstdFile(filename, mode, level=level, options=options, zstd_dict=zstd_dict)
6 changes: 3 additions & 3 deletions nibabel/arrayproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

import numpy as np

from . import openers
from . import _compression, openers
from .fileslice import canonical_slicers, fileslice
from .volumeutils import apply_read_scaling, array_from_file

Expand Down Expand Up @@ -263,7 +263,7 @@ def _should_keep_file_open(self, keep_file_open):
- whether ``self.file_like`` is an open file handle, or a path to a
``'.gz'`` file, or a path to a non-gzip file.
- whether ``indexed_gzip`` is present (see
:attr:`.openers.HAVE_INDEXED_GZIP`).
:attr:`._compression.HAVE_INDEXED_GZIP`).

An ``ArrayProxy`` object uses two internal flags to manage
``ImageOpener`` instances and underlying file handles.
Expand Down Expand Up @@ -330,7 +330,7 @@ def _should_keep_file_open(self, keep_file_open):
if self._has_fh():
return False, False
# if the file is a gzip file, and we have_indexed_gzip,
have_igzip = openers.HAVE_INDEXED_GZIP and self.file_like.endswith('.gz')
have_igzip = _compression.HAVE_INDEXED_GZIP and self.file_like.endswith('.gz')

persist_opener = keep_file_open or have_igzip
return keep_file_open, persist_opener
Expand Down
6 changes: 3 additions & 3 deletions nibabel/benchmarks/bench_arrayproxy_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import numpy as np

import nibabel as nib
from nibabel.openers import HAVE_INDEXED_GZIP
from nibabel._compression import HAVE_INDEXED_GZIP
from nibabel.tmpdirs import InTemporaryDirectory

from ..rstutils import rst_table
Expand Down Expand Up @@ -135,14 +135,14 @@ def fmt_sliceobj(sliceobj):
# load uncompressed and compressed versions of the image
img = nib.load(testfile, keep_file_open=keep_open)

with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', have_igzip):
with mock.patch('nibabel._compression.HAVE_INDEXED_GZIP', have_igzip):
imggz = nib.load(testfilegz, keep_file_open=keep_open)

def basefunc():
img.dataobj[fix_sliceobj(sliceobj)]

def testfunc():
with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', have_igzip):
with mock.patch('nibabel._compression.HAVE_INDEXED_GZIP', have_igzip):
imggz.dataobj[fix_sliceobj(sliceobj)]

# make sure nothing is floating around from the previous test
Expand Down
3 changes: 1 addition & 2 deletions nibabel/benchmarks/bench_fileslice.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,15 @@

import numpy as np

from .._compression import HAVE_ZSTD
from ..fileslice import fileslice
from ..openers import ImageOpener
from ..optpkg import optional_package
from ..rstutils import rst_table
from ..tmpdirs import InTemporaryDirectory

SHAPE = (64, 64, 32, 100)
ROW_NAMES = [f'axis {i}, len {dim}' for i, dim in enumerate(SHAPE)]
COL_NAMES = ['mid int', 'step 1', 'half step 1', 'step mid int']
HAVE_ZSTD = optional_package('pyzstd')[1]


def _slices_for_len(L):
Expand Down
Loading