|
12 | 12 |
|
13 | 13 | import bz2 |
14 | 14 | import gzip |
| 15 | +import io |
15 | 16 | import typing as ty |
16 | 17 |
|
| 18 | +try: |
| 19 | + from compression import zstd # type: ignore[import-not-found] |
| 20 | + |
| 21 | + HAVE_ZSTD = True |
| 22 | +except ImportError: # PY313 |
| 23 | + HAVE_ZSTD = False |
| 24 | + |
| 25 | +from .deprecated import alert_future_error |
17 | 26 | from .optpkg import optional_package |
18 | 27 |
|
19 | 28 | if ty.TYPE_CHECKING: |
20 | | - import io |
21 | | - |
22 | 29 | import indexed_gzip # type: ignore[import] |
23 | | - import pyzstd |
| 30 | + |
| 31 | + if not HAVE_ZSTD: # PY313 |
| 32 | + from backports import zstd # type: ignore[import] |
| 33 | + |
| 34 | + HAVE_ZSTD = True |
24 | 35 |
|
25 | 36 | HAVE_INDEXED_GZIP = True |
26 | 37 | HAVE_ZSTD = True |
| 38 | + |
| 39 | + ModeRT = ty.Literal['r', 'rt'] |
| 40 | + ModeRB = ty.Literal['rb'] |
| 41 | + ModeWT = ty.Literal['w', 'wt'] |
| 42 | + ModeWB = ty.Literal['wb'] |
| 43 | + ModeR = ty.Union[ModeRT, ModeRB] |
| 44 | + ModeW = ty.Union[ModeWT, ModeWB] |
| 45 | + Mode = ty.Union[ModeR, ModeW] |
| 46 | + |
27 | 47 | else: |
28 | 48 | indexed_gzip, HAVE_INDEXED_GZIP, _ = optional_package('indexed_gzip') |
29 | | - pyzstd, HAVE_ZSTD, _ = optional_package('pyzstd') |
30 | | - |
| 49 | + if not HAVE_ZSTD: # PY313 |
| 50 | + zstd, HAVE_ZSTD, _ = optional_package('backports.zstd') |
31 | 51 |
|
32 | 52 | # Collections of types for isinstance or exception matching |
33 | 53 | COMPRESSED_FILE_LIKES: tuple[type[io.IOBase], ...] = ( |
|
47 | 67 | IndexedGzipFile = gzip.GzipFile |
48 | 68 |
|
49 | 69 | if HAVE_ZSTD: |
50 | | - COMPRESSED_FILE_LIKES += (pyzstd.ZstdFile,) |
51 | | - COMPRESSION_ERRORS += (pyzstd.ZstdError,) |
| 70 | + COMPRESSED_FILE_LIKES += (zstd.ZstdFile,) |
| 71 | + COMPRESSION_ERRORS += (zstd.ZstdError,) |
| 72 | + |
| 73 | + |
class DeterministicGzipFile(gzip.GzipFile):
    """Deterministic variant of GzipFile

    This writer does not add filename information to the header, and defaults
    to a modification time (``mtime``) of 0 seconds.

    Parameters
    ----------
    filename : str or None
        Path of the file to open.  Only used to open the underlying file when
        ``fileobj`` is not given; an empty filename is always passed on to
        ``GzipFile``.
    mode : str or None
        Opening mode; ``None`` means ``'rb'``.  A ``'b'`` is appended when
        missing.  Text modes (containing ``'t'``) are rejected.
    compresslevel : int
        Compression level, passed through to ``GzipFile``.
    fileobj : file-like or None
        Already-open binary file object to wrap.  When given, ``filename`` is
        not opened.
    mtime : int
        Modification time passed through to ``GzipFile``.

    Raises
    ------
    TypeError
        If neither ``filename`` nor ``fileobj`` is given.
    ValueError
        If a text mode is requested.
    """

    def __init__(
        self,
        filename: str | None = None,
        mode: Mode | None = None,
        compresslevel: int = 9,
        fileobj: io.FileIO | None = None,
        mtime: int = 0,
    ):
        modestr: str = 'rb' if mode is None else mode

        if fileobj is None and filename is None:
            raise TypeError('Must define either fileobj or filename')

        # These guards are adapted from
        # https://github.com/python/cpython/blob/6ab65c6/Lib/gzip.py#L171-L174
        # Reject text modes up front: appending 'b' to e.g. 'rt' would only
        # produce an obscure "text and binary mode" error further down.
        if 't' in modestr:
            raise ValueError(f'Invalid mode: {mode!r}')
        if 'b' not in modestr:
            modestr = f'{modestr}b'

        # Track a handle that we open ourselves so that it can be released if
        # GzipFile initialization fails; a caller-supplied fileobj is never
        # closed here.
        owned = None
        if fileobj is None:
            owned = open(filename, modestr)  # type: ignore[arg-type]
            # Cast because GzipFile.myfileobj has type io.FileIO while open returns ty.IO
            fileobj = self.myfileobj = ty.cast('io.FileIO', owned)
        try:
            super().__init__(
                filename='',
                mode=modestr,
                compresslevel=compresslevel,
                fileobj=fileobj,
                mtime=mtime,
            )
        except BaseException:
            if owned is not None:
                owned.close()
            raise
| 109 | + |
| 110 | + |
def gzip_open(
    filename: str,
    mode: Mode = 'rb',
    compresslevel: int = 9,
    mtime: int = 0,
    keep_open: bool = False,
) -> gzip.GzipFile:
    """Open a gzip file for reading or writing.

    An ``IndexedGzipFile`` is returned when ``HAVE_INDEXED_GZIP`` is true and
    ``mode`` is exactly ``'rb'``.  In every other case a
    ``DeterministicGzipFile`` is returned.

    Parameters
    ----------
    filename : str
        Path of file to open.
    mode : str
        Opening mode.  Only ``'rb'`` selects the indexed reader; any other
        value is handed to ``DeterministicGzipFile``.
    compresslevel : int
        Compression level, passed to ``DeterministicGzipFile``.
    mtime : int
        Modification time, passed to ``DeterministicGzipFile``.  Not used
        when an ``IndexedGzipFile`` is returned.
    keep_open : bool
        Whether the ``IndexedGzipFile`` should keep its file handle open
        between reads (passed on as ``drop_handles=not keep_open``).  Not
        used when a ``DeterministicGzipFile`` is returned.

    Returns
    -------
    gzip_file : IndexedGzipFile or DeterministicGzipFile
    """
    # Prefer indexed_gzip for faster read access; keep_open controls whether
    # the IndexedGzipFile holds on to the file handle or drops it.
    indexed_read = HAVE_INDEXED_GZIP and mode == 'rb'
    if indexed_read:
        return IndexedGzipFile(filename, drop_handles=not keep_open)

    return DeterministicGzipFile(filename, mode, compresslevel, mtime=mtime)
| 152 | + |
| 153 | + |
def zstd_open(
    filename: str,
    mode: Mode = 'r',
    *,
    level: int | None = None,
    options: dict | None = None,
    zstd_dict: zstd.ZstdDict | None = None,
    level_or_option: int | dict | None = None,
) -> zstd.ZstdFile:
    """Open a zstd file for reading or writing.

    The specific object returned will be a ``compression.zstd.ZstdFile`` or
    a ``backports.zstd.ZstdFile``.

    Parameters
    ----------
    filename : str
        Path of file to open.
    mode : str
        Opening mode.
    level : int
        Compression level when writing.
    options : dict
        Dictionary of compression/decompression options.
    zstd_dict : ZstdDict
        Dictionary used for compression/decompression.
    level_or_option : int or dict
        Legacy combined argument.  Passing it reports a pending removal
        through ``alert_future_error``; an ``int`` is then used as ``level``
        and anything else as ``options``.

    Raises
    ------
    ValueError
        If ``level_or_option`` is given together with ``level`` or
        ``options``.
    """
    legacy = level_or_option
    if legacy is not None:
        alert_future_error(
            'The level_or_option parameter will be removed in a future version of nibabel',
            '7.0',
            warning_rec='This warning can be silenced by using the separate level/option parameters',
            error_rec='Future errors can be avoided by using the separate level/option parameters',
            error_class=TypeError,
        )
        if not (level is None and options is None):
            raise ValueError('Only one of level_or_option, level or options may be specified')
        # Both targets are None here, so route the legacy value to one of them.
        level, options = (legacy, None) if isinstance(legacy, int) else (None, legacy)

    return zstd.ZstdFile(filename, mode, level=level, options=options, zstd_dict=zstd_dict)
0 commit comments