
Commit 72e5b25

gh-128646: Implement GzipFile.readinto[1]() methods (GH-128647)
The new methods simply delegate to the underlying buffer, much like the existing GzipFile.read[1] methods. This avoids extra allocations caused by the BufferedIOBase.readinto implementation previously used. This commit also factors out a common readability check rather than copying it an additional two times.
1 parent 7879081 commit 72e5b25
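For context on the allocations mentioned above: the readinto() that GzipFile previously inherited from io.BufferedIOBase behaves roughly like the sketch below, first building a temporary bytes object with read() and then copying it into the caller's buffer. This is an illustrative approximation, not the exact stdlib source; the helper name is hypothetical.

    # Rough sketch of the inherited io.BufferedIOBase.readinto() behaviour
    # (illustrative approximation, not the actual stdlib code):
    def _inherited_readinto(self, b):
        data = self.read(len(b))    # allocates a temporary bytes object
        n = len(data)
        b[:n] = data                # extra copy into the caller's buffer
        return n

The new GzipFile.readinto()/readinto1() methods avoid both the temporary object and the copy by handing the caller's buffer straight to the internal buffered reader, as the Lib/gzip.py diff below shows.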

File tree: 3 files changed (+55, -9 lines)

Lib/gzip.py (+19, -9)
@@ -325,31 +325,41 @@ def _write_raw(self, data):
 
         return length
 
-    def read(self, size=-1):
-        self._check_not_closed()
+    def _check_read(self, caller):
         if self.mode != READ:
             import errno
-            raise OSError(errno.EBADF, "read() on write-only GzipFile object")
+            msg = f"{caller}() on write-only GzipFile object"
+            raise OSError(errno.EBADF, msg)
+
+    def read(self, size=-1):
+        self._check_not_closed()
+        self._check_read("read")
         return self._buffer.read(size)
 
     def read1(self, size=-1):
         """Implements BufferedIOBase.read1()
 
         Reads up to a buffer's worth of data if size is negative."""
         self._check_not_closed()
-        if self.mode != READ:
-            import errno
-            raise OSError(errno.EBADF, "read1() on write-only GzipFile object")
+        self._check_read("read1")
 
         if size < 0:
             size = io.DEFAULT_BUFFER_SIZE
         return self._buffer.read1(size)
 
+    def readinto(self, b):
+        self._check_not_closed()
+        self._check_read("readinto")
+        return self._buffer.readinto(b)
+
+    def readinto1(self, b):
+        self._check_not_closed()
+        self._check_read("readinto1")
+        return self._buffer.readinto1(b)
+
     def peek(self, n):
         self._check_not_closed()
-        if self.mode != READ:
-            import errno
-            raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
+        self._check_read("peek")
         return self._buffer.peek(n)
 
     @property

Lib/test/test_gzip.py (+32)
@@ -143,6 +143,38 @@ def test_read1(self):
             self.assertEqual(f.tell(), nread)
         self.assertEqual(b''.join(blocks), data1 * 50)
 
+    def test_readinto(self):
+        # 10MB of uncompressible data to ensure multiple reads
+        large_data = os.urandom(10 * 2**20)
+        with gzip.GzipFile(self.filename, 'wb') as f:
+            f.write(large_data)
+
+        buf = bytearray(len(large_data))
+        with gzip.GzipFile(self.filename, 'r') as f:
+            nbytes = f.readinto(buf)
+        self.assertEqual(nbytes, len(large_data))
+        self.assertEqual(buf, large_data)
+
+    def test_readinto1(self):
+        # 10MB of uncompressible data to ensure multiple reads
+        large_data = os.urandom(10 * 2**20)
+        with gzip.GzipFile(self.filename, 'wb') as f:
+            f.write(large_data)
+
+        nread = 0
+        buf = bytearray(len(large_data))
+        memview = memoryview(buf)  # Simplifies slicing
+        with gzip.GzipFile(self.filename, 'r') as f:
+            for count in range(200):
+                nbytes = f.readinto1(memview[nread:])
+                if not nbytes:
+                    break
+                nread += nbytes
+                self.assertEqual(f.tell(), nread)
+        self.assertEqual(buf, large_data)
+        # readinto1() should require multiple loops
+        self.assertGreater(count, 1)
+
     @bigmemtest(size=_4G, memuse=1)
     def test_read_large(self, size):
         # Read chunk size over UINT_MAX should be supported, despite zlib's
NEWS entry (new file, +4)
@@ -0,0 +1,4 @@
+Eagerly write to buffers passed to :class:`gzip.GzipFile`'s
+:meth:`~io.BufferedIOBase.readinto` and
+:meth:`~io.BufferedIOBase.readinto1` implementations,
+avoiding unnecessary allocations. Patch by Chris Markiewicz.
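As a usage illustration (not part of the commit), a caller can now stream decompressed data into a preallocated buffer; the file name below is an assumed example input.

    import gzip

    buf = bytearray(64 * 1024)                    # reusable 64 KiB buffer
    view = memoryview(buf)
    total = 0
    with gzip.GzipFile("example.txt.gz", "rb") as f:   # assumed example file
        while True:
            n = f.readinto(view)                  # decompressed bytes written into buf
            if n == 0:                            # 0 signals end of stream
                break
            total += n                            # stand-in for real processing
    print(f"decompressed {total} bytes")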
