Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-128646: Implement GzipFile.readinto() functions #128647

Merged
merged 9 commits into from
Mar 8, 2025
28 changes: 19 additions & 9 deletions Lib/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,31 +325,41 @@ def _write_raw(self, data):

return length

def read(self, size=-1):
self._check_not_closed()
def _check_read(self, caller):
if self.mode != READ:
import errno
raise OSError(errno.EBADF, "read() on write-only GzipFile object")
msg = f"{caller}() on write-only GzipFile object"
raise OSError(errno.EBADF, msg)

def read(self, size=-1):
    """Return the result of reading *size* bytes from the internal buffer.

    The object is first validated with ``_check_not_closed()`` and then
    with ``_check_read("read")``; any exception those helpers raise
    propagates to the caller.  *size* is forwarded unchanged to
    ``self._buffer.read()`` (the default is -1).
    """
    # Order matters: the closed check runs before the mode check.
    self._check_not_closed()
    self._check_read("read")

    reader = self._buffer
    return reader.read(size)

def read1(self, size=-1):
"""Implements BufferedIOBase.read1()

Reads up to a buffer's worth of data if size is negative."""
self._check_not_closed()
if self.mode != READ:
import errno
raise OSError(errno.EBADF, "read1() on write-only GzipFile object")
self._check_read("read1")

if size < 0:
size = io.DEFAULT_BUFFER_SIZE
return self._buffer.read1(size)

def readinto(self, b):
    """Fill the caller-supplied buffer *b* via the internal buffer.

    The object is first validated with ``_check_not_closed()`` and then
    with ``_check_read("readinto")``; any exception those helpers raise
    propagates to the caller.  The return value is whatever
    ``self._buffer.readinto(b)`` returns.
    """
    # Order matters: the closed check runs before the mode check.
    self._check_not_closed()
    self._check_read("readinto")

    reader = self._buffer
    return reader.readinto(b)

def readinto1(self, b):
    """Fill the caller-supplied buffer *b* via the internal buffer's readinto1().

    The object is first validated with ``_check_not_closed()`` and then
    with ``_check_read("readinto1")``; any exception those helpers raise
    propagates to the caller.  The return value is whatever
    ``self._buffer.readinto1(b)`` returns.
    """
    # Order matters: the closed check runs before the mode check.
    self._check_not_closed()
    self._check_read("readinto1")

    reader = self._buffer
    return reader.readinto1(b)

def peek(self, n):
self._check_not_closed()
if self.mode != READ:
import errno
raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
self._check_read("peek")
return self._buffer.peek(n)

@property
Expand Down
32 changes: 32 additions & 0 deletions Lib/test/test_gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,38 @@ def test_read1(self):
self.assertEqual(f.tell(), nread)
self.assertEqual(b''.join(blocks), data1 * 50)

def test_readinto(self):
    """Write a large payload, then read it back with one readinto() call."""
    # 10 MiB of random (incompressible) bytes, so that filling the
    # destination buffer is intended to take multiple underlying reads.
    payload = os.urandom(10 * 2**20)
    expected_len = len(payload)
    with gzip.GzipFile(self.filename, 'wb') as writer:
        writer.write(payload)

    # Destination is pre-sized to hold the whole payload.
    target = bytearray(expected_len)
    with gzip.GzipFile(self.filename, 'r') as reader:
        filled = reader.readinto(target)

    self.assertEqual(filled, expected_len)
    self.assertEqual(target, payload)

def test_readinto1(self):
    """Write a large payload, then drain it with repeated readinto1() calls."""
    # 10 MiB of random (incompressible) bytes, so that draining the file
    # is intended to take multiple reads.
    payload = os.urandom(10 * 2**20)
    with gzip.GzipFile(self.filename, 'wb') as writer:
        writer.write(payload)

    target = bytearray(len(payload))
    # A memoryview lets each call write into the unfilled tail of
    # `target` without copying.
    window = memoryview(target)
    offset = 0
    with gzip.GzipFile(self.filename, 'r') as reader:
        # Bounded loop: at most 200 readinto1() calls are attempted.
        for count in range(200):
            got = reader.readinto1(window[offset:])
            if not got:
                break
            offset += got
            # The stream position must track the bytes delivered so far.
            self.assertEqual(reader.tell(), offset)

    self.assertEqual(target, payload)
    # readinto1() should require multiple loops
    self.assertGreater(count, 1)

@bigmemtest(size=_4G, memuse=1)
def test_read_large(self, size):
# Read chunk size over UINT_MAX should be supported, despite zlib's
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Eagerly write to buffers passed to :class:`gzip.GzipFile`'s
:meth:`~io.BufferedIOBase.readinto` and
:meth:`~io.BufferedIOBase.readinto1` implementations,
avoiding unnecessary allocations. Patch by Chris Markiewicz.
Loading