diff options
author | Chris Markiewicz <markiewicz@stanford.edu> | 2025-03-07 22:04:45 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-07 21:04:45 -0600 |
commit | 72e5b25efb580fb1f0fdfade516be90d90822164 (patch) | |
tree | 64dae27e381d598dec62d62d48c6e35d65bc6a94 | |
parent | 78790811989ab47319e2ee725e0c435b3cdd21ab (diff) | |
download | cpython-72e5b25efb580fb1f0fdfade516be90d90822164.tar.gz cpython-72e5b25efb580fb1f0fdfade516be90d90822164.zip |
gh-128646: Implement GzipFile.readinto[1]() methods (GH-128647)
The new methods simply delegate to the underlying buffer, much like the existing GzipFile.read() and GzipFile.read1() methods. This avoids the extra allocations caused by the BufferedIOBase.readinto() implementation that was previously used.
This commit also factors the shared read-mode check out into a common helper, _check_read(), rather than copying it an additional two times.
-rw-r--r-- | Lib/gzip.py | 28 | ||||
-rw-r--r-- | Lib/test/test_gzip.py | 32 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst | 4 |
3 files changed, 55 insertions, 9 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py index 7e384f8a568..d681ef6b488 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -325,11 +325,15 @@ class GzipFile(_compression.BaseStream): return length - def read(self, size=-1): - self._check_not_closed() + def _check_read(self, caller): if self.mode != READ: import errno - raise OSError(errno.EBADF, "read() on write-only GzipFile object") + msg = f"{caller}() on write-only GzipFile object" + raise OSError(errno.EBADF, msg) + + def read(self, size=-1): + self._check_not_closed() + self._check_read("read") return self._buffer.read(size) def read1(self, size=-1): @@ -337,19 +341,25 @@ class GzipFile(_compression.BaseStream): Reads up to a buffer's worth of data if size is negative.""" self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "read1() on write-only GzipFile object") + self._check_read("read1") if size < 0: size = io.DEFAULT_BUFFER_SIZE return self._buffer.read1(size) + def readinto(self, b): + self._check_not_closed() + self._check_read("readinto") + return self._buffer.readinto(b) + + def readinto1(self, b): + self._check_not_closed() + self._check_read("readinto1") + return self._buffer.readinto1(b) + def peek(self, n): self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "peek() on write-only GzipFile object") + self._check_read("peek") return self._buffer.peek(n) @property diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 0940bb114df..260fae5ae1b 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -143,6 +143,38 @@ class TestGzip(BaseTest): self.assertEqual(f.tell(), nread) self.assertEqual(b''.join(blocks), data1 * 50) + def test_readinto(self): + # 10MB of uncompressible data to ensure multiple reads + large_data = os.urandom(10 * 2**20) + with gzip.GzipFile(self.filename, 'wb') as f: + f.write(large_data) + + buf = bytearray(len(large_data)) + with gzip.GzipFile(self.filename, 'r') as f: + nbytes = 
f.readinto(buf) + self.assertEqual(nbytes, len(large_data)) + self.assertEqual(buf, large_data) + + def test_readinto1(self): + # 10MB of uncompressible data to ensure multiple reads + large_data = os.urandom(10 * 2**20) + with gzip.GzipFile(self.filename, 'wb') as f: + f.write(large_data) + + nread = 0 + buf = bytearray(len(large_data)) + memview = memoryview(buf) # Simplifies slicing + with gzip.GzipFile(self.filename, 'r') as f: + for count in range(200): + nbytes = f.readinto1(memview[nread:]) + if not nbytes: + break + nread += nbytes + self.assertEqual(f.tell(), nread) + self.assertEqual(buf, large_data) + # readinto1() should require multiple loops + self.assertGreater(count, 1) + @bigmemtest(size=_4G, memuse=1) def test_read_large(self, size): # Read chunk size over UINT_MAX should be supported, despite zlib's diff --git a/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst new file mode 100644 index 00000000000..034a66b704d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst @@ -0,0 +1,4 @@ +Eagerly write to buffers passed to :class:`gzip.GzipFile`'s +:meth:`~io.BufferedIOBase.readinto` and +:meth:`~io.BufferedIOBase.readinto1` implementations, +avoiding unnecessary allocations. Patch by Chris Markiewicz. |