about | summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Chris Markiewicz <markiewicz@stanford.edu> 2025-03-07 22:04:45 -0500
committer: GitHub <noreply@github.com> 2025-03-07 21:04:45 -0600
commit: 72e5b25efb580fb1f0fdfade516be90d90822164 (patch)
tree: 64dae27e381d598dec62d62d48c6e35d65bc6a94
parent: 78790811989ab47319e2ee725e0c435b3cdd21ab (diff)
download: cpython-72e5b25efb580fb1f0fdfade516be90d90822164.tar.gz
          cpython-72e5b25efb580fb1f0fdfade516be90d90822164.zip
gh-128646: Implement GzipFile.readinto[1]() methods (GH-128647)
The new methods simply delegate to the underlying buffer, much like the existing GzipFile.read[1] methods. This avoids extra allocations caused by the BufferedIOBase.readinto implementation previously used. This commit also factors out a common readability check rather than copying it an additional two times.
-rw-r--r--  Lib/gzip.py  28
-rw-r--r--  Lib/test/test_gzip.py  32
-rw-r--r--  Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst  4
3 files changed, 55 insertions, 9 deletions
diff --git a/Lib/gzip.py b/Lib/gzip.py
index 7e384f8a568..d681ef6b488 100644
--- a/Lib/gzip.py
+++ b/Lib/gzip.py
@@ -325,11 +325,15 @@ class GzipFile(_compression.BaseStream):
return length
- def read(self, size=-1):
- self._check_not_closed()
+ def _check_read(self, caller):
if self.mode != READ:
import errno
- raise OSError(errno.EBADF, "read() on write-only GzipFile object")
+ msg = f"{caller}() on write-only GzipFile object"
+ raise OSError(errno.EBADF, msg)
+
+ def read(self, size=-1):
+ self._check_not_closed()
+ self._check_read("read")
return self._buffer.read(size)
def read1(self, size=-1):
@@ -337,19 +341,25 @@ class GzipFile(_compression.BaseStream):
Reads up to a buffer's worth of data if size is negative."""
self._check_not_closed()
- if self.mode != READ:
- import errno
- raise OSError(errno.EBADF, "read1() on write-only GzipFile object")
+ self._check_read("read1")
if size < 0:
size = io.DEFAULT_BUFFER_SIZE
return self._buffer.read1(size)
+ def readinto(self, b):
+ self._check_not_closed()
+ self._check_read("readinto")
+ return self._buffer.readinto(b)
+
+ def readinto1(self, b):
+ self._check_not_closed()
+ self._check_read("readinto1")
+ return self._buffer.readinto1(b)
+
def peek(self, n):
self._check_not_closed()
- if self.mode != READ:
- import errno
- raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
+ self._check_read("peek")
return self._buffer.peek(n)
@property
diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py
index 0940bb114df..260fae5ae1b 100644
--- a/Lib/test/test_gzip.py
+++ b/Lib/test/test_gzip.py
@@ -143,6 +143,38 @@ class TestGzip(BaseTest):
self.assertEqual(f.tell(), nread)
self.assertEqual(b''.join(blocks), data1 * 50)
+ def test_readinto(self):
+ # 10MB of uncompressible data to ensure multiple reads
+ large_data = os.urandom(10 * 2**20)
+ with gzip.GzipFile(self.filename, 'wb') as f:
+ f.write(large_data)
+
+ buf = bytearray(len(large_data))
+ with gzip.GzipFile(self.filename, 'r') as f:
+ nbytes = f.readinto(buf)
+ self.assertEqual(nbytes, len(large_data))
+ self.assertEqual(buf, large_data)
+
+ def test_readinto1(self):
+ # 10MB of uncompressible data to ensure multiple reads
+ large_data = os.urandom(10 * 2**20)
+ with gzip.GzipFile(self.filename, 'wb') as f:
+ f.write(large_data)
+
+ nread = 0
+ buf = bytearray(len(large_data))
+ memview = memoryview(buf) # Simplifies slicing
+ with gzip.GzipFile(self.filename, 'r') as f:
+ for count in range(200):
+ nbytes = f.readinto1(memview[nread:])
+ if not nbytes:
+ break
+ nread += nbytes
+ self.assertEqual(f.tell(), nread)
+ self.assertEqual(buf, large_data)
+ # readinto1() should require multiple loops
+ self.assertGreater(count, 1)
+
@bigmemtest(size=_4G, memuse=1)
def test_read_large(self, size):
# Read chunk size over UINT_MAX should be supported, despite zlib's
diff --git a/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst
new file mode 100644
index 00000000000..034a66b704d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-01-08-15-14-17.gh-issue-128647.GabglU.rst
@@ -0,0 +1,4 @@
+Eagerly write to buffers passed to :class:`gzip.GzipFile`'s
+:meth:`~io.BufferedIOBase.readinto` and
+:meth:`~io.BufferedIOBase.readinto1` implementations,
+avoiding unnecessary allocations. Patch by Chris Markiewicz.