diff options
Diffstat (limited to 'Lib/zipfile')
-rw-r--r-- | Lib/zipfile/__init__.py | 64 | ||||
-rw-r--r-- | Lib/zipfile/_path/__init__.py | 38 | ||||
-rw-r--r-- | Lib/zipfile/_path/_functools.py | 20 | ||||
-rw-r--r-- | Lib/zipfile/_path/glob.py | 1 |
4 files changed, 86 insertions, 37 deletions
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 88356abe8cb..18caeb3e04a 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -38,8 +38,8 @@ except ImportError: __all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", - "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", - "Path"] + "ZIP_ZSTANDARD", "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", + "LargeZipFile", "Path"] class BadZipFile(Exception): pass @@ -234,8 +234,19 @@ class _Extra(bytes): def _check_zipfile(fp): try: - if _EndRecData(fp): - return True # file has correct magic number + endrec = _EndRecData(fp) + if endrec: + if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0: + return True # Empty zipfiles are still zipfiles + elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]: + # Central directory is on the same disk + fp.seek(sum(_handle_prepended_data(endrec))) + if endrec[_ECD_SIZE] >= sizeCentralDir: + data = fp.read(sizeCentralDir) # CD is where we expect it to be + if len(data) == sizeCentralDir: + centdir = struct.unpack(structCentralDir, data) # CD is the right size + if centdir[_CD_SIGNATURE] == stringCentralDir: + return True # First central directory entry has correct magic number except OSError: pass return False @@ -258,6 +269,22 @@ def is_zipfile(filename): pass return result +def _handle_prepended_data(endrec, debug=0): + size_cd = endrec[_ECD_SIZE] # bytes in central directory + offset_cd = endrec[_ECD_OFFSET] # offset of central directory + + # "concat" is zero, unless zip was concatenated to another file + concat = endrec[_ECD_LOCATION] - size_cd - offset_cd + if endrec[_ECD_SIGNATURE] == stringEndArchive64: + # If Zip64 extension structures are present, account for them + concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + + if debug > 2: + inferred = concat + offset_cd + print("given, inferred, offset", offset_cd, inferred, concat) + + return offset_cd, concat + def _EndRecData64(fpin, offset, endrec): """ Read the ZIP64 end-of-archive records and use that to update endrec @@ -812,11 +839,11 @@ def _get_compressor(compress_type, compresslevel=None): if compresslevel is not None: return bz2.BZ2Compressor(compresslevel) return bz2.BZ2Compressor() - # compresslevel is ignored for ZIP_LZMA and ZIP_ZSTANDARD + # compresslevel is ignored for ZIP_LZMA elif compress_type == ZIP_LZMA: return LZMACompressor() elif compress_type == ZIP_ZSTANDARD: - return zstd.ZstdCompressor() + return zstd.ZstdCompressor(level=compresslevel) else: return None @@ -1352,7 +1379,8 @@ class ZipFile: mode: The mode can be either read 'r', write 'w', exclusive create 'x', or append 'a'. compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), - ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). + ZIP_BZIP2 (requires bz2), ZIP_LZMA (requires lzma), or + ZIP_ZSTANDARD (requires compression.zstd). allowZip64: if True ZipFile will create files with ZIP64 extensions when needed, otherwise it will raise an exception when this would be necessary. @@ -1361,6 +1389,9 @@ class ZipFile: When using ZIP_STORED or ZIP_LZMA this keyword has no effect. When using ZIP_DEFLATED integers 0 through 9 are accepted. When using ZIP_BZIP2 integers 1 through 9 are accepted. + When using ZIP_ZSTANDARD integers -7 though 22 are common, + see the CompressionParameter enum in compression.zstd for + details. """ @@ -1497,28 +1528,21 @@ class ZipFile: raise BadZipFile("File is not a zip file") if self.debug > 1: print(endrec) - size_cd = endrec[_ECD_SIZE] # bytes in central directory - offset_cd = endrec[_ECD_OFFSET] # offset of central directory self._comment = endrec[_ECD_COMMENT] # archive comment - # "concat" is zero, unless zip was concatenated to another file - concat = endrec[_ECD_LOCATION] - size_cd - offset_cd - if endrec[_ECD_SIGNATURE] == stringEndArchive64: - # If Zip64 extension structures are present, account for them - concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) + offset_cd, concat = _handle_prepended_data(endrec, self.debug) + + # self.start_dir: Position of start of central directory + self.start_dir = offset_cd + concat # store the offset to the beginning of data for the # .data_offset property self._data_offset = concat - if self.debug > 2: - inferred = concat + offset_cd - print("given, inferred, offset", offset_cd, inferred, concat) - # self.start_dir: Position of start of central directory - self.start_dir = offset_cd + concat if self.start_dir < 0: raise BadZipFile("Bad offset for central directory") fp.seek(self.start_dir, 0) + size_cd = endrec[_ECD_SIZE] data = fp.read(size_cd) fp = io.BytesIO(data) total = 0 @@ -2093,6 +2117,8 @@ class ZipFile: min_version = max(BZIP2_VERSION, min_version) elif zinfo.compress_type == ZIP_LZMA: min_version = max(LZMA_VERSION, min_version) + elif zinfo.compress_type == ZIP_ZSTANDARD: + min_version = max(ZSTANDARD_VERSION, min_version) extract_version = max(min_version, zinfo.extract_version) create_version = max(min_version, zinfo.create_version) diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py index 5ae16ec970d..faae4c84cae 100644 --- a/Lib/zipfile/_path/__init__.py +++ b/Lib/zipfile/_path/__init__.py @@ -7,19 +7,19 @@ https://github.com/python/importlib_metadata/wiki/Development-Methodology for more detail. """ +import functools import io -import posixpath -import zipfile import itertools -import contextlib import pathlib +import posixpath import re import stat import sys +import zipfile +from ._functools import save_method_args from .glob import Translator - __all__ = ['Path'] @@ -86,13 +86,12 @@ class InitializedState: Mix-in to save the initialization state for pickling. """ + @save_method_args def __init__(self, *args, **kwargs): - self.__args = args - self.__kwargs = kwargs super().__init__(*args, **kwargs) def __getstate__(self): - return self.__args, self.__kwargs + return self._saved___init__.args, self._saved___init__.kwargs def __setstate__(self, state): args, kwargs = state @@ -181,22 +180,27 @@ class FastLookup(CompleteDirs): """ def namelist(self): - with contextlib.suppress(AttributeError): - return self.__names - self.__names = super().namelist() - return self.__names + return self._namelist + + @functools.cached_property + def _namelist(self): + return super().namelist() def _name_set(self): - with contextlib.suppress(AttributeError): - return self.__lookup - self.__lookup = super()._name_set() - return self.__lookup + return self._name_set_prop + + @functools.cached_property + def _name_set_prop(self): + return super()._name_set() def _extract_text_encoding(encoding=None, *args, **kwargs): # compute stack level so that the caller of the caller sees any warning. is_pypy = sys.implementation.name == 'pypy' - stack_level = 3 + is_pypy + # PyPy no longer special cased after 7.3.19 (or maybe 7.3.18) + # See jaraco/zipp#143 + is_old_pypi = is_pypy and sys.pypy_version_info < (7, 3, 19) + stack_level = 3 + is_old_pypi return io.text_encoding(encoding, stack_level), args, kwargs @@ -351,7 +355,7 @@ class Path: return io.TextIOWrapper(stream, encoding, *args, **kwargs) def _base(self): - return pathlib.PurePosixPath(self.at or self.root.filename) + return pathlib.PurePosixPath(self.at) if self.at else self.filename @property def name(self): diff --git a/Lib/zipfile/_path/_functools.py b/Lib/zipfile/_path/_functools.py new file mode 100644 index 00000000000..7390be21873 --- /dev/null +++ b/Lib/zipfile/_path/_functools.py @@ -0,0 +1,20 @@ +import collections +import functools + + +# from jaraco.functools 4.0.2 +def save_method_args(method): + """ + Wrap a method such that when it is called, the args and kwargs are + saved on the method. + """ + args_and_kwargs = collections.namedtuple('args_and_kwargs', 'args kwargs') # noqa: PYI024 + + @functools.wraps(method) + def wrapper(self, /, *args, **kwargs): + attr_name = '_saved_' + method.__name__ + attr = args_and_kwargs(args, kwargs) + setattr(self, attr_name, attr) + return method(self, *args, **kwargs) + + return wrapper diff --git a/Lib/zipfile/_path/glob.py b/Lib/zipfile/_path/glob.py index d7fe45a4947..bd2839304b7 100644 --- a/Lib/zipfile/_path/glob.py +++ b/Lib/zipfile/_path/glob.py @@ -1,7 +1,6 @@ import os import re - _default_seps = os.sep + str(os.altsep) * bool(os.altsep) |