aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/Lib/test/test_tarfile.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_tarfile.py')
-rw-r--r--Lib/test/test_tarfile.py310
1 files changed, 292 insertions, 18 deletions
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index cf218a2bf14..7055e1ed147 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -2715,6 +2715,31 @@ class MiscTest(unittest.TestCase):
str(excinfo.exception),
)
+ @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support')
+ @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod")
+ @unittest.mock.patch('os.chmod')
+ def test_deferred_directory_attributes_update(self, mock_chmod):
+ # Regression test for gh-127987: setting attributes on arbitrary files
+ tempdir = os.path.join(TEMPDIR, 'test127987')
+ def mock_chmod_side_effect(path, mode, **kwargs):
+ target_path = os.path.realpath(path)
+ if os.path.commonpath([target_path, tempdir]) != tempdir:
+ raise Exception("should not try to chmod anything outside the destination", target_path)
+ mock_chmod.side_effect = mock_chmod_side_effect
+
+ outside_tree_dir = os.path.join(TEMPDIR, 'outside_tree_dir')
+ with ArchiveMaker() as arc:
+ arc.add('x', symlink_to='.')
+ arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt')
+ arc.add('x', symlink_to=outside_tree_dir)
+
+ os.makedirs(outside_tree_dir)
+ try:
+ arc.open().extractall(path=tempdir, filter='tar')
+ finally:
+ os_helper.rmtree(outside_tree_dir)
+ os_helper.rmtree(tempdir)
+
class CommandLineTest(unittest.TestCase):
@@ -3275,6 +3300,10 @@ class NoneInfoExtractTests(ReadTest):
got_paths = set(
p.relative_to(directory)
for p in pathlib.Path(directory).glob('**/*'))
+ if self.extraction_filter in (None, 'data'):
+ # The 'data' filter is expected to reject special files
+ for path in 'ustar/fifotype', 'ustar/blktype', 'ustar/chrtype':
+ got_paths.discard(pathlib.Path(path))
self.assertEqual(self.control_paths, got_paths)
@contextmanager
@@ -3504,12 +3533,28 @@ class ArchiveMaker:
self.bio = None
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
- mode=None, size=None, **kwargs):
- """Add a member to the test archive. Call within `with`."""
+ mode=None, size=None, content=None, **kwargs):
+ """Add a member to the test archive. Call within `with`.
+
+ Provides many shortcuts:
+ - default `type` is based on symlink_to, hardlink_to, and trailing `/`
+ in name (which is stripped)
+ - size & content defaults are based on each other
+ - content can be str or bytes
+ - mode should be textual ('-rwxrwxrwx')
+
+ (add more! this is unstable internal test-only API)
+ """
name = str(name)
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
+ if content is not None:
+ if isinstance(content, str):
+ content = content.encode()
+ size = len(content)
if size is not None:
tarinfo.size = size
+ if content is None:
+ content = bytes(tarinfo.size)
if mode:
tarinfo.mode = _filemode_to_int(mode)
if symlink_to is not None:
@@ -3523,7 +3568,7 @@ class ArchiveMaker:
if type is not None:
tarinfo.type = type
if tarinfo.isreg():
- fileobj = io.BytesIO(bytes(tarinfo.size))
+ fileobj = io.BytesIO(content)
else:
fileobj = None
self.tar_w.addfile(tarinfo, fileobj)
@@ -3557,7 +3602,7 @@ class TestExtractionFilters(unittest.TestCase):
destdir = outerdir / 'dest'
@contextmanager
- def check_context(self, tar, filter):
+ def check_context(self, tar, filter, *, check_flag=True):
"""Extracts `tar` to `self.destdir` and allows checking the result
If an error occurs, it must be checked using `expect_exception`
@@ -3566,27 +3611,40 @@ class TestExtractionFilters(unittest.TestCase):
except the destination directory itself and parent directories of
other files.
When checking directories, do so before their contents.
+
+ A file called 'flag' is made in outerdir (i.e. outside destdir)
+ before extraction; it should not be altered nor should its contents
+ be read/copied.
"""
with os_helper.temp_dir(self.outerdir):
+ flag_path = self.outerdir / 'flag'
+ flag_path.write_text('capture me')
try:
tar.extractall(self.destdir, filter=filter)
except Exception as exc:
self.raised_exception = exc
+ self.reraise_exception = True
self.expected_paths = set()
else:
self.raised_exception = None
+ self.reraise_exception = False
self.expected_paths = set(self.outerdir.glob('**/*'))
self.expected_paths.discard(self.destdir)
+ self.expected_paths.discard(flag_path)
try:
- yield
+ yield self
finally:
tar.close()
- if self.raised_exception:
+ if self.reraise_exception:
raise self.raised_exception
self.assertEqual(self.expected_paths, set())
+ if check_flag:
+ self.assertEqual(flag_path.read_text(), 'capture me')
+ else:
+ assert filter == 'fully_trusted'
def expect_file(self, name, type=None, symlink_to=None, mode=None,
- size=None):
+ size=None, content=None):
"""Check a single file. See check_context."""
if self.raised_exception:
raise self.raised_exception
@@ -3605,26 +3663,45 @@ class TestExtractionFilters(unittest.TestCase):
# The symlink might be the same (textually) as what we expect,
# but some systems change the link to an equivalent path, so
# we fall back to samefile().
- if expected != got:
- self.assertTrue(got.samefile(expected))
+ try:
+ if expected != got:
+ self.assertTrue(got.samefile(expected))
+ except Exception as e:
+ # attach a note, so it's shown even if `samefile` fails
+ e.add_note(f'{expected=}, {got=}')
+ raise
elif type == tarfile.REGTYPE or type is None:
self.assertTrue(path.is_file())
elif type == tarfile.DIRTYPE:
self.assertTrue(path.is_dir())
elif type == tarfile.FIFOTYPE:
self.assertTrue(path.is_fifo())
+ elif type == tarfile.SYMTYPE:
+ self.assertTrue(path.is_symlink())
else:
raise NotImplementedError(type)
if size is not None:
self.assertEqual(path.stat().st_size, size)
+ if content is not None:
+ self.assertEqual(path.read_text(), content)
for parent in path.parents:
self.expected_paths.discard(parent)
+ def expect_any_tree(self, name):
+ """Check a directory; forget about its contents."""
+ tree_path = (self.destdir / name).resolve()
+ self.expect_file(tree_path, type=tarfile.DIRTYPE)
+ self.expected_paths = {
+ p for p in self.expected_paths
+ if tree_path not in p.parents
+ }
+
def expect_exception(self, exc_type, message_re='.'):
with self.assertRaisesRegex(exc_type, message_re):
if self.raised_exception is not None:
raise self.raised_exception
- self.raised_exception = None
+ self.reraise_exception = False
+ return self.raised_exception
def test_benign_file(self):
with ArchiveMaker() as arc:
@@ -3710,6 +3787,80 @@ class TestExtractionFilters(unittest.TestCase):
self.expect_file('parent/evil')
@symlink_test
+ @os_helper.skip_unless_symlink
+ def test_realpath_limit_attack(self):
+ # (CVE-2025-4517)
+
+ with ArchiveMaker() as arc:
+ # populate the symlinks and dirs that expand in os.path.realpath()
+ # The component length is chosen so that in common cases, the unexpanded
+ # path fits in PATH_MAX, but it overflows when the final symlink
+ # is expanded
+ steps = "abcdefghijklmnop"
+ if sys.platform == 'win32':
+ component = 'd' * 25
+ elif 'PC_PATH_MAX' in os.pathconf_names:
+ max_path_len = os.pathconf(self.outerdir.parent, "PC_PATH_MAX")
+ path_sep_len = 1
+ dest_len = len(str(self.destdir)) + path_sep_len
+ component_len = (max_path_len - dest_len) // (len(steps) + path_sep_len)
+ component = 'd' * component_len
+ else:
+ raise NotImplementedError("Need to guess component length for {sys.platform}")
+ path = ""
+ step_path = ""
+ for i in steps:
+ arc.add(os.path.join(path, component), type=tarfile.DIRTYPE,
+ mode='drwxrwxrwx')
+ arc.add(os.path.join(path, i), symlink_to=component)
+ path = os.path.join(path, component)
+ step_path = os.path.join(step_path, i)
+ # create the final symlink that exceeds PATH_MAX and simply points
+ # to the top dir.
+ # this link will never be expanded by
+ # os.path.realpath(strict=False), nor anything after it.
+ linkpath = os.path.join(*steps, "l"*254)
+ parent_segments = [".."] * len(steps)
+ arc.add(linkpath, symlink_to=os.path.join(*parent_segments))
+ # make a symlink outside to keep the tar command happy
+ arc.add("escape", symlink_to=os.path.join(linkpath, ".."))
+ # use the symlinks above, that are not checked, to create a hardlink
+ # to a file outside of the destination path
+ arc.add("flaglink", hardlink_to=os.path.join("escape", "flag"))
+ # now that we have the hardlink we can overwrite the file
+ arc.add("flaglink", content='overwrite')
+ # we can also create new files as well!
+ arc.add("escape/newfile", content='new')
+
+ with (self.subTest('fully_trusted'),
+ self.check_context(arc.open(), filter='fully_trusted',
+ check_flag=False)):
+ if sys.platform == 'win32':
+ self.expect_exception((FileNotFoundError, FileExistsError))
+ elif self.raised_exception:
+ # Cannot symlink/hardlink: tarfile falls back to getmember()
+ self.expect_exception(KeyError)
+ # Otherwise, this block should never enter.
+ else:
+ self.expect_any_tree(component)
+ self.expect_file('flaglink', content='overwrite')
+ self.expect_file('../newfile', content='new')
+ self.expect_file('escape', type=tarfile.SYMTYPE)
+ self.expect_file('a', symlink_to=component)
+
+ for filter in 'tar', 'data':
+ with self.subTest(filter), self.check_context(arc.open(), filter=filter):
+ exc = self.expect_exception((OSError, KeyError))
+ if isinstance(exc, OSError):
+ if sys.platform == 'win32':
+ # 3: ERROR_PATH_NOT_FOUND
+ # 5: ERROR_ACCESS_DENIED
+ # 206: ERROR_FILENAME_EXCED_RANGE
+ self.assertIn(exc.winerror, (3, 5, 206))
+ else:
+ self.assertEqual(exc.errno, errno.ENAMETOOLONG)
+
+ @symlink_test
def test_parent_symlink2(self):
# Test interplaying symlinks
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
@@ -3931,8 +4082,8 @@ class TestExtractionFilters(unittest.TestCase):
arc.add('symlink2', symlink_to=os.path.join(
'linkdir', 'hardlink2'))
arc.add('targetdir/target', size=3)
- arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
- arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
+ arc.add('linkdir/hardlink', hardlink_to=os.path.join('targetdir', 'target'))
+ arc.add('linkdir/hardlink2', hardlink_to=os.path.join('linkdir', 'symlink'))
for filter in 'tar', 'data', 'fully_trusted':
with self.check_context(arc.open(), filter):
@@ -3948,6 +4099,129 @@ class TestExtractionFilters(unittest.TestCase):
self.expect_file('linkdir/symlink', size=3)
self.expect_file('symlink2', size=3)
+ @symlink_test
+ def test_sneaky_hardlink_fallback(self):
+ # (CVE-2025-4330)
+ # Test that when hardlink extraction falls back to extracting members
+ # from the archive, the extracted member is (re-)filtered.
+ with ArchiveMaker() as arc:
+ # Create a directory structure so the c/escape symlink stays
+ # inside the path
+ arc.add("a/t/dummy")
+ # Create b/ directory
+ arc.add("b/")
+ # Point "c" to the bottom of the tree in "a"
+ arc.add("c", symlink_to=os.path.join("a", "t"))
+ # link to non-existant location under "a"
+ arc.add("c/escape", symlink_to=os.path.join("..", "..",
+ "link_here"))
+ # Move "c" to point to "b" ("c/escape" no longer exists)
+ arc.add("c", symlink_to="b")
+ # Attempt to create a hard link to "c/escape". Since it doesn't
+ # exist it will attempt to extract "cescape" but at "boom".
+ arc.add("boom", hardlink_to=os.path.join("c", "escape"))
+
+ with self.check_context(arc.open(), 'data'):
+ if not os_helper.can_symlink():
+ # When 'c/escape' is extracted, 'c' is a regular
+ # directory, and 'c/escape' *would* point outside
+ # the destination if symlinks were allowed.
+ self.expect_exception(
+ tarfile.LinkOutsideDestinationError)
+ elif sys.platform == "win32":
+ # On Windows, 'c/escape' points outside the destination
+ self.expect_exception(tarfile.LinkOutsideDestinationError)
+ else:
+ e = self.expect_exception(
+ tarfile.LinkFallbackError,
+ "link 'boom' would be extracted as a copy of "
+ + "'c/escape', which was rejected")
+ self.assertIsInstance(e.__cause__,
+ tarfile.LinkOutsideDestinationError)
+ for filter in 'tar', 'fully_trusted':
+ with self.subTest(filter), self.check_context(arc.open(), filter):
+ if not os_helper.can_symlink():
+ self.expect_file("a/t/dummy")
+ self.expect_file("b/")
+ self.expect_file("c/")
+ else:
+ self.expect_file("a/t/dummy")
+ self.expect_file("b/")
+ self.expect_file("a/t/escape", symlink_to='../../link_here')
+ self.expect_file("boom", symlink_to='../../link_here')
+ self.expect_file("c", symlink_to='b')
+
+ @symlink_test
+ def test_exfiltration_via_symlink(self):
+ # (CVE-2025-4138)
+ # Test changing symlinks that result in a symlink pointing outside
+ # the extraction directory, unless prevented by 'data' filter's
+ # normalization.
+ with ArchiveMaker() as arc:
+ arc.add("escape", symlink_to=os.path.join('link', 'link', '..', '..', 'link-here'))
+ arc.add("link", symlink_to='./')
+
+ for filter in 'tar', 'data', 'fully_trusted':
+ with self.check_context(arc.open(), filter):
+ if os_helper.can_symlink():
+ self.expect_file("link", symlink_to='./')
+ if filter == 'data':
+ self.expect_file("escape", symlink_to='link-here')
+ else:
+ self.expect_file("escape",
+ symlink_to='link/link/../../link-here')
+ else:
+ # Nothing is extracted.
+ pass
+
+ @symlink_test
+ def test_chmod_outside_dir(self):
+ # (CVE-2024-12718)
+ # Test that members used for delayed updates of directory metadata
+ # are (re-)filtered.
+ with ArchiveMaker() as arc:
+ # "pwn" is a veeeery innocent symlink:
+ arc.add("a/pwn", symlink_to='.')
+ # But now "pwn" is also a directory, so it's scheduled to have its
+ # metadata updated later:
+ arc.add("a/pwn/", mode='drwxrwxrwx')
+ # Oops, "pwn" is not so innocent any more:
+ arc.add("a/pwn", symlink_to='x/../')
+ # Newly created symlink points to the dest dir,
+ # so it's OK for the "data" filter.
+ arc.add('a/x', symlink_to=('../'))
+ # But now "pwn" points outside the dest dir
+
+ for filter in 'tar', 'data', 'fully_trusted':
+ with self.check_context(arc.open(), filter) as cc:
+ if not os_helper.can_symlink():
+ self.expect_file("a/pwn/")
+ elif filter == 'data':
+ self.expect_file("a/x", symlink_to='../')
+ self.expect_file("a/pwn", symlink_to='.')
+ else:
+ self.expect_file("a/x", symlink_to='../')
+ self.expect_file("a/pwn", symlink_to='x/../')
+ if sys.platform != "win32":
+ st_mode = cc.outerdir.stat().st_mode
+ self.assertNotEqual(st_mode & 0o777, 0o777)
+
+ def test_link_fallback_normalizes(self):
+ # Make sure hardlink fallbacks work for non-normalized paths for all
+ # filters
+ with ArchiveMaker() as arc:
+ arc.add("dir/")
+ arc.add("dir/../afile")
+ arc.add("link1", hardlink_to='dir/../afile')
+ arc.add("link2", hardlink_to='dir/../dir/../afile')
+
+ for filter in 'tar', 'data', 'fully_trusted':
+ with self.check_context(arc.open(), filter) as cc:
+ self.expect_file("dir/")
+ self.expect_file("afile")
+ self.expect_file("link1")
+ self.expect_file("link2")
+
def test_modes(self):
# Test how file modes are extracted
# (Note that the modes are ignored on platforms without working chmod)
@@ -4072,7 +4346,7 @@ class TestExtractionFilters(unittest.TestCase):
# The 'tar' filter returns TarInfo objects with the same name/type.
# (It can also fail for particularly "evil" input, but we don't have
# that in the test archive.)
- with tarfile.TarFile.open(tarname) as tar:
+ with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar:
for tarinfo in tar.getmembers():
try:
filtered = tarfile.tar_filter(tarinfo, '')
@@ -4084,7 +4358,7 @@ class TestExtractionFilters(unittest.TestCase):
def test_data_filter(self):
# The 'data' filter either raises, or returns TarInfo with the same
# name/type.
- with tarfile.TarFile.open(tarname) as tar:
+ with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar:
for tarinfo in tar.getmembers():
try:
filtered = tarfile.data_filter(tarinfo, '')
@@ -4242,13 +4516,13 @@ class TestExtractionFilters(unittest.TestCase):
# If errorlevel is 0, errors affected by errorlevel are ignored
with self.check_context(arc.open(errorlevel=0), extracterror_filter):
- self.expect_file('file')
+ pass
with self.check_context(arc.open(errorlevel=0), filtererror_filter):
- self.expect_file('file')
+ pass
with self.check_context(arc.open(errorlevel=0), oserror_filter):
- self.expect_file('file')
+ pass
with self.check_context(arc.open(errorlevel=0), tarerror_filter):
self.expect_exception(tarfile.TarError)
@@ -4259,7 +4533,7 @@ class TestExtractionFilters(unittest.TestCase):
# If 1, all fatal errors are raised
with self.check_context(arc.open(errorlevel=1), extracterror_filter):
- self.expect_file('file')
+ pass
with self.check_context(arc.open(errorlevel=1), filtererror_filter):
self.expect_exception(tarfile.FilterError)