diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-04-11 01:26:53 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-11 01:26:53 +0100 |
commit | 0cc71bde001950d3634c235e2b0d24cda6ce7dce (patch) | |
tree | b7cdb33ccd2c1d0f793a8c3b60f590d9af8c0e0e /Lib/pathlib/_abc.py | |
parent | 6258844c27e3b5a43816e7c559089a5fe0a47123 (diff) | |
download | cpython-0cc71bde001950d3634c235e2b0d24cda6ce7dce.tar.gz cpython-0cc71bde001950d3634c235e2b0d24cda6ce7dce.zip |
GH-117586: Speed up `pathlib.Path.walk()` by working with strings (#117726)
Move `pathlib.Path.walk()` implementation into `glob._Globber`. The new
`glob._Globber.walk()` classmethod works with strings internally, which is
a little faster than generating `Path` objects and keeping them normalized.
The `pathlib.Path.walk()` method converts the strings back to path objects.
In the private pathlib ABCs, our existing subclass of `_Globber` ensures
that `PathBase` instances are used throughout.
Follow-up to #117589.
Diffstat (limited to 'Lib/pathlib/_abc.py')
-rw-r--r-- | Lib/pathlib/_abc.py | 65 |
1 files changed, 8 insertions, 57 deletions
```diff
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py
index 553f797d75e..b6cab0d285a 100644
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -45,10 +45,16 @@ def _is_case_sensitive(parser):
 
 class Globber(glob._Globber):
     lstat = operator.methodcaller('lstat')
-    scandir = operator.methodcaller('_scandir')
     add_slash = operator.methodcaller('joinpath', '')
 
     @staticmethod
+    def scandir(path):
+        # Emulate os.scandir(), which returns an object that can be used as a
+        # context manager. This method is called by walk() and glob().
+        from contextlib import nullcontext
+        return nullcontext(path.iterdir())
+
+    @staticmethod
     def concat_path(path, text):
         """Appends text to the given path.
         """
@@ -677,20 +683,6 @@ class PathBase(PurePathBase):
         """
         raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
 
-    def _scandir(self):
-        # Emulate os.scandir(), which returns an object that can be used as a
-        # context manager. This method is called by walk() and glob().
-        from contextlib import nullcontext
-        return nullcontext(self.iterdir())
-
-    def _make_child_direntry(self, entry):
-        # Transform an entry yielded from _scandir() into a path object.
-        # PathBase._scandir() yields PathBase objects, so this is a no-op.
-        return entry
-
-    def _make_child_relpath(self, name):
-        return self.joinpath(name)
-
     def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
         if case_sensitive is None:
             case_sensitive = _is_case_sensitive(self.parser)
@@ -724,48 +716,7 @@ class PathBase(PurePathBase):
 
     def walk(self, top_down=True, on_error=None, follow_symlinks=False):
         """Walk the directory tree from this directory, similar to os.walk()."""
-        paths = [self]
-
-        while paths:
-            path = paths.pop()
-            if isinstance(path, tuple):
-                yield path
-                continue
-
-            # We may not have read permission for self, in which case we can't
-            # get a list of the files the directory contains.  os.walk()
-            # always suppressed the exception in that instance, rather than
-            # blow up for a minor reason when (say) a thousand readable
-            # directories are still left to visit. That logic is copied here.
-            try:
-                scandir_obj = path._scandir()
-            except OSError as error:
-                if on_error is not None:
-                    on_error(error)
-                continue
-
-            with scandir_obj as scandir_it:
-                dirnames = []
-                filenames = []
-                if not top_down:
-                    paths.append((path, dirnames, filenames))
-                for entry in scandir_it:
-                    try:
-                        is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
-                    except OSError:
-                        # Carried over from os.path.isdir().
-                        is_dir = False
-
-                    if is_dir:
-                        if not top_down:
-                            paths.append(path._make_child_direntry(entry))
-                        dirnames.append(entry.name)
-                    else:
-                        filenames.append(entry.name)
-
-            if top_down:
-                yield path, dirnames, filenames
-                paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
+        return self._globber.walk(self, top_down, on_error, follow_symlinks)
 
     def absolute(self):
         """Return an absolute version of this path
```