diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-01-09 23:52:15 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-09 23:52:15 +0000 |
commit | beb80d11ec0ddaf00a97f8a38ec9eae68e07c28e (patch) | |
tree | 12fb69939ee1fc81d152f240c86bde0542b85545 /Lib/pathlib/_abc.py | |
parent | 57bdc6c30d2665c2760ff5a88487e57c8b3c397a (diff) | |
download | cpython-beb80d11ec0ddaf00a97f8a38ec9eae68e07c28e.tar.gz cpython-beb80d11ec0ddaf00a97f8a38ec9eae68e07c28e.zip |
GH-113528: Deoptimise `pathlib._abc.PurePathBase` (#113559)
Apply pathlib's normalization and performance tuning in `pathlib.PurePath`, but not `pathlib._abc.PurePathBase`.
With this change, the pathlib ABCs do not normalize away alternate path separators, empty segments, or dot segments. A single string given to the initialiser will round-trip by default, i.e. `str(PurePathBase(my_string)) == my_string`. Implementors can set their own path domain-specific normalization scheme by overriding `__str__()`
Eliminating path normalization makes maintaining and caching the path's parts and string representation both optional and not very useful, so this commit moves the `_drv`, `_root`, `_tail_cached` and `_str` slots from `PurePathBase` to `PurePath`. Only `_raw_paths` and `_resolving` slots remain in `PurePathBase`. This frees the ABCs from the burden of some of pathlib's hardest-to-understand code.
Diffstat (limited to 'Lib/pathlib/_abc.py')
-rw-r--r-- | Lib/pathlib/_abc.py | 142 |
1 files changed, 29 insertions, 113 deletions
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 5caad3c0502..6a1928495c9 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -1,7 +1,6 @@ import functools import ntpath import posixpath -import sys from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -82,7 +81,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match): except OSError: continue if match(entry.name): - yield parent_path._make_child_entry(entry) + yield parent_path._make_child_entry(entry, dir_only) def _select_recursive(parent_paths, dir_only, follow_symlinks): @@ -105,7 +104,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks): for entry in entries: try: if entry.is_dir(follow_symlinks=follow_symlinks): - paths.append(path._make_child_entry(entry)) + paths.append(path._make_child_entry(entry, dir_only)) continue except OSError: pass @@ -147,20 +146,6 @@ class PurePathBase: # in the `__init__()` method. '_raw_paths', - # The `_drv`, `_root` and `_tail_cached` slots store parsed and - # normalized parts of the path. They are set when any of the `drive`, - # `root` or `_tail` properties are accessed for the first time. The - # three-part division corresponds to the result of - # `os.path.splitroot()`, except that the tail is further split on path - # separators (i.e. it is a list of strings), and that the root and - # tail are normalized. - '_drv', '_root', '_tail_cached', - - # The `_str` slot stores the string representation of the path, - # computed from the drive, root and tail when `__str__()` is called - # for the first time. It's used to implement `_str_normcase` - '_str', - # The '_resolving' slot stores a boolean indicating whether the path # is being processed by `PathBase.resolve()`. This prevents duplicate # work from occurring when `resolve()` calls `stat()` or `readlink()`. @@ -179,65 +164,16 @@ class PurePathBase: """ return type(self)(*pathsegments) - @classmethod - def _parse_path(cls, path): - if not path: - return '', '', [] - sep = cls.pathmod.sep - altsep = cls.pathmod.altsep - if altsep: - path = path.replace(altsep, sep) - drv, root, rel = cls.pathmod.splitroot(path) - if not root and drv.startswith(sep) and not drv.endswith(sep): - drv_parts = drv.split(sep) - if len(drv_parts) == 4 and drv_parts[2] not in '?.': - # e.g. //server/share - root = sep - elif len(drv_parts) == 6: - # e.g. //?/unc/server/share - root = sep - parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] - return drv, root, parsed - - def _load_parts(self): - paths = self._raw_paths - if len(paths) == 0: - path = '' - elif len(paths) == 1: - path = paths[0] - else: - path = self.pathmod.join(*paths) - drv, root, tail = self._parse_path(path) - self._drv = drv - self._root = root - self._tail_cached = tail - - def _from_parsed_parts(self, drv, root, tail): - path_str = self._format_parsed_parts(drv, root, tail) - path = self.with_segments(path_str) - path._str = path_str or '.' - path._drv = drv - path._root = root - path._tail_cached = tail - return path - - @classmethod - def _format_parsed_parts(cls, drv, root, tail): - if drv or root: - return drv + root + cls.pathmod.sep.join(tail) - elif tail and cls.pathmod.splitdrive(tail[0])[0]: - tail = ['.'] + tail - return cls.pathmod.sep.join(tail) - def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" - try: - return self._str - except AttributeError: - self._str = self._format_parsed_parts(self.drive, self.root, - self._tail) or '.' - return self._str + paths = self._raw_paths + if len(paths) == 1: + return paths[0] + elif paths: + return self.pathmod.join(*paths) + else: + return '' def as_posix(self): """Return the string representation of the path with forward (/) @@ -247,42 +183,23 @@ class PurePathBase: @property def drive(self): """The drive prefix (letter or UNC path), if any.""" - try: - return self._drv - except AttributeError: - self._load_parts() - return self._drv + return self.pathmod.splitdrive(str(self))[0] @property def root(self): """The root of the path, if any.""" - try: - return self._root - except AttributeError: - self._load_parts() - return self._root - - @property - def _tail(self): - try: - return self._tail_cached - except AttributeError: - self._load_parts() - return self._tail_cached + return self.pathmod.splitroot(str(self))[1] @property def anchor(self): """The concatenation of the drive and root, or ''.""" - anchor = self.drive + self.root - return anchor + drive, root, _ = self.pathmod.splitroot(str(self)) + return drive + root @property def name(self): """The final path component, if any.""" - path_str = str(self) - if not path_str or path_str == '.': - return '' - return self.pathmod.basename(path_str) + return self.pathmod.basename(str(self)) @property def suffix(self): @@ -323,13 +240,10 @@ class PurePathBase: def with_name(self, name): """Return a new path with the file name changed.""" - m = self.pathmod - if not name or m.sep in name or (m.altsep and m.altsep in name) or name == '.': + dirname = self.pathmod.dirname + if dirname(name): raise ValueError(f"Invalid name {name!r}") - parent, old_name = m.split(str(self)) - if not old_name or old_name == '.': - raise ValueError(f"{self!r} has an empty name") - return self.with_segments(parent, name) + return self.with_segments(dirname(str(self)), name) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -480,7 +394,7 @@ class PurePathBase: def is_reserved(self): """Return True if the path contains one of the special names reserved by the system, if any.""" - if self.pathmod is posixpath or not self._tail: + if self.pathmod is posixpath or not self.name: return False # NOTE: the rules for reserved names seem somewhat complicated @@ -490,7 +404,7 @@ class PurePathBase: if self.drive.startswith('\\\\'): # UNC paths are never reserved. return False - name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ') + name = self.name.partition('.')[0].partition(':')[0].rstrip(' ') return name.upper() in _WIN_RESERVED_NAMES def match(self, path_pattern, *, case_sensitive=None): @@ -503,9 +417,9 @@ class PurePathBase: case_sensitive = _is_case_sensitive(self.pathmod) sep = path_pattern.pathmod.sep pattern_str = str(path_pattern) - if path_pattern.drive or path_pattern.root: + if path_pattern.anchor: pass - elif path_pattern._tail: + elif path_pattern.parts: pattern_str = f'**{sep}{pattern_str}' else: raise ValueError("empty pattern") @@ -780,8 +694,10 @@ class PathBase(PurePathBase): from contextlib import nullcontext return nullcontext(self.iterdir()) - def _make_child_entry(self, entry): + def _make_child_entry(self, entry, is_dir=False): # Transform an entry yielded from _scandir() into a path object. + if is_dir: + return entry.joinpath('') return entry def _make_child_relpath(self, name): @@ -792,13 +708,13 @@ class PathBase(PurePathBase): kind, including directories) matching the given relative pattern. """ path_pattern = self.with_segments(pattern) - if path_pattern.drive or path_pattern.root: + if path_pattern.anchor: raise NotImplementedError("Non-relative patterns are unsupported") - elif not path_pattern._tail: + elif not path_pattern.parts: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - pattern_parts = path_pattern._tail.copy() - if pattern[-1] in (self.pathmod.sep, self.pathmod.altsep): + pattern_parts = list(path_pattern.parts) + if not self.pathmod.basename(pattern): # GH-65238: pathlib doesn't preserve trailing slash. Add it back. pattern_parts.append('') @@ -816,7 +732,7 @@ class PathBase(PurePathBase): filter_paths = follow_symlinks is not None and '..' not in pattern_parts deduplicate_paths = False sep = self.pathmod.sep - paths = iter([self] if self.is_dir() else []) + paths = iter([self.joinpath('')] if self.is_dir() else []) part_idx = 0 while part_idx < len(pattern_parts): part = pattern_parts[part_idx] |