diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 490f89f39d26d1..b4ac2d1e07e362 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -17,7 +17,6 @@ from _collections_abc import Sequence from errno import ENOENT, ENOTDIR, EBADF, ELOOP from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO -from urllib.parse import quote_from_bytes as urlquote_from_bytes __all__ = [ @@ -203,6 +202,64 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): return +class _WalkAction: + WALK = object() + YIELD = object() + CLOSE = object() + + +def _walk(top_down, on_error, follow_symlinks, follow_junctions, use_fd, actions): + while actions: + action, value = actions.pop() + try: + if action is _WalkAction.WALK: + path, dir_fd, entry = value + dirnames = [] + filenames = [] + try: + if use_fd: + scandir_it, fd = path._scandir_fwalk( + follow_symlinks, actions, dir_fd, entry) + result = path, dirnames, filenames, fd + else: + scandir_it, fd = path._scandir(), None + result = path, dirnames, filenames + except OSError as error: + if not hasattr(error, '_func'): + error._func = os.scandir + error.filename = str(path) + raise error + if not top_down: + actions.append((_WalkAction.YIELD, result)) + with scandir_it: + for entry in scandir_it: + try: + if entry.is_dir(follow_symlinks=follow_symlinks) and ( + follow_junctions or not entry.is_junction()): + if not top_down: + actions.append((_WalkAction.WALK, ( + path._make_child_relpath(entry.name), fd, + entry if use_fd and not follow_symlinks else None))) + dirnames.append(entry.name) + else: + filenames.append(entry.name) + except OSError: + filenames.append(entry.name) + if top_down: + yield result + for dirname in reversed(dirnames): + actions.append((_WalkAction.WALK, ( + path._make_child_relpath(dirname), fd, None))) + elif action is _WalkAction.YIELD: + yield value + elif action is _WalkAction.CLOSE: + os.close(value) + else: + raise AssertionError(f"unknown walk action: {action}") + except OSError as error: + if on_error is not None: + on_error(error) + # # Public API # @@ -372,7 +429,9 @@ def as_uri(self): # It's a posix path => 'file:///etc/hosts' prefix = 'file://' path = str(self) - return prefix + urlquote_from_bytes(os.fsencode(path)) + + from urllib.parse import quote_from_bytes + return prefix + quote_from_bytes(os.fsencode(path)) @property def _parts_normcase(self): @@ -1205,50 +1264,46 @@ def expanduser(self): return self - def walk(self, top_down=True, on_error=None, follow_symlinks=False): + def walk(self, top_down=True, on_error=None, follow_symlinks=False, follow_junctions=True): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - paths = [self] - - while paths: - path = paths.pop() - if isinstance(path, tuple): - yield path - continue - - # We may not have read permission for self, in which case we can't - # get a list of the files the directory contains. os.walk() - # always suppressed the exception in that instance, rather than - # blow up for a minor reason when (say) a thousand readable - # directories are still left to visit. That logic is copied here. + actions = [(_WalkAction.WALK, (self, None, None))] + return _walk(top_down, on_error, follow_symlinks, follow_junctions, False, actions) + + if {os.stat, os.open} <= os.supports_dir_fd and {os.stat, os.scandir} <= os.supports_fd: + def _fwalk(self, top_down=True, *, on_error=None, follow_symlinks=False, dir_fd=None): + """Walk the directory tree from this directory, similar to os.fwalk().""" + sys.audit("pathlib.Path._fwalk", self, on_error, follow_symlinks, dir_fd) + actions = [(_WalkAction.WALK, (self, dir_fd, None))] try: - scandir_it = path._scandir() - except OSError as error: - if on_error is not None: - on_error(error) - continue - - with scandir_it: - dirnames = [] - filenames = [] - for entry in scandir_it: - try: - is_dir = entry.is_dir(follow_symlinks=follow_symlinks) - except OSError: - # Carried over from os.path.isdir(). - is_dir = False - - if is_dir: - dirnames.append(entry.name) - else: - filenames.append(entry.name) - - if top_down: - yield path, dirnames, filenames + return _walk(top_down, on_error, follow_symlinks, True, True, actions) + finally: + for action, value in reversed(actions): + if action is _WalkAction.CLOSE: + try: + os.close(value) + except OSError: + pass + + def _scandir_fwalk(self, follow_symlinks, actions, dir_fd, entry): + name = self if dir_fd is None else self.name + if follow_symlinks: + fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) + actions.append((_WalkAction.CLOSE, fd)) else: - paths.append((path, dirnames, filenames)) - - paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + if entry is None: + orig_st = os.stat(name, follow_symlinks=False, dir_fd=dir_fd) + else: + orig_st = entry.stat(follow_symlinks=False) + fd = os.open(name, os.O_RDONLY, dir_fd=dir_fd) + actions.append((_WalkAction.CLOSE, fd)) + if not os.path.samestat(orig_st, os.stat(fd)): + error = NotADirectoryError("Cannot walk into a symbolic link") + error._func = os.path.islink + raise error + return os.scandir(fd), fd class PosixPath(Path, PurePosixPath): diff --git a/Lib/shutil.py b/Lib/shutil.py index b0576407e02ffb..d907c8a86ac38a 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -10,6 +10,7 @@ import fnmatch import collections import errno +import pathlib import warnings try: @@ -562,116 +563,49 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) -if hasattr(os.stat_result, 'st_file_attributes'): - def _rmtree_islink(path): - try: - st = os.lstat(path) - return (stat.S_ISLNK(st.st_mode) or - (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT - and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) - except OSError: - return False -else: - def _rmtree_islink(path): - return os.path.islink(path) - # version vulnerable to race conditions def _rmtree_unsafe(path, onexc): - try: - with os.scandir(path) as scandir_it: - entries = list(scandir_it) - except OSError as err: - onexc(os.scandir, path, err) - entries = [] - for entry in entries: - fullname = entry.path - try: - is_dir = entry.is_dir(follow_symlinks=False) - except OSError: - is_dir = False - - if is_dir and not entry.is_junction(): + walker = path.walk( + top_down=False, + follow_symlinks=False, + follow_junctions=False, + on_error=lambda err: onexc(err._func, err.filename, err)) + for dirpath, _, filenames in walker: + for filename in filenames: try: - if entry.is_symlink(): - # This can only happen if someone replaces - # a directory with a symlink after the call to - # os.scandir or entry.is_dir above. - raise OSError("Cannot call rmtree on a symbolic link") + dirpath._make_child_relpath(filename).unlink() except OSError as err: - onexc(os.path.islink, fullname, err) - continue - _rmtree_unsafe(fullname, onexc) - else: - try: - os.unlink(fullname) - except OSError as err: - onexc(os.unlink, fullname, err) - try: - os.rmdir(path) - except OSError as err: - onexc(os.rmdir, path, err) + onexc(os.unlink, err.filename, err) + try: + dirpath.rmdir() + except OSError as err: + onexc(os.rmdir, err.filename, err) # Version using fd-based APIs to protect against races def _rmtree_safe_fd(topfd, path, onexc): - try: - with os.scandir(topfd) as scandir_it: - entries = list(scandir_it) - except OSError as err: - err.filename = path - onexc(os.scandir, path, err) - return - for entry in entries: - fullname = os.path.join(path, entry.name) - try: - is_dir = entry.is_dir(follow_symlinks=False) - except OSError: - is_dir = False - else: - if is_dir: - try: - orig_st = entry.stat(follow_symlinks=False) - is_dir = stat.S_ISDIR(orig_st.st_mode) - except OSError as err: - onexc(os.lstat, fullname, err) - continue - if is_dir: + walker = path._fwalk( + top_down=False, + follow_symlinks=False, + on_error=lambda err: onexc(err._func, err.filename, err), + dir_fd=topfd) + for dirpath, dirnames, filenames, fd in walker: + for filename in filenames: try: - dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd) - dirfd_closed = False + os.unlink(filename, dir_fd=fd) except OSError as err: - onexc(os.open, fullname, err) - else: - try: - if os.path.samestat(orig_st, os.fstat(dirfd)): - _rmtree_safe_fd(dirfd, fullname, onexc) - try: - os.close(dirfd) - dirfd_closed = True - os.rmdir(entry.name, dir_fd=topfd) - except OSError as err: - onexc(os.rmdir, fullname, err) - else: - try: - # This can only happen if someone replaces - # a directory with a symlink after the call to - # os.scandir or stat.S_ISDIR above. - raise OSError("Cannot call rmtree on a symbolic " - "link") - except OSError as err: - onexc(os.path.islink, fullname, err) - finally: - if not dirfd_closed: - os.close(dirfd) - else: + onexc(os.unlink, str(dirpath._make_child_relpath(filename)), err) + for dirname in dirnames: + try: + os.rmdir(dirname, dir_fd=fd) + except OSError as err: + onexc(os.rmdir, str(dirpath._make_child_relpath(dirname)), err) + if dirpath == path: try: - os.unlink(entry.name, dir_fd=topfd) + os.rmdir(path, dir_fd=topfd) except OSError as err: - onexc(os.unlink, fullname, err) + onexc(os.rmdir, str(path), err) -_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= - os.supports_dir_fd and - os.scandir in os.supports_fd and - os.stat in os.supports_follow_symlinks) +_use_fd_functions = hasattr(pathlib.Path, '_fwalk') and {os.unlink, os.rmdir} <= os.supports_dir_fd def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): """Recursively delete a directory tree. @@ -718,51 +652,20 @@ def onexc(*args): else: exc_info = type(exc), exc, exc.__traceback__ return onerror(func, path, exc_info) - + if isinstance(path, bytes): + path = os.fsdecode(path) + path = pathlib.Path(path) if _use_fd_functions: - # While the unsafe rmtree works fine on bytes, the fd based does not. - if isinstance(path, bytes): - path = os.fsdecode(path) - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - try: - orig_st = os.lstat(path, dir_fd=dir_fd) - except Exception as err: - onexc(os.lstat, path, err) - return - try: - fd = os.open(path, os.O_RDONLY, dir_fd=dir_fd) - fd_closed = False - except Exception as err: - onexc(os.open, path, err) - return - try: - if os.path.samestat(orig_st, os.fstat(fd)): - _rmtree_safe_fd(fd, path, onexc) - try: - os.close(fd) - fd_closed = True - os.rmdir(path, dir_fd=dir_fd) - except OSError as err: - onexc(os.rmdir, path, err) - else: - try: - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) - finally: - if not fd_closed: - os.close(fd) + return _rmtree_safe_fd(dir_fd, path, onexc) else: if dir_fd is not None: raise NotImplementedError("dir_fd unavailable on this platform") try: - if _rmtree_islink(path): + if path.is_symlink() or path.is_junction(): # symlinks to directories are forbidden, see bug #1669 raise OSError("Cannot call rmtree on a symbolic link") except OSError as err: - onexc(os.path.islink, path, err) + onexc(os.path.islink, str(path), err) # can't continue even if onexc hook returns return return _rmtree_unsafe(path, onexc) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 8b6e012b730d75..bfe68f2fce9edc 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2693,20 +2693,20 @@ def setUp(self): del self.sub2_tree[1][:1] def test_walk_topdown(self): - all = list(self.walk_path.walk()) - - self.assertEqual(len(all), 4) - # We can't know which order SUB1 and SUB2 will appear in. - # Not flipped: TESTFN, SUB1, SUB11, SUB2 - # flipped: TESTFN, SUB2, SUB1, SUB11 - flipped = all[0][1][0] != "SUB1" - all[0][1].sort() - all[3 - 2 * flipped][-1].sort() - all[3 - 2 * flipped][1].sort() - self.assertEqual(all[0], (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) - self.assertEqual(all[1 + flipped], (self.sub1_path, ["SUB11"], ["tmp2"])) - self.assertEqual(all[2 + flipped], (self.sub11_path, [], [])) - self.assertEqual(all[3 - 2 * flipped], self.sub2_tree) + walker = self.walk_path.walk() + entry = next(walker) + entry[1].sort() # Ensure we visit SUB1 before SUB2 + self.assertEqual(entry, (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) + entry = next(walker) + self.assertEqual(entry, (self.sub1_path, ["SUB11"], ["tmp2"])) + entry = next(walker) + self.assertEqual(entry, (self.sub11_path, [], [])) + entry = next(walker) + entry[1].sort() + entry[2].sort() + self.assertEqual(entry, self.sub2_tree) + with self.assertRaises(StopIteration): + next(walker) def test_walk_prune(self, walk_path=None): if walk_path is None: @@ -2730,24 +2730,37 @@ def test_file_like_path(self): self.test_walk_prune(FakePath(self.walk_path).__fspath__()) def test_walk_bottom_up(self): - all = list(self.walk_path.walk( top_down=False)) - - self.assertEqual(len(all), 4, all) - # We can't know which order SUB1 and SUB2 will appear in. - # Not flipped: SUB11, SUB1, SUB2, TESTFN - # flipped: SUB2, SUB11, SUB1, TESTFN - flipped = all[3][1][0] != "SUB1" - all[3][1].sort() - all[2 - 2 * flipped][-1].sort() - all[2 - 2 * flipped][1].sort() - self.assertEqual(all[3], - (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) - self.assertEqual(all[flipped], - (self.sub11_path, [], [])) - self.assertEqual(all[flipped + 1], - (self.sub1_path, ["SUB11"], ["tmp2"])) - self.assertEqual(all[2 - 2 * flipped], - self.sub2_tree) + seen_testfn = seen_sub1 = seen_sub11 = seen_sub2 = False + for path, dirnames, filenames in self.walk_path.walk(top_down=False): + if path == self.walk_path: + self.assertFalse(seen_testfn) + self.assertTrue(seen_sub1) + self.assertTrue(seen_sub2) + self.assertEqual(sorted(dirnames), ["SUB1", "SUB2"]) + self.assertEqual(filenames, ["tmp1"]) + seen_testfn = True + elif path == self.sub1_path: + self.assertFalse(seen_testfn) + self.assertFalse(seen_sub1) + self.assertTrue(seen_sub11) + self.assertEqual(dirnames, ["SUB11"]) + self.assertEqual(filenames, ["tmp2"]) + seen_sub1 = True + elif path == self.sub11_path: + self.assertFalse(seen_sub1) + self.assertFalse(seen_sub11) + self.assertEqual(dirnames, []) + self.assertEqual(filenames, []) + seen_sub11 = True + elif path == self.sub2_path: + self.assertFalse(seen_testfn) + self.assertFalse(seen_sub2) + self.assertEqual(sorted(dirnames), sorted(self.sub2_tree[1])) + self.assertEqual(sorted(filenames), sorted(self.sub2_tree[2])) + seen_sub2 = True + else: + raise AssertionError(f"Unexpected path: {path}") + self.assertTrue(seen_testfn) @os_helper.skip_unless_symlink def test_walk_follow_symlinks(self): diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 1c0589ced9ea89..156c0a2c56c72c 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -342,15 +342,11 @@ def onerror(*args): errors.append(args) with self.assertWarns(DeprecationWarning): shutil.rmtree(filename, onerror=onerror) - self.assertEqual(len(errors), 2) + self.assertEqual(len(errors), 1) self.assertIs(errors[0][0], os.scandir) self.assertEqual(errors[0][1], filename) self.assertIsInstance(errors[0][2][1], NotADirectoryError) self.assertEqual(errors[0][2][1].filename, filename) - self.assertIs(errors[1][0], os.rmdir) - self.assertEqual(errors[1][1], filename) - self.assertIsInstance(errors[1][2][1], NotADirectoryError) - self.assertEqual(errors[1][2][1].filename, filename) def test_rmtree_errors_onexc(self): # filename is guaranteed not to exist @@ -374,15 +370,11 @@ def test_rmtree_errors_onexc(self): def onexc(*args): errors.append(args) shutil.rmtree(filename, onexc=onexc) - self.assertEqual(len(errors), 2) + self.assertEqual(len(errors), 1) self.assertIs(errors[0][0], os.scandir) self.assertEqual(errors[0][1], filename) self.assertIsInstance(errors[0][2], NotADirectoryError) self.assertEqual(errors[0][2].filename, filename) - self.assertIs(errors[1][0], os.rmdir) - self.assertEqual(errors[1][1], filename) - self.assertIsInstance(errors[1][2], NotADirectoryError) - self.assertEqual(errors[1][2].filename, filename) @unittest.skipIf(sys.platform[:6] == 'cygwin', "This test can't be run on Cygwin (issue #1071513).")