From 13a6f56097e9d6be75b15ddd33749dc92d1bc88c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 26 Sep 2024 16:42:59 -0700 Subject: [PATCH] runc run: fix mount leak When preparing to mount container root, we need to make its parent mount private (i.e. disable propagation), otherwise the new in-container mounts are leaked to the host. To find a parent mount, we use to read mountinfo and find the longest entry which can be a parent of the container root directory. Unfortunately, due to kernel bug in all Linux kernels older than v5.8 (see [1], [2]), sometimes mountinfo can't be read in its entirety. In this case, getParentMount may occasionally return a wrong parent mount. As a result, we do not change the mount propagation to private, and container mounts are leaked. Alas, we can not fix the kernel, and reading mountinfo a few times to ensure its consistency (like it's done in, say, Kubernetes) does not look like a good solution for performance reasons. Fortunately, we don't need mountinfo. Let's just traverse the directory tree, trying to remount it private until we find a mount point (any error other than EINVAL means we just found it). Fixes issue 2404. [1]: https://github.com/kolyshkin/procfs-test [2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9f6c61f96f2d97cbb5f Signed-off-by: Kir Kolyshkin --- libcontainer/rootfs_linux.go | 68 ++++++++++++------------------------ 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index f5112398cb2..18fa54dd315 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -982,54 +982,33 @@ func mknodDevice(dest string, node *devices.Device) error { return os.Chown(dest, int(node.Uid), int(node.Gid)) } -// Get the parent mount point of directory passed in as argument. Also return -// optional fields. -func getParentMount(rootfs string) (string, string, error) { - mi, err := mountinfo.GetMounts(mountinfo.ParentsFilter(rootfs)) - if err != nil { - return "", "", err - } - if len(mi) < 1 { - return "", "", fmt.Errorf("could not find parent mount of %s", rootfs) - } - - // find the longest mount point - var idx, maxlen int - for i := range mi { - if len(mi[i].Mountpoint) > maxlen { - maxlen = len(mi[i].Mountpoint) - idx = i +// rootfsParentMountPrivate ensures rootfs parent mount is private. +// This is needed for two reasons: +// - pivot_root() will fail if parent mount is shared; +// - when we bind mount rootfs, if its parent is not private, the new mount +// will propagate (leak!) to parent namespace and we don't want that. +func rootfsParentMountPrivate(path string) error { + var err error + // Assuming path is absolute and clean (this is checked in + // libcontainer/validate). Any error other than EINVAL means we failed, + // and EINVAL means this is not a mount point, so traverse up until we + // find one. + for { + err = unix.Mount("", path, "", unix.MS_PRIVATE, "") + if err == nil { + return nil } - } - return mi[idx].Mountpoint, mi[idx].Optional, nil -} - -// Make parent mount private if it was shared -func rootfsParentMountPrivate(rootfs string) error { - sharedMount := false - - parentMount, optionalOpts, err := getParentMount(rootfs) - if err != nil { - return err - } - - optsSplit := strings.Split(optionalOpts, " ") - for _, opt := range optsSplit { - if strings.HasPrefix(opt, "shared:") { - sharedMount = true + if err != unix.EINVAL || path == "/" { //nolint:errorlint // unix errors are bare break } + path = filepath.Dir(path) } - - // Make parent mount PRIVATE if it was shared. It is needed for two - // reasons. First of all pivot_root() will fail if parent mount is - // shared. Secondly when we bind mount rootfs it will propagate to - // parent namespace and we don't want that to happen. - if sharedMount { - return mount("", parentMount, "", unix.MS_PRIVATE, "") + return &mountError{ + op: "remount-private", + target: path, + flags: unix.MS_PRIVATE, + err: err, } - - return nil } func prepareRoot(config *configs.Config) error { @@ -1041,9 +1020,6 @@ func prepareRoot(config *configs.Config) error { return err } - // Make parent mount private to make sure following bind mount does - // not propagate in other namespaces. Also it will help with kernel - // check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent)) if err := rootfsParentMountPrivate(config.Rootfs); err != nil { return err }