diff --git a/go.mod b/go.mod index fdb71676..6539386e 100644 --- a/go.mod +++ b/go.mod @@ -124,7 +124,7 @@ require ( github.com/onsi/gomega v1.17.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect - github.com/opencontainers/runc v1.1.4 // indirect + github.com/opencontainers/runc v1.1.5 // indirect github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 // indirect github.com/opencontainers/selinux v1.10.1 // indirect github.com/pierrec/lz4 v2.6.1+incompatible // indirect diff --git a/go.sum b/go.sum index ce4af475..560dd41c 100644 --- a/go.sum +++ b/go.sum @@ -840,8 +840,8 @@ github.com/opencontainers/runc v1.0.0-rc8.0.20190926000215-3e425f80a8c9/go.mod h github.com/opencontainers/runc v1.0.0-rc9/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= github.com/opencontainers/runc v1.0.0-rc93/go.mod h1:3NOsor4w32B2tC0Zbl8Knk4Wg84SM2ImC1fxBuqJ/H0= github.com/opencontainers/runc v1.0.2/go.mod h1:aTaHFFwQXuA71CiyxOdFFIorAoemI04suvGRQFzWTD0= -github.com/opencontainers/runc v1.1.4 h1:nRCz/8sKg6K6jgYAFLDlXzPeITBZJyX28DBVhWD+5dg= -github.com/opencontainers/runc v1.1.4/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= +github.com/opencontainers/runc v1.1.5 h1:L44KXEpKmfWDcS02aeGm8QNTFXTo2D+8MYGDIJ/GDEs= +github.com/opencontainers/runc v1.1.5/go.mod h1:1J5XiS+vdZ3wCyZybsuxXZWGrgSr8fFJHLXuG2PsnNg= github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.1/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.2-0.20190207185410-29686dbc5559/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go index 45744c15..b6bfb080 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go @@ -293,8 +293,18 @@ func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, err // rules separately to systemd) we can safely skip entries that don't // have a corresponding path. if _, err := os.Stat(entry.Path); err != nil { - logrus.Debugf("skipping device %s for systemd: %s", entry.Path, err) - continue + // Also check /sys/dev so that we don't depend on /dev/{block,char} + // being populated. (/dev/{block,char} is populated by udev, which + // isn't strictly required for systemd). Ironically, this happens most + // easily when starting containerd within a runc created container + // itself. + + // We don't bother with securejoin here because we create entry.Path + // right above here, so we know it's safe. + if _, err := os.Stat("/sys" + entry.Path); err != nil { + logrus.Warnf("skipping device %s for systemd: %s", entry.Path, err) + continue + } } } deviceAllowList = append(deviceAllowList, entry) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go index 1e5c394c..2e4c5935 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go @@ -411,8 +411,9 @@ func fixStdioPermissions(u *user.ExecUser) error { return &os.PathError{Op: "fstat", Path: file.Name(), Err: err} } - // Skip chown if uid is already the one we want. - if int(s.Uid) == u.Uid { + // Skip chown if uid is already the one we want or any of the STDIO descriptors + // were redirected to /dev/null. + if int(s.Uid) == u.Uid || s.Rdev == null.Rdev { continue } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index 9ecf791e..2d224bab 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -168,15 +168,17 @@ static void write_log(int level, const char *format, ...) message = escape_json_string(message); - if (current_stage == STAGE_SETUP) + if (current_stage == STAGE_SETUP) { stage = strdup("nsexec"); - else + if (stage == NULL) + goto out; + } else { ret = asprintf(&stage, "nsexec-%d", current_stage); - if (ret < 0) { - stage = NULL; - goto out; + if (ret < 0) { + stage = NULL; + goto out; + } } - ret = asprintf(&json, "{\"level\":\"%s\", \"msg\": \"%s[%d]: %s\"}\n", level_str[level], stage, getpid(), message); if (ret < 0) { @@ -830,6 +832,25 @@ void send_mountsources(int sockfd, pid_t child, char *mountsources, size_t mount bail("failed to close container mount namespace fd %d", container_mntns_fd); } +void try_unshare(int flags, const char *msg) +{ + write_log(DEBUG, "unshare %s", msg); + /* + * Kernels prior to v4.3 may return EINVAL on unshare when another process + * reads runc's /proc/$PID/status or /proc/$PID/maps. To work around this, + * retry on EINVAL a few times. + */ + int retries = 5; + for (; retries > 0; retries--) { + if (unshare(flags) == 0) { + return; + } + if (errno != EINVAL) + break; + } + bail("failed to unshare %s", msg); +} + void nsexec(void) { int pipenum; @@ -1168,9 +1189,7 @@ void nsexec(void) * problem. */ if (config.cloneflags & CLONE_NEWUSER) { - write_log(DEBUG, "unshare user namespace"); - if (unshare(CLONE_NEWUSER) < 0) - bail("failed to unshare user namespace"); + try_unshare(CLONE_NEWUSER, "user namespace"); config.cloneflags &= ~CLONE_NEWUSER; /* @@ -1222,9 +1241,7 @@ void nsexec(void) * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) * was broken, so we'll just do it the long way anyway. */ - write_log(DEBUG, "unshare remaining namespace (except cgroupns)"); - if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) - bail("failed to unshare remaining namespaces (except cgroupns)"); + try_unshare(config.cloneflags & ~CLONE_NEWCGROUP, "remaining namespaces (except cgroupns)"); /* Ask our parent to send the mount sources fds. */ if (config.mountsources) { @@ -1342,8 +1359,7 @@ void nsexec(void) } if (config.cloneflags & CLONE_NEWCGROUP) { - if (unshare(CLONE_NEWCGROUP) < 0) - bail("failed to unshare cgroup namespace"); + try_unshare(CLONE_NEWCGROUP, "cgroup namespace"); } write_log(DEBUG, "signal completion to stage-0"); diff --git a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go index ec7638e4..c3f88fc7 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go @@ -329,26 +329,41 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error { if err := os.MkdirAll(dest, 0o755); err != nil { return err } - return utils.WithProcfd(c.root, m.Destination, func(procfd string) error { - if err := mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil { - // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158) - if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY) { - src := fs2.UnifiedMountpoint - if c.cgroupns && c.cgroup2Path != "" { - // Emulate cgroupns by bind-mounting - // the container cgroup path rather than - // the whole /sys/fs/cgroup. - src = c.cgroup2Path - } - err = mount(src, m.Destination, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "") - if c.rootlessCgroups && errors.Is(err, unix.ENOENT) { - err = nil - } - } - return err - } - return nil + err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error { + return mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data) }) + if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) { + return err + } + + // When we are in UserNS but CgroupNS is not unshared, we cannot mount + // cgroup2 (#2158), so fall back to bind mount. + bindM := &configs.Mount{ + Device: "bind", + Source: fs2.UnifiedMountpoint, + Destination: m.Destination, + Flags: unix.MS_BIND | m.Flags, + PropagationFlags: m.PropagationFlags, + } + if c.cgroupns && c.cgroup2Path != "" { + // Emulate cgroupns by bind-mounting the container cgroup path + // rather than the whole /sys/fs/cgroup. + bindM.Source = c.cgroup2Path + } + // mountToRootfs() handles remounting for MS_RDONLY. + // No need to set c.fd here, because mountToRootfs() calls utils.WithProcfd() by itself in mountPropagate(). + err = mountToRootfs(bindM, c) + if c.rootlessCgroups && errors.Is(err, unix.ENOENT) { + // ENOENT (for `src = c.cgroup2Path`) happens when rootless runc is being executed + // outside the userns+mountns. + // + // Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted + // with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`). + err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error { + return maskPath(procfd, c.label) + }) + } + return err } func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) { @@ -398,32 +413,43 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) { func mountToRootfs(m *configs.Mount, c *mountConfig) error { rootfs := c.root - mountLabel := c.label - mountFd := c.fd - dest, err := securejoin.SecureJoin(rootfs, m.Destination) - if err != nil { - return err - } + // procfs and sysfs are special because we need to ensure they are actually + // mounted on a specific path in a container without any funny business. switch m.Device { case "proc", "sysfs": // If the destination already exists and is not a directory, we bail - // out This is to avoid mounting through a symlink or similar -- which + // out. This is to avoid mounting through a symlink or similar -- which // has been a "fun" attack scenario in the past. // TODO: This won't be necessary once we switch to libpathrs and we can // stop all of these symlink-exchange attacks. + dest := filepath.Clean(m.Destination) + if !strings.HasPrefix(dest, rootfs) { + // Do not use securejoin as it resolves symlinks. + dest = filepath.Join(rootfs, dest) + } if fi, err := os.Lstat(dest); err != nil { if !os.IsNotExist(err) { return err } - } else if fi.Mode()&os.ModeDir == 0 { + } else if !fi.IsDir() { return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) } if err := os.MkdirAll(dest, 0o755); err != nil { return err } - // Selinux kernels do not support labeling of /proc or /sys + // Selinux kernels do not support labeling of /proc or /sys. return mountPropagate(m, rootfs, "", nil) + } + + mountLabel := c.label + mountFd := c.fd + dest, err := securejoin.SecureJoin(rootfs, m.Destination) + if err != nil { + return err + } + + switch m.Device { case "mqueue": if err := os.MkdirAll(dest, 0o755); err != nil { return err diff --git a/vendor/modules.txt b/vendor/modules.txt index 3bea3e62..d8650643 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -557,7 +557,7 @@ github.com/opencontainers/go-digest ## explicit github.com/opencontainers/image-spec/specs-go github.com/opencontainers/image-spec/specs-go/v1 -# github.com/opencontainers/runc v1.1.4 +# github.com/opencontainers/runc v1.1.5 ## explicit; go 1.16 github.com/opencontainers/runc/libcontainer github.com/opencontainers/runc/libcontainer/apparmor