Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revisit devices and freezer controller detection for cgroup v2 #3077

Merged
merged 3 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/raspberry-pi4.md
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,8 @@ Operating system: Linux (pass)
cgroup controller "cpuacct": available (via cpu in version 2) (pass)
cgroup controller "cpuset": available (pass)
cgroup controller "memory": available (pass)
cgroup controller "devices": available (assumed) (pass)
cgroup controller "freezer": available (assumed) (pass)
cgroup controller "devices": unknown (warning: insufficient permissions, try with elevated permissions)
cgroup controller "freezer": available (cgroup.freeze exists) (pass)
cgroup controller "pids": available (pass)
cgroup controller "hugetlb": available (pass)
cgroup controller "blkio": available (via io in version 2) (pass)
Expand Down
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ require (
github.com/bombsimon/logrusr/v4 v4.0.0
github.com/carlmjohnson/requests v0.23.4
github.com/cavaliergopher/grab/v3 v3.0.1
github.com/cilium/ebpf v0.11.0
github.com/cloudflare/cfssl v1.6.4
github.com/containerd/cgroups/v3 v3.0.2
github.com/containerd/containerd v1.7.6
github.com/denisbrodbeck/machineid v1.0.1
github.com/estesp/manifest-tool/v2 v2.0.8
Expand All @@ -33,6 +35,7 @@ require (
github.com/mitchellh/go-homedir v1.1.0
github.com/olekukonko/tablewriter v0.0.5
github.com/opencontainers/image-spec v1.1.0-rc5
github.com/opencontainers/runtime-spec v1.1.0-rc.2
github.com/otiai10/copy v1.14.0
github.com/pelletier/go-toml v1.9.5
github.com/robfig/cron v1.2.0
Expand All @@ -54,7 +57,7 @@ require (
go.uber.org/multierr v1.11.0
go.uber.org/zap v1.26.0
golang.org/x/crypto v0.14.0
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb
golang.org/x/mod v0.13.0
golang.org/x/sync v0.4.0
golang.org/x/sys v0.13.0
Expand Down Expand Up @@ -106,9 +109,7 @@ require (
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chai2010/gettext-go v1.0.2 // indirect
github.com/cilium/ebpf v0.9.1 // indirect
github.com/containerd/cgroups v1.1.0 // indirect
github.com/containerd/cgroups/v3 v3.0.2 // indirect
github.com/containerd/console v1.0.3 // indirect
github.com/containerd/continuity v0.4.2 // indirect
github.com/containerd/fifo v1.1.0 // indirect
Expand Down Expand Up @@ -211,7 +212,6 @@ require (
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/runc v1.1.9 // indirect
github.com/opencontainers/runtime-spec v1.1.0-rc.2 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
Expand Down
10 changes: 5 additions & 5 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHe
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/cilium/ebpf v0.9.1 h1:64sn2K3UKw8NbP/blsixRpF3nXuyhz/VjRlRzvlBRu4=
github.com/cilium/ebpf v0.9.1/go.mod h1:+OhNOIXx/Fnu1IE8bJz2dzOA+VSfyTfdNUVdlQnxUFY=
github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y=
github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudflare/cfssl v1.6.4 h1:NMOvfrEjFfC63K3SGXgAnFdsgkmiq4kATme5BfcqrO8=
github.com/cloudflare/cfssl v1.6.4/go.mod h1:8b3CQMxfWPAeom3zBnGJ6sd+G1NkL5TXqmDXacb+1J0=
Expand Down Expand Up @@ -255,8 +255,8 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF
github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/foxcpp/go-mockdns v1.0.0 h1:7jBqxd3WDWwi/6WhDvacvH1XsN3rOLXyHM1uhvIx6FI=
github.com/frankban/quicktest v1.14.3 h1:FJKSZTDHjyhriyC81FLQ0LY93eSai0ZyR/ZIkd3ZUKE=
github.com/frankban/quicktest v1.14.3/go.mod h1:mgiwOwqx65TmIk1wJ6Q7wvnVMocbUorkibMOrVTHZps=
github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
Expand Down Expand Up @@ -959,8 +959,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91 h1:tnebWN09GYg9OLPss1KXj8txwZc6X6uMr6VFdcGNbHw=
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb h1:xIApU0ow1zwMa2uL1VDNeQlNVFTWMQxZUZCMDy0Q4Us=
golang.org/x/exp v0.0.0-20230711153332-06a737ee72cb/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
Expand Down
10 changes: 9 additions & 1 deletion internal/pkg/sysinfo/probes/linux/cgroup_controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ func (c *cgroupControllerProbe) Probe(reporter probes.Reporter) error {
} else if available, err := sys.probeController(c.name); err != nil {
return reporter.Error(desc, err)
} else if available.available {
if available.warning != "" {
return reporter.Warn(desc, available, available.warning)
}
return reporter.Pass(desc, available)
} else if c.require {
return reporter.Reject(desc, available, "")
Expand All @@ -71,10 +74,15 @@ func (c *cgroupControllerProbe) Probe(reporter probes.Reporter) error {
// cgroupControllerAvailable is the result of probing a single cgroup
// controller.
type cgroupControllerAvailable struct {
// available tells whether the controller is treated as available. The probe
// only reports a pass or a warning when this is set.
available bool
// msg is a detail text describing the result. String returns it verbatim
// when available is set and warning is non-empty.
// NOTE(review): how msg is rendered in the remaining cases is not fully
// visible here; confirm against String.
msg string
// warning, when non-empty on an available controller, makes the probe report
// a warning with this text instead of a pass.
warning string
}

func (a cgroupControllerAvailable) String() (msg string) {
if a.available {
if a.warning != "" {
return a.msg
}

msg = "available"
} else {
msg = "unavailable"
Expand All @@ -97,7 +105,7 @@ func (p *cgroupControllerProber) probeController(s cgroupSystem, controllerName
p.once.Do(func() {
p.controllers = make(map[string]cgroupControllerAvailable)
p.err = s.loadControllers(func(name, msg string) {
p.controllers[name] = cgroupControllerAvailable{true, msg}
p.controllers[name] = cgroupControllerAvailable{true, msg, ""}
})
})
return p.controllers[controllerName], p.err
Expand Down
211 changes: 173 additions & 38 deletions internal/pkg/sysinfo/probes/linux/cgroup_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,17 @@ package linux
import (
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"

"k8s.io/utils/pointer"

"github.com/cilium/ebpf/rlimit"
"github.com/containerd/cgroups/v3/cgroup2"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)

type cgroupV2 struct {
Expand All @@ -39,63 +45,192 @@ func (*cgroupV2) String() string {
}

// probeController reports the availability of the named controller. The
// "devices" and "freezer" controllers each have a dedicated detection method;
// every other controller name is delegated to g.controllers.
func (g *cgroupV2) probeController(controllerName string) (cgroupControllerAvailable, error) {
	if controllerName == "devices" {
		return g.detectDevicesController()
	}
	if controllerName == "freezer" {
		return g.detectFreezerController()
	}

	// Everything else goes through the shared controller prober.
	return g.controllers.probeController(g, controllerName)
}

func (g *cgroupV2) loadControllers(seen func(string, string)) error {
// Some controllers are implicitly enabled by the kernel. Those controllers
// do not appear in /sys/fs/cgroup/cgroup.controllers. Their availability is
// assumed based on the kernel version, as it is hard to detect them
// directly.
// https://github.com/torvalds/linux/blob/v5.3/kernel/cgroup/cgroup.c#L433-L434
if major, minor, err := parseKernelRelease(g.probeUname); err == nil {
/* devices: since 4.15 */ if major > 4 || (major == 4 && minor >= 15) {
seen("devices", "assumed")
}
/* freezer: since 5.2 */ if major > 5 || (major == 5 && minor >= 2) {
seen("freezer", "assumed")
}
} else {
return err
return g.detectListedRootControllers(seen)
}

// Detects the device controller by trying to attach a dummy program of type
// BPF_CGROUP_DEVICE to a cgroup. Since the controller has no interface files
// and is implemented purely on top of BPF, this is the only reliable way to
// detect it. A best-guess detection via the kernel version has the major
// drawback of not working with kernels that have a lot of backported features,
// such as RHEL and friends.
//
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#device-controller
func (g *cgroupV2) detectDevicesController() (cgroupControllerAvailable, error) {
err := attachDummyDeviceFilter(g.mountPoint)
switch {
case err == nil:
return cgroupControllerAvailable{true, "device filters attachable", ""}, nil

// EACCES occurs when not allowed to create cgroups.
// EPERM occurs when not allowed to load eBPF programs.
case errors.Is(err, os.ErrPermission) && os.Geteuid() != 0:
return cgroupControllerAvailable{true, "unknown", "insufficient permissions, try with elevated permissions"}, nil
case errors.Is(err, unix.EROFS):
return cgroupControllerAvailable{true, "unknown", fmt.Sprintf("read-only file system: %s", g.mountPoint)}, nil

case eBPFProgramUnsupported(err):
return cgroupControllerAvailable{false, err.Error(), ""}, nil
}

controllerData, err := os.ReadFile(filepath.Join(g.mountPoint, "cgroup.controllers"))
return cgroupControllerAvailable{}, err
}

// Attaches a dummy program of type BPF_CGROUP_DEVICE to a randomly created
// cgroup and removes the program and cgroup again.
func attachDummyDeviceFilter(mountPoint string) (err error) {
insts, license, err := cgroup2.DeviceFilter([]specs.LinuxDeviceCgroup{{
Allow: true,
Type: "a",
Major: pointer.Int64(-1),
Minor: pointer.Int64(-1),
Access: "rwm",
}})
if err != nil {
return err
return fmt.Errorf("failed to create eBPF device filter program: %w", err)
}

for _, controllerName := range strings.Fields(string(controllerData)) {
seen(controllerName, "")
switch controllerName {
case "cpu": // This is the successor to the version 1 cpu and cpuacct controllers.
seen("cpuacct", "via cpu in "+g.String())
case "io": // This is the successor of the version 1 blkio controller.
seen("blkio", "via io in "+g.String())
tmpCgroupPath, err := os.MkdirTemp(mountPoint, "k0s-devices-detection-*")
if err != nil {
return fmt.Errorf("failed to create temporary cgroup: %w", err)
}
defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }()

dirFD, err := unix.Open(tmpCgroupPath, unix.O_DIRECTORY|unix.O_RDONLY|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("failed to open temporary cgroup: %w", &fs.PathError{Op: "open", Path: tmpCgroupPath, Err: err})
}
defer func() {
if closeErr := unix.Close(dirFD); closeErr != nil {
err = errors.Join(err, &fs.PathError{Op: "close", Path: tmpCgroupPath, Err: closeErr})
}
}()

close, err := cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
if err != nil {
// RemoveMemlock may be required on kernels < 5.11
// observed on debian 11: 5.10.0-21-armmp-lpae #1 SMP Debian 5.10.162-1 (2023-01-21) armv7l
// https://github.com/cilium/ebpf/blob/v0.11.0/prog.go#L356-L360
if errors.Is(err, unix.EPERM) && strings.Contains(err.Error(), "RemoveMemlock") {
if err2 := rlimit.RemoveMemlock(); err2 != nil {
err = errors.Join(err, err2)
} else {
// Try again, MEMLOCK should be removed by now.
close, err2 = cgroup2.LoadAttachCgroupDeviceFilter(insts, license, dirFD)
if err2 != nil {
err = errors.Join(err, err2)
} else {
err = nil
}
}
}
}
if err != nil {
if eBPFProgramUnsupported(err) {
return err
}
return fmt.Errorf("failed to load/attach eBPF device filter program: %w", err)
}

return nil
return close()
}

// eBPFProgramUnsupported reports whether err indicates that the running
// kernel doesn't support an eBPF program.
//
// Two errno values are treated as "unsupported":
//   - EINVAL occurs when attempting to create a program with an unknown type.
//   - E2BIG occurs when ProgLoadAttr contains non-zero bytes past the end of
//     the struct known by the running kernel, meaning the kernel is too old
//     to support the given prog type.
//
// https://github.com/cilium/ebpf/blob/v0.11.0/features/prog.go#L43-L49
func eBPFProgramUnsupported(err error) bool {
	return errors.Is(err, unix.EINVAL) || errors.Is(err, unix.E2BIG)
}

func parseKernelRelease(probeUname unameProber) (int64, int64, error) {
uname, err := probeUname()
// Detect the freezer controller. It doesn't appear in the cgroup.controllers
// file. Check for the existence of the cgroup.freeze file in the k0s cgroup
// instead, or try to create a dummy cgroup if k0s runs in the root cgroup.
//
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files
func (g *cgroupV2) detectFreezerController() (cgroupControllerAvailable, error) {

// Detect the freezer controller by checking k0s's cgroup for the existence
// of the cgroup.freeze file.
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#processes
cgroupPath, err := cgroup2.NestedGroupPath("")
if err != nil {
return 0, 0, err
return cgroupControllerAvailable{}, fmt.Errorf("failed to get k0s cgroup: %w", err)
}

var major, minor int64
r := regexp.MustCompile(`^(\d+)\.(\d+)(\.|$)`)
if matches := r.FindStringSubmatch(uname.osRelease.value); matches == nil {
err = errors.New("unsupported format")
} else {
if major, err = strconv.ParseInt(matches[1], 10, 16); err == nil {
minor, err = strconv.ParseInt(matches[2], 10, 16)
if cgroupPath != "/" {
cgroupPath = filepath.Join(g.mountPoint, cgroupPath)
} else { // The root cgroup cannot be frozen. Try to create a dummy cgroup.
tmpCgroupPath, err := os.MkdirTemp(g.mountPoint, "k0s-freezer-detection-*")
if err != nil {
if errors.Is(err, os.ErrPermission) && os.Geteuid() != 0 {
return cgroupControllerAvailable{true, "unknown", "insufficient permissions, try with elevated permissions"}, nil
}
if errors.Is(err, unix.EROFS) && os.Geteuid() != 0 {
return cgroupControllerAvailable{true, "unknown", fmt.Sprintf("read-only file system: %s", g.mountPoint)}, nil
}

return cgroupControllerAvailable{}, fmt.Errorf("failed to create temporary cgroup: %w", err)
}
defer func() { err = errors.Join(err, os.Remove(tmpCgroupPath)) }()
cgroupPath = tmpCgroupPath
}

// Check if the cgroup.freeze exists
if stat, err := os.Stat(filepath.Join(cgroupPath, "cgroup.freeze")); (err == nil && stat.IsDir()) || os.IsNotExist(err) {
return cgroupControllerAvailable{false, "cgroup.freeze doesn't exist", ""}, nil
} else if err != nil {
return cgroupControllerAvailable{}, err
}
return cgroupControllerAvailable{true, "cgroup.freeze exists", ""}, nil
}

// Detects all the listed root controllers.
//
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#core-interface-files
func (g *cgroupV2) detectListedRootControllers(seen func(string, string)) (err error) {
root, err := cgroup2.Load("/", cgroup2.WithMountpoint(g.mountPoint))
if err != nil {
err = fmt.Errorf("failed to parse kernel release %q: %w", uname.osRelease, err)
return fmt.Errorf("failed to load root cgroup: %w", err)
}

return major, minor, err
controllerNames, err := root.RootControllers() // This reads cgroup.controllers
if err != nil {
return fmt.Errorf("failed to list cgroup root controllers: %w", err)
}

for _, controllerName := range controllerNames {
seen(controllerName, "is a listed root controller")
switch controllerName {
case "cpu": // This is the successor to the version 1 cpu and cpuacct controllers.
seen("cpuacct", "via cpu in "+g.String())
case "io": // This is the successor of the version 1 blkio controller.
seen("blkio", "via io in "+g.String())
}
}

return nil
}
2 changes: 1 addition & 1 deletion internal/pkg/sysinfo/probes/linux/cgroups_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func TestCgroupsProbes_Probe(t *testing.T) {
t.Run("Pass", func(t *testing.T) {
init()

available := cgroupControllerAvailable{true, ""}
available := cgroupControllerAvailable{true, "", ""}

reporter.On("Pass", mock.Anything, mockSys).Return(nil)
mockSys.On("probeController", "foo").Return(available, nil)
Expand Down
Loading