From 668b7ed0b2918ccc492c7e78a20f63f68de1ff0d Mon Sep 17 00:00:00 2001 From: Ricardo Maraschini Date: Fri, 27 Sep 2024 10:16:49 -0500 Subject: [PATCH] feat: add CPU micro architecture support (#1628) allows troubleshoot to collect and analyze CPU micro architecture. this is a usage example: ```yaml apiVersion: troubleshoot.sh/v1beta2 kind: HostPreflight metadata: name: ec-cluster-preflight spec: collectors: - cpu: {} analyzers: - cpu: checkName: CPU outcomes: - pass: when: 'supports x86-64-v2' message: CPU supports x86-64-v2 - fail: message: CPU does not support x86-64-v2 ``` --- pkg/analyze/host_cpu.go | 44 ++++++++++++++++++++++++++++++++---- pkg/analyze/host_cpu_test.go | 15 +++++++++++- pkg/collect/host_cpu.go | 24 ++++++++++++++++++-- pkg/collect/host_cpu_test.go | 5 ++-- 4 files changed, 79 insertions(+), 9 deletions(-) diff --git a/pkg/analyze/host_cpu.go b/pkg/analyze/host_cpu.go index 4605a784f..62703292a 100644 --- a/pkg/analyze/host_cpu.go +++ b/pkg/analyze/host_cpu.go @@ -2,6 +2,7 @@ package analyzer import ( "encoding/json" + "slices" "strconv" "strings" @@ -10,6 +11,18 @@ import ( "github.com/replicatedhq/troubleshoot/pkg/collect" ) +// microarchs holds a list of features present in each microarchitecture. +// ref: https://gitlab.com/x86-psABIs/x86-64-ABI +// ref: https://developers.redhat.com/blog/2021/01/05/building-red-hat-enterprise-linux-9-for-the-x86-64-v2-microarchitecture-level +var microarchs = map[string][]string{ + "x86-64-v2": {"cx16", "lahf_lm", "popcnt", "ssse3", "sse4_1", "sse4_2", "ssse3"}, + "x86-64-v3": {"avx", "avx2", "bmi1", "bmi2", "f16c", "fma", "lzcnt", "movbe", "xsave"}, + "x86-64-v4": {"avx512f", "avx512bw", "avx512cd", "avx512dq", "avx512vl"}, +} + +// x8664BaseFeatures are the features that are present in all x86-64 microarchitectures. 
+var x8664BaseFeatures = []string{"cmov", "cx8", "fpu", "fxsr", "mmx", "syscall", "sse", "sse2"} + type AnalyzeHostCPU struct { hostAnalyzer *troubleshootv1beta2.CPUAnalyze } @@ -52,7 +65,7 @@ func (a *AnalyzeHostCPU) Analyze( return []*AnalyzeResult{&result}, nil } - isMatch, err := compareHostCPUConditionalToActual(outcome.Fail.When, cpuInfo.LogicalCount, cpuInfo.PhysicalCount) + isMatch, err := compareHostCPUConditionalToActual(outcome.Fail.When, cpuInfo.LogicalCount, cpuInfo.PhysicalCount, cpuInfo.Flags) if err != nil { return nil, errors.Wrap(err, "failed to compare") } @@ -73,7 +86,7 @@ func (a *AnalyzeHostCPU) Analyze( return []*AnalyzeResult{&result}, nil } - isMatch, err := compareHostCPUConditionalToActual(outcome.Warn.When, cpuInfo.LogicalCount, cpuInfo.PhysicalCount) + isMatch, err := compareHostCPUConditionalToActual(outcome.Warn.When, cpuInfo.LogicalCount, cpuInfo.PhysicalCount, cpuInfo.Flags) if err != nil { return nil, errors.Wrap(err, "failed to compare") } @@ -94,7 +107,7 @@ func (a *AnalyzeHostCPU) Analyze( return []*AnalyzeResult{&result}, nil } - isMatch, err := compareHostCPUConditionalToActual(outcome.Pass.When, cpuInfo.LogicalCount, cpuInfo.PhysicalCount) + isMatch, err := compareHostCPUConditionalToActual(outcome.Pass.When, cpuInfo.LogicalCount, cpuInfo.PhysicalCount, cpuInfo.Flags) if err != nil { return nil, errors.Wrap(err, "failed to compare") } @@ -112,7 +125,25 @@ func (a *AnalyzeHostCPU) Analyze( return []*AnalyzeResult{&result}, nil } -func compareHostCPUConditionalToActual(conditional string, logicalCount int, physicalCount int) (res bool, err error) { +func doCompareHostCPUMicroArchitecture(microarch string, flags []string) (res bool, err error) { + specifics, ok := microarchs[microarch] + if !ok && microarch != "x86-64" { + return false, errors.Errorf("troubleshoot does not yet support microarchitecture %q", microarch) + } + expectedFlags := x8664BaseFeatures + if len(specifics) > 0 { + expectedFlags = append(expectedFlags, 
specifics...) + } + for _, flag := range expectedFlags { + if slices.Contains(flags, flag) { + continue + } + return false, nil + } + return true, nil +} + +func compareHostCPUConditionalToActual(conditional string, logicalCount int, physicalCount int, flags []string) (res bool, err error) { compareLogical := false comparePhysical := false compareUnspecified := false @@ -137,6 +168,11 @@ func compareHostCPUConditionalToActual(conditional string, logicalCount int, phy desired = parts[1] } + // analyze if the cpu supports a specific set of features, aka microarchitecture. + if strings.ToLower(comparator) == "supports" { + return doCompareHostCPUMicroArchitecture(desired, flags) + } + if !compareLogical && !comparePhysical && !compareUnspecified { return false, errors.New("unable to parse conditional") } diff --git a/pkg/analyze/host_cpu_test.go b/pkg/analyze/host_cpu_test.go index cc3b69017..b607cdcb9 100644 --- a/pkg/analyze/host_cpu_test.go +++ b/pkg/analyze/host_cpu_test.go @@ -81,6 +81,7 @@ func Test_compareHostCPUConditionalToActual(t *testing.T) { when string logicalCount int physicalCount int + flags []string expected bool }{ { @@ -139,12 +140,24 @@ func Test_compareHostCPUConditionalToActual(t *testing.T) { physicalCount: 4, expected: true, }, + { + name: "supports x86-64-v2 microarchitecture", + when: "supports x86-64-v2", + flags: []string{""}, + expected: false, + }, + { + name: "supports x86-64-v2 microarchitecture", + when: "supports x86-64-v2", + flags: []string{"cmov", "cx8", "fpu", "fxsr", "mmx", "syscall", "sse", "sse2", "cx16", "lahf_lm", "popcnt", "ssse3", "sse4_1", "sse4_2", "ssse3"}, + expected: true, + }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { req := require.New(t) - actual, err := compareHostCPUConditionalToActual(test.when, test.logicalCount, test.physicalCount) + actual, err := compareHostCPUConditionalToActual(test.when, test.logicalCount, test.physicalCount, test.flags) req.NoError(err) assert.Equal(t, 
test.expected, actual) diff --git a/pkg/collect/host_cpu.go b/pkg/collect/host_cpu.go index feae0a29d..d35cb00f6 100644 --- a/pkg/collect/host_cpu.go +++ b/pkg/collect/host_cpu.go @@ -10,8 +10,9 @@ import ( ) type CPUInfo struct { - LogicalCount int `json:"logicalCount"` - PhysicalCount int `json:"physicalCount"` + LogicalCount int `json:"logicalCount"` + PhysicalCount int `json:"physicalCount"` + Flags []string `json:"flags"` } const HostCPUPath = `host-collectors/system/cpu.json` @@ -44,6 +45,25 @@ func (c *CollectHostCPU) Collect(progressChan chan<- interface{}) (map[string][] } cpuInfo.PhysicalCount = physicalCount + // XXX even though the cpu.Info() returns a slice per CPU it is way + // common to have the same flags for all CPUs. We consolidate them here + // so the output is a list of all different flags present in all CPUs. + info, err := cpu.Info() + if err != nil { + return nil, errors.Wrap(err, "failed to get cpu info") + } + + seen := make(map[string]bool) + for _, infoForCPU := range info { + for _, flag := range infoForCPU.Flags { + if seen[flag] { + continue + } + seen[flag] = true + cpuInfo.Flags = append(cpuInfo.Flags, flag) + } + } + b, err := json.Marshal(cpuInfo) if err != nil { return nil, errors.Wrap(err, "failed to marshal cpu info") diff --git a/pkg/collect/host_cpu_test.go b/pkg/collect/host_cpu_test.go index 3ce5fbfea..3c5d6f9da 100644 --- a/pkg/collect/host_cpu_test.go +++ b/pkg/collect/host_cpu_test.go @@ -20,12 +20,13 @@ func TestCollectHostCPU_Collect(t *testing.T) { require.Contains(t, got, "host-collectors/system/cpu.json") values := got["host-collectors/system/cpu.json"] - var m map[string]int + var m map[string]interface{} err = json.Unmarshal(values, &m) require.NoError(t, err) // Check if values exist. They will be different on different machines. - assert.Equal(t, 2, len(m)) + assert.Equal(t, 3, len(m)) assert.Contains(t, m, "logicalCount") assert.Contains(t, m, "physicalCount") + assert.Contains(t, m, "flags") }