From 123d17ab4a3310e08077208612a054ddfef9077e Mon Sep 17 00:00:00 2001 From: Evans Mungai Date: Tue, 2 Apr 2024 14:59:55 +0100 Subject: [PATCH] feat: node metrics collector (#1516) * feat: node metrics collector A collector to collect node metrics served by the API server as per the documented API https://kubernetes.io/docs/reference/instrumentation/node-metrics/ * Update CRD schemas * Add tests * Remove clean from build target * Update comments * Commit missing tests * Remove unnecessary log in tests --- Makefile | 2 + config/crds/troubleshoot.sh_collectors.yaml | 15 +++ config/crds/troubleshoot.sh_preflights.yaml | 15 +++ .../crds/troubleshoot.sh_supportbundles.yaml | 15 +++ .../troubleshoot/v1beta2/collector_shared.go | 7 ++ .../v1beta2/zz_generated.deepcopy.go | 31 ++++++ pkg/collect/collector.go | 4 + pkg/collect/k8s_node_metrics.go | 102 ++++++++++++++++++ pkg/collect/k8s_node_metrics_test.go | 97 +++++++++++++++++ pkg/collect/redact.go | 4 +- pkg/collect/result.go | 12 +-- schemas/collector-troubleshoot-v1beta2.json | 23 ++++ schemas/preflight-troubleshoot-v1beta2.json | 23 ++++ .../supportbundle-troubleshoot-v1beta2.json | 23 ++++ 14 files changed, 365 insertions(+), 8 deletions(-) create mode 100644 pkg/collect/k8s_node_metrics.go create mode 100644 pkg/collect/k8s_node_metrics_test.go diff --git a/Makefile b/Makefile index 3e49cf485..70ce471d7 100644 --- a/Makefile +++ b/Makefile @@ -80,6 +80,8 @@ support-bundle-e2e-go-test: go test ${BUILDFLAGS} ${E2EPATHS} -v; \ fi +rebuild: clean build + # Build all binaries in parallel ( -j ) build: tidy @echo "Build cli binaries" diff --git a/config/crds/troubleshoot.sh_collectors.yaml b/config/crds/troubleshoot.sh_collectors.yaml index 21f34948e..9b21df866 100644 --- a/config/crds/troubleshoot.sh_collectors.yaml +++ b/config/crds/troubleshoot.sh_collectors.yaml @@ -551,6 +551,21 @@ spec: required: - uri type: object + nodeMetrics: + properties: + collectorName: + type: string + exclude: + type: BoolString + nodeNames: + items: + type: string + type: array + selector: + items: + type: string + type: array + type: object postgres: properties: collectorName: diff --git a/config/crds/troubleshoot.sh_preflights.yaml b/config/crds/troubleshoot.sh_preflights.yaml index 1953a603c..d547449e0 100644 --- a/config/crds/troubleshoot.sh_preflights.yaml +++ b/config/crds/troubleshoot.sh_preflights.yaml @@ -2171,6 +2171,21 @@ spec: required: - uri type: object + nodeMetrics: + properties: + collectorName: + type: string + exclude: + type: BoolString + nodeNames: + items: + type: string + type: array + selector: + items: + type: string + type: array + type: object postgres: properties: collectorName: diff --git a/config/crds/troubleshoot.sh_supportbundles.yaml b/config/crds/troubleshoot.sh_supportbundles.yaml index cdd11e057..cb87baaee 100644 --- a/config/crds/troubleshoot.sh_supportbundles.yaml +++ b/config/crds/troubleshoot.sh_supportbundles.yaml @@ -2202,6 +2202,21 @@ spec: required: - uri type: object + nodeMetrics: + properties: + collectorName: + type: string + exclude: + type: BoolString + nodeNames: + items: + type: string + type: array + selector: + items: + type: string + type: array + type: object postgres: properties: collectorName: diff --git a/pkg/apis/troubleshoot/v1beta2/collector_shared.go b/pkg/apis/troubleshoot/v1beta2/collector_shared.go index c255247bf..20b33f513 100644 --- a/pkg/apis/troubleshoot/v1beta2/collector_shared.go +++ b/pkg/apis/troubleshoot/v1beta2/collector_shared.go @@ -44,6 +44,12 @@ type CustomMetrics struct { MetricRequests []MetricRequest `json:"metricRequests,omitempty" yaml:"metricRequests,omitempty"` } +type NodeMetrics struct { + CollectorMeta `json:",inline" yaml:",inline"` + NodeNames []string `json:"nodeNames,omitempty" yaml:"nodeNames,omitempty"` + Selector []string `json:"selector,omitempty" yaml:"selector,omitempty"` +} + type Secret struct { CollectorMeta `json:",inline" yaml:",inline"` Name string `json:"name,omitempty" yaml:"name,omitempty"` @@ -315,6 +321,7 @@ type Collect struct { Helm *Helm `json:"helm,omitempty" yaml:"helm,omitempty"` Goldpinger *Goldpinger `json:"goldpinger,omitempty" yaml:"goldpinger,omitempty"` Sonobuoy *Sonobuoy `json:"sonobuoy,omitempty" yaml:"sonobuoy,omitempty"` + NodeMetrics *NodeMetrics `json:"nodeMetrics,omitempty" yaml:"nodeMetrics,omitempty"` } func (c *Collect) AccessReviewSpecs(overrideNS string) []authorizationv1.SelfSubjectAccessReviewSpec { diff --git a/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go b/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go index 0a1a9077d..c7701ffcf 100644 --- a/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go +++ b/pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go @@ -916,6 +916,11 @@ func (in *Collect) DeepCopyInto(out *Collect) { *out = new(Sonobuoy) (*in).DeepCopyInto(*out) } + if in.NodeMetrics != nil { + in, out := &in.NodeMetrics, &out.NodeMetrics + *out = new(NodeMetrics) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Collect. @@ -2984,6 +2989,32 @@ func (in *MetricRequest) DeepCopy() *MetricRequest { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NodeMetrics) DeepCopyInto(out *NodeMetrics) { + *out = *in + in.CollectorMeta.DeepCopyInto(&out.CollectorMeta) + if in.NodeNames != nil { + in, out := &in.NodeNames, &out.NodeNames + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Selector != nil { + in, out := &in.Selector, &out.Selector + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeMetrics. +func (in *NodeMetrics) DeepCopy() *NodeMetrics { + if in == nil { + return nil + } + out := new(NodeMetrics) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeResourceFilters) DeepCopyInto(out *NodeResourceFilters) { *out = *in diff --git a/pkg/collect/collector.go b/pkg/collect/collector.go index 6d764afab..c10a2a33b 100644 --- a/pkg/collect/collector.go +++ b/pkg/collect/collector.go @@ -122,6 +122,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam return &CollectGoldpinger{collector.Goldpinger, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true case collector.Sonobuoy != nil: return &CollectSonobuoyResults{collector.Sonobuoy, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true + case collector.NodeMetrics != nil: + return &CollectNodeMetrics{collector.NodeMetrics, bundlePath, clientConfig, client, ctx, RBACErrors}, true default: return nil, false } @@ -211,6 +213,8 @@ func getCollectorName(c interface{}) string { collector = "goldpinger" case *CollectSonobuoyResults: collector = "sonobuoy" + case *CollectNodeMetrics: + collector = "node-metrics" default: collector = "" } diff --git a/pkg/collect/k8s_node_metrics.go b/pkg/collect/k8s_node_metrics.go new file mode 100644 index 000000000..6eb954794 --- /dev/null +++ b/pkg/collect/k8s_node_metrics.go @@ -0,0 +1,102 @@ +package collect + +import ( + "bytes" + "context" + "fmt" + "strings" + + "github.com/pkg/errors" + troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" +) + +const ( + summaryUrlTemplate = "/api/v1/nodes/%s/proxy/stats/summary" +) + +type CollectNodeMetrics struct { + Collector *troubleshootv1beta2.NodeMetrics + BundlePath string + ClientConfig *rest.Config + Client kubernetes.Interface + Context context.Context + RBACErrors +} + +func (c *CollectNodeMetrics) Title() string { + return getCollectorName(c) +} + +func (c *CollectNodeMetrics) IsExcluded() (bool, error) { + return isExcluded(c.Collector.Exclude) +} + +func (c *CollectNodeMetrics) Collect(progressChan chan<- interface{}) (CollectorResult, error) { + output := NewResult() + nodesMap := c.constructNodesMap() + if len(nodesMap) == 0 { + klog.V(2).Info("no nodes found to collect metrics for") + return output, nil + } + + nodeNames := make([]string, 0, len(nodesMap)) + for nodeName := range nodesMap { + nodeNames = append(nodeNames, nodeName) + } + + klog.V(2).Infof("collecting node metrics for [%s] nodes", strings.Join(nodeNames, ", ")) + + for nodeName, endpoint := range nodesMap { + // Equivalent to `kubectl get --raw "/api/v1/nodes//proxy/stats/summary"` + klog.V(2).Infof("querying: %+v\n", endpoint) + response, err := c.Client.CoreV1().RESTClient().Get().AbsPath(endpoint).DoRaw(c.Context) + if err != nil { + return output, errors.Wrapf(err, "could not query endpoint %s", endpoint) + } + err = output.SaveResult(c.BundlePath, fmt.Sprintf("node-metrics/%s.json", nodeName), bytes.NewBuffer(response)) + if err != nil { + klog.Errorf("failed to save node metrics for %s: %v", nodeName, err) + } + + } + return output, nil +} + +func (c *CollectNodeMetrics) constructNodesMap() map[string]string { + nodesMap := map[string]string{} + + if c.Collector.NodeNames == nil && c.Collector.Selector == nil { + // If no node names or selectors are provided, collect all nodes + nodes, err := c.Client.CoreV1().Nodes().List(c.Context, metav1.ListOptions{}) + if err != nil { + klog.Errorf("failed to list nodes: %v", err) + } + for _, node := range nodes.Items { + nodesMap[node.Name] = fmt.Sprintf(summaryUrlTemplate, node.Name) + } + return nodesMap + } + + for _, nodeName := range c.Collector.NodeNames { + nodesMap[nodeName] = fmt.Sprintf(summaryUrlTemplate, nodeName) + } + + // Find nodes by label selector + if c.Collector.Selector != nil { + nodes, err := c.Client.CoreV1().Nodes().List(c.Context, metav1.ListOptions{ + LabelSelector: strings.Join(c.Collector.Selector, ","), + }) + if err != nil { + klog.Errorf("failed to list nodes by label selector: %v", err) + } + for _, node := range nodes.Items { + nodesMap[node.Name] = fmt.Sprintf(summaryUrlTemplate, node.Name) + } + } + + return nodesMap +} diff --git a/pkg/collect/k8s_node_metrics_test.go b/pkg/collect/k8s_node_metrics_test.go new file mode 100644 index 000000000..bfaee4a18 --- /dev/null +++ b/pkg/collect/k8s_node_metrics_test.go @@ -0,0 +1,97 @@ +package collect + +import ( + "context" + "testing" + + troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + testclient "k8s.io/client-go/kubernetes/fake" +) + +func TestCollectNodeMetrics_constructNodesMap(t *testing.T) { + tests := []struct { + name string + objectMetas []metav1.ObjectMeta + collector troubleshootv1beta2.NodeMetrics + want map[string]string + }{ + { + name: "default collector no nodes", + want: map[string]string{}, + }, + { + name: "default collector one node", + objectMetas: []metav1.ObjectMeta{ + { + Name: "node1", + }, + }, + want: map[string]string{ + "node1": "/api/v1/nodes/node1/proxy/stats/summary", + }, + }, + { + name: "collector with node list picking one node", + objectMetas: []metav1.ObjectMeta{ + { + Name: "node1", + }, + { + Name: "node2", + }, + }, + collector: troubleshootv1beta2.NodeMetrics{ + NodeNames: []string{"node2"}, + }, + want: map[string]string{ + "node2": "/api/v1/nodes/node2/proxy/stats/summary", + }, + }, + { + name: "collector with selector picking one node", + objectMetas: []metav1.ObjectMeta{ + { + Name: "node1", + Labels: map[string]string{ + "hostname": "node1.example.com", + }, + }, + { + Name: "node2", + }, + }, + collector: troubleshootv1beta2.NodeMetrics{ + Selector: []string{"hostname=node1.example.com"}, + }, + want: map[string]string{ + "node1": "/api/v1/nodes/node1/proxy/stats/summary", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client := testclient.NewSimpleClientset() + ctx := context.Background() + collector := tt.collector + c := &CollectNodeMetrics{ + Collector: &collector, + Client: client, + Context: ctx, + } + + for _, objectMeta := range tt.objectMetas { + _, err := client.CoreV1().Nodes().Create(ctx, &v1.Node{ + ObjectMeta: objectMeta, + }, metav1.CreateOptions{}) + require.NoError(t, err) + } + + got := c.constructNodesMap() + assert.Equalf(t, tt.want, got, "constructNodesMap() = %v, want %v", got, tt.want) + }) + } +} diff --git a/pkg/collect/redact.go b/pkg/collect/redact.go index 3f689bc59..e86cab0b1 100644 --- a/pkg/collect/redact.go +++ b/pkg/collect/redact.go @@ -71,9 +71,9 @@ func RedactResult(bundlePath string, input CollectorResult, additionalRedactors errorCh <- errors.Wrap(err, "failed to get relative path") return } - klog.V(2).Infof("Redacting %s (symlink => %s)\n", file, symlink) + klog.V(4).Infof("Redacting %s (symlink => %s)\n", file, symlink) } else { - klog.V(2).Infof("Redacting %s\n", file) + klog.V(4).Infof("Redacting %s\n", file) } r, err := input.GetReader(bundlePath, file) if err != nil { diff --git a/pkg/collect/result.go b/pkg/collect/result.go index 225362539..5d41fcb26 100644 --- a/pkg/collect/result.go +++ b/pkg/collect/result.go @@ -25,7 +25,7 @@ func NewResult() CollectorResult { // is empty, no symlink is created. The relativeLinkPath is always saved in the result map. func (r CollectorResult) SymLinkResult(bundlePath, relativeLinkPath, relativeFilePath string) error { // We should have saved the result this symlink is pointing to prior to creating it - klog.V(2).Info("Creating symlink ", relativeLinkPath, " -> ", relativeFilePath) + klog.V(4).Info("Creating symlink ", relativeLinkPath, " -> ", relativeFilePath) data, ok := r[relativeFilePath] if !ok { return errors.Errorf("cannot create symlink, result in %q not found", relativeFilePath) @@ -75,7 +75,7 @@ func (r CollectorResult) SymLinkResult(bundlePath, relativeLinkPath, relativeFil return errors.Wrap(err, "failed to create symlink") } - klog.V(2).Infof("Added %q symlink of %q in bundle output", relativeLinkPath, relativeFilePath) + klog.V(4).Infof("Added %q symlink of %q in bundle output", relativeLinkPath, relativeFilePath) // store the file name referencing the symlink to have archived r[relativeLinkPath] = nil @@ -105,7 +105,7 @@ func (r CollectorResult) SaveResult(bundlePath string, relativePath string, read return errors.Wrap(err, "failed to read data") } // Memory only bundle - klog.V(2).Infof("Added %q to bundle output", relativePath) + klog.V(4).Infof("Added %q to bundle output", relativePath) r[relativePath] = data return nil } @@ -135,7 +135,7 @@ func (r CollectorResult) SaveResult(bundlePath string, relativePath string, read return errors.Wrap(err, "failed to stat file") } - klog.V(2).Infof("Added %q (%d KB) to bundle output", relativePath, fileInfo.Size()/(1024)) + klog.V(4).Infof("Added %q (%d KB) to bundle output", relativePath, fileInfo.Size()/(1024)) return nil } @@ -340,7 +340,7 @@ func (r CollectorResult) ArchiveSupportBundle(bundlePath string, outputFilename if fileMode.Type() == os.ModeSymlink { // Don't copy the symlink, just write the header which // will create a symlink in the tarball - klog.V(2).Infof("Added %q symlink to bundle archive", hdr.Linkname) + klog.V(4).Infof("Added %q symlink to bundle archive", hdr.Linkname) return nil } @@ -354,7 +354,7 @@ func (r CollectorResult) ArchiveSupportBundle(bundlePath string, outputFilename if err != nil { return errors.Wrap(err, "failed to copy file into archive") } - klog.V(2).Infof("Added %q file to bundle archive", hdr.Name) + klog.V(4).Infof("Added %q file to bundle archive", hdr.Name) return nil }() diff --git a/schemas/collector-troubleshoot-v1beta2.json b/schemas/collector-troubleshoot-v1beta2.json index d434ee216..85a24c865 100644 --- a/schemas/collector-troubleshoot-v1beta2.json +++ b/schemas/collector-troubleshoot-v1beta2.json @@ -774,6 +774,29 @@ } } }, + "nodeMetrics": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "nodeNames": { + "type": "array", + "items": { + "type": "string" + } + }, + "selector": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, "postgres": { "type": "object", "required": [ diff --git a/schemas/preflight-troubleshoot-v1beta2.json b/schemas/preflight-troubleshoot-v1beta2.json index 67b1e6dff..e6d85367e 100644 --- a/schemas/preflight-troubleshoot-v1beta2.json +++ b/schemas/preflight-troubleshoot-v1beta2.json @@ -3269,6 +3269,29 @@ } } }, + "nodeMetrics": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "nodeNames": { + "type": "array", + "items": { + "type": "string" + } + }, + "selector": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, "postgres": { "type": "object", "required": [ diff --git a/schemas/supportbundle-troubleshoot-v1beta2.json b/schemas/supportbundle-troubleshoot-v1beta2.json index 8849b12bc..67b55411a 100644 --- a/schemas/supportbundle-troubleshoot-v1beta2.json +++ b/schemas/supportbundle-troubleshoot-v1beta2.json @@ -3315,6 +3315,29 @@ } } }, + "nodeMetrics": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "nodeNames": { + "type": "array", + "items": { + "type": "string" + } + }, + "selector": { + "type": "array", + "items": { + "type": "string" + } + } + } + }, "postgres": { "type": "object", "required": [