Skip to content

Commit

Permalink
feat: node metrics collector (#1516)
Browse files Browse the repository at this point in the history
* feat: node metrics collector

A collector that gathers the node metrics served by the API server, as
described in the Kubernetes documentation: https://kubernetes.io/docs/reference/instrumentation/node-metrics/

* Update CRD schemas

* Add tests

* Remove clean from build target

* Update comments

* Commit missing tests

* Remove unnecessary log in tests
  • Loading branch information
banjoh authored Apr 2, 2024
1 parent 867c706 commit 123d17a
Show file tree
Hide file tree
Showing 14 changed files with 365 additions and 8 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ support-bundle-e2e-go-test:
go test ${BUILDFLAGS} ${E2EPATHS} -v; \
fi

rebuild: clean build

# Build all binaries in parallel ( -j )
build: tidy
@echo "Build cli binaries"
Expand Down
15 changes: 15 additions & 0 deletions config/crds/troubleshoot.sh_collectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,21 @@ spec:
required:
- uri
type: object
nodeMetrics:
properties:
collectorName:
type: string
exclude:
type: BoolString
nodeNames:
items:
type: string
type: array
selector:
items:
type: string
type: array
type: object
postgres:
properties:
collectorName:
Expand Down
15 changes: 15 additions & 0 deletions config/crds/troubleshoot.sh_preflights.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2171,6 +2171,21 @@ spec:
required:
- uri
type: object
nodeMetrics:
properties:
collectorName:
type: string
exclude:
type: BoolString
nodeNames:
items:
type: string
type: array
selector:
items:
type: string
type: array
type: object
postgres:
properties:
collectorName:
Expand Down
15 changes: 15 additions & 0 deletions config/crds/troubleshoot.sh_supportbundles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2202,6 +2202,21 @@ spec:
required:
- uri
type: object
nodeMetrics:
properties:
collectorName:
type: string
exclude:
type: BoolString
nodeNames:
items:
type: string
type: array
selector:
items:
type: string
type: array
type: object
postgres:
properties:
collectorName:
Expand Down
7 changes: 7 additions & 0 deletions pkg/apis/troubleshoot/v1beta2/collector_shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ type CustomMetrics struct {
MetricRequests []MetricRequest `json:"metricRequests,omitempty" yaml:"metricRequests,omitempty"`
}

// NodeMetrics is the spec for the node metrics collector. It selects which
// nodes to collect metrics from; when neither NodeNames nor Selector is set,
// the collector gathers metrics for all nodes it can list.
type NodeMetrics struct {
CollectorMeta `json:",inline" yaml:",inline"`
// NodeNames lists, by name, the nodes to collect metrics from.
NodeNames []string `json:"nodeNames,omitempty" yaml:"nodeNames,omitempty"`
// Selector holds label selector expressions used to find nodes. The
// collector joins the entries with "," into a single label selector.
Selector []string `json:"selector,omitempty" yaml:"selector,omitempty"`
}

type Secret struct {
CollectorMeta `json:",inline" yaml:",inline"`
Name string `json:"name,omitempty" yaml:"name,omitempty"`
Expand Down Expand Up @@ -315,6 +321,7 @@ type Collect struct {
Helm *Helm `json:"helm,omitempty" yaml:"helm,omitempty"`
Goldpinger *Goldpinger `json:"goldpinger,omitempty" yaml:"goldpinger,omitempty"`
Sonobuoy *Sonobuoy `json:"sonobuoy,omitempty" yaml:"sonobuoy,omitempty"`
NodeMetrics *NodeMetrics `json:"nodeMetrics,omitempty" yaml:"nodeMetrics,omitempty"`
}

func (c *Collect) AccessReviewSpecs(overrideNS string) []authorizationv1.SelfSubjectAccessReviewSpec {
Expand Down
31 changes: 31 additions & 0 deletions pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/collect/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam
return &CollectGoldpinger{collector.Goldpinger, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
case collector.Sonobuoy != nil:
return &CollectSonobuoyResults{collector.Sonobuoy, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
case collector.NodeMetrics != nil:
return &CollectNodeMetrics{collector.NodeMetrics, bundlePath, clientConfig, client, ctx, RBACErrors}, true
default:
return nil, false
}
Expand Down Expand Up @@ -211,6 +213,8 @@ func getCollectorName(c interface{}) string {
collector = "goldpinger"
case *CollectSonobuoyResults:
collector = "sonobuoy"
case *CollectNodeMetrics:
collector = "node-metrics"
default:
collector = "<none>"
}
Expand Down
102 changes: 102 additions & 0 deletions pkg/collect/k8s_node_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package collect

import (
"bytes"
"context"
"fmt"
"strings"

"github.com/pkg/errors"
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/klog/v2"
)

const (
// summaryUrlTemplate is the API server path template for a node's stats
// summary endpoint; %s is replaced with the node name.
summaryUrlTemplate = "/api/v1/nodes/%s/proxy/stats/summary"
)

// CollectNodeMetrics collects the stats summary of cluster nodes by querying
// each node's summary endpoint through the API server.
type CollectNodeMetrics struct {
// Collector is the spec that selects which nodes to collect metrics from.
Collector *troubleshootv1beta2.NodeMetrics
// BundlePath is passed to SaveResult when storing each node's metrics.
BundlePath string
// ClientConfig is not referenced by the methods visible here.
ClientConfig *rest.Config
// Client is used to list nodes and to query the summary endpoints.
Client kubernetes.Interface
// Context is passed to every API request made by the collector.
Context context.Context
RBACErrors
}

// Title returns the name that getCollectorName reports for this collector.
func (c *CollectNodeMetrics) Title() string {
return getCollectorName(c)
}

// IsExcluded reports whether this collector should be skipped, as decided by
// isExcluded from the spec's Exclude setting.
func (c *CollectNodeMetrics) IsExcluded() (bool, error) {
return isExcluded(c.Collector.Exclude)
}

// Collect queries the stats summary endpoint of every selected node and stores
// each raw response in the result as "node-metrics/<nodeName>.json".
//
// If no nodes are selected an empty result is returned. A failed query aborts
// the collection and returns the results gathered so far together with the
// error. A failure to save a response is only logged, and collection continues
// with the remaining nodes.
func (c *CollectNodeMetrics) Collect(progressChan chan<- interface{}) (CollectorResult, error) {
	result := NewResult()

	endpoints := c.constructNodesMap()
	if len(endpoints) == 0 {
		klog.V(2).Info("no nodes found to collect metrics for")
		return result, nil
	}

	// Gather the selected node names purely for the log line below.
	names := make([]string, 0, len(endpoints))
	for name := range endpoints {
		names = append(names, name)
	}
	klog.V(2).Infof("collecting node metrics for [%s] nodes", strings.Join(names, ", "))

	for name, summaryPath := range endpoints {
		// Equivalent to `kubectl get --raw "/api/v1/nodes/<nodeName>/proxy/stats/summary"`
		klog.V(2).Infof("querying: %+v\n", summaryPath)
		request := c.Client.CoreV1().RESTClient().Get().AbsPath(summaryPath)
		raw, err := request.DoRaw(c.Context)
		if err != nil {
			return result, errors.Wrapf(err, "could not query endpoint %s", summaryPath)
		}

		// Saving is best effort: log the failure and move on to the next node.
		savePath := fmt.Sprintf("node-metrics/%s.json", name)
		if saveErr := result.SaveResult(c.BundlePath, savePath, bytes.NewBuffer(raw)); saveErr != nil {
			klog.Errorf("failed to save node metrics for %s: %v", name, saveErr)
		}
	}

	return result, nil
}

// constructNodesMap builds a map from node name to the stats summary API path
// to query for that node.
//
// When neither node names nor label selectors are configured, every node
// returned by listing the cluster's nodes is included. Otherwise the result is
// the union of the explicitly named nodes and the nodes matching the label
// selectors. Explicitly named nodes are added as given, without checking that
// they exist.
//
// Failures to list nodes are logged rather than returned; the failed lookup
// simply contributes no nodes to the result.
func (c *CollectNodeMetrics) constructNodesMap() map[string]string {
	nodesMap := map[string]string{}

	// Use len() rather than nil checks so that empty lists (e.g. `selector: []`)
	// behave exactly like omitted ones.
	hasNodeNames := len(c.Collector.NodeNames) > 0
	hasSelector := len(c.Collector.Selector) > 0

	if !hasNodeNames && !hasSelector {
		// If no node names or selectors are provided, collect all nodes
		nodes, err := c.Client.CoreV1().Nodes().List(c.Context, metav1.ListOptions{})
		if err != nil {
			// Do not touch the list after an error: it may be nil.
			klog.Errorf("failed to list nodes: %v", err)
			return nodesMap
		}
		for _, node := range nodes.Items {
			nodesMap[node.Name] = fmt.Sprintf(summaryUrlTemplate, node.Name)
		}
		return nodesMap
	}

	for _, nodeName := range c.Collector.NodeNames {
		nodesMap[nodeName] = fmt.Sprintf(summaryUrlTemplate, nodeName)
	}

	if !hasSelector {
		return nodesMap
	}

	// Find nodes by label selector. An empty selector list is skipped above,
	// because an empty label selector would match every node in the cluster.
	nodes, err := c.Client.CoreV1().Nodes().List(c.Context, metav1.ListOptions{
		LabelSelector: strings.Join(c.Collector.Selector, ","),
	})
	if err != nil {
		// Keep the explicitly named nodes even if the selector lookup fails.
		klog.Errorf("failed to list nodes by label selector: %v", err)
		return nodesMap
	}
	for _, node := range nodes.Items {
		nodesMap[node.Name] = fmt.Sprintf(summaryUrlTemplate, node.Name)
	}

	return nodesMap
}
97 changes: 97 additions & 0 deletions pkg/collect/k8s_node_metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package collect

import (
"context"
"testing"

troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
testclient "k8s.io/client-go/kubernetes/fake"
)

// TestCollectNodeMetrics_constructNodesMap checks that constructNodesMap maps
// the expected node names to their stats summary paths, for the default
// collector as well as for collectors restricted by node names or by a label
// selector. Nodes are created in a fake clientset before each case runs.
func TestCollectNodeMetrics_constructNodesMap(t *testing.T) {
	type testCase struct {
		name        string
		objectMetas []metav1.ObjectMeta
		collector   troubleshootv1beta2.NodeMetrics
		want        map[string]string
	}

	cases := []testCase{
		{
			name: "default collector no nodes",
			want: map[string]string{},
		},
		{
			name: "default collector one node",
			objectMetas: []metav1.ObjectMeta{
				{Name: "node1"},
			},
			want: map[string]string{
				"node1": "/api/v1/nodes/node1/proxy/stats/summary",
			},
		},
		{
			name: "collector with node list picking one node",
			objectMetas: []metav1.ObjectMeta{
				{Name: "node1"},
				{Name: "node2"},
			},
			collector: troubleshootv1beta2.NodeMetrics{
				NodeNames: []string{"node2"},
			},
			want: map[string]string{
				"node2": "/api/v1/nodes/node2/proxy/stats/summary",
			},
		},
		{
			name: "collector with selector picking one node",
			objectMetas: []metav1.ObjectMeta{
				{
					Name: "node1",
					Labels: map[string]string{
						"hostname": "node1.example.com",
					},
				},
				{Name: "node2"},
			},
			collector: troubleshootv1beta2.NodeMetrics{
				Selector: []string{"hostname=node1.example.com"},
			},
			want: map[string]string{
				"node1": "/api/v1/nodes/node1/proxy/stats/summary",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ctx := context.Background()
			fakeClient := testclient.NewSimpleClientset()

			// Seed the fake cluster with the nodes this case expects to exist.
			for _, meta := range tc.objectMetas {
				node := &v1.Node{ObjectMeta: meta}
				_, err := fakeClient.CoreV1().Nodes().Create(ctx, node, metav1.CreateOptions{})
				require.NoError(t, err)
			}

			// Copy the spec so the collector never points into the case table.
			spec := tc.collector
			c := &CollectNodeMetrics{
				Collector: &spec,
				Client:    fakeClient,
				Context:   ctx,
			}

			got := c.constructNodesMap()
			assert.Equalf(t, tc.want, got, "constructNodesMap() = %v, want %v", got, tc.want)
		})
	}
}
4 changes: 2 additions & 2 deletions pkg/collect/redact.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ func RedactResult(bundlePath string, input CollectorResult, additionalRedactors
errorCh <- errors.Wrap(err, "failed to get relative path")
return
}
klog.V(2).Infof("Redacting %s (symlink => %s)\n", file, symlink)
klog.V(4).Infof("Redacting %s (symlink => %s)\n", file, symlink)
} else {
klog.V(2).Infof("Redacting %s\n", file)
klog.V(4).Infof("Redacting %s\n", file)
}
r, err := input.GetReader(bundlePath, file)
if err != nil {
Expand Down
Loading

0 comments on commit 123d17a

Please sign in to comment.