fix: add missing node label on infra metrics #126

Merged 3 commits on Dec 8, 2024
3 changes: 2 additions & 1 deletion charts/lumigo-operator/templates/cluster-agent-service.yaml
@@ -18,5 +18,6 @@ spec:
       protocol: TCP
       port: {{ .Values.prometheusNodeExporter.service.port }}
       targetPort: {{ .Values.prometheusNodeExporter.service.port }}
-  type: ClusterIP
+      nodePort: {{ .Values.prometheusNodeExporter.service.nodePort }}
+  type: NodePort
 {{- end }}
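
Switching the cluster-agent service from ClusterIP to NodePort is what makes the node exporter reachable on each node's own address. As a rough sketch only, assuming the chart's default values (port 9100, nodePort 30090) and an illustrative release name and namespace that are not part of this diff, the rendered Service would look roughly like this:

    # Sketch of the rendered Service (defaults assumed; name/namespace are illustrative)
    apiVersion: v1
    kind: Service
    metadata:
      name: lumigo-lumigo-operator-cluster-agent-service   # assumed release name
      namespace: lumigo-system                             # assumed namespace
    spec:
      type: NodePort            # was ClusterIP before this change
      ports:
        - protocol: TCP
          port: 9100            # .Values.prometheusNodeExporter.service.port
          targetPort: 9100
          nodePort: 30090       # .Values.prometheusNodeExporter.service.nodePort

With type: NodePort, port 30090 is opened on every node, which is exactly the address form the new scrape job below builds via relabeling.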
@@ -258,7 +258,7 @@ spec:
         - name: LUMIGO_CLUSTER_AGENT_SERVICE
           value: "{{ include "helm.fullname" . }}-cluster-agent-service.{{ .Release.Namespace }}.svc.cluster.local"
         - name: LUMIGO_PROM_NODE_EXPORTER_PORT
-          value: "{{ .Values.prometheusNodeExporter.service.port }}"
+          value: "{{ .Values.prometheusNodeExporter.service.nodePort }}"
         - name: LUMIGO_KUBE_STATE_METRICS_SERVICE
           value: "{{ .Release.Name }}-kube-state-metrics.{{ .Release.Namespace }}.svc.cluster.local"
         - name: LUMIGO_KUBE_STATE_METRICS_PORT
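
This hunk (presumably the telemetry-proxy container spec in the operator's Deployment template) switches LUMIGO_PROM_NODE_EXPORTER_PORT from the Service port to the NodePort. With the defaults and an assumed release name and namespace, the rendered env would come out roughly as:

    # Illustrative rendered env (release name and namespace are assumptions, not from this diff)
    - name: LUMIGO_CLUSTER_AGENT_SERVICE
      value: "lumigo-lumigo-operator-cluster-agent-service.lumigo-system.svc.cluster.local"
    - name: LUMIGO_PROM_NODE_EXPORTER_PORT
      value: "30090"   # now the NodePort; previously "9100", the ClusterIP port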
1 change: 1 addition & 0 deletions charts/lumigo-operator/values.yaml
@@ -59,6 +59,7 @@ prometheusNodeExporter:
     tag: v1.8.2
   service:
     port: 9100
+    nodePort: 30090
   resources:
     limits:
       cpu: 500m
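
The default of 30090 sits inside Kubernetes' standard NodePort range (30000-32767). If that port is already taken in a cluster, it can be overridden; a hypothetical override in a custom values file (the keys are the chart's, the number is only an example):

    prometheusNodeExporter:
      service:
        port: 9100
        nodePort: 30901   # example value; any free port in the NodePort range works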
19 changes: 15 additions & 4 deletions telemetryproxy/docker/etc/config.yaml.tpl
@@ -46,10 +46,21 @@ receivers:
           authorization:
             credentials_file: "/var/run/secrets/kubernetes.io/serviceaccount/token"
         - job_name: 'prometheus-node-exporter'
-          metrics_path: /metrics
-          scrape_interval: {{ $infraMetricsFrequency }}
-          static_configs:
-            - targets: ['{{ getenv "LUMIGO_CLUSTER_AGENT_SERVICE" }}:{{ getenv "LUMIGO_PROM_NODE_EXPORTER_PORT" }}']
+          kubernetes_sd_configs:
+            - role: node
+          relabel_configs:
+            - source_labels: [__meta_kubernetes_node_address_InternalIP]
+              action: replace
+              target_label: __address__
+              # Scrape a custom port provided by LUMIGO_PROM_NODE_EXPORTER_PORT.
+              # '$$1' escapes '$1', as Gomplate otherwise thinks it's an environment variable.
+              replacement: '$$1:$LUMIGO_PROM_NODE_EXPORTER_PORT'
+            - source_labels: [__meta_kubernetes_node_name]
+              action: replace
+              target_label: node
+          metrics_path: "/metrics"
+          authorization:
+            credentials_file: "/var/run/secrets/kubernetes.io/serviceaccount/token"
         - job_name: 'kube-state-metrics'
           metrics_path: /metrics
           scrape_interval: {{ $infraMetricsFrequency }}
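
To make the relabeling concrete: after Gomplate substitutes the environment variables, '$$1' becomes a literal '$1', which Prometheus expands to the node's InternalIP (the default relabel regex '(.*)' captures the whole source label). Assuming LUMIGO_PROM_NODE_EXPORTER_PORT renders to the default 30090, the job would come out roughly as:

    # Illustrative rendered scrape job (values assumed, not taken verbatim from this PR)
    - job_name: 'prometheus-node-exporter'
      kubernetes_sd_configs:
        - role: node                       # one target per cluster node
      relabel_configs:
        - source_labels: [__meta_kubernetes_node_address_InternalIP]
          action: replace
          target_label: __address__
          replacement: '$1:30090'          # scrape <node InternalIP>:<NodePort>
        - source_labels: [__meta_kubernetes_node_name]
          action: replace
          target_label: node               # the label this PR's title refers to
      metrics_path: "/metrics"
      authorization:
        credentials_file: "/var/run/secrets/kubernetes.io/serviceaccount/token"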
51 changes: 37 additions & 14 deletions tests/kubernetes-distros/kind/lumigooperator_metrics_test.go
@@ -71,24 +71,47 @@ func TestLumigoOperatorInfraMetrics(t *testing.T) {
             }
         }

-        allMetricNames := strings.Join(uniqueMetricNames, " ")
-        expectedSampleMetrics := []string{
-            // A sample for cadvisor metrics
-            "container_fs_usage_bytes",
-            // A sample for kube-state-metrics metrics
-            "kube_pod_status_scheduled",
-            // A sample for Prometheus Node Exporter metrics
-            "node_cpu_seconds_total",
-        }
+        prometheusNodeExporterMetricsFound := false
+        cadvisorMetricsFound := false
+        kubeStateMetricsFound := false

+        for _, metric := range metrics {
+            if metric.Name() == "node_cpu_seconds_total" {
+                prometheusNodeExporterMetricsFound = true
+                for i := 0; i < metric.Sum().DataPoints().Len(); i++ {
+                    attributes := metric.Sum().DataPoints().At(i).Attributes()
+                    _, nodeAttributeExists := attributes.Get("node")
+                    if !nodeAttributeExists {
+                        t.Logf("could not find attribute 'node' for metric 'node_cpu_seconds_total'")
+                        return false, nil
+                    }
+                }
+            }
+
+            if metric.Name() == "container_fs_usage_bytes" {
+                cadvisorMetricsFound = true
+            }

-        t.Logf("Collected metrics so far: %v\n", uniqueMetricNames)
-        for _, expectedSampleMetric := range expectedSampleMetrics {
-            if !strings.Contains(allMetricNames, expectedSampleMetric) {
-                t.Logf("could not find %s among collected metrics", expectedSampleMetric)
-                return false, nil
+            if metric.Name() == "kube_pod_status_scheduled" {
+                kubeStateMetricsFound = true
+            }
         }

+        if !prometheusNodeExporterMetricsFound {
+            t.Logf("could not find Prometheus Node Exporter metrics. Seen metrics: %v", uniqueMetricNames)
+            return false, nil
+        }
+
+        if !cadvisorMetricsFound {
+            t.Logf("could not find cAdvisor metrics. Seen metrics: %v", uniqueMetricNames)
+            return false, nil
+        }
+
+        if !kubeStateMetricsFound {
+            t.Logf("could not find kube-state-metrics. Seen metrics: %v", uniqueMetricNames)
+            return false, nil
+        }
+
         return true, nil
     }); err != nil {
         t.Fatalf("Failed to wait for metrics: %v", err)
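
The reworked test above no longer just checks that a few sample metric names appear: for node_cpu_seconds_total it walks every data point and requires the node attribute that the new relabel rule adds. For reference, a hypothetical data point that would satisfy the check, written out as YAML for readability (names and values are only examples):

    # Hypothetical node_cpu_seconds_total data point (illustrative only)
    name: node_cpu_seconds_total
    attributes:
      cpu: "0"
      mode: "idle"
      node: "kind-control-plane"   # added by the new relabel rule; the test asserts its presence
    value: 12345.67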