From b994e841c9e77a5ac6865fb7dd2007ac99937c73 Mon Sep 17 00:00:00 2001 From: Jeff French Date: Wed, 10 Jan 2024 05:57:45 -0600 Subject: [PATCH 1/3] Use specific label for selecting monitors and rules --- charts/moonswitch-agent/Chart.yaml | 2 +- charts/moonswitch-agent/values.yaml | 192 ++++++++++++++++++++-------- 2 files changed, 140 insertions(+), 54 deletions(-) diff --git a/charts/moonswitch-agent/Chart.yaml b/charts/moonswitch-agent/Chart.yaml index c38db65..3d8166c 100644 --- a/charts/moonswitch-agent/Chart.yaml +++ b/charts/moonswitch-agent/Chart.yaml @@ -6,7 +6,7 @@ icon: https://static.moonswitch.com/logos/color/icon.svg sources: - https://github.com/moonswitch/charts -version: 0.10.0 +version: 0.10.1 dependencies: - name: teleport-kube-agent diff --git a/charts/moonswitch-agent/values.yaml b/charts/moonswitch-agent/values.yaml index 84f90f9..3e8ce4b 100644 --- a/charts/moonswitch-agent/values.yaml +++ b/charts/moonswitch-agent/values.yaml @@ -12,6 +12,10 @@ teleport-kube-agent: enabled: true minAvailable: 1 roles: kube + podMonitor: + enabled: true + additionalLabels: + monitored-by: moonswitch-agent joinParams: method: "token" tokenName: @@ -41,6 +45,7 @@ teleport-kube-agent: cloudflare-tunnel-remote: enabled: true + # TODO: write our own servicemonitor for this :2000/metrics kube-prometheus-stack: enabled: true @@ -48,6 +53,9 @@ kube-prometheus-stack: "moonswitch.io/app": moonswitch-agent alertmanager: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent alertmanagerSpec: storage: volumeClaimTemplate: @@ -55,8 +63,15 @@ kube-prometheus-stack: resources: requests: storage: 40Gi + alertmanagerConfigSelector: + matchLabels: + monitored-by: moonswitch-agent + grafana: + serviceMonitor: + labels: + monitored-by: moonswitch-agent defaultDashboardsTimezone: browser persistence: enabled: true @@ -84,7 +99,59 @@ kube-prometheus-stack: cleanPrometheusOperatorObjectNames: true + defaultRules: + labels: + monitored-by: moonswitch-agent + + 
kubeApiServer: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + + kubelet: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + + kubeControllerManager: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + + coreDns: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + + kubeDns: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + + kubeEtcd: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + + kubeScheduler: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + + kubeProxy: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + + prometheusOperator: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent + prometheus: + serviceMonitor: + additionalLabels: + monitored-by: moonswitch-agent prometheusSpec: storageSpec: volumeClaimTemplate: @@ -92,26 +159,21 @@ kube-prometheus-stack: resources: requests: storage: 100Gi - additionalScrapeConfigs: - - job_name: kubecost - honor_labels: true - scrape_interval: 1m - scrape_timeout: 60s - metrics_path: /metrics - scheme: http - dns_sd_configs: - - names: - - moonswitch-agent-cost-analyzer - type: 'A' - port: 9003 - - job_name: kubecost-networking - kubernetes_sd_configs: - - role: pod - relabel_configs: - # These will need to be updated when we update the chart version past 1.106.x - - source_labels: [__meta_kubernetes_pod_label_app] - action: keep - regex: moonswitch-agent-network-costs + ruleSelector: + matchLabels: + monitored-by: moonswitch-agent + serviceMonitorSelector: + matchLabels: + monitored-by: moonswitch-agent + podMonitorSelector: + matchLabels: + monitored-by: moonswitch-agent + probeSelector: + matchLabels: + monitored-by: moonswitch-agent + scrapeConfigSelector: + matchLabels: + monitored-by: moonswitch-agent prometheus-node-exporter: affinity: @@ -124,39 +186,11 @@ kube-prometheus-stack: values: - fargate - 
additionalPrometheusRulesMap: - kubecost: - groups: - - name: CPU - rules: - - expr: sum(rate(container_cpu_usage_seconds_total{container!=""}[5m])) - record: cluster:cpu_usage:rate5m - - expr: rate(container_cpu_usage_seconds_total{container!=""}[5m]) - record: cluster:cpu_usage_nosum:rate5m - - expr: avg(irate(container_cpu_usage_seconds_total{container!="POD", container!=""}[5m])) by (container,pod,namespace) - record: kubecost_container_cpu_usage_irate - - expr: sum(container_memory_working_set_bytes{container!="POD",container!=""}) by (container,pod,namespace) - record: kubecost_container_memory_working_set_bytes - - expr: sum(container_memory_working_set_bytes{container!="POD",container!=""}) - record: kubecost_cluster_memory_working_set_bytes - - name: Savings - rules: - - expr: sum(avg(kube_pod_owner{owner_kind!="DaemonSet"}) by (pod) * sum(container_cpu_allocation) by (pod)) - record: kubecost_savings_cpu_allocation - labels: - daemonset: "false" - - expr: sum(avg(kube_pod_owner{owner_kind="DaemonSet"}) by (pod) * sum(container_cpu_allocation) by (pod)) / sum(kube_node_info) - record: kubecost_savings_cpu_allocation - labels: - daemonset: "true" - - expr: sum(avg(kube_pod_owner{owner_kind!="DaemonSet"}) by (pod) * sum(container_memory_allocation_bytes) by (pod)) - record: kubecost_savings_memory_allocation_bytes - labels: - daemonset: "false" - - expr: sum(avg(kube_pod_owner{owner_kind="DaemonSet"}) by (pod) * sum(container_memory_allocation_bytes) by (pod)) / sum(kube_node_info) - record: kubecost_savings_memory_allocation_bytes - labels: - daemonset: "true" + kube-state-metrics: + prometheus: + monitor: + additionalLabels: + monitored-by: moonswitch-agent weave-gitops: enabled: true @@ -165,6 +199,17 @@ weave-gitops: "moonswitch.io/app": moonswitch-agent additionalArgs: - --insecure-no-authentication-user=gitops-dashboard-user + metrics: + enabled: true + annotations: {} + # TODO: Write our own ServiceMonitor for this :2112/metrics + resources: + 
requests: + cpu: 10m + memory: 100M + limits: + cpu: 10m + memory: 100M cost-analyzer: enabled: true @@ -208,12 +253,26 @@ cost-analyzer: enabled: false kube-state-metrics: disabled: true + serviceMonitor: + enabled: true + additionalLabels: + monitored-by: moonswitch-agent + networkCosts: + enabled: true + additionalLabels: + monitored-by: moonswitch-agent + prometheusRule: + enabled: true + additionalLabels: + monitored-by: moonswitch-agent kubecostMetrics: emitKsmV1Metrics: false emitKsmV1MetricsOnly: true exporter: serviceMonitor: enabled: true + additionalLabels: + monitored-by: moonswitch-agent kubecostModel: image: "public.ecr.aws/kubecost/cost-model" outOfClusterPromMetricsEnabled: false @@ -234,6 +293,10 @@ cost-analyzer: config: services: amazon-web-services: true + podMonitor: + enabled: true + additionalLabels: + monitored-by: moonswitch-agent nginx: enabled: true @@ -244,6 +307,12 @@ nginx: staticSiteConfigmap: nginx-static-site service: type: ClusterIP + metrics: + enabled: true + serviceMonitor: + enabled: true + labels: + monitored-by: moonswitch-agent loki: enabled: true @@ -269,6 +338,13 @@ loki: installOperator: false serviceMonitor: enabled: true + labels: + monitored-by: moonswitch-agent + metricsInstance: + enabled: false + rules: + labels: + monitored-by: moonswitch-agent promtail: enabled: true @@ -285,8 +361,12 @@ promtail: expression: "(default)" # Use this to drop logs from client app namespaces e.g "(default|client-app1|cool-app-namespace)" serviceMonitor: enabled: true + labels: + monitored-by: moonswitch-agent prometheusRule: enabled: true + additionalLabels: + monitored-by: moonswitch-agent rules: - alert: PromtailRequestErrors expr: 100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m])) by (namespace, job, route, instance) / sum(rate(promtail_request_duration_seconds_count[1m])) by (namespace, job, route, instance) > 10 @@ -360,6 +440,10 @@ kubernetes-dashboard: "moonswitch.io/app": moonswitch-agent 
serviceAccount: name: moonswitch-agent-kubernetes-dashboard + serviceMonitor: + enabled: true + labels: + monitored-by: moonswitch-agent helm-dashboard: enabled: true @@ -374,6 +458,8 @@ trivy-operator: builtInTrivyServer: true serviceMonitor: enabled: true + labels: + monitored-by: moonswitch-agent trivy: resources: requests: From 953a31ff3b873af0a468a3cb0c37ee8f7c8d3fe5 Mon Sep 17 00:00:00 2001 From: Jeff French Date: Wed, 10 Jan 2024 06:46:13 -0600 Subject: [PATCH 2/3] Add monitor for gitops-dashboard --- .../gitops-dashboard-servicemonitor.yaml | 43 +++++++++++++++++++ charts/moonswitch-agent/values.yaml | 14 +++++- 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 charts/moonswitch-agent/templates/gitops-dashboard-servicemonitor.yaml diff --git a/charts/moonswitch-agent/templates/gitops-dashboard-servicemonitor.yaml b/charts/moonswitch-agent/templates/gitops-dashboard-servicemonitor.yaml new file mode 100644 index 0000000..e018020 --- /dev/null +++ b/charts/moonswitch-agent/templates/gitops-dashboard-servicemonitor.yaml @@ -0,0 +1,43 @@ +{{ $weave_gitops := index .Values "weave-gitops" }} +{{- /* Render the ServiceMonitor only when weave-gitops, its metrics endpoint, and metrics.serviceMonitor.enabled are all switched on. */ -}} {{- if and $weave_gitops.enabled $weave_gitops.metrics.enabled $weave_gitops.metrics.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "moonswitch-agent.fullname" . }}-weave-gitops + labels: + {{- include "moonswitch-agent.labels" . | nindent 4 }} + {{- with $weave_gitops.metrics.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ template "moonswitch-agent.fullname" . 
}}-weave-gitops + endpoints: + - port: http-metrics + {{- if $weave_gitops.metrics.serviceMonitor.path }} + path: {{ $weave_gitops.metrics.serviceMonitor.path }} + {{- end }} + {{- if $weave_gitops.metrics.serviceMonitor.interval }} + interval: {{ $weave_gitops.metrics.serviceMonitor.interval }} + {{- end }} + {{- if $weave_gitops.metrics.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ $weave_gitops.metrics.serviceMonitor.scrapeTimeout }} + {{- end }} + {{- if $weave_gitops.metrics.serviceMonitor.metricRelabelings }} + metricRelabelings: + {{ toYaml $weave_gitops.metrics.serviceMonitor.metricRelabelings | indent 4 }} + {{- end }} + {{- if $weave_gitops.metrics.serviceMonitor.relabelings }} + relabelings: + {{ toYaml $weave_gitops.metrics.serviceMonitor.relabelings | indent 4 }} + {{- end }} + {{- if $weave_gitops.metrics.serviceMonitor.honorLabels }} + honorLabels: true + {{- end }} + selector: + matchLabels: + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: weave-gitops + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} +{{- end }} diff --git a/charts/moonswitch-agent/values.yaml b/charts/moonswitch-agent/values.yaml index 3e8ce4b..1a68b62 100644 --- a/charts/moonswitch-agent/values.yaml +++ b/charts/moonswitch-agent/values.yaml @@ -202,7 +202,19 @@ weave-gitops: metrics: enabled: true annotations: {} - # TODO: Write our own ServiceMonitor for this :2112/metrics + serviceMonitor: + enabled: true + # -- Interval at which endpoints should be scraped. + interval: 30s + # -- Add custom labels to the ServiceMonitor resource + additionalLabels: + monitored-by: moonswitch-agent + # -- Chooses the metric’s labels on collisions with target labels. 
+ honorLabels: false + # -- Path to scrape metrics + path: /metrics + # -- Timeout for scrape metrics request + scrapeTimeout: 10s resources: requests: cpu: 10m From 60f1629cdf0ee3c2cd709052929f70aa55cf108a Mon Sep 17 00:00:00 2001 From: Jeff French Date: Wed, 10 Jan 2024 07:37:24 -0600 Subject: [PATCH 3/3] Add monitors and dashboards for flux --- .../grafana_dashboards/flux-cluster.json | 1383 +++++++++++++ .../flux-control-plane.json | 1730 +++++++++++++++++ .../grafana_dashboards/flux-logs.json | 332 ++++ .../templates/flux-podmonitor.yaml | 29 + .../grafana-dashboards-configmap.yaml | 2 +- 5 files changed, 3475 insertions(+), 1 deletion(-) create mode 100644 charts/moonswitch-agent/grafana_dashboards/flux-cluster.json create mode 100644 charts/moonswitch-agent/grafana_dashboards/flux-control-plane.json create mode 100644 charts/moonswitch-agent/grafana_dashboards/flux-logs.json create mode 100644 charts/moonswitch-agent/templates/flux-podmonitor.yaml diff --git a/charts/moonswitch-agent/grafana_dashboards/flux-cluster.json b/charts/moonswitch-agent/grafana_dashboards/flux-cluster.json new file mode 100644 index 0000000..0fbb5d0 --- /dev/null +++ b/charts/moonswitch-agent/grafana_dashboards/flux-cluster.json @@ -0,0 +1,1383 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "iconColor": "red", + "name": "flux events", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + "flux" + ], + "type": "tags" + } + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, 
+ "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 24, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"Kustomization|HelmRelease\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "title": "Cluster Reconcilers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 28, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"Kustomization|HelmRelease\", ready=\"False\"})", + "instant": true, + "interval": "", + "legendFormat": "", 
+ "range": false, + "refId": "A" + } + ], + "title": "Failing Reconcilers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 29, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "title": "Kubernetes Manifests Sources", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 30, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\", ready=\"False\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "A" + } + ], + "title": "Failing Sources", + "type": "stat" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 61 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 8, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(gotk_reconcile_duration_seconds_sum{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"Kustomization|HelmRelease\"}[5m])) by (kind)\n/\n sum(rate(gotk_reconcile_duration_seconds_count{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"Kustomization|HelmRelease\"}[5m])) by (kind)", + "interval": "", + "legendFormat": "{{kind}}", + "refId": "A" + } + ], + "title": "Reconciler ops avg. 
duration", + "type": "bargauge" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 1 + }, + { + "color": "red", + "value": 61 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 31, + "options": { + "displayMode": "gradient", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(gotk_reconcile_duration_seconds_sum{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}[5m])) by (kind)\n/\n sum(rate(gotk_reconcile_duration_seconds_count{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}[5m])) by (kind)", + "interval": "", + "legendFormat": "{{kind}}", + "refId": "A" + } + ], + "title": "Source ops avg. 
duration", + "type": "bargauge" + }, + { + "collapsed": false, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 15, + "panels": [], + "title": "Status", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "False": { + "color": "red", + "index": 1, + "text": "Not Ready" + }, + "True": { + "color": "blue", + "index": 0, + "text": "Ready" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byType", + "options": "string" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + } + ] + } + ] + }, + "gridPos": { + "h": 22, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 33, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Status" + } + ] + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"Kustomization|HelmRelease\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Cluster reconciliation readiness ", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false, + "Value": false, + "__name__": false, + "app": false, + "chart_name": false, + "chart_source_name": false, + "container": false, + 
"customresource_group": false, + "customresource_kind": false, + "customresource_version": false, + "endpoint": false, + "exported_namespace": false, + "gotk_type": false, + "instance": false, + "job": false, + "kubernetes_namespace": false, + "kubernetes_pod_name": false, + "namespace": false, + "pod": false, + "pod_template_hash": false, + "revision": false, + "service": false, + "source_name": false, + "status": false, + "suspended": false, + "type": false + }, + "indexByName": { + "Time": 0, + "Value": 15, + "__name__": 1, + "container": 2, + "customresource_group": 4, + "customresource_kind": 5, + "customresource_version": 6, + "endpoint": 7, + "exported_namespace": 3, + "instance": 8, + "job": 9, + "name": 10, + "namespace": 11, + "pod": 12, + "ready": 13, + "service": 14 + }, + "renameByName": { + "Value": "", + "customresource_kind": "Kind", + "exported_namespace": "Namespace", + "kind": "Kind", + "name": "Name", + "namespace": "Operator Namespace", + "ready": "Status" + } + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "(Namespace|Kind|Name|Status)" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [ + { + "options": { + "False": { + "color": "red", + "index": 1, + "text": "Not Ready" + }, + "True": { + "color": "blue", + "index": 0, + "text": "Ready" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byType", + "options": "string" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + } + ] + }, + { + "matcher": 
{ + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "noValue", + "value": "Ready" + }, + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 34, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Status" + } + ] + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "gotk_resource_info{exported_namespace=~\"$namespace\", customresource_kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Source acquisition readiness ", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false, + "Value": false, + "__name__": false, + "app": false, + "bucket_name": false, + "container": false, + "customresource_group": false, + "customresource_kind": false, + "customresource_version": false, + "endpoint": false, + "exported_namespace": false, + "gotk_type": false, + "instance": false, + "job": false, + "kubernetes_namespace": false, + "kubernetes_pod_name": false, + "namespace": false, + "pod": false, + "pod_template_hash": false, + "ready": false, + "revision": false, + "service": false, + "status": false, + "suspended": false, + "type": false, + "url": false + }, + "indexByName": { + "Time": 0, + "Value": 15, + "__name__": 1, + "container": 2, + "customresource_group": 5, + "customresource_kind": 6, + "customresource_version": 7, + "endpoint": 8, + "exported_namespace": 4, + "instance": 9, + "job": 10, + "name": 11, + "namespace": 3, + "pod": 12, + "ready": 13, + "service": 14 + }, + "renameByName": 
{ + "Value": "", + "customresource_kind": "Kind", + "exported_namespace": "Namespace", + "kind": "Kind", + "name": "Name", + "namespace": "Operator Namespace", + "ready": "Status" + } + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "(Namespace|Kind|Name|Status)" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byType", + "options": "string" + }, + "properties": [ + { + "id": "custom.cellOptions", + "value": { + "mode": "basic", + "type": "color-background" + } + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 36, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Namespace" + } + ] + }, + "pluginVersion": "10.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "gotk_resource_info{exported_namespace=~\"$namespace\", suspended=\"true\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Suspended Objects", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": false, + "Value": false, + "__name__": false, + "app": false, + "bucket_name": false, + "container": false, + "customresource_group": false, + "customresource_kind": false, + "customresource_version": false, + 
"endpoint": false, + "exported_namespace": false, + "gotk_type": false, + "instance": false, + "job": false, + "kubernetes_namespace": false, + "kubernetes_pod_name": false, + "namespace": false, + "pod": false, + "pod_template_hash": false, + "ready": false, + "revision": false, + "service": false, + "source_name": false, + "status": false, + "suspended": false, + "type": false, + "url": false + }, + "indexByName": { + "Time": 0, + "Value": 15, + "__name__": 1, + "container": 2, + "customresource_group": 5, + "customresource_kind": 6, + "customresource_version": 7, + "endpoint": 8, + "exported_namespace": 4, + "instance": 9, + "job": 10, + "name": 11, + "namespace": 3, + "pod": 12, + "ready": 13, + "service": 14 + }, + "renameByName": { + "customresource_kind": "Kind", + "exported_namespace": "Namespace", + "name": "Name" + } + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "pattern": "(Namespace|Name|Kind)" + } + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 17, + "panels": [], + "title": "Timing", + "type": "row" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(gotk_reconcile_duration_seconds_sum{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"Kustomization|HelmRelease\"}[5m])) by (kind, name)\n/\n sum(rate(gotk_reconcile_duration_seconds_count{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"Kustomization|HelmRelease\"}[5m])) by (kind, name)", + "hide": false, + "interval": "", + "legendFormat": "{{kind}}/{{name}}", + "refId": "B" + } + ], + "title": "Cluster reconciliation duration", + "type": "timeseries" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsZero", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + }, + { + "matcher": { + "id": "byValue", + "options": { + "op": "gte", + "reducer": "allIsNull", + "value": 0 + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": false + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 35, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(gotk_reconcile_duration_seconds_sum{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}[5m])) by (kind, name)\n/\n sum(rate(gotk_reconcile_duration_seconds_count{namespace=~\"$operator_namespace\",exported_namespace=~\"$namespace\",kind=~\"GitRepository|HelmRepository|Bucket|OCIRepository\"}[5m])) by (kind, name)", + "hide": false, + "interval": "", + "legendFormat": "{{kind}}/{{name}}", + "refId": "B" + } + ], + "title": "Source acquisition duration", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "light", + "tags": [ + "flux" + ], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] 
+ }, + "datasource": { + "type": "prometheus", + "uid": "$DS_PROMETHEUS" + }, + "definition": "label_values(gotk_reconcile_condition, namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "operator_namespace", + "options": [], + "query": { + "query": "label_values(gotk_reconcile_condition, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "$DS_PROMETHEUS" + }, + "definition": "label_values(gotk_resource_info,exported_namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(gotk_resource_info,exported_namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Flux Cluster Stats", + "uid": "flux-cluster", + "version": 4, + "weekStart": "" +} diff --git a/charts/moonswitch-agent/grafana_dashboards/flux-control-plane.json b/charts/moonswitch-agent/grafana_dashboards/flux-control-plane.json new file mode 100644 index 0000000..d47d391 --- 
/dev/null +++ b/charts/moonswitch-agent/grafana_dashboards/flux-control-plane.json @@ -0,0 +1,1730 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "iconColor": "red", + "name": "flux events", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + "flux" + ], + "type": "tags" + } + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 24, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(go_info{namespace=\"$namespace\",pod=~\".*-controller-.*\"})", + "interval": "", + "legendFormat": "pods", + "refId": "A" + } + ], + "title": "Controllers", + "type": "stat" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + 
"color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 23, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "max(workqueue_longest_running_processor_seconds{namespace=\"$namespace\",pod=~\".*-controller-.*\"})", + "hide": false, + "interval": "", + "legendFormat": "seconds", + "refId": "B" + } + ], + "title": "Max Work Queue", + "type": "stat" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + { + "color": "#EAB839", + "value": 500000000 + }, + { + "color": "red", + "value": 900000000 + } + ] + }, + "unit": "decbits" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 25, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(go_memstats_alloc_bytes{namespace=\"$namespace\",pod=~\".*-controller-.*\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Memory", + "type": "gauge" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + }, + 
{ + "color": "#EAB839", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 26, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(rest_client_requests_total{namespace=\"$namespace\",pod=~\".*-controller-.*\"}[1m]))", + "interval": "", + "legendFormat": "requests", + "refId": "A" + } + ], + "title": "API Requests", + "type": "stat" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": 
[ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(rest_client_requests_total{namespace=\"$namespace\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "total", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(rest_client_requests_total{namespace=\"$namespace\",code!~\"2..\"}[1m]))", + "hide": false, + "interval": "", + "legendFormat": "errors", + "refId": "B" + } + ], + "title": "Kubernetes API Requests", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 15, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Resource Usage", + "type": "row" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": 
"none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "rate(process_cpu_seconds_total{namespace=\"$namespace\",pod=~\".*-controller-.*\"}[1m])", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",container!=\"POD\",container!=\"\",pod=~\".*-controller-.*\"}) by (pod)", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": 
"${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 17, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Reconciliation Stats", + "type": "row" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 27, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "workqueue_longest_running_processor_seconds{name=\"kustomization\"}", + "hide": false, + "interval": "", + "legendFormat": "kustomizations", + "refId": "B" + } + ], + "title": "Cluster Reconciliation Duration", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"kustomization\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful reconciliations ", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"kustomization\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed reconciliations ", + "refId": "B" + } + ], + "title": "Cluster Reconciliations ops/min", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 29, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Sources 
Stats", + "type": "row" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"gitrepository\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful git pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"gitrepository\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed git pulls", + "refId": "B" + } + ], + "title": "Git Repos ops/min", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 30, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"ocirepository\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful oci pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"ocirepository\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed oci pulls", + "refId": "B" + } + ], + "title": "OCI Repos ops/min", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 31, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrepository\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful helm pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrepository\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed helm pulls", + "refId": "B" + } + ], + "title": "Helm Repos ops/min", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 32, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"bucket\",result!=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "successful bucket pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"bucket\",result=\"error\"}[1m]))", + "format": "time_series", + "interval": "", + "legendFormat": "failed bucket pulls", + "refId": "B" + } + ], + "title": "Buckets ops/min", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 61 + }, + "id": 19, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "refId": "A" + } + ], + "title": "Helm Stats", + "type": "row" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 62 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "list", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.50, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": true, + "interval": "", + "legendFormat": "P50", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.90, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": true, + "interval": "", + "legendFormat": "P90", + "refId": "B" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.99, sum(rate(controller_runtime_reconcile_time_seconds_bucket{controller=\"helmrelease\"}[5m])) by (le))", + "hide": false, + "interval": "", + "legendFormat": "P99", + "refId": "C" + } + ], + "title": "Helm Release Duration", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": 
{ + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrelease\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful reconciliations ", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmrelease\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed reconciliations ", + "refId": "B" + } + ], + "title": "Helm Releases ops/min", + "type": "timeseries" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 70 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.0.3", + "targets": [ + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmchart\",result!=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "successful chart pulls", + "refId": "A" + }, + { + "datasource": { + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(controller_runtime_reconcile_total{controller=\"helmchart\",result=\"error\"}[1m])) by (controller)", + "format": "time_series", + "interval": "", + "legendFormat": "failed chart pulls", + "refId": "B" + } + ], + "title": "Helm Charts ops/min", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 38, + "style": "light", + "tags": [ + "flux" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 2, + "includeAll": false, + "multi": false, + "name": "DS_PROMETHEUS", + "options": 
[], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "flux-system", + "value": "flux-system" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "workqueue_work_duration_seconds_count", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "workqueue_work_duration_seconds_count", + "refId": "Prometheus-namespace-Variable-Query" + }, + "refresh": 2, + "regex": "/.*namespace=\"([^\"]*).*/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Flux Control Plane", + "uid": "flux-control-plane", + "version": 2, + "weekStart": "" +} diff --git a/charts/moonswitch-agent/grafana_dashboards/flux-logs.json b/charts/moonswitch-agent/grafana_dashboards/flux-logs.json new file mode 100644 index 0000000..4d9f58d --- /dev/null +++ b/charts/moonswitch-agent/grafana_dashboards/flux-logs.json @@ -0,0 +1,332 @@ +{ + "__inputs": [ + { + "name": "DS_LOKI", + "label": "Loki", + "description": "", + "type": "datasource", + "pluginId": "loki", + "pluginName": "Loki" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + }, + { + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "iconColor": "red", + "name": "flux events", + "target": { + "limit": 100, + "matchAny": false, + "tags": [ + "flux" + ], + 
"type": "tags" + } + } + ] + }, + "description": "Flux logs collected from Kubernetes, stored in Loki", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 29, + "iteration": 1653748775696, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": "${DS_LOKI}", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": "${DS_LOKI}", + "expr": "sum(count_over_time({namespace=~\"$namespace\", stream=~\"$stream\", app =~\"$controller\"} | json | __error__!=\"JSONParserErr\" | level=~\"$level\" |= \"$query\" [$__interval]))", + "instant": false, + "legendFormat": "Log count", + "range": true, + "refId": "A" + } + ], + "type": "timeseries" + }, + { + "datasource": "${DS_LOKI}", + "description": "Logs from services running in Kubernetes", + "gridPos": { + "h": 25, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 2, + "options": { + "dedupStrategy": "numbers", + "enableLogDetails": false, + "prettifyLogMessage": true, + "showCommonLabels": false, + "showLabels": false, + 
"showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": "${DS_LOKI}", + "expr": "{namespace=~\"$namespace\", stream=~\"$stream\", app =~\"$controller\"} | json | __error__!=\"JSONParserErr\" | level=~\"$level\" |= \"$query\"", + "refId": "A" + } + ], + "type": "logs" + } + ], + "refresh": "10s", + "schemaVersion": 36, + "style": "light", + "tags": [ + "flux" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "description": "String to search for", + "hide": 0, + "label": "Search Query", + "name": "query", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "allValue": "info|error", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "hide": 0, + "includeAll": true, + "multi": false, + "name": "level", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "info", + "value": "info" + }, + { + "selected": false, + "text": "error", + "value": "error" + } + ], + "query": "info,error", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "${DS_LOKI}", + "definition": "label_values(app)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "controller", + "options": [], + "query": "label_values(app)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "flux-system" + ], + "value": [ + "flux-system" + ] + }, + "datasource": "${DS_LOKI}", + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(namespace)", + 
"refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "${DS_LOKI}", + "definition": "label_values(stream)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "stream", + "options": [], + "query": "label_values(stream)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "Loki", + "value": "Loki" + }, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "DS_LOKI", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Flux Logs", + "uid": "flux-logs", + "version": 2 +} diff --git a/charts/moonswitch-agent/templates/flux-podmonitor.yaml b/charts/moonswitch-agent/templates/flux-podmonitor.yaml new file mode 100644 index 0000000..9d22910 --- /dev/null +++ b/charts/moonswitch-agent/templates/flux-podmonitor.yaml @@ -0,0 +1,29 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: {{ template "moonswitch-agent.fullname" . }}-flux-system + labels: + {{- include "moonswitch-agent.labels" . 
| nindent 4 }} + monitored-by: moonswitch-agent +spec: + namespaceSelector: + matchNames: + - flux-system + selector: + matchExpressions: + - key: app + operator: In + values: + - helm-controller + - source-controller + - kustomize-controller + - notification-controller + - image-automation-controller + - image-reflector-controller + podMetricsEndpoints: + - port: http-prom + relabelings: + # https://github.com/prometheus-operator/prometheus-operator/issues/4816 + - sourceLabels: [__meta_kubernetes_pod_phase] + action: keep + regex: Running diff --git a/charts/moonswitch-agent/templates/grafana-dashboards-configmap.yaml b/charts/moonswitch-agent/templates/grafana-dashboards-configmap.yaml index 8206f7e..bb0679c 100644 --- a/charts/moonswitch-agent/templates/grafana-dashboards-configmap.yaml +++ b/charts/moonswitch-agent/templates/grafana-dashboards-configmap.yaml @@ -7,4 +7,4 @@ metadata: grafana_dashboard: "1" {{- include "moonswitch-agent.labels" . | nindent 4 }} data: -{{ (tpl (.Files.Glob "grafana_dashboards/*.json").AsConfig .) | indent 2 }} +{{ (.Files.Glob "grafana_dashboards/*.json").AsConfig | indent 2 }}