Skip to content

Commit

Permalink
Add dynamic syn_team label to alerts
Browse files Browse the repository at this point in the history
We use a small Go template to derive the value for `syn_team` from the
`project` label of the metric (the `syn` project is mapped to the owning
team), so that each individual alert instance is labeled correctly. This
should allow us to group ArgoCD alerts by `syn_team` (which is already
well-defined for alert routing) to ensure that alerts for each AppProject
are routed to the responsible team.
  • Loading branch information
simu committed Jan 7, 2025
1 parent a0f76bc commit 7682c1a
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 6 deletions.
23 changes: 17 additions & 6 deletions component/monitoring.libsonnet
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
local kap = import 'lib/kapitan.libjsonnet';
local kube = import 'lib/kube.libjsonnet';
local prometheus = import 'lib/prometheus.libsonnet';
local syn_teams = import 'syn/syn-teams.libsonnet';

local inv = kap.inventory();
local params = inv.parameters.argocd;

Expand Down Expand Up @@ -29,6 +31,12 @@ local serviceMonitor(objname, name) =
};

local alert_rules =
// Value for the `syn_team` alert label, or null when no owner is configured.
//
// When `syn_teams.owner` is set, this is a Go template string (not evaluated
// by Jsonnet; it is emitted verbatim into the rule's label value): if the
// alert's `project` label equals "syn" it renders the owner name, otherwise it
// renders the `project` label unchanged.
//
// NOTE(review): the ArgoCDDown rule is built on the `up` metric; confirm that
// series carries a `project` label, otherwise the else-branch would render an
// empty `syn_team` for that alert.
local team_label =
if syn_teams.owner != null then
// `%` substitutes the owner into the quoted `"%s"` template literal.
'{{if eq $labels.project "syn"}}{{ "%s" }}{{else}}{{ $labels.project }}{{end}}' % syn_teams.owner
else
// null so that the `std.prune(...)` around each `labels` object drops the field.
null;

kube._Object('monitoring.coreos.com/v1', 'PrometheusRule', 'argocd') {
metadata: {
name: 'argocd',
Expand All @@ -47,10 +55,11 @@ local alert_rules =
alert: 'ArgoCDAppUnsynced',
expr: 'argocd_app_info{exported_namespace="' + params.namespace + '", sync_status!="Synced"} > 0',
'for': '10m',
labels: {
labels: std.prune({
severity: 'warning',
syn: 'true',
},
syn_team: team_label,
}),
annotations: {
message: 'Argo CD app {{ $labels.name }} is not synced',
description: 'kubectl -n ' + params.namespace + ' describe app {{ $labels.name }}',
Expand All @@ -61,10 +70,11 @@ local alert_rules =
alert: 'ArgoCDAppUnhealthy',
expr: 'argocd_app_info{exported_namespace="' + params.namespace + '", health_status!="Healthy"} > 0',
'for': '10m',
labels: {
labels: std.prune({
severity: 'critical',
syn: 'true',
},
syn_team: team_label,
}),
annotations: {
message: 'Argo CD app {{ $labels.name }} is not healthy',
description: 'kubectl -n ' + params.namespace + ' describe app {{ $labels.name }}',
Expand All @@ -75,10 +85,11 @@ local alert_rules =
alert: 'ArgoCDDown',
expr: 'up{namespace="' + params.namespace + '", job=~"^syn-argocd-.+$"} != 1',
'for': '5m',
labels: {
labels: std.prune({
severity: 'critical',
syn: 'true',
},
syn_team: team_label,
}),
annotations: {
message: 'Argo CD job {{ $labels.job }} is down',
dashboard: 'argocd',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ spec:
labels:
severity: warning
syn: 'true'
syn_team: '{{if eq $labels.project "syn"}}{{ "sparkling-sound" }}{{else}}{{
$labels.project }}{{end}}'
- alert: ArgoCDAppUnhealthy
annotations:
dashboard: argocd
Expand All @@ -86,6 +88,8 @@ spec:
labels:
severity: critical
syn: 'true'
syn_team: '{{if eq $labels.project "syn"}}{{ "sparkling-sound" }}{{else}}{{
$labels.project }}{{end}}'
- alert: ArgoCDDown
annotations:
dashboard: argocd
Expand All @@ -95,3 +99,5 @@ spec:
labels:
severity: critical
syn: 'true'
syn_team: '{{if eq $labels.project "syn"}}{{ "sparkling-sound" }}{{else}}{{
$labels.project }}{{end}}'

0 comments on commit 7682c1a

Please sign in to comment.