diff --git a/production/loki-mixin/alerts.libsonnet b/production/loki-mixin/alerts.libsonnet index 02fb2a0ee5662..0d34086d9b3a5 100644 --- a/production/loki-mixin/alerts.libsonnet +++ b/production/loki-mixin/alerts.libsonnet @@ -6,12 +6,12 @@ rules: [ { alert: 'LokiRequestErrors', - expr: std.strReplace(||| - 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (cluster, namespace, job, route) + expr: ||| + 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (%(group_by_cluster)s, job, route) / - sum(rate(loki_request_duration_seconds_count[2m])) by (cluster, namespace, job, route) + sum(rate(loki_request_duration_seconds_count[2m])) by (%(group_by_cluster)s, job, route) > 10 - |||, 'cluster', $._config.per_cluster_label), + ||| % $._config, 'for': '15m', labels: { severity: 'critical', @@ -26,8 +26,8 @@ { alert: 'LokiRequestPanics', expr: ||| - sum(increase(loki_panic_total[10m])) by (%s, namespace, job) > 0 - ||| % $._config.per_cluster_label, + sum(increase(loki_panic_total[10m])) by (%(group_by_cluster)s, job) > 0 + ||| % $._config, labels: { severity: 'critical', }, @@ -41,8 +41,8 @@ { alert: 'LokiRequestLatency', expr: ||| - %s_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1 - ||| % $._config.per_cluster_label, + %(group_prefix_jobs)s_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1 + ||| % $._config, 'for': '15m', labels: { severity: 'critical', @@ -57,8 +57,8 @@ { alert: 'LokiTooManyCompactorsRunning', expr: ||| - sum(loki_boltdb_shipper_compactor_running) by (%s, namespace) > 1 - ||| % $._config.per_cluster_label, + sum(loki_boltdb_shipper_compactor_running) by (%(group_by_cluster)s) > 1 + ||| % $._config, 'for': '5m', labels: { severity: 'warning', diff --git a/production/loki-mixin/config.libsonnet b/production/loki-mixin/config.libsonnet index 
eeea227c8f068..4b9053739e537 100644 --- a/production/loki-mixin/config.libsonnet +++ b/production/loki-mixin/config.libsonnet @@ -1,4 +1,7 @@ { + local makePrefix(groups) = std.join('_', groups), + local makeGroupBy(groups) = std.join(', ', groups), + _config+:: { // Tags for dashboards. tags: ['loki'], @@ -11,6 +14,20 @@ // The label used to differentiate between different clusters. per_cluster_label: 'cluster', + per_namespace_label: 'namespace', + per_job_label: 'job', + + // Grouping labels, to uniquely identify and group by {jobs, clusters} + job_labels: [$._config.per_cluster_label, $._config.per_namespace_label, $._config.per_job_label], + cluster_labels: [$._config.per_cluster_label, $._config.per_namespace_label], + + // Each group prefix is composed of `_`-separated labels + group_prefix_jobs: makePrefix($._config.job_labels), + group_prefix_clusters: makePrefix($._config.cluster_labels), + + // Each group-by label list is `, `-separated and uniquely identifies a job or cluster + group_by_job: makeGroupBy($._config.job_labels), + group_by_cluster: makeGroupBy($._config.cluster_labels), // Enable dashboard and panels for Grafana Labs internal components. 
internal_components: false, diff --git a/production/loki-mixin/dashboards.libsonnet b/production/loki-mixin/dashboards.libsonnet index 8b1ced08f3d15..a28f276cd66da 100644 --- a/production/loki-mixin/dashboards.libsonnet +++ b/production/loki-mixin/dashboards.libsonnet @@ -1,4 +1,3 @@ -(import 'config.libsonnet') + (import 'dashboards/loki-retention.libsonnet') + (import 'dashboards/loki-chunks.libsonnet') + (import 'dashboards/loki-logs.libsonnet') + diff --git a/production/loki-mixin/mixin-ssd.libsonnet b/production/loki-mixin/mixin-ssd.libsonnet index 01c59bb6ab7cc..273777ebeda9e 100644 --- a/production/loki-mixin/mixin-ssd.libsonnet +++ b/production/loki-mixin/mixin-ssd.libsonnet @@ -1,6 +1,4 @@ -(import 'dashboards.libsonnet') + -(import 'alerts.libsonnet') + -(import 'recording_rules.libsonnet') + { +(import 'mixin.libsonnet') + { grafanaDashboardFolder: 'Loki SSD', _config+:: { diff --git a/production/loki-mixin/mixin.libsonnet b/production/loki-mixin/mixin.libsonnet index 53584824c6331..72673ff3e43d7 100644 --- a/production/loki-mixin/mixin.libsonnet +++ b/production/loki-mixin/mixin.libsonnet @@ -1,5 +1,6 @@ (import 'dashboards.libsonnet') + (import 'alerts.libsonnet') + +(import 'config.libsonnet') + (import 'recording_rules.libsonnet') + { grafanaDashboardFolder: 'Loki', // Without this, configs is not taken into account