From 5cfcdc01deaacbdcf609c0efea435e2f0c53ba3e Mon Sep 17 00:00:00 2001 From: Gabriel Cocenza Date: Mon, 16 Dec 2024 11:03:39 -0300 Subject: [PATCH 1/3] Add critical alert if cluster is throttling for two hours After showing the current alert rules for Managed Solutions, they asked to have a critical alert if the cluster is throttling for two hours. --- src/alert_rules/prometheus/prometheus_alerts.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/alert_rules/prometheus/prometheus_alerts.yaml b/src/alert_rules/prometheus/prometheus_alerts.yaml index 630c567f8..44f241274 100644 --- a/src/alert_rules/prometheus/prometheus_alerts.yaml +++ b/src/alert_rules/prometheus/prometheus_alerts.yaml @@ -138,3 +138,13 @@ "for": "5m" "labels": "severity": "warning" + + - "alert": "OpenSearchThrottlingTooLong" + "annotations": + "message": "Cluster {{ $labels.cluster }} is throttling for two hours. Please optimize queries and indexing patterns or consider scaling the application." + "summary": "OpenSearch Indexing Throttle too long" + "expr": | + sum by (cluster) (opensearch_indices_indexing_is_throttled_bool) > 0 + "for": "2h" + "labels": + "severity": "critical" From 4d96cc5dc838e004c6d42d6d554c9a84a7346a55 Mon Sep 17 00:00:00 2001 From: Gabriel Cocenza Date: Wed, 18 Dec 2024 15:02:23 -0300 Subject: [PATCH 2/3] Re-wording throttling message --- src/alert_rules/prometheus/prometheus_alerts.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alert_rules/prometheus/prometheus_alerts.yaml b/src/alert_rules/prometheus/prometheus_alerts.yaml index dd7ceb525..965e17bed 100644 --- a/src/alert_rules/prometheus/prometheus_alerts.yaml +++ b/src/alert_rules/prometheus/prometheus_alerts.yaml @@ -131,7 +131,7 @@ - "alert": "OpenSearchThrottling" "annotations": - "message": "Cluster {{ $labels.cluster }} is throttling. Please optimize queries and indexing patterns or consider scale the application."
+ "message": "Cluster {{ $labels.cluster }} is throttling. Please review your indexing request rate, index lifecycle or consider scaling the application." "summary": "OpenSearch Indexing Throttle" "expr": | sum by (cluster) (opensearch_indices_indexing_is_throttled_bool) > 0 @@ -141,7 +141,7 @@ - "alert": "OpenSearchThrottlingTooLong" "annotations": - "message": "Cluster {{ $labels.cluster }} is throttling for two hours. Please optimize queries and indexing patterns or consider scaling the application." + "message": "Cluster {{ $labels.cluster }} is throttling for at least two hours. Please review your indexing request rate, index lifecycle or consider scaling the application." "summary": "OpenSearch Indexing Throttle too long" "expr": | sum by (cluster) (opensearch_indices_indexing_is_throttled_bool) > 0 From 8b4a4fa455b0997442fa2790c9a633d4164c07cc Mon Sep 17 00:00:00 2001 From: Gabriel Cocenza Date: Thu, 19 Dec 2024 10:03:42 -0300 Subject: [PATCH 3/3] - add unit test - fix msg --- .../test_alert_rules/test_opensearch_rules.yaml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_alert_rules/test_opensearch_rules.yaml b/tests/unit/test_alert_rules/test_opensearch_rules.yaml index 7a6b8f719..3f635d934 100644 --- a/tests/unit/test_alert_rules/test_opensearch_rules.yaml +++ b/tests/unit/test_alert_rules/test_opensearch_rules.yaml @@ -191,5 +191,20 @@ tests: severity: warning cluster: opensearch-x7zb exp_annotations: - message: "Cluster opensearch-x7zb is throttling. Please optimize queries and indexing patterns or consider scale the application." + message: "Cluster opensearch-x7zb is throttling. Please review your indexing request rate, index lifecycle or consider scaling the application."
summary: "OpenSearch Indexing Throttle" + + - interval: 1m + input_series: + - series: 'opensearch_indices_indexing_is_throttled_bool{cluster="opensearch-x7zb"}' + values: '1x360' + alert_rule_test: + - eval_time: 2h + alertname: OpenSearchThrottlingTooLong + exp_alerts: + - exp_labels: + severity: critical + cluster: opensearch-x7zb + exp_annotations: + message: "Cluster opensearch-x7zb is throttling for at least two hours. Please review your indexing request rate, index lifecycle or consider scaling the application." + summary: "OpenSearch Indexing Throttle too long"