Skip to content

Commit

Permalink
Change duplicated rules names (#504)
Browse files Browse the repository at this point in the history
## Issue
Some rules have duplicated names, which can be confusing.

## Solution
This PR gives each duplicated rule a distinct name that matches its alert
rule expression, and adds blank lines between alerts to improve readability.
  • Loading branch information
gabrielcocenza authored Nov 20, 2024
1 parent 56736df commit e3f38a8
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions src/alert_rules/prometheus/prometheus_alerts.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"groups":
- "name": "opensearch.alerts"
"rules":
- "alert": "OpenSearchClusterNotHealthy"
- "alert": "OpenSearchClusterRed"
"annotations":
"message": "Cluster {{ $labels.cluster }} health status has been RED for at least 2m. Cluster does not accept writes, shards may be missing or master node hasn't been elected yet."
"summary": "Cluster health status is RED"
Expand All @@ -10,15 +10,17 @@
"for": "2m"
"labels":
"severity": "critical"
- "alert": "OpenSearchClusterNotHealthy"

- "alert": "OpenSearchClusterYellow"
"annotations":
"message": "Cluster {{ $labels.cluster }} health status has been YELLOW for at least 20m."
"message": "Cluster {{ $labels.cluster }} health status has been YELLOW for at least 20m. Some cluster replicas shards are not allocated."
"summary": "Cluster health status is YELLOW"
"expr": |
sum by (cluster) (opensearch_cluster_status == 1)
"for": "20m"
"labels":
"severity": "warning"

- "alert": "OpenSearchBulkRequestsRejectionJumps"
"annotations":
"message": "High Bulk Rejection Ratio at {{ $labels.node }} node in {{ $labels.cluster }} cluster. This node may not be keeping up with the indexing speed."
Expand All @@ -28,7 +30,8 @@
"for": "10m"
"labels":
"severity": "warning"
- "alert": "OpenSearchNodeDiskWatermarkReached"

- "alert": "OpenSearchNodeDiskLowWatermarkReached"
"annotations":
"message": "Disk Low Watermark Reached at {{ $labels.node }} node in {{ $labels.cluster }} cluster. Shards can not be allocated to this node anymore. You should consider adding more disk to the node."
"summary": "Disk Low Watermark Reached - disk saturation is {{ $value }}%"
Expand All @@ -44,7 +47,8 @@
"for": "5m"
"labels":
"severity": "alert"
- "alert": "OpenSearchNodeDiskWatermarkReached"

- "alert": "OpenSearchNodeDiskHighWatermarkReached"
"annotations":
"message": "Disk High Watermark Reached at {{ $labels.node }} node in {{ $labels.cluster }} cluster. Some shards will be re-allocated to different nodes if possible. Make sure more disk space is added to the node or drop old indices allocated to this node."
"summary": "Disk High Watermark Reached - disk saturation is {{ $value }}%"
Expand All @@ -60,6 +64,7 @@
"for": "5m"
"labels":
"severity": "high"

- "alert": "OpenSearchJVMHeapUseHigh"
"annotations":
"message": "JVM Heap usage on the node {{ $labels.node }} in {{ $labels.cluster }} cluster is {{ $value }}%."
Expand All @@ -69,6 +74,7 @@
"for": "10m"
"labels":
"severity": "alert"

- "alert": "OpenSearchHostSystemCPUHigh"
"annotations":
"message": "System CPU usage on the node {{ $labels.node }} in {{ $labels.cluster }} cluster is {{ $value }}%"
Expand All @@ -78,6 +84,7 @@
"for": "1m"
"labels":
"severity": "alert"

- "alert": "OpenSearchProcessCPUHigh"
"annotations":
"message": "OSE process CPU usage on the node {{ $labels.node }} in {{ $labels.cluster }} cluster is {{ $value }}%"
Expand Down

0 comments on commit e3f38a8

Please sign in to comment.