generated from canonical/template-operator
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add CI and unit tests for prometheus rules using promtool
- It's good to have unit tests for the Prometheus alerts and also to check that prometheus_alerts.yaml is a valid file. - Using promtool, it is possible to check the rules and also run unit tests for the alerts.
- Loading branch information
1 parent
405725f
commit a1e3970
Showing
2 changed files
with
167 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Validates the Prometheus alert rules under src/alert_rules/prometheus and
# runs the promtool unit tests stored under tests/unit/test_alert_rules.
name: Test prometheus rules

# Triggers: reusable-workflow call, manual dispatch, and pull requests that
# target main (skipped when a PR only changes Markdown or reST files).
on:
  workflow_call:
  workflow_dispatch:
  pull_request:
    types: [opened, synchronize, reopened]
    branches: [main]
    paths-ignore:
      - "**.md"
      - "**.rst"

# One active run per workflow and PR branch (or ref); a newer run cancels
# the run that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true

jobs:
  promtool:
    runs-on: ubuntu-latest
    steps:
      # checkout@v3 runs on the deprecated Node 16 runtime; v4 is the
      # supported drop-in replacement.
      - name: Checkout repo
        uses: actions/checkout@v4

      # prometheus snap includes promtool
      - name: Install prometheus snap
        run: sudo snap install prometheus

      # Syntax/validity check of every rule file.
      - name: Check validity of prometheus alert rules
        run: |
          promtool check rules src/alert_rules/prometheus/*.yaml

      # Executes the alert unit tests against the rule files they reference.
      - name: Run unit tests for prometheus alert rules
        run: |
          promtool test rules tests/unit/test_alert_rules/*.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# promtool unit-test definition for the Prometheus alert rules.

# Alert rule file(s) exercised by the test groups below.
rule_files:
- '../../../src/alert_rules/prometheus/prometheus_alerts.yaml'

# Interval at which the rules are evaluated during the tests.
evaluation_interval: '1m'

# Test groups; each one supplies its own input series and expected alerts.
tests:
# Cluster status series held at 2: the critical (RED) alert is expected at 2m.
- interval: 1m
  input_series:
  - series: 'opensearch_cluster_status{cluster="opensearch-x7zb"}'
    values: "2x3"
  alert_rule_test:
  - alertname: OpenSearchClusterNotHealthy
    eval_time: 2m
    exp_alerts:
    - exp_labels:
        cluster: opensearch-x7zb
        severity: critical
      exp_annotations:
        summary: 'Cluster health status is RED'
        message: >-
          Cluster opensearch-x7zb health status has been RED for at least 2m.
          Cluster does not accept writes, shards may be missing or master node
          hasn't been elected yet.
|
||
# Cluster status series held at 1: the warning (YELLOW) alert is expected at
# 20m.
- interval: 1m
  input_series:
  - series: 'opensearch_cluster_status{cluster="opensearch-x7zb"}'
    values: "1x21"
  alert_rule_test:
  - alertname: OpenSearchClusterNotHealthy
    eval_time: 20m
    exp_alerts:
    - exp_labels:
        cluster: opensearch-x7zb
        severity: warning
      exp_annotations:
        summary: 'Cluster health status is YELLOW'
        message: 'Cluster opensearch-x7zb health status has been YELLOW for at least 20m.'
|
||
# About 14% of the disk is free (86% used): only the low-watermark alert is
# expected.
- interval: 1m
  input_series:
  # just 70 GB available
  - series: 'opensearch_fs_path_available_bytes{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}'
    values: "69802552852x10"
  # HD with 500 GB
  - series: 'opensearch_fs_path_total_bytes{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}'
    values: "498589663232x10"
  alert_rule_test:
  - alertname: OpenSearchNodeDiskWatermarkReached
    eval_time: 5m
    exp_alerts:
    - exp_labels:
        cluster: opensearch-x7zb
        instance: '10.1.156.70:9200'
        node: opensearch-0.fa9
        severity: alert
      exp_annotations:
        summary: 'Disk Low Watermark Reached - disk saturation is 86%'
        message: >-
          Disk Low Watermark Reached at opensearch-0.fa9 node in opensearch-x7zb
          cluster. Shards can not be allocated to this node anymore. You should
          consider adding more disk to the node.
|
||
# About 9% of the disk is free (91% used): the low- and high-watermark alerts
# are both expected under the same alert name.
- interval: 1m
  input_series:
  # just 45 GB available
  - series: 'opensearch_fs_path_available_bytes{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}'
    values: "44873069690x10"
  # HD with 500 GB
  - series: 'opensearch_fs_path_total_bytes{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}'
    values: "498589663232x10"
  alert_rule_test:
  - alertname: OpenSearchNodeDiskWatermarkReached
    eval_time: 5m
    # both low and high water mark alerts are triggered
    exp_alerts:
    - exp_labels:
        cluster: opensearch-x7zb
        instance: '10.1.156.70:9200'
        node: opensearch-0.fa9
        severity: alert
      exp_annotations:
        summary: 'Disk Low Watermark Reached - disk saturation is 91%'
        message: >-
          Disk Low Watermark Reached at opensearch-0.fa9 node in opensearch-x7zb
          cluster. Shards can not be allocated to this node anymore. You should
          consider adding more disk to the node.
    - exp_labels:
        cluster: opensearch-x7zb
        instance: '10.1.156.70:9200'
        node: opensearch-0.fa9
        severity: high
      exp_annotations:
        summary: 'Disk High Watermark Reached - disk saturation is 91%'
        message: >-
          Disk High Watermark Reached at opensearch-0.fa9 node in opensearch-x7zb
          cluster. Some shards will be re-allocated to different nodes if possible.
          Make sure more disk space is added to the node or drop old indices
          allocated to this node.
|
||
# JVM heap usage series held at 76: the heap alert is expected at 10m.
- interval: 1m
  input_series:
  - series: 'opensearch_jvm_mem_heap_used_percent{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}'
    values: "76x60"
  alert_rule_test:
  - alertname: OpenSearchJVMHeapUseHigh
    eval_time: 10m
    exp_alerts:
    - exp_labels:
        cluster: opensearch-x7zb
        instance: '10.1.156.70:9200'
        node: opensearch-0.fa9
        severity: alert
      exp_annotations:
        summary: 'JVM Heap usage on the node is high'
        message: >-
          JVM Heap usage on the node opensearch-0.fa9 in opensearch-x7zb cluster
          is 76%.
|
||
# OS CPU usage series held at 91: the system CPU alert is expected at 1m.
- interval: 1m
  input_series:
  - series: 'opensearch_os_cpu_percent{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}'
    values: "91x6"
  alert_rule_test:
  - alertname: OpenSearchHostSystemCPUHigh
    eval_time: 1m
    exp_alerts:
    - exp_labels:
        cluster: opensearch-x7zb
        instance: '10.1.156.70:9200'
        node: opensearch-0.fa9
        severity: alert
      exp_annotations:
        summary: 'System CPU usage is high'
        message: >-
          System CPU usage on the node opensearch-0.fa9 in opensearch-x7zb
          cluster is 91%
|
||
# Process CPU usage series held at 91: the process CPU alert is expected at 1m.
- interval: 1m
  input_series:
  - series: 'opensearch_process_cpu_percent{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}'
    values: "91x6"
  alert_rule_test:
  - alertname: OpenSearchProcessCPUHigh
    eval_time: 1m
    exp_alerts:
    - exp_labels:
        cluster: opensearch-x7zb
        instance: '10.1.156.70:9200'
        node: opensearch-0.fa9
        severity: alert
      exp_annotations:
        summary: 'OSE process CPU usage is high'
        message: >-
          OSE process CPU usage on the node opensearch-0.fa9 in opensearch-x7zb
          cluster is 91%