-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e314f70
commit 5987d73
Showing 5 changed files with 61 additions and 3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# PrometheusRule (prometheus-operator CRD) with the alerting rules for hoprd nodes.
#
# This file is a Helm template. The alert descriptions wrap the Prometheus
# template delimiters in Helm raw strings so that Helm emits them verbatim and
# Prometheus expands the label references when the alert fires.
#
# The expressions treat hopr_up as the Unix timestamp at which the hoprd process
# started, so "time() - hopr_up" is the process uptime in seconds.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  annotations:
    prometheus-operator-validated: "true"
  labels:
    prometheus_rule: "true"
  name: hoprd-node-rules
spec:
  groups:
    - name: hoprd-nodes
      rules:
        - alert: HoprdNodePodRestarted
          annotations:
            description: Hoprd node {{`{{`}} $labels.namespace{{`}}`}} /{{`{{`}} $labels.job {{`}}`}} restarted.
            summary: Hoprd node restarted.
          expr: |-
            # When the node is running for less than 1 minute and the process
            # start time is within 10 seconds of the pod start time (the pod
            # itself was restarted).
            # The uptime vector is the left-hand side so that the namespace and
            # job labels used by the description survive the set intersection.
            avg((time() - hopr_up < 60)) by (namespace, job, pod)
            and on (pod)
            abs(sum (kube_pod_start_time) by (pod) - sum (hopr_up) by (pod)) <= 10
          # The condition only holds during the first 60 seconds of uptime, so
          # any pending period of one minute or more would keep the alert from
          # ever firing.
          for: 0m
          labels:
            severity: critical
            # Quoted so an empty environmentName renders "" rather than null.
            environment: {{ .Values.environmentName | quote }}
        - alert: HoprdNodeProcessRebooted
          annotations:
            description: Hoprd node {{`{{`}} $labels.namespace{{`}}`}} /{{`{{`}} $labels.job {{`}}`}} process rebooted.
            summary: Hoprd node rebooted (OOM).
          expr: |-
            # When the pod is running correctly and the node reboots internally
            # without affecting the pod: the process start time is more than
            # 10 seconds away from the pod start time.
            # The uptime vector is the left-hand side so that the namespace and
            # job labels used by the description survive the set intersection.
            avg((time() - hopr_up < 60)) by (namespace, job, pod)
            and on (pod)
            abs(sum (kube_pod_start_time) by (pod) - sum (hopr_up) by (pod)) > 10
          # The condition only holds during the first 60 seconds of uptime, so
          # any pending period of one minute or more would keep the alert from
          # ever firing.
          for: 0m
          labels:
            severity: critical
            # Quoted so an empty environmentName renders "" rather than null.
            environment: {{ .Values.environmentName | quote }}
        - alert: HoprdHealthChanged
          annotations:
            description: Hoprd node {{`{{`}} $labels.namespace{{`}}`}} /{{`{{`}} $labels.job {{`}}`}} network health changed.
            summary: Hoprd node network health changed.
          expr: |-
            # When the node is running for more than 15 minutes and the health status changes in the last 5 minutes
            avg((time() - hopr_up) > 900 and changes(hopr_network_health[5m]) > 0) by (namespace,job)
          for: 1m
          labels:
            severity: critical
            # Quoted so an empty environmentName renders "" rather than null.
            environment: {{ .Values.environmentName | quote }}
        - alert: HoprdAPIErrors
          annotations:
            description: Hoprd api call errors on {{`{{`}} $labels.namespace{{`}}`}} /{{`{{`}} $labels.job {{`}}`}} .
            summary: Hoprd api call errors.
          expr: |-
            # When the node is running for more than 15 minutes and the http api call errors are more than 30 in the last 10 minutes
            # Errors are calls whose status does not start with 1, 2 or 3.
            sum(increase(hopr_http_api_call_count{status!~"[123].*"}[10m])) by (namespace, job) > 30 and avg((time() - hopr_up)) by (namespace,job) > 900
          for: 10m
          labels:
            severity: critical
            # Quoted so an empty environmentName renders "" rather than null.
            environment: {{ .Values.environmentName | quote }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
|
||
environmentName: prod | ||
operator: | ||
persistence: | ||
storageClassName: ceph-ephimeral | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
|
||
environmentName: staging | ||
operator: | ||
persistence: | ||
storageClassName: ceph-ephimeral | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ nameOverride: "" | |
## | ||
fullnameOverride: "" | ||
|
||
environmentName: "" | ||
|
||
## @section Replicator Parameters | ||
## | ||
|
||
|