# redis-alerts.yaml
---
# Alerting rules for Redis, packaged as a Prometheus Operator PrometheusRule.
# Each rule carries a `severity` label plus `level`, `ruleGroupName`,
# `ruleName` and `type` annotations; `ruleName` always repeats the alert name.
# NOTE(review): the metric names look like those exported by redis_exporter;
# confirm that the exporter in use exposes every metric referenced below.
kind: PrometheusRule
apiVersion: monitoring.coreos.com/v1
metadata:
  # NOTE(review): nesting was reconstructed; `prometheus: dx` is assumed to
  # belong to `metadata`. No `metadata.name` is present in this file; confirm
  # whether the consuming platform injects one.
  prometheus: dx
spec:
  groups:
    - name: redis-alert
      # Evaluate every minute. Every rule below uses a 1m range window and a
      # `for` of at most 5m; a longer evaluation interval would only sample
      # one window out of many and stretch every `for` to at least that
      # interval.
      interval: 1m
      rules:
        # Exporter reports the Redis instance as unreachable.
        - expr: redis_up == 0
          alert: '[P0]-Redis Down'
          for: 1m
          labels:
            severity: error
          annotations:
            description: 实例为{{ $labels.instance }}的Redis宕机,请立即检查Redis实例
            level: P0
            ruleGroupName: redis-alert
            ruleName: '[P0]-Redis Down'
            type: redis

        # Memory usage above 80% of maxmemory. The `and ... > 0` guard skips
        # instances without a maxmemory limit, where the division yields +Inf.
        # NOTE(review): the alert name uses '[P3-]' where most rules use
        # '[Pn]-'; kept as-is because renaming changes the alert identity.
        - expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 80 and redis_memory_max_bytes > 0
          alert: '[P3-]Redis内存使用率高'
          for: 5m
          labels:
            severity: info
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点内存使用率高于80%,请检查Redis实例
            level: P3
            ruleGroupName: redis-alert
            ruleName: '[P3-]Redis内存使用率高'
            type: redis

        # More than 100 new client connections within one minute. Uses the
        # connections-received counter; `redis_connected_clients` is a gauge
        # of currently open connections and must not be fed to rate().
        - expr: increase(redis_connections_received_total[1m]) > 100
          alert: '[P2]-Redis客户端新建连接数高'
          for: 5m
          labels:
            severity: warn
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点1分钟内客户端新建连接数大于100
            level: P2
            ruleGroupName: redis-alert
            ruleName: '[P2]-Redis客户端新建连接数高'
            type: redis

        # Redis rejected client connections during the last minute.
        - expr: increase(redis_rejected_connections_total[1m]) > 1
          alert: '[P1]-Redis存在拒绝客户端连接情况'
          for: 1m
          labels:
            severity: critical
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点存在拒绝客户端连接情况,请立即检查Redis实例
            level: P1
            ruleGroupName: redis-alert
            ruleName: '[P1]-Redis存在拒绝客户端连接情况'
            type: redis

        # Command rate above 600/s (description states the same threshold).
        - expr: rate(redis_commands_processed_total[1m]) > 600
          alert: '[P1]-Redis QPS高'
          for: 3m
          labels:
            severity: critical
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点QPS大于600
            level: P1
            ruleGroupName: redis-alert
            ruleName: '[P1]-Redis QPS高'
            type: redis

        # A new slowlog entry appeared in the last minute and the most recent
        # slow command took longer than 500ms. The duration metric is a gauge,
        # so it is compared directly instead of being passed through rate().
        - expr: redis_last_slow_execution_duration_seconds > 0.5 and changes(redis_slowlog_last_id[1m]) > 0
          alert: '[P1]-Redis发生慢查询'
          for: 3m
          labels:
            severity: error
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点,在过去1分钟内慢查询大于500ms
            level: P1
            ruleGroupName: redis-alert
            ruleName: '[P1]-Redis发生慢查询'
            type: redis

        # Number of connected replicas dropped within the last minute.
        # `for: 0m` because the negative delta is only visible while the drop
        # is inside the 1m window; any hold time would cancel the alert.
        - expr: delta(redis_connected_slaves[1m]) < 0
          alert: '[P1]-Redis丢失从节点'
          for: 0m
          labels:
            severity: error
          annotations:
            description: 实例为{{ $labels.instance }}的Redis丢失从节点,请立即检查Redis实例
            level: P1
            ruleGroupName: redis-alert
            ruleName: '[P1]-Redis丢失从节点'
            type: redis

        # Memory usage above 90% of maxmemory (same guard as the 80% rule).
        - expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90 and redis_memory_max_bytes > 0
          alert: '[P2-]Redis内存使用率高'
          for: 5m
          labels:
            severity: warn
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点内存使用率高于90%,请检查Redis实例
            level: P2
            ruleGroupName: redis-alert
            ruleName: '[P2-]Redis内存使用率高'
            type: redis

        # Command rate above 300/s (description states the same threshold).
        - expr: rate(redis_commands_processed_total[1m]) > 300
          alert: '[P2]-Redis QPS高'
          for: 3m
          labels:
            severity: warn
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点QPS大于300
            level: P2
            ruleGroupName: redis-alert
            ruleName: '[P2]-Redis QPS高'
            type: redis

        # Same slow-query check as the P1 rule, with a 200ms threshold.
        - expr: redis_last_slow_execution_duration_seconds > 0.2 and changes(redis_slowlog_last_id[1m]) > 0
          alert: '[P2]-Redis发生慢查询'
          for: 3m
          labels:
            severity: warn
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点,在过去1分钟内慢查询大于200ms
            level: P2
            ruleGroupName: redis-alert
            ruleName: '[P2]-Redis发生慢查询'
            type: redis

        # Fewer than 5 clients currently connected.
        - expr: redis_connected_clients < 5
          alert: '[P2]-Redis客户端连接数低'
          for: 5m
          labels:
            severity: warn
          annotations:
            description: 实例为{{ $labels.instance }}的Redis节点,客户端连接数低于5
            level: P2
            ruleGroupName: redis-alert
            ruleName: '[P2]-Redis客户端连接数低'
            type: redis