-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathnotifier.example.yml
336 lines (336 loc) · 16.5 KB
/
notifier.example.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# Redis connection settings. The connection mode depends on the fields set in this section:
# 1. Set `master_name` to enable Redis Sentinel support
# 2. Specify two or more `addrs` to enable cluster support
# 3. Otherwise, standalone configuration is used
redis:
  # Sentinel master name
  master_name: ""
  # Address list, format: {host1_name:port},{ip:port}
  addrs: "localhost:6379"
  # Redis username
  username: "username"
  # Redis password
  password: "password"
  # Redis Sentinel username
  sentinel_username: "sentinel_username"
  # Redis Sentinel password
  sentinel_password: "sentinel_password"
  # Moira will delete metrics older than this value from Redis. Large values will lead to various problems everywhere.
  # See https://github.com/moira-alert/moira/pull/519
  metrics_ttl: 3h
  # Dial timeout for establishing new connections.
  # Default is 500 milliseconds.
  dial_timeout: 1s
  # Timeout for socket reads. If reached, commands will fail
  # with a timeout instead of blocking. Default is 3 seconds.
  # Skip this setting or set 0 for default.
  read_timeout: 5s
  # Timeout for socket writes. If reached, commands will fail
  # with a timeout instead of blocking. Default is ReadTimeout.
  # Skip this setting or set 0 for default.
  write_timeout: 5s
  # Enables read-only commands on slave nodes.
  # The flag does not work without `route_randomly` or `route_by_latency` set to true.
  # Notifier needs master and slaves to synchronize very quickly, so it is safer to keep this flag off.
  read_only: false
  # Allows routing read-only commands to a **random** master or slave node.
  # It automatically enables ReadOnly.
  route_randomly: false
  # Allows routing read-only commands to the **closest** master or slave node.
  # It automatically enables ReadOnly.
  route_by_latency: false
  # Minimum backoff between retries. Used to calculate exponential backoff. Default value is 0.
  min_retry_backoff: 1s
  # Maximum backoff between retries. Used to calculate exponential backoff. Default value is 0.
  max_retry_backoff: 10s
  # Max number of attempts to find an alive node (in Redis cluster)
  max_redirects: 4
# Telemetry settings: the shared listen port plus the pprof and graphite sub-sections.
telemetry:
  # Common port for all telemetry data: Prometheus scraping, pprof, etc.
  listen: ":8091"
  pprof:
    # If true, pprof will be enabled on the common telemetry port.
    enabled: false
  graphite:
    # If true, graphite sender will be enabled.
    enabled: true
    # If true, runtime stats will be captured and sent to graphite.
    # Note: capturing runtime stats requires a stoptheworld() call once per configured "graphite.interval"
    # (https://golang.org/src/runtime/mstats.go)
    runtime_stats: false
    # Graphite relay URI, format: ip:port
    uri: "graphite-relay:2003"
    # Moira metrics prefix. Use 'prefix: {hostname}' to use hostname autoresolver.
    prefix: DevOps.moira
    # Metrics sending interval
    interval: 60s
# Notifier settings: send/retry timing, notification formatting, the list of senders
# and the self state monitor.
notifier:
  # Soft timeout to start retrying to send a notification after a single failed attempt
  sender_timeout: 10s
  # Hard timeout to stop retrying to send a notification after multiple failed attempts
  # (kept quoted so the value stays a string)
  resending_timeout: "1:00"
  # Delay before performing one more send attempt
  rescheduling_delay: 60s
  # Web-UI URI prefix for trigger links in notifications. For example: with 'http://localhost' every notification will contain a link like 'http://localhost/trigger/triggerId'
  front_uri: "https://moira.example.com"
  # Timezone to use to convert ticks. Default is UTC. See https://golang.org/pkg/time/#LoadLocation for more details.
  timezone: Europe/Moscow
  # Format for email sender. Default is "15:04 02.01.2006". See https://golang.org/pkg/time/#Time.Format for more details about golang time formatting.
  date_time_format: "15:04 02.01.2006"
  # Amount of messages notifier reads from Redis per iteration, -1 for unlimited
  read_batch_size: -1
  # List of senders. Every element has a required "sender_type" field (one of ["pushover", "slack", "mail", "telegram", "twilio sms", "twilio voice", "script", "discord", "selfstate", "webhook", "opsgenie", "victorops", "pagerduty", "msteams", "mattermost"]) and a "contact_type" field.
  # It is possible to create several senders with the same "sender_type" but different "contact_type".
  # Every type of sender has additional config fields.
  senders:
    # sender_type characterizes the specific structure of the sender in the code
    - sender_type: msteams
      # contact_type matches the sender 1-1 with the contact template type in the api web config and uniquely identifies the sender
      contact_type: msteams
      # The max amount of events you want to be sent to your channel: -1 for unlimited, any other positive value to limit events
      max_events: -1
    - sender_type: pushover
      contact_type: pushover
      # Api token for your pushover channel, for more info see https://pushover.net/api#registration
      api_token: ...
    - sender_type: slack
      contact_type: slack
      # Api token for your moira notifications slack user, for more info see https://get.slack.help/hc/en-us/articles/215770388-Create-and-regenerate-API-tokens
      api_token: ...
      # If true, notification will be sent with state-specific icon, for more info see https://moira.readthedocs.io/en/latest/installation/configuration.html#slack-icons.
      use_emoji: true
      # Used when no emoji is found in `emoji_map` for the metric state
      default_emoji: ':moira-state-ok:'
      # Sets the correspondence between the emoji used and the state of the metrics in the notification in Slack.
      # The default mapping is shown below.
      emoji_map:
        'OK': ':moira-state-ok:'
        'WARN': ':moira-state-warn:'
        'ERROR': ':moira-state-error:'
        'NODATA': ':moira-state-nodata:'
        'EXCEPTION': ':moira-state-exception:'
        'TEST': ':moira-state-test:'
    - sender_type: mattermost
      contact_type: mattermost
      # Sets the url to the Mattermost API for the client
      url: ...
      # Controls whether a client verifies the server's certificate chain and host name.
      # If true, crypto/tls accepts any certificate presented by the server and any host name in that certificate.
      insecure_tls: false
      # Api token for moira notifications Mattermost client
      api_token: ...
      # If true, notification will be sent with state-specific icon
      use_emoji: true
      # Used when no emoji is found in `emoji_map` for the metric state
      default_emoji: ':moira-state-ok:'
      # Sets the correspondence between the emoji used and the state of the metrics in the notification in Mattermost.
      # The default mapping is shown below.
      emoji_map:
        'OK': ':moira-state-ok:'
        'WARN': ':moira-state-warn:'
        'ERROR': ':moira-state-error:'
        'NODATA': ':moira-state-nodata:'
        'EXCEPTION': ':moira-state-exception:'
        'TEST': ':moira-state-test:'
    - sender_type: telegram
      contact_type: telegram
      # Api token for your telegram bot, for more info about creating a bot and getting a token see https://core.telegram.org/bots#3-how-do-i-create-a-bot
      api_token: ...
    - sender_type: mail
      contact_type: mail
      mail_from: ...
      smtp_host: ...
      smtp_port: ...
      # Skip SMTP server certificate chain validation if true.
      # NOTE(review): this comment previously read "if false", which contradicts the key name and the
      # mattermost sender's description of the same flag — confirm against the mail sender code.
      insecure_tls: false
      # Uses "mail_from" if empty
      smtp_user: ...
      smtp_pass: ...
      # Email template file path (standard Go templates). By default the 'Fancy' template is used (see screenshot below). If empty, the built-in template with no markup and styles is used.
      template_file: '/etc/moira/fancy-template.html'
    - sender_type: twilio voice
      contact_type: twilio voice
      api_asid: ...
      api_authtoken: ...
      api_fromphone: ...
      # URL that responds with TwiML config for voice message generation, see https://www.twilio.com/docs/api/twiml/voice-overview
      voiceurl: ...
      append_message: true
    - sender_type: twilio sms
      contact_type: twilio sms
      api_asid: ...
      api_authtoken: ...
      api_fromphone: ...
    # Script and webhook senders support additional templated parameters:
    # ${contact_id} contact ID
    # ${contact_value} contact value (as specified by user via web UI)
    # ${contact_type} contact type (as specified in web UI config file)
    # ${trigger_id} trigger ID
    - sender_type: script
      contact_type: script
      # Executable path. File must exist on all machines where notifier is running.
      # You can use templated parameters here (see above), they will be replaced with appropriate values.
      exec: ...
    - sender_type: webhook
      contact_type: webhook
      # URL to send POST request (you can use templated parameters, see above)
      url: ...
      timeout: ...
      # Basic authorization parameters (if required)
      user: ...
      password: ...
      # Optional field `body` allows you to set the json request body using go-templates; the available fields are .Contact.Type and .Contact.Value
      body: '{ "value": {{ .Contact.Value }}, "text": "test" }'
      # `headers` allows you to set request headers in the "key: value" format
      headers:
        test: test-header
    # Example of creating multiple webhook type senders
    - sender_type: webhook
      contact_type: webhook2
      url: ...
      timeout: ...
    - sender_type: pagerduty
      contact_type: pagerduty
    - sender_type: opsgenie
      contact_type: opsgenie
      api_key: ...
    - sender_type: victorops
      contact_type: victorops
      routing_url: ...
    - sender_type: discord
      contact_type: discord
      token: ...
  # Self state monitor configuration section. Note: no inner subscriptions are required. Moira will use its notification mechanism to send messages.
  moira_selfstate:
    enabled: true
    # If true, Moira selfstate will check that the remote triggers checker works properly and notify the admin if the remote checker fails.
    # See https://moira.readthedocs.io/en/latest/installation/configuration.html#graphite-remote-triggers-checker for further information
    remote_triggers_enabled: false
    # Max Redis disconnect delay to send alert when reached
    redis_disconect_delay: 60s
    # Max Filter metrics receive delay to send alert when reached
    last_metric_received_delay: 120s
    # Max Checker checks perform delay to send alert when reached
    last_check_delay: 120s
    # Max Remote triggers Checker checks perform delay to send alert when reached
    # See https://moira.readthedocs.io/en/latest/installation/configuration.html#graphite-remote-triggers-checker for further information
    last_remote_check_delay: 300s
    # Self state monitor alerting interval
    notice_interval: 300s
    # NOTE(review): `notification_history` and `max_fail_attempt_to_send_available` are nested here
    # because they appear between self state keys in the file; confirm the level the notifier
    # config schema actually reads them from.
    # Maximum contact events storage interval
    notification_history:
      ttl: "48h"
    # Self state monitor check interval (default 10s)
    check_interval: 10s
    # Allowed number of failed resend attempts; if exceeded, an error is written to the logs (default 3)
    max_fail_attempt_to_send_available: 3
    # Contact list for Self state monitor alerts, use this like delivery channels in web-ui
    contacts:
      - type: mail
        # Placeholder address; quoted so it is always read as a string
        value: "admin@example.com"
# Logging settings
log:
  # Log output target
  log_file: stdout
  # Log verbosity level
  log_level: info
# notification holds the settings for sending notifications
notification:
  # resave_time is the amount by which the timestamp of notifications with triggers or metrics on Maintenance is incremented.
  # Important! Do not set this time too long, as this parameter sets the delay in sending notifications.
  resave_time: 30s
  # delayed_time is used to determine whether a notification is delayed: it is delayed when the difference
  # between its creation time and sending time is greater than delayed_time
  delayed_time: 1m
  # transaction_timeout defines the timeout between fetch notifications transactions
  transaction_timeout: 100ms
  # transaction_max_retries defines the maximum number of attempts to make a transaction
  transaction_max_retries: 10
  # transaction_heuristic_limit is the maximum allowable limit; after this limit all notifications will be taken without a limit
  transaction_heuristic_limit: 10000
# This section configures the list of graphite remote triggers sources.
# See https://moira.readthedocs.io/en/latest/installation/configuration.html#graphite-remote-triggers-checker for further information
# Used to be `remote` and contain only one source before v2.10
graphite_remote:
  # Unique cluster id (no other graphite_remote cluster should have the same one)
  # Use `default` for compatibility with old triggers without cluster_id
  # Should not be changed if there are any triggers using it
  - cluster_id: default
    # Cluster name to be displayed in UI
    cluster_name: Graphite Remote
    # URL of Graphite HTTP API: graphite-web, carbonapi, etc.
    # Specify full URL including '/render'
    url: "http://graphite.example.com/render"
    # Auth username. Only Basic-auth supported
    user: graphite_admin
    # Auth password. Only Basic-auth supported
    password: verySecurePassword
    # Minimal period to perform triggers re-check.
    # Note: Reducing this value increases CPU and memory usage and puts extra load on the Graphite HTTP API
    check_interval: 60s
    # Don't fetch metrics older than this value from remote storage
    metrics_ttl: 168h
    # Maximum timeout for HTTP-request made to Graphite HTTP API
    timeout: 60s
    # From 2.14.0
    # Retries configuration for requests that fetch data.
    # Library used for calculating exponential backoff retries: https://pkg.go.dev/github.com/cenkalti/backoff/v4
    retries:
      # Initial interval for retry attempt
      initial_interval: 60s
      # Used to calc interval between retries: RandomizedRetryInterval = RetryInterval *
      # * (random value from range [1 - randomization_factor, 1 + randomization_factor])
      randomization_factor: 0.5
      # For calculating next: RetryInterval = RetryInterval * multiplier
      multiplier: 1.5
      # Caps RetryInterval (NOT RandomizedRetryInterval)
      max_interval: 120s
      # If max_retries_count retries have already been performed, stop retrying
      # At least one of (max_retries_count, max_elapsed_time) should be specified
      max_retries_count: 3
      # If the time passed since the first try is more than max_elapsed_time, then stop retrying
      # At least one of (max_retries_count, max_elapsed_time) should be specified
      max_elapsed_time: 360s
    # From 2.14.0
    # Maximum timeout for healthcheck requests
    # NOTE(review): the key is spelled `heathcheck_*` (no "l") here and below — confirm it matches
    # the key name the config loader expects.
    heathcheck_timeout: 60s
    # From 2.14.0
    # Retries configuration for healthcheck requests (same fields as for retries)
    heathcheck_retries:
      # Initial interval for retry attempt
      initial_interval: 10s
      # Used to calc interval between retries: RandomizedRetryInterval = RetryInterval *
      # * (random value from range [1 - randomization_factor, 1 + randomization_factor])
      randomization_factor: 0.5
      # For calculating next: RetryInterval = RetryInterval * multiplier
      multiplier: 1.5
      # Caps RetryInterval (NOT RandomizedRetryInterval)
      max_interval: 60s
      # If max_retries_count retries have already been performed, stop retrying
      # At least one of (max_retries_count, max_elapsed_time) should be specified
      max_retries_count: 3
      # If the time passed since the first try is more than max_elapsed_time, then stop retrying
      # At least one of (max_retries_count, max_elapsed_time) should be specified
      max_elapsed_time: 200s
# This section configures the list of prometheus remote triggers sources.
# See https://moira.readthedocs.io/en/latest/installation/configuration.html#prometheus-remote-triggers-checker for further information
# Used to be `prometheus` and contain only one source before v2.10
prometheus_remote:
  # Unique cluster id (no other prometheus_remote cluster should have the same one)
  # Use `default` for compatibility with old triggers without cluster_id
  # Should not be changed if there are any triggers using it
  - cluster_id: default
    # Cluster name to be displayed in UI
    cluster_name: Prometheus Remote
    # URL of Prometheus HTTP API: Prometheus or VMSelect
    # Only the scheme and domain name must be specified, no URL-path
    url: https://prometheus.example.com
    # Auth username. Only Basic-auth supported
    user: prometheus_admin
    # Auth password. Only Basic-auth supported
    password: verySecurePassword
    # Minimal period to perform triggers re-check.
    # Note: Reducing this value increases CPU and memory usage and puts extra load on the Prometheus HTTP API
    check_interval: "60s"
    # Maximum timeout for HTTP-request made to Prometheus HTTP API
    timeout: "10s"
    # Number of times a failed request should be retried
    retries: 3
    # Delay between retries
    retry_timeout: "3s"
    # Don't fetch metrics older than this value from remote storage
    metrics_ttl: "168h"