# checker.example.yml

# Redis configuration depends on fields specified in redis config section:
# 1. Use field `master_name` to enable Redis Sentinel support
# 2. Specify two or more `addrs` to enable cluster support
# 3. Otherwise, standalone configuration is enabled
redis:
  # Name of the Sentinel master; leave empty when Sentinel is not used
  master_name: ""
  # Address list, format: {host1_name:port},{ip:port}
  addrs: "localhost:6379"

  # Credentials for Redis
  username: "username"
  password: "password"
  # Credentials for Redis Sentinel
  sentinel_username: "sentinel_username"
  sentinel_password: "sentinel_password"

  # Metrics older than this value are deleted from Redis by Moira.
  # Large values will lead to various problems everywhere,
  # see https://github.com/moira-alert/moira/pull/519
  metrics_ttl: 3h

  # How long to wait while establishing a new connection (default: 500 milliseconds)
  dial_timeout: 1s
  # How long a socket read may take. When exceeded, the command fails with
  # a timeout instead of blocking. Omit or set to 0 to use the default (3 seconds).
  read_timeout: 5s
  # How long a socket write may take. When exceeded, the command fails with
  # a timeout instead of blocking. Omit or set to 0 to use the default (ReadTimeout).
  write_timeout: 5s

  # Permits read-only commands on slave nodes.
  # Has no effect unless `route_randomly` or `route_by_latency` is set to true
  read_only: true
  # Routes read-only commands to a **random** master or slave node.
  # Turning it on automatically enables ReadOnly
  route_randomly: true
  # Routes read-only commands to the **closest** master or slave node.
  # Turning it on automatically enables ReadOnly
  route_by_latency: false

  # Lower bound of the backoff between retries; used to calculate exponential backoff (default: 0)
  min_retry_backoff: 1s
  # Upper bound of the backoff between retries; used to calculate exponential backoff (default: 0)
  max_retry_backoff: 10s
  # Maximum number of attempts to find an alive node (in Redis cluster)
  max_redirects: 4
telemetry:
  # Single port shared by all telemetry data: Prometheus scraping, pprof, etc.
  listen: ":8091"

  pprof:
    # Set to true to enable pprof on the common telemetry port
    enabled: false

  graphite:
    # Set to true to enable the graphite sender
    enabled: true
    # Set to true to capture runtime stats and send them to graphite.
    # Note: capturing runtime stats requires calling stoptheworld() at every
    # configured "graphite.interval" (https://golang.org/src/runtime/mstats.go)
    runtime_stats: false
    # Graphite relay URI, format: ip:port
    uri: "graphite-relay:2003"
    # Prefix for Moira metrics. Use 'prefix: {hostname}' to use hostname autoresolver
    prefix: DevOps.moira
    # How often metrics are sent
    interval: 60s
checker:
  # Min period to perform triggers re-check. Note: reducing this value increases CPU and memory usage
  # Used to be `checker.check_interval` before v2.10
  metric_event_trigger_check_interval: 30s
  # Moira 2.4 added a new entity - Lazy Trigger. This is a regular trigger but without any subscription for it.
  # By default Moira treats all triggers equally regardless of the number of their subscriptions.
  # You can change this behaviour using the option below. This can reduce CPU usage on your server.
  # Lazy triggers checker works if lazy_triggers_check_interval > metric_event_trigger_check_interval
  # (the latter was named `check_interval` before v2.10). We recommend setting it to 10m.
  lazy_triggers_check_interval: 10m
  # Period to cancel forced checks for all triggers if no metrics were received.
  # Must be greater than `local.check_interval`
  stop_checking_interval: 3600s
  # Time after which the check is considered critically slow. Such checks are logged for debug purposes
  # default: 1h
  critical_time_of_check: 10m
local:
  # Period for every non-lazy trigger to perform a check on
  # Used to be `checker.nodata_check_interval` before v2.10
  check_interval: 60s
  # Maximum number of checks performed in parallel.
  # Equals the number of processor cores found on the Moira host by default or when set to 0.
  max_parallel_checks: 512
# This section configures the list of graphite remote triggers sources.
# See https://moira.readthedocs.io/en/latest/installation/configuration.html#graphite-remote-triggers-checker for further information
# Used to be `remote` and contain only one source before v2.10
graphite_remote:
  - # Unique cluster id (no other graphite_remote cluster should have the same one)
    # Use `default` for compatibility with old triggers without cluster_id
    # Should not be changed if there are any triggers using it
    cluster_id: default
    # Cluster name to be displayed in UI
    cluster_name: Graphite Remote
    # URL of Graphite HTTP API: graphite-web, carbonapi, etc.
    # Specify full URL including '/render'
    url: "http://graphite.example.com/render"
    # Auth username. Only Basic-auth supported
    user: graphite_admin
    # Auth password. Only Basic-auth supported
    password: verySecurePassword
    # Minimal period to perform triggers re-check.
    # Note: reducing this value increases CPU and memory usage and puts extra load on Graphite HTTP API
    check_interval: 60s
    # Don't fetch metrics older than this value from remote storage
    metrics_ttl: 168h
    # Maximum timeout for HTTP-request made to Graphite HTTP API
    timeout: 60s
    # Equals to the number of processor cores found on Moira host by default or when variable is defined as 0.
    max_parallel_checks: 0
    # From 2.14.0
    # Retries configuration for requests that fetch data.
    # Library used for calculating exponential backoff retries: https://pkg.go.dev/github.com/cenkalti/backoff/v4
    retries:
      # Initial interval for retry attempt
      initial_interval: 60s
      # Used to calc interval between retries: RandomizedRetryInterval = RetryInterval *
      # * (random value from range [1 - randomization_factor, 1 + randomization_factor])
      randomization_factor: 0.5
      # For calculating next: RetryInterval = RetryInterval * multiplier
      multiplier: 1.5
      # Caps RetryInterval (NOT RandomizedRetryInterval)
      max_interval: 120s
      # If max_retries_count retries have already been performed, stop retrying.
      # At least one of (max_retries_count, max_elapsed_time) should be specified
      max_retries_count: 3
      # If time passed since first try is more than max_elapsed_time, then stop retrying.
      # At least one of (max_retries_count, max_elapsed_time) should be specified
      max_elapsed_time: 360s
    # From 2.14.0
    # Maximum timeout for healthcheck requests
    # NOTE(review): this key and `health_check_retries` below were previously spelled
    # `heathcheck_*` (missing "l"); confirm the exact key names against Moira's
    # graphite remote configuration before relying on them.
    health_check_timeout: 60s
    # From 2.14.0
    # Retries configuration for healthcheck requests (same fields as for retries)
    health_check_retries:
      # Initial interval for retry attempt
      initial_interval: 10s
      # Used to calc interval between retries: RandomizedRetryInterval = RetryInterval *
      # * (random value from range [1 - randomization_factor, 1 + randomization_factor])
      randomization_factor: 0.5
      # For calculating next: RetryInterval = RetryInterval * multiplier
      multiplier: 1.5
      # Caps RetryInterval (NOT RandomizedRetryInterval)
      max_interval: 60s
      # If max_retries_count retries have already been performed, stop retrying.
      # At least one of (max_retries_count, max_elapsed_time) should be specified
      max_retries_count: 3
      # If time passed since first try is more than max_elapsed_time, then stop retrying.
      # At least one of (max_retries_count, max_elapsed_time) should be specified
      max_elapsed_time: 200s
# This section configures the list of prometheus remote triggers sources.
# See https://moira.readthedocs.io/en/latest/installation/configuration.html#prometheus-remote-triggers-checker for further information
# Used to be `prometheus` and contain only one source before v2.10
prometheus_remote:
  - # Cluster id; must be unique among all prometheus_remote clusters.
    # Use `default` for compatibility with old triggers without cluster_id.
    # Do not change it while any trigger still uses it
    cluster_id: default
    # Name under which the cluster is shown in UI
    cluster_name: Prometheus Remote
    # URL of Prometheus HTTP API: Prometheus or VMSelect.
    # Specify the domain name only (as in the example), without a URL path
    url: "https://prometheus.example.com"
    # Credentials. Only Basic-auth supported
    user: prometheus_admin
    password: verySecurePassword
    # Minimal period to perform triggers re-check.
    # Note: reducing this value increases CPU and memory usage and puts extra load on Prometheus HTTP API
    check_interval: 60s
    # Maximum timeout for an HTTP request made to Prometheus HTTP API
    timeout: 10s
    # How many times a failed request is retried
    retries: 3
    # Delay between retries
    retry_timeout: 3s
    # Metrics older than this value are not fetched from remote storage
    metrics_ttl: 168h
    # Equals the number of processor cores found on the Moira host by default or when set to 0.
    max_parallel_checks: 0
# Logging settings
log:
  # Log destination; `stdout` here (presumably a file path is also accepted - confirm)
  log_file: stdout
  # Logging verbosity level
  log_level: info