diff --git a/index.html b/index.html index c8b2a0a..2088b53 100644 --- a/index.html +++ b/index.html @@ -1,92 +1,150 @@ - -
- - - -Prometheus & Ansible
-a way to manage monitoring
-Roman Demachkovych
- -Paweł Krupa
-Open source, metrics-based monitoring system.
- -- -
It does one thing and does it well.
-Simple text format makes it easy to expose metrics to Prometheus.
- -- -
The data model identifies each time series an unordered set of key-value pairs called labels.
-Scraped data is stored in local time-series database.
- -- -
PromQL expression language allows easy metrics selection and aggregation.
-create graphs
- -set alert rules
- -expose data
-If you need 100% accuracy, such as for per-request billing, Prometheus is not a good choice as the collected data will likely not be detailed and complete enough.
-# HELP http_request_duration_microseconds The HTTP request latencies in microseconds.
+
+
+
+
+
+ Prometheus deployment with Ansible
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Prometheus & Ansible
+
+
+
+
+
+ a way to manage monitoring
+
+
+
+
+ Roman Demachkovych
+ Paweł Krupa
+
+
+
+
+
+
+
+
+
+
+
+
+
+ - simple
+ - agentless
+ - one basic dependency - python
+ - config in YAML and jinja2
+
+
+
+
+
+
+
+
+
+ Prometheus
+
+
+
+
+
+
+
+
+
+ Open source, metrics-based monitoring system.
+
+ It does one thing and does it well.
+
+
+
+
+
+
+ Simple text format makes it easy to expose metrics to Prometheus.
+
+ The data model identifies each time series by an unordered set of key-value pairs called labels.
+
+
+
+
+
+
+ Scraped data is stored in a local time-series database.
+
+ PromQL expression language allows easy metrics selection and aggregation.
+
+
+
+
+
+
+ create graphs
+ set alert rules
+ expose data
+
+
+
+
+ PromQL
+
+
+
+
+
+
+
+
+
+ Architecture
+
+
+
+
+
+
+ Caution!
+
+
+
+
+ If you need 100% accuracy, such as for per-request billing, Prometheus is not a good choice as the collected data will likely not be detailed and complete enough.
+
+
+
+
+
+
+
+
+ How to gather data?
+
+
+
+
+
+
+ Metrics exposition format
+
+
+
+
+ # HELP http_request_duration_microseconds The HTTP request latencies in microseconds.
# TYPE http_request_duration_microseconds summary
http_request_duration_microseconds{handler="prometheus",quantile="0.5"} 73334.095
http_request_duration_microseconds{handler="prometheus",quantile="0.9"} 85549.187
@@ -113,12 +171,19 @@ Metrics exposition format
# HELP node_arp_entries ARP entries by device
# TYPE node_arp_entries gauge
node_arp_entries{device="docker0"} 1
-node_arp_entries{device="eth0"} 4
-
-Embed into software
-
-
-Official client libraries:
+node_arp_entries{device="eth0"} 4
+
+
+
+
+
+
+ Embed into software
+
+
+
-
-Or use metrics exporters
-
-## Core components starting at 9090
+* Rust
+
+
+
+
+
+
+ Or use metrics exporters
+
+
+
+
+
+ ## Core components starting at 9090
* 9090 - Prometheus server
* 9091 - Pushgateway
@@ -156,14 +230,26 @@ Or use metrics exporters
* 9102 - StatsD exporter
* 9103 - Collectd exporter
* 9108 - Graphite exporter
-* 9110 - Blackbox exporter
-
-
-Write your own!
-
-import json
+* 9110 - Blackbox exporter
+
+
+
+
+
+
+
+
+
+
+
+
+ Write your own!
+
+
+
+
+
+ import json
import time
import urllib2
from prometheus_client import start_http_server
@@ -191,164 +277,338 @@ Write your own!
if __name__ == "__main__":
REGISTRY.register(JenkinsCollector())
start_http_server(9118)
- while True: time.sleep(1)
-
-Connect prometheus
-
-scrape_configs:
+ while True: time.sleep(1)
+
+
+
+
+
+
+
+ Connect prometheus
+
+
+
+
+
+ scrape_configs:
- job_name: 'node_exporter'
static_configs:
- targets:
- - 'localhost:9100'
-Visualise.
-
-
-Promdash
-
-
-Grafana!
-
-
-Prometheus integration
-
-
-
- - Datasource support
- - Prometheus dashboard
- - PromQL autocomplete
- - Alerts
-
-
-
-Alert!
-
-Alertmanager handles alerts sent by client applications such as the Prometheus, Grafana, etc.
-
- Alertmanager
-
-
-
- - deduplication
- - grouping
- - routing
- - silencing
- - inhibition
- - sending
-
-
-
-Functionality
-
-
- - silencing
- -
-inhibition
-
-
-
-
-Functionality
-
-Alertmanager supports a mesh configuration to create a cluster for High Availability.
-
-
-Warning: High Availability is under active development
-
-
- - email
- - hipchat
- - pagerduty
- - pushover
- - slack
- - opsgenie
- - webhook
- - victorops
-
-
-
-Notification integrations
-
-Install
-
-Method
-
-
-
- - source
- - pre-compiled binary
- - docker container
-
-
-
-
- - apt-get install prometheus
- - yum install prometheus
- - any installation from package
-
-
-
-Recommended
-
-
-Don't do this!
- $ cd /tmp
+ - 'localhost:9100'
+
+
+
+
+
+
+
+
+
+ Visualise.
+
+
+
+
+
+
+ Promdash
+
+
+
+
+
+
+
+
+
+ Grafana!
+
+
+
+
+
+
+
+
+
+ Prometheus integration
+
+
+
+
+
+ - Datasource support
+ - Prometheus dashboard
+ - PromQL autocomplete
+ - Alerts
+
+
+
+
+
+
+
+
+
+
+
+
+ Alert!
+
+
+
+
+
+
+
+ Alertmanager handles alerts sent by client applications such as Prometheus, Grafana, etc.
+
+
+
+
+
+
+
+
+ Alertmanager
+
+
+
+
+
+
+
+
+
+
+
+
+ - deduplication
+ - grouping
+ - routing
+ - sending
+
+
+
+
+
+
+ Functions
+
+
+
+
+
+
+
+
+ - silencing
+ -
+ inhibition
+
+
+
+
+
+
+
+
+ Functions
+
+
+
+
+
+
+
+ Alertmanager supports a mesh configuration to create a cluster for High Availability.
+
+
+
+
+
+ Warning: High Availability is under active development
+
+
+
+
+
+
+
+
+ - email
+ - hipchat
+ - pagerduty
+ - pushover
+ - slack
+ - opsgenie
+ - webhook
+ - victorops
+
+
+
+
+
+
+ Notification integrations
+
+
+
+
+
+
+
+
+ Install
+
+
+
+
+
+
+ Method
+
+
+
+
+
+ - source
+ - pre-compiled binary
+ - docker container
+
+
+
+
+
+
+ - apt-get install prometheus
+ - yum install prometheus
+ - any installation from package
+
+
+
+
+
+ Recommended
+
+
+
+
+ Don't do this!
+
+
+
+
+
+
+
+ $ cd /tmp
$ wget https://github.com/prometheus/prometheus/releases/download/v2.2.0/prometheus-2.2.0.linux-amd64.tar.gz
-$ tar -xzf prometheus-2.2.0.linux-amd64.tar.gz
-
-Binary
-
-
-Repeat for every part (prometheus, alertmanager, node_exporter, blackbox_exporter, *_exporter) on multiple nodes every month or so
-
-# chmod +x prometheus-2.2.0.linux-amd64/{prometheus,promtool}
-# cp prometheus-2.2.0.linux-amd64/{prometheus,promtool} /usr/local/bin/
-# chown root:root /usr/local/bin/{prometheus,promtool}
-# mkdir -p /etc/prometheus
-# vim /etc/prometheus/prometheus.yml
-# promtool check config prometheus.yml
+$ tar -xzf prometheus-2.2.0.linux-amd64.tar.gz
+
+
+
+
+
+ Binary
+
+
+
+
+ $ sudo chmod +x prometheus-2.2.0.linux-amd64/{prometheus,promtool}
+$ sudo cp prometheus-2.2.0.linux-amd64/{prometheus,promtool} /usr/local/bin/
+$ sudo chown root:root /usr/local/bin/{prometheus,promtool}
+
+
+
+
+ $ sudo mkdir -p /etc/prometheus
+$ sudo vim /etc/prometheus/prometheus.yml
+$ promtool check config prometheus.yml
Checking prometheus.yml
SUCCESS: 0 rule files found
-# prometheus --config.file "/etc/prometheus/prometheus.yml" &
-
-Problems
-
-
-
- - Too many operations
- - Won't survive reboot
- - No dedicated user
- - Try changing config
- - Troublesome upgrade
- - SELinux anyone?
-
-
-Manage
-
-(aka why Ansible?)
-
-Goals
-
-
-
- - Zero-configuration deployment
- - Easy management of multiple nodes
- - Error checking
- - Multiple CPU architecture support
-
-
-Where is my config?
-
-
-
- - command line parameters
- - main configuration file (in YAML)
- - files included from main file (ex. alert rules or file_sd config)
-
-
-Prometheus
-
-global:
+$ prometheus --config.file "/etc/prometheus/prometheus.yml" &
+
+
+
+
+ Repeat for every component (prometheus, alertmanager, node_exporter, blackbox_exporter, *_exporter) on multiple nodes every month or so
+
+
+
+
+
+
+ Problems
+
+
+
+
+
+ - Too many operations
+ - Won't survive reboot
+ - No dedicated user
+ - Try changing config
+ - Troublesome upgrade
+ - SELinux anyone?
+
+
+
+
+
+
+
+
+
+ Manage
+ (aka why Ansible?)
+
+
+
+
+
+
+ Goals
+
+
+
+
+
+ - Zero-configuration deployment
+ - Easy management of multiple nodes
+ - Error checking
+ - Multiple CPU architecture support
+
+
+
+
+
+
+
+ Where is my config?
+
+
+
+
+
+ - command line parameters
+ - main configuration file (in YAML)
+ - files included from main file (e.g. alert rules or file_sd config)
+
+
+
+
+
+
+
+ Prometheus
+
+
+
+
+ global:
evaluation_interval: 15s
scrape_interval: 15s
scrape_timeout: 10s
@@ -365,17 +625,34 @@ Where is my config?
- job_name: node
file_sd_configs:
- files:
- - "/etc/prometheus/file_sd/node.yml"
-# Nothing.
-
-Ansible
-
-
-Main config
-
-Prometheus
-
-global:
+ - "/etc/prometheus/file_sd/node.yml"
+
+
+
+
+ # Nothing.
+
+
+
+
+ Ansible
+
+
+
+
+ Main config
+
+
+
+
+
+
+ Prometheus
+
+
+
+
+ global:
evaluation_interval: 15s
scrape_interval: 15s
scrape_timeout: 10s
@@ -402,8 +679,12 @@ Main config
- job_name: node
file_sd_configs:
- files:
- - "/etc/prometheus/file_sd/node.yml"
-prometheus_alertmanager_config:
+ - "/etc/prometheus/file_sd/node.yml"
+
+
+
+
+ prometheus_alertmanager_config:
- scheme: http
static_configs:
- targets:
@@ -418,16 +699,29 @@ Main config
prometheus_targets:
node:
- targets:
- - "localhost:9100"
-
-Ansible
-
-
-Main config (extended)
-
-Command line parameters
-
-# Ansible managed file. Be wary of possible overwrites.
+ - "localhost:9100"
+
+
+
+
+ Ansible
+
+
+
+
+ Main config (extended)
+
+
+
+
+
+
+ Command line parameters
+
+
+
+
+ # Ansible managed file. Be wary of possible overwrites.
[Unit]
Description=Prometheus
After=network.target
@@ -449,110 +743,144 @@ Command line parameters
Restart=always
[Install]
-WantedBy=multi-user.target
-Everyone makes mistakes.
-
-
-
-preflight checks included in role
-
-use `promtool` in ansible `validate` directive
-
-Gathering system metrics from many nodes with multiple CPU architectures?
-
-node_exporter!
-
-
-
- - One binary
- - Simple configuration with cli flags
-
-
-
-ansible role bonuses:
-
-
-
- - versioning
- - system user management
- - CPU architecture auto-detection
- - systemd service files
- - linux capabilites support
- - basic SELinux support
-
-
-Example
-
-
-
-
-Resources
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+WantedBy=multi-user.target
+ preflight checks included in role
+use `promtool` in ansible `validate` directive
+