diff --git a/changelogs/fragments/106.yml b/changelogs/fragments/106.yml new file mode 100644 index 0000000..2bca6d5 --- /dev/null +++ b/changelogs/fragments/106.yml @@ -0,0 +1,3 @@ +minor_changes: + - sql_exporter - Add metrics to track current autovacuum workers running and max autovacuum workers + - grafana - Add panel to PG Details dashboard to track autovac workers running vs max diff --git a/grafana/postgres/PG_Details.json b/grafana/postgres/PG_Details.json index bb70235..df018fa 100644 --- a/grafana/postgres/PG_Details.json +++ b/grafana/postgres/PG_Details.json @@ -85,7 +85,7 @@ "sizing": "auto", "text": {} }, - "pluginVersion": "10.4.2", + "pluginVersion": "10.4.3", "targets": [ { "datasource": { @@ -160,7 +160,7 @@ "sizing": "auto", "text": {} }, - "pluginVersion": "10.4.2", + "pluginVersion": "10.4.3", "targets": [ { "datasource": { @@ -234,7 +234,7 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "10.4.2", + "pluginVersion": "10.4.3", "targets": [ { "datasource": { @@ -309,7 +309,7 @@ "sizing": "auto", "text": {} }, - "pluginVersion": "10.4.2", + "pluginVersion": "10.4.3", "targets": [ { "datasource": { @@ -384,7 +384,7 @@ "sizing": "auto", "text": {} }, - "pluginVersion": "10.4.2", + "pluginVersion": "10.4.3", "targets": [ { "datasource": { @@ -452,7 +452,7 @@ "text": {}, "valueMode": "color" }, - "pluginVersion": "10.4.2", + "pluginVersion": "10.4.3", "targets": [ { "datasource": { @@ -2127,12 +2127,16 @@ "x": 0, "y": 49 }, - "id": 15, + "id": 16, "options": { "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "right", "showLegend": true }, "tooltip": { @@ -2147,31 +2151,55 @@ "type": "prometheus", "uid": "${ccp_datasource}" }, - "expr": "sum(rate(ccp_stat_database_deadlocks{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\", dbname=~\"[[pgdatabase]]\"}[5m]))", + "expr": "sum(rate(ccp_stat_user_tables_autovacuum_count{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\", dbname=~\"[[pgdatabase]]\"}[5m]))", "format": "time_series", "interval": "", "intervalFactor": 2, - "legendFormat": "Conflicts", - "metric": "pg_stat_database_conflicts", + "legendFormat": "AutoVacuum", + "metric": "ccp_stat_user_tables_autovacuum_count", "refId": "A", - "step": 240 + "step": 120 }, { "datasource": { "type": "prometheus", "uid": "${ccp_datasource}" }, - "expr": "sum(rate(ccp_stat_database_conflicts{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\", dbname=~\"[[pgdatabase]]\"}[5m]))", + "expr": "sum(rate(ccp_stat_user_tables_autoanalyze_count{job=~\"[[pgnodes]]\", dbname=~\"[[pgdatabase]]\"}[5m]))", "format": "time_series", "interval": "", "intervalFactor": 2, - "legendFormat": "DeadLocks", - "metric": "pg_stat_database_deadlocks", + "legendFormat": "AutoAnalyze", "refId": "B", - "step": 240 + "step": 120 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "expr": "sum(rate(ccp_stat_user_tables_vacuum_count{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Vacuum", + "refId": "C", + "step": 120 + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "expr": "sum(rate(ccp_stat_user_tables_analyze_count{job=~\"[[pgnodes]]\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Analyze", + "refId": "D", + "step": 120 } ], - "title": "Conflicts/DeadLocks - [[pgdatabase]]", + "title": "Vacuum/Analyze Activity Rate - [[pgdatabase]]", "type": "timeseries" }, { @@ -2179,6 +2207,7 @@ "type": "prometheus", "uid": "${ccp_datasource}" }, + "description": "", "fieldConfig": { "defaults": { "color": { @@ -2192,7 +2221,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2206,7 +2235,7 @@ "scaleDistribution": { "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", @@ -2228,8 +2257,7 @@ "value": 80 } ] - }, - "unit": "short" + } }, "overrides": [] }, @@ -2239,79 +2267,159 @@ "x": 12, "y": 49 }, - "id": 16, + "id": 58, "options": { "legend": { - "calcs": [ - "mean", - "max", - "min" - ], - "displayMode": "table", - "placement": "right", + "calcs": [], + "displayMode": "list", + "placement": "bottom", "showLegend": true }, "tooltip": { - "mode": "multi", + "mode": "single", "sort": "none" } }, - "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${ccp_datasource}" }, - "expr": "sum(rate(ccp_stat_user_tables_autovacuum_count{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\", dbname=~\"[[pgdatabase]]\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "AutoVacuum", - "metric": "ccp_stat_user_tables_autovacuum_count", - "refId": "A", - "step": 120 + "editorMode": "code", + "expr": "ccp_autovacuum_workers_count{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\"}", + "instant": false, + "legendFormat": "Running", + "range": true, + "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${ccp_datasource}" }, - "expr": "sum(rate(ccp_stat_user_tables_autoanalyze_count{job=~\"[[pgnodes]]\", dbname=~\"[[pgdatabase]]\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "AutoAnalyze", - "refId": "B", - "step": 120 + "editorMode": "code", + "expr": "ccp_autovacuum_workers_max{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\"}", + "hide": false, + "instant": false, + "legendFormat": "Max", + "range": true, + "refId": "B" + } + ], + "title": "Autovacuum Workers", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.2", + "targets": [ { "datasource": { "type": "prometheus", "uid": "${ccp_datasource}" }, - "expr": "sum(rate(ccp_stat_user_tables_vacuum_count{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\"}[5m]))", + "expr": "sum(rate(ccp_stat_database_deadlocks{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\", dbname=~\"[[pgdatabase]]\"}[5m]))", "format": "time_series", "interval": "", "intervalFactor": 2, - "legendFormat": "Vacuum", - "refId": "C", - "step": 120 + "legendFormat": "Conflicts", + "metric": "pg_stat_database_conflicts", + "refId": "A", + "step": 240 }, { "datasource": { "type": "prometheus", "uid": "${ccp_datasource}" }, - "expr": "sum(rate(ccp_stat_user_tables_analyze_count{job=~\"[[pgnodes]]\"}[5m]))", + "expr": "sum(rate(ccp_stat_database_conflicts{cluster_name=\"[[pgcluster]]\", job=~\"[[pgnodes]]\", dbname=~\"[[pgdatabase]]\"}[5m]))", "format": "time_series", + "interval": "", "intervalFactor": 2, - "legendFormat": "Analyze", - "refId": "D", - "step": 120 + "legendFormat": "DeadLocks", + "metric": "pg_stat_database_deadlocks", + "refId": "B", + "step": 240 } ], - "title": "Vacuum/Analyze Activity Rate - [[pgdatabase]]", + "title": "Conflicts/DeadLocks - [[pgdatabase]]", "type": "timeseries" } ], @@ -2346,7 +2454,7 @@ }, "datasource": { "type": "prometheus", - "uid": "${ccp_datasource}" + "uid": "PDC1078F23EBDF0E5" }, "definition": "label_values(up{exp_type='pg'}, cluster_name)", "hide": 0, @@ -2376,7 +2484,7 @@ }, "datasource": { "type": "prometheus", - "uid": "${ccp_datasource}" + "uid": "PDC1078F23EBDF0E5" }, "definition": "label_values(up{exp_type='pg', cluster_name=\"[[pgcluster]]\"}, job)", "hide": 0, @@ -2400,7 +2508,7 @@ }, { "current": { - "selected": true, + "selected": false, "text": [ "All" ], @@ -2410,7 +2518,7 @@ }, "datasource": { "type": "prometheus", - "uid": "${ccp_datasource}" + "uid": "PDC1078F23EBDF0E5" }, "definition": "label_values(ccp_database_size_bytes{cluster_name=\"[[pgcluster]]\", job=\"[[pgnodes]]\"}, dbname)", "hide": 0, @@ -2454,6 +2562,6 @@ "timezone": "browser", "title": "PostgreSQL Details", "uid": "6jtN_vfiz", - "version": 2, + "version": 3, "weekStart": "" } diff --git a/sql_exporter/common/crunchy_global_collector.yml b/sql_exporter/common/crunchy_global_collector.yml index 707f773..ca9722b 100644 --- a/sql_exporter/common/crunchy_global_collector.yml +++ b/sql_exporter/common/crunchy_global_collector.yml @@ -351,6 +351,18 @@ metrics: values: [total_size_bytes] query_ref: ccp_wal_activity + - metric_name: ccp_autovacuum_workers_count + type: gauge + help: "Count of autovacuum workers currently running" + values: [count] + query_ref: ccp_autovacuum_workers + + - metric_name: ccp_autovacuum_workers_max + type: gauge + help: "Count of autovacuum workers currently running" + values: [max] + query_ref: ccp_autovacuum_workers + # - metric_name: # type: gauge # help: @@ -513,3 +525,10 @@ queries: SELECT last_5_min_size_bytes , total_size_bytes FROM pgmonitor_ext.ccp_wal_activity + + + - query_name: ccp_autovacuum_workers + query: | + SELECT count(*) AS count, current_setting('autovacuum_max_workers') AS max + FROM pg_catalog.pg_stat_activity + WHERE backend_type = 'autovacuum worker';