diff --git a/dashboards/prombenchy-gcm.json b/dashboards/prombenchy-gcm.json index 96a11df..f6a133c 100644 --- a/dashboards/prombenchy-gcm.json +++ b/dashboards/prombenchy-gcm.json @@ -4,37 +4,83 @@ "columns": 48, "tiles": [ { + "yPos": 39, "width": 24, "height": 13, "widget": { - "title": "Collector samples per sec scraped (should be ~10.6k)", "xyChart": { - "chartOptions": { - "mode": "COLOR" - }, "dataSets": [ { + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace, type) (rate(prometheus_tsdb_head_samples_appended_total{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[5m]))\n", + "unitOverride": "", + "outputFullDuration": false + }, "plotType": "LINE", + "legendTemplate": "", "targetAxis": "Y1", - "timeSeriesQuery": { - "prometheusQuery": "sum by (pod, namespace) (rate(prometheus_tsdb_head_samples_appended_total{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[5m]))\n", - "unitOverride": "" - } + "dimensions": [], + "measures": [], + "breakdowns": [] }, { + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace, type) (rate(prometheus_agent_samples_appended_total{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[5m]))", + "unitOverride": "", + "outputFullDuration": false + }, "plotType": "LINE", + "legendTemplate": "", "targetAxis": "Y1", - "timeSeriesQuery": { - "prometheusQuery": "sum by (pod, namespace) (rate(prometheus_agent_samples_appended_total{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[5m]))", - "unitOverride": "" - } + "dimensions": [], + "measures": [], + "breakdowns": [] }, + { + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (rate(otelcol_receiver_accepted_metric_points{cluster=\"bwplotka-prombenchy\"}[5m])) # Just series, not adjusted series?", + "unitOverride": "", + "outputFullDuration": false + }, + "plotType": "LINE", + "legendTemplate": "", + "targetAxis": "Y1", + "dimensions": [], + "measures": [], + "breakdowns": [] + } + ], + "thresholds": [], + "yAxis": { + "label": "", + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR", + "showLegend": false, + "displayHorizontal": false + } + }, + "title": "Collector samples per sec scraped (should be ~10.6k for Prom, and ~5.7 for Otel due to \"adjusted\" logic)", + "id": "" + } + }, + { + "width": 24, + "height": 13, + "widget": { + "title": "Collector Memory (working set)", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ { "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "sum by (pod, namespace) (rate(otelcol_receiver_accepted_metric_points_total{cluster=\"bwplotka-prombenchy\"}[5m]))", - "unitOverride": "" + "prometheusQuery": "sum by (__name__, pod, namespace) (container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=~\"otel-collector|prometheus\", pod=~\"collector-.*\"})", + "unitOverride": "By" } } ], @@ -47,17 +93,16 @@ } }, { - "xPos": 24, - "yPos": 16, + "yPos": 52, "width": 24, - "height": 13, + "height": 12, "widget": { "xyChart": { "dataSets": [ { "timeSeriesQuery": { - "prometheusQuery": "sum by (__name__, pod) (container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", pod=~\"collector-.*\"})", - "unitOverride": "By", + "prometheusQuery": "sum by (pod, namespace) (prometheus_tsdb_head_series{cluster=\"bwplotka-prombenchy\", job=\"collector\"})", + "unitOverride": "", "outputFullDuration": false }, "plotType": "LINE", @@ -69,8 +114,8 @@ }, { "timeSeriesQuery": { - "prometheusQuery": "sum by (__name__, pod) (go_memstats_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"})\n\n", - "unitOverride": "By", + "prometheusQuery": "sum by (pod, namespace) (prometheus_agent_active_series{cluster=\"bwplotka-prombenchy\", job=\"collector\"})", + "unitOverride": "", "outputFullDuration": false }, "plotType": "LINE", @@ -92,115 +137,144 @@ "displayHorizontal": false } }, - "title": "Collector Memory (working set and heap)", + "title": "Collector Active Series (should be ~160k with 2x spikes) - only Prometheus", "id": "" } }, { - "yPos": 13, + "xPos": 24, "width": 24, - "height": 12, + "height": 13, "widget": { - "title": "Collector Active Series (should be ~160k)", "xyChart": { - "chartOptions": { - "mode": "COLOR" - }, "dataSets": [ { + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (rate(process_cpu_seconds_total{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", job=\"collector\"}[${__interval}]))\n\n", + "unitOverride": "", + "outputFullDuration": false + }, "plotType": "LINE", + "legendTemplate": "", "targetAxis": "Y1", - "timeSeriesQuery": { - "prometheusQuery": "sum by (pod, namespace) (prometheus_tsdb_head_series{cluster=\"bwplotka-prombenchy\", job=\"collector\"})", - "unitOverride": "" - } + "dimensions": [], + "measures": [], + "breakdowns": [] }, { + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (rate(otelcol_process_cpu_seconds{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[${__interval}]))\n", + "unitOverride": "", + "outputFullDuration": false + }, "plotType": "LINE", + "legendTemplate": "", "targetAxis": "Y1", - "timeSeriesQuery": { - "prometheusQuery": "sum by (pod, namespace) (prometheus_agent_active_series{cluster=\"bwplotka-prombenchy\", job=\"collector\"})", - "unitOverride": "" - } + "dimensions": [], + "measures": [], + "breakdowns": [] } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR", + "showLegend": false, + "displayHorizontal": false } - } + }, + "title": "Collector CPU Use", + "id": "" } }, { "xPos": 24, - "yPos": 29, + "yPos": 52, "width": 24, - "height": 12, + "height": 10, "widget": { - "title": "Collector CPU Use", "xyChart": { - "chartOptions": { - "mode": "COLOR" - }, "dataSets": [ { + "timeSeriesQuery": { + "prometheusQuery": "count by (namespace)(avalanche_counter_metric_mmmmm_0_0_total{cluster=\"bwplotka-prombenchy\", series_id=\"1\"})", + "unitOverride": "", + "outputFullDuration": false + }, "plotType": "LINE", + "legendTemplate": "", "targetAxis": "Y1", - "timeSeriesQuery": { - "prometheusQuery": "sum by (pod, namespace) (rate(process_cpu_seconds_total{cluster=\"bwplotka-prombenchy\", container=~\"prometheus|otel-collector\", job=\"collector\"}[${__interval}]))", - "unitOverride": "" - } + "dimensions": [], + "measures": [], + "breakdowns": [] } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR", + "showLegend": false, + "displayHorizontal": false } - } + }, + "title": "GCM Ingested series for one counter, series_id=1 (should be 16 with 32 staleness spikes)", + "id": "" } }, { - "yPos": 25, + "xPos": 24, + "yPos": 62, "width": 24, - "height": 10, + "height": 13, "widget": { - "title": "GCM Ingested series for one counter (should be 160 with 320 staleness spikes)", "xyChart": { - "chartOptions": { - "mode": "COLOR" - }, "dataSets": [ { + "timeSeriesQuery": { + "prometheusQuery": "sum by (namespace) (scrape_samples_scraped{cluster=\"bwplotka-prombenchy\", job=\"avalanche\"})", + "unitOverride": "", + "outputFullDuration": false + }, "plotType": "LINE", + "legendTemplate": "", "targetAxis": "Y1", - "timeSeriesQuery": { - "prometheusQuery": "count by (namespace)(avalanche_counter_metric_mmmmm_0_0_total{cluster=\"bwplotka-prombenchy\"})", - "unitOverride": "" - } + "dimensions": [], + "measures": [], + "breakdowns": [] } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR", + "showLegend": false, + "displayHorizontal": false } - } + }, + "title": "Samples scraped by collector per all scrapes (only visible if collector sends data to GCM)", + "id": "" } }, { - "xPos": 24, - "yPos": 41, + "yPos": 26, "width": 24, - "height": 10, + "height": 13, "widget": { "xyChart": { "dataSets": [ { "timeSeriesQuery": { - "prometheusQuery": "#sum(time() - container_start_time_seconds{cluster=\"bwplotka-prombenchy\", pod=~\"collector.*\", container!=\"\"}) by (container)\n\nkube_pod_container_status_restarts", - "unitOverride": "", + "prometheusQuery": "sum by (namespace, container) (container_memory_mapped_file{cluster=\"bwplotka-prombenchy\", container=~\"otel-collector|prometheus\", pod=~\"collector.*\"})", + "unitOverride": "By", "outputFullDuration": false }, "plotType": "LINE", @@ -222,13 +296,13 @@ "displayHorizontal": false } }, - "title": "(broken) Collector containers uptime ", + "title": "Collector memory mapped file bytes", "id": "" } }, { "xPos": 24, - "yPos": 51, + "yPos": 13, "width": 24, "height": 13, "widget": { @@ -236,7 +310,7 @@ "dataSets": [ { "timeSeriesQuery": { - "prometheusQuery": "sum(scrape_samples_scraped{cluster=\"bwplotka-prombenchy\", job=\"avalanche\"}) by (namespace)", + "prometheusQuery": "sum by (namespace, pod) (prometheus_tsdb_data_replay_duration_seconds{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=\"collector\"})\n\n", "unitOverride": "", "outputFullDuration": false }, @@ -259,20 +333,21 @@ "displayHorizontal": false } }, - "title": "Samples scraped by collector per all scrapes (only visible if collector sends data to GCM)", + "title": "[prom only] Collector replay time", "id": "" } }, { - "yPos": 35, - "width": 24, + "xPos": 1, + "yPos": 89, + "width": 23, "height": 13, "widget": { "xyChart": { "dataSets": [ { "timeSeriesQuery": { - "prometheusQuery": "container_memory_mapped_file{cluster=\"bwplotka-prombenchy\", image=~\".*prometheus.*\", pod=~\"collector.*\"}", + "prometheusQuery": "max by (namespace) (max_over_time(container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=~\"otel-collector|prometheus\", pod=~\"collector-.*\"}[10h]))", "unitOverride": "By", "outputFullDuration": false }, @@ -295,161 +370,215 @@ "displayHorizontal": false } }, - "title": "Collector memory mapped file bytes", + "title": "RSS max_over_10h", "id": "" } }, { - "yPos": 48, - "width": 24, - "height": 13, + "xPos": 1, + "yPos": 102, + "width": 46, + "height": 18, "widget": { - "title": "Collector replay time", "xyChart": { - "chartOptions": { - "mode": "COLOR" - }, "dataSets": [ { + "timeSeriesQuery": { + "prometheusQuery": "max by (namespace) (rate(container_cpu_usage_seconds_total{cluster=\"bwplotka-prombenchy\", container=~\"otel-collector|prometheus\", pod=~\"collector-.*\"}[10h]))\n", + "unitOverride": "", + "outputFullDuration": false + }, "plotType": "LINE", + "legendTemplate": "", "targetAxis": "Y1", - "timeSeriesQuery": { - "prometheusQuery": "sum by (namespace, pod) (prometheus_tsdb_data_replay_duration_seconds{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=\"collector\"})\n\n", - "unitOverride": "" - } + "dimensions": [], + "measures": [], + "breakdowns": [] } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR", + "showLegend": false, + "displayHorizontal": false } - } + }, + "title": "CPU rate 10h", + "id": "" } }, { "xPos": 24, - "width": 8, - "height": 8, + "yPos": 89, + "width": 23, + "height": 13, "widget": { - "scorecard": { - "timeSeriesQuery": { - "prometheusQuery": "max_over_time(container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", pod=~\"collector-.*\"}[10h])", - "unitOverride": "", - "outputFullDuration": true - }, - "gaugeView": { - "lowerBound": 0, - "upperBound": 1 - }, + "xyChart": { + "dataSets": [ + { + "timeSeriesQuery": { + "prometheusQuery": "max by (namespace) (avg_over_time(container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=~\"otel-collector|prometheus\", pod=~\"collector-.*\"}[10h]))", + "unitOverride": "By", + "outputFullDuration": false + }, + "plotType": "LINE", + "legendTemplate": "", + "targetAxis": "Y1", + "dimensions": [], + "measures": [], + "breakdowns": [] + } + ], "thresholds": [], - "dimensions": [], - "measures": [] + "yAxis": { + "label": "", + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR", + "showLegend": false, + "displayHorizontal": false + } }, - "title": "RSS max_over_10h", + "title": "RSS avg_over_10h", "id": "" } }, { - "xPos": 32, - "width": 8, - "height": 8, + "yPos": 64, + "width": 24, + "height": 12, "widget": { - "scorecard": { - "timeSeriesQuery": { - "prometheusQuery": "max_over_time(go_memstats_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"}[10h])", - "unitOverride": "", - "outputFullDuration": true - }, - "gaugeView": { - "lowerBound": 0, - "upperBound": 1 - }, + "xyChart": { + "dataSets": [ + { + "timeSeriesQuery": { + "prometheusQuery": "avg by (namespace, job) (scrape_samples_scraped{cluster=\"bwplotka-prombenchy\", job=\"avalanche\"})", + "unitOverride": "", + "outputFullDuration": false + }, + "plotType": "LINE", + "legendTemplate": "", + "targetAxis": "Y1", + "dimensions": [], + "measures": [], + "breakdowns": [] + } + ], "thresholds": [], - "dimensions": [], - "measures": [] + "yAxis": { + "label": "", + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR", + "showLegend": false, + "displayHorizontal": false + } }, - "title": "Heap max_over_10h", + "title": "Avg Avalanche Target Size", "id": "" } }, { - "xPos": 40, - "width": 8, - "height": 8, + "xPos": 24, + "yPos": 39, + "width": 24, + "height": 13, "widget": { - "scorecard": { - "timeSeriesQuery": { - "prometheusQuery": "rate(process_cpu_seconds_total{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", job=\"managed-prometheus-collector\"}[10h])", - "unitOverride": "", - "outputFullDuration": true - }, - "gaugeView": { - "lowerBound": 0, - "upperBound": 1 + "title": "GCM ingested samples for (all) avalanche metrics.", + "xyChart": { + "chartOptions": { + "mode": "COLOR" }, + "dataSets": [ + { + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "sum by (attribution_id) (rate(monitoring_googleapis_com:collection_attribution_write_sample_count{monitored_resource=\"monitoring.googleapis.com/MetricIngestionAttribution\", attribution_dimension=\"namespace\",metric_type=~\"prometheus.googleapis.com/avalanche_.*\", attribution_id!=\"bwtest\", resource_container=\"gpe-test-1\"}[${__interval}]))\n", + "unitOverride": "" + } + } + ], "thresholds": [], - "dimensions": [], - "measures": [] + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } + } + }, + { + "yPos": 89, + "width": 48, + "height": 31, + "widget": { + "title": "10h Efficiency Comparisons", + "collapsibleGroup": { + "collapsed": false }, - "title": "CPU rate 10h", "id": "" } }, { - "xPos": 24, - "yPos": 8, - "width": 8, - "height": 8, + "yPos": 39, + "width": 48, + "height": 50, "widget": { - "scorecard": { - "timeSeriesQuery": { - "prometheusQuery": "avg_over_time(container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", pod=~\"collector-.*\"}[10h])", - "unitOverride": "", - "outputFullDuration": true - }, - "gaugeView": { - "lowerBound": 0, - "upperBound": 1 - }, - "thresholds": [], - "dimensions": [], - "measures": [] + "title": "Scraped / Ingestion Load ", + "collapsibleGroup": { + "collapsed": false }, - "title": "RSS avg_over_10h", "id": "" } }, { - "xPos": 32, - "yPos": 8, - "width": 8, - "height": 8, + "yPos": 76, + "width": 24, + "height": 13, "widget": { - "scorecard": { - "timeSeriesQuery": { - "prometheusQuery": "avg_over_time(go_memstats_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"}[10h])", - "unitOverride": "", - "outputFullDuration": true - }, - "gaugeView": { - "lowerBound": 0, - "upperBound": 1 + "title": "Collector samples per second sent to GCM", + "xyChart": { + "chartOptions": { + "mode": "COLOR" }, + "dataSets": [ + { + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (rate(gcm_export_samples_sent_total{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[5m]))\n", + "unitOverride": "" + } + }, + { + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (rate(otelcol_exporter_sent_metric_points{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[5m]))\n", + "unitOverride": "" + } + } + ], "thresholds": [], - "dimensions": [], - "measures": [] - }, - "title": "Heap avg_over_10h", - "id": "" + "yAxis": { + "label": "", + "scale": "LINEAR" + } + } } }, { - "yPos": 61, + "yPos": 13, "width": 24, - "height": 12, + "height": 13, "widget": { - "title": "Avg Avalanche Target Size", + "title": "Collector Memory (heap)", "xyChart": { "chartOptions": { "mode": "COLOR" @@ -459,7 +588,15 @@ "plotType": "LINE", "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "avg by (namespace, job) (scrape_samples_scraped{cluster=\"bwplotka-prombenchy\"})", + "prometheusQuery": "sum by (__name__, pod, namespace) (go_memstats_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"})\n\n", + "unitOverride": "By" + } + }, + { + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "sum by (__name__, pod, namespace) (otelcol_process_runtime_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\"})", "unitOverride": "" } }