diff --git a/production/loki-mixin/dashboards/dashboard-utils.libsonnet b/production/loki-mixin/dashboards/dashboard-utils.libsonnet index 2d04c6cef8317..5766de2ad31e5 100644 --- a/production/loki-mixin/dashboards/dashboard-utils.libsonnet +++ b/production/loki-mixin/dashboards/dashboard-utils.libsonnet @@ -151,7 +151,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; datasource: '$datasource', }, CPUUsagePanel(title, matcher):: - $.panel(title) + + $.timeseriesPanel(title) + $.queryPanel([ 'sum by(pod) (rate(container_cpu_usage_seconds_total{%s, %s}[$__rate_interval]))' % [$.namespaceMatcher(), matcher], 'min(kube_pod_container_resource_requests{%s, %s, resource="cpu"} > 0)' % [$.namespaceMatcher(), matcher], @@ -176,7 +176,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; self.CPUUsagePanel(title, 'container=~"%s"' % containerName), memoryWorkingSetPanel(title, matcher):: - $.panel(title) + + $.timeseriesPanel(title) + $.queryPanel([ // We use "max" instead of "sum" otherwise during a rolling update of a statefulset we will end up // summing the memory of the old pod (whose metric will be stale for 5m) to the new pod. @@ -204,7 +204,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; self.memoryWorkingSetPanel(title, 'container=~"%s"' % containerName), goHeapInUsePanel(title, jobName):: - $.panel(title) + + $.timeseriesPanel(title) + $.queryPanel( 'sum by(%s) (go_memstats_heap_inuse_bytes{%s})' % [$._config.per_instance_label, $.jobMatcher(jobName)], '{{%s}}' % $._config.per_instance_label @@ -247,7 +247,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; }, containerDiskSpaceUtilizationPanel(title, containerName):: - $.panel(title) + + $.timeseriesPanel(title) + $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,%s})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher(), $.containerLabelMatcher(containerName)], '{{persistentvolumeclaim}}') + { yaxes: $.yaxes('percentunit') }, } diff --git a/production/loki-mixin/dashboards/loki-chunks.libsonnet b/production/loki-mixin/dashboards/loki-chunks.libsonnet index 99a1fa06fe8c7..49cfb5088c856 100644 --- a/production/loki-mixin/dashboards/loki-chunks.libsonnet +++ b/production/loki-mixin/dashboards/loki-chunks.libsonnet @@ -15,11 +15,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Active Series / Chunks') .addPanel( - $.panel('Series') + + $.timeseriesPanel('Series') + $.queryPanel('sum(loki_ingester_memory_chunks{%s})' % dashboards['loki-chunks.json'].labelsSelector, 'series'), ) .addPanel( - $.panel('Chunks per series') + + $.timeseriesPanel('Chunks per series') + $.queryPanel( 'sum(loki_ingester_memory_chunks{%s}) / sum(loki_ingester_memory_streams{%s})' % [ dashboards['loki-chunks.json'].labelsSelector, @@ -32,24 +32,24 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Flush Stats') .addPanel( - $.panel('Utilization') + + $.timeseriesPanel('Utilization') + $.latencyPanel('loki_ingester_chunk_utilization', '{%s}' % dashboards['loki-chunks.json'].labelsSelector, multiplier='1') + { yaxes: $.yaxes('percentunit') }, ) .addPanel( - $.panel('Age') + + $.timeseriesPanel('Age') + $.latencyPanel('loki_ingester_chunk_age_seconds', '{%s}' % dashboards['loki-chunks.json'].labelsSelector), ), ) .addRow( $.row('Flush Stats') .addPanel( - $.panel('Log Entries Per Chunk') + + $.timeseriesPanel('Log Entries Per Chunk') + $.latencyPanel('loki_ingester_chunk_entries', '{%s}' % dashboards['loki-chunks.json'].labelsSelector, multiplier='1') + { yaxes: $.yaxes('short') }, ) .addPanel( - $.panel('Index Entries Per Chunk') + + $.timeseriesPanel('Index Entries Per Chunk') + $.queryPanel( 'sum(rate(loki_chunk_store_index_entries_per_chunk_sum{%s}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{%s}[5m]))' % [ dashboards['loki-chunks.json'].labelsSelector, @@ -62,22 +62,22 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Flush Stats') .addPanel( - $.panel('Queue Length') + + $.timeseriesPanel('Queue Length') + $.queryPanel('loki_ingester_flush_queue_length{%(label)s} or cortex_ingester_flush_queue_length{%(label)s}' % { label: dashboards['loki-chunks.json'].labelsSelector }, '{{pod}}'), ) .addPanel( - $.panel('Flush Rate') + + $.timeseriesPanel('Flush Rate') + $.qpsPanel('loki_ingester_chunk_age_seconds_count{%s}' % dashboards['loki-chunks.json'].labelsSelector,), ), ) .addRow( $.row('Flush Stats') .addPanel( - $.panel('Chunks Flushed/Second') + + $.timeseriesPanel('Chunks Flushed/Second') + $.queryPanel('sum(rate(loki_ingester_chunks_flushed_total{%s}[$__rate_interval]))' % dashboards['loki-chunks.json'].labelsSelector, '{{pod}}'), ) .addPanel( - $.panel('Chunk Flush Reason') + + $.timeseriesPanel('Chunk Flush Reason') + $.queryPanel('sum by (reason) (rate(loki_ingester_chunks_flushed_total{%s}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{%s}[$__rate_interval]))' % [dashboards['loki-chunks.json'].labelsSelector, dashboards['loki-chunks.json'].labelsSelector], '{{reason}}') + { stack: true, yaxes: [ @@ -138,7 +138,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Utilization') .addPanel( - $.panel('Chunk Size Quantiles') + + $.timeseriesPanel('Chunk Size Quantiles') + $.queryPanel( [ 'histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{%s}[1m])) by (le))' % dashboards['loki-chunks.json'].labelsSelector, @@ -158,7 +158,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('Duration') .addPanel( - $.panel('Chunk Duration hours (end-start)') + + $.timeseriesPanel('Chunk Duration hours (end-start)') + $.queryPanel( [ 'histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{%s}[5m])) by (le))' % dashboards['loki-chunks.json'].labelsSelector, diff --git a/production/loki-mixin/dashboards/loki-deletion.libsonnet b/production/loki-mixin/dashboards/loki-deletion.libsonnet index 58a46dd76dc11..f44bb0572cd43 100644 --- a/production/loki-mixin/dashboards/loki-deletion.libsonnet +++ b/production/loki-mixin/dashboards/loki-deletion.libsonnet @@ -28,39 +28,39 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( g.row('Churn') .addPanel( - g.panel('# of Delete Requests (received - processed) ') + + g.timeseriesPanel('# of Delete Requests (received - processed) ') + g.queryPanel('(loki_compactor_delete_requests_received_total{%s} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{%s} or on () vector(0))' % [$.namespaceMatcher(), $.namespaceMatcher()], 'in progress'), ) .addPanel( - g.panel('Delete Requests Received / Day') + + g.timeseriesPanel('Delete Requests Received / Day') + g.queryPanel('sum(increase(loki_compactor_delete_requests_received_total{%s}[1d]))' % $.namespaceMatcher(), 'received'), ) .addPanel( - g.panel('Delete Requests Processed / Day') + + g.timeseriesPanel('Delete Requests Processed / Day') + g.queryPanel('sum(increase(loki_compactor_delete_requests_processed_total{%s}[1d]))' % $.namespaceMatcher(), 'processed'), ) ).addRow( g.row('Compactor') .addPanel( - g.panel('Compactor CPU usage') + + g.timeseriesPanel('Compactor CPU usage') + g.queryPanel('node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%s, container="compactor"}' % $.namespaceMatcher(), '{{pod}}'), ) .addPanel( - g.panel('Compactor memory usage (MiB)') + + g.timeseriesPanel('Compactor memory usage (MiB)') + g.queryPanel('go_memstats_heap_inuse_bytes{%s, container="compactor"} / 1024 / 1024 ' % $.namespaceMatcher(), ' {{pod}} '), ) .addPanel( - g.panel('Compaction run duration (seconds)') + + g.timeseriesPanel('Compaction run duration (seconds)') + g.queryPanel('loki_boltdb_shipper_compact_tables_operation_duration_seconds{%s}' % $.namespaceMatcher(), '{{pod}}'), ) ).addRow( g.row('Deletion metrics') .addPanel( - g.panel('Failures in Loading Delete Requests / Hour') + + g.timeseriesPanel('Failures in Loading Delete Requests / Hour') + g.queryPanel('sum(increase(loki_compactor_load_pending_requests_attempts_total{status="fail", %s}[1h]))' % $.namespaceMatcher(), 'failures'), ) .addPanel( - g.panel('Lines Deleted / Sec') + + g.timeseriesPanel('Lines Deleted / Sec') + g.queryPanel('sum(rate(loki_compactor_deleted_lines{' + $._config.per_cluster_label + '=~"$cluster",job=~"$namespace/%s"}[$__rate_interval])) by (user)' % compactor_matcher, '{{user}}'), ) ).addRow( diff --git a/production/loki-mixin/dashboards/loki-reads-resources.libsonnet b/production/loki-mixin/dashboards/loki-reads-resources.libsonnet index 3d17903cf83c0..1d4f24d8f5886 100644 --- a/production/loki-mixin/dashboards/loki-reads-resources.libsonnet +++ b/production/loki-mixin/dashboards/loki-reads-resources.libsonnet @@ -67,7 +67,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.goHeapInUsePanel('Memory (go heap inuse)', 'querier'), ) .addPanel( - $.panel('Disk Writes') + + $.timeseriesPanel('Disk Writes') + $.queryPanel( 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('querier')], '{{%s}} - {{device}}' % $._config.per_instance_label @@ -76,7 +76,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; { yaxes: $.yaxes('Bps') }, ) .addPanel( - $.panel('Disk Reads') + + $.timeseriesPanel('Disk Reads') + $.queryPanel( 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('querier')], '{{%s}} - {{device}}' % $._config.per_instance_label @@ -100,7 +100,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.goHeapInUsePanel('Memory (go heap inuse)', index_gateway_job_matcher), ) .addPanel( - $.panel('Disk Writes') + + $.timeseriesPanel('Disk Writes') + $.queryPanel( 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(index_gateway_pod_matcher)], '{{%s}} - {{device}}' % $._config.per_instance_label @@ -109,7 +109,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; { yaxes: $.yaxes('Bps') }, ) .addPanel( - $.panel('Disk Reads') + + $.timeseriesPanel('Disk Reads') + $.queryPanel( 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(index_gateway_pod_matcher)], '{{%s}} - {{device}}' % $._config.per_instance_label @@ -137,7 +137,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; !$._config.ssd.enabled, grafana.row.new('Ruler') .addPanel( - $.panel('Rules') + + $.timeseriesPanel('Rules') + $.queryPanel( 'sum by(%(label)s) (loki_prometheus_rule_group_rules{%(matcher)s}) or sum by(%(label)s) (cortex_prometheus_rule_group_rules{%(matcher)s})' % { label: $._config.per_instance_label, matcher: $.jobMatcher('ruler') }, '{{%s}}' % $._config.per_instance_label diff --git a/production/loki-mixin/dashboards/loki-reads.libsonnet b/production/loki-mixin/dashboards/loki-reads.libsonnet index 823b03126ccc5..77cff5f38c378 100644 --- a/production/loki-mixin/dashboards/loki-reads.libsonnet +++ b/production/loki-mixin/dashboards/loki-reads.libsonnet @@ -25,7 +25,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; }, local p99LatencyByPod(metric, selectorStr) = - $.panel('Per Pod Latency (p99)') + + $.timeseriesPanel('Per Pod Latency (p99)') + latencyPanelWithExtraGrouping(metric, selectorStr, '1e3', 'pod'), 'loki-reads.json': { @@ -69,11 +69,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; $._config.internal_components, $.row('Frontend (cortex_gw)') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].cortexGwSelector, http_routes]) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.cortexgateway + [utils.selector.re('route', http_routes)], @@ -92,11 +92,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row(if $._config.ssd.enabled then 'Read Path' else 'Frontend (query-frontend)') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].queryFrontendSelector, http_routes]) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.queryFrontend + [utils.selector.re('route', http_routes)], @@ -118,11 +118,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; !$._config.ssd.enabled, $.row('Querier') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].querierSelector, http_routes]) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.querier + [utils.selector.re('route', http_routes)], @@ -144,11 +144,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; !$._config.ssd.enabled, $.row('Ingester') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].ingesterSelector, grpc_routes]) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.ingester + [utils.selector.re('route', grpc_routes)], @@ -171,11 +171,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; !$._config.ssd.enabled, $.row('Ingester - Zone Aware') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"%s"}' % [dashboards['loki-reads.json'].ingesterZoneSelector, grpc_routes]) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.ingesterZoneAware + [utils.selector.re('route', grpc_routes)], @@ -197,11 +197,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; !$._config.ssd.enabled, $.row('Index') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_index_request_duration_seconds_count{%s operation!="index_chunk"}' % dashboards['loki-reads.json'].querierSelector) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + $.latencyPanel('loki_index_request_duration_seconds', '{%s operation!="index_chunk"}' % dashboards['loki-reads.json'].querierSelector) ) .addPanel( @@ -215,11 +215,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; showBigTable, $.row('BigTable') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_bigtable_request_duration_seconds_count{%s operation="/google.bigtable.v2.Bigtable/ReadRows"}' % dashboards['loki-reads.json'].querierSelector) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_bigtable_request_duration_seconds', dashboards['loki-reads.json'].clusterMatchers + dashboards['loki-reads.json'].matchers.querier + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/ReadRows')] @@ -229,11 +229,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('BoltDB Shipper') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_boltdb_shipper_request_duration_seconds_count{%s operation="Shipper.Query"}' % dashboards['loki-reads.json'].querierOrIndexGatewaySelector) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + $.latencyPanel('loki_boltdb_shipper_request_duration_seconds', '{%s operation="Shipper.Query"}' % dashboards['loki-reads.json'].querierOrIndexGatewaySelector) ) .addPanel( diff --git a/production/loki-mixin/dashboards/loki-retention.libsonnet b/production/loki-mixin/dashboards/loki-retention.libsonnet index a5aa45a13d756..da7e4739bc239 100644 --- a/production/loki-mixin/dashboards/loki-retention.libsonnet +++ b/production/loki-mixin/dashboards/loki-retention.libsonnet @@ -30,7 +30,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.fromNowPanel('Last Compact Tables Operation Success', 'loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds') ) .addPanel( - $.panel('Compact Tables Operations Duration') + + $.timeseriesPanel('Compact Tables Operations Duration') + $.queryPanel(['loki_boltdb_shipper_compact_tables_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) + { yaxes: $.yaxes('s') }, ) @@ -38,11 +38,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('') .addPanel( - $.panel('Number of times Tables were skipped during Compaction') + + $.timeseriesPanel('Number of times Tables were skipped during Compaction') + $.queryPanel(['sum(increase(loki_compactor_skipped_compacting_locked_table_total{%s}[$__range]))' % $.namespaceMatcher()], ['{{table_name}}']), ) .addPanel( - $.panel('Compact Tables Operations Per Status') + + $.timeseriesPanel('Compact Tables Operations Per Status') + $.queryPanel(['sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{success}}']), ) ) @@ -52,66 +52,66 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.fromNowPanel('Last Mark Operation Success', 'loki_compactor_apply_retention_last_successful_run_timestamp_seconds') ) .addPanel( - $.panel('Mark Operations Duration') + + $.timeseriesPanel('Mark Operations Duration') + $.queryPanel(['loki_compactor_apply_retention_operation_duration_seconds{%s}' % $.namespaceMatcher()], ['duration']) + { yaxes: $.yaxes('s') }, ) .addPanel( - $.panel('Mark Operations Per Status') + + $.timeseriesPanel('Mark Operations Per Status') + $.queryPanel(['sum by (status)(rate(loki_compactor_apply_retention_operation_total{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{success}}']), ) ) .addRow( $.row('Per Table Marker') .addPanel( - $.panel('Processed Tables Per Action') + + $.timeseriesPanel('Processed Tables Per Action') + $.queryPanel(['count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{%s})' % $.namespaceMatcher()], ['{{action}}']) + $.stack, ) .addPanel( - $.panel('Modified Tables') + + $.timeseriesPanel('Modified Tables') + $.queryPanel(['count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{%s , action=~"modified|deleted"})' % $.namespaceMatcher()], ['{{table}}-{{action}}']) + $.stack, ) .addPanel( - $.panel('Marks Creation Rate Per Table') + + $.timeseriesPanel('Marks Creation Rate Per Table') + $.queryPanel(['sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{%s}[$__rate_interval])) >0' % $.namespaceMatcher()], ['{{table}}']) + $.stack, ) ) .addRow( $.row('') .addPanel( - $.panel('Marked Chunks (24h)') + + $.timeseriesPanel('Marked Chunks (24h)') + $.statPanel('sum (increase(loki_boltdb_shipper_retention_marker_count_total{%s}[24h]))' % $.namespaceMatcher(), 'short') ) .addPanel( - $.panel('Mark Table Latency') + + $.timeseriesPanel('Mark Table Latency') + $.latencyPanel('loki_boltdb_shipper_retention_marker_table_processed_duration_seconds', '{%s}' % $.namespaceMatcher()) ) ) .addRow( $.row('Sweeper') .addPanel( - $.panel('Delete Chunks (24h)') + + $.timeseriesPanel('Delete Chunks (24h)') + $.statPanel('sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{%s}[24h]))' % $.namespaceMatcher(), 'short') ) .addPanel( - $.panel('Delete Latency') + + $.timeseriesPanel('Delete Latency') + $.latencyPanel('loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds', '{%s}' % $.namespaceMatcher()) ) ) .addRow( $.row('') .addPanel( - $.panel('Sweeper Lag') + + $.timeseriesPanel('Sweeper Lag') + $.queryPanel(['time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{%s} > 0)' % $.namespaceMatcher()], ['lag']) + { yaxes: $.yaxes({ format: 's', min: null }), }, ) .addPanel( - $.panel('Marks Files to Process') + + $.timeseriesPanel('Marks Files to Process') + $.queryPanel(['sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{%s})' % $.namespaceMatcher()], ['count']), ) .addPanel( - $.panel('Delete Rate Per Status') + + $.timeseriesPanel('Delete Rate Per Status') + $.queryPanel(['sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{%s}[$__rate_interval]))' % $.namespaceMatcher()], ['{{status}}']), ) ) diff --git a/production/loki-mixin/dashboards/loki-writes-resources.libsonnet b/production/loki-mixin/dashboards/loki-writes-resources.libsonnet index bffbe5b59dace..099347d645b4c 100644 --- a/production/loki-mixin/dashboards/loki-writes-resources.libsonnet +++ b/production/loki-mixin/dashboards/loki-writes-resources.libsonnet @@ -41,7 +41,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( grafana.row.new(if $._config.ssd.enabled then 'Write path' else 'Ingester') .addPanel( - $.panel('In-memory streams') + + $.timeseriesPanel('In-memory streams') + $.queryPanel( 'sum by(%s) (loki_ingester_memory_streams{%s})' % [$._config.per_instance_label, $.jobMatcher(ingester_job_matcher)], '{{%s}}' % $._config.per_instance_label @@ -60,7 +60,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; $.goHeapInUsePanel('Memory (go heap inuse)', ingester_job_matcher), ) .addPanel( - $.panel('Disk Writes') + + $.timeseriesPanel('Disk Writes') + $.queryPanel( 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(ingester_pod_matcher)], '{{%s}} - {{device}}' % $._config.per_instance_label @@ -69,7 +69,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; { yaxes: $.yaxes('Bps') }, ) .addPanel( - $.panel('Disk Reads') + + $.timeseriesPanel('Disk Reads') + $.queryPanel( 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(ingester_pod_matcher)], '{{%s}} - {{device}}' % $._config.per_instance_label diff --git a/production/loki-mixin/dashboards/loki-writes.libsonnet b/production/loki-mixin/dashboards/loki-writes.libsonnet index 878a1ee7d7872..d7dbe8315e1d2 100644 --- a/production/loki-mixin/dashboards/loki-writes.libsonnet +++ b/production/loki-mixin/dashboards/loki-writes.libsonnet @@ -44,11 +44,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; $._config.internal_components, $.row('Frontend (cortex_gw)') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s route=~"api_prom_push|loki_api_v1_push"}' % dashboards['loki-writes.json'].cortexGwSelector) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.cortexgateway + [utils.selector.re('route', 'api_prom_push|loki_api_v1_push')], @@ -58,11 +58,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row(if $._config.ssd.enabled then 'Write Path' else 'Distributor') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s, route=~"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle"}' % std.rstripChars(dashboards['loki-writes.json'].distributorSelector, ',')) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.distributor + [utils.selector.re('route', 'api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle')], @@ -73,11 +73,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; $._config.tsdb, $.row(if $._config.ssd.enabled then 'Write Path' else 'Distributor - Structured Metadata') .addPanel( - $.panel('Per Total Received Bytes') + + $.timeseriesPanel('Per Total Received Bytes') + $.queryPanel('sum (rate(loki_distributor_structured_metadata_bytes_received_total{%s}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{%s}[$__rate_interval]))' % [dashboards['loki-writes.json'].distributorSelector, dashboards['loki-writes.json'].distributorSelector], 'bytes') ) .addPanel( - $.panel('Per Tenant') + + $.timeseriesPanel('Per Tenant') + $.queryPanel('sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{%s}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{%s}[$__rate_interval]))' % [dashboards['loki-writes.json'].distributorSelector, dashboards['loki-writes.json'].distributorSelector], '{{tenant}}') + { stack: true, yaxes: [ @@ -91,11 +91,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; !$._config.ssd.enabled, $.row('Ingester - Zone Aware') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s route="/logproto.Pusher/Push"}' % dashboards['loki-writes.json'].ingesterZoneSelector) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.ingester_zone + [utils.selector.eq('route', '/logproto.Pusher/Push')], @@ -106,12 +106,12 @@ local utils = import 'mixin-utils/utils.libsonnet'; !$._config.ssd.enabled, $.row('Ingester') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_request_duration_seconds_count{%s route="/logproto.Pusher/Push"}' % dashboards['loki-writes.json'].ingesterSelector) + $.qpsPanel('loki_request_duration_seconds_count{%s route="/logproto.Pusher/Push"}' % dashboards['loki-writes.json'].ingesterSelector) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_request_duration_seconds', dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.ingester + [utils.selector.eq('route', '/logproto.Pusher/Push')], @@ -122,11 +122,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; !$._config.ssd.enabled, $.row('Index') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_index_request_duration_seconds_count{%s operation="index_chunk"}' % dashboards['loki-writes.json'].anyIngester) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + $.latencyPanel('loki_index_request_duration_seconds', '{%s operation="index_chunk"}' % dashboards['loki-writes.json'].anyIngester) ) ) @@ -134,11 +134,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; showBigTable, $.row('BigTable') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_bigtable_request_duration_seconds_count{%s operation="/google.bigtable.v2.Bigtable/MutateRows"}' % dashboards['loki-writes.json'].ingesterSelector) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + utils.latencyRecordingRulePanel( 'loki_bigtable_request_duration_seconds', dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].clusterMatchers + dashboards['loki-writes.json'].matchers.ingester + [utils.selector.eq('operation', '/google.bigtable.v2.Bigtable/MutateRows')] @@ -148,11 +148,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; .addRow( $.row('BoltDB Shipper') .addPanel( - $.panel('QPS') + + $.timeseriesPanel('QPS') + $.qpsPanel('loki_boltdb_shipper_request_duration_seconds_count{%s operation="WRITE"}' % dashboards['loki-writes.json'].ingesterSelector) ) .addPanel( - $.panel('Latency') + + $.timeseriesPanel('Latency') + $.latencyPanel('loki_boltdb_shipper_request_duration_seconds', '{%s operation="WRITE"}' % dashboards['loki-writes.json'].ingesterSelector) ) ), diff --git a/production/loki-mixin/jsonnetfile.lock.json b/production/loki-mixin/jsonnetfile.lock.json index 4dd7a006b6b00..f895125a2aa3c 100644 --- a/production/loki-mixin/jsonnetfile.lock.json +++ b/production/loki-mixin/jsonnetfile.lock.json @@ -18,8 +18,8 @@ "subdir": "grafana-builder" } }, - "version": "3f71e00a64810075b5d5f969cc6d0e419cbdebc4", - "sum": "TieGrr7GyKjURk1+wXHFpdoCiwNaIVfZvyc5mbI9OM0=" + "version": "f95501009c9b29bed87fe9d57c1a6e72e210f137", + "sum": "+z5VY+bPBNqXcmNAV8xbJcbsRA+pro1R3IM7aIY8OlU=" }, { "source": {