From 9d6078974a02cee74addb640a4b6883beac48d9b Mon Sep 17 00:00:00 2001 From: Binbin Li Date: Mon, 27 May 2024 05:08:31 +0000 Subject: [PATCH] feat: add namespace label to metrics --- .../grafana_namespaced_configMap.yaml | 1482 +++++++++++++++++ pkg/metrics/stats_reporter.go | 37 +- pkg/metrics/stats_reporter_test.go | 59 +- 3 files changed, 1558 insertions(+), 20 deletions(-) create mode 100644 instrumentation/grafana_namespaced_configMap.yaml diff --git a/instrumentation/grafana_namespaced_configMap.yaml b/instrumentation/grafana_namespaced_configMap.yaml new file mode 100644 index 000000000..ea65b96aa --- /dev/null +++ b/instrumentation/grafana_namespaced_configMap.yaml @@ -0,0 +1,1482 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: ratify-grafana-dashboard + labels: + grafana_dashboard: "1" + namespace: monitoring +data: + ratify-dashboard.json: |- + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" 
+ }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.3.8", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "sum(rate(ratify_verification_request_count[$__rate_interval]))", + "instant": true, + "key": "Q-806348e4-5c52-4af8-87f1-0676999cd43c-0", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Verification RPS", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Count of Verifier Ops", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "verifier= isSuccess= isError=" + }, + "properties": [ + { + "id": 
"custom.hideFrom", + "value": { + "legend": true, + "tooltip": true, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "sum by(error, success, verifier) (increase(ratify_verifier_duration_count{workload_namespace=~\"$workload_namespace\"}[$__rate_interval]))", + "instant": false, + "key": "Q-e20b12bd-6bd1-4c51-a1f5-4b1582183dc6-0", + "legendFormat": "verifier={{verifier}} isSuccess={{success}} isError={{error}}", + "range": true, + "refId": "A" + } + ], + "title": "Count of Verifier Operations Per Interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le) (ratify_mutation_request_bucket))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "P95 Mutation Request Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le) (ratify_verification_request_bucket))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "P95 Verification Request Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 110, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "increase(ratify_blob_cache_count_total{hit=\"true\", workload_namespace=~\"$workload_namespace\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": true, + "includeNullMetadata": true, + "instant": false, + "key": "Q-e087804a-f777-4009-b91d-0207684b5d52-0", + "legendFormat": "{{hit}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "increase(ratify_blob_cache_count_total{workload_namespace=~\"$workload_namespace\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": true, + "includeNullMetadata": true, + "instant": false, + "key": 
"Q-e087804a-f777-4009-b91d-0207684b5d52-0", + "legendFormat": "{{hit}}", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "name": "Expression", + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "$A/$B * 100", + "hide": false, + "refId": "C", + "type": "math" + } + ], + "title": "Blob Cache Hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "sum(increase(ratify_registry_request_count_total{workload_namespace=~\"$workload_namespace\"}[$__rate_interval]))", + "instant": false, + "key": "Q-03cacf42-7553-4d7a-9bce-8bea83f95a09-0", + "legendFormat": "non 429", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"editorMode": "builder", + "expr": "sum(increase(ratify_registry_request_count_total{status_code=\"429\", workload_namespace=~\"$workload_namespace\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "key": "Q-03cacf42-7553-4d7a-9bce-8bea83f95a09-0", + "legendFormat": "429", + "range": true, + "refId": "B" + } + ], + "title": "Registry Request Count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": 
"sum(container_memory_working_set_bytes{job=\"kubelet\", metrics_path=\"/metrics/cadvisor\", cluster=\"\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "key": "Q-e556d2e4-096e-4e25-8524-17875553f26c-0", + "legendFormat": "{{label_name}}", + "range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "key": "Q-2810b9b8-6d65-402c-a8dd-e33651478da8-2", + "legendFormat": "limit", + "range": true, + "refId": "C", + "step": 10 + } + ], + "title": "Memory Usage (WSS)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 
10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "dark-orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", container=\"ratify\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "key": "Q-255c079c-fc4f-462b-a7e6-675a987fb424-0", + "legendFormat": "ratify", + "range": true, + "refId": "A", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n kube_pod_container_resource_requests{namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "key": "Q-a2ac92c1-8a95-49aa-96fc-01b51a7e7773-1", + "legendFormat": "request", + "range": true, + "refId": "B", + "step": 10 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(\n kube_pod_container_resource_limits{namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "key": "Q-fa5c0a68-ea62-4328-8f34-c389bd402946-2", + "legendFormat": "limit", + "range": true, + "refId": "C", + "step": 10 + } + ], + "title": "CPU Usage", 
+ "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "ratify_system_error_count", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Error Count", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 19, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 41 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le) (ratify_akv_certificate_duration_bucket))", + "instant": false, + "key": "Q-fcdaeb64-9bf0-4d47-bd25-dbe3022be5f9-0", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "P95 AKV Certificate Fetch Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le) (ratify_aad_exchange_duration_bucket))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "P95 AAD Exchange Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", 
+ "expr": "histogram_quantile(0.95, sum by(le) (ratify_acr_exchange_duration_bucket))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "P95 ACR Exchange Duration", + "type": "timeseries" + } + ], + "title": "Azure", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 37, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "gatekeeper-system", + "value": "gatekeeper-system" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\"}, namespace)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\"}, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(kube_pod_info{namespace=\"$namespace\"}, pod)", + "hide": 0, + "includeAll": false, + "label": "ratify pod", + "multi": false, + "name": "pod", + "options": [], + "query": { + "query": "label_values(kube_pod_info{namespace=\"$namespace\"}, pod)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/^ratify.*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": [ + "default" + ], + "value": [ + "default" + ] + }, + "definition": "label_values(ratify_verifier_duration_count, workload_namespace)", + "hide": 0, + "includeAll": true, + "label": "workload namespace", + "multi": true, + "name": "workload_namespace", + "options": [], + "query": { + "qryType": 5, + "query": "label_values(ratify_verifier_duration_count, workload_namespace)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": 
"", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": { + "nowDelay": "" + }, + "timezone": "", + "title": "Ratify", + "uid": "mBWgLNBVk", + "version": 7, + "weekStart": "" + } diff --git a/pkg/metrics/stats_reporter.go b/pkg/metrics/stats_reporter.go index c9ec5d984..2cc294fbb 100644 --- a/pkg/metrics/stats_reporter.go +++ b/pkg/metrics/stats_reporter.go @@ -18,6 +18,7 @@ package metrics import ( "context" + ctxUtils "github.com/deislabs/ratify/internal/context" "github.com/sirupsen/logrus" "go.opentelemetry.io/otel/attribute" instrument "go.opentelemetry.io/otel/metric" @@ -202,6 +203,7 @@ func ReportMutationRequest(ctx context.Context, duration int64) { // subjectReference: the subject reference of the verification // success: whether the verification succeeded // isError: whether the verification failed due to an error +// workload_namespace: the namespace where workload is deployed func ReportVerifierDuration(ctx context.Context, duration int64, veriferName string, subjectReference string, success bool, isError bool) { if verifierDuration != nil { verifierDuration.Record(ctx, duration, instrument.WithAttributes( @@ -221,6 +223,10 @@ func ReportVerifierDuration(ctx context.Context, duration int64, veriferName str Key: "error", Value: attribute.BoolValue(isError), }, + attribute.KeyValue{ + Key: "workload_namespace", + Value: attribute.StringValue(ctxUtils.GetNamespace(ctx)), + }, )) } } @@ -228,9 +234,12 @@ func ReportVerifierDuration(ctx context.Context, duration int64, veriferName str // ReportSystemError reports a system error from the server handler // Attributes: // errorString: the error message +// workload_namespace: the namespace where workload is deployed func ReportSystemError(ctx context.Context, errorString string) { if systemErrorCount != nil { - systemErrorCount.Add(ctx, 1, instrument.WithAttributes(attribute.KeyValue{Key: "error", Value: 
attribute.StringValue(errorString)})) + systemErrorCount.Add(ctx, 1, instrument.WithAttributes( + attribute.KeyValue{Key: "error", Value: attribute.StringValue(errorString)}, + attribute.KeyValue{Key: "workload_namespace", Value: attribute.StringValue(ctxUtils.GetNamespace(ctx))})) } } @@ -238,44 +247,60 @@ func ReportSystemError(ctx context.Context, errorString string) { // Attributes: // statusCode: the status code of the request // registryHost: the host name of the registry +// workload_namespace: the namespace where workload is deployed func ReportRegistryRequestCount(ctx context.Context, statusCode int, registryHost string) { if registryRequestCount != nil { - registryRequestCount.Add(ctx, 1, instrument.WithAttributes(attribute.KeyValue{Key: "status_code", Value: attribute.IntValue(statusCode)}, attribute.KeyValue{Key: "registry_host", Value: attribute.StringValue(registryHost)})) + registryRequestCount.Add(ctx, 1, instrument.WithAttributes( + attribute.KeyValue{Key: "status_code", Value: attribute.IntValue(statusCode)}, + attribute.KeyValue{Key: "registry_host", Value: attribute.StringValue(registryHost)}, + attribute.KeyValue{Key: "workload_namespace", Value: attribute.StringValue(ctxUtils.GetNamespace(ctx))})) } } // ReportAADExchangeDuration reports the duration of an AAD exchange // Attributes: // resourceType: the scope of resource being exchanged (AKV or ACR) +// workload_namespace: the namespace where workload is deployed func ReportAADExchangeDuration(ctx context.Context, duration int64, resourceType string) { if aadExchangeDuration != nil { - aadExchangeDuration.Record(ctx, duration, instrument.WithAttributes(attribute.KeyValue{Key: "resource_type", Value: attribute.StringValue(resourceType)})) + aadExchangeDuration.Record(ctx, duration, instrument.WithAttributes( + attribute.KeyValue{Key: "resource_type", Value: attribute.StringValue(resourceType)}, + attribute.KeyValue{Key: "workload_namespace", Value: 
attribute.StringValue(ctxUtils.GetNamespace(ctx))})) } } // ReportACRExchangeDuration reports the duration of an ACR exchange (AAD token for ACR refresh token) // Attributes: // repository: the repository being accessed +// workload_namespace: the namespace where workload is deployed func ReportACRExchangeDuration(ctx context.Context, duration int64, repository string) { if acrExchangeDuration != nil { - acrExchangeDuration.Record(ctx, duration, instrument.WithAttributes(attribute.KeyValue{Key: "repository", Value: attribute.StringValue(repository)})) + acrExchangeDuration.Record(ctx, duration, instrument.WithAttributes( + attribute.KeyValue{Key: "repository", Value: attribute.StringValue(repository)}, + attribute.KeyValue{Key: "workload_namespace", Value: attribute.StringValue(ctxUtils.GetNamespace(ctx))})) } } // ReportAKVCertificateDuration reports the duration of an AKV certificate fetch // Attributes: // certificateName: the object name of the certificate +// workload_namespace: the namespace where workload is deployed func ReportAKVCertificateDuration(ctx context.Context, duration int64, certificateName string) { if akvCertificateDuration != nil { - akvCertificateDuration.Record(ctx, duration, instrument.WithAttributes(attribute.KeyValue{Key: "certificate_name", Value: attribute.StringValue(certificateName)})) + akvCertificateDuration.Record(ctx, duration, instrument.WithAttributes( + attribute.KeyValue{Key: "certificate_name", Value: attribute.StringValue(certificateName)}, + attribute.KeyValue{Key: "workload_namespace", Value: attribute.StringValue(ctxUtils.GetNamespace(ctx))})) } } // ReportBlobCacheCount reports a blob cache hit or miss // Attributes: // hit: whether the blob was found in the cache +// workload_namespace: the namespace where workload is deployed func ReportBlobCacheCount(ctx context.Context, hit bool) { if cacheBlobCount != nil { - cacheBlobCount.Add(ctx, 1, instrument.WithAttributes(attribute.KeyValue{Key: "hit", Value: 
attribute.BoolValue(hit)})) + cacheBlobCount.Add(ctx, 1, instrument.WithAttributes( + attribute.KeyValue{Key: "hit", Value: attribute.BoolValue(hit)}, + attribute.KeyValue{Key: "workload_namespace", Value: attribute.StringValue(ctxUtils.GetNamespace(ctx))})) } } diff --git a/pkg/metrics/stats_reporter_test.go b/pkg/metrics/stats_reporter_test.go index 7580b37e2..317aa98dc 100644 --- a/pkg/metrics/stats_reporter_test.go +++ b/pkg/metrics/stats_reporter_test.go @@ -20,10 +20,13 @@ import ( "fmt" "testing" + ctxUtils "github.com/deislabs/ratify/internal/context" "go.opentelemetry.io/otel/attribute" instrument "go.opentelemetry.io/otel/metric" ) +const testNamespace = "testNamespace" + type MockInt64Histogram struct { instrument.Int64Histogram Value int64 @@ -95,11 +98,12 @@ func TestReportVerifierDuration(t *testing.T) { mockDuration := &MockInt64Histogram{Attributes: make(map[string]string)} verifierDuration = mockDuration - ReportVerifierDuration(context.Background(), 5, "test_verifier", "test_subject", true, true) + ctx := ctxUtils.SetContextWithNamespace(context.Background(), testNamespace) + ReportVerifierDuration(ctx, 5, "test_verifier", "test_subject", true, true) if mockDuration.Value != 5 { t.Fatalf("ReportVerifierDuration() mockDuration.Value = %v, expected %v", mockDuration.Value, 5) } - if len(mockDuration.Attributes) != 4 { + if len(mockDuration.Attributes) != 5 { t.Fatalf("ReportVerifierDuration() len(mockDuration.Attributes) = %v, expected %v", len(mockDuration.Attributes), 2) } if mockDuration.Attributes["verifier"] != "test_verifier" { @@ -111,6 +115,9 @@ func TestReportVerifierDuration(t *testing.T) { if mockDuration.Attributes["error"] != "true" { t.Fatalf("expected error attribute to be true but got %s", mockDuration.Attributes["error"]) } + if mockDuration.Attributes["workload_namespace"] != testNamespace { + t.Fatalf("expected workload_namespace attribute to be %s but got %s", testNamespace, mockDuration.Attributes["workload_namespace"]) + } } 
func TestReportSystemError(t *testing.T) { @@ -120,16 +127,20 @@ func TestReportSystemError(t *testing.T) { mockCounter := &MockInt64Counter{Attributes: make(map[string]string)} systemErrorCount = mockCounter - ReportSystemError(context.Background(), "test_error") + ctx := ctxUtils.SetContextWithNamespace(context.Background(), testNamespace) + ReportSystemError(ctx, "test_error") if mockCounter.Value != 1 { t.Fatalf("ReportSystemError() mockCounter.Value = %v, expected %v", mockCounter.Value, 1) } - if len(mockCounter.Attributes) != 1 { - t.Fatalf("ReportSystemError() len(mockCounter.Attributes) = %v, expected %v", len(mockCounter.Attributes), 1) + if len(mockCounter.Attributes) != 2 { + t.Fatalf("ReportSystemError() len(mockCounter.Attributes) = %v, expected %v", len(mockCounter.Attributes), 2) } if mockCounter.Attributes["error"] != "test_error" { t.Fatalf("expected error attributes to be test_error but got %s", mockCounter.Attributes["error"]) } + if mockCounter.Attributes["workload_namespace"] != testNamespace { + t.Fatalf("expected workload_namespace attribute to be %s but got %s", testNamespace, mockCounter.Attributes["workload_namespace"]) + } } func TestReportRequestCount(t *testing.T) { @@ -139,11 +150,12 @@ func TestReportRequestCount(t *testing.T) { mockCounter := &MockInt64Counter{Attributes: make(map[string]string)} registryRequestCount = mockCounter - ReportRegistryRequestCount(context.Background(), 429, "test_registry_host") + ctx := ctxUtils.SetContextWithNamespace(context.Background(), testNamespace) + ReportRegistryRequestCount(ctx, 429, "test_registry_host") if mockCounter.Value != 1 { t.Fatalf("ReportRequestCount() mockCounter.Value = %v, expected %v", mockCounter.Value, 1) } - if len(mockCounter.Attributes) != 2 { - t.Fatalf("ReportRequestCount() len(mockCounter.Attributes) = %v, expected %v", len(mockCounter.Attributes), 2) + if len(mockCounter.Attributes) != 3 { + t.Fatalf("ReportRequestCount() len(mockCounter.Attributes) = %v, expected %v", len(mockCounter.Attributes), 3) } if mockCounter.Attributes["status_code"] != "429" { @@ -152,6 +164,9 @@ func TestReportRequestCount(t *testing.T) { if mockCounter.Attributes["registry_host"] != "test_registry_host" { 
t.Fatalf("expected registry_host attribute to be test_registry_host but got %s", mockCounter.Attributes["registry_host"]) } + if mockCounter.Attributes["workload_namespace"] != testNamespace { + t.Fatalf("expected workload_namespace attribute to be %s but got %s", testNamespace, mockCounter.Attributes["workload_namespace"]) + } } func TestReportAADExchangeDuration(t *testing.T) { @@ -161,16 +176,20 @@ func TestReportAADExchangeDuration(t *testing.T) { mockDuration := &MockInt64Histogram{Attributes: make(map[string]string)} aadExchangeDuration = mockDuration - ReportAADExchangeDuration(context.Background(), 500, "test_scope") + ctx := ctxUtils.SetContextWithNamespace(context.Background(), testNamespace) + ReportAADExchangeDuration(ctx, 500, "test_scope") if mockDuration.Value != 500 { t.Fatalf("ReportAADExchangeDuration() mockDuration.Value = %v, expected %v", mockDuration.Value, 500) } - if len(mockDuration.Attributes) != 1 { - t.Fatalf("ReportAADExchangeDuration() len(mockDuration.Attributes) = %v, expected %v", len(mockDuration.Attributes), 1) + if len(mockDuration.Attributes) != 2 { + t.Fatalf("ReportAADExchangeDuration() len(mockDuration.Attributes) = %v, expected %v", len(mockDuration.Attributes), 2) } if mockDuration.Attributes["resource_type"] != "test_scope" { t.Fatalf("expected resource_type attribute to be test_scope but got %s", mockDuration.Attributes["resource_type"]) } + if mockDuration.Attributes["workload_namespace"] != testNamespace { + t.Fatalf("expected workload_namespace attribute to be %s but got %s", testNamespace, mockDuration.Attributes["workload_namespace"]) + } } func TestReportACRExchangeDuration(t *testing.T) { @@ -180,16 +199,20 @@ func TestReportACRExchangeDuration(t *testing.T) { mockDuration := &MockInt64Histogram{Attributes: make(map[string]string)} acrExchangeDuration = mockDuration - ReportACRExchangeDuration(context.Background(), 500, "test_repo") + ctx := ctxUtils.SetContextWithNamespace(context.Background(), testNamespace) + ReportACRExchangeDuration(ctx, 500, "test_repo") if mockDuration.Value != 500 { t.Fatalf("ReportACRExchangeDuration() 
mockDuration.Value = %v, expected %v", mockDuration.Value, 500) } - if len(mockDuration.Attributes) != 1 { - t.Fatalf("ReportACRExchangeDuration() len(mockDuration.Attributes) = %v, expected %v", len(mockDuration.Attributes), 1) + if len(mockDuration.Attributes) != 2 { + t.Fatalf("ReportACRExchangeDuration() len(mockDuration.Attributes) = %v, expected %v", len(mockDuration.Attributes), 2) } if mockDuration.Attributes["repository"] != "test_repo" { t.Fatalf("expected repository attribute to be test_repo but got %s", mockDuration.Attributes["repository"]) } + if mockDuration.Attributes["workload_namespace"] != testNamespace { + t.Fatalf("expected workload_namespace attribute to be %s but got %s", testNamespace, mockDuration.Attributes["workload_namespace"]) + } } func TestReportAKVCertificateDuration(t *testing.T) { @@ -199,16 +222,20 @@ func TestReportAKVCertificateDuration(t *testing.T) { mockDuration := &MockInt64Histogram{Attributes: make(map[string]string)} akvCertificateDuration = mockDuration - ReportAKVCertificateDuration(context.Background(), 500, "test_cert") + ctx := ctxUtils.SetContextWithNamespace(context.Background(), testNamespace) + ReportAKVCertificateDuration(ctx, 500, "test_cert") if mockDuration.Value != 500 { t.Fatalf("ReportAKVCertificateDuration() mockDuration.Value = %v, expected %v", mockDuration.Value, 500) } - if len(mockDuration.Attributes) != 1 { - t.Fatalf("ReportAKVCertificateDuration() len(mockDuration.Attributes) = %v, expected %v", len(mockDuration.Attributes), 1) + if len(mockDuration.Attributes) != 2 { + t.Fatalf("ReportAKVCertificateDuration() len(mockDuration.Attributes) = %v, expected %v", len(mockDuration.Attributes), 2) } if mockDuration.Attributes["certificate_name"] != "test_cert" { t.Fatalf("expected certificate_name attribute to be test_cert but got %s", mockDuration.Attributes["certificate_name"]) } + if mockDuration.Attributes["workload_namespace"] != testNamespace { + t.Fatalf("expected workload_namespace attribute to be %s but got %s", testNamespace, mockDuration.Attributes["workload_namespace"]) + } } func TestReportBlobCacheCount(t *testing.T) { @@ -218,14 +245,18 @@ func TestReportBlobCacheCount(t *testing.T) { mockCounter := &MockInt64Counter{Attributes: 
make(map[string]string)} cacheBlobCount = mockCounter - ReportBlobCacheCount(context.Background(), true) + ctx := ctxUtils.SetContextWithNamespace(context.Background(), testNamespace) + ReportBlobCacheCount(ctx, true) if mockCounter.Value != 1 { t.Fatalf("ReportBlobCacheCount() mockCounter.Value = %v, expected %v", mockCounter.Value, 1) } - if len(mockCounter.Attributes) != 1 { - t.Fatalf("ReportBlobCacheCount() len(mockCounter.Attributes) = %v, expected %v", len(mockCounter.Attributes), 1) + if len(mockCounter.Attributes) != 2 { + t.Fatalf("ReportBlobCacheCount() len(mockCounter.Attributes) = %v, expected %v", len(mockCounter.Attributes), 2) } if mockCounter.Attributes["hit"] != "true" { t.Fatalf("expected hit attribute to be true but got %s", mockCounter.Attributes["hit"]) } + if mockCounter.Attributes["workload_namespace"] != testNamespace { + t.Fatalf("expected workload_namespace attribute to be %s but got %s", testNamespace, mockCounter.Attributes["workload_namespace"]) + } }