From 23e77cee66929c6cd9addbd694c6011f1a290d5e Mon Sep 17 00:00:00 2001 From: Shreya Date: Mon, 25 Nov 2024 14:21:16 +0530 Subject: [PATCH 1/8] Add prometheus test script to capture the time range --- .../test_prometheus_query_execution_time.sh | 415 ++++++++++++++++++ 1 file changed, 415 insertions(+) create mode 100755 tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh new file mode 100755 index 000000000..cb4c229fc --- /dev/null +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh @@ -0,0 +1,415 @@ +#!/bin/bash + +# Variables +PROMETHEUS_NAMESPACE="openshift-monitoring" +SERVICE_NAME="prometheus-k8s" + +export PROMETHEUS_ROUTE=$(oc get route $SERVICE_NAME -n $PROMETHEUS_NAMESPACE --no-headers -o wide -o=custom-columns=NODE:.spec.host) +echo $PROMETHEUS_ROUTE + +oc -n $PROMETHEUS_NAMESPACE annotate route $SERVICE_NAME --overwrite haproxy.router.openshift.io/timeout=200s + +PROMETHEUS_URL="https://${PROMETHEUS_ROUTE}" + +echo $PROMETHEUS_URL + +export TOKEN=$(oc whoami --show-token) + +# List of Prometheus queries +default_queries=( + 'avg by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'sum by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'max by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'min by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", 
unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'avg by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'sum by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'max by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'min by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'avg by(container, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'sum by(container, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'max by(container, namespace) (max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'min by(container, namespace) (min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'avg by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'sum by(container, namespace) 
(rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'max by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'min by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'avg by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'sum by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'max by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'min by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'avg by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'sum by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'max by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'min by(container, namespace) ((kube_pod_container_resource_limits{container!="", 
container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'avg by(container, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'sum by(container, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'max by(container, namespace) (max_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'min by(container, namespace) (min_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'avg by(container, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'sum by(container, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'max by(container, namespace) (max_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'min by(container, namespace) (min_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' +) + +individual_queries_by_pod=( + 'avg by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'sum by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'max by(container, pod, namespace) 
((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'min by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'avg by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'sum by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'max by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'min by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'avg by(container, pod, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'sum by(container, pod, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'max by(container, pod, namespace) (max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'min by(container, pod, namespace) (min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", 
container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'avg by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'sum by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'max by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'min by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'avg by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'sum by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'max by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'min by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'avg by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'sum by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", 
unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'max by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'min by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + 'avg by(container, pod, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'sum by(container, pod, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'max by(container, pod, namespace) (max_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'min by(container, pod, namespace) (min_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'avg by(container, pod, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'sum by(container, pod, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'max by(container, pod, namespace) (max_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + 'min by(container, pod, namespace) (min_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + '(max_over_time(kube_pod_container_info{container!="", container!="POD", 
pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m])) * on(pod, namespace) group_left(owner_kind, owner_name) max by(pod, namespace, owner_kind, owner_name) (max_over_time(kube_pod_owner{container!="", container!="POD", pod!="", namespace="$NAMESPACE"}[15m]))' + '(max_over_time(kube_pod_container_info{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m])) * on(pod, namespace) group_left(workload, workload_type) max by(pod, namespace, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!="", namespace="$NAMESPACE"}[15m]))' +) + +grouped_queries_by_owner_workload=( + 'sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) 
(max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'avg by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'sum by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'max by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, 
workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'min by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'avg_over_time( + avg by(namespace,container,workload,workload_type,owner_kind) ( + (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:])' + 'min_over_time( + min by(namespace,container,workload,workload_type,owner_kind) ( + (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'max_over_time( + max by(namespace,container,workload,workload_type,owner_kind) ( + (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", 
namespace="$NAMESPACE",container="$CONTAINER_NAME"}) + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'avg_over_time( + sum by(namespace,container,workload,workload_type,owner_kind) ( + (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'avg_over_time( + avg by(namespace,container,workload,workload_type,owner_kind) ( + (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'max_over_time( + max by(namespace,container,workload,workload_type,owner_kind) ( + (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'min_over_time( + min by(namespace,container,workload,workload_type,owner_kind) ( + 
(rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'avg_over_time( + sum by(namespace,container,workload,workload_type,owner_kind) ( + (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'max 
by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) 
(max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + ))' + 'avg_over_time( + avg by(namespace,container,workload,workload_type,owner_kind) ( + container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'min_over_time( + min by(namespace,container,workload,workload_type,owner_kind) ( + container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) 
(max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'max_over_time( + max by(namespace,container,workload,workload_type,owner_kind) ( + container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:])' + 'avg_over_time( + sum by(namespace,container,workload,workload_type,owner_kind) ( + container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'avg_over_time( + avg by(namespace,container,workload,workload_type,owner_kind) ( + container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'min_over_time( + min by(namespace,container,workload,workload_type,owner_kind) ( + container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) 
(max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'max_over_time( + max by(namespace,container,workload,workload_type,owner_kind) ( + container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' + 'avg_over_time( + sum by(namespace,container,workload,workload_type,owner_kind) ( + container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) + * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) + )[15m:] )' +) +
# (The line above is the unmodified tail of the
# grouped_queries_by_owner_workload array, kept byte-for-byte.)

#######################################
# Copy the requested query set into the caller's array.
# Globals:
#   default_queries, individual_queries_by_pod,
#   grouped_queries_by_owner_workload (read)
# Arguments:
#   $1 - query set name: default_queries | individual_queries |
#        grouped_queries | grouped_queriesBy5days
#   $2 - NAME of the caller's array variable that receives the queries
# Outputs:
#   An error message on stderr when the set name is unknown.
# Returns:
#   0 on success, 1 for an unknown query set.
#######################################
get_queries() {
  local query_set="$1"
  # Nameref (bash >= 4.3) to the caller's array. The unusual name avoids a
  # circular reference if the caller's variable has an ordinary name.
  local -n __gq_result="$2"

  case "$query_set" in
    default_queries)
      __gq_result=("${default_queries[@]}")
      ;;
    individual_queries)
      __gq_result=("${individual_queries_by_pod[@]}")
      ;;
    grouped_queries | grouped_queriesBy5days)
      # Both names use the same templates; the "By5days" variant only
      # changes how the caller splits the time range.
      __gq_result=("${grouped_queries_by_owner_workload[@]}")
      ;;
    *)
      echo "Invalid query set. Available sets are: default_queries, individual_queries, grouped_queries, grouped_queriesBy5days." >&2
      return 1
      ;;
  esac
}

#######################################
# Run one query over three consecutive 5-day windows (15 days in total),
# starting at the given start timestamp.
# Globals:
#   NAMESPACE, CONTAINER (read only as fallbacks when $2/$3 are empty)
# Arguments:
#   $1 - PromQL template
#   $2 - namespace
#   $3 - container name
#   $4 - start of the first window (epoch seconds)
#   $5 - end timestamp; accepted so both runners share one call shape.
#        NOTE(review): it is not used - the windows always cover
#        start .. start + 15 days. Confirm whether -e should clamp them.
#######################################
run_query_across_5day_windows() {
  local query="$1"
  local namespace="${2:-$NAMESPACE}"
  local container="${3:-$CONTAINER}"
  local start_timestamp="$4"

  local -r window_seconds=$((5 * 24 * 60 * 60)) # 5 days in seconds
  local -r window_count=3
  local window_start window_end i

  # Start from the argument, not the global START_TIME, so the function
  # honours whatever range its caller passes.
  window_start="$start_timestamp"
  for ((i = 0; i < window_count; i++)); do
    window_end=$((window_start + window_seconds))
    measure_query_time "$query" "$namespace" "$container" \
      "$window_start" "$window_end"
    window_start="$window_end"
  done
}

#######################################
# Send one range query to Prometheus and record how long it took.
# Globals:
#   TOKEN, PROMETHEUS_URL (read)
#   RESPONSE_LOG_FILE (appended: raw response body)
#   OUTPUT_FILE (appended: one CSV row per call)
# Arguments:
#   $1 - PromQL template containing the literal placeholders
#        $NAMESPACE and $CONTAINER_NAME
#   $2 - namespace substituted for $NAMESPACE
#   $3 - container substituted for $CONTAINER_NAME
#   $4 - range start (epoch seconds)
#   $5 - range end (epoch seconds)
#######################################
measure_query_time() {
  local query="$1"
  local namespace="$2"
  local container="$3"
  local start_timestamp="$4"
  local end_timestamp="$5"

  local started_at finished_at time_taken
  local response curl_status status error_type error_message
  local IFS=$' \t\n'
  local -a query_words
  local query_for_csv

  # Replace placeholders in the query with the actual namespace and container
  query=${query//\$NAMESPACE/$namespace}
  query=${query//\$CONTAINER_NAME/$container}

  started_at=$(date +%s.%N)

  # -k: the route is normally served with a cluster-internal certificate.
  # -sS: no progress meter, but transport errors still reach stderr.
  response=$(curl -sS -G -k -H "Authorization: Bearer ${TOKEN}" \
    --data-urlencode "query=${query}" \
    --data-urlencode "start=${start_timestamp}" \
    --data-urlencode "end=${end_timestamp}" \
    --data-urlencode "step=900" \
    "${PROMETHEUS_URL}/api/v1/query_range")
  curl_status=$?

  # Stop the clock before any local file I/O so only the request is timed.
  finished_at=$(date +%s.%N)

  printf '%s\n' "$response" >> "$RESPONSE_LOG_FILE"

  time_taken=$(echo "$finished_at - $started_at" | bc)

  # Collapse newlines and runs of blanks so a multi-line query stays on one
  # CSV row. `read` splits on whitespace without globbing, so the bare `*`
  # operators in the grouped queries are kept literally.
  read -r -d '' -a query_words <<< "$query"
  query_for_csv="${query_words[*]}"

  status=$(jq -r '.status' <<< "$response")

  if [[ "$status" == "success" ]]; then
    printf '%s\n' "Success, ${time_taken}, ${start_timestamp}, ${end_timestamp}, ${query_for_csv}" >> "$OUTPUT_FILE"
  else
    if ((curl_status != 0)); then
      # Transport-level failure: there is no JSON body to read an error from.
      error_type="curl"
      error_message="curl exited with status ${curl_status}"
    else
      error_type=$(jq -r '.errorType' <<< "$response")
      error_message=$(jq -r '.error' <<< "$response")
    fi
    printf '%s\n' "Failed | ErrorType: $error_type | Error: $error_message, ${time_taken}, ${start_timestamp}, ${end_timestamp}, ${query_for_csv}" >> "$OUTPUT_FILE"
  fi
}

#######################################
# Append the CPU and memory usage of the Prometheus pods to the response log.
# Globals:
#   PROMETHEUS_NAMESPACE (read), RESPONSE_LOG_FILE (appended)
#######################################
capture_prometheus_resource_metrics() {
  {
    printf '\n===== Prometheus Pod Resource Metrics (CPU & Memory) =====\n'
    # Get resource usage for Prometheus pods in the given namespace
    kubectl top pod -n "$PROMETHEUS_NAMESPACE" | grep "prometheus"
    printf '=========================================================\n\n'
  } >> "$RESPONSE_LOG_FILE"
}

DEFAULT_NAMESPACE="openshift-cloud-controller-manager-operator"
DEFAULT_CONTAINER="config-sync-controllers"
DEFAULT_QUERY_SET="default_queries"
DEFAULT_END_TIME=$(date +%s)
# Exactly 15 * 24h before the end time: portable (no GNU `date -d`) and
# always divisible into three 5-day windows, regardless of DST changes.
DEFAULT_START_TIME=$((DEFAULT_END_TIME - 15 * 24 * 60 * 60))

# Parse command-line arguments
while getopts ":n:c:q:s:e:" opt; do
  case "$opt" in
    n) NAMESPACE="$OPTARG" ;;
    c) CONTAINER="$OPTARG" ;;
    q) QUERY_SET="$OPTARG" ;;
    s) START_TIME="$OPTARG" ;;
    e) END_TIME="$OPTARG" ;;
    :)
      # Silent mode (leading ':' in the optstring) reports a missing
      # argument as ':' with the option letter in OPTARG.
      echo "Option -$OPTARG requires an argument" >&2
      exit 1
      ;;
    \?)
      echo "Invalid option -$OPTARG" >&2
      exit 1
      ;;
  esac
done

# Set default values if not provided via command-line
NAMESPACE=${NAMESPACE:-$DEFAULT_NAMESPACE}
CONTAINER=${CONTAINER:-$DEFAULT_CONTAINER}
QUERY_SET=${QUERY_SET:-$DEFAULT_QUERY_SET}
START_TIME=${START_TIME:-$DEFAULT_START_TIME}
END_TIME=${END_TIME:-$DEFAULT_END_TIME}

# Resolve the query set first, so an invalid -q value stops the run before
# any output file is created or truncated.
queries=()
get_queries "$QUERY_SET" queries || exit 1

# Output files to store the results
OUTPUT_FILE="prometheus_${QUERY_SET}_stats.csv"
RESPONSE_LOG_FILE="${QUERY_SET}_response.log"

# Start both files fresh; the CSV begins with its header row.
: > "$RESPONSE_LOG_FILE"
echo "status, time_taken(s), start_time, end_time, query" > "$OUTPUT_FILE"

# Both runners take the same five arguments, so pick one and loop once.
if [[ "$QUERY_SET" == "grouped_queriesBy5days" ]]; then
  run_one_query=run_query_across_5day_windows
else
  run_one_query=measure_query_time
fi

for query in "${queries[@]}"; do
  "$run_one_query" "$query" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME"
done

capture_prometheus_resource_metrics

echo "Results have been written to $OUTPUT_FILE"
echo "Query output have been written to $RESPONSE_LOG_FILE"

exit 0
# (The line below is the unmodified, truncated header of the next patch in
# the series, kept byte-for-byte.)
From e53790d1b83c04f910a92999469de32de34d264b Mon Sep 17 00:00:00 2001 From: Shreya Date: Mon, 16 Dec 2024 18:48:00 +0530 Subject: [PATCH 2/8] Include option for all namespaces and containers present --- .../test_prometheus_query_execution_time.sh | 107 +++++++++++++++--- 1 file changed, 89 insertions(+), 18 deletions(-) diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh index cb4c229fc..df33e5a02 100755 --- a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh @@ -333,14 +333,53 @@ measure_query_time() {
status=$(echo "$response" | jq -r '.status') if [[ "$status" == "success" ]]; then - echo "Success, ${time_taken}, ${start_timestamp}, ${end_timestamp}, ${query}" >> "$OUTPUT_FILE" + echo "Success, ${time_taken}, ${start_timestamp}, ${end_timestamp}, ${namespace}, ${container}, ${query}" >> "$OUTPUT_FILE" else error_type=$(echo "$response" | jq -r '.errorType') error_message=$(echo "$response" | jq -r '.error') - echo "Failed | ErrorType: $error_type | Error: $error_message", ${time_taken}, ${start_timestamp}, ${end_timestamp}, ${query}>> "$OUTPUT_FILE" + echo "Failed | ErrorType: $error_type | Error: $error_message", ${time_taken}, ${start_timestamp}, ${end_timestamp}, ${namespace}, ${container}, ${query}>> "$OUTPUT_FILE" fi } +# Function to fetch namespaces using Prometheus +fetch_namespaces() { + local start_timestamp="$1" + local end_timestamp="$2" + # Prometheus query to fetch unique namespaces + local query="count by (namespace) (kube_pod_container_info)" + + # Fetch namespaces from Prometheus + response=$(curl -G -kH "Authorization: Bearer ${TOKEN}" \ + --data-urlencode "query=${query}" \ + --data-urlencode "start=${start_timestamp}" \ + --data-urlencode "end=${end_timestamp}" \ + --data-urlencode "step=900" \ + "${PROMETHEUS_URL}/api/v1/query_range") + # Parse response and extract namespaces + echo "$response" | jq -r '.data.result[].metric.namespace' +} + +# Function to fetch containers for a specific namespace using Prometheus +fetch_containers_for_namespace() { + local namespace="$1" + local start_timestamp="$2" + local end_timestamp="$3" + + # Prometheus query to fetch containers in a namespace + local query="count by (container) (kube_pod_container_info{namespace='${namespace}'})" + + # Fetch containers from Prometheus + response=$(curl -G -kH "Authorization: Bearer ${TOKEN}" \ + --data-urlencode "query=${query}" \ + --data-urlencode "start=${start_timestamp}" \ + --data-urlencode "end=${end_timestamp}" \ + --data-urlencode "step=900" \ + 
"${PROMETHEUS_URL}/api/v1/query_range") + + # Parse response and extract container names + echo "$response" | jq -r '.data.result[].metric.container' +} + # Function to capture resource metrics (CPU and memory) of Prometheus pods capture_prometheus_resource_metrics() { echo -e "\n===== Prometheus Pod Resource Metrics (CPU & Memory) =====" >> "$RESPONSE_LOG_FILE" @@ -382,29 +421,61 @@ QUERY_SET=${QUERY_SET:-$DEFAULT_QUERY_SET} START_TIME=${START_TIME:-$DEFAULT_START_TIME} END_TIME=${END_TIME:-$DEFAULT_END_TIME} -# Output file to store the results -OUTPUT_FILE="prometheus_${QUERY_SET}_stats.csv" -RESPONSE_LOG_FILE="${QUERY_SET}_response.log" - -# Clear the output file before starting -> "$OUTPUT_FILE" -> "$RESPONSE_LOG_FILE" - -echo "status, time_taken(s), start_time, end_time, query" > "$OUTPUT_FILE" queries=() # Declare an empty array to store the returned queries # Get the query set get_queries "$QUERY_SET" queries -if [[ "$QUERY_SET" == "grouped_queriesBy5days" ]]; then - for query in "${queries[@]}"; do - run_query_across_5day_windows "$query" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" - done +if [ -z "${NAMESPACE}" ] && [ -z "${CONTAINER}" ]; then + + # Output file to store the results + OUTPUT_FILE="prometheus_${QUERY_SET}_all_containers_stats.csv" + RESPONSE_LOG_FILE="${QUERY_SET}_all_containers_response.log" + + # Clear the output file before starting + > "$OUTPUT_FILE" + > "$RESPONSE_LOG_FILE" + + echo "status, time_taken(s), start_time, end_time, namespace, container, query" > "$OUTPUT_FILE" + + namespaces=$(fetch_namespaces "$START_TIME" "$END_TIME") + for namespace in $namespaces; do + containers=$(fetch_containers_for_namespace "$namespace" "$START_TIME" "$END_TIME") + echo $containers + for container in $containers; do + if [[ "$QUERY_SET" == "grouped_queriesBy5days" ]]; then + for query in "${queries[@]}"; do + run_query_across_5day_windows "$query" "$namespace" "$container" "$START_TIME" "$END_TIME" + done + else + for query in 
"${queries[@]}"; do + measure_query_time "$query" "$namespace" "$container" "$START_TIME" "$END_TIME" + done + fi + done + done else - for query in "${queries[@]}"; do - measure_query_time "$query" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" - done + + # Output file to store the results + OUTPUT_FILE="prometheus_${QUERY_SET}_${NAMESPACE}_${CONTAINER}_stats.csv" + RESPONSE_LOG_FILE="${QUERY_SET}_${NAMESPACE}_${CONTAINER}_response.log" + + # Clear the output file before starting + > "$OUTPUT_FILE" + > "$RESPONSE_LOG_FILE" + + echo "status, time_taken(s), start_time, end_time, namespace, container, query" > "$OUTPUT_FILE" + + if [[ "$QUERY_SET" == "grouped_queriesBy5days" ]]; then + for query in "${queries[@]}"; do + run_query_across_5day_windows "$query" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" + done + else + for query in "${queries[@]}"; do + measure_query_time "$query" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" + done + fi fi capture_prometheus_resource_metrics From 41babea47445251f98824bcffb787020b32cd407 Mon Sep 17 00:00:00 2001 From: Shreya Date: Thu, 9 Jan 2025 10:12:59 +0530 Subject: [PATCH 3/8] Refactor the script and add metadata queries --- .../test_prometheus_query_execution_time.sh | 491 ++++++++++-------- 1 file changed, 273 insertions(+), 218 deletions(-) diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh index df33e5a02..420fc5888 100755 --- a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh @@ -15,235 +15,240 @@ echo $PROMETHEUS_URL export TOKEN=$(oc whoami --show-token) +declare -A default_queries +declare -A individual_queries_by_pod +declare -A grouped_queries_by_owner_workload +declare -A metadata_queries + # List of Prometheus queries default_queries=( - 'avg by(container, namespace) 
((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'sum by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'max by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'min by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'avg by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'sum by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'max by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'min by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'avg by(container, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'sum by(container, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'max 
by(container, namespace) (max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'min by(container, namespace) (min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'avg by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'sum by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'max by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'min by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'avg by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'sum by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'max by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'min by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'avg by(container, namespace) 
((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'sum by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'max by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'min by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'avg by(container, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'sum by(container, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'max by(container, namespace) (max_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'min by(container, namespace) (min_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'avg by(container, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'sum by(container, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'max by(container, namespace) (max_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' 
- 'min by(container, namespace) (min_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_request_avg]='avg by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_request_sum]='sum by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_request_max]='max by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_request_min]='min by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_limits_avg]='avg by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_limits_sum]='sum by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_limits_max]='max by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_limits_min]='min by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_usage_avg]='avg by(container, namespace) 
(avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_usage_sum]='sum by(container, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_usage_max]='max by(container, namespace) (max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_usage_min]='min by(container, namespace) (min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_throttle_avg]='avg by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_throttle_sum]='sum by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_throttle_max]='max by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_throttle_min]='min by(container, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_request_avg]='avg by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_request_sum]='sum by(container, namespace) 
((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_request_max]='max by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_request_min]='min by(container, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_limits_avg]='avg by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_limits_sum]='sum by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_limits_max]='max by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_limits_min]='min by(container, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_usage_avg]='avg by(container, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_usage_sum]='sum by(container, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_usage_max]='max by(container, namespace) 
(max_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_usage_min]='min by(container, namespace) (min_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_rss_avg]='avg by(container, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_rss_sum]='sum by(container, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_rss_max]='max by(container, namespace) (max_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_rss_min]='min by(container, namespace) (min_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' ) individual_queries_by_pod=( - 'avg by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'sum by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'max by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'min by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'avg by(container, pod, namespace) 
((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'sum by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'max by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'min by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'avg by(container, pod, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'sum by(container, pod, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'max by(container, pod, namespace) (max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'min by(container, pod, namespace) (min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'avg by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'sum by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", 
pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'max by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'min by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'avg by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'sum by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'max by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'min by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'avg by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'sum by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'max by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'min by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", 
resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' - 'avg by(container, pod, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'sum by(container, pod, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'max by(container, pod, namespace) (max_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'min by(container, pod, namespace) (min_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'avg by(container, pod, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'sum by(container, pod, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'max by(container, pod, namespace) (max_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - 'min by(container, pod, namespace) (min_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' - '(max_over_time(kube_pod_container_info{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m])) * on(pod, namespace) group_left(owner_kind, owner_name) max by(pod, namespace, owner_kind, owner_name) (max_over_time(kube_pod_owner{container!="", container!="POD", pod!="", namespace="$NAMESPACE"}[15m]))' - '(max_over_time(kube_pod_container_info{container!="", container!="POD", pod!="", 
namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m])) * on(pod, namespace) group_left(workload, workload_type) max by(pod, namespace, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!="", namespace="$NAMESPACE"}[15m]))' + [cpu_request_avg]='avg by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_request_sum]='sum by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_request_max]='max by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_request_min]='min by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_limits_avg]='avg by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_limits_sum]='sum by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_limits_max]='max by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_limits_min]='min by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", 
namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [cpu_usage_avg]='avg by(container, pod, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_usage_sum]='sum by(container, pod, namespace) (avg_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_usage_max]='max by(container, pod, namespace) (max_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_usage_min]='min by(container, pod, namespace) (min_over_time(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_throttle_avg]='avg by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_throttle_sum]='sum by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_throttle_max]='max by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [cpu_throttle_min]='min by(container, pod, namespace) (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_request_avg]='avg by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", 
resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_request_sum]='sum by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_request_max]='max by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_request_min]='min by(container, pod, namespace) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_limits_avg]='avg by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_limits_sum]='sum by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_limits_max]='max by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_limits_min]='min by(container, pod, namespace) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}))' + [memory_usage_avg]='avg by(container, pod, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_usage_sum]='sum by(container, pod, namespace) (avg_over_time(container_memory_working_set_bytes{container!="", 
container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_usage_max]='max by(container, pod, namespace) (max_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_usage_min]='min by(container, pod, namespace) (min_over_time(container_memory_working_set_bytes{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_rss_avg]='avg by(container, pod, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_rss_sum]='sum by(container, pod, namespace) (avg_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_rss_max]='max by(container, pod, namespace) (max_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [memory_rss_min]='min by(container, pod, namespace) (min_over_time(container_memory_rss{container!="", container!="POD", pod!="",namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]))' + [image_owners]='(max_over_time(kube_pod_container_info{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m])) * on(pod, namespace) group_left(owner_kind, owner_name) max by(pod, namespace, owner_kind, owner_name) (max_over_time(kube_pod_owner{container!="", container!="POD", pod!="", namespace="$NAMESPACE"}[15m]))' + [image_workloads]='(max_over_time(kube_pod_container_info{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m])) * on(pod, namespace) group_left(workload, workload_type) max by(pod, namespace, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!="", namespace="$NAMESPACE"}[15m]))' ) 
grouped_queries_by_owner_workload=( - 'sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [cpu_request_avg]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [cpu_request_sum]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [cpu_request_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) 
group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [cpu_request_min]='min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'avg by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [cpu_limits_avg]='avg by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'sum by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [cpu_limits_sum]='sum 
by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'max by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [cpu_limits_max]='max by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'min by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [cpu_limits_min]='min by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'avg_over_time( + 
[cpu_usage_avg]='avg_over_time( avg by(namespace,container,workload,workload_type,owner_kind) ( (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:])' - 'min_over_time( + [cpu_usage_min]='min_over_time( min by(namespace,container,workload,workload_type,owner_kind) ( (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'max_over_time( + [cpu_usage_max]='max_over_time( max by(namespace,container,workload,workload_type,owner_kind) ( (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'avg_over_time( + [cpu_usage_sum]='avg_over_time( sum by(namespace,container,workload,workload_type,owner_kind) ( (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) * on(pod) group_left(workload, workload_type) max by 
(pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'avg_over_time( + [cpu_throttle_avg]='avg_over_time( avg by(namespace,container,workload,workload_type,owner_kind) ( (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'max_over_time( + [cpu_throttle_max]='max_over_time( max by(namespace,container,workload,workload_type,owner_kind) ( (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'min_over_time( + [cpu_throttle_min]='min_over_time( min by(namespace,container,workload,workload_type,owner_kind) ( (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'avg_over_time( + [cpu_throttle_sum]='avg_over_time( sum by(namespace,container,workload,workload_type,owner_kind) ( 
(rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [memory_request_avg]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [memory_request_sum]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'max by(container,namespace,workload,workload_type,owner_kind) 
((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [memory_request_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [memory_request_min]='min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [memory_limits_avg]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) 
(max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [memory_limits_sum]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [memory_limits_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} + [memory_limits_min]='min by(container,namespace,workload,workload_type,owner_kind) 
((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) ))' - 'avg_over_time( + [memory_usage_avg]='avg_over_time( avg by(namespace,container,workload,workload_type,owner_kind) ( container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'min_over_time( + [memory_usage_min]='min_over_time( min by(namespace,container,workload,workload_type,owner_kind) ( container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'max_over_time( + [memory_usage_max]='max_over_time( max by(namespace,container,workload,workload_type,owner_kind) ( container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) 
)[15m:])' - 'avg_over_time( + [memory_usage_sum]='avg_over_time( sum by(namespace,container,workload,workload_type,owner_kind) ( container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'avg_over_time( + [memory_rss_avg]='avg_over_time( avg by(namespace,container,workload,workload_type,owner_kind) ( container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'min_over_time( + [memory_rss_min]='min_over_time( min by(namespace,container,workload,workload_type,owner_kind) ( container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'max_over_time( + [memory_rss_max]='max_over_time( max by(namespace,container,workload,workload_type,owner_kind) ( container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, 
owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) )[15m:] )' - 'avg_over_time( + [memory_rss_sum]='avg_over_time( sum by(namespace,container,workload,workload_type,owner_kind) ( container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) @@ -251,6 +256,15 @@ grouped_queries_by_owner_workload=( )[15m:] )' ) +metadata_queries=( + [namespaces_across_cluster]='sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=""}[15m]))' + [workloads_across_cluster]='sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[15m]))' + [containers_across_cluster]='sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!=""}[15m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[15m]))' +) + +declare -n grouped_queriesByDuration="grouped_queries_by_owner_workload" +queries_collection=( default_queries individual_queries_by_pod grouped_queries_by_owner_workload metadata_queries grouped_queriesByDuration ) + # Function to return the correct query set as an array reference get_queries() { local query_set="$1" @@ -258,49 +272,52 @@ get_queries() { case "$query_set" in "default_queries") - return_array=("${default_queries[@]}") + return_array=("default_queries") ;; "individual_queries") - return_array=("${individual_queries_by_pod[@]}") + return_array=("individual_queries_by_pod") ;; "grouped_queries") - return_array=("${grouped_queries_by_owner_workload[@]}") + return_array=("grouped_queries_by_owner_workload") ;; - "grouped_queriesBy5days") - return_array=("${grouped_queries_by_owner_workload[@]}") + "grouped_queriesByDuration") + 
return_array=("grouped_queries_by_owner_workload") + ;; + "metadata_queries") + return_array=("metadata_queries") ;; *) - echo "Invalid query set. Available sets are: individual_queries, grouped_queries, grouped_queriesBy5days." - return 1 # Return error + echo "Invalid query set. Available sets are: default_queries, individual_queries, grouped_queries, grouped_queriesByDuration, metadata_queries." + exit 0 # Return error ;; esac } # Each query is sequentially executed dividing the 15days duration into three, 5 days window -run_query_across_5day_windows() { +run_query_across_duration_windows() { local query="$1" local namespace="$NAMESPACE" local container="$CONTAINER" local start_timestamp="$4" local end_timestamp="$5" + local metric_name="$6" + local duration=${7:-5} # Default to 5 days if no duration is passed + local partitions=${8:-3} # Default to 3 partitions if not specified - FIVE_DAYS_IN_SECONDS=$((5 * 24 * 60 * 60)) # 5 days in seconds - - - FIRST_END_TIME=$((start_timestamp + FIVE_DAYS_IN_SECONDS)) + # Convert duration to seconds + local DURATION_IN_SECONDS=$((duration * 24 * 60 * 60)) - SECOND_START_TIME=$((FIRST_END_TIME)) - SECOND_END_TIME=$((SECOND_START_TIME + FIVE_DAYS_IN_SECONDS)) + # Loop to calculate start and end times for the specified number of partitions + local current_start_time=$start_timestamp - THIRD_START_TIME=$((SECOND_END_TIME)) - THIRD_END_TIME=$((THIRD_START_TIME + FIVE_DAYS_IN_SECONDS)) + for ((i = 1; i <= partitions; i++)); do + local current_end_time=$((current_start_time + DURATION_IN_SECONDS)) - # Sequentially run the query across the 3 windows - measure_query_time "$query" "$namespace" "$container" "$START_TIME" "$FIRST_END_TIME" + measure_query_time "$query" "$namespace" "$container" "$current_start_time" "$current_end_time" "$metric_name" - measure_query_time "$query" "$namespace" "$container" "$SECOND_START_TIME" "$SECOND_END_TIME" - - measure_query_time "$query" "$namespace" "$container" "$THIRD_START_TIME" 
"$THIRD_END_TIME" + # Update start time for the next period + current_start_time=$current_end_time + done } @@ -311,6 +328,7 @@ measure_query_time() { local container="$3" local start_timestamp="$4" local end_timestamp="$5" + local query_name="$6" # Replace placeholders in the query with the actual namespace and container query=${query//\$NAMESPACE/$namespace} @@ -324,6 +342,7 @@ measure_query_time() { --data-urlencode "end=${end_timestamp}" \ --data-urlencode "step=900" \ "${PROMETHEUS_URL}/api/v1/query_range") + echo "$query" >> "${RESPONSE_LOG_FILE}" echo "$response" >> "$RESPONSE_LOG_FILE" end_time=$(date +%s.%N) @@ -333,51 +352,49 @@ measure_query_time() { status=$(echo "$response" | jq -r '.status') if [[ "$status" == "success" ]]; then - echo "Success, ${time_taken}, ${start_timestamp}, ${end_timestamp}, ${namespace}, ${container}, ${query}" >> "$OUTPUT_FILE" + echo "Success; ${time_taken}; ${start_timestamp}; ${end_timestamp}; ${namespace}; ${container}; ${query_name}; ${query}" >> "$OUTPUT_FILE" else error_type=$(echo "$response" | jq -r '.errorType') error_message=$(echo "$response" | jq -r '.error') - echo "Failed | ErrorType: $error_type | Error: $error_message", ${time_taken}, ${start_timestamp}, ${end_timestamp}, ${namespace}, ${container}, ${query}>> "$OUTPUT_FILE" + echo "Failed | ErrorType: $error_type | Error: $error_message; ${time_taken}; ${start_timestamp}; ${end_timestamp}; ${namespace}; ${container}; ${query}" >> "$OUTPUT_FILE" fi } -# Function to fetch namespaces using Prometheus -fetch_namespaces() { - local start_timestamp="$1" - local end_timestamp="$2" - # Prometheus query to fetch unique namespaces - local query="count by (namespace) (kube_pod_container_info)" +# Function to fetch long running namespace and container using Prometheus +fetch_namespace_and_container(){ + + local start_timestamp="$1" + local end_timestamp="$2" + + local query='topk(1, + (time() - container_start_time_seconds{container!="POD", container!=""}) + * on(pod, 
container, namespace) + group_left(workload, workload_type) ( + max(kube_pod_container_info{container!="", container!="POD", pod!=""}) by (pod, container, namespace) + ) + * on(pod, namespace) group_left(workload, workload_type) ( + max(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}) by (pod, namespace, workload, workload_type) + ) + )' + + response=$(curl -G -kH "Authorization: Bearer ${TOKEN}" \ + --data-urlencode "query=${query}" \ + --data-urlencode "start=${start_timestamp}" \ + --data-urlencode "end=${end_timestamp}" \ + --data-urlencode "step=900" \ + "${PROMETHEUS_URL}/api/v1/query_range") + + # Check if the result is empty + if [[ -z "$response" || "$response" == "null" ]]; then + echo "Error: No data returned from Prometheus query to create experiments. Exiting!" + exit 1 + fi - # Fetch namespaces from Prometheus - response=$(curl -G -kH "Authorization: Bearer ${TOKEN}" \ - --data-urlencode "query=${query}" \ - --data-urlencode "start=${start_timestamp}" \ - --data-urlencode "end=${end_timestamp}" \ - --data-urlencode "step=900" \ - "${PROMETHEUS_URL}/api/v1/query_range") - # Parse response and extract namespaces - echo "$response" | jq -r '.data.result[].metric.namespace' -} + namespace=$(echo "$response" | jq -r '.data.result[].metric.namespace') + container=$(echo "$response" | jq -r '.data.result[].metric.container') -# Function to fetch containers for a specific namespace using Prometheus -fetch_containers_for_namespace() { - local namespace="$1" - local start_timestamp="$2" - local end_timestamp="$3" - - # Prometheus query to fetch containers in a namespace - local query="count by (container) (kube_pod_container_info{namespace='${namespace}'})" - - # Fetch containers from Prometheus - response=$(curl -G -kH "Authorization: Bearer ${TOKEN}" \ - --data-urlencode "query=${query}" \ - --data-urlencode "start=${start_timestamp}" \ - --data-urlencode "end=${end_timestamp}" \ - --data-urlencode "step=900" \ - "${PROMETHEUS_URL}/api/v1/query_range") 
- - # Parse response and extract container names - echo "$response" | jq -r '.data.result[].metric.container' + # Return namespace and container as an array + echo "$namespace $container" } # Function to capture resource metrics (CPU and memory) of Prometheus pods @@ -388,74 +405,112 @@ capture_prometheus_resource_metrics() { echo -e "=========================================================\n" >> "$RESPONSE_LOG_FILE" } - -DEFAULT_NAMESPACE="openshift-cloud-controller-manager-operator" -DEFAULT_CONTAINER="config-sync-controllers" -DEFAULT_QUERY_SET="default_queries" DEFAULT_END_TIME=$(date +%s) DEFAULT_START_TIME=$(date -d "15 days ago" +%s) +ALL_QUERIES=0 +DEFAULT_DURATION_IN_DAYS=5 +DEFAULT_PARTITIONS=3 + +function usage() { + echo "Usage: $0 [-n namespace] [-c container-name] [-q query_set] [-s start_timestamp] [-e end_timestamp] [-d duration in days] [-p no. of partitions] [-a all query sets]" + echo "n = namespace" + echo "c = container" + echo "q = set of queries to be executed for eg. default_queries, individual_queries, grouped_queries_by_owner_workload, grouped_queriesByDuration, metadata_queries" + echo "s = start time in epoch" + echo "e = end timestamp in epoch, (if start and end timestamp are not specified 15 days is the default time range)" + echo "d = duration for equally dividing the time range for eg. dividing 15 days into 5 days duration and executing the grouped_queries" + echo "p = partitions in time range for eg. dividing 15 days into 5 days duration with 3 partitions" + echo "a = Flag to run all the query sets to capture the time taken" + echo "h = help" + + exit 1 +} # Parse command-line arguments -while getopts ":n:c:q:s:e:" opt; do - case $opt in - n) NAMESPACE="$OPTARG" - ;; - c) CONTAINER="$OPTARG" - ;; - q) QUERY_SET="$OPTARG" - ;; - s) START_TIME="$OPTARG" - ;; - e) END_TIME="$OPTARG" - ;; - \?) 
echo "Invalid option -$OPTARG" >&2 - exit 1 - ;; +while getopts ":n:c:q:s:e:d:p:a" opt; do + case "${opt}" in + n) + NAMESPACE="$OPTARG" + ;; + c) + CONTAINER="$OPTARG" + ;; + q) + QUERY_SET="$OPTARG" + ;; + s) + START_TIME="$OPTARG" + ;; + e) + END_TIME="$OPTARG" + ;; + a) + ALL_QUERIES=1 + ;; + d) + DURATION_IN_DAYS="$OPTARG" + ;; + p) + DURATION_PARTITIONS="$OPTARG" + ;; + *) + usage + ;; esac done # Set default values if not provided via command-line -NAMESPACE=${NAMESPACE:-$DEFAULT_NAMESPACE} -CONTAINER=${CONTAINER:-$DEFAULT_CONTAINER} -QUERY_SET=${QUERY_SET:-$DEFAULT_QUERY_SET} START_TIME=${START_TIME:-$DEFAULT_START_TIME} END_TIME=${END_TIME:-$DEFAULT_END_TIME} - - -queries=() # Declare an empty array to store the returned queries - -# Get the query set -get_queries "$QUERY_SET" queries +DURATION_IN_DAYS=${DURATION_IN_DAYS:-$DEFAULT_DURATION_IN_DAYS} +DURATION_PARTITIONS=${DURATION_PARTITIONS:-$DEFAULT_PARTITIONS} if [ -z "${NAMESPACE}" ] && [ -z "${CONTAINER}" ]; then + echo "Finding a long running container" + result=($(fetch_namespace_and_container "$START_TIME" "$END_TIME")) - # Output file to store the results - OUTPUT_FILE="prometheus_${QUERY_SET}_all_containers_stats.csv" - RESPONSE_LOG_FILE="${QUERY_SET}_all_containers_response.log" + # Access the namespace and container from the array + NAMESPACE="${result[0]}" + CONTAINER="${result[1]}" - # Clear the output file before starting - > "$OUTPUT_FILE" - > "$RESPONSE_LOG_FILE" + # Use the namespace and container values + echo "Namespace: $NAMESPACE" + echo "Container: $CONTAINER" +fi - echo "status, time_taken(s), start_time, end_time, namespace, container, query" > "$OUTPUT_FILE" - - namespaces=$(fetch_namespaces "$START_TIME" "$END_TIME") - for namespace in $namespaces; do - containers=$(fetch_containers_for_namespace "$namespace" "$START_TIME" "$END_TIME") - echo $containers - for container in $containers; do - if [[ "$QUERY_SET" == "grouped_queriesBy5days" ]]; then - for query in "${queries[@]}"; do 
- run_query_across_5day_windows "$query" "$namespace" "$container" "$START_TIME" "$END_TIME" - done - else - for query in "${queries[@]}"; do - measure_query_time "$query" "$namespace" "$container" "$START_TIME" "$END_TIME" - done - fi - done +if [ ${ALL_QUERIES} -eq 1 ]; then + for i in "${!queries_collection[@]}"; do + query_name=${queries_collection[i]} + declare -n current_queries="${queries_collection[i]}" + echo $query_name + + # Output file to store the results + OUTPUT_FILE="prometheus_${query_name}_${NAMESPACE}_${CONTAINER}_stats.csv" + RESPONSE_LOG_FILE="${query_name}_${NAMESPACE}_${CONTAINER}_response.log" + + # Clear the output file before starting + > "$OUTPUT_FILE" + > "$RESPONSE_LOG_FILE" + + echo "status; time_taken(s); start_time; end_time; namespace; container; metric_name; query" > "$OUTPUT_FILE" + + for key in "${!current_queries[@]}"; do + if [[ $query_name == "grouped_queriesByDuration" ]]; then + run_query_across_duration_windows "${current_queries[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" "$DURATION_IN_DAYS" "$DURATION_PARTITIONS" + else + measure_query_time "${current_queries[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" + fi + done + + echo "Results have been written to $OUTPUT_FILE" + echo "Query output have been written to $RESPONSE_LOG_FILE" + capture_prometheus_resource_metrics done else + queries=() # Declare an empty array to store the returned queries + + # Get the query set + get_queries "$QUERY_SET" queries # Output file to store the results OUTPUT_FILE="prometheus_${QUERY_SET}_${NAMESPACE}_${CONTAINER}_stats.csv" @@ -465,22 +520,22 @@ else > "$OUTPUT_FILE" > "$RESPONSE_LOG_FILE" - echo "status, time_taken(s), start_time, end_time, namespace, container, query" > "$OUTPUT_FILE" + echo "status; time_taken(s); start_time; end_time; namespace; container; metric_name; query" > "$OUTPUT_FILE" - if [[ "$QUERY_SET" == "grouped_queriesBy5days" ]]; then - for query in "${queries[@]}"; do - 
run_query_across_5day_windows "$query" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" + declare -n query_set=$queries + if [[ "$QUERY_SET" == "grouped_queriesByDuration" ]]; then + for key in "${!query_set[@]}"; do + run_query_across_duration_windows "${query_set[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" "$DURATION_IN_DAYS" "$DURATION_PARTITIONS" done else - for query in "${queries[@]}"; do - measure_query_time "$query" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" + for key in "${!query_set[@]}"; do + measure_query_time "${query_set[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" done fi -fi - -capture_prometheus_resource_metrics -echo "Results have been written to $OUTPUT_FILE" -echo "Query output have been written to $RESPONSE_LOG_FILE" + echo "Results have been written to $OUTPUT_FILE" + echo "Query output have been written to $RESPONSE_LOG_FILE" + capture_prometheus_resource_metrics +fi exit 0 From a9c4cb1785b3b4a8ff72900b350ddf220b3dd6de Mon Sep 17 00:00:00 2001 From: Shreya Date: Thu, 9 Jan 2025 12:49:18 +0530 Subject: [PATCH 4/8] Add readme --- .../test_prometheus_query_execution_time.md | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.md diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.md b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.md new file mode 100644 index 000000000..5f7172a09 --- /dev/null +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.md @@ -0,0 +1,55 @@ +# Prometheus Time Range script + +## Understanding the script flow +This script based on the input parameters runs the set of Prometheus queries, capturing the time taken, status and output of the PromQL queries executed over a specified time range by default over +15 days + +### Collection of queries +1. 
Default queries: Set of queries currently used in Metric profile to capture resource usage data +2. Individual queries: Set of queries, capturing the container resource data (cpu and memory) by pod with `imageOwners` and `imageWorkloads` queries +3. Grouped queries: Set of queries which group the container data by pod owner and workload on the fly +4. Grouped queries by duration: This collection uses the same set of grouped queries, but divides the user-defined time range into partitions and queries each partition separately; by default the 15-day time range is divided into 3 partitions, each covering 5 days +5. Metadata queries: Set of metadata queries used to import datasource metadata + + +### Prerequisites: +OpenShift cluster + +### Execute the script on OpenShift as: +./test_prometheus_query_execution_time.sh + +``` +Usage: ./test_prometheus_query_execution_time.sh [-n namespace] [-c container-name] [-q query_set] [-s start_timestamp] [-e end_timestamp] [-d duration in days] [-p no. of partitions] [-a all query sets] +n = namespace +c = container +q = set of queries to be executed for eg. default_queries, individual_queries, grouped_queries_by_owner_workload, grouped_queriesByDuration, metadata_queries +s = start time in epoch +e = end timestamp in epoch, (if start and end timestamp are not specified 15 days is the default time range) +d = duration for equally dividing the time range for eg. dividing 15 days into 5 days duration and executing the grouped_queries +p = partitions in time range for eg. dividing 15 days into 5 days duration with 3 partitions +a = Flag to run all the query sets to capture the time taken + +Note: once query set/sets are executed output will be stored in +1. prometheus_${QUERY_SET}_${NAMESPACE}_${CONTAINER}_stats.csv - capturing status, time taken, start and end time, namespace, container, metric_name and query +2. 
${QUERY_SET}_${NAMESPACE}_${CONTAINER}_response.log - logs the query and query output +``` + +To capture the time taken to run all the query sets: + +``` +./test_prometheus_query_execution_time.sh -a +``` + +To capture the time taken to run the individual query set for the "default" namespace and the "app-container" container: + +``` +./test_prometheus_query_execution_time.sh -n default -c app-container -q individual_queries +``` + +To capture the time taken to run the grouped queries by duration for duration=3days and partitions=5 (dividing 15 days into 5 partitions, querying each partition with 3 days of data): + +``` +./test_prometheus_query_execution_time.sh -q grouped_queriesByDuration -d 3 -p 5 +``` + +NOTE: If neither a namespace nor a container is specified, the script finds a long-running container and its namespace \ No newline at end of file From c55297a757246302cc24faf709da85f335e0cd8f Mon Sep 17 00:00:00 2001 From: Shreya Date: Thu, 9 Jan 2025 15:51:48 +0530 Subject: [PATCH 5/8] Formatting grouped queries --- .../test_prometheus_query_execution_time.sh | 207 ++++-------------- 1 file changed, 46 insertions(+), 161 deletions(-) diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh index 420fc5888..8917f40db 100755 --- a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh @@ -94,166 +94,38 @@ individual_queries_by_pod=( ) grouped_queries_by_owner_workload=( - [cpu_request_avg]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * 
on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [cpu_request_sum]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [cpu_request_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [cpu_request_min]='min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [cpu_limits_avg]='avg by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) 
(max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [cpu_limits_sum]='sum by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [cpu_limits_max]='max by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [cpu_limits_min]='min by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [cpu_usage_avg]='avg_over_time( - avg by(namespace,container,workload,workload_type,owner_kind) ( - (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", 
namespace="$NAMESPACE",container="$CONTAINER_NAME"}) - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:])' - [cpu_usage_min]='min_over_time( - min by(namespace,container,workload,workload_type,owner_kind) ( - (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [cpu_usage_max]='max_over_time( - max by(namespace,container,workload,workload_type,owner_kind) ( - (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [cpu_usage_sum]='avg_over_time( - sum by(namespace,container,workload,workload_type,owner_kind) ( - (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - 
[cpu_throttle_avg]='avg_over_time( - avg by(namespace,container,workload,workload_type,owner_kind) ( - (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [cpu_throttle_max]='max_over_time( - max by(namespace,container,workload,workload_type,owner_kind) ( - (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [cpu_throttle_min]='min_over_time( - min by(namespace,container,workload,workload_type,owner_kind) ( - (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [cpu_throttle_sum]='avg_over_time( - sum by(namespace,container,workload,workload_type,owner_kind) ( - (rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) 
(max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [memory_request_avg]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [memory_request_sum]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [memory_request_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [memory_request_min]='min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", 
namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [memory_limits_avg]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [memory_limits_sum]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [memory_limits_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [memory_limits_min]='min by(container,namespace,workload,workload_type,owner_kind) 
((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - ))' - [memory_usage_avg]='avg_over_time( - avg by(namespace,container,workload,workload_type,owner_kind) ( - container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [memory_usage_min]='min_over_time( - min by(namespace,container,workload,workload_type,owner_kind) ( - container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [memory_usage_max]='max_over_time( - max by(namespace,container,workload,workload_type,owner_kind) ( - container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:])' - 
[memory_usage_sum]='avg_over_time( - sum by(namespace,container,workload,workload_type,owner_kind) ( - container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [memory_rss_avg]='avg_over_time( - avg by(namespace,container,workload,workload_type,owner_kind) ( - container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [memory_rss_min]='min_over_time( - min by(namespace,container,workload,workload_type,owner_kind) ( - container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [memory_rss_max]='max_over_time( - max by(namespace,container,workload,workload_type,owner_kind) ( - container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) 
(max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' - [memory_rss_sum]='avg_over_time( - sum by(namespace,container,workload,workload_type,owner_kind) ( - container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} - * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) - * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])) - )[15m:] )' + [cpu_request_avg]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}* on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m]))* on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [cpu_request_sum]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}* on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m]))* on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [cpu_request_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}* on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m]))* on(pod) 
group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [cpu_request_min]='min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}* on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m]))* on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [cpu_limits_avg]='avg by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}* on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m]))* on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [cpu_limits_sum]='sum by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}* on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m]))* on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [cpu_limits_max]='max by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"}* on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) 
(max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [cpu_limits_min]='min by(container, namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="cpu", unit="core", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [cpu_usage_avg]='avg_over_time(avg by(namespace,container,workload,workload_type,owner_kind) ((node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [cpu_usage_min]='min_over_time(min by(namespace,container,workload,workload_type,owner_kind) ((node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [cpu_usage_max]='max_over_time(max by(namespace,container,workload,workload_type,owner_kind) ((node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", 
namespace="$NAMESPACE",container="$CONTAINER_NAME"}) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:] )' + [cpu_usage_sum]='avg_over_time(sum by(namespace,container,workload,workload_type,owner_kind) ((node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:] )' + [cpu_throttle_avg]='avg_over_time(avg by(namespace,container,workload,workload_type,owner_kind) ((rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:] )' + [cpu_throttle_max]='max_over_time(max by(namespace,container,workload,workload_type,owner_kind) ((rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m])) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m]))* on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [cpu_throttle_min]='min_over_time(min 
by(namespace,container,workload,workload_type,owner_kind) ((rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m]) ) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [cpu_throttle_sum]='avg_over_time(sum by(namespace,container,workload,workload_type,owner_kind) ((rate(container_cpu_cfs_throttled_seconds_total{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"}[15m])) * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:] )' + [memory_request_avg]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"}* on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [memory_request_sum]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) 
(max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [memory_request_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m]))* on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [memory_request_min]='min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_requests{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [memory_limits_avg]='avg by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [memory_limits_sum]='sum by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) 
(max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [memory_limits_max]='max by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [memory_limits_min]='min by(container,namespace,workload,workload_type,owner_kind) ((kube_pod_container_resource_limits{container!="", container!="POD", pod!="", resource="memory", unit="byte", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m]))))' + [memory_usage_avg]='avg_over_time(avg by(namespace,container,workload,workload_type,owner_kind) (container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [memory_usage_min]='min_over_time(min by(namespace,container,workload,workload_type,owner_kind) (container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by 
(pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [memory_usage_max]='max_over_time(max by(namespace,container,workload,workload_type,owner_kind) (container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [memory_usage_sum]='avg_over_time(sum by(namespace,container,workload,workload_type,owner_kind) (container_memory_working_set_bytes{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [memory_rss_avg]='avg_over_time(avg by(namespace,container,workload,workload_type,owner_kind) (container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [memory_rss_min]='min_over_time(min by(namespace,container,workload,workload_type,owner_kind) (container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, 
workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [memory_rss_max]='max_over_time(max by(namespace,container,workload,workload_type,owner_kind) (container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' + [memory_rss_sum]='avg_over_time(sum by(namespace,container,workload,workload_type,owner_kind) (container_memory_rss{container!="", container!="POD", pod!="", namespace="$NAMESPACE",container="$CONTAINER_NAME"} * on(pod) group_left(workload, workload_type) max by (pod, workload, workload_type) (max_over_time(namespace_workload_pod:kube_pod_owner:relabel{pod!=""}[15m])) * on(pod) group_left(owner_kind) max by (pod, owner_kind) (max_over_time(kube_pod_owner{pod!=""}[15m])))[15m:])' ) metadata_queries=( @@ -342,7 +214,8 @@ measure_query_time() { --data-urlencode "end=${end_timestamp}" \ --data-urlencode "step=900" \ "${PROMETHEUS_URL}/api/v1/query_range") - echo "$query" >> "${RESPONSE_LOG_FILE}" + + echo "Query: $query" >> "${RESPONSE_LOG_FILE}" echo "$response" >> "$RESPONSE_LOG_FILE" end_time=$(date +%s.%N) @@ -496,6 +369,13 @@ if [ ${ALL_QUERIES} -eq 1 ]; then for key in "${!current_queries[@]}"; do if [[ $query_name == "grouped_queriesByDuration" ]]; then + # Calculate the difference in seconds + TIME_DIFF=$((END_TIME - START_TIME)) + + # Convert the difference from seconds to days + DIFF_IN_DAYS=$((TIME_DIFF / 86400)) + echo "Dividing the ${DIFF_IN_DAYS} days time range into ${DURATION_PARTITIONS} partitions, each with ${DURATION_IN_DAYS} days duration" + 
run_query_across_duration_windows "${current_queries[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" "$DURATION_IN_DAYS" "$DURATION_PARTITIONS" else measure_query_time "${current_queries[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" @@ -524,6 +404,11 @@ else declare -n query_set=$queries if [[ "$QUERY_SET" == "grouped_queriesByDuration" ]]; then + TIME_DIFF=$((END_TIME - START_TIME)) + + # Convert the difference from seconds to days + DIFF_IN_DAYS=$((TIME_DIFF / 86400)) + echo "Dividing the ${DIFF_IN_DAYS} days time range into ${DURATION_PARTITIONS} partitions, each with ${DURATION_IN_DAYS} days duration" for key in "${!query_set[@]}"; do run_query_across_duration_windows "${query_set[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" "$DURATION_IN_DAYS" "$DURATION_PARTITIONS" done From 774f5f0ac5db38a24e800c33506dfe3a3d6b6f74 Mon Sep 17 00:00:00 2001 From: Shreya Date: Mon, 20 Jan 2025 19:18:17 +0530 Subject: [PATCH 6/8] Consolidate time taken for all query sets --- .../test_prometheus_query_execution_time.sh | 130 ++++++++++++++++-- 1 file changed, 120 insertions(+), 10 deletions(-) diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh index 8917f40db..3760551a6 100755 --- a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh @@ -14,6 +14,9 @@ PROMETHEUS_URL="https://${PROMETHEUS_ROUTE}" echo $PROMETHEUS_URL export TOKEN=$(oc whoami --show-token) +export STEP=900 +#15d=1296000s +export STEP_15DAYS=1296000 declare -A default_queries declare -A individual_queries_by_pod @@ -129,13 +132,13 @@ grouped_queries_by_owner_workload=( ) metadata_queries=( - [namespaces_across_cluster]='sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=""}[15m]))' - 
[workloads_across_cluster]='sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[15m]))' - [containers_across_cluster]='sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!=""}[15m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[15m]))' + [namespaces_across_cluster]='sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=""}[15d]))' + [workloads_across_cluster]='sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[15d]))' + [containers_across_cluster]='sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!=""}[15d]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[15d]))' ) declare -n grouped_queriesByDuration="grouped_queries_by_owner_workload" -queries_collection=( default_queries individual_queries_by_pod grouped_queries_by_owner_workload metadata_queries grouped_queriesByDuration ) +queries_collection=( default_queries individual_queries_by_pod grouped_queries_by_owner_workload grouped_queriesByDuration metadata_queries ) # Function to return the correct query set as an array reference get_queries() { @@ -206,13 +209,20 @@ measure_query_time() { query=${query//\$NAMESPACE/$namespace} query=${query//\$CONTAINER_NAME/$container} + local step + if echo "$query" | grep -q "15d"; then + step=${STEP_15DAYS} + else + step=${STEP} + fi + start_time=$(date +%s.%N) response=$(curl -G -kH "Authorization: Bearer ${TOKEN}" \ --data-urlencode "query=${query}" \ --data-urlencode "start=${start_timestamp}" \ --data-urlencode "end=${end_timestamp}" \ - --data-urlencode "step=900" \ + --data-urlencode "step=${step}" \ 
"${PROMETHEUS_URL}/api/v1/query_range") echo "Query: $query" >> "${RESPONSE_LOG_FILE}" @@ -272,12 +282,108 @@ fetch_namespace_and_container(){ # Function to capture resource metrics (CPU and memory) of Prometheus pods capture_prometheus_resource_metrics() { - echo -e "\n===== Prometheus Pod Resource Metrics (CPU & Memory) =====" >> "$RESPONSE_LOG_FILE" + local response_log_file=$1 + echo -e "\n===== Prometheus Pod Resource Metrics (CPU & Memory) =====" >> "$response_log_file" # Get resource usage for Prometheus pods in the given namespace - kubectl top pod -n "$PROMETHEUS_NAMESPACE" | grep "prometheus" >> "$RESPONSE_LOG_FILE" - echo -e "=========================================================\n" >> "$RESPONSE_LOG_FILE" + kubectl top pod -n "$PROMETHEUS_NAMESPACE" | grep "prometheus" >> "$response_log_file" + echo -e "=========================================================\n" >> "$response_log_file" +} + +# Declare associative arrays for summing total time for each query type +declare -A total_time_default_sum +declare -A total_time_individual_sum +declare -A total_time_grouped_sum +declare -A total_time_grouped_duration_sum + +# Declare arrays for tracking start_time and end_time +declare -A start_time_sum +declare -A end_time_sum + +sum_float() { + echo "$1 $2" | awk '{printf "%.9f", $1 + $2}' +} + +process_file() { + local file=$1 + local query_type=$2 + + # Read the CSV file line by line (skipping the header) + while IFS=';' read -r status time_taken start_time end_time namespace container metric_name query; do + # Create a unique key based on status, namespace, container, and metric_name + key="$status,$namespace,$container,$metric_name" + + # Sum the time based on the query type + case "$query_type" in + default_queries) + total_time_default_sum[$key]="$time_taken" + ;; + individual_queries_by_pod) + total_time_individual_sum[$key]="$time_taken" + ;; + grouped_queries_by_owner_workload) + total_time_grouped_sum[$key]="$time_taken" + ;; + 
grouped_queriesByDuration) + if [ -z "${total_time_grouped_duration_sum[$key]}" ]; then + total_time_grouped_duration_sum[$key]=0 + fi + + total_time_grouped_duration_sum[$key]=$(sum_float "${total_time_grouped_duration_sum[$key]}" "$time_taken") + esac + + # Track start_time and end_time for each key + if [[ -z "${start_time_sum[$key]}" || "${start_time_sum[$key]}" -gt "$start_time" ]]; then + start_time_sum[$key]=$start_time + fi + + if [[ -z "${end_time_sum[$key]}" || "${end_time_sum[$key]}" -lt "$end_time" ]]; then + end_time_sum[$key]=$end_time + fi + + done < <(tail -n +2 "$file") } +# Function to generate the output file +common_function() { + output_file1="metric_time_for_all_queries.csv" + output_file2="total_time_for_all_queries.csv" + total_time_default=0 + total_time_individual=0 + total_time_grouped=0 + total_time_grouped_by_duration=0 + + + # Output headers to the CSV file + echo "status;time_default_queries;time_individual_queries;time_grouped_queries;time_grouped_queriesByDuration;start_time;end_time;namespace;container;metric_name" > "$output_file1" + echo "status;total_time_default_queries;total_time_individual_queries;total_time_grouped_queries;total_time_grouped_queriesByDuration;start_time;end_time;namespace;container" > "$output_file2" + + for key in "${!total_time_default_sum[@]}"; do + # Extract the individual row data for each query type + time_default=${total_time_default_sum[$key]} + total_time_default=$(sum_float "${total_time_default}" "$time_default") + + time_individual=${total_time_individual_sum[$key]} + total_time_individual=$(sum_float "${vtotal_time_individual}" "$time_individual") + + time_grouped=${total_time_grouped_sum[$key]} + total_time_grouped=$(sum_float "${total_time_grouped}" "$time_grouped") + + time_grouped_duration=${total_time_grouped_duration_sum[$key]} + total_time_grouped_by_duration=$(sum_float "${total_time_grouped_by_duration}" "$time_grouped_duration") + + start_time=${start_time_sum[$key]:-0} + 
end_time=${end_time_sum[$key]:-0} + + IFS=',' read -r status namespace container metric_name <<< "$key" + + # Write the combined row to the output file + echo "$status;$time_default;$time_individual;$time_grouped;$time_grouped_duration;$start_time;$end_time;$namespace;$container;$metric_name" >> "$output_file1" + done + + echo "$status;$total_time_default;$total_time_individual;$total_time_grouped;$total_time_grouped_by_duration;$start_time;$end_time;$namespace;$container" >> "$output_file2" +} + + DEFAULT_END_TIME=$(date +%s) DEFAULT_START_TIME=$(date -d "15 days ago" +%s) ALL_QUERIES=0 @@ -384,8 +490,12 @@ if [ ${ALL_QUERIES} -eq 1 ]; then echo "Results have been written to $OUTPUT_FILE" echo "Query output have been written to $RESPONSE_LOG_FILE" - capture_prometheus_resource_metrics + capture_prometheus_resource_metrics "$RESPONSE_LOG_FILE" + process_file "$OUTPUT_FILE" $query_name + done + + common_function else queries=() # Declare an empty array to store the returned queries @@ -420,7 +530,7 @@ else echo "Results have been written to $OUTPUT_FILE" echo "Query output have been written to $RESPONSE_LOG_FILE" - capture_prometheus_resource_metrics + capture_prometheus_resource_metrics "$RESPONSE_LOG_FILE" fi exit 0 From 6e7dfdfdc1347869a5d0d5396aef0f970447eebc Mon Sep 17 00:00:00 2001 From: Shreya Date: Wed, 22 Jan 2025 16:23:26 +0530 Subject: [PATCH 7/8] Support all namespaces and containers present in the cluster --- .../test_prometheus_query_execution_time.sh | 176 +++++++++++++----- 1 file changed, 131 insertions(+), 45 deletions(-) diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh index 3760551a6..e1c1603bc 100755 --- a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh @@ -171,8 +171,8 @@ get_queries() { # Each query is sequentially 
executed dividing the 15days duration into three, 5 days window run_query_across_duration_windows() { local query="$1" - local namespace="$NAMESPACE" - local container="$CONTAINER" + local namespace="$2" + local container="$3" local start_timestamp="$4" local end_timestamp="$5" local metric_name="$6" @@ -230,16 +230,16 @@ measure_query_time() { end_time=$(date +%s.%N) - time_taken=$(echo "$end_time - $start_time" | bc) + time_taken=$(echo "$end_time $start_time" | awk '{print $1 - $2}') status=$(echo "$response" | jq -r '.status') if [[ "$status" == "success" ]]; then - echo "Success; ${time_taken}; ${start_timestamp}; ${end_timestamp}; ${namespace}; ${container}; ${query_name}; ${query}" >> "$OUTPUT_FILE" + echo "Success;${time_taken};${start_timestamp};${end_timestamp};${namespace};${container};${query_name};${query}" >> "$OUTPUT_FILE" else error_type=$(echo "$response" | jq -r '.errorType') error_message=$(echo "$response" | jq -r '.error') - echo "Failed | ErrorType: $error_type | Error: $error_message; ${time_taken}; ${start_timestamp}; ${end_timestamp}; ${namespace}; ${container}; ${query}" >> "$OUTPUT_FILE" + echo "Failed | ErrorType: $error_type | Error: $error_message;${time_taken};${start_timestamp};${end_timestamp};${namespace};${container};${query}" >> "$OUTPUT_FILE" fi } @@ -308,9 +308,9 @@ process_file() { local query_type=$2 # Read the CSV file line by line (skipping the header) - while IFS=';' read -r status time_taken start_time end_time namespace container metric_name query; do + while IFS=';' read -r status time_taken start_time end_time local_namespace local_container metric_name query; do # Create a unique key based on status, namespace, container, and metric_name - key="$status,$namespace,$container,$metric_name" + key="$status,$local_namespace,$local_container,$metric_name" # Sum the time based on the query type case "$query_type" in @@ -345,8 +345,10 @@ process_file() { # Function to generate the output file common_function() { - 
output_file1="metric_time_for_all_queries.csv" - output_file2="total_time_for_all_queries.csv" + local namespace=$1 + local container=$2 + local output_file1=$3 + local output_file2=$4 total_time_default=0 total_time_individual=0 total_time_grouped=0 @@ -355,15 +357,26 @@ common_function() { # Output headers to the CSV file echo "status;time_default_queries;time_individual_queries;time_grouped_queries;time_grouped_queriesByDuration;start_time;end_time;namespace;container;metric_name" > "$output_file1" - echo "status;total_time_default_queries;total_time_individual_queries;total_time_grouped_queries;total_time_grouped_queriesByDuration;start_time;end_time;namespace;container" > "$output_file2" + + if [ -z "$(cat "$output_file2")" ]; then + echo "status;total_time_default_queries;total_time_individual_queries;total_time_grouped_queries;total_time_grouped_queriesByDuration;start_time;end_time;namespace;container" > "$output_file2" + fi for key in "${!total_time_default_sum[@]}"; do + + IFS=',' read -r status target_namespace target_container metric_name <<< "$key" + + # Check if the key matches the target status and namespace + if [ "$namespace" != "$target_namespace" ] || [ "$container" != "$target_container" ]; then + continue; + fi + # Extract the individual row data for each query type time_default=${total_time_default_sum[$key]} total_time_default=$(sum_float "${total_time_default}" "$time_default") time_individual=${total_time_individual_sum[$key]} - total_time_individual=$(sum_float "${vtotal_time_individual}" "$time_individual") + total_time_individual=$(sum_float "${total_time_individual}" "$time_individual") time_grouped=${total_time_grouped_sum[$key]} total_time_grouped=$(sum_float "${total_time_grouped}" "$time_grouped") @@ -374,10 +387,10 @@ common_function() { start_time=${start_time_sum[$key]:-0} end_time=${end_time_sum[$key]:-0} - IFS=',' read -r status namespace container metric_name <<< "$key" + IFS=',' read -r status target_namespace 
target_container metric_name <<< "$key" # Write the combined row to the output file - echo "$status;$time_default;$time_individual;$time_grouped;$time_grouped_duration;$start_time;$end_time;$namespace;$container;$metric_name" >> "$output_file1" + echo "$status;$time_default;$time_individual;$time_grouped;$time_grouped_duration;$start_time;$end_time;$target_namespace;$target_container;$metric_name" >> "$output_file1" done echo "$status;$total_time_default;$total_time_individual;$total_time_grouped;$total_time_grouped_by_duration;$start_time;$end_time;$namespace;$container" >> "$output_file2" @@ -389,6 +402,7 @@ DEFAULT_START_TIME=$(date -d "15 days ago" +%s) ALL_QUERIES=0 DEFAULT_DURATION_IN_DAYS=5 DEFAULT_PARTITIONS=3 +ALL_NAMESPACES_CONTAINERS=0 function usage() { echo "Usage: $0 [-n namespace] [-c container-name] [-q query_set] [-s start_timestamp] [-e end_timestamp] [-d duration in days] [-p no. of partitions] [-a all query sets]" @@ -406,7 +420,7 @@ function usage() { } # Parse command-line arguments -while getopts ":n:c:q:s:e:d:p:a" opt; do +while getopts ":n:c:q:s:e:d:p:aA" opt; do case "${opt}" in n) NAMESPACE="$OPTARG" @@ -432,6 +446,9 @@ while getopts ":n:c:q:s:e:d:p:a" opt; do p) DURATION_PARTITIONS="$OPTARG" ;; + A) + ALL_NAMESPACES_CONTAINERS=1 + ;; *) usage ;; @@ -444,7 +461,87 @@ END_TIME=${END_TIME:-$DEFAULT_END_TIME} DURATION_IN_DAYS=${DURATION_IN_DAYS:-$DEFAULT_DURATION_IN_DAYS} DURATION_PARTITIONS=${DURATION_PARTITIONS:-$DEFAULT_PARTITIONS} -if [ -z "${NAMESPACE}" ] && [ -z "${CONTAINER}" ]; then +# Function to fetch namespaces using Prometheus +fetch_namespaces() { + local start_timestamp="$1" + local end_timestamp="$2" + # Prometheus query to fetch unique namespaces + local query="count by (namespace) (kube_pod_container_info)" + + # Fetch namespaces from Prometheus + response=$(curl -G -kH "Authorization: Bearer ${TOKEN}" \ + --data-urlencode "query=${query}" \ + --data-urlencode "start=${start_timestamp}" \ + --data-urlencode 
"end=${end_timestamp}" \ + --data-urlencode "step=900" \ + "${PROMETHEUS_URL}/api/v1/query_range") + # Parse response and extract namespaces + echo "$response" | jq -r '.data.result[].metric.namespace' +} + +# Function to fetch containers for a specific namespace using Prometheus +fetch_containers_for_namespace() { + local namespace="$1" + local start_timestamp="$2" + local end_timestamp="$3" + + # Prometheus query to fetch containers in a namespace + local query="count by (container) (kube_pod_container_info{namespace='${namespace}'})" + + # Fetch containers from Prometheus + response=$(curl -G -kH "Authorization: Bearer ${TOKEN}" \ + --data-urlencode "query=${query}" \ + --data-urlencode "start=${start_timestamp}" \ + --data-urlencode "end=${end_timestamp}" \ + --data-urlencode "step=900" \ + "${PROMETHEUS_URL}/api/v1/query_range") + + # Parse response and extract container names + echo "$response" | jq -r '.data.result[].metric.container' +} + +run_all_queries() { + local namespace=$1 + local container=$2 + + for i in "${!queries_collection[@]}"; do + query_name=${queries_collection[i]} + declare -n current_queries="${queries_collection[i]}" + + # Output file to store the results + OUTPUT_FILE="prometheus_${query_name}_${namespace}_${container}_stats.csv" + RESPONSE_LOG_FILE="${query_name}_${namespace}_${container}_response.log" + + # Clear the output file before starting + > "$OUTPUT_FILE" + > "$RESPONSE_LOG_FILE" + + echo "status;time_taken(s);start_time;end_time;namespace;container;metric_name;query" > "$OUTPUT_FILE" + + for key in "${!current_queries[@]}"; do + if [[ $query_name == "grouped_queriesByDuration" ]]; then + # Calculate the difference in seconds + TIME_DIFF=$((END_TIME - START_TIME)) + + # Convert the difference from seconds to days + DIFF_IN_DAYS=$((TIME_DIFF / 86400)) + echo "Dividing the ${DIFF_IN_DAYS} days time range into ${DURATION_PARTITIONS} partitions, each with ${DURATION_IN_DAYS} days duration" + + run_query_across_duration_windows 
"${current_queries[$key]}" "$namespace" "$container" "$START_TIME" "$END_TIME" "$key" "$DURATION_IN_DAYS" "$DURATION_PARTITIONS" + else + measure_query_time "${current_queries[$key]}" "$namespace" "$container" "$START_TIME" "$END_TIME" "$key" + fi + done + + echo "Results have been written to $OUTPUT_FILE" + echo "Query output have been written to $RESPONSE_LOG_FILE" + capture_prometheus_resource_metrics "$RESPONSE_LOG_FILE" + process_file "$OUTPUT_FILE" "$query_name" + done + +} + +if [ -z "${NAMESPACE}" ] && [ -z "${CONTAINER}" ] && [ "${ALL_NAMESPACES_CONTAINERS}" -eq 0 ]; then echo "Finding a long running container" result=($(fetch_namespace_and_container "$START_TIME" "$END_TIME")) @@ -457,45 +554,34 @@ if [ -z "${NAMESPACE}" ] && [ -z "${CONTAINER}" ]; then echo "Container: $CONTAINER" fi +TOTAL_TIME_FOR_ALL_CONTAINERS="total_time_for_all_queries.csv" + if [ ${ALL_QUERIES} -eq 1 ]; then - for i in "${!queries_collection[@]}"; do - query_name=${queries_collection[i]} - declare -n current_queries="${queries_collection[i]}" - echo $query_name - # Output file to store the results - OUTPUT_FILE="prometheus_${query_name}_${NAMESPACE}_${CONTAINER}_stats.csv" - RESPONSE_LOG_FILE="${query_name}_${NAMESPACE}_${CONTAINER}_response.log" + METRIC_TIME_FILE="metric_time_for_all_queries_${NAMESPACE}_${CONTAINER}.csv" - # Clear the output file before starting - > "$OUTPUT_FILE" - > "$RESPONSE_LOG_FILE" + run_all_queries "$NAMESPACE" "$CONTAINER" + common_function "$NAMESPACE" "$CONTAINER" "$METRIC_TIME_FILE" "$TOTAL_TIME_FOR_ALL_CONTAINERS" - echo "status; time_taken(s); start_time; end_time; namespace; container; metric_name; query" > "$OUTPUT_FILE" + echo "Time taken for each metric by all queries for namespace -$NAMESPACE and container -$CONTAINER have been written to $METRIC_TIME_FILE" + echo "Total time taken for all the queries have been written to $TOTAL_TIME_FOR_ALL_CONTAINERS" +elif [ ${ALL_NAMESPACES_CONTAINERS} -eq 1 ]; then - for key in "${!current_queries[@]}"; 
do - if [[ $query_name == "grouped_queriesByDuration" ]]; then - # Calculate the difference in seconds - TIME_DIFF=$((END_TIME - START_TIME)) + namespaces=$(fetch_namespaces "$START_TIME" "$END_TIME") - # Convert the difference from seconds to days - DIFF_IN_DAYS=$((TIME_DIFF / 86400)) - echo "Dividing the ${DIFF_IN_DAYS} days time range into ${DURATION_PARTITIONS} partitions, each with ${DURATION_IN_DAYS} days duration" + for namespace in $namespaces; do + containers=$(fetch_containers_for_namespace "$namespace" "$START_TIME" "$END_TIME") - run_query_across_duration_windows "${current_queries[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" "$DURATION_IN_DAYS" "$DURATION_PARTITIONS" - else - measure_query_time "${current_queries[$key]}" "$NAMESPACE" "$CONTAINER" "$START_TIME" "$END_TIME" "$key" - fi - done + for container in ${containers[@]}; do + METRIC_TIME_FILE="metric_time_for_all_queries_${namespace}_${container}.csv" - echo "Results have been written to $OUTPUT_FILE" - echo "Query output have been written to $RESPONSE_LOG_FILE" - capture_prometheus_resource_metrics "$RESPONSE_LOG_FILE" - process_file "$OUTPUT_FILE" $query_name + run_all_queries "$namespace" "$container" + common_function "$namespace" "$container" "$METRIC_TIME_FILE" "$TOTAL_TIME_FOR_ALL_CONTAINERS" + echo "Time taken for each metric by all queries for namespace -$namespace and container -$container have been written to $METRIC_TIME_FILE" + done done - - common_function + echo "Total time taken for all the queries have been written to $TOTAL_TIME_FOR_ALL_CONTAINERS" else queries=() # Declare an empty array to store the returned queries @@ -510,7 +596,7 @@ else > "$OUTPUT_FILE" > "$RESPONSE_LOG_FILE" - echo "status; time_taken(s); start_time; end_time; namespace; container; metric_name; query" > "$OUTPUT_FILE" + echo "status;time_taken(s);start_time;end_time;namespace;container;metric_name;query" > "$OUTPUT_FILE" declare -n query_set=$queries if [[ "$QUERY_SET" == 
"grouped_queriesByDuration" ]]; then From f96aa6e241f6bbe06499a646677e18b4e12e4e87 Mon Sep 17 00:00:00 2001 From: Shreya Date: Thu, 23 Jan 2025 11:52:15 +0530 Subject: [PATCH 8/8] Update readme --- .../test_prometheus_query_execution_time.md | 12 +++++++++++- .../test_prometheus_query_execution_time.sh | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.md b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.md index 5f7172a09..4f4df6d20 100644 --- a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.md +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.md @@ -29,9 +29,13 @@ d = duration for equally dividing the time range for eg. dividing 15 days into 5 p = partitions in time range for eg. dividing 15 days into 5 days duration with 3 partitions a = Flag to run all the query sets to capture the time taken -Note: once query set/sets are executed output will be stored in +Note: once the query set/sets are executed output will be stored in 1. prometheus_${QUERY_SET}_${NAMESPACE}_${CONTAINER}_stats.csv - capturing status time taken, start and end time, namespace, container, metric_name and query 2. ${QUERY_SET}_${NAMESPACE}_${CONTAINER}_response.log - logs the query and query output + +In case of running all query sets for all namespaces and containers - along with the respective query set data, consolidated output data will be stored in +1. metric_time_for_all_queries_${NAMESPACE}_${CONTAINER}.csv - this file will contain time taken by each metric for all the query sets for a given namespace and container +2. 
total_time_for_all_queries.csv - this file captures total time taken by each query set for all the namespaces and containers ``` To capture time taken to run all the query sets, @@ -40,6 +44,12 @@ To capture time taken to run all the query sets, /tests/scripts/local_monitoring_tests -a ``` +To capture time taken to run all the query sets for all the namespaces and containers present in the cluster + +``` +/tests/scripts/local_monitoring_tests -A +``` + To capture time taken to run the individual query set for "default" namespace and "app-container" container, ``` diff --git a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh index e1c1603bc..73c29eec2 100755 --- a/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh +++ b/tests/scripts/local_monitoring_tests/test_prometheus_query_execution_time.sh @@ -414,6 +414,7 @@ function usage() { echo "d = duration for equally dividing the time range for eg. dividing 15 days into 5 days duration and executing the grouped_queries" echo "p = partitions in time range for eg. dividing 15 days into 5 days duration with 3 partitions" echo "a = Flag to run all the query sets to capture the time taken" + echo "A = Flag to run all the query sets for all the namespaces and containers present in the cluster" echo "h = help" exit 1 @@ -508,6 +509,7 @@ run_all_queries() { query_name=${queries_collection[i]} declare -n current_queries="${queries_collection[i]}" + echo $query_name # Output file to store the results OUTPUT_FILE="prometheus_${query_name}_${namespace}_${container}_stats.csv" RESPONSE_LOG_FILE="${query_name}_${namespace}_${container}_response.log" @@ -587,6 +589,7 @@ else # Get the query set get_queries "$QUERY_SET" queries + echo "Running $QUERY_SET query set" # Output file to store the results OUTPUT_FILE="prometheus_${QUERY_SET}_${NAMESPACE}_${CONTAINER}_stats.csv"