diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml new file mode 100644 index 0000000..3fffc93 --- /dev/null +++ b/.github/workflows/go.yml @@ -0,0 +1,21 @@ +name: Go +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + name: Unit tests on Go ${{ matrix.go }} ${{ matrix.platform }} + steps: + - uses: actions/checkout@v4 + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: '1.22.x' + - uses: actions/cache@v3 + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} + - name: Run unit tests. + env: + GOBIN: /tmp/.bin + run: make test diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 0ac408c..c75860d 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -1,31 +1,30 @@ --- -# This action is synced from https://github.com/prometheus/prometheus name: golangci-lint on: push: paths: - - "go.sum" - - "go.mod" - - "**.go" - - ".github/workflows/golangci-lint.yml" - - ".golangci.yml" - pull_request: + - "**.go" + - "scripts/errcheck_excludes.txt" + - ".github/workflows/golangci-lint.yml" + - ".golangci.yml" + +permissions: # added using https://github.com/step-security/secure-repo + contents: read + +env: + GO_VERSION: stable + GOLANGCI_LINT_VERSION: v1.60 jobs: - golangci: - name: lint + golangci-lint: runs-on: ubuntu-latest steps: - - name: Checkout repository - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.1.0 - - name: install Go - uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # v3.5.0 - with: - go-version: 1.20.x - - name: Install snmp_exporter/generator dependencies - run: sudo apt-get update && sudo apt-get -y install libsnmp-dev - if: github.repository == 'prometheus/snmp_exporter' - - name: Lint - uses: golangci/golangci-lint-action@3a919529898de77ec3da873e3063ca4b10e7f5cc # v3.7.0 - with: - version: v1.54.2 + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + 
go-version: ${{ env.GO_VERSION }} + - name: golangci-lint mtypes + uses: golangci/golangci-lint-action@v6 + with: + version: ${{ env.GOLANGCI_LINT_VERSION }} + working-directory: ./tools/mtypes diff --git a/.golangci.yml b/.golangci.yml index f3b5429..edf170c 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,8 +1,135 @@ +# Generally copied from prometheus/prometheus. +# TODO(bwplotka): Add golangcilint, right now not run: - deadline: 5m - skip-files: - # Skip autogenerated files. - - ^.*\.(pb|y)\.go$ + timeout: 15m output: sort-results: true + +linters: + enable: + - depguard + - errorlint + - gocritic + - godot + - gofumpt + - goimports + - misspell + - nolintlint + - perfsprint + - predeclared + - revive + - testifylint + - unconvert + - unused + - usestdlibvars + - whitespace + - loggercheck + +issues: + max-issues-per-linter: 0 + max-same-issues: 0 + # The default exclusions are too aggressive. For one, they + # essentially disable any linting on doc comments. We disable + # default exclusions here and add exclusions fitting our codebase + # further down. + exclude-use-default: false + exclude-files: + # Skip autogenerated files. + - ^.*\.(pb|y)\.go$ + exclude-rules: + - linters: + - errcheck + # Taken from the default exclusions (that are otherwise disabled above). + text: Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked + - linters: + - govet + # We use many Seek methods that do not follow the usual pattern. 
+ text: "stdmethods: method Seek.* should have signature Seek" + - path: _test.go + linters: + - errcheck + - linters: + - godot + source: "^// ===" + - linters: + - perfsprint + text: "fmt.Sprintf can be replaced with string concatenation" +linters-settings: + depguard: + rules: + main: + deny: + - pkg: "sync/atomic" + desc: "Use go.uber.org/atomic instead of sync/atomic" + - pkg: "github.com/stretchr/testify/assert" + desc: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert" + - pkg: "github.com/go-kit/kit/log" + desc: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log" + - pkg: "io/ioutil" + desc: "Use corresponding 'os' or 'io' functions instead." + - pkg: "regexp" + desc: "Use github.com/grafana/regexp instead of regexp" + - pkg: "github.com/pkg/errors" + desc: "Use 'errors' or 'fmt' instead of github.com/pkg/errors" + - pkg: "gzip" + desc: "Use github.com/klauspost/compress instead of gzip" + - pkg: "zlib" + desc: "Use github.com/klauspost/compress instead of zlib" + - pkg: "golang.org/x/exp/slices" + desc: "Use 'slices' instead." + errcheck: + exclude-functions: + # Don't flag lines such as "io.Copy(io.Discard, resp.Body)". + - io.Copy + # The next two are used in HTTP handlers, any error is handled by the server itself. + - io.WriteString + - (net/http.ResponseWriter).Write + # No need to check for errors on server's shutdown. + - (*net/http.Server).Shutdown + # Never check for logger errors. + - (github.com/go-kit/log.Logger).Log + goimports: + local-prefixes: github.com/prometheus/prometheus + gofumpt: + extra-rules: true + perfsprint: + # Optimizes `fmt.Errorf`. + errorf: false + revive: + # By default, revive will enable only the linting rules that are named in the configuration file. + # So, it's needed to explicitly enable all required rules here. 
+ rules: + # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md + - name: blank-imports + - name: comment-spacings + - name: context-as-argument + arguments: + # Allow functions with test or bench signatures. + - allowTypesBefore: "*testing.T,testing.TB" + - name: context-keys-type + - name: dot-imports + - name: empty-block + - name: error-naming + - name: error-return + - name: error-strings + - name: errorf + - name: exported + - name: increment-decrement + - name: indent-error-flow + - name: package-comments + - name: range + - name: receiver-naming + - name: redefines-builtin-id + - name: superfluous-else + - name: time-naming + - name: unexported-return + - name: unreachable-code + - name: var-declaration + - name: var-naming + disabled: true # Consistency with dto type. + testifylint: + disable: + - float-compare + - go-require + enable-all: true diff --git a/Makefile b/Makefile index ed5934e..7eac208 100644 --- a/Makefile +++ b/Makefile @@ -8,13 +8,13 @@ help: @awk 'BEGIN {FS = ": ##"; printf "Usage:\n make \n\nTargets:\n"} /^[a-zA-Z0-9_\.\-\/%]+: ##/ { printf " %-45s %s\n", $$1, $$2 }' $(MAKEFILE_LIST) .PHONY: check-deps -check-deps: ## Check local dependencies. +check-deps: $(GOMPLATE) ## Check local dependencies. @command -v gcloud >/dev/null 2>&1 || { echo 'Please install gcloud (https://cloud.google.com/sdk/gcloud#download_and_install_the)'; exit 1; } @command -v go >/dev/null 2>&1 || { echo 'Please install go (https://go.dev/doc/install)'; exit 1; } @command -v kubectl >/dev/null 2>&1 || { echo 'Please install kubectl'; exit 1; } .PHONY: start -start: check-deps ## Start benchmark on the current cluster. +start: check-deps ## Start a new benchmark on the current cluster. @test -n "$(BENCH_NAME)" || (echo "BENCH_NAME variable is not set, what name for this benchmark you want to use?" ; exit 1) @# TODO(bwplotka): Check against cluster mismatches. @# TODO(bwplotka): Check if this benchmark is already running. 
@@ -41,3 +41,23 @@ cluster-destroy: check-deps ## Tear down the benchmarking GKE cluster. .PHONY: lint lint: ## Lint resources. bash ./scripts/shellcheck.sh + +GOMODS := $(shell find . -name "go.mod" | grep -v .bingo | xargs dirname) +.PHONY: test +test: + @for gomod in $(GOMODS); do \ + cd $$gomod && go test -v ./...; \ + done + +GOFUMPT = gofumpt +$(GOFUMPT): + @go install mvdan.cc/gofumpt@latest + +GO_FILES = $(shell find . -path ./vendor -prune -o -name '*.go' -print) + +.PHONY: format +format: $(GOFUMPT) $(GOIMPORTS) + @echo ">> formating imports" + @$(GOIMPORTS) -w $(GO_FILES) + @echo ">> gofumpt-ing the code; golangci-lint requires this" + @$(GOFUMPT) -extra -w $(GO_FILES) diff --git a/README.md b/README.md index ff5e2c5..751e386 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,34 @@ # prombenchy This repo contains very simplistic, experimental and opinionated [prombench](https://github.com/prometheus/test-infra/tree/master/prombench) alternative -that focuses on benchmarking (and testing) the collection mode for Prometheus metrics (discovery + scrape + basic processing + remote write) on GKE. +that focuses on benchmarking (and testing) the agents, so collection modes for Prometheus metrics (discovery + scrape + basic processing + remote write/alternative protocols) on GKE. -This may or may not end up as the another mode/feature/version of prombench itself, but feel free to use it as you wish. +Feel free to use it as you wish. ### Usage -TBD (: +Check `make help` on what's possible. Then if anything is failing check `scripts/` bash scripts and adjust according to your setup. Those are shell scripts, so they will always be flaky for edge cases or races, but it's better than nothing (: + +The general flow looks as follows: + +* You set up your GKE cluster once: `make cluster-setup CLUSTER_NAME=my-prombenchy` +* Then to set up any benchmark run you do `make start CLUSTER_NAME=my-prombenchy BENCH_NAME= SCENARIO=./manifests/scenarios/gmp`.
This will set up the node-pool and your collector (e.g. as daemon set or separate pod - up to you, as long as you use the correct node selection!) + +You can start as many scenarios as you want on the single cluster (make sure to use unique `BENCH_NAME` though!) + +The scenario is a path to the "collector" manifest, so anything that will scrape `./manifests/load/avalanche.exampletarget.yaml`. Feel free to adjust anything in `./manifests/scenarios/` or add your own. + +This setup uses separate Prometheus for gathering metrics about core resources and collectors (available locally and in GCM). Make sure your pod has `app=collector` label and relevant port name has `-ins` suffix, to be scraped by this core Prometheus. There is also a dashboard you can apply to GCM in `./dashboards/`. + +* `make stop CLUSTER_NAME=my-prombenchy BENCH_NAME= SCENARIO=./manifests/scenarios/gmp` kills the node-pool and experiment. + +### TODOs + +* [ ] All scenarios are GMP aware, so they send data to GCM. In the future, we plan to also benchmark remote-write or OTLP, but proper test receivers would need to be added. Help welcome! +* [ ] Probably Go code for scripts instead of bash, for reliability. +* [ ] Clean up svc account permissions on stopped scenarios. +* [ ] Make config-reloader work with otel-collector (annoying to delete pod after config changes). ### Credits -This repo was started by sharing a lot of design and resources from https://github.com/prometheus/test-infra repo, which we maintain in the -Prometheus team mostly for [prombench](https://github.com/prometheus/test-infra/tree/master/prombench) functionality. Kudos to prombench -project for the hard work so far! +This repo was started by sharing a lot of design and resources from https://github.com/prometheus/test-infra repo, which we maintain in the Prometheus team mostly for [prombench](https://github.com/prometheus/test-infra/tree/master/prombench) functionality. Kudos to prombench project for the hard work so far!
Since then, it was completely redesigned and simplified. diff --git a/dashboards/prombenchy-gcm.json b/dashboards/prombenchy-gcm.json index d161170..96a11df 100644 --- a/dashboards/prombenchy-gcm.json +++ b/dashboards/prombenchy-gcm.json @@ -7,63 +7,57 @@ "width": 24, "height": 13, "widget": { - "title": "Collector samples per sec scraped (should be 10.6k)", + "title": "Collector samples per sec scraped (should be ~10.6k)", "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, "dataSets": [ { - "timeSeriesQuery": { - "prometheusQuery": "rate(prometheus_tsdb_head_samples_appended_total{cluster=\"bwplotka-prombenchy\", job=~\".*collector.*\"}[5m])", - "unitOverride": "", - "outputFullDuration": false - }, "plotType": "LINE", - "legendTemplate": "", "targetAxis": "Y1", - "dimensions": [], - "measures": [], - "breakdowns": [] + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (rate(prometheus_tsdb_head_samples_appended_total{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[5m]))\n", + "unitOverride": "" + } }, { + "plotType": "LINE", + "targetAxis": "Y1", "timeSeriesQuery": { - "prometheusQuery": "rate(prometheus_agent_samples_appended_total{cluster=\"bwplotka-prombenchy\", job=~\".*collector.*\"}[5m]) ", - "unitOverride": "", - "outputFullDuration": false - }, + "prometheusQuery": "sum by (pod, namespace) (rate(prometheus_agent_samples_appended_total{cluster=\"bwplotka-prombenchy\", job=\"collector\"}[5m]))", + "unitOverride": "" + } + }, + { "plotType": "LINE", - "legendTemplate": "", "targetAxis": "Y1", - "dimensions": [], - "measures": [], - "breakdowns": [] + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (rate(otelcol_receiver_accepted_metric_points_total{cluster=\"bwplotka-prombenchy\"}[5m]))", + "unitOverride": "" + } } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" - }, - "chartOptions": { - "mode": "COLOR", - "showLegend": false, - "displayHorizontal": false } - }, - "visualElementId": 0, - "id": "" + 
} } }, { "xPos": 24, + "yPos": 16, "width": 24, "height": 13, "widget": { - "title": "Collector Memory (working set and heap)", "xyChart": { "dataSets": [ { "timeSeriesQuery": { - "prometheusQuery": "container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", pod=~\"collector-.*\"}", - "unitOverride": "", + "prometheusQuery": "sum by (__name__, pod) (container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", pod=~\"collector-.*\"})", + "unitOverride": "By", "outputFullDuration": false }, "plotType": "LINE", @@ -75,8 +69,8 @@ }, { "timeSeriesQuery": { - "prometheusQuery": "go_memstats_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"}\n\n", - "unitOverride": "", + "prometheusQuery": "sum by (__name__, pod) (go_memstats_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"})\n\n", + "unitOverride": "By", "outputFullDuration": false }, "plotType": "LINE", @@ -98,7 +92,7 @@ "displayHorizontal": false } }, - "visualElementId": 0, + "title": "Collector Memory (working set and heap)", "id": "" } }, @@ -107,87 +101,64 @@ "width": 24, "height": 12, "widget": { - "title": "Collector Active Series (should be 160k)", + "title": "Collector Active Series (should be ~160k)", "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, "dataSets": [ { - "timeSeriesQuery": { - "prometheusQuery": "prometheus_tsdb_head_series{cluster=\"bwplotka-prombenchy\", job=\"managed-prometheus-collector\"}", - "unitOverride": "", - "outputFullDuration": false - }, "plotType": "LINE", - "legendTemplate": "", "targetAxis": "Y1", - "dimensions": [], - "measures": [], - "breakdowns": [] + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (prometheus_tsdb_head_series{cluster=\"bwplotka-prombenchy\", job=\"collector\"})", + "unitOverride": "" + } }, { - "timeSeriesQuery": { - "prometheusQuery": 
"prometheus_agent_active_series{cluster=\"bwplotka-prombenchy\", job=\"managed-prometheus-collector\"}", - "unitOverride": "", - "outputFullDuration": false - }, "plotType": "LINE", - "legendTemplate": "", "targetAxis": "Y1", - "dimensions": [], - "measures": [], - "breakdowns": [] + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (prometheus_agent_active_series{cluster=\"bwplotka-prombenchy\", job=\"collector\"})", + "unitOverride": "" + } } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" - }, - "chartOptions": { - "mode": "COLOR", - "showLegend": false, - "displayHorizontal": false } - }, - "visualElementId": 0, - "id": "" + } } }, { "xPos": 24, - "yPos": 13, + "yPos": 29, "width": 24, "height": 12, "widget": { "title": "Collector CPU Use", "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, "dataSets": [ { - "timeSeriesQuery": { - "prometheusQuery": "rate(process_cpu_seconds_total{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", job=\"managed-prometheus-collector\"}[${__interval}])", - "unitOverride": "", - "outputFullDuration": false - }, "plotType": "LINE", - "legendTemplate": "", "targetAxis": "Y1", - "dimensions": [], - "measures": [], - "breakdowns": [] + "timeSeriesQuery": { + "prometheusQuery": "sum by (pod, namespace) (rate(process_cpu_seconds_total{cluster=\"bwplotka-prombenchy\", container=~\"prometheus|otel-collector\", job=\"collector\"}[${__interval}]))", + "unitOverride": "" + } } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" - }, - "chartOptions": { - "mode": "COLOR", - "showLegend": false, - "displayHorizontal": false } - }, - "visualElementId": 0, - "id": "" + } } }, { @@ -195,45 +166,35 @@ "width": 24, "height": 10, "widget": { - "title": "GCM Ingested series for one metric (should be 160)", + "title": "GCM Ingested series for one counter (should be 160 with 320 staleness spikes)", "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, "dataSets": [ { - "timeSeriesQuery": { - 
"prometheusQuery": "count(avalanche_metric_mmmmm_0_0{cluster=\"bwplotka-prombenchy\"})", - "unitOverride": "", - "outputFullDuration": false - }, "plotType": "LINE", - "legendTemplate": "", "targetAxis": "Y1", - "dimensions": [], - "measures": [], - "breakdowns": [] + "timeSeriesQuery": { + "prometheusQuery": "count by (namespace)(avalanche_counter_metric_mmmmm_0_0_total{cluster=\"bwplotka-prombenchy\"})", + "unitOverride": "" + } } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" - }, - "chartOptions": { - "mode": "COLOR", - "showLegend": false, - "displayHorizontal": false } - }, - "visualElementId": 0, - "id": "" + } } }, { "xPos": 24, - "yPos": 25, + "yPos": 41, "width": 24, "height": 10, "widget": { - "title": "Collector containers uptime", "xyChart": { "dataSets": [ { @@ -261,22 +222,21 @@ "displayHorizontal": false } }, - "visualElementId": 0, + "title": "(broken) Collector containers uptime ", "id": "" } }, { "xPos": 24, - "yPos": 35, + "yPos": 51, "width": 24, "height": 13, "widget": { - "title": "Samples scraped by collector per all scrapes (only visible if collector sends data to GCM)", "xyChart": { "dataSets": [ { "timeSeriesQuery": { - "prometheusQuery": "sum(scrape_samples_scraped{cluster=\"bwplotka-prombenchy\", namespace=\"test1\"}) by (namespace, job)", + "prometheusQuery": "sum(scrape_samples_scraped{cluster=\"bwplotka-prombenchy\", job=\"avalanche\"}) by (namespace)", "unitOverride": "", "outputFullDuration": false }, @@ -299,7 +259,7 @@ "displayHorizontal": false } }, - "visualElementId": 0, + "title": "Samples scraped by collector per all scrapes (only visible if collector sends data to GCM)", "id": "" } }, @@ -308,13 +268,12 @@ "width": 24, "height": 13, "widget": { - "title": "Collector memory mapped file bytes", "xyChart": { "dataSets": [ { "timeSeriesQuery": { "prometheusQuery": "container_memory_mapped_file{cluster=\"bwplotka-prombenchy\", image=~\".*prometheus.*\", pod=~\"collector.*\"}", - "unitOverride": "", + 
"unitOverride": "By", "outputFullDuration": false }, "plotType": "LINE", @@ -336,7 +295,7 @@ "displayHorizontal": false } }, - "visualElementId": 0, + "title": "Collector memory mapped file bytes", "id": "" } }, @@ -347,38 +306,182 @@ "widget": { "title": "Collector replay time", "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, "dataSets": [ { - "timeSeriesQuery": { - "prometheusQuery": "prometheus_tsdb_data_replay_duration_seconds{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"}\n\n", - "unitOverride": "", - "outputFullDuration": false - }, "plotType": "LINE", - "legendTemplate": "", "targetAxis": "Y1", - "dimensions": [], - "measures": [], - "breakdowns": [] + "timeSeriesQuery": { + "prometheusQuery": "sum by (namespace, pod) (prometheus_tsdb_data_replay_duration_seconds{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=\"collector\"})\n\n", + "unitOverride": "" + } } ], "thresholds": [], "yAxis": { "label": "", "scale": "LINEAR" - }, - "chartOptions": { - "mode": "COLOR", - "showLegend": false, - "displayHorizontal": false } + } + } + }, + { + "xPos": 24, + "width": 8, + "height": 8, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "prometheusQuery": "max_over_time(container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", pod=~\"collector-.*\"}[10h])", + "unitOverride": "", + "outputFullDuration": true + }, + "gaugeView": { + "lowerBound": 0, + "upperBound": 1 + }, + "thresholds": [], + "dimensions": [], + "measures": [] }, - "visualElementId": 0, + "title": "RSS max_over_10h", "id": "" } + }, + { + "xPos": 32, + "width": 8, + "height": 8, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "prometheusQuery": "max_over_time(go_memstats_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"}[10h])", + "unitOverride": "", + "outputFullDuration": true + }, + "gaugeView": { + "lowerBound": 0, + "upperBound": 1 + }, + "thresholds": 
[], + "dimensions": [], + "measures": [] + }, + "title": "Heap max_over_10h", + "id": "" + } + }, + { + "xPos": 40, + "width": 8, + "height": 8, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "prometheusQuery": "rate(process_cpu_seconds_total{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", job=\"managed-prometheus-collector\"}[10h])", + "unitOverride": "", + "outputFullDuration": true + }, + "gaugeView": { + "lowerBound": 0, + "upperBound": 1 + }, + "thresholds": [], + "dimensions": [], + "measures": [] + }, + "title": "CPU rate 10h", + "id": "" + } + }, + { + "xPos": 24, + "yPos": 8, + "width": 8, + "height": 8, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "prometheusQuery": "avg_over_time(container_memory_working_set_bytes{cluster=\"bwplotka-prombenchy\", container=\"prometheus\", pod=~\"collector-.*\"}[10h])", + "unitOverride": "", + "outputFullDuration": true + }, + "gaugeView": { + "lowerBound": 0, + "upperBound": 1 + }, + "thresholds": [], + "dimensions": [], + "measures": [] + }, + "title": "RSS avg_over_10h", + "id": "" + } + }, + { + "xPos": 32, + "yPos": 8, + "width": 8, + "height": 8, + "widget": { + "scorecard": { + "timeSeriesQuery": { + "prometheusQuery": "avg_over_time(go_memstats_heap_alloc_bytes{cluster=\"bwplotka-prombenchy\",container=\"prometheus\", job=~\".*collector\"}[10h])", + "unitOverride": "", + "outputFullDuration": true + }, + "gaugeView": { + "lowerBound": 0, + "upperBound": 1 + }, + "thresholds": [], + "dimensions": [], + "measures": [] + }, + "title": "Heap avg_over_10h", + "id": "" + } + }, + { + "yPos": 61, + "width": 24, + "height": 12, + "widget": { + "title": "Avg Avalanche Target Size", + "xyChart": { + "chartOptions": { + "mode": "COLOR" + }, + "dataSets": [ + { + "plotType": "LINE", + "targetAxis": "Y1", + "timeSeriesQuery": { + "prometheusQuery": "avg by (namespace, job) (scrape_samples_scraped{cluster=\"bwplotka-prombenchy\"})", + "unitOverride": "" + } + } + ], + "thresholds": [], + "yAxis": 
{ + "label": "", + "scale": "LINEAR" + } + } + } } ] }, - "dashboardFilters": [], + "dashboardFilters": [ + { + "labelKey": "cluster", + "templateVariable": "", + "stringValue": "bwplotka-prombenchy", + "filterType": "RESOURCE_LABEL", + "valueType": "STRING" + } + ], "labels": {} } diff --git a/manifests/core/1b_prometheus.yaml b/manifests/core/1b_prometheus.yaml index 5fb66c4..732c0d6 100644 --- a/manifests/core/1b_prometheus.yaml +++ b/manifests/core/1b_prometheus.yaml @@ -103,16 +103,13 @@ data: relabel_configs: - action: keep source_labels: [__meta_kubernetes_pod_label_app] - regex: parca|prometheus|managed-prometheus-collector + regex: parca|prometheus|collector - action: replace source_labels: [__meta_kubernetes_pod_label_app] target_label: job - action: replace source_labels: [__meta_kubernetes_namespace] target_label: namespace - - action: replace - source_labels: [__meta_kubernetes_pod_label_prometheus] - target_label: prometheus - action: replace source_labels: [__meta_kubernetes_pod_node_name] target_label: node_name @@ -134,18 +131,18 @@ metadata: namespace: core labels: app: prometheus - prometheus: meta + collector: meta spec: replicas: 1 selector: matchLabels: app: prometheus - prometheus: meta + collector: meta template: metadata: labels: app: prometheus - prometheus: meta + collector: meta spec: serviceAccountName: prometheus automountServiceAccountToken: true @@ -205,7 +202,7 @@ spec: mountPath: /data subPath: prometheus-data ports: - - name: prom-web-ins + - name: prom-web-ins # -ins, tells core Prometheus to scrape it. 
containerPort: 9090 volumes: - name: config @@ -230,7 +227,7 @@ metadata: name: prometheus namespace: core labels: - prometheus: meta + collector: meta app: prometheus spec: type: NodePort @@ -240,4 +237,4 @@ spec: targetPort: prom-web-ins selector: app: prometheus - prometheus: meta + collector: meta diff --git a/manifests/gmp-operator/1_setup.yaml b/manifests/gmp-operator/1_setup.yaml deleted file mode 100644 index c6f8e17..0000000 --- a/manifests/gmp-operator/1_setup.yaml +++ /dev/null @@ -1,3524 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# NOTE: This file is autogenerated. -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.14.1-gmp - name: clusternodemonitorings.monitoring.googleapis.com -spec: - group: monitoring.googleapis.com - names: - kind: ClusterNodeMonitoring - listKind: ClusterNodeMonitoringList - plural: clusternodemonitorings - singular: clusternodemonitoring - scope: Cluster - versions: - - name: v1 - schema: - openAPIV3Schema: - description: ClusterNodeMonitoring defines monitoring for a set of nodes. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - Specification of desired node selection for target discovery by - Prometheus. - properties: - endpoints: - description: The endpoints to scrape on the selected nodes. - items: - description: |- - ScrapeNodeEndpoint specifies a Prometheus metrics endpoint on a node to scrape. - It contains all the fields used in the ScrapeEndpoint except for port and HTTPClientConfig. - properties: - interval: - default: 1m - description: Interval at which to scrape metrics. Must be a valid Prometheus duration. - pattern: ^((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)$ - type: string - metricRelabeling: - description: |- - Relabeling rules for metrics scraped from this endpoint. Relabeling rules that - override protected target labels (project_id, location, cluster, namespace, job, - instance, or __address__) are not permitted. The labelmap action is not permitted - in general. - items: - description: RelabelingRule defines a single Prometheus relabeling rule. - properties: - action: - description: Action to perform based on regex matching. Defaults to 'replace'. - type: string - modulus: - description: Modulus to take of the hash of the source label values. - format: int64 - type: integer - regex: - description: Regular expression against which the extracted value is matched. Defaults to '(.*)'. 
- type: string - replacement: - description: |- - Replacement value against which a regex replace is performed if the - regular expression matches. Regex capture groups are available. Defaults to '$1'. - type: string - separator: - description: Separator placed between concatenated source label values. Defaults to ';'. - type: string - sourceLabels: - description: |- - The source labels select values from existing labels. Their content is concatenated - using the configured separator and matched against the configured regular expression - for the replace, keep, and drop actions. - items: - type: string - type: array - targetLabel: - description: |- - Label to which the resulting value is written in a replace action. - It is mandatory for replace actions. Regex capture groups are available. - type: string - type: object - type: array - params: - additionalProperties: - items: - type: string - type: array - description: HTTP GET params to use when scraping. - type: object - path: - description: HTTP path to scrape metrics from. Defaults to "/metrics". - type: string - scheme: - description: Protocol scheme to use to scrape. - type: string - timeout: - description: |- - Timeout for metrics scrapes. Must be a valid Prometheus duration. - Must not be larger then the scrape interval. - type: string - type: object - type: array - limits: - description: Limits to apply at scrape time. - properties: - labelNameLength: - description: |- - Maximum label name length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labelValueLength: - description: |- - Maximum label value length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labels: - description: |- - Maximum number of labels accepted for a single sample. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - samples: - description: |- - Maximum number of samples accepted within a single scrape. 
- Uses Prometheus default if left unspecified. - format: int64 - type: integer - type: object - selector: - description: |- - Label selector that specifies which nodes are selected for this monitoring - configuration. If left empty all nodes are selected. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - required: - - endpoints - type: object - status: - description: Most recently observed status of the resource. - properties: - conditions: - description: Represents the latest available observations of a podmonitor's current state. - items: - description: MonitoringCondition describes the condition of a PodMonitoring. 
- properties: - lastTransitionTime: - description: Last time the condition transitioned from one status to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human-readable message indicating details about the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: MonitoringConditionType is the type of MonitoringCondition. - type: string - required: - - status - - type - type: object - type: array - observedGeneration: - description: The generation observed by the controller. - format: int64 - type: integer - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.14.1-gmp - name: clusterpodmonitorings.monitoring.googleapis.com -spec: - group: monitoring.googleapis.com - names: - kind: ClusterPodMonitoring - listKind: ClusterPodMonitoringList - plural: clusterpodmonitorings - singular: clusterpodmonitoring - scope: Cluster - versions: - - name: v1 - schema: - openAPIV3Schema: - description: |- - ClusterPodMonitoring defines monitoring for a set of pods, scoped to all - pods within the cluster. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. 
- Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - Specification of desired Pod selection for target discovery by - Prometheus. - properties: - endpoints: - description: The endpoints to scrape on the selected pods. - items: - description: ScrapeEndpoint specifies a Prometheus metrics endpoint to scrape. - properties: - authorization: - description: The HTTP authorization credentials for the targets. - properties: - credentials: - description: |- - Credentials references the Kubernetes secret's key with the credentials - (token) for the auth header to send along the request. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - type: - description: |- - The authentication type. Defaults to Bearer. - Basic will cause an error, as the BasicAuth object should be used instead. - type: string - type: object - basicAuth: - description: The HTTP basic authentication credentials for the targets. - properties: - password: - description: |- - Password references the Kubernetes secret's key with the password to use. - Optional, as in previous resource versions we allowed no credentials. 
- properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - username: - description: The username for authentication. - type: string - type: object - interval: - default: 1m - description: Interval at which to scrape metrics. Must be a valid Prometheus duration. - pattern: ^((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)$ - type: string - metricRelabeling: - description: |- - Relabeling rules for metrics scraped from this endpoint. Relabeling rules that - override protected target labels (project_id, location, cluster, namespace, job, - instance, or __address__) are not permitted. The labelmap action is not permitted - in general. - items: - description: RelabelingRule defines a single Prometheus relabeling rule. - properties: - action: - description: Action to perform based on regex matching. Defaults to 'replace'. - type: string - modulus: - description: Modulus to take of the hash of the source label values. - format: int64 - type: integer - regex: - description: Regular expression against which the extracted value is matched. Defaults to '(.*)'. - type: string - replacement: - description: |- - Replacement value against which a regex replace is performed if the - regular expression matches. Regex capture groups are available. Defaults to '$1'. - type: string - separator: - description: Separator placed between concatenated source label values. Defaults to ';'. 
- type: string - sourceLabels: - description: |- - The source labels select values from existing labels. Their content is concatenated - using the configured separator and matched against the configured regular expression - for the replace, keep, and drop actions. - items: - type: string - type: array - targetLabel: - description: |- - Label to which the resulting value is written in a replace action. - It is mandatory for replace actions. Regex capture groups are available. - type: string - type: object - type: array - oauth2: - description: The OAuth2 client credentials used to fetch a token for the targets. - properties: - clientID: - description: Public identifier for the client. - type: string - clientSecret: - description: |- - ClientSecret references the Kubernetes secret's key with the client secret - token for Oauth2 flow. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - endpointParams: - additionalProperties: - type: string - description: Optional parameters to append to the token URL. - type: object - proxyUrl: - description: HTTP proxy server to use to connect to the targets. Encoded passwords are not supported. - type: string - scopes: - description: Scopes for the token request. - items: - type: string - type: array - tlsConfig: - description: Configures the token request's TLS settings. 
- properties: - ca: - description: |- - CA references the Kubernetes secret's key with the CA certificate to - validate API server certificate with. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - cert: - description: |- - Cert references the Kubernetes secret's key with the certificate (public - key) for client cert authentication to the server. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - insecureSkipVerify: - description: Disable target certificate validation. - type: boolean - key: - description: |- - Key references the Kubernetes secret's key with the private key - for client cert authentication to the server. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. 
- properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - maxVersion: - description: |- - Maximum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - minVersion: - description: |- - Minimum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - serverName: - description: Used to verify the hostname for the targets. - type: string - type: object - tokenURL: - description: The URL to fetch the token from. - type: string - required: - - clientID - - tokenURL - type: object - params: - additionalProperties: - items: - type: string - type: array - description: HTTP GET params to use when scraping. - type: object - path: - description: HTTP path to scrape metrics from. Defaults to "/metrics". - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to scrape. - The container metadata label is only populated if the port is referenced by name - because port numbers are not unique across containers. - x-kubernetes-int-or-string: true - proxyUrl: - description: HTTP proxy server to use to connect to the targets. Encoded passwords are not supported. - type: string - scheme: - description: Protocol scheme to use to scrape. - type: string - timeout: - description: |- - Timeout for metrics scrapes. 
Must be a valid Prometheus duration. - Must not be larger than the scrape interval. - type: string - tls: - description: Configures the scrape request's TLS settings. - properties: - ca: - description: |- - CA references the Kubernetes secret's key with the CA certificate to - validate API server certificate with. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - cert: - description: |- - Cert references the Kubernetes secret's key with the certificate (public - key) for client cert authentication to the server. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - insecureSkipVerify: - description: Disable target certificate validation. - type: boolean - key: - description: |- - Key references the Kubernetes secret's key with the private key - for client cert authentication to the server. 
- Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - namespace: - description: |- - Namespace of the secret to select from. - If empty the parent resource namespace will be chosen. - type: string - required: - - key - - name - type: object - type: object - maxVersion: - description: |- - Maximum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - minVersion: - description: |- - Minimum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - serverName: - description: Used to verify the hostname for the targets. - type: string - type: object - required: - - port - type: object - type: array - filterRunning: - description: |- - FilterRunning will drop any pods that are in the "Failed" or "Succeeded" - pod lifecycle. - See: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase - Specifically, this prevents scraping Succeeded pods from K8s jobs, which - could contribute to noisy logs or irrelevant metrics. - Additionally, it can mitigate issues with reusing stale target - labels in cases where Pod IPs are reused (e.g. spot containers). - See: https://github.com/GoogleCloudPlatform/prometheus-engine/issues/145 - type: boolean - limits: - description: Limits to apply at scrape time. 
- properties: - labelNameLength: - description: |- - Maximum label name length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labelValueLength: - description: |- - Maximum label value length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labels: - description: |- - Maximum number of labels accepted for a single sample. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - samples: - description: |- - Maximum number of samples accepted within a single scrape. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - type: object - selector: - description: |- - Label selector that specifies which pods are selected for this monitoring - configuration. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. 
A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - targetLabels: - description: |- - Labels to add to the Prometheus target for discovered endpoints. - The `instance` label is always set to `:` or `:` - if the scraped pod is controlled by a DaemonSet. - properties: - fromPod: - description: |- - Labels to transfer from the Kubernetes Pod to Prometheus target labels. - Mappings are applied in order. - items: - description: |- - LabelMapping specifies how to transfer a label from a Kubernetes resource - onto a Prometheus target. - properties: - from: - description: Kubernetes resource label to remap. - type: string - to: - description: |- - Remapped Prometheus target label. - Defaults to the same name as `From`. - type: string - required: - - from - type: object - type: array - metadata: - description: |- - Pod metadata labels that are set on all scraped targets. - Permitted keys are `pod`, `container`, and `node` for PodMonitoring and - `pod`, `container`, `node`, and `namespace` for ClusterPodMonitoring. The `container` - label is only populated if the scrape port is referenced by name. - Defaults to [pod, container] for PodMonitoring and [namespace, pod, container] - for ClusterPodMonitoring. - If set to null, it will be interpreted as the empty list for PodMonitoring - and to [namespace] for ClusterPodMonitoring. This is for backwards-compatibility - only. - items: - type: string - type: array - type: object - required: - - endpoints - - selector - type: object - status: - description: Most recently observed status of the resource. - properties: - conditions: - description: Represents the latest available observations of a podmonitor's current state. - items: - description: MonitoringCondition describes the condition of a PodMonitoring. 
- properties: - lastTransitionTime: - description: Last time the condition transitioned from one status to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human-readable message indicating details about the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: MonitoringConditionType is the type of MonitoringCondition. - type: string - required: - - status - - type - type: object - type: array - endpointStatuses: - description: Represents the latest available observations of target state for each ScrapeEndpoint. - items: - properties: - activeTargets: - description: Total number of active targets. - format: int64 - type: integer - collectorsFraction: - description: |- - Fraction of collectors included in status, bounded [0,1]. - Ideally, this should always be 1. Anything less can - be considered a problem and should be investigated. - type: string - lastUpdateTime: - description: Last time this status was updated. - format: date-time - type: string - name: - description: The name of the ScrapeEndpoint. - type: string - sampleGroups: - description: A fixed sample of targets grouped by error type. - items: - properties: - count: - description: Total count of similar errors. - format: int32 - type: integer - sampleTargets: - description: Targets emitting the error message. - items: - properties: - health: - description: Health status. - type: string - labels: - additionalProperties: - description: A LabelValue is an associated value for a LabelName. - type: string - description: The label set, keys and values, of the target. - type: object - lastError: - description: Error message. 
- type: string - lastScrapeDurationSeconds: - description: Scrape duration in seconds. - type: string - type: object - type: array - type: object - type: array - unhealthyTargets: - description: Total number of active, unhealthy targets. - format: int64 - type: integer - required: - - name - type: object - type: array - observedGeneration: - description: The generation observed by the controller. - format: int64 - type: integer - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} - - deprecated: true - name: v1alpha1 - schema: - openAPIV3Schema: - description: ClusterPodMonitoring defines monitoring for a set of pods. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - Specification of desired Pod selection for target discovery by - Prometheus. - properties: - endpoints: - description: The endpoints to scrape on the selected pods. - items: - description: ScrapeEndpoint specifies a Prometheus metrics endpoint to scrape. - properties: - interval: - description: Interval at which to scrape metrics. Must be a valid Prometheus duration. - type: string - metricRelabeling: - description: |- - Relabeling rules for metrics scraped from this endpoint. 
Relabeling rules that - override protected target labels (project_id, location, cluster, namespace, job, - instance, or __address__) are not permitted. The labelmap action is not permitted - in general. - items: - description: RelabelingRule defines a single Prometheus relabeling rule. - properties: - action: - description: Action to perform based on regex matching. Defaults to 'replace'. - type: string - modulus: - description: Modulus to take of the hash of the source label values. - format: int64 - type: integer - regex: - description: Regular expression against which the extracted value is matched. Defaults to '(.*)'. - type: string - replacement: - description: |- - Replacement value against which a regex replace is performed if the - regular expression matches. Regex capture groups are available. Defaults to '$1'. - type: string - separator: - description: Separator placed between concatenated source label values. Defaults to ';'. - type: string - sourceLabels: - description: |- - The source labels select values from existing labels. Their content is concatenated - using the configured separator and matched against the configured regular expression - for the replace, keep, and drop actions. - items: - type: string - type: array - targetLabel: - description: |- - Label to which the resulting value is written in a replace action. - It is mandatory for replace actions. Regex capture groups are available. - type: string - type: object - type: array - params: - additionalProperties: - items: - type: string - type: array - description: HTTP GET params to use when scraping. - type: object - path: - description: HTTP path to scrape metrics from. Defaults to "/metrics". - type: string - port: - anyOf: - - type: integer - - type: string - description: Name or number of the port to scrape. - x-kubernetes-int-or-string: true - proxyUrl: - description: Proxy URL to scrape through. Encoded passwords are not supported. 
- type: string - scheme: - description: Protocol scheme to use to scrape. - type: string - timeout: - description: |- - Timeout for metrics scrapes. Must be a valid Prometheus duration. - Must not be larger then the scrape interval. - type: string - required: - - port - type: object - type: array - limits: - description: Limits to apply at scrape time. - properties: - labelNameLength: - description: |- - Maximum label name length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labelValueLength: - description: |- - Maximum label value length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labels: - description: |- - Maximum number of labels accepted for a single sample. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - samples: - description: |- - Maximum number of samples accepted within a single scrape. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - type: object - selector: - description: |- - Label selector that specifies which pods are selected for this monitoring - configuration. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - targetLabels: - description: Labels to add to the Prometheus target for discovered endpoints - properties: - fromPod: - description: |- - Labels to transfer from the Kubernetes Pod to Prometheus target labels. - Mappings are applied in order. - items: - description: |- - LabelMapping specifies how to transfer a label from a Kubernetes resource - onto a Prometheus target. - properties: - from: - description: Kubenetes resource label to remap. - type: string - to: - description: |- - Remapped Prometheus target label. - Defaults to the same name as `From`. - type: string - required: - - from - type: object - type: array - metadata: - description: |- - Pod metadata labels that are set on all scraped targets. - Permitted keys are `pod`, `container`, and `node` for PodMonitoring and - `pod`, `container`, `node`, and `namespace` for ClusterPodMonitoring. - Defaults to [pod, container] for PodMonitoring and [namespace, pod, container] - for ClusterPodMonitoring. - If set to null, it will be interpreted as the empty list for PodMonitoring - and to [namespace] for ClusterPodMonitoring. This is for backwards-compatibility - only. - items: - type: string - type: array - type: object - required: - - endpoints - - selector - type: object - status: - description: Most recently observed status of the resource. - properties: - conditions: - description: Represents the latest available observations of a podmonitor's current state. 
- items: - description: MonitoringCondition describes a condition of a PodMonitoring. - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human-readable message indicating details about the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: MonitoringConditionType is the type of MonitoringCondition. - type: string - required: - - status - - type - type: object - type: array - observedGeneration: - description: The generation observed by the controller. - format: int64 - type: integer - type: object - required: - - spec - type: object - served: true - storage: false - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.14.1-gmp - name: clusterrules.monitoring.googleapis.com -spec: - group: monitoring.googleapis.com - names: - kind: ClusterRules - listKind: ClusterRulesList - plural: clusterrules - singular: clusterrules - scope: Cluster - versions: - - name: v1 - schema: - openAPIV3Schema: - description: |- - ClusterRules defines Prometheus alerting and recording rules that are scoped - to the current cluster. Only metric data from the current cluster is processed - and all rule results have their project_id and cluster label preserved - for query processing. - If the location label is not preserved by the rule, it defaults to the cluster's location. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. 
- Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of rules to record and alert on. - properties: - groups: - description: A list of Prometheus rule groups. - items: - description: |- - RuleGroup declares rules in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - interval: - description: The interval at which to evaluate the rules. Must be a valid Prometheus duration. - type: string - name: - description: The name of the rule group. - type: string - rules: - description: A list of rules that are executed sequentially as part of this group. - items: - description: |- - Rule is a single rule in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - alert: - description: |- - Name of the alert to evaluate the expression as. - Only one of `record` and `alert` must be set. - type: string - annotations: - additionalProperties: - type: string - description: |- - A set of annotations to attach to alerts produced by the query expression. - Only valid if `alert` is set. - type: object - expr: - description: The PromQL expression to evaluate. - type: string - for: - description: |- - The duration to wait before a firing alert produced by this rule is sent to Alertmanager. - Only valid if `alert` is set. 
- type: string - labels: - additionalProperties: - type: string - description: A set of labels to attach to the result of the query expression. - type: object - record: - description: |- - Record the result of the expression to this metric name. - Only one of `record` and `alert` must be set. - type: string - required: - - expr - type: object - type: array - required: - - interval - - name - - rules - type: object - type: array - required: - - groups - type: object - status: - description: Most recently observed status of the resource. - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} - - deprecated: true - name: v1alpha1 - schema: - openAPIV3Schema: - description: |- - ClusterRules defines Prometheus alerting and recording rules that are scoped - to the current cluster. Only metric data from the current cluster is processed - and all rule results have their project_id and cluster label preserved - for query processing. - If the location label is not preserved by the rule, it defaults to the cluster's location. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of rules to record and alert on. - properties: - groups: - description: A list of Prometheus rule groups. 
- items: - description: |- - RuleGroup declares rules in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - interval: - description: The interval at which to evaluate the rules. Must be a valid Prometheus duration. - type: string - name: - description: The name of the rule group. - type: string - rules: - description: A list of rules that are executed sequentially as part of this group. - items: - description: |- - Rule is a single rule in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - alert: - description: |- - Name of the alert to evaluate the expression as. - Only one of `record` and `alert` must be set. - type: string - annotations: - additionalProperties: - type: string - description: |- - A set of annotations to attach to alerts produced by the query expression. - Only valid if `alert` is set. - type: object - expr: - description: The PromQL expression to evaluate. - type: string - for: - description: |- - The duration to wait before a firing alert produced by this rule is sent to Alertmanager. - Only valid if `alert` is set. - type: string - labels: - additionalProperties: - type: string - description: A set of labels to attach to the result of the query expression. - type: object - record: - description: |- - Record the result of the expression to this metric name. - Only one of `record` and `alert` must be set. - type: string - required: - - expr - type: object - type: array - required: - - interval - - name - - rules - type: object - type: array - required: - - groups - type: object - status: - description: Most recently observed status of the resource. 
- type: object - required: - - spec - type: object - served: true - storage: false - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.14.1-gmp - name: globalrules.monitoring.googleapis.com -spec: - group: monitoring.googleapis.com - names: - kind: GlobalRules - listKind: GlobalRulesList - plural: globalrules - singular: globalrules - scope: Cluster - versions: - - name: v1 - schema: - openAPIV3Schema: - description: |- - GlobalRules defines Prometheus alerting and recording rules that are scoped - to all data in the queried project. - If the project_id or location labels are not preserved by the rule, they default to - the values of the cluster. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of rules to record and alert on. - properties: - groups: - description: A list of Prometheus rule groups. - items: - description: |- - RuleGroup declares rules in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - interval: - description: The interval at which to evaluate the rules. Must be a valid Prometheus duration. - type: string - name: - description: The name of the rule group. 
- type: string - rules: - description: A list of rules that are executed sequentially as part of this group. - items: - description: |- - Rule is a single rule in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - alert: - description: |- - Name of the alert to evaluate the expression as. - Only one of `record` and `alert` must be set. - type: string - annotations: - additionalProperties: - type: string - description: |- - A set of annotations to attach to alerts produced by the query expression. - Only valid if `alert` is set. - type: object - expr: - description: The PromQL expression to evaluate. - type: string - for: - description: |- - The duration to wait before a firing alert produced by this rule is sent to Alertmanager. - Only valid if `alert` is set. - type: string - labels: - additionalProperties: - type: string - description: A set of labels to attach to the result of the query expression. - type: object - record: - description: |- - Record the result of the expression to this metric name. - Only one of `record` and `alert` must be set. - type: string - required: - - expr - type: object - type: array - required: - - interval - - name - - rules - type: object - type: array - required: - - groups - type: object - status: - description: Most recently observed status of the resource. - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} - - deprecated: true - name: v1alpha1 - schema: - openAPIV3Schema: - description: |- - GlobalRules defines Prometheus alerting and recording rules that are scoped - to all data in the queried project. - If the project_id or location labels are not preserved by the rule, they default to - the values of the cluster. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. 
- Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of rules to record and alert on. - properties: - groups: - description: A list of Prometheus rule groups. - items: - description: |- - RuleGroup declares rules in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - interval: - description: The interval at which to evaluate the rules. Must be a valid Prometheus duration. - type: string - name: - description: The name of the rule group. - type: string - rules: - description: A list of rules that are executed sequentially as part of this group. - items: - description: |- - Rule is a single rule in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - alert: - description: |- - Name of the alert to evaluate the expression as. - Only one of `record` and `alert` must be set. - type: string - annotations: - additionalProperties: - type: string - description: |- - A set of annotations to attach to alerts produced by the query expression. - Only valid if `alert` is set. - type: object - expr: - description: The PromQL expression to evaluate. - type: string - for: - description: |- - The duration to wait before a firing alert produced by this rule is sent to Alertmanager. - Only valid if `alert` is set. 
- type: string - labels: - additionalProperties: - type: string - description: A set of labels to attach to the result of the query expression. - type: object - record: - description: |- - Record the result of the expression to this metric name. - Only one of `record` and `alert` must be set. - type: string - required: - - expr - type: object - type: array - required: - - interval - - name - - rules - type: object - type: array - required: - - groups - type: object - status: - description: Most recently observed status of the resource. - type: object - required: - - spec - type: object - served: true - storage: false - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.14.1-gmp - name: operatorconfigs.monitoring.googleapis.com -spec: - group: monitoring.googleapis.com - names: - kind: OperatorConfig - listKind: OperatorConfigList - plural: operatorconfigs - singular: operatorconfig - scope: Namespaced - versions: - - name: v1 - schema: - openAPIV3Schema: - description: OperatorConfig defines configuration of the gmp-operator. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - collection: - description: |- - Collection specifies how the operator configures collection, including - scraping and an integrated export to Google Cloud Monitoring. - properties: - compression: - description: Compression enables compression of metrics collection data - enum: - - none - - gzip - type: string - credentials: - description: |- - A reference to GCP service account credentials with which Prometheus collectors - are run. 
It needs to have metric write permissions for all project IDs to which - data is written. - Within GKE, this can typically be left empty if the compute default - service account has the required permissions. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - externalLabels: - additionalProperties: - type: string - description: |- - ExternalLabels specifies external labels that are attached to all scraped - data before being written to Google Cloud Monitoring or any other additional exports - specified in the OperatorConfig. The precedence behavior matches that of Prometheus. - type: object - filter: - description: Filter limits which metric data is sent to Cloud Monitoring (it doesn't apply to additional exports). - properties: - matchOneOf: - description: |- - A list of Prometheus time series matchers. Every time series must match at least one - of the matchers to be exported. This field can be used equivalently to the match[] - parameter of the Prometheus federation endpoint to selectively export data. - Example: `["{job!='foobar'}", "{__name__!~'container_foo.*|container_bar.*'}"]` - items: - type: string - type: array - type: object - kubeletScraping: - description: Configuration to scrape the metric endpoints of the Kubelets. - properties: - interval: - description: The interval at which the metric endpoints are scraped. - type: string - tlsInsecureSkipVerify: - description: |- - TLSInsecureSkipVerify disables verifying the target cert. - This can be useful for clusters provisioned with kubeadm. 
- type: boolean - required: - - interval - type: object - type: object - exports: - description: |- - Exports is an EXPERIMENTAL feature that specifies additional, optional endpoints to export to, - on top of Google Cloud Monitoring collection. - Note: To disable integrated export to Google Cloud Monitoring specify a non-matching filter in the "collection.filter" field. - items: - properties: - url: - description: The URL of the endpoint that supports Prometheus Remote Write to export samples to. - type: string - required: - - url - type: object - type: array - features: - description: Features holds configuration for optional managed-collection features. - properties: - config: - description: Settings for the collector configuration propagation. - properties: - compression: - description: |- - Compression enables compression of the config data propagated by the operator to collectors - and the rule-evaluator. It is recommended to use the gzip option when using a large number of - ClusterPodMonitoring, PodMonitoring, GlobalRules, ClusterRules, and/or Rules. - enum: - - none - - gzip - type: string - type: object - targetStatus: - description: Configuration of target status reporting. - properties: - enabled: - description: Enable target status reporting. - type: boolean - type: object - type: object - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - managedAlertmanager: - default: - configSecret: - key: alertmanager.yaml - name: alertmanager - description: ManagedAlertmanager holds information for configuring the managed instance of Alertmanager. 
- properties: - configSecret: - description: |- - ConfigSecret refers to the name of a single-key Secret in the public namespace that - holds the managed Alertmanager config file. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - externalURL: - description: |- - ExternalURL is the URL under which Alertmanager is externally reachable - (for example, if Alertmanager is served via a reverse proxy). - Used for generating relative and absolute links back to Alertmanager - itself. If the URL has a path portion, it will be used to prefix all HTTP - endpoints served by Alertmanager. - If omitted, relevant URL components will be derived automatically. - type: string - type: object - metadata: - type: object - rules: - description: Rules specifies how the operator configures and deploys rule-evaluator. - properties: - alerting: - description: Alerting contains how the rule-evaluator configures alerting. - properties: - alertmanagers: - description: Alertmanagers contains endpoint configuration for designated Alertmanagers. - items: - description: |- - AlertmanagerEndpoints defines a selection of a single Endpoints object - containing alertmanager IPs to fire alerts against. - properties: - apiVersion: - description: |- - Version of the Alertmanager API that rule-evaluator uses to send alerts. It - can be "v1" or "v2". 
- type: string - authorization: - description: Authorization section for this alertmanager endpoint - properties: - credentials: - description: The secret's key that contains the credentials of the request - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: - description: |- - Set the authentication type. Defaults to Bearer, Basic will cause an - error - type: string - type: object - name: - description: Name of Endpoints object in Namespace. - type: string - namespace: - description: Namespace of Endpoints object. - type: string - pathPrefix: - description: Prefix for the HTTP path alerts are pushed to. - type: string - port: - anyOf: - - type: integer - - type: string - description: Port the Alertmanager API is exposed on. - x-kubernetes-int-or-string: true - scheme: - description: Scheme to use when firing alerts. - type: string - timeout: - description: Timeout is a per-target Alertmanager timeout when pushing alerts. - type: string - tls: - description: TLS Config to use for alertmanager connection. - properties: - ca: - description: Struct containing the CA cert to use for the targets. - properties: - configMap: - description: ConfigMap containing data to use for the targets. - properties: - key: - description: The key to select. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? 
- type: string - optional: - description: Specify whether the ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - secret: - description: Secret containing data to use for the targets. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - cert: - description: Struct containing the client cert file for the targets. - properties: - configMap: - description: ConfigMap containing data to use for the targets. - properties: - key: - description: The key to select. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - secret: - description: Secret containing data to use for the targets. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? 
- type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - insecureSkipVerify: - description: Disable target certificate validation. - type: boolean - keySecret: - description: Secret containing the client key file for the targets. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - maxVersion: - description: |- - Maximum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - minVersion: - description: |- - Minimum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - serverName: - description: Used to verify the hostname for the targets. - type: string - type: object - required: - - name - - namespace - - port - type: object - type: array - type: object - credentials: - description: |- - A reference to GCP service account credentials with which the rule - evaluator container is run. It needs to have metric read permissions - against queryProjectId and metric write permissions against all projects - to which rule results are written. 
- Within GKE, this can typically be left empty if the compute default - service account has the required permissions. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - externalLabels: - additionalProperties: - type: string - description: |- - ExternalLabels specifies external labels that are attached to any rule - results and alerts produced by rules. The precedence behavior matches that - of Prometheus. - type: object - generatorUrl: - description: |- - The base URL used for the generator URL in the alert notification payload. - Should point to an instance of a query frontend that gives access to queryProjectID. - type: string - queryProjectID: - description: |- - QueryProjectID is the GCP project ID to evaluate rules against. - If left blank, the rule-evaluator will try attempt to infer the Project ID - from the environment. - type: string - type: object - type: object - served: true - storage: true - - deprecated: true - name: v1alpha1 - schema: - openAPIV3Schema: - description: OperatorConfig defines configuration of the gmp-operator. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - collection: - description: Collection specifies how the operator configures collection. 
- properties: - credentials: - description: |- - A reference to GCP service account credentials with which Prometheus collectors - are run. It needs to have metric write permissions for all project IDs to which - data is written. - Within GKE, this can typically be left empty if the compute default - service account has the required permissions. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - externalLabels: - additionalProperties: - type: string - description: |- - ExternalLabels specifies external labels that are attached to all scraped - data before being written to Cloud Monitoring. The precedence behavior matches that - of Prometheus. - type: object - filter: - description: Filter limits which metric data is sent to Cloud Monitoring. - properties: - matchOneOf: - description: |- - A list Prometheus time series matchers. Every time series must match at least one - of the matchers to be exported. This field can be used equivalently to the match[] - parameter of the Prometheus federation endpoint to selectively export data. - Example: `["{job='prometheus'}", "{__name__=~'job:.*'}"]` - items: - type: string - type: array - type: object - type: object - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - rules: - description: Rules specifies how the operator configures and deployes rule-evaluator. - properties: - alerting: - description: Alerting contains how the rule-evaluator configures alerting. - properties: - alertmanagers: - description: Alertmanagers contains endpoint configuration for designated Alertmanagers. - items: - description: |- - AlertmanagerEndpoints defines a selection of a single Endpoints object - containing alertmanager IPs to fire alerts against. - properties: - apiVersion: - description: |- - Version of the Alertmanager API that rule-evaluator uses to send alerts. It - can be "v1" or "v2". - type: string - authorization: - description: Authorization section for this alertmanager endpoint - properties: - credentials: - description: The secret's key that contains the credentials of the request - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: - description: |- - Set the authentication type. Defaults to Bearer, Basic will cause an - error - type: string - type: object - name: - description: Name of Endpoints object in Namespace. - type: string - namespace: - description: Namespace of Endpoints object. - type: string - pathPrefix: - description: Prefix for the HTTP path alerts are pushed to. - type: string - port: - anyOf: - - type: integer - - type: string - description: Port the Alertmanager API is exposed on. 
- x-kubernetes-int-or-string: true - scheme: - description: Scheme to use when firing alerts. - type: string - timeout: - description: Timeout is a per-target Alertmanager timeout when pushing alerts. - type: string - tls: - description: TLS Config to use for alertmanager connection. - properties: - ca: - description: Struct containing the CA cert to use for the targets. - properties: - configMap: - description: ConfigMap containing data to use for the targets. - properties: - key: - description: The key to select. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - secret: - description: Secret containing data to use for the targets. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - cert: - description: Struct containing the client cert file for the targets. - properties: - configMap: - description: ConfigMap containing data to use for the targets. - properties: - key: - description: The key to select. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? 
- type: string - optional: - description: Specify whether the ConfigMap or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - secret: - description: Secret containing data to use for the targets. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - type: object - insecureSkipVerify: - description: Disable target certificate validation. - type: boolean - keySecret: - description: Secret containing the client key file for the targets. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - serverName: - description: Used to verify the hostname for the targets. - type: string - type: object - required: - - name - - namespace - - port - type: object - type: array - type: object - credentials: - description: |- - A reference to GCP service account credentials with which the rule - evaluator container is run. It needs to have metric read permissions - against queryProjectId and metric write permissions against all projects - to which rule results are written. 
- Within GKE, this can typically be left empty if the compute default - service account has the required permissions. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid? - type: string - optional: - description: Specify whether the Secret or its key must be defined - type: boolean - required: - - key - type: object - x-kubernetes-map-type: atomic - externalLabels: - additionalProperties: - type: string - description: |- - ExternalLabels specifies external labels that are attached to any rule - results and alerts produced by rules. The precedence behavior matches that - of Prometheus. - type: object - queryProjectID: - description: |- - QueryProjectID is the GCP project ID to evaluate rules against. - If left blank, the rule-evaluator will try attempt to infer the Project ID - from the environment. - type: string - type: object - type: object - served: true - storage: false ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.14.1-gmp - name: podmonitorings.monitoring.googleapis.com -spec: - group: monitoring.googleapis.com - names: - kind: PodMonitoring - listKind: PodMonitoringList - plural: podmonitorings - singular: podmonitoring - scope: Namespaced - versions: - - name: v1 - schema: - openAPIV3Schema: - description: |- - PodMonitoring defines monitoring for a set of pods, scoped to pods - within the PodMonitoring's namespace. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - Specification of desired Pod selection for target discovery by - Prometheus. - properties: - endpoints: - description: The endpoints to scrape on the selected pods. - items: - description: ScrapeEndpoint specifies a Prometheus metrics endpoint to scrape. - properties: - authorization: - description: The HTTP authorization credentials for the targets. - properties: - credentials: - description: |- - Credentials references the Kubernetes secret's key with the credentials - (token) for the auth header to send along the request. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - type: - description: |- - The authentication type. Defaults to Bearer. - Basic will cause an error, as the BasicAuth object should be used instead. - type: string - type: object - basicAuth: - description: The HTTP basic authentication credentials for the targets. - properties: - password: - description: |- - Password references the Kubernetes secret's key with the password to use. - Optional, as in previous resource versions we allowed no credentials. 
- properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - username: - description: The username for authentication. - type: string - type: object - interval: - default: 1m - description: Interval at which to scrape metrics. Must be a valid Prometheus duration. - pattern: ^((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)$ - type: string - metricRelabeling: - description: |- - Relabeling rules for metrics scraped from this endpoint. Relabeling rules that - override protected target labels (project_id, location, cluster, namespace, job, - instance, or __address__) are not permitted. The labelmap action is not permitted - in general. - items: - description: RelabelingRule defines a single Prometheus relabeling rule. - properties: - action: - description: Action to perform based on regex matching. Defaults to 'replace'. - type: string - modulus: - description: Modulus to take of the hash of the source label values. - format: int64 - type: integer - regex: - description: Regular expression against which the extracted value is matched. Defaults to '(.*)'. - type: string - replacement: - description: |- - Replacement value against which a regex replace is performed if the - regular expression matches. Regex capture groups are available. Defaults to '$1'. - type: string - separator: - description: Separator placed between concatenated source label values. Defaults to ';'. - type: string - sourceLabels: - description: |- - The source labels select values from existing labels. 
Their content is concatenated - using the configured separator and matched against the configured regular expression - for the replace, keep, and drop actions. - items: - type: string - type: array - targetLabel: - description: |- - Label to which the resulting value is written in a replace action. - It is mandatory for replace actions. Regex capture groups are available. - type: string - type: object - type: array - oauth2: - description: The OAuth2 client credentials used to fetch a token for the targets. - properties: - clientID: - description: Public identifier for the client. - type: string - clientSecret: - description: |- - ClientSecret references the Kubernetes secret's key with the client secret - token for Oauth2 flow. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - endpointParams: - additionalProperties: - type: string - description: Optional parameters to append to the token URL. - type: object - proxyUrl: - description: HTTP proxy server to use to connect to the targets. Encoded passwords are not supported. - type: string - scopes: - description: Scopes for the token request. - items: - type: string - type: array - tlsConfig: - description: Configures the token request's TLS settings. - properties: - ca: - description: |- - CA references the Kubernetes secret's key with the CA certificate to - validate API server certificate with. - Optional, as in previous resource versions we allowed no credentials. 
- properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - cert: - description: |- - Cert references the Kubernetes secret's key with the certificate (public - key) for client cert authentication to the server. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - insecureSkipVerify: - description: Disable target certificate validation. - type: boolean - key: - description: |- - Key references the Kubernetes secret's key with the private key - for client cert authentication to the server. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - maxVersion: - description: |- - Maximum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. 
- See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - minVersion: - description: |- - Minimum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - serverName: - description: Used to verify the hostname for the targets. - type: string - type: object - tokenURL: - description: The URL to fetch the token from. - type: string - required: - - clientID - - tokenURL - type: object - params: - additionalProperties: - items: - type: string - type: array - description: HTTP GET params to use when scraping. - type: object - path: - description: HTTP path to scrape metrics from. Defaults to "/metrics". - type: string - port: - anyOf: - - type: integer - - type: string - description: |- - Name or number of the port to scrape. - The container metadata label is only populated if the port is referenced by name - because port numbers are not unique across containers. - x-kubernetes-int-or-string: true - proxyUrl: - description: HTTP proxy server to use to connect to the targets. Encoded passwords are not supported. - type: string - scheme: - description: Protocol scheme to use to scrape. - type: string - timeout: - description: |- - Timeout for metrics scrapes. Must be a valid Prometheus duration. - Must not be larger than the scrape interval. - type: string - tls: - description: Configures the scrape request's TLS settings. - properties: - ca: - description: |- - CA references the Kubernetes secret's key with the CA certificate to - validate API server certificate with. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. 
Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - cert: - description: |- - Cert references the Kubernetes secret's key with the certificate (public - key) for client cert authentication to the server. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - insecureSkipVerify: - description: Disable target certificate validation. - type: boolean - key: - description: |- - Key references the Kubernetes secret's key with the private key - for client cert authentication to the server. - Optional, as in previous resource versions we allowed no credentials. - properties: - secret: - description: |- - KubernetesSecret represents reference to a given key from certain Kubernetes Secret - in a given namespace. - properties: - key: - description: The key of the secret to select from. Must be a valid secret key. - type: string - name: - description: Name of the secret to select from. - type: string - required: - - key - - name - type: object - type: object - maxVersion: - description: |- - Maximum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). - If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - minVersion: - description: |- - Minimum TLS version. Accepted values: TLS10 (TLS 1.0), TLS11 (TLS 1.1), TLS12 (TLS 1.2), TLS13 (TLS 1.3). 
- If unset, Prometheus will use Go default minimum version, which is TLS 1.2. - See MinVersion in https://pkg.go.dev/crypto/tls#Config. - type: string - serverName: - description: Used to verify the hostname for the targets. - type: string - type: object - required: - - port - type: object - type: array - filterRunning: - description: |- - FilterRunning will drop any pods that are in the "Failed" or "Succeeded" - pod lifecycle. - See: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase - type: boolean - limits: - description: Limits to apply at scrape time. - properties: - labelNameLength: - description: |- - Maximum label name length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labelValueLength: - description: |- - Maximum label value length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labels: - description: |- - Maximum number of labels accepted for a single sample. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - samples: - description: |- - Maximum number of samples accepted within a single scrape. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - type: object - selector: - description: |- - Label selector that specifies which pods are selected for this monitoring - configuration. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. 
If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - targetLabels: - description: |- - Labels to add to the Prometheus target for discovered endpoints. - The `instance` label is always set to `:` or `:` - if the scraped pod is controlled by a DaemonSet. - properties: - fromPod: - description: |- - Labels to transfer from the Kubernetes Pod to Prometheus target labels. - Mappings are applied in order. - items: - description: |- - LabelMapping specifies how to transfer a label from a Kubernetes resource - onto a Prometheus target. - properties: - from: - description: Kubernetes resource label to remap. - type: string - to: - description: |- - Remapped Prometheus target label. - Defaults to the same name as `From`. - type: string - required: - - from - type: object - type: array - metadata: - description: |- - Pod metadata labels that are set on all scraped targets. - Permitted keys are `pod`, `container`, and `node` for PodMonitoring and - `pod`, `container`, `node`, and `namespace` for ClusterPodMonitoring. The `container` - label is only populated if the scrape port is referenced by name. - Defaults to [pod, container] for PodMonitoring and [namespace, pod, container] - for ClusterPodMonitoring. - If set to null, it will be interpreted as the empty list for PodMonitoring - and to [namespace] for ClusterPodMonitoring. 
This is for backwards-compatibility - only. - items: - type: string - type: array - type: object - required: - - endpoints - - selector - type: object - status: - description: Most recently observed status of the resource. - properties: - conditions: - description: Represents the latest available observations of a podmonitor's current state. - items: - description: MonitoringCondition describes the condition of a PodMonitoring. - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human-readable message indicating details about the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: MonitoringConditionType is the type of MonitoringCondition. - type: string - required: - - status - - type - type: object - type: array - endpointStatuses: - description: Represents the latest available observations of target state for each ScrapeEndpoint. - items: - properties: - activeTargets: - description: Total number of active targets. - format: int64 - type: integer - collectorsFraction: - description: |- - Fraction of collectors included in status, bounded [0,1]. - Ideally, this should always be 1. Anything less can - be considered a problem and should be investigated. - type: string - lastUpdateTime: - description: Last time this status was updated. - format: date-time - type: string - name: - description: The name of the ScrapeEndpoint. - type: string - sampleGroups: - description: A fixed sample of targets grouped by error type. - items: - properties: - count: - description: Total count of similar errors. 
- format: int32 - type: integer - sampleTargets: - description: Targets emitting the error message. - items: - properties: - health: - description: Health status. - type: string - labels: - additionalProperties: - description: A LabelValue is an associated value for a LabelName. - type: string - description: The label set, keys and values, of the target. - type: object - lastError: - description: Error message. - type: string - lastScrapeDurationSeconds: - description: Scrape duration in seconds. - type: string - type: object - type: array - type: object - type: array - unhealthyTargets: - description: Total number of active, unhealthy targets. - format: int64 - type: integer - required: - - name - type: object - type: array - observedGeneration: - description: The generation observed by the controller. - format: int64 - type: integer - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} - - deprecated: true - name: v1alpha1 - schema: - openAPIV3Schema: - description: PodMonitoring defines monitoring for a set of pods. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: |- - Specification of desired Pod selection for target discovery by - Prometheus. 
- properties: - endpoints: - description: The endpoints to scrape on the selected pods. - items: - description: ScrapeEndpoint specifies a Prometheus metrics endpoint to scrape. - properties: - interval: - description: Interval at which to scrape metrics. Must be a valid Prometheus duration. - type: string - metricRelabeling: - description: |- - Relabeling rules for metrics scraped from this endpoint. Relabeling rules that - override protected target labels (project_id, location, cluster, namespace, job, - instance, or __address__) are not permitted. The labelmap action is not permitted - in general. - items: - description: RelabelingRule defines a single Prometheus relabeling rule. - properties: - action: - description: Action to perform based on regex matching. Defaults to 'replace'. - type: string - modulus: - description: Modulus to take of the hash of the source label values. - format: int64 - type: integer - regex: - description: Regular expression against which the extracted value is matched. Defaults to '(.*)'. - type: string - replacement: - description: |- - Replacement value against which a regex replace is performed if the - regular expression matches. Regex capture groups are available. Defaults to '$1'. - type: string - separator: - description: Separator placed between concatenated source label values. Defaults to ';'. - type: string - sourceLabels: - description: |- - The source labels select values from existing labels. Their content is concatenated - using the configured separator and matched against the configured regular expression - for the replace, keep, and drop actions. - items: - type: string - type: array - targetLabel: - description: |- - Label to which the resulting value is written in a replace action. - It is mandatory for replace actions. Regex capture groups are available. 
- type: string - type: object - type: array - params: - additionalProperties: - items: - type: string - type: array - description: HTTP GET params to use when scraping. - type: object - path: - description: HTTP path to scrape metrics from. Defaults to "/metrics". - type: string - port: - anyOf: - - type: integer - - type: string - description: Name or number of the port to scrape. - x-kubernetes-int-or-string: true - proxyUrl: - description: Proxy URL to scrape through. Encoded passwords are not supported. - type: string - scheme: - description: Protocol scheme to use to scrape. - type: string - timeout: - description: |- - Timeout for metrics scrapes. Must be a valid Prometheus duration. - Must not be larger then the scrape interval. - type: string - required: - - port - type: object - type: array - limits: - description: Limits to apply at scrape time. - properties: - labelNameLength: - description: |- - Maximum label name length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labelValueLength: - description: |- - Maximum label value length. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - labels: - description: |- - Maximum number of labels accepted for a single sample. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - samples: - description: |- - Maximum number of samples accepted within a single scrape. - Uses Prometheus default if left unspecified. - format: int64 - type: integer - type: object - selector: - description: |- - Label selector that specifies which pods are selected for this monitoring - configuration. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. 
- properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - targetLabels: - description: Labels to add to the Prometheus target for discovered endpoints. - properties: - fromPod: - description: |- - Labels to transfer from the Kubernetes Pod to Prometheus target labels. - Mappings are applied in order. - items: - description: |- - LabelMapping specifies how to transfer a label from a Kubernetes resource - onto a Prometheus target. - properties: - from: - description: Kubenetes resource label to remap. - type: string - to: - description: |- - Remapped Prometheus target label. - Defaults to the same name as `From`. - type: string - required: - - from - type: object - type: array - metadata: - description: |- - Pod metadata labels that are set on all scraped targets. - Permitted keys are `pod`, `container`, and `node` for PodMonitoring and - `pod`, `container`, `node`, and `namespace` for ClusterPodMonitoring. 
- Defaults to [pod, container] for PodMonitoring and [namespace, pod, container] - for ClusterPodMonitoring. - If set to null, it will be interpreted as the empty list for PodMonitoring - and to [namespace] for ClusterPodMonitoring. This is for backwards-compatibility - only. - items: - type: string - type: array - type: object - required: - - endpoints - - selector - type: object - status: - description: Most recently observed status of the resource. - properties: - conditions: - description: Represents the latest available observations of a podmonitor's current state. - items: - description: MonitoringCondition describes a condition of a PodMonitoring. - properties: - lastTransitionTime: - description: Last time the condition transitioned from one status to another. - format: date-time - type: string - lastUpdateTime: - description: The last time this condition was updated. - format: date-time - type: string - message: - description: A human-readable message indicating details about the transition. - type: string - reason: - description: The reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, Unknown. - type: string - type: - description: MonitoringConditionType is the type of MonitoringCondition. - type: string - required: - - status - - type - type: object - type: array - observedGeneration: - description: The generation observed by the controller. 
- format: int64 - type: integer - type: object - required: - - spec - type: object - served: true - storage: false - subresources: - status: {} ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.14.1-gmp - name: rules.monitoring.googleapis.com -spec: - group: monitoring.googleapis.com - names: - kind: Rules - listKind: RulesList - plural: rules - singular: rules - scope: Namespaced - versions: - - name: v1 - schema: - openAPIV3Schema: - description: |- - Rules defines Prometheus alerting and recording rules that are scoped - to the namespace of the resource. Only metric data from this namespace is processed - and all rule results have their project_id, cluster, and namespace label preserved - for query processing. - If the location label is not preserved by the rule, it defaults to the cluster's location. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of rules to record and alert on. - properties: - groups: - description: A list of Prometheus rule groups. 
- items: - description: |- - RuleGroup declares rules in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - interval: - description: The interval at which to evaluate the rules. Must be a valid Prometheus duration. - type: string - name: - description: The name of the rule group. - type: string - rules: - description: A list of rules that are executed sequentially as part of this group. - items: - description: |- - Rule is a single rule in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - alert: - description: |- - Name of the alert to evaluate the expression as. - Only one of `record` and `alert` must be set. - type: string - annotations: - additionalProperties: - type: string - description: |- - A set of annotations to attach to alerts produced by the query expression. - Only valid if `alert` is set. - type: object - expr: - description: The PromQL expression to evaluate. - type: string - for: - description: |- - The duration to wait before a firing alert produced by this rule is sent to Alertmanager. - Only valid if `alert` is set. - type: string - labels: - additionalProperties: - type: string - description: A set of labels to attach to the result of the query expression. - type: object - record: - description: |- - Record the result of the expression to this metric name. - Only one of `record` and `alert` must be set. - type: string - required: - - expr - type: object - type: array - required: - - interval - - name - - rules - type: object - type: array - required: - - groups - type: object - status: - description: Most recently observed status of the resource. 
- type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} - - deprecated: true - name: v1alpha1 - schema: - openAPIV3Schema: - description: |- - Rules defines Prometheus alerting and recording rules that are scoped - to the namespace of the resource. Only metric data from this namespace is processed - and all rule results have their project_id, cluster, and namespace label preserved - for query processing. - If the location label is not preserved by the rule, it defaults to the cluster's location. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: Specification of rules to record and alert on. - properties: - groups: - description: A list of Prometheus rule groups. - items: - description: |- - RuleGroup declares rules in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - interval: - description: The interval at which to evaluate the rules. Must be a valid Prometheus duration. - type: string - name: - description: The name of the rule group. - type: string - rules: - description: A list of rules that are executed sequentially as part of this group. 
- items: - description: |- - Rule is a single rule in the Prometheus format: - https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ - properties: - alert: - description: |- - Name of the alert to evaluate the expression as. - Only one of `record` and `alert` must be set. - type: string - annotations: - additionalProperties: - type: string - description: |- - A set of annotations to attach to alerts produced by the query expression. - Only valid if `alert` is set. - type: object - expr: - description: The PromQL expression to evaluate. - type: string - for: - description: |- - The duration to wait before a firing alert produced by this rule is sent to Alertmanager. - Only valid if `alert` is set. - type: string - labels: - additionalProperties: - type: string - description: A set of labels to attach to the result of the query expression. - type: object - record: - description: |- - Record the result of the expression to this metric name. - Only one of `record` and `alert` must be set. - type: string - required: - - expr - type: object - type: array - required: - - interval - - name - - rules - type: object - type: array - required: - - groups - type: object - status: - description: Most recently observed status of the resource. - type: object - required: - - spec - type: object - served: true - storage: false - subresources: - status: {} diff --git a/manifests/gmp-operator/2_operator.yaml b/manifests/gmp-operator/2_operator.yaml deleted file mode 100644 index 9b0b724..0000000 --- a/manifests/gmp-operator/2_operator.yaml +++ /dev/null @@ -1,579 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -# Source: prometheus-engine/templates/priority-class.yaml -apiVersion: scheduling.k8s.io/v1 -kind: PriorityClass -metadata: - name: gmp-critical -# Maximum allowed user-defined. Only system-node-critical and system-cluster-critical -# pods are higher. -value: 1000000000 -description: Used for GMP collector pods. ---- -# Source: prometheus-engine/templates/namespace.yaml -apiVersion: v1 -kind: Namespace -metadata: - name: gmp-system ---- -# Source: prometheus-engine/templates/namespace.yaml -apiVersion: v1 -kind: Namespace -metadata: - name: gmp-public ---- -# Source: prometheus-engine/templates/service-account.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: operator - namespace: gmp-system - annotations: - iam.gke.io/gcp-service-account: gmp-prombench@{{ .Env.PROJECT_ID }}.iam.gserviceaccount.com ---- -# Source: prometheus-engine/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: gmp-system:operator -rules: -# Resources controlled by the operator. 
-- resources: - - clusterpodmonitorings - - clusterrules - - globalrules - - clusternodemonitorings - - podmonitorings - - rules - apiGroups: ["monitoring.googleapis.com"] - verbs: ["get", "list", "watch"] -- resources: - - clusterpodmonitorings/status - - clusterrules/status - - globalrules/status - - clusternodemonitorings/status - - podmonitorings/status - - rules/status - apiGroups: ["monitoring.googleapis.com"] - verbs: ["get", "patch", "update"] -- resources: - - statefulsets - apiGroups: ["apps"] - verbs: ["get", "list", "watch"] ---- -# Source: prometheus-engine/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: gmp-system:operator:webhook-admin -rules: -# Permission to inject CA bundles into webhook configs of fixed name. -- resources: - - validatingwebhookconfigurations - - mutatingwebhookconfigurations - apiGroups: ["admissionregistration.k8s.io"] - resourceNames: - - gmp-operator.gmp-system.monitoring.googleapis.com - verbs: ["get", "patch", "update", "watch"] -# Permission to delete legacy webhook config the operator directly created -# in previous versions. 
-- resources: - - validatingwebhookconfigurations - - mutatingwebhookconfigurations - apiGroups: ["admissionregistration.k8s.io"] - resourceNames: - - gmp-operator - verbs: ["delete"] ---- -# Source: prometheus-engine/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: gmp-system:operator -roleRef: - name: gmp-system:operator - kind: ClusterRole - apiGroup: rbac.authorization.k8s.io -subjects: -- name: operator - namespace: gmp-system - kind: ServiceAccount ---- -# Source: prometheus-engine/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: gmp-system:operator:webhook-admin -roleRef: - name: gmp-system:operator:webhook-admin - kind: ClusterRole - apiGroup: rbac.authorization.k8s.io -subjects: -- name: operator - namespace: gmp-system - kind: ServiceAccount ---- -# Source: prometheus-engine/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: operator - namespace: gmp-system -rules: -- resources: - - pods - apiGroups: [""] - verbs: ["list", "watch"] -- resources: - - secrets - apiGroups: [""] - verbs: ["list", "watch", "create"] -- resources: - - secrets - apiGroups: [""] - resourceNames: ["collection", "rules", "alertmanager"] - verbs: ["get", "patch", "update"] -- resources: - - configmaps - apiGroups: [""] - verbs: ["list", "watch", "create"] -- resources: - - configmaps - apiGroups: [""] - resourceNames: ["collector", "rule-evaluator", "rules-generated"] - verbs: ["get", "patch", "update"] -- resources: - - daemonsets - apiGroups: ["apps"] - resourceNames: ["collector"] - verbs: ["get", "list", "watch", "patch", "update"] -- resources: - - deployments - apiGroups: ["apps"] - verbs: ["list", "watch"] -- resources: - - deployments - apiGroups: ["apps"] - resourceNames: ["rule-evaluator"] - verbs: ["get", "patch", "update"] -- resources: - - services - apiGroups: [""] - resourceNames: ["alertmanager"] - 
verbs: ["get", "list", "watch"] -- resources: - - statefulsets - apiGroups: ["apps"] - resourceNames: ["alertmanager"] - verbs: ["get", "patch", "update"] ---- -# Source: prometheus-engine/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: operator - namespace: gmp-public -rules: -- resources: - - secrets - apiGroups: [""] - verbs: ["get", "list", "watch"] -- resources: - - operatorconfigs - apiGroups: ["monitoring.googleapis.com"] - verbs: ["get", "list", "watch"] ---- -# Source: prometheus-engine/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: operator - namespace: gmp-public -roleRef: - name: operator - kind: Role - apiGroup: rbac.authorization.k8s.io -subjects: -- name: operator - namespace: gmp-system - kind: ServiceAccount ---- -# Source: prometheus-engine/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: operator - namespace: gmp-system -roleRef: - name: operator - kind: Role - apiGroup: rbac.authorization.k8s.io -subjects: -- name: operator - kind: ServiceAccount ---- -# Source: prometheus-engine/templates/alertmanager.yaml -apiVersion: v1 -kind: Service -metadata: - name: alertmanager - namespace: gmp-system -spec: - selector: - app: managed-prometheus-alertmanager - app.kubernetes.io/name: alertmanager - ports: - - name: alertmanager - port: 9093 - targetPort: 9093 - clusterIP: None ---- -# Source: prometheus-engine/templates/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: gmp-operator - namespace: gmp-system -spec: - selector: - app.kubernetes.io/component: operator - app.kubernetes.io/name: gmp-operator - app.kubernetes.io/part-of: gmp - ports: - # This port does not do anything, but allows upgrades in the case - # of server-side apply (SSA) conflicts. - # TODO(pintohutch): remove once the SSA issues from upgrades are resolved. 
- - name: legacy - protocol: TCP - port: 8443 - targetPort: webhook - - name: webhook - protocol: TCP - port: 443 - targetPort: web - ---- -# Source: prometheus-engine/templates/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: gmp-operator - namespace: gmp-system - labels: - app: managed-prometheus-operator - app.kubernetes.io/component: operator - app.kubernetes.io/name: gmp-operator - app.kubernetes.io/part-of: gmp -spec: - replicas: 1 - selector: - matchLabels: - # DO NOT MODIFY - label selectors are immutable by the Kubernetes API. - # see: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#label-selector-updates. - app.kubernetes.io/component: operator - app.kubernetes.io/name: gmp-operator - app.kubernetes.io/part-of: gmp - template: - metadata: - labels: - app: managed-prometheus-operator - app.kubernetes.io/component: operator - app.kubernetes.io/name: gmp-operator - app.kubernetes.io/part-of: gmp - app.kubernetes.io/version: 0.11.0 - spec: - serviceAccountName: operator - automountServiceAccountToken: true - priorityClassName: gmp-critical - containers: - - name: operator - image: gke.gcr.io/prometheus-engine/operator:v0.11.0-gke.3 - args: - - "--operator-namespace=gmp-system" - - "--public-namespace=gmp-public" - - "--webhook-addr=:10250" - ports: - - name: web - # Note this should match the --listen-addr flag passed in to the operator args. - # Default is 10250. - containerPort: 10250 - - name: metrics - # Note this should match the --metrics-addr flag passed in to the operator args. - # Default is 18080. 
- containerPort: 18080 - resources: - limits: - memory: 2G - requests: - cpu: 1m - memory: 16M - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 - - amd64 - - key: kubernetes.io/os - operator: In - values: - - linux - tolerations: - - value: "amd64" - effect: "NoSchedule" - key: "kubernetes.io/arch" - operator: "Equal" - - value: "arm64" - effect: "NoSchedule" - key: "kubernetes.io/arch" - operator: "Equal" - securityContext: - runAsGroup: 1000 - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - nodeSelector: - role: core ---- -# Source: prometheus-engine/templates/mutatingwebhookconfiguration.yaml -apiVersion: admissionregistration.k8s.io/v1 -kind: MutatingWebhookConfiguration -metadata: - name: gmp-operator.gmp-system.monitoring.googleapis.com -webhooks: -- name: default.podmonitorings.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. - service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /default/monitoring.googleapis.com/v1/podmonitorings - failurePolicy: Fail - rules: - - resources: - - podmonitorings - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None -- name: default.clusterpodmonitorings.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. 
- service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /default/monitoring.googleapis.com/v1/clusterpodmonitorings - failurePolicy: Fail - rules: - - resources: - - clusterpodmonitorings - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None ---- -# Source: prometheus-engine/templates/operatorconfig.yaml -apiVersion: monitoring.googleapis.com/v1 -kind: OperatorConfig -metadata: - name: config - namespace: gmp-public - labels: ---- -# Source: prometheus-engine/templates/validatingwebhookconfiguration.yaml -apiVersion: admissionregistration.k8s.io/v1 -kind: ValidatingWebhookConfiguration -metadata: - name: gmp-operator.gmp-system.monitoring.googleapis.com -webhooks: -- name: validate.podmonitorings.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. - service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /validate/monitoring.googleapis.com/v1/podmonitorings - failurePolicy: Fail - rules: - - resources: - - podmonitorings - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None -- name: validate.clusterpodmonitorings.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. - service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /validate/monitoring.googleapis.com/v1/clusterpodmonitorings - failurePolicy: Fail - rules: - - resources: - - clusterpodmonitorings - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None -- name: validate.clusternodemonitorings.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. 
- service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /validate/monitoring.googleapis.com/v1/clusternodemonitorings - failurePolicy: Fail - rules: - - resources: - - clusternodemonitorings - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None -- name: validate.rules.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. - service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /validate/monitoring.googleapis.com/v1/rules - failurePolicy: Fail - rules: - - resources: - - rules - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None -- name: validate.clusterrules.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. - service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /validate/monitoring.googleapis.com/v1/clusterrules - failurePolicy: Fail - rules: - - resources: - - clusterrules - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None -- name: validate.globalrules.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. - service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /validate/monitoring.googleapis.com/v1/globalrules - failurePolicy: Fail - rules: - - resources: - - globalrules - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None -- name: validate.operatorconfigs.gmp-operator.gmp-system.monitoring.googleapis.com - admissionReviewVersions: - - v1 - clientConfig: - # caBundle populated by operator. 
- service: - name: gmp-operator - namespace: gmp-system - port: 443 - path: /validate/monitoring.googleapis.com/v1/operatorconfigs - failurePolicy: Fail - rules: - - resources: - - operatorconfigs - apiGroups: - - monitoring.googleapis.com - apiVersions: - - v1 - operations: - - CREATE - - UPDATE - sideEffects: None diff --git a/manifests/load/avalanche.exampletarget.yaml b/manifests/load/avalanche.exampletarget.yaml new file mode 100644 index 0000000..cb60a60 --- /dev/null +++ b/manifests/load/avalanche.exampletarget.yaml @@ -0,0 +1,47 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Env.BENCH_NAME }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: avalanche + namespace: {{ .Env.BENCH_NAME }} + labels: + app: avalanche +spec: + selector: + matchLabels: + app: avalanche + replicas: 16 + template: + metadata: + labels: + app: avalanche + spec: + containers: + - name: avalanche + image: gcr.io/gpe-test-1/avalanche:v0.6.0-main-bw6 #quay.io/prometheuscommunity/avalanche:main + args: + # Aim for 10k series target with realistic distribution of types. + # Captured exampleprometheustarget.txt from an example Prometheus /metric page. + # Then generated (and adjusted a bit) below flags using: + # cd tools/mtypes && cat ../../manifests/load/exampleprometheustarget.txt | go run main.go --avalanche-flags-for-adjusted-series=10000 + - "--gauge-metric-count=158" + - "--counter-metric-count=280" + - "--histogram-metric-count=28" + - "--histogram-metric-bucket-count=10" # Does not count +Inf + - "--native-histogram-metric-count=0" + - "--summary-metric-count=48" # One metric gives 2 series. + - "--summary-metric-objective-count=2" # One metric gives 2 series. + - "--series-count=10" + - "--value-interval=300" # Changes values every 5m + - "--series-interval=3600" # 1h series churn. 
+ - "--metric-interval=0" + - "--port=9001" + ports: + - containerPort: 9001 + name: metrics + nodeSelector: + role: {{ .Env.BENCH_NAME }}-work diff --git a/manifests/load/avalanche.yaml b/manifests/load/avalanche.yaml deleted file mode 100644 index 1304059..0000000 --- a/manifests/load/avalanche.yaml +++ /dev/null @@ -1,48 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: {{ .Env.BENCH_NAME }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: avalanche - namespace: {{ .Env.BENCH_NAME }} - labels: - app: avalanche -spec: - selector: - matchLabels: - app: avalanche - replicas: 16 - template: - metadata: - labels: - app: avalanche - spec: - containers: - - name: avalanche - image: quay.io/prometheuscommunity/avalanche:main - args: - - "--metric-count=1000" # 1K metrics per target. - - "--series-count=10" # 10 series per metric, so 10k series in total per target. Excessive for normal target, but easier to get to desired write QPS. - - "--value-interval=300" # Changes values every 5m, but it does not matter. - - "--series-interval=31536000" # Change 1y (so technically never). - - "--metric-interval=31536000" # Change __name__ value every 1y (so technically never). - # Kept default. - - "--label-count=10" # Number of labels per-metric. - - "--metricname-length=5" # Modify length of metric names. - - "--labelname-length=5" # Modify length of label names. - - "--port=9001" - ports: - - containerPort: 9001 - name: metrics - nodeSelector: - role: {{ .Env.BENCH_NAME }}-work -# resources: -# limits: -# cpu: 25m -# memory: 150Mi -# requests: -# cpu: 5m -# memory: 50Mi diff --git a/manifests/load/exampleprometheustarget.txt b/manifests/load/exampleprometheustarget.txt new file mode 100644 index 0000000..b081e51 --- /dev/null +++ b/manifests/load/exampleprometheustarget.txt @@ -0,0 +1,1012 @@ +# HELP gcm_export_pending_requests Number of in-flight requests to GCM. 
+# TYPE gcm_export_pending_requests gauge +gcm_export_pending_requests 1 +# HELP gcm_export_projects_per_batch Number of different projects in a batch that's being sent. +# TYPE gcm_export_projects_per_batch histogram +gcm_export_projects_per_batch_bucket{le="1"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="2"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="4"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="8"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="16"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="32"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="64"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="128"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="256"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="512"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="1024"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="+Inf"} 1.0832458e+07 +gcm_export_projects_per_batch_sum 1.0832458e+07 +gcm_export_projects_per_batch_count 1.0832458e+07 +# HELP gcm_export_samples_exported_total Number of samples exported at scrape time. +# TYPE gcm_export_samples_exported_total counter +gcm_export_samples_exported_total 1.966333233e+09 +# HELP gcm_export_samples_per_rpc_batch Number of samples that ended up in a single RPC batch. 
+# TYPE gcm_export_samples_per_rpc_batch histogram +gcm_export_samples_per_rpc_batch_bucket{le="1"} 236541 +gcm_export_samples_per_rpc_batch_bucket{le="2"} 304313 +gcm_export_samples_per_rpc_batch_bucket{le="5"} 355002 +gcm_export_samples_per_rpc_batch_bucket{le="10"} 483585 +gcm_export_samples_per_rpc_batch_bucket{le="20"} 579284 +gcm_export_samples_per_rpc_batch_bucket{le="50"} 1.027749e+06 +gcm_export_samples_per_rpc_batch_bucket{le="100"} 1.704702e+06 +gcm_export_samples_per_rpc_batch_bucket{le="150"} 2.355089e+06 +gcm_export_samples_per_rpc_batch_bucket{le="200"} 1.0832458e+07 +gcm_export_samples_per_rpc_batch_bucket{le="+Inf"} 1.0832458e+07 +gcm_export_samples_per_rpc_batch_sum 1.83976418e+09 +gcm_export_samples_per_rpc_batch_count 1.0832458e+07 +# HELP gcm_export_samples_sent_total Number of exported samples sent to GCM. +# TYPE gcm_export_samples_sent_total counter +gcm_export_samples_sent_total 1.839764124e+09 +# HELP gcm_export_send_iterations_total Number of processing iterations of the sample export send handler. +# TYPE gcm_export_send_iterations_total counter +gcm_export_send_iterations_total 1.2444615e+07 +# HELP gcm_export_shard_process_pending_total Number of shard retrievals with an empty result. +# TYPE gcm_export_shard_process_pending_total counter +gcm_export_shard_process_pending_total 8.66546153e+08 +# HELP gcm_export_shard_process_samples_taken Number of samples taken when processing a shard. 
+# TYPE gcm_export_shard_process_samples_taken histogram +gcm_export_shard_process_samples_taken_bucket{le="1"} 5.6291878e+07 +gcm_export_shard_process_samples_taken_bucket{le="2"} 9.1249561e+07 +gcm_export_shard_process_samples_taken_bucket{le="5"} 1.27173414e+08 +gcm_export_shard_process_samples_taken_bucket{le="10"} 1.34384486e+08 +gcm_export_shard_process_samples_taken_bucket{le="20"} 1.68076229e+08 +gcm_export_shard_process_samples_taken_bucket{le="50"} 2.04738182e+08 +gcm_export_shard_process_samples_taken_bucket{le="100"} 2.04762012e+08 +gcm_export_shard_process_samples_taken_bucket{le="150"} 2.04762012e+08 +gcm_export_shard_process_samples_taken_bucket{le="200"} 2.04762012e+08 +gcm_export_shard_process_samples_taken_bucket{le="+Inf"} 2.04762012e+08 +gcm_export_shard_process_samples_taken_sum 1.83976418e+09 +gcm_export_shard_process_samples_taken_count 2.04762012e+08 +# HELP gcm_export_shard_process_total Number of shard retrievals. +# TYPE gcm_export_shard_process_total counter +gcm_export_shard_process_total 2.488923e+09 +# HELP gcm_pool_intern_total Time series memory intern operations. +# TYPE gcm_pool_intern_total counter +gcm_pool_intern_total 4.8525498e+07 +# HELP gcm_pool_release_total Time series memory intern release operations. +# TYPE gcm_pool_release_total counter +gcm_pool_release_total 4.8514709e+07 +# HELP gcm_prometheus_samples_discarded_total Samples that were discarded during data model conversion. +# TYPE gcm_prometheus_samples_discarded_total counter +gcm_prometheus_samples_discarded_total{reason="staleness-marker"} 9919 +gcm_prometheus_samples_discarded_total{reason="zero-buckets-bounds"} 1.076142e+07 +# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles. 
+# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 5.8641e-05 +go_gc_duration_seconds{quantile="0.25"} 8.4045e-05 +go_gc_duration_seconds{quantile="0.5"} 0.000119609 +go_gc_duration_seconds{quantile="0.75"} 0.000149195 +go_gc_duration_seconds{quantile="1"} 0.000312434 +go_gc_duration_seconds_sum 11.324308382 +go_gc_duration_seconds_count 92364 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 112 +# HELP go_info Information about the Go environment. +# TYPE go_info gauge +go_info{version="go1.20.14"} 1 +# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use. +# TYPE go_memstats_alloc_bytes gauge +go_memstats_alloc_bytes 1.09818568e+08 +# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. +# TYPE go_memstats_alloc_bytes_total counter +go_memstats_alloc_bytes_total 7.420978933248e+12 +# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. +# TYPE go_memstats_buck_hash_sys_bytes gauge +go_memstats_buck_hash_sys_bytes 3.653156e+06 +# HELP go_memstats_frees_total Total number of frees. +# TYPE go_memstats_frees_total counter +go_memstats_frees_total 1.19996693238e+11 +# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. +# TYPE go_memstats_gc_sys_bytes gauge +go_memstats_gc_sys_bytes 1.6556264e+07 +# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use. +# TYPE go_memstats_heap_alloc_bytes gauge +go_memstats_heap_alloc_bytes 1.09818568e+08 +# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. +# TYPE go_memstats_heap_idle_bytes gauge +go_memstats_heap_idle_bytes 1.8628608e+08 +# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. +# TYPE go_memstats_heap_inuse_bytes gauge +go_memstats_heap_inuse_bytes 1.3860864e+08 +# HELP go_memstats_heap_objects Number of allocated objects. 
+# TYPE go_memstats_heap_objects gauge +go_memstats_heap_objects 738856 +# HELP go_memstats_heap_released_bytes Number of heap bytes released to OS. +# TYPE go_memstats_heap_released_bytes gauge +go_memstats_heap_released_bytes 1.42557184e+08 +# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. +# TYPE go_memstats_heap_sys_bytes gauge +go_memstats_heap_sys_bytes 3.2489472e+08 +# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection. +# TYPE go_memstats_last_gc_time_seconds gauge +go_memstats_last_gc_time_seconds 1.7278073317025118e+09 +# HELP go_memstats_lookups_total Total number of pointer lookups. +# TYPE go_memstats_lookups_total counter +go_memstats_lookups_total 0 +# HELP go_memstats_mallocs_total Total number of mallocs. +# TYPE go_memstats_mallocs_total counter +go_memstats_mallocs_total 1.19997432094e+11 +# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. +# TYPE go_memstats_mcache_inuse_bytes gauge +go_memstats_mcache_inuse_bytes 4800 +# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. +# TYPE go_memstats_mcache_sys_bytes gauge +go_memstats_mcache_sys_bytes 15600 +# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. +# TYPE go_memstats_mspan_inuse_bytes gauge +go_memstats_mspan_inuse_bytes 1.8024e+06 +# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. +# TYPE go_memstats_mspan_sys_bytes gauge +go_memstats_mspan_sys_bytes 3.24768e+06 +# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. +# TYPE go_memstats_next_gc_bytes gauge +go_memstats_next_gc_bytes 1.636618e+08 +# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. 
+# TYPE go_memstats_other_sys_bytes gauge +go_memstats_other_sys_bytes 1.202956e+06 +# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. +# TYPE go_memstats_stack_inuse_bytes gauge +go_memstats_stack_inuse_bytes 2.260992e+06 +# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. +# TYPE go_memstats_stack_sys_bytes gauge +go_memstats_stack_sys_bytes 2.260992e+06 +# HELP go_memstats_sys_bytes Number of bytes obtained from system. +# TYPE go_memstats_sys_bytes gauge +go_memstats_sys_bytes 3.51831368e+08 +# HELP go_threads Number of OS threads created. +# TYPE go_threads gauge +go_threads 12 +# HELP grpc_client_handled_total Total number of RPCs completed by the client, regardless of success or failure. +# TYPE grpc_client_handled_total counter +grpc_client_handled_total{grpc_code="Canceled",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 9 +grpc_client_handled_total{grpc_code="DeadlineExceeded",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 82 +grpc_client_handled_total{grpc_code="Internal",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 4 +grpc_client_handled_total{grpc_code="OK",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.0831867e+07 +grpc_client_handled_total{grpc_code="Unauthenticated",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1 +grpc_client_handled_total{grpc_code="Unavailable",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 494 +# HELP grpc_client_handling_seconds Histogram of response latency (seconds) of the gRPC until it is finished by the application. 
+# TYPE grpc_client_handling_seconds histogram +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.005"} 0 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.01"} 0 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.025"} 34059 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.05"} 1.127825e+06 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.1"} 9.058302e+06 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.25"} 1.0721886e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.5"} 1.0759498e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="1"} 1.0774023e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="2.5"} 1.079026e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="5"} 1.0800098e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="10"} 1.0832159e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="15"} 1.0832261e+07 
+grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="20"} 1.0832299e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="30"} 1.0832376e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="40"} 1.0832457e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="50"} 1.0832457e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="60"} 1.0832457e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="+Inf"} 1.0832457e+07 +grpc_client_handling_seconds_sum{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.2123103039707085e+06 +grpc_client_handling_seconds_count{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.0832457e+07 +# HELP grpc_client_msg_received_total Total number of RPC stream messages received by the client. +# TYPE grpc_client_msg_received_total counter +grpc_client_msg_received_total{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 590 +# HELP grpc_client_msg_sent_total Total number of gRPC stream messages sent by the client. +# TYPE grpc_client_msg_sent_total counter +grpc_client_msg_sent_total{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.0832458e+07 +# HELP grpc_client_started_total Total number of RPCs started on the client. 
+# TYPE grpc_client_started_total counter +grpc_client_started_total{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.0832458e+07 +# HELP net_conntrack_dialer_conn_attempted_total Total number of connections attempted by the given dialer a given name. +# TYPE net_conntrack_dialer_conn_attempted_total counter +net_conntrack_dialer_conn_attempted_total{dialer_name="cadvisor"} 94 +net_conntrack_dialer_conn_attempted_total{dialer_name="default"} 0 +net_conntrack_dialer_conn_attempted_total{dialer_name="kube-state-metrics"} 2 +net_conntrack_dialer_conn_attempted_total{dialer_name="pods"} 179445 +# HELP net_conntrack_dialer_conn_closed_total Total number of connections closed which originated from the dialer of a given name. +# TYPE net_conntrack_dialer_conn_closed_total counter +net_conntrack_dialer_conn_closed_total{dialer_name="cadvisor"} 3 +net_conntrack_dialer_conn_closed_total{dialer_name="default"} 0 +net_conntrack_dialer_conn_closed_total{dialer_name="kube-state-metrics"} 0 +net_conntrack_dialer_conn_closed_total{dialer_name="pods"} 179394 +# HELP net_conntrack_dialer_conn_established_total Total number of connections successfully established by the given dialer a given name. +# TYPE net_conntrack_dialer_conn_established_total counter +net_conntrack_dialer_conn_established_total{dialer_name="cadvisor"} 4 +net_conntrack_dialer_conn_established_total{dialer_name="default"} 0 +net_conntrack_dialer_conn_established_total{dialer_name="kube-state-metrics"} 2 +net_conntrack_dialer_conn_established_total{dialer_name="pods"} 179399 +# HELP net_conntrack_dialer_conn_failed_total Total number of connections failed to dial by the dialer a given name. 
+# TYPE net_conntrack_dialer_conn_failed_total counter +net_conntrack_dialer_conn_failed_total{dialer_name="cadvisor",reason="refused"} 7 +net_conntrack_dialer_conn_failed_total{dialer_name="cadvisor",reason="resolution"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="cadvisor",reason="timeout"} 83 +net_conntrack_dialer_conn_failed_total{dialer_name="cadvisor",reason="unknown"} 90 +net_conntrack_dialer_conn_failed_total{dialer_name="default",reason="refused"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="default",reason="resolution"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="default",reason="timeout"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="default",reason="unknown"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="kube-state-metrics",reason="refused"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="kube-state-metrics",reason="resolution"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="kube-state-metrics",reason="timeout"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="kube-state-metrics",reason="unknown"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="pods",reason="refused"} 4 +net_conntrack_dialer_conn_failed_total{dialer_name="pods",reason="resolution"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="pods",reason="timeout"} 42 +net_conntrack_dialer_conn_failed_total{dialer_name="pods",reason="unknown"} 46 +# HELP net_conntrack_listener_conn_accepted_total Total number of connections opened to the listener of a given name. +# TYPE net_conntrack_listener_conn_accepted_total counter +net_conntrack_listener_conn_accepted_total{listener_name="http"} 8 +# HELP net_conntrack_listener_conn_closed_total Total number of connections closed that were made to the listener of a given name. 
+# TYPE net_conntrack_listener_conn_closed_total counter +net_conntrack_listener_conn_closed_total{listener_name="http"} 3 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 64026.65 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 105 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 2.81624576e+08 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.72511698039e+09 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 2.8450332672e+10 +# HELP process_virtual_memory_max_bytes Maximum amount of virtual memory available in bytes. +# TYPE process_virtual_memory_max_bytes gauge +process_virtual_memory_max_bytes 1.8446744073709552e+19 +# HELP prometheus_api_remote_read_queries The current number of remote read queries being executed or waiting. +# TYPE prometheus_api_remote_read_queries gauge +prometheus_api_remote_read_queries 0 +# HELP prometheus_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which prometheus was built, and the goos and goarch for the build. +# TYPE prometheus_build_info gauge +prometheus_build_info{branch="",goarch="amd64",goos="linux",goversion="go1.20.14",revision="d7b199739aa7e0d00e7ebd0792339dd4b167a269-modified",tags="builtinassets",version="2.45.3"} 1 +# HELP prometheus_config_last_reload_success_timestamp_seconds Timestamp of the last successful configuration reload. 
+# TYPE prometheus_config_last_reload_success_timestamp_seconds gauge +prometheus_config_last_reload_success_timestamp_seconds 1.725116982549508e+09 +# HELP prometheus_config_last_reload_successful Whether the last configuration reload attempt was successful. +# TYPE prometheus_config_last_reload_successful gauge +prometheus_config_last_reload_successful 1 +# HELP prometheus_engine_queries The current number of queries being executed or waiting. +# TYPE prometheus_engine_queries gauge +prometheus_engine_queries 0 +# HELP prometheus_engine_queries_concurrent_max The max number of concurrent queries. +# TYPE prometheus_engine_queries_concurrent_max gauge +prometheus_engine_queries_concurrent_max 20 +# HELP prometheus_engine_query_duration_seconds Query timings +# TYPE prometheus_engine_query_duration_seconds summary +prometheus_engine_query_duration_seconds{slice="inner_eval",quantile="0.5"} NaN +prometheus_engine_query_duration_seconds{slice="inner_eval",quantile="0.9"} NaN +prometheus_engine_query_duration_seconds{slice="inner_eval",quantile="0.99"} NaN +prometheus_engine_query_duration_seconds_sum{slice="inner_eval"} 0 +prometheus_engine_query_duration_seconds_count{slice="inner_eval"} 0 +prometheus_engine_query_duration_seconds{slice="prepare_time",quantile="0.5"} NaN +prometheus_engine_query_duration_seconds{slice="prepare_time",quantile="0.9"} NaN +prometheus_engine_query_duration_seconds{slice="prepare_time",quantile="0.99"} NaN +prometheus_engine_query_duration_seconds_sum{slice="prepare_time"} 0 +prometheus_engine_query_duration_seconds_count{slice="prepare_time"} 0 +prometheus_engine_query_duration_seconds{slice="queue_time",quantile="0.5"} NaN +prometheus_engine_query_duration_seconds{slice="queue_time",quantile="0.9"} NaN +prometheus_engine_query_duration_seconds{slice="queue_time",quantile="0.99"} NaN +prometheus_engine_query_duration_seconds_sum{slice="queue_time"} 0 +prometheus_engine_query_duration_seconds_count{slice="queue_time"} 0 
+prometheus_engine_query_duration_seconds{slice="result_sort",quantile="0.5"} NaN +prometheus_engine_query_duration_seconds{slice="result_sort",quantile="0.9"} NaN +prometheus_engine_query_duration_seconds{slice="result_sort",quantile="0.99"} NaN +prometheus_engine_query_duration_seconds_sum{slice="result_sort"} 0 +prometheus_engine_query_duration_seconds_count{slice="result_sort"} 0 +# HELP prometheus_engine_query_log_enabled State of the query log. +# TYPE prometheus_engine_query_log_enabled gauge +prometheus_engine_query_log_enabled 0 +# HELP prometheus_engine_query_log_failures_total The number of query log failures. +# TYPE prometheus_engine_query_log_failures_total counter +prometheus_engine_query_log_failures_total 0 +# HELP prometheus_engine_query_samples_total The total number of samples loaded by all queries. +# TYPE prometheus_engine_query_samples_total counter +prometheus_engine_query_samples_total 0 +# HELP prometheus_http_request_duration_seconds Histogram of latencies for HTTP requests. 
+# TYPE prometheus_http_request_duration_seconds histogram +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="0.1"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="0.2"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="0.4"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="1"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="3"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="8"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="20"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="60"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="120"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="+Inf"} 2 +prometheus_http_request_duration_seconds_sum{handler="/-/ready"} 4.7443999999999995e-05 +prometheus_http_request_duration_seconds_count{handler="/-/ready"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="0.1"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="0.2"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="0.4"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="1"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="3"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="8"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="20"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="60"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="120"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="+Inf"} 1 +prometheus_http_request_duration_seconds_sum{handler="/-/reload"} 0.002356799 +prometheus_http_request_duration_seconds_count{handler="/-/reload"} 1 
+prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="0.1"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="0.2"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="0.4"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="1"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="3"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="8"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="20"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="60"} 448346 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="120"} 448346 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="+Inf"} 448346 +prometheus_http_request_duration_seconds_sum{handler="/debug/*subpath"} 2.692262582005182e+06 +prometheus_http_request_duration_seconds_count{handler="/debug/*subpath"} 448346 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.1"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.2"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.4"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="1"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="3"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="8"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="20"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="60"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="120"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="+Inf"} 179357 +prometheus_http_request_duration_seconds_sum{handler="/metrics"} 
552.2173053479947 +prometheus_http_request_duration_seconds_count{handler="/metrics"} 179357 +# HELP prometheus_http_requests_total Counter of HTTP requests. +# TYPE prometheus_http_requests_total counter +prometheus_http_requests_total{code="200",handler="/-/ready"} 1 +prometheus_http_requests_total{code="200",handler="/-/reload"} 1 +prometheus_http_requests_total{code="200",handler="/debug/*subpath"} 448346 +prometheus_http_requests_total{code="200",handler="/metrics"} 179357 +prometheus_http_requests_total{code="503",handler="/-/ready"} 1 +# HELP prometheus_http_response_size_bytes Histogram of response size for HTTP requests. +# TYPE prometheus_http_response_size_bytes histogram +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="100"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1000"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="10000"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="100000"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1e+06"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1e+07"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1e+08"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1e+09"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="+Inf"} 2 +prometheus_http_response_size_bytes_sum{handler="/-/ready"} 47 +prometheus_http_response_size_bytes_count{handler="/-/ready"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="100"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1000"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="10000"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="100000"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1e+06"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1e+07"} 1 
+prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1e+08"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1e+09"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="+Inf"} 1 +prometheus_http_response_size_bytes_sum{handler="/-/reload"} 0 +prometheus_http_response_size_bytes_count{handler="/-/reload"} 1 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="100"} 0 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1000"} 179358 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="10000"} 269558 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="100000"} 359969 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1e+06"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1e+07"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1e+08"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1e+09"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="+Inf"} 448346 +prometheus_http_response_size_bytes_sum{handler="/debug/*subpath"} 1.7640059511e+10 +prometheus_http_response_size_bytes_count{handler="/debug/*subpath"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="100"} 0 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1000"} 0 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="10000"} 191 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="100000"} 179357 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1e+06"} 179357 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1e+07"} 179357 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1e+08"} 179357 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1e+09"} 179357 
+prometheus_http_response_size_bytes_bucket{handler="/metrics",le="+Inf"} 179357 +prometheus_http_response_size_bytes_sum{handler="/metrics"} 1.895799365e+09 +prometheus_http_response_size_bytes_count{handler="/metrics"} 179357 +# HELP prometheus_notifications_alertmanagers_discovered The number of alertmanagers discovered and active. +# TYPE prometheus_notifications_alertmanagers_discovered gauge +prometheus_notifications_alertmanagers_discovered 0 +# HELP prometheus_notifications_dropped_total Total number of alerts dropped due to errors when sending to Alertmanager. +# TYPE prometheus_notifications_dropped_total counter +prometheus_notifications_dropped_total 0 +# HELP prometheus_notifications_queue_capacity The capacity of the alert notifications queue. +# TYPE prometheus_notifications_queue_capacity gauge +prometheus_notifications_queue_capacity 10000 +# HELP prometheus_notifications_queue_length The number of alert notifications in the queue. +# TYPE prometheus_notifications_queue_length gauge +prometheus_notifications_queue_length 0 +# HELP prometheus_ready Whether Prometheus startup was fully completed and the server is ready for normal operation. +# TYPE prometheus_ready gauge +prometheus_ready 1 +# HELP prometheus_remote_storage_exemplars_in_total Exemplars in to remote storage, compare to exemplars out for queue managers. +# TYPE prometheus_remote_storage_exemplars_in_total counter +prometheus_remote_storage_exemplars_in_total 0 +# HELP prometheus_remote_storage_highest_timestamp_in_seconds Highest timestamp that has come into the remote storage via the Appender interface, in seconds since epoch. +# TYPE prometheus_remote_storage_highest_timestamp_in_seconds gauge +prometheus_remote_storage_highest_timestamp_in_seconds 1.727807345e+09 +# HELP prometheus_remote_storage_histograms_in_total HistogramSamples in to remote storage, compare to histograms out for queue managers. 
+# TYPE prometheus_remote_storage_histograms_in_total counter +prometheus_remote_storage_histograms_in_total 0 +# HELP prometheus_remote_storage_samples_in_total Samples in to remote storage, compare to samples out for queue managers. +# TYPE prometheus_remote_storage_samples_in_total counter +prometheus_remote_storage_samples_in_total 1.966333233e+09 +# HELP prometheus_remote_storage_string_interner_zero_reference_releases_total The number of times release has been called for strings that are not interned. +# TYPE prometheus_remote_storage_string_interner_zero_reference_releases_total counter +prometheus_remote_storage_string_interner_zero_reference_releases_total 0 +# HELP prometheus_rule_evaluation_duration_seconds The duration for a rule to execute. +# TYPE prometheus_rule_evaluation_duration_seconds summary +prometheus_rule_evaluation_duration_seconds{quantile="0.5"} NaN +prometheus_rule_evaluation_duration_seconds{quantile="0.9"} NaN +prometheus_rule_evaluation_duration_seconds{quantile="0.99"} NaN +prometheus_rule_evaluation_duration_seconds_sum 0 +prometheus_rule_evaluation_duration_seconds_count 0 +# HELP prometheus_rule_group_duration_seconds The duration of rule group evaluations. +# TYPE prometheus_rule_group_duration_seconds summary +prometheus_rule_group_duration_seconds{quantile="0.01"} NaN +prometheus_rule_group_duration_seconds{quantile="0.05"} NaN +prometheus_rule_group_duration_seconds{quantile="0.5"} NaN +prometheus_rule_group_duration_seconds{quantile="0.9"} NaN +prometheus_rule_group_duration_seconds{quantile="0.99"} NaN +prometheus_rule_group_duration_seconds_sum 0 +prometheus_rule_group_duration_seconds_count 0 +# HELP prometheus_sd_azure_failures_total Number of Azure service discovery refresh failures. +# TYPE prometheus_sd_azure_failures_total counter +prometheus_sd_azure_failures_total 0 +# HELP prometheus_sd_consul_rpc_duration_seconds The duration of a Consul RPC call in seconds. 
+# TYPE prometheus_sd_consul_rpc_duration_seconds summary +prometheus_sd_consul_rpc_duration_seconds{call="service",endpoint="catalog",quantile="0.5"} NaN +prometheus_sd_consul_rpc_duration_seconds{call="service",endpoint="catalog",quantile="0.9"} NaN +prometheus_sd_consul_rpc_duration_seconds{call="service",endpoint="catalog",quantile="0.99"} NaN +prometheus_sd_consul_rpc_duration_seconds_sum{call="service",endpoint="catalog"} 0 +prometheus_sd_consul_rpc_duration_seconds_count{call="service",endpoint="catalog"} 0 +prometheus_sd_consul_rpc_duration_seconds{call="services",endpoint="catalog",quantile="0.5"} NaN +prometheus_sd_consul_rpc_duration_seconds{call="services",endpoint="catalog",quantile="0.9"} NaN +prometheus_sd_consul_rpc_duration_seconds{call="services",endpoint="catalog",quantile="0.99"} NaN +prometheus_sd_consul_rpc_duration_seconds_sum{call="services",endpoint="catalog"} 0 +prometheus_sd_consul_rpc_duration_seconds_count{call="services",endpoint="catalog"} 0 +# HELP prometheus_sd_consul_rpc_failures_total The number of Consul RPC call failures. +# TYPE prometheus_sd_consul_rpc_failures_total counter +prometheus_sd_consul_rpc_failures_total 0 +# HELP prometheus_sd_discovered_targets Current number of discovered targets. +# TYPE prometheus_sd_discovered_targets gauge +prometheus_sd_discovered_targets{config="cadvisor",name="scrape"} 2 +prometheus_sd_discovered_targets{config="kube-state-metrics",name="scrape"} 9 +prometheus_sd_discovered_targets{config="pods",name="scrape"} 82 +# HELP prometheus_sd_dns_lookup_failures_total The number of DNS-SD lookup failures. +# TYPE prometheus_sd_dns_lookup_failures_total counter +prometheus_sd_dns_lookup_failures_total 0 +# HELP prometheus_sd_dns_lookups_total The number of DNS-SD lookups. +# TYPE prometheus_sd_dns_lookups_total counter +prometheus_sd_dns_lookups_total 0 +# HELP prometheus_sd_failed_configs Current number of service discovery configurations that failed to load. 
+# TYPE prometheus_sd_failed_configs gauge +prometheus_sd_failed_configs{name="notify"} 0 +prometheus_sd_failed_configs{name="scrape"} 0 +# HELP prometheus_sd_file_read_errors_total The number of File-SD read errors. +# TYPE prometheus_sd_file_read_errors_total counter +prometheus_sd_file_read_errors_total 0 +# HELP prometheus_sd_file_scan_duration_seconds The duration of the File-SD scan in seconds. +# TYPE prometheus_sd_file_scan_duration_seconds summary +prometheus_sd_file_scan_duration_seconds{quantile="0.5"} NaN +prometheus_sd_file_scan_duration_seconds{quantile="0.9"} NaN +prometheus_sd_file_scan_duration_seconds{quantile="0.99"} NaN +prometheus_sd_file_scan_duration_seconds_sum 0 +prometheus_sd_file_scan_duration_seconds_count 0 +# HELP prometheus_sd_file_watcher_errors_total The number of File-SD errors caused by filesystem watch failures. +# TYPE prometheus_sd_file_watcher_errors_total counter +prometheus_sd_file_watcher_errors_total 0 +# HELP prometheus_sd_http_failures_total Number of HTTP service discovery refresh failures. +# TYPE prometheus_sd_http_failures_total counter +prometheus_sd_http_failures_total 0 +# HELP prometheus_sd_kubernetes_events_total The number of Kubernetes events handled. 
+# TYPE prometheus_sd_kubernetes_events_total counter +prometheus_sd_kubernetes_events_total{event="add",role="endpoints"} 0 +prometheus_sd_kubernetes_events_total{event="add",role="endpointslice"} 0 +prometheus_sd_kubernetes_events_total{event="add",role="ingress"} 0 +prometheus_sd_kubernetes_events_total{event="add",role="node"} 5 +prometheus_sd_kubernetes_events_total{event="add",role="pod"} 169 +prometheus_sd_kubernetes_events_total{event="add",role="service"} 9 +prometheus_sd_kubernetes_events_total{event="delete",role="endpoints"} 0 +prometheus_sd_kubernetes_events_total{event="delete",role="endpointslice"} 0 +prometheus_sd_kubernetes_events_total{event="delete",role="ingress"} 0 +prometheus_sd_kubernetes_events_total{event="delete",role="node"} 3 +prometheus_sd_kubernetes_events_total{event="delete",role="pod"} 128 +prometheus_sd_kubernetes_events_total{event="delete",role="service"} 2 +prometheus_sd_kubernetes_events_total{event="update",role="endpoints"} 0 +prometheus_sd_kubernetes_events_total{event="update",role="endpointslice"} 0 +prometheus_sd_kubernetes_events_total{event="update",role="ingress"} 0 +prometheus_sd_kubernetes_events_total{event="update",role="node"} 35525 +prometheus_sd_kubernetes_events_total{event="update",role="pod"} 1034 +prometheus_sd_kubernetes_events_total{event="update",role="service"} 29 +# HELP prometheus_sd_kubernetes_http_request_duration_seconds Summary of latencies for HTTP requests to the Kubernetes API by endpoint. 
+# TYPE prometheus_sd_kubernetes_http_request_duration_seconds summary +prometheus_sd_kubernetes_http_request_duration_seconds_sum{endpoint="/api/v1/nodes"} 0.017348603 +prometheus_sd_kubernetes_http_request_duration_seconds_count{endpoint="/api/v1/nodes"} 4 +prometheus_sd_kubernetes_http_request_duration_seconds_sum{endpoint="/api/v1/pods"} 0.038949225999999997 +prometheus_sd_kubernetes_http_request_duration_seconds_count{endpoint="/api/v1/pods"} 4 +prometheus_sd_kubernetes_http_request_duration_seconds_sum{endpoint="/api/v1/services"} 0.014277334000000001 +prometheus_sd_kubernetes_http_request_duration_seconds_count{endpoint="/api/v1/services"} 4 +# HELP prometheus_sd_kubernetes_http_request_total Total number of HTTP requests to the Kubernetes API by status code. +# TYPE prometheus_sd_kubernetes_http_request_total counter +prometheus_sd_kubernetes_http_request_total{status_code="200"} 17957 +prometheus_sd_kubernetes_http_request_total{status_code=""} 83 +# HELP prometheus_sd_kubernetes_workqueue_depth Current depth of the work queue. +# TYPE prometheus_sd_kubernetes_workqueue_depth gauge +prometheus_sd_kubernetes_workqueue_depth{queue_name="node"} 0 +prometheus_sd_kubernetes_workqueue_depth{queue_name="pod"} 0 +prometheus_sd_kubernetes_workqueue_depth{queue_name="service"} 0 +# HELP prometheus_sd_kubernetes_workqueue_items_total Total number of items added to the work queue. +# TYPE prometheus_sd_kubernetes_workqueue_items_total counter +prometheus_sd_kubernetes_workqueue_items_total{queue_name="node"} 35533 +prometheus_sd_kubernetes_workqueue_items_total{queue_name="pod"} 1329 +prometheus_sd_kubernetes_workqueue_items_total{queue_name="service"} 40 +# HELP prometheus_sd_kubernetes_workqueue_latency_seconds How long an item stays in the work queue. 
+# TYPE prometheus_sd_kubernetes_workqueue_latency_seconds summary +prometheus_sd_kubernetes_workqueue_latency_seconds_sum{queue_name="node"} 0.49772388200000356 +prometheus_sd_kubernetes_workqueue_latency_seconds_count{queue_name="node"} 35533 +prometheus_sd_kubernetes_workqueue_latency_seconds_sum{queue_name="pod"} 4.155762530999996 +prometheus_sd_kubernetes_workqueue_latency_seconds_count{queue_name="pod"} 1329 +prometheus_sd_kubernetes_workqueue_latency_seconds_sum{queue_name="service"} 0.8281205150000001 +prometheus_sd_kubernetes_workqueue_latency_seconds_count{queue_name="service"} 40 +# HELP prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds Duration of the longest running processor in the work queue. +# TYPE prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds gauge +prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds{queue_name="node"} 0 +prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds{queue_name="pod"} 0 +prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds{queue_name="service"} 0 +# HELP prometheus_sd_kubernetes_workqueue_unfinished_work_seconds How long an item has remained unfinished in the work queue. +# TYPE prometheus_sd_kubernetes_workqueue_unfinished_work_seconds gauge +prometheus_sd_kubernetes_workqueue_unfinished_work_seconds{queue_name="node"} 0 +prometheus_sd_kubernetes_workqueue_unfinished_work_seconds{queue_name="pod"} 0 +prometheus_sd_kubernetes_workqueue_unfinished_work_seconds{queue_name="service"} 0 +# HELP prometheus_sd_kubernetes_workqueue_work_duration_seconds How long processing an item from the work queue takes. 
+# TYPE prometheus_sd_kubernetes_workqueue_work_duration_seconds summary +prometheus_sd_kubernetes_workqueue_work_duration_seconds_sum{queue_name="node"} 5.840500786999983 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_count{queue_name="node"} 35533 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_sum{queue_name="pod"} 0.034607483000000085 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_count{queue_name="pod"} 1329 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_sum{queue_name="service"} 0.0010254919999999998 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_count{queue_name="service"} 40 +# HELP prometheus_sd_kuma_fetch_duration_seconds The duration of a Kuma MADS fetch call. +# TYPE prometheus_sd_kuma_fetch_duration_seconds summary +prometheus_sd_kuma_fetch_duration_seconds{quantile="0.5"} NaN +prometheus_sd_kuma_fetch_duration_seconds{quantile="0.9"} NaN +prometheus_sd_kuma_fetch_duration_seconds{quantile="0.99"} NaN +prometheus_sd_kuma_fetch_duration_seconds_sum 0 +prometheus_sd_kuma_fetch_duration_seconds_count 0 +# HELP prometheus_sd_kuma_fetch_failures_total The number of Kuma MADS fetch call failures. +# TYPE prometheus_sd_kuma_fetch_failures_total counter +prometheus_sd_kuma_fetch_failures_total 0 +# HELP prometheus_sd_kuma_fetch_skipped_updates_total The number of Kuma MADS fetch calls that result in no updates to the targets. +# TYPE prometheus_sd_kuma_fetch_skipped_updates_total counter +prometheus_sd_kuma_fetch_skipped_updates_total 0 +# HELP prometheus_sd_linode_failures_total Number of Linode service discovery refresh failures. +# TYPE prometheus_sd_linode_failures_total counter +prometheus_sd_linode_failures_total 0 +# HELP prometheus_sd_nomad_failures_total Number of nomad service discovery refresh failures. +# TYPE prometheus_sd_nomad_failures_total counter +prometheus_sd_nomad_failures_total 0 +# HELP prometheus_sd_received_updates_total Total number of update events received from the SD providers. 
+# TYPE prometheus_sd_received_updates_total counter +prometheus_sd_received_updates_total{name="scrape"} 36897 +# HELP prometheus_sd_updates_total Total number of update events sent to the SD consumers. +# TYPE prometheus_sd_updates_total counter +prometheus_sd_updates_total{name="scrape"} 34137 +# HELP prometheus_target_interval_length_seconds Actual intervals between scrapes. +# TYPE prometheus_target_interval_length_seconds summary +prometheus_target_interval_length_seconds{interval="15s",quantile="0.01"} 14.99914058 +prometheus_target_interval_length_seconds{interval="15s",quantile="0.05"} 14.999310634 +prometheus_target_interval_length_seconds{interval="15s",quantile="0.5"} 15.000008779 +prometheus_target_interval_length_seconds{interval="15s",quantile="0.9"} 15.000545764 +prometheus_target_interval_length_seconds{interval="15s",quantile="0.99"} 15.000857257 +prometheus_target_interval_length_seconds_sum{interval="15s"} 2.4210266343189236e+07 +prometheus_target_interval_length_seconds_count{interval="15s"} 1.614017e+06 +# HELP prometheus_target_metadata_cache_bytes The number of bytes that are currently used for storing metric metadata in the cache +# TYPE prometheus_target_metadata_cache_bytes gauge +prometheus_target_metadata_cache_bytes{scrape_job="cadvisor"} 6898 +prometheus_target_metadata_cache_bytes{scrape_job="kube-state-metrics"} 1933 +prometheus_target_metadata_cache_bytes{scrape_job="pods"} 34437 +# HELP prometheus_target_metadata_cache_entries Total number of metric metadata entries in the cache +# TYPE prometheus_target_metadata_cache_entries gauge +prometheus_target_metadata_cache_entries{scrape_job="cadvisor"} 138 +prometheus_target_metadata_cache_entries{scrape_job="kube-state-metrics"} 39 +prometheus_target_metadata_cache_entries{scrape_job="pods"} 583 +# HELP prometheus_target_scrape_pool_exceeded_label_limits_total Total number of times scrape pools hit the label limits, during sync or config reload. 
+# TYPE prometheus_target_scrape_pool_exceeded_label_limits_total counter +prometheus_target_scrape_pool_exceeded_label_limits_total 0 +# HELP prometheus_target_scrape_pool_exceeded_target_limit_total Total number of times scrape pools hit the target limit, during sync or config reload. +# TYPE prometheus_target_scrape_pool_exceeded_target_limit_total counter +prometheus_target_scrape_pool_exceeded_target_limit_total 0 +# HELP prometheus_target_scrape_pool_reloads_failed_total Total number of failed scrape pool reloads. +# TYPE prometheus_target_scrape_pool_reloads_failed_total counter +prometheus_target_scrape_pool_reloads_failed_total 0 +# HELP prometheus_target_scrape_pool_reloads_total Total number of scrape pool reloads. +# TYPE prometheus_target_scrape_pool_reloads_total counter +prometheus_target_scrape_pool_reloads_total 0 +# HELP prometheus_target_scrape_pool_sync_total Total number of syncs that were executed on a scrape pool. +# TYPE prometheus_target_scrape_pool_sync_total counter +prometheus_target_scrape_pool_sync_total{scrape_job="cadvisor"} 34137 +prometheus_target_scrape_pool_sync_total{scrape_job="kube-state-metrics"} 34137 +prometheus_target_scrape_pool_sync_total{scrape_job="pods"} 34137 +# HELP prometheus_target_scrape_pool_target_limit Maximum number of targets allowed in this scrape pool. +# TYPE prometheus_target_scrape_pool_target_limit gauge +prometheus_target_scrape_pool_target_limit{scrape_job="cadvisor"} 0 +prometheus_target_scrape_pool_target_limit{scrape_job="kube-state-metrics"} 0 +prometheus_target_scrape_pool_target_limit{scrape_job="pods"} 0 +# HELP prometheus_target_scrape_pool_targets Current number of targets in this scrape pool. 
+# TYPE prometheus_target_scrape_pool_targets gauge +prometheus_target_scrape_pool_targets{scrape_job="cadvisor"} 2 +prometheus_target_scrape_pool_targets{scrape_job="kube-state-metrics"} 2 +prometheus_target_scrape_pool_targets{scrape_job="pods"} 5 +# HELP prometheus_target_scrape_pools_failed_total Total number of scrape pool creations that failed. +# TYPE prometheus_target_scrape_pools_failed_total counter +prometheus_target_scrape_pools_failed_total 0 +# HELP prometheus_target_scrape_pools_total Total number of scrape pool creation attempts. +# TYPE prometheus_target_scrape_pools_total counter +prometheus_target_scrape_pools_total 3 +# HELP prometheus_target_scrapes_cache_flush_forced_total How many times a scrape cache was flushed due to getting big while scrapes are failing. +# TYPE prometheus_target_scrapes_cache_flush_forced_total counter +prometheus_target_scrapes_cache_flush_forced_total 0 +# HELP prometheus_target_scrapes_exceeded_body_size_limit_total Total number of scrapes that hit the body size limit +# TYPE prometheus_target_scrapes_exceeded_body_size_limit_total counter +prometheus_target_scrapes_exceeded_body_size_limit_total 0 +# HELP prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total Total number of scrapes that hit the native histogram bucket limit and were rejected. +# TYPE prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total counter +prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total 0 +# HELP prometheus_target_scrapes_exceeded_sample_limit_total Total number of scrapes that hit the sample limit and were rejected. +# TYPE prometheus_target_scrapes_exceeded_sample_limit_total counter +prometheus_target_scrapes_exceeded_sample_limit_total 0 +# HELP prometheus_target_scrapes_exemplar_out_of_order_total Total number of exemplar rejected due to not being out of the expected order. 
+# TYPE prometheus_target_scrapes_exemplar_out_of_order_total counter +prometheus_target_scrapes_exemplar_out_of_order_total 0 +# HELP prometheus_target_scrapes_sample_duplicate_timestamp_total Total number of samples rejected due to duplicate timestamps but different values. +# TYPE prometheus_target_scrapes_sample_duplicate_timestamp_total counter +prometheus_target_scrapes_sample_duplicate_timestamp_total 0 +# HELP prometheus_target_scrapes_sample_out_of_bounds_total Total number of samples rejected due to timestamp falling outside of the time bounds. +# TYPE prometheus_target_scrapes_sample_out_of_bounds_total counter +prometheus_target_scrapes_sample_out_of_bounds_total 0 +# HELP prometheus_target_scrapes_sample_out_of_order_total Total number of samples rejected due to not being out of the expected order. +# TYPE prometheus_target_scrapes_sample_out_of_order_total counter +prometheus_target_scrapes_sample_out_of_order_total 0 +# HELP prometheus_target_sync_failed_total Total number of target sync failures. +# TYPE prometheus_target_sync_failed_total counter +prometheus_target_sync_failed_total{scrape_job="cadvisor"} 0 +prometheus_target_sync_failed_total{scrape_job="kube-state-metrics"} 0 +prometheus_target_sync_failed_total{scrape_job="pods"} 0 +# HELP prometheus_target_sync_length_seconds Actual interval to sync the scrape pool. 
+# TYPE prometheus_target_sync_length_seconds summary +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.01"} 0.00016778 +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.05"} 0.00016778 +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.5"} 0.000201532 +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.9"} 0.000217346 +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.99"} 0.000217346 +prometheus_target_sync_length_seconds_sum{scrape_job="cadvisor"} 9.36278804700008 +prometheus_target_sync_length_seconds_count{scrape_job="cadvisor"} 34137 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.01"} 0.000148145 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.05"} 0.000148145 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.5"} 0.000175667 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.9"} 0.000188701 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.99"} 0.000188701 +prometheus_target_sync_length_seconds_sum{scrape_job="kube-state-metrics"} 6.007913164999995 +prometheus_target_sync_length_seconds_count{scrape_job="kube-state-metrics"} 34137 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.01"} 0.000867282 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.05"} 0.000867282 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.5"} 0.000913952 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.9"} 0.001163668 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.99"} 0.001163668 +prometheus_target_sync_length_seconds_sum{scrape_job="pods"} 44.38431514700025 +prometheus_target_sync_length_seconds_count{scrape_job="pods"} 34137 +# HELP prometheus_template_text_expansion_failures_total The total number of template 
text expansion failures. +# TYPE prometheus_template_text_expansion_failures_total counter +prometheus_template_text_expansion_failures_total 0 +# HELP prometheus_template_text_expansions_total The total number of template text expansions. +# TYPE prometheus_template_text_expansions_total counter +prometheus_template_text_expansions_total 0 +# HELP prometheus_treecache_watcher_goroutines The current number of watcher goroutines. +# TYPE prometheus_treecache_watcher_goroutines gauge +prometheus_treecache_watcher_goroutines 0 +# HELP prometheus_treecache_zookeeper_failures_total The total number of ZooKeeper failures. +# TYPE prometheus_treecache_zookeeper_failures_total counter +prometheus_treecache_zookeeper_failures_total 0 +# HELP prometheus_tsdb_blocks_loaded Number of currently loaded data blocks +# TYPE prometheus_tsdb_blocks_loaded gauge +prometheus_tsdb_blocks_loaded 16 +# HELP prometheus_tsdb_checkpoint_creations_failed_total Total number of checkpoint creations that failed. +# TYPE prometheus_tsdb_checkpoint_creations_failed_total counter +prometheus_tsdb_checkpoint_creations_failed_total 0 +# HELP prometheus_tsdb_checkpoint_creations_total Total number of checkpoint creations attempted. +# TYPE prometheus_tsdb_checkpoint_creations_total counter +prometheus_tsdb_checkpoint_creations_total 187 +# HELP prometheus_tsdb_checkpoint_deletions_failed_total Total number of checkpoint deletions that failed. +# TYPE prometheus_tsdb_checkpoint_deletions_failed_total counter +prometheus_tsdb_checkpoint_deletions_failed_total 0 +# HELP prometheus_tsdb_checkpoint_deletions_total Total number of checkpoint deletions attempted. +# TYPE prometheus_tsdb_checkpoint_deletions_total counter +prometheus_tsdb_checkpoint_deletions_total 187 +# HELP prometheus_tsdb_clean_start -1: lockfile is disabled. 0: a lockfile from a previous execution was replaced. 
1: lockfile creation was clean +# TYPE prometheus_tsdb_clean_start gauge +prometheus_tsdb_clean_start -1 +# HELP prometheus_tsdb_compaction_chunk_range_seconds Final time range of chunks on their first compaction +# TYPE prometheus_tsdb_compaction_chunk_range_seconds histogram +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="100"} 673 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="400"} 673 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="1600"} 673 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="6400"} 673 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="25600"} 952 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="102400"} 2954 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="409600"} 11240 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="1.6384e+06"} 34940 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="6.5536e+06"} 1.3837075e+07 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="2.62144e+07"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="+Inf"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_range_seconds_sum 2.9219718662064e+13 +prometheus_tsdb_compaction_chunk_range_seconds_count 1.3837077e+07 +# HELP prometheus_tsdb_compaction_chunk_samples Final number of samples on their first compaction +# TYPE prometheus_tsdb_compaction_chunk_samples histogram +prometheus_tsdb_compaction_chunk_samples_bucket{le="4"} 1813 +prometheus_tsdb_compaction_chunk_samples_bucket{le="6"} 2625 +prometheus_tsdb_compaction_chunk_samples_bucket{le="9"} 5359 +prometheus_tsdb_compaction_chunk_samples_bucket{le="13.5"} 7578 +prometheus_tsdb_compaction_chunk_samples_bucket{le="20.25"} 10695 +prometheus_tsdb_compaction_chunk_samples_bucket{le="30.375"} 14153 +prometheus_tsdb_compaction_chunk_samples_bucket{le="45.5625"} 20641 +prometheus_tsdb_compaction_chunk_samples_bucket{le="68.34375"} 26828 +prometheus_tsdb_compaction_chunk_samples_bucket{le="102.515625"} 37088 
+prometheus_tsdb_compaction_chunk_samples_bucket{le="153.7734375"} 1.3192758e+07 +prometheus_tsdb_compaction_chunk_samples_bucket{le="230.66015625"} 1.3830353e+07 +prometheus_tsdb_compaction_chunk_samples_bucket{le="345.990234375"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_samples_bucket{le="+Inf"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_samples_sum 1.852852608e+09 +prometheus_tsdb_compaction_chunk_samples_count 1.3837077e+07 +# HELP prometheus_tsdb_compaction_chunk_size_bytes Final size of chunks on their first compaction +# TYPE prometheus_tsdb_compaction_chunk_size_bytes histogram +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="32"} 5907 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="48"} 3.717611e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="72"} 3.972949e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="108"} 4.043949e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="162"} 4.106797e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="243"} 4.42655e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="364.5"} 1.075848e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="546.75"} 1.2225892e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="820.125"} 1.3311939e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1230.1875"} 1.3795122e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1845.28125"} 1.3836776e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="2767.921875"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="+Inf"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_size_bytes_sum 4.281044268e+09 +prometheus_tsdb_compaction_chunk_size_bytes_count 1.3837077e+07 +# HELP prometheus_tsdb_compaction_duration_seconds Duration of compaction runs +# TYPE prometheus_tsdb_compaction_duration_seconds histogram +prometheus_tsdb_compaction_duration_seconds_bucket{le="1"} 540 
+prometheus_tsdb_compaction_duration_seconds_bucket{le="2"} 540 +prometheus_tsdb_compaction_duration_seconds_bucket{le="4"} 554 +prometheus_tsdb_compaction_duration_seconds_bucket{le="8"} 559 +prometheus_tsdb_compaction_duration_seconds_bucket{le="16"} 559 +prometheus_tsdb_compaction_duration_seconds_bucket{le="32"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="64"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="128"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="256"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="512"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="1024"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="2048"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="4096"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="8192"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="+Inf"} 561 +prometheus_tsdb_compaction_duration_seconds_sum 272.2973793669999 +prometheus_tsdb_compaction_duration_seconds_count 561 +# HELP prometheus_tsdb_compaction_populating_block Set to 1 when a block is currently being written to the disk. +# TYPE prometheus_tsdb_compaction_populating_block gauge +prometheus_tsdb_compaction_populating_block 0 +# HELP prometheus_tsdb_compactions_failed_total Total number of compactions that failed for the partition. +# TYPE prometheus_tsdb_compactions_failed_total counter +prometheus_tsdb_compactions_failed_total 0 +# HELP prometheus_tsdb_compactions_skipped_total Total number of skipped compactions due to disabled auto compaction. +# TYPE prometheus_tsdb_compactions_skipped_total counter +prometheus_tsdb_compactions_skipped_total 0 +# HELP prometheus_tsdb_compactions_total Total number of compactions that were executed for the partition. +# TYPE prometheus_tsdb_compactions_total counter +prometheus_tsdb_compactions_total 561 +# HELP prometheus_tsdb_compactions_triggered_total Total number of triggered compactions for the partition. 
+# TYPE prometheus_tsdb_compactions_triggered_total counter +prometheus_tsdb_compactions_triggered_total 44842 +# HELP prometheus_tsdb_data_replay_duration_seconds Time taken to replay the data on disk. +# TYPE prometheus_tsdb_data_replay_duration_seconds gauge +prometheus_tsdb_data_replay_duration_seconds 0.767674068 +# HELP prometheus_tsdb_exemplar_exemplars_appended_total Total number of appended exemplars. +# TYPE prometheus_tsdb_exemplar_exemplars_appended_total counter +prometheus_tsdb_exemplar_exemplars_appended_total 0 +# HELP prometheus_tsdb_exemplar_exemplars_in_storage Number of exemplars currently in circular storage. +# TYPE prometheus_tsdb_exemplar_exemplars_in_storage gauge +prometheus_tsdb_exemplar_exemplars_in_storage 0 +# HELP prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds The timestamp of the oldest exemplar stored in circular storage. Useful to check for what timerange the current exemplar buffer limit allows. This usually means the last timestampfor all exemplars for a typical setup. This is not true though if one of the series timestamp is in future compared to rest series. +# TYPE prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds gauge +prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds 0 +# HELP prometheus_tsdb_exemplar_max_exemplars Total number of exemplars the exemplar storage can store, resizeable. +# TYPE prometheus_tsdb_exemplar_max_exemplars gauge +prometheus_tsdb_exemplar_max_exemplars 0 +# HELP prometheus_tsdb_exemplar_out_of_order_exemplars_total Total number of out of order exemplar ingestion failed attempts. +# TYPE prometheus_tsdb_exemplar_out_of_order_exemplars_total counter +prometheus_tsdb_exemplar_out_of_order_exemplars_total 0 +# HELP prometheus_tsdb_exemplar_series_with_exemplars_in_storage Number of series with exemplars currently in circular storage. 
+# TYPE prometheus_tsdb_exemplar_series_with_exemplars_in_storage gauge +prometheus_tsdb_exemplar_series_with_exemplars_in_storage 0 +# HELP prometheus_tsdb_head_active_appenders Number of currently active appender transactions +# TYPE prometheus_tsdb_head_active_appenders gauge +prometheus_tsdb_head_active_appenders 0 +# HELP prometheus_tsdb_head_chunks Total number of chunks in the head block. +# TYPE prometheus_tsdb_head_chunks gauge +prometheus_tsdb_head_chunks 47276 +# HELP prometheus_tsdb_head_chunks_created_total Total number of chunks created in the head +# TYPE prometheus_tsdb_head_chunks_created_total counter +prometheus_tsdb_head_chunks_created_total 1.3884353e+07 +# HELP prometheus_tsdb_head_chunks_removed_total Total number of chunks removed in the head +# TYPE prometheus_tsdb_head_chunks_removed_total counter +prometheus_tsdb_head_chunks_removed_total 1.3837077e+07 +# HELP prometheus_tsdb_head_chunks_storage_size_bytes Size of the chunks_head directory. +# TYPE prometheus_tsdb_head_chunks_storage_size_bytes gauge +prometheus_tsdb_head_chunks_storage_size_bytes 2.0828256e+07 +# HELP prometheus_tsdb_head_gc_duration_seconds Runtime of garbage collection in the head block. +# TYPE prometheus_tsdb_head_gc_duration_seconds summary +prometheus_tsdb_head_gc_duration_seconds_sum 3.114924039999997 +prometheus_tsdb_head_gc_duration_seconds_count 373 +# HELP prometheus_tsdb_head_max_time Maximum timestamp of the head block. The unit is decided by the library consumer. +# TYPE prometheus_tsdb_head_max_time gauge +prometheus_tsdb_head_max_time 1.727807345546e+12 +# HELP prometheus_tsdb_head_max_time_seconds Maximum timestamp of the head block. +# TYPE prometheus_tsdb_head_max_time_seconds gauge +prometheus_tsdb_head_max_time_seconds 1.727807345e+09 +# HELP prometheus_tsdb_head_min_time Minimum time bound of the head block. The unit is decided by the library consumer. 
+# TYPE prometheus_tsdb_head_min_time gauge +prometheus_tsdb_head_min_time 1.727798400141e+12 +# HELP prometheus_tsdb_head_min_time_seconds Minimum time bound of the head block. +# TYPE prometheus_tsdb_head_min_time_seconds gauge +prometheus_tsdb_head_min_time_seconds 1.7277984e+09 +# HELP prometheus_tsdb_head_out_of_order_samples_appended_total Total number of appended out of order samples. +# TYPE prometheus_tsdb_head_out_of_order_samples_appended_total counter +prometheus_tsdb_head_out_of_order_samples_appended_total 0 +# HELP prometheus_tsdb_head_samples_appended_total Total number of appended samples. +# TYPE prometheus_tsdb_head_samples_appended_total counter +prometheus_tsdb_head_samples_appended_total{type="float"} 1.856200861e+09 +prometheus_tsdb_head_samples_appended_total{type="histogram"} 0 +# HELP prometheus_tsdb_head_series Total number of series in the head block. +# TYPE prometheus_tsdb_head_series gauge +prometheus_tsdb_head_series 10789 +# HELP prometheus_tsdb_head_series_created_total Total number of series created in the head +# TYPE prometheus_tsdb_head_series_created_total counter +prometheus_tsdb_head_series_created_total 42838 +# HELP prometheus_tsdb_head_series_not_found_total Total number of requests for series that were not found. +# TYPE prometheus_tsdb_head_series_not_found_total counter +prometheus_tsdb_head_series_not_found_total 0 +# HELP prometheus_tsdb_head_series_removed_total Total number of series removed in the head +# TYPE prometheus_tsdb_head_series_removed_total counter +prometheus_tsdb_head_series_removed_total 32049 +# HELP prometheus_tsdb_head_truncations_failed_total Total number of head truncations that failed. +# TYPE prometheus_tsdb_head_truncations_failed_total counter +prometheus_tsdb_head_truncations_failed_total 0 +# HELP prometheus_tsdb_head_truncations_total Total number of head truncations attempted. 
+# TYPE prometheus_tsdb_head_truncations_total counter +prometheus_tsdb_head_truncations_total 373 +# HELP prometheus_tsdb_isolation_high_watermark The highest TSDB append ID that has been given out. +# TYPE prometheus_tsdb_isolation_high_watermark gauge +prometheus_tsdb_isolation_high_watermark 1.614044e+06 +# HELP prometheus_tsdb_isolation_low_watermark The lowest TSDB append ID that is still referenced. +# TYPE prometheus_tsdb_isolation_low_watermark gauge +prometheus_tsdb_isolation_low_watermark 1.614044e+06 +# HELP prometheus_tsdb_lowest_timestamp Lowest timestamp value stored in the database. The unit is decided by the library consumer. +# TYPE prometheus_tsdb_lowest_timestamp gauge +prometheus_tsdb_lowest_timestamp 1.711547243455e+12 +# HELP prometheus_tsdb_lowest_timestamp_seconds Lowest timestamp value stored in the database. +# TYPE prometheus_tsdb_lowest_timestamp_seconds gauge +prometheus_tsdb_lowest_timestamp_seconds 1.711547243e+09 +# HELP prometheus_tsdb_mmap_chunk_corruptions_total Total number of memory-mapped chunk corruptions. +# TYPE prometheus_tsdb_mmap_chunk_corruptions_total counter +prometheus_tsdb_mmap_chunk_corruptions_total 0 +# HELP prometheus_tsdb_out_of_bound_samples_total Total number of out of bound samples ingestion failed attempts with out of order support disabled. +# TYPE prometheus_tsdb_out_of_bound_samples_total counter +prometheus_tsdb_out_of_bound_samples_total{type="float"} 0 +# HELP prometheus_tsdb_out_of_order_samples_total Total number of out of order samples ingestion failed attempts due to out of order being disabled. +# TYPE prometheus_tsdb_out_of_order_samples_total counter +prometheus_tsdb_out_of_order_samples_total{type="float"} 0 +prometheus_tsdb_out_of_order_samples_total{type="histogram"} 0 +# HELP prometheus_tsdb_reloads_failures_total Number of times the database failed to reloadBlocks block data from disk. 
+# TYPE prometheus_tsdb_reloads_failures_total counter +prometheus_tsdb_reloads_failures_total 0 +# HELP prometheus_tsdb_reloads_total Number of times the database reloaded block data from disk. +# TYPE prometheus_tsdb_reloads_total counter +prometheus_tsdb_reloads_total 45030 +# HELP prometheus_tsdb_retention_limit_bytes Max number of bytes to be retained in the tsdb blocks, configured 0 means disabled +# TYPE prometheus_tsdb_retention_limit_bytes gauge +prometheus_tsdb_retention_limit_bytes 5.36870912e+11 +# HELP prometheus_tsdb_size_retentions_total The number of times that blocks were deleted because the maximum number of bytes was exceeded. +# TYPE prometheus_tsdb_size_retentions_total counter +prometheus_tsdb_size_retentions_total 0 +# HELP prometheus_tsdb_snapshot_replay_error_total Total number snapshot replays that failed. +# TYPE prometheus_tsdb_snapshot_replay_error_total counter +prometheus_tsdb_snapshot_replay_error_total 0 +# HELP prometheus_tsdb_storage_blocks_bytes The number of bytes that are currently used for local storage by all blocks. +# TYPE prometheus_tsdb_storage_blocks_bytes gauge +prometheus_tsdb_storage_blocks_bytes 2.7078242758e+10 +# HELP prometheus_tsdb_symbol_table_size_bytes Size of symbol table in memory for loaded blocks +# TYPE prometheus_tsdb_symbol_table_size_bytes gauge +prometheus_tsdb_symbol_table_size_bytes 6624 +# HELP prometheus_tsdb_time_retentions_total The number of times that blocks were deleted because the maximum time limit was exceeded. +# TYPE prometheus_tsdb_time_retentions_total counter +prometheus_tsdb_time_retentions_total 0 +# HELP prometheus_tsdb_tombstone_cleanup_seconds The time taken to recompact blocks to remove tombstones. 
+# TYPE prometheus_tsdb_tombstone_cleanup_seconds histogram +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.005"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.01"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.025"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.05"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.1"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.25"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.5"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="1"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="2.5"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="5"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="10"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="+Inf"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_sum 0 +prometheus_tsdb_tombstone_cleanup_seconds_count 0 +# HELP prometheus_tsdb_too_old_samples_total Total number of out of order samples ingestion failed attempts with out of support enabled, but sample outside of time window. +# TYPE prometheus_tsdb_too_old_samples_total counter +prometheus_tsdb_too_old_samples_total{type="float"} 0 +# HELP prometheus_tsdb_vertical_compactions_total Total number of compactions done on overlapping blocks. +# TYPE prometheus_tsdb_vertical_compactions_total counter +prometheus_tsdb_vertical_compactions_total 0 +# HELP prometheus_tsdb_wal_completed_pages_total Total number of completed pages. +# TYPE prometheus_tsdb_wal_completed_pages_total counter +prometheus_tsdb_wal_completed_pages_total 397233 +# HELP prometheus_tsdb_wal_corruptions_total Total number of WAL corruptions. +# TYPE prometheus_tsdb_wal_corruptions_total counter +prometheus_tsdb_wal_corruptions_total 0 +# HELP prometheus_tsdb_wal_fsync_duration_seconds Duration of write log fsync. 
+# TYPE prometheus_tsdb_wal_fsync_duration_seconds summary +prometheus_tsdb_wal_fsync_duration_seconds{quantile="0.5"} NaN +prometheus_tsdb_wal_fsync_duration_seconds{quantile="0.9"} NaN +prometheus_tsdb_wal_fsync_duration_seconds{quantile="0.99"} NaN +prometheus_tsdb_wal_fsync_duration_seconds_sum 0.805116427 +prometheus_tsdb_wal_fsync_duration_seconds_count 373 +# HELP prometheus_tsdb_wal_page_flushes_total Total number of page flushes. +# TYPE prometheus_tsdb_wal_page_flushes_total counter +prometheus_tsdb_wal_page_flushes_total 2.011145e+06 +# HELP prometheus_tsdb_wal_segment_current Write log segment index that TSDB is currently writing to. +# TYPE prometheus_tsdb_wal_segment_current gauge +prometheus_tsdb_wal_segment_current 2277 +# HELP prometheus_tsdb_wal_storage_size_bytes Size of the write log directory. +# TYPE prometheus_tsdb_wal_storage_size_bytes gauge +prometheus_tsdb_wal_storage_size_bytes 9.6264943e+07 +# HELP prometheus_tsdb_wal_truncate_duration_seconds Duration of WAL truncation. +# TYPE prometheus_tsdb_wal_truncate_duration_seconds summary +prometheus_tsdb_wal_truncate_duration_seconds_sum 69.80804534300002 +prometheus_tsdb_wal_truncate_duration_seconds_count 187 +# HELP prometheus_tsdb_wal_truncations_failed_total Total number of write log truncations that failed. +# TYPE prometheus_tsdb_wal_truncations_failed_total counter +prometheus_tsdb_wal_truncations_failed_total 0 +# HELP prometheus_tsdb_wal_truncations_total Total number of write log truncations attempted. +# TYPE prometheus_tsdb_wal_truncations_total counter +prometheus_tsdb_wal_truncations_total 187 +# HELP prometheus_tsdb_wal_writes_failed_total Total number of write log writes that failed. +# TYPE prometheus_tsdb_wal_writes_failed_total counter +prometheus_tsdb_wal_writes_failed_total 0 +# HELP prometheus_web_federation_errors_total Total number of errors that occurred while sending federation responses. 
+# TYPE prometheus_web_federation_errors_total counter +prometheus_web_federation_errors_total 0 +# HELP prometheus_web_federation_warnings_total Total number of warnings that occurred while sending federation responses. +# TYPE prometheus_web_federation_warnings_total counter +prometheus_web_federation_warnings_total 0 +# HELP promhttp_metric_handler_requests_in_flight Current number of scrapes being served. +# TYPE promhttp_metric_handler_requests_in_flight gauge +promhttp_metric_handler_requests_in_flight 1 +# HELP promhttp_metric_handler_requests_total Total number of scrapes by HTTP status code. +# TYPE promhttp_metric_handler_requests_total counter +promhttp_metric_handler_requests_total{code="200"} 179357 +promhttp_metric_handler_requests_total{code="500"} 0 +promhttp_metric_handler_requests_total{code="503"} 0 diff --git a/manifests/scenarios/gmp-agent/1_collector.yaml b/manifests/scenarios/gmp-agent/1_collector.yaml deleted file mode 100644 index 46fc236..0000000 --- a/manifests/scenarios/gmp-agent/1_collector.yaml +++ /dev/null @@ -1,232 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: collector - namespace: gmp-system ---- -# Source: prometheus-engine/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: gmp-system:collector -rules: -- resources: - - endpoints - - nodes - - nodes/metrics - - pods - - services - apiGroups: [""] - verbs: ["get", "list", "watch"] -- resources: - - configmaps - apiGroups: [""] - verbs: ["get"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] ---- -# Source: prometheus-engine/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: gmp-system:collector -roleRef: - name: gmp-system:collector - kind: ClusterRole - apiGroup: rbac.authorization.k8s.io -subjects: -- name: collector - namespace: gmp-system - kind: ServiceAccount ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: collector - namespace: 
gmp-system - labels: - prometheus: "gmp-agent" - benchmark: {{ .Env.BENCH_NAME }} -spec: - selector: - matchLabels: - # DO NOT MODIFY - label selectors are immutable by the Kubernetes API. - # see: https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/#pod-selector. - app.kubernetes.io/name: collector - template: - metadata: - labels: - app: managed-prometheus-collector - app.kubernetes.io/name: collector - app.kubernetes.io/version: 0.11.0 - prometheus: "gmp-agent" - benchmark: {{ .Env.BENCH_NAME }} - annotations: - # The emptyDir for the storage and config directories prevents cluster - # autoscaling unless this annotation is set. - cluster-autoscaler.kubernetes.io/safe-to-evict: "true" - components.gke.io/component-name: managed_prometheus - spec: - serviceAccountName: collector - automountServiceAccountToken: true - priorityClassName: gmp-critical - initContainers: - - name: config-init - image: gke.gcr.io/gke-distroless/bash:20220419 - command: ['/bin/bash', '-c', 'touch /prometheus/config_out/config.yaml'] - volumeMounts: - - name: config-out - mountPath: /prometheus/config_out - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - containers: - - name: config-reloader - image: gke.gcr.io/prometheus-engine/config-reloader:v0.9.0-gke.1 - args: - - --config-file=/prometheus/config/config.yaml - - --config-file-output=/prometheus/config_out/config.yaml - - --reload-url=http://127.0.0.1:19090/-/reload - - --ready-url=http://127.0.0.1:19090/-/ready - - --listen-address=:19091 - ports: - - name: cfg-rel-ins - containerPort: 19091 - env: - - name: NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - resources: - limits: - memory: 32M - requests: - cpu: 1m - memory: 4M - volumeMounts: - - name: config - readOnly: true - mountPath: /prometheus/config - - name: config-out - mountPath: /prometheus/config_out - securityContext: - allowPrivilegeEscalation: false - capabilities: - 
drop: - - all - privileged: false - - name: prometheus - image: gke.gcr.io/prometheus-engine/prometheus:v2.45.3-gmp.7-gke.0 - args: - - --config.file=/prometheus/config_out/config.yaml - - --enable-feature=exemplar-storage - - --enable-feature=agent - # Special Google flag for authorization using native Kubernetes secrets. - - --enable-feature=google-kubernetes-secret-provider - - --storage.agent.path=/prometheus/data - - --storage.agent.no-lockfile - - --storage.agent.retention.max-time=30m - - --storage.agent.wal-compression - - --web.listen-address=:19090 - - --web.enable-lifecycle - - --web.route-prefix=/ - - --export.user-agent-mode=kubectl - # JSON log format is needed for GKE to display log levels correctly. - - --log.format=json - ports: - - name: prom-ins - containerPort: 19090 - # The environment variable EXTRA_ARGS will be populated by the operator. - # DO NOT specify it here. - env: - - name: GOGC - value: "25" - resources: - limits: - memory: 2G - requests: - cpu: 4m - memory: 32M - volumeMounts: - - name: storage - mountPath: /prometheus/data - - name: config-out - readOnly: true - mountPath: /prometheus/config_out - - name: collection-secret - readOnly: true - mountPath: /etc/secrets - livenessProbe: - httpGet: - port: 19090 - path: /-/healthy - scheme: HTTP - readinessProbe: - httpGet: - port: 19090 - path: /-/ready - scheme: HTTP - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - volumes: - - name: storage - emptyDir: {} - - name: config - configMap: - name: collector - - name: config-out - emptyDir: {} - - name: collection-secret - secret: - secretName: collection - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 - - amd64 - - key: kubernetes.io/os - operator: In - values: - - linux - tolerations: - - effect: NoExecute - operator: Exists - - effect: NoSchedule 
- operator: Exists - securityContext: - runAsGroup: 1000 - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - nodeSelector: - role: {{ .Env.BENCH_NAME }}-work ---- -apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: avalanche - namespace: {{ .Env.BENCH_NAME }} - labels: - app: avalanche -spec: - endpoints: - - port: metrics - interval: 15s - path: /metrics - selector: - matchLabels: - app: avalanche diff --git a/manifests/scenarios/gmp-noexport-2.51.1/1_collector.yaml b/manifests/scenarios/gmp-noexport-2.51.1/1_collector.yaml deleted file mode 100644 index 9890202..0000000 --- a/manifests/scenarios/gmp-noexport-2.51.1/1_collector.yaml +++ /dev/null @@ -1,244 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: collector - namespace: gmp-system ---- -# Source: prometheus-engine/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: gmp-system:collector -rules: -- resources: - - endpoints - - nodes - - nodes/metrics - - pods - - services - apiGroups: [""] - verbs: ["get", "list", "watch"] -- resources: - - configmaps - apiGroups: [""] - verbs: ["get"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] ---- -# Source: prometheus-engine/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: gmp-system:collector -roleRef: - name: gmp-system:collector - kind: ClusterRole - apiGroup: rbac.authorization.k8s.io -subjects: -- name: collector - namespace: gmp-system - kind: ServiceAccount ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: collector - namespace: gmp-system - labels: - prometheus: "gmp-noexport-2.51.1" - benchmark: {{ .Env.BENCH_NAME }} -spec: - selector: - matchLabels: - # DO NOT MODIFY - label selectors are immutable by the Kubernetes API. - # see: https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/#pod-selector. 
- app.kubernetes.io/name: collector - template: - metadata: - labels: - app: managed-prometheus-collector - app.kubernetes.io/name: collector - app.kubernetes.io/version: 0.11.0 - prometheus: "gmp-noexport-2.51.1" - benchmark: {{ .Env.BENCH_NAME }} - annotations: - # The emptyDir for the storage and config directories prevents cluster - # autoscaling unless this annotation is set. - cluster-autoscaler.kubernetes.io/safe-to-evict: "true" - components.gke.io/component-name: managed_prometheus - spec: - serviceAccountName: collector - automountServiceAccountToken: true - priorityClassName: gmp-critical - initContainers: - - name: config-init - image: gke.gcr.io/gke-distroless/bash:20220419 - command: ['/bin/bash', '-c', 'touch /prometheus/config_out/config.yaml'] - volumeMounts: - - name: config-out - mountPath: /prometheus/config_out - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - containers: - - name: config-reloader - image: gke.gcr.io/prometheus-engine/config-reloader:v0.9.0-gke.1 - args: - - --config-file=/prometheus/config/config.yaml - - --config-file-output=/prometheus/config_out/config.yaml - - --reload-url=http://127.0.0.1:19090/-/reload - - --ready-url=http://127.0.0.1:19090/-/ready - - --listen-address=:19091 - ports: - - name: cfg-rel-ins - containerPort: 19091 - env: - - name: NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - resources: - limits: - memory: 32M - requests: - cpu: 1m - memory: 4M - volumeMounts: - - name: config - readOnly: true - mountPath: /prometheus/config - - name: config-out - mountPath: /prometheus/config_out - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - - name: prometheus - image: quay.io/prometheus/prometheus:v2.51.1 - args: - - --config.file=/prometheus/config_out/config.yaml - - --enable-feature=exemplar-storage - # Special Google flag for authorization using native Kubernetes 
secrets. - - --enable-feature=google-kubernetes-secret-provider - - --storage.tsdb.path=/prometheus/data - - --storage.tsdb.no-lockfile - # Special Google flag for force deleting all data on start. We use ephemeral storage in - # this manifest, but there are cases were container restart still reuses, potentially - # bad data (corrupted, with high cardinality causing OOMs or slow startups). - # Force deleting, so container restart is consistent with pod restart. - # NOTE: Data is likely already sent GCM, plus GCM export does not use that - # data on disk (WAL). - - --gmp.storage.delete-data-on-start - # Keep 30 minutes of data. As we are backed by an emptyDir volume, this will count towards - # the containers memory usage. We could lower it further if this becomes problematic, but - # it the window for local data is quite convenient for debugging. - - --storage.tsdb.retention.time=30m - - --storage.tsdb.wal-compression - # Effectively disable compaction and make blocks short enough so that our retention window - # can be kept in practice. - - --storage.tsdb.min-block-duration=10m - - --storage.tsdb.max-block-duration=10m - - --web.listen-address=:19090 - - --web.enable-lifecycle - - --web.route-prefix=/ - # JSON log format is needed for GKE to display log levels correctly. - - --log.format=json - ports: - - name: prom-ins - containerPort: 19090 - # The environment variable EXTRA_ARGS will be populated by the operator. - # DO NOT specify it here. 
- env: - - name: GOGC - value: "25" - resources: - limits: - memory: 2G - requests: - cpu: 4m - memory: 32M - volumeMounts: - - name: storage - mountPath: /prometheus/data - - name: config-out - readOnly: true - mountPath: /prometheus/config_out - - name: collection-secret - readOnly: true - mountPath: /etc/secrets - livenessProbe: - httpGet: - port: 19090 - path: /-/healthy - scheme: HTTP - readinessProbe: - httpGet: - port: 19090 - path: /-/ready - scheme: HTTP - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - volumes: - - name: storage - emptyDir: {} - - name: config - configMap: - name: collector - - name: config-out - emptyDir: {} - - name: collection-secret - secret: - secretName: collection - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 - - amd64 - - key: kubernetes.io/os - operator: In - values: - - linux - tolerations: - - effect: NoExecute - operator: Exists - - effect: NoSchedule - operator: Exists - securityContext: - runAsGroup: 1000 - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - nodeSelector: - role: {{ .Env.BENCH_NAME }}-work ---- -apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: avalanche - namespace: {{ .Env.BENCH_NAME }} - labels: - app: avalanche -spec: - endpoints: - - port: metrics - interval: 15s - path: /metrics - selector: - matchLabels: - app: avalanche diff --git a/manifests/scenarios/gmp-noexport/1_collector.yaml b/manifests/scenarios/gmp-noexport/1_collector.yaml deleted file mode 100644 index 72cfd13..0000000 --- a/manifests/scenarios/gmp-noexport/1_collector.yaml +++ /dev/null @@ -1,246 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: collector - namespace: gmp-system ---- -# Source: prometheus-engine/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 
-kind: ClusterRole -metadata: - name: gmp-system:collector -rules: -- resources: - - endpoints - - nodes - - nodes/metrics - - pods - - services - apiGroups: [""] - verbs: ["get", "list", "watch"] -- resources: - - configmaps - apiGroups: [""] - verbs: ["get"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] ---- -# Source: prometheus-engine/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: gmp-system:collector -roleRef: - name: gmp-system:collector - kind: ClusterRole - apiGroup: rbac.authorization.k8s.io -subjects: -- name: collector - namespace: gmp-system - kind: ServiceAccount ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: collector - namespace: gmp-system - labels: - prometheus: "gmp-noexport" - benchmark: {{ .Env.BENCH_NAME }} -spec: - selector: - matchLabels: - # DO NOT MODIFY - label selectors are immutable by the Kubernetes API. - # see: https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/#pod-selector. - app.kubernetes.io/name: collector - template: - metadata: - labels: - app: managed-prometheus-collector - app.kubernetes.io/name: collector - app.kubernetes.io/version: 0.11.0 - prometheus: "gmp-noexport" - benchmark: {{ .Env.BENCH_NAME }} - annotations: - # The emptyDir for the storage and config directories prevents cluster - # autoscaling unless this annotation is set. 
- cluster-autoscaler.kubernetes.io/safe-to-evict: "true" - components.gke.io/component-name: managed_prometheus - spec: - serviceAccountName: collector - automountServiceAccountToken: true - priorityClassName: gmp-critical - initContainers: - - name: config-init - image: gke.gcr.io/gke-distroless/bash:20220419 - command: ['/bin/bash', '-c', 'touch /prometheus/config_out/config.yaml'] - volumeMounts: - - name: config-out - mountPath: /prometheus/config_out - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - containers: - - name: config-reloader - image: gke.gcr.io/prometheus-engine/config-reloader:v0.9.0-gke.1 - args: - - --config-file=/prometheus/config/config.yaml - - --config-file-output=/prometheus/config_out/config.yaml - - --reload-url=http://127.0.0.1:19090/-/reload - - --ready-url=http://127.0.0.1:19090/-/ready - - --listen-address=:19091 - ports: - - name: cfg-rel-ins - containerPort: 19091 - env: - - name: NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - resources: - limits: - memory: 32M - requests: - cpu: 1m - memory: 4M - volumeMounts: - - name: config - readOnly: true - mountPath: /prometheus/config - - name: config-out - mountPath: /prometheus/config_out - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - - name: prometheus - image: gke.gcr.io/prometheus-engine/prometheus:v2.45.3-gmp.7-gke.0 - args: - - --config.file=/prometheus/config_out/config.yaml - - --enable-feature=exemplar-storage - # Special Google flag for authorization using native Kubernetes secrets. - - --enable-feature=google-kubernetes-secret-provider - - --storage.tsdb.path=/prometheus/data - - --storage.tsdb.no-lockfile - # Special Google flag for force deleting all data on start. 
We use ephemeral storage in - # this manifest, but there are cases were container restart still reuses, potentially - # bad data (corrupted, with high cardinality causing OOMs or slow startups). - # Force deleting, so container restart is consistent with pod restart. - # NOTE: Data is likely already sent GCM, plus GCM export does not use that - # data on disk (WAL). - - --gmp.storage.delete-data-on-start - # Keep 30 minutes of data. As we are backed by an emptyDir volume, this will count towards - # the containers memory usage. We could lower it further if this becomes problematic, but - # it the window for local data is quite convenient for debugging. - - --storage.tsdb.retention.time=30m - - --storage.tsdb.wal-compression - # Effectively disable compaction and make blocks short enough so that our retention window - # can be kept in practice. - - --storage.tsdb.min-block-duration=10m - - --storage.tsdb.max-block-duration=10m - - --web.listen-address=:19090 - - --web.enable-lifecycle - - --web.route-prefix=/ - - --export.disable - - --export.user-agent-mode=kubectl - # JSON log format is needed for GKE to display log levels correctly. - - --log.format=json - ports: - - name: prom-ins - containerPort: 19090 - # The environment variable EXTRA_ARGS will be populated by the operator. - # DO NOT specify it here. 
- env: - - name: GOGC - value: "25" - resources: - limits: - memory: 2G - requests: - cpu: 4m - memory: 32M - volumeMounts: - - name: storage - mountPath: /prometheus/data - - name: config-out - readOnly: true - mountPath: /prometheus/config_out - - name: collection-secret - readOnly: true - mountPath: /etc/secrets - livenessProbe: - httpGet: - port: 19090 - path: /-/healthy - scheme: HTTP - readinessProbe: - httpGet: - port: 19090 - path: /-/ready - scheme: HTTP - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - volumes: - - name: storage - emptyDir: {} - - name: config - configMap: - name: collector - - name: config-out - emptyDir: {} - - name: collection-secret - secret: - secretName: collection - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 - - amd64 - - key: kubernetes.io/os - operator: In - values: - - linux - tolerations: - - effect: NoExecute - operator: Exists - - effect: NoSchedule - operator: Exists - securityContext: - runAsGroup: 1000 - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - nodeSelector: - role: {{ .Env.BENCH_NAME }}-work ---- -apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: avalanche - namespace: {{ .Env.BENCH_NAME }} - labels: - app: avalanche -spec: - endpoints: - - port: metrics - interval: 15s - path: /metrics - selector: - matchLabels: - app: avalanche diff --git a/manifests/scenarios/gmp-otel/1_collector.yaml b/manifests/scenarios/gmp-otel/1_collector.yaml new file mode 100644 index 0000000..69e66f5 --- /dev/null +++ b/manifests/scenarios/gmp-otel/1_collector.yaml @@ -0,0 +1,276 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: collector + namespace: {{ .Env.BENCH_NAME }} + annotations: + iam.gke.io/gcp-service-account: gmp-prombench@{{ .Env.PROJECT_ID 
}}.iam.gserviceaccount.com +--- +# Source: prometheus-engine/templates/role.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ .Env.BENCH_NAME }}:collector +rules: +- resources: + - endpoints + - nodes + - nodes/metrics + - pods + - services + apiGroups: [""] + verbs: ["get", "list", "watch"] +- resources: + - configmaps + apiGroups: [""] + verbs: ["get"] +- nonResourceURLs: ["/metrics"] + verbs: ["get"] +--- +# Source: prometheus-engine/templates/rolebinding.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ .Env.BENCH_NAME }}:collector +roleRef: + name: {{ .Env.BENCH_NAME }}:collector + kind: ClusterRole + apiGroup: rbac.authorization.k8s.io +subjects: +- name: collector + namespace: {{ .Env.BENCH_NAME }} + kind: ServiceAccount +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: collector + namespace: {{ .Env.BENCH_NAME }} + labels: + benchmark: {{ .Env.BENCH_NAME }} +spec: + selector: + matchLabels: + # DO NOT MODIFY - label selectors are immutable by the Kubernetes API. + # see: https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/#pod-selector. + app.kubernetes.io/name: collector + template: + metadata: + labels: + app: collector + app.kubernetes.io/name: collector + benchmark: {{ .Env.BENCH_NAME }} + annotations: + # The emptyDir for the storage and config directories prevents cluster + # autoscaling unless this annotation is set. 
+ cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + spec: + serviceAccountName: collector + automountServiceAccountToken: true + containers: + - name: otel-collector + image: otel/opentelemetry-collector-contrib:0.105.0 + command: + - "/otelcol-contrib" + - "--config=/conf/collector.yaml" + env: + - name: MY_POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + volumeMounts: + - name: collector-config + mountPath: /conf + ports: + - name: otel-ins # -ins, tells core Prometheus to scrape it. + containerPort: 8888 + readinessProbe: + httpGet: + path: / + port: 13133 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - all + privileged: false + volumes: + - name: collector-config + configMap: + name: collector-config + items: + - key: collector.yaml + path: collector.yaml + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + - amd64 + - key: kubernetes.io/os + operator: In + values: + - linux + tolerations: + - effect: NoExecute + operator: Exists + - effect: NoSchedule + operator: Exists + securityContext: + runAsGroup: 1000 + runAsNonRoot: true + runAsUser: 1000 + seccompProfile: + type: RuntimeDefault + nodeSelector: + role: {{ .Env.BENCH_NAME }}-work +--- +apiVersion: v1 +kind: ConfigMap +metadata: + creationTimestamp: null + name: collector-config + namespace: {{ .Env.BENCH_NAME }} +data: + collector.yaml: | + exporters: + googlemanagedprometheus: + + extensions: + health_check: + endpoint: ${env:MY_POD_IP}:13133 + processors: + # resource/bench: + # attributes: + # - key: "cluster" + # value: "{{ .Env.CLUSTER_NAME }}" + # action: upsert + # - key: "namespace" + # value: "{{ .Env.BENCH_NAME }}" + # action: upsert + # - key: "location" + # value: "{{ .Env.ZONE }}" + # action: upsert + + 
batch: + send_batch_max_size: 200 + send_batch_size: 200 + timeout: 5s + + resourcedetection: + detectors: [gcp] + timeout: 10s + + #transform/collision: + # metric_statements: + # - context: datapoint + # statements: + # - set(attributes["exported_location"], attributes["location"]) + # - delete_key(attributes, "location") + # - set(attributes["exported_cluster"], attributes["cluster"]) + # - delete_key(attributes, "cluster") + # - set(attributes["exported_namespace"], attributes["namespace"]) + # - delete_key(attributes, "namespace") + # - set(attributes["exported_job"], attributes["job"]) + # - delete_key(attributes, "job") + # - set(attributes["exported_instance"], attributes["instance"]) + # - delete_key(attributes, "instance") + # - set(attributes["exported_project_id"], attributes["project_id"]) + # - delete_key(attributes, "project_id") + + receivers: + # NOTE(bwplotka): Mimicking scrape config of GMP operator, to ensure + # we have the same labels on the output with the same relabel processing. 
+ # Related issue: https://github.com/bwplotka/prombenchy/issues/13 + prometheus/bench: + config: + scrape_configs: + - job_name: PodMonitoring/gmp/avalanche/metrics + honor_timestamps: false + scrape_interval: 15s + scrape_timeout: 15s + metrics_path: /metrics + follow_redirects: true + enable_http2: true + relabel_configs: + - source_labels: [__meta_kubernetes_namespace] + regex: {{ .Env.BENCH_NAME }} + action: keep + - source_labels: [__meta_kubernetes_pod_label_app] + regex: avalanche + action: keep + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + action: replace + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + action: replace + - source_labels: [__meta_kubernetes_namespace] + target_label: namespace + action: replace + - target_label: job + replacement: avalanche + action: replace + - source_labels: [__meta_kubernetes_pod_phase] + regex: (Failed|Succeeded) + action: drop + - target_label: project_id + replacement: {{ .Env.PROJECT_ID }} + action: replace + - target_label: location + replacement: {{ .Env.ZONE }} + action: replace + - target_label: cluster + replacement: {{ .Env.CLUSTER_NAME }} + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: __tmp_instance + action: replace + - source_labels: [__meta_kubernetes_pod_controller_kind, __meta_kubernetes_pod_node_name] + regex: DaemonSet;(.*) + target_label: __tmp_instance + replacement: $$1 + action: replace + - source_labels: [__meta_kubernetes_pod_container_port_name] + regex: metrics + action: keep + - source_labels: [__tmp_instance, __meta_kubernetes_pod_container_port_name] + regex: (.+);(.+) + target_label: instance + replacement: $$1:$$2 + action: replace + kubernetes_sd_configs: + - role: pod + kubeconfig_file: "" + follow_redirects: true + enable_http2: true + selectors: + - role: pod + field: spec.nodeName=${env:NODE_NAME} + + service: + extensions: + - health_check + pipelines: + metrics: + exporters: + - 
googlemanagedprometheus + processors: + - resourcedetection + - batch + # - transform/collision + receivers: + - prometheus/bench + telemetry: + metrics: + address: 0.0.0.0:8888 diff --git a/manifests/scenarios/gmp/1_collector.yaml b/manifests/scenarios/gmp/1_collector.yaml index 83bcfa0..0019ebe 100644 --- a/manifests/scenarios/gmp/1_collector.yaml +++ b/manifests/scenarios/gmp/1_collector.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: ServiceAccount metadata: name: collector - namespace: gmp-system + namespace: {{ .Env.BENCH_NAME }} annotations: iam.gke.io/gcp-service-account: gmp-prombench@{{ .Env.PROJECT_ID }}.iam.gserviceaccount.com --- @@ -10,7 +10,7 @@ metadata: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: gmp-system:collector + name: {{ .Env.BENCH_NAME }}:collector rules: - resources: - endpoints @@ -31,23 +31,94 @@ rules: apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: gmp-system:collector + name: {{ .Env.BENCH_NAME }}:collector roleRef: - name: gmp-system:collector + name: {{ .Env.BENCH_NAME }}:collector kind: ClusterRole apiGroup: rbac.authorization.k8s.io subjects: - name: collector - namespace: gmp-system + namespace: {{ .Env.BENCH_NAME }} kind: ServiceAccount --- +apiVersion: v1 +kind: ConfigMap +metadata: + name: collector + namespace: {{ .Env.BENCH_NAME }} +data: + config.yaml: | + global: {} + scrape_configs: + - job_name: PodMonitoring/gmp/avalanche/metrics + honor_timestamps: false + scrape_interval: 15s + scrape_timeout: 15s + metrics_path: /metrics + follow_redirects: true + enable_http2: true + relabel_configs: + - source_labels: [__meta_kubernetes_namespace] + regex: {{ .Env.BENCH_NAME }} + action: keep + - source_labels: [__meta_kubernetes_pod_label_app] + regex: avalanche + action: keep + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + action: replace + - source_labels: [__meta_kubernetes_pod_container_name] + target_label: container + action: replace + - 
source_labels: [__meta_kubernetes_namespace] + target_label: namespace + action: replace + - target_label: job + replacement: avalanche + action: replace + - source_labels: [__meta_kubernetes_pod_phase] + regex: (Failed|Succeeded) + action: drop + - target_label: project_id + replacement: {{ .Env.PROJECT_ID }} + action: replace + - target_label: location + replacement: {{ .Env.ZONE }} + action: replace + - target_label: cluster + replacement: {{ .Env.CLUSTER_NAME }} + action: replace + - source_labels: [__meta_kubernetes_pod_name] + target_label: __tmp_instance + action: replace + - source_labels: [__meta_kubernetes_pod_controller_kind, __meta_kubernetes_pod_node_name] + regex: DaemonSet;(.*) + target_label: __tmp_instance + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_container_port_name] + regex: metrics + action: keep + - source_labels: [__tmp_instance, __meta_kubernetes_pod_container_port_name] + regex: (.+);(.+) + target_label: instance + replacement: $1:$2 + action: replace + kubernetes_sd_configs: + - role: pod + kubeconfig_file: "" + follow_redirects: true + enable_http2: true + selectors: + - role: pod + field: spec.nodeName=$(NODE_NAME) +--- apiVersion: apps/v1 kind: DaemonSet metadata: name: collector - namespace: gmp-system + namespace: {{ .Env.BENCH_NAME }} labels: - prometheus: "gmp" benchmark: {{ .Env.BENCH_NAME }} spec: selector: @@ -58,10 +129,8 @@ spec: template: metadata: labels: - app: managed-prometheus-collector + app: collector app.kubernetes.io/name: collector - app.kubernetes.io/version: 0.11.0 - prometheus: "gmp" benchmark: {{ .Env.BENCH_NAME }} annotations: # The emptyDir for the storage and config directories prevents cluster @@ -71,7 +140,6 @@ spec: spec: serviceAccountName: collector automountServiceAccountToken: true - priorityClassName: gmp-critical initContainers: - name: config-init image: gke.gcr.io/gke-distroless/bash:20220419 @@ -122,7 +190,7 @@ spec: - all privileged: false - name: prometheus - 
image: gke.gcr.io/prometheus-engine/prometheus:v2.45.3-gmp.7-gke.0 + image: gke.gcr.io/prometheus-engine/prometheus:v2.45.3-gmp.9-gke.0 args: - --config.file=/prometheus/config_out/config.yaml - --enable-feature=exemplar-storage @@ -162,7 +230,7 @@ spec: value: "25" resources: limits: - memory: 2G + memory: 3G # Limit on GKE standard. requests: cpu: 4m memory: 32M @@ -172,9 +240,9 @@ spec: - name: config-out readOnly: true mountPath: /prometheus/config_out - - name: collection-secret - readOnly: true - mountPath: /etc/secrets +# - name: collection-secret +# readOnly: true +# mountPath: /etc/secrets livenessProbe: httpGet: port: 19090 @@ -199,9 +267,9 @@ spec: name: collector - name: config-out emptyDir: {} - - name: collection-secret - secret: - secretName: collection +# - name: collection-secret +# secret: +# secretName: collection affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -216,11 +284,6 @@ spec: operator: In values: - linux - tolerations: - - effect: NoExecute - operator: Exists - - effect: NoSchedule - operator: Exists securityContext: runAsGroup: 1000 runAsNonRoot: true @@ -229,19 +292,3 @@ spec: type: RuntimeDefault nodeSelector: role: {{ .Env.BENCH_NAME }}-work ---- -apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: avalanche - namespace: {{ .Env.BENCH_NAME }} - labels: - app: avalanche -spec: - endpoints: - - port: metrics - interval: 15s - path: /metrics - selector: - matchLabels: - app: avalanche diff --git a/manifests/scenarios/otel-prom/1_collector.yaml b/manifests/scenarios/otel-prom/1_collector.yaml deleted file mode 100644 index 8dd4623..0000000 --- a/manifests/scenarios/otel-prom/1_collector.yaml +++ /dev/null @@ -1,245 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: otel-prom ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: collector - namespace: otel-prom - annotations: - iam.gke.io/gcp-service-account: gmp-prombench@{{ .Env.PROJECT_ID 
}}.iam.gserviceaccount.com ---- -# Source: prometheus-engine/templates/role.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: otel-prom:collector -rules: -- resources: - - endpoints - - nodes - - nodes/metrics - - pods - - services - apiGroups: [""] - verbs: ["get", "list", "watch"] -- resources: - - configmaps - apiGroups: [""] - verbs: ["get"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] ---- -# Source: prometheus-engine/templates/rolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: otel-prom:collector -roleRef: - name: otel-prom:collector - kind: ClusterRole - apiGroup: rbac.authorization.k8s.io -subjects: -- name: collector - namespace: otel-prom - kind: ServiceAccount ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: collector - namespace: otel-prom - labels: - benchmark: {{ .Env.BENCH_NAME }} -spec: - selector: - matchLabels: - # DO NOT MODIFY - label selectors are immutable by the Kubernetes API. - # see: https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/#pod-selector. - app.kubernetes.io/name: collector - template: - metadata: - labels: - app: managed-prometheus-collector-otel - app.kubernetes.io/name: collector - app.kubernetes.io/version: 0.11.0 - benchmark: {{ .Env.BENCH_NAME }} - annotations: - # The emptyDir for the storage and config directories prevents cluster - # autoscaling unless this annotation is set. 
- cluster-autoscaler.kubernetes.io/safe-to-evict: "true" - spec: - serviceAccountName: collector - automountServiceAccountToken: true - containers: - - name: otel-prom - image: otel/opentelemetry-collector-contrib:0.105.0 - command: - - "/otelcol-contrib" - - "--config=/conf/collector.yaml" - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: status.podIP - volumeMounts: - - name: collector-config - mountPath: /conf - readinessProbe: - httpGet: - path: / - port: 13133 - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - all - privileged: false - volumes: - - name: collector-config - configMap: - name: collector-config - items: - - key: collector.yaml - path: collector.yaml - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 - - amd64 - - key: kubernetes.io/os - operator: In - values: - - linux - tolerations: - - effect: NoExecute - operator: Exists - - effect: NoSchedule - operator: Exists - securityContext: - runAsGroup: 1000 - runAsNonRoot: true - runAsUser: 1000 - seccompProfile: - type: RuntimeDefault - nodeSelector: - role: {{ .Env.BENCH_NAME }}-work ---- -apiVersion: v1 -kind: ConfigMap -metadata: - creationTimestamp: null - name: collector-config - namespace: otel-prom -data: - collector.yaml: | - exporters: - googlemanagedprometheus: - - extensions: - health_check: - endpoint: ${env:MY_POD_IP}:13133 - processors: - resource/self-metrics: - attributes: - - key: "cluster" - value: {{ .Env.BENCH_NAME }} - action: upsert - - key: "namespace" - value: "otel-prom" - action: upsert - - key: "location" - value: "us-central1-a" - action: upsert - - batch: - send_batch_max_size: 200 - send_batch_size: 200 - timeout: 5s - - resourcedetection: - detectors: [gcp] - timeout: 10s - - transform/collision: - metric_statements: - - context: datapoint - statements: - - 
set(attributes["exported_location"], attributes["location"]) - - delete_key(attributes, "location") - - set(attributes["exported_cluster"], attributes["cluster"]) - - delete_key(attributes, "cluster") - - set(attributes["exported_namespace"], attributes["namespace"]) - - delete_key(attributes, "namespace") - - set(attributes["exported_job"], attributes["job"]) - - delete_key(attributes, "job") - - set(attributes["exported_instance"], attributes["instance"]) - - delete_key(attributes, "instance") - - set(attributes["exported_project_id"], attributes["project_id"]) - - delete_key(attributes, "project_id") - - receivers: - prometheus/bench: - config: - scrape_configs: - - job_name: otel-prom-bench - scrape_interval: 15s - kubernetes_sd_configs: - - role: pod - relabel_configs: - - source_labels: [__meta_kubernetes_pod_label_app] - action: keep - regex: avalanche - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] - action: replace - target_label: __metrics_path__ - regex: (.+) - - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] - action: replace - regex: (.+):(?:\d+);(\d+) - replacement: $$1:$$2 - target_label: __address__ - - action: labelmap - regex: __meta_kubernetes_pod_label_(.+) - - prometheus/self-metrics: - config: - scrape_configs: - - job_name: otel-self-metrics - scrape_interval: 1m - static_configs: - - targets: - - ${env:MY_POD_IP}:8888 - service: - extensions: - - health_check - pipelines: - metrics: - exporters: - - googlemanagedprometheus - processors: - - resourcedetection - - batch - - transform/collision - receivers: - - prometheus/bench - metrics/self-metrics: - exporters: - - googlemanagedprometheus - processors: - - resource/self-metrics - - resourcedetection - - batch - receivers: - - prometheus/self-metrics - telemetry: - metrics: - address: ${env:MY_POD_IP}:8888 diff --git a/scripts/bench-start.sh b/scripts/bench-start.sh index 11da05c..870f9e3 100755 --- a/scripts/bench-start.sh +++ 
b/scripts/bench-start.sh @@ -46,10 +46,19 @@ else --num-nodes=1 fi +# Performing steps for workload identity https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity#authenticating_to and https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-unmanaged#gmp-wli-svcacct +gcloud iam service-accounts add-iam-policy-binding \ + --role roles/iam.workloadIdentityUser \ + --member "serviceAccount:${PROJECT_ID}.svc.id.goog[${BENCH_NAME}/collector]" \ + gmp-prombench@${PROJECT_ID}.iam.gserviceaccount.com \ + --quiet + echo "## Applying scenario resources" -# TODO(bwplotka): All scenarios has the same load and requires GMP operator. Make it more flexible -# if needed later on. -# kubectlExpandApply "./manifests/gmp-operator" -kubectlExpandApply "./manifests/load/avalanche.yaml" +export BENCH_NAME +export PROJECT_ID +export ZONE +export CLUSTER_NAME + +kubectlExpandApply "./manifests/load/avalanche.exampletarget.yaml" kubectlExpandApply "${SCENARIO}" diff --git a/scripts/bench-stop.sh b/scripts/bench-stop.sh index 4bcacfb..ffb22ed 100755 --- a/scripts/bench-stop.sh +++ b/scripts/bench-stop.sh @@ -27,13 +27,17 @@ fi echo "## Assuming ${CLUSTER_NAME}" gcloud container clusters get-credentials ${CLUSTER_NAME} --zone ${ZONE} --project ${PROJECT_ID} -# n2-highmem-8 -- 8 vCPUs 64 GB -gcloud container node-pools delete --async --cluster ${CLUSTER_NAME} --zone ${ZONE} ${BENCH_NAME}-work-pool +export BENCH_NAME +export PROJECT_ID +export ZONE +export CLUSTER_NAME -# TODO(bwplotka): All scenarios has the same load and requires GMP operator. Make it more flexible -# if needed later on. 
-#kubectlExpandApply "./manifests/gmp-operator" kubectlExpandDelete "./manifests/load/avalanche.yaml" kubectlExpandDelete "${SCENARIO}" +# n2-highmem-8 -- 8 vCPUs 64 GB +gcloud container node-pools delete --async --cluster ${CLUSTER_NAME} --zone ${ZONE} ${BENCH_NAME}-work-pool + + + diff --git a/scripts/cluster-setup.sh b/scripts/cluster-setup.sh index cdabbc2..9b3d751 100755 --- a/scripts/cluster-setup.sh +++ b/scripts/cluster-setup.sh @@ -48,54 +48,25 @@ trap "exit 1" HUP INT PIPE QUIT TERM trap 'rm -r "$TEMP_DIR"' EXIT # Make sure the gmp-prombench Service Account exists +# TODO(bwplotka): Clean this up, test again after changes. SA="gmp-prombench" if ! gcloud iam service-accounts list --project=${PROJECT_ID} | grep ${SA} then gcloud iam service-accounts create "${SA}" --project=${PROJECT_ID} \ --description="A service account just to used for the core GMP manifests" \ - --display-name="GMP Prombench Service Account" \ - --quiet -fi - -gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${SA}@${PROJECT_ID}.iam.gserviceaccount.com" \ - --role="roles/iam.serviceAccountUser" \ + --display-name="GMP Prombenchy Service Account" \ --quiet -gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${SA}@${PROJECT_ID}.iam.gserviceaccount.com" \ - --role="roles/monitoring.metricWriter" \ - --quiet - -gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${SA}@${PROJECT_ID}.iam.gserviceaccount.com" \ - --role="roles/monitoring.metricWriter" \ - --quiet + gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ + --member="serviceAccount:${SA}@${PROJECT_ID}.iam.gserviceaccount.com" \ + --role="roles/iam.serviceAccountUser" \ + --quiet -gcloud iam service-accounts add-iam-policy-binding ${SA}@${PROJECT_ID}.iam.gserviceaccount.com \ - --role roles/iam.workloadIdentityUser \ - --member "serviceAccount:${PROJECT_ID}.svc.id.goog[core/prometheus]" \ - --project ${PROJECT_ID} - -gcloud iam 
service-accounts add-iam-policy-binding ${SA}@${PROJECT_ID}.iam.gserviceaccount.com \ - --role roles/iam.workloadIdentityUser \ - --member "serviceAccount:${PROJECT_ID}.svc.id.goog[gmp-system/operator]" \ - --project ${PROJECT_ID} - -gcloud iam service-accounts add-iam-policy-binding ${SA}@${PROJECT_ID}.iam.gserviceaccount.com \ - --role roles/iam.workloadIdentityUser \ - --member "serviceAccount:${PROJECT_ID}.svc.id.goog[gmp-system/collector]" \ - --project ${PROJECT_ID} - -gcloud iam service-accounts add-iam-policy-binding ${SA}@${PROJECT_ID}.iam.gserviceaccount.com \ - --role roles/iam.workloadIdentityUser \ - --member "serviceAccount:${PROJECT_ID}.svc.id.goog[otel-prom/collector]" \ - --project ${PROJECT_ID} - -gcloud iam service-accounts add-iam-policy-binding ${SA}@${PROJECT_ID}.iam.gserviceaccount.com \ - --role roles/iam.workloadIdentityUser \ - --member "serviceAccount:${PROJECT_ID}.svc.id.goog[otel-bench/collector]" \ - --project ${PROJECT_ID} + gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ + --member="serviceAccount:${SA}@${PROJECT_ID}.iam.gserviceaccount.com" \ + --role="roles/monitoring.metricWriter" \ + --quiet +fi echo "## Installing core resources" PROJECT_ID=${PROJECT_ID} ${GOMPLATE} --input-dir=./manifests/core --output-dir="${TEMP_DIR}" diff --git a/tools/mtypes/README.md b/tools/mtypes/README.md new file mode 100644 index 0000000..8980d8f --- /dev/null +++ b/tools/mtypes/README.md @@ -0,0 +1,50 @@ +# mtypes + +Go CLI gathering statistics around the distribution of types, average number of buckets (and more) across your Prometheus metrics/series. 
+ +## Usage + +The main usage takes a resource (from stdin, a file, or an HTTP /metrics endpoint) and calculates type statistics, e.g.: + +```bash +$ mtypes -resource=http://localhost:9090/metrics +$ mtypes -resource=./metrics.prometheus.txt +$ cat ./metrics.prometheus.txt | mtypes +``` + +```bash +Metric Type Metric Families Series Series % Series % (complex type adjusted) Average Buckets/Objectives +GAUGE 77 94 30.618893 15.112540 - +COUNTER 104 167 54.397394 26.848875 - +HISTOGRAM 11 19 6.188925 39.710611 11.000000 +SUMMARY 15 27 8.794788 18.327974 2.222222 +``` + +> NOTE: "Adjusted" series means the actual number of individual series stored in Prometheus. Classic histograms and summaries are stored as a set of counters. This is relevant as the cost of indexing new series is higher than storing complex values (this is why we slowly move to native histograms). + +Additionally, you can pass `--avalanche-flags-for-adjusted-series=10000` to print the Avalanche v0.6.0+ flags that configure avalanche to generate a metric target with the given number of adjusted series, while maintaining a similar distribution, e.g.: + +```bash +cat ../../manifests/load/exampleprometheustarget.txt | go run main.go --avalanche-flags-for-adjusted-series=10000 +Metric Type Metric Families Series (adjusted) Series (adjusted) % Average Buckets/Objectives +GAUGE 77 94 (94) 30.921053 (15.719064) - +COUNTER 104 166 (166) 54.605263 (27.759197) - +HISTOGRAM 11 17 (224) 5.592105 (37.458194) 11.176471 +SUMMARY 15 27 (114) 8.881579 (19.063545) 2.222222 +--- --- --- --- --- +* 207 304 (598) 100.000000 (100.000000) - + +Avalanche flags for the similar distribution to get to the adjusted series goal of: 10000 +--gauge-metric-count=157 +--counter-metric-count=277 +--histogram-metric-count=28 +--histogram-metric-bucket-count=10 +--native-histogram-metric-count=0 +--summary-metric-count=47 +--summary-metric-objective-count=2 +--series-count=10 +--value-interval=300 # Changes values every 5m. 
+--series-interval=3600 # 1h series churn. +--metric-interval=0 +This should give the total adjusted series to: 9860 +``` diff --git a/tools/mtypes/go.mod b/tools/mtypes/go.mod new file mode 100644 index 0000000..beafbb8 --- /dev/null +++ b/tools/mtypes/go.mod @@ -0,0 +1,14 @@ +module github.com/bwplotka/prombenchy/tools/mtypes + +go 1.22.6 + +require ( + github.com/google/go-cmp v0.6.0 + github.com/prometheus/client_model v0.6.1 + github.com/prometheus/common v0.60.0 +) + +require ( + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + google.golang.org/protobuf v1.34.2 // indirect +) diff --git a/tools/mtypes/go.sum b/tools/mtypes/go.sum new file mode 100644 index 0000000..9016186 --- /dev/null +++ b/tools/mtypes/go.sum @@ -0,0 +1,18 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.60.0 h1:+V9PAREWNvJMAuJ1x1BaWl9dewMW4YrHZQbx0sJNllA= +github.com/prometheus/common v0.60.0/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
+google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/tools/mtypes/main.go b/tools/mtypes/main.go
new file mode 100644
index 0000000..898f2dc
--- /dev/null
+++ b/tools/mtypes/main.go
@@ -0,0 +1,254 @@
+// Package main implements mtypes CLI, see README for details.
+package main
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"text/tabwriter"
+
+	dto "github.com/prometheus/client_model/go"
+	"github.com/prometheus/common/expfmt"
+)
+
+// stats holds the counters accumulated for one metric type (or, in main, their total).
+type stats struct {
+	families, series, buckets, objectives int
+
+	// adjustedSeries represents series that would result in "series" in Prometheus data model
+	// (includes _bucket, _count, _sum, _quantile).
+	adjustedSeries int
+}
+
+// metricType_NATIVE_HISTOGRAM is a tool-local metric type value used as the statistics key for
+// native histograms, so they are accounted separately from classic histograms.
+var metricType_NATIVE_HISTOGRAM dto.MetricType = 999
+
+// main reads metrics in the Prometheus text format from stdin, a file or an HTTP endpoint (see the
+// -resource flag), prints statistics per metric type and, if -avalanche-flags-for-adjusted-series is
+// set, the avalanche flags that would yield a similar type distribution.
+func main() {
+	resource := flag.String("resource", "", "Path or URL to the resource (file, /metrics) containing Prometheus metric format.")
+	avalancheFlagsForTotal := flag.Int("avalanche-flags-for-adjusted-series", 0, "If more than zero, it additionally prints flags for the avalanche 0.6.0 command line to generate metrics for the similar type distribution; to get the total number of adjusted series to the given value.")
+	flag.Parse()
+
+	var input io.Reader = os.Stdin
+	if *resource != "" {
+		switch {
+		case strings.HasPrefix(*resource, "https://"), strings.HasPrefix(*resource, "http://"):
+			if _, err := url.Parse(*resource); err != nil {
+				log.Fatalf("error parsing HTTP URL to the resource %v; got %v", *resource, err)
+			}
+			resp, err := http.Get(*resource)
+			if err != nil {
+				log.Fatalf("http get against %v failed: %v", *resource, err)
+			}
+			// Error responses do not carry a metrics payload; fail with the status instead of a parse error.
+			if resp.StatusCode != http.StatusOK {
+				log.Fatalf("http get against %v returned unexpected status %v", *resource, resp.Status)
+			}
+			defer resp.Body.Close()
+			input = resp.Body
+		default:
+			// Open the input file.
+			file, err := os.Open(*resource)
+			if err != nil {
+				log.Fatalf("Error opening file: %v", err) //nolint:gocritic
+			}
+			defer file.Close()
+			input = file
+		}
+	}
+	statistics, err := calculateTargetStatistics(input)
+	if err != nil {
+		log.Fatal(err)
+	}
+	var total stats
+	for _, s := range statistics {
+		total.families += s.families
+		total.series += s.series
+		total.adjustedSeries += s.adjustedSeries
+	}
+
+	writeStatistics(os.Stdout, total, statistics)
+
+	if *avalancheFlagsForTotal > 0 {
+		// Without any series the per-type ratios below would be NaN.
+		if total.adjustedSeries == 0 {
+			log.Fatal("no series found in the input; cannot compute avalanche flags")
+		}
+
+		// adjustedGoal is tracking the # of adjusted series we want to generate with avalanche.
+		adjustedGoal := float64(*avalancheFlagsForTotal)
+		fmt.Println()
+		fmt.Println("Avalanche flags for the similar distribution to get to the adjusted series goal of:", adjustedGoal)
+
+		adjustedGoal /= 10.0 // Assuming --series-count=10
+		// adjustedSum is tracking the total sum of series so far (at the end hopefully adjustedSum ~= adjustedGoal)
+		adjustedSum := 0
+		for _, mtype := range allTypes {
+			s := statistics[mtype]
+
+			// adjustedSeriesRatio is tracking the ratio of this type in the input file.
+			// We try to get similar ratio, but with different absolute counts, given the total sum of series we are aiming for.
+			adjustedSeriesRatio := float64(s.adjustedSeries) / float64(total.adjustedSeries)
+
+			// adjustedSeriesForType is tracking (per metric type), how many unique series of that
+			// metric type avalanche needs to create according to the ratio we got from our input.
+			adjustedSeriesForType := int(adjustedGoal * adjustedSeriesRatio)
+
+			switch mtype {
+			case dto.MetricType_GAUGE:
+				fmt.Printf("--gauge-metric-count=%v\n", adjustedSeriesForType)
+				adjustedSum += adjustedSeriesForType
+			case dto.MetricType_COUNTER:
+				fmt.Printf("--counter-metric-count=%v\n", adjustedSeriesForType)
+				adjustedSum += adjustedSeriesForType
+			case dto.MetricType_HISTOGRAM:
+				// The input may contain no classic histograms; integer division by zero would panic.
+				avgBkts := 0
+				if s.series > 0 {
+					avgBkts = s.buckets / s.series
+				}
+				adjustedSeriesForType /= 2 + avgBkts
+				fmt.Printf("--histogram-metric-count=%v\n", adjustedSeriesForType)
+				fmt.Printf("--histogram-metric-bucket-count=%v\n", max(avgBkts-1, 0)) // -1 is due to caveat of additional +Inf not added by avalanche; clamped so the flag is never negative.
+				adjustedSum += adjustedSeriesForType * (2 + avgBkts)
+			case metricType_NATIVE_HISTOGRAM:
+				fmt.Printf("--native-histogram-metric-count=%v\n", adjustedSeriesForType)
+				adjustedSum += adjustedSeriesForType
+			case dto.MetricType_SUMMARY:
+				// The input may contain no summaries; integer division by zero would panic.
+				avgObjs := 0
+				if s.series > 0 {
+					avgObjs = s.objectives / s.series
+				}
+				adjustedSeriesForType /= 2 + avgObjs
+				fmt.Printf("--summary-metric-count=%v\n", adjustedSeriesForType)
+				fmt.Printf("--summary-metric-objective-count=%v\n", avgObjs)
+				adjustedSum += adjustedSeriesForType * (2 + avgObjs)
+			default:
+				if s.series > 0 {
+					log.Fatalf("not supported %v metric in avalanche", mtype)
+				}
+			}
+		}
+		fmt.Printf("--series-count=10\n")
+		fmt.Printf("--value-interval=300 # Changes values every 5m.\n")
+		fmt.Printf("--series-interval=3600 # 1h series churn.\n")
+		fmt.Printf("--metric-interval=0\n")
+
+		fmt.Println("This should give the total adjusted series to:", adjustedSum*10)
+	}
+}
+
+// allTypes lists the metric types in the order they are processed by writeStatistics and main.
+var allTypes = []dto.MetricType{dto.MetricType_GAUGE, dto.MetricType_COUNTER, dto.MetricType_HISTOGRAM, metricType_NATIVE_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM, dto.MetricType_SUMMARY, dto.MetricType_UNTYPED}
+
+// writeStatistics writes a tab-aligned table with one row per metric type, followed by a totals row, to writer.
+// Types missing from statistics are skipped.
+func writeStatistics(writer io.Writer, total stats, statistics map[dto.MetricType]stats) {
+	w := tabwriter.NewWriter(writer, 0, 0, 4, ' ', 0)
+	fmt.Fprintln(w, "Metric Type\tMetric Families\tSeries (adjusted)\tSeries (adjusted) %\tAverage Buckets/Objectives")
+
+	for _, mtype := range allTypes {
+		s, ok := statistics[mtype]
+		if !ok {
+			continue
+		}
+
+		mtypeStr := mtype.String()
+		if mtype == metricType_NATIVE_HISTOGRAM {
+			mtypeStr = "HISTOGRAM (native)"
+		}
+
+		seriesRatio := 100 * float64(s.series) / float64(total.series)
+		adjustedSeriesRatio := 100 * float64(s.adjustedSeries) / float64(total.adjustedSeries)
+		switch {
+		case s.buckets > 0:
+			fmt.Fprintf(w, "%s\t%d\t%d (%d)\t%f (%f)\t%f\n", mtypeStr, s.families, s.series, s.adjustedSeries, seriesRatio, adjustedSeriesRatio, float64(s.buckets)/float64(s.series))
+		case s.objectives > 0:
+			fmt.Fprintf(w, "%s\t%d\t%d (%d)\t%f (%f)\t%f\n", mtypeStr, s.families, s.series, s.adjustedSeries, seriesRatio, adjustedSeriesRatio, float64(s.objectives)/float64(s.series))
+		default:
+			fmt.Fprintf(w, "%s\t%d\t%d (%d)\t%f (%f)\t-\n", mtypeStr, s.families, s.series, s.adjustedSeries, seriesRatio, adjustedSeriesRatio)
+		}
+	}
+	fmt.Fprintf(w, "---\t---\t---\t---\t---\n")
+	fmt.Fprintf(w, "*\t%d\t%d (%d)\t%f (%f)\t-\n", total.families, total.series, total.adjustedSeries, 100.0, 100.0)
+	_ = w.Flush()
+}
+
+// calculateTargetStatistics decodes metric families from r until EOF and aggregates them per metric type.
+// Histograms with a non-zero schema are accounted as native histograms under metricType_NATIVE_HISTOGRAM;
+// all other histograms are accounted as classic ones. A decoding error is returned wrapped.
+func calculateTargetStatistics(r io.Reader) (statistics map[dto.MetricType]stats, _ error) {
+	// Parse the Prometheus Text format.
+	parser := expfmt.NewDecoder(r, expfmt.NewFormat(expfmt.TypeTextPlain))
+
+	statistics = map[dto.MetricType]stats{}
+	nativeS := statistics[metricType_NATIVE_HISTOGRAM]
+	for {
+		var mf dto.MetricFamily
+		if err := parser.Decode(&mf); err != nil {
+			if errors.Is(err, io.EOF) {
+				break
+			}
+			return nil, fmt.Errorf("parsing %w", err)
+		}
+
+		s := statistics[mf.GetType()]
+
+		var mfAccounted, mfAccountedNative bool
+		switch mf.GetType() {
+		case dto.MetricType_GAUGE_HISTOGRAM, dto.MetricType_HISTOGRAM:
+			for _, m := range mf.GetMetric() {
+				if m.GetHistogram().GetSchema() == 0 {
+					// classic one.
+					s.series++
+					s.buckets += len(m.GetHistogram().GetBucket())
+					s.adjustedSeries += 2 + len(m.GetHistogram().GetBucket())
+
+					if !mfAccounted {
+						s.families++
+						mfAccounted = true
+					}
+				} else {
+					// native one.
+					nativeS.series++
+					nativeS.buckets += len(m.GetHistogram().GetNegativeDelta())
+					nativeS.buckets += len(m.GetHistogram().GetNegativeCount())
+					nativeS.buckets += len(m.GetHistogram().GetPositiveDelta())
+					nativeS.buckets += len(m.GetHistogram().GetPositiveCount())
+					nativeS.adjustedSeries++
+
+					if !mfAccountedNative {
+						nativeS.families++
+						mfAccountedNative = true
+					}
+				}
+			}
+		case dto.MetricType_SUMMARY:
+			s.series += len(mf.GetMetric())
+			s.families++
+			for _, m := range mf.GetMetric() {
+				s.objectives += len(m.GetSummary().GetQuantile())
+				s.adjustedSeries += 2 + len(m.GetSummary().GetQuantile())
+			}
+		default:
+			s.series += len(mf.GetMetric())
+			s.families++
+			s.adjustedSeries += len(mf.GetMetric())
+		}
+		statistics[mf.GetType()] = s
+	}
+	if nativeS.series > 0 {
+		statistics[metricType_NATIVE_HISTOGRAM] = nativeS
+	}
+	return statistics, nil
+}
diff --git a/tools/mtypes/main_test.go b/tools/mtypes/main_test.go
new file mode 100644
index 0000000..c00c087
--- /dev/null
+++ b/tools/mtypes/main_test.go
@@ -0,0 +1,1039 @@
+package main
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	dto "github.com/prometheus/client_model/go"
+)
+
+func TestCalculateTargetStatistics(t *testing.T) {
+	s, err := calculateTargetStatistics(strings.NewReader(testInput))
+	if err != nil {
+		t.Fatal(err)
+	}
+	expected := map[dto.MetricType]stats{
+		dto.MetricType_COUNTER:   {families: 104, series: 166, adjustedSeries: 166},
+		dto.MetricType_GAUGE:     {families: 77, series: 94, adjustedSeries: 94},
+		dto.MetricType_HISTOGRAM: {families: 11, series: 17, adjustedSeries: 224, buckets: 190},
+		dto.MetricType_SUMMARY:   {families: 15, series: 27, adjustedSeries: 114, objectives: 60},
+	}
+	if diff := cmp.Diff(expected, s, cmp.AllowUnexported(stats{})); diff != "" {
+		t.Fatal(diff)
+	}
+}
+
+const testInput = `# HELP gcm_export_pending_requests Number of in-flight requests to GCM.
+# TYPE gcm_export_pending_requests gauge +gcm_export_pending_requests 1 +# HELP gcm_export_projects_per_batch Number of different projects in a batch that's being sent. +# TYPE gcm_export_projects_per_batch histogram +gcm_export_projects_per_batch_bucket{le="1"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="2"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="4"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="8"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="16"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="32"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="64"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="128"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="256"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="512"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="1024"} 1.0832458e+07 +gcm_export_projects_per_batch_bucket{le="+Inf"} 1.0832458e+07 +gcm_export_projects_per_batch_sum 1.0832458e+07 +gcm_export_projects_per_batch_count 1.0832458e+07 +# HELP gcm_export_samples_exported_total Number of samples exported at scrape time. +# TYPE gcm_export_samples_exported_total counter +gcm_export_samples_exported_total 1.966333233e+09 +# HELP gcm_export_samples_per_rpc_batch Number of samples that ended up in a single RPC batch. 
+# TYPE gcm_export_samples_per_rpc_batch histogram +gcm_export_samples_per_rpc_batch_bucket{le="1"} 236541 +gcm_export_samples_per_rpc_batch_bucket{le="2"} 304313 +gcm_export_samples_per_rpc_batch_bucket{le="5"} 355002 +gcm_export_samples_per_rpc_batch_bucket{le="10"} 483585 +gcm_export_samples_per_rpc_batch_bucket{le="20"} 579284 +gcm_export_samples_per_rpc_batch_bucket{le="50"} 1.027749e+06 +gcm_export_samples_per_rpc_batch_bucket{le="100"} 1.704702e+06 +gcm_export_samples_per_rpc_batch_bucket{le="150"} 2.355089e+06 +gcm_export_samples_per_rpc_batch_bucket{le="200"} 1.0832458e+07 +gcm_export_samples_per_rpc_batch_bucket{le="+Inf"} 1.0832458e+07 +gcm_export_samples_per_rpc_batch_sum 1.83976418e+09 +gcm_export_samples_per_rpc_batch_count 1.0832458e+07 +# HELP gcm_export_samples_sent_total Number of exported samples sent to GCM. +# TYPE gcm_export_samples_sent_total counter +gcm_export_samples_sent_total 1.839764124e+09 +# HELP gcm_export_send_iterations_total Number of processing iterations of the sample export send handler. +# TYPE gcm_export_send_iterations_total counter +gcm_export_send_iterations_total 1.2444615e+07 +# HELP gcm_export_shard_process_pending_total Number of shard retrievals with an empty result. +# TYPE gcm_export_shard_process_pending_total counter +gcm_export_shard_process_pending_total 8.66546153e+08 +# HELP gcm_export_shard_process_samples_taken Number of samples taken when processing a shard. 
+# TYPE gcm_export_shard_process_samples_taken histogram +gcm_export_shard_process_samples_taken_bucket{le="1"} 5.6291878e+07 +gcm_export_shard_process_samples_taken_bucket{le="2"} 9.1249561e+07 +gcm_export_shard_process_samples_taken_bucket{le="5"} 1.27173414e+08 +gcm_export_shard_process_samples_taken_bucket{le="10"} 1.34384486e+08 +gcm_export_shard_process_samples_taken_bucket{le="20"} 1.68076229e+08 +gcm_export_shard_process_samples_taken_bucket{le="50"} 2.04738182e+08 +gcm_export_shard_process_samples_taken_bucket{le="100"} 2.04762012e+08 +gcm_export_shard_process_samples_taken_bucket{le="150"} 2.04762012e+08 +gcm_export_shard_process_samples_taken_bucket{le="200"} 2.04762012e+08 +gcm_export_shard_process_samples_taken_bucket{le="+Inf"} 2.04762012e+08 +gcm_export_shard_process_samples_taken_sum 1.83976418e+09 +gcm_export_shard_process_samples_taken_count 2.04762012e+08 +# HELP gcm_export_shard_process_total Number of shard retrievals. +# TYPE gcm_export_shard_process_total counter +gcm_export_shard_process_total 2.488923e+09 +# HELP gcm_pool_intern_total Time series memory intern operations. +# TYPE gcm_pool_intern_total counter +gcm_pool_intern_total 4.8525498e+07 +# HELP gcm_pool_release_total Time series memory intern release operations. +# TYPE gcm_pool_release_total counter +gcm_pool_release_total 4.8514709e+07 +# HELP gcm_prometheus_samples_discarded_total Samples that were discarded during data model conversion. +# TYPE gcm_prometheus_samples_discarded_total counter +gcm_prometheus_samples_discarded_total{reason="staleness-marker"} 9919 +gcm_prometheus_samples_discarded_total{reason="zero-buckets-bounds"} 1.076142e+07 +# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles. 
+# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 5.8641e-05 +go_gc_duration_seconds{quantile="0.25"} 8.4045e-05 +go_gc_duration_seconds{quantile="0.5"} 0.000119609 +go_gc_duration_seconds{quantile="0.75"} 0.000149195 +go_gc_duration_seconds{quantile="1"} 0.000312434 +go_gc_duration_seconds_sum 11.324308382 +go_gc_duration_seconds_count 92364 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 112 +# HELP go_info Information about the Go environment. +# TYPE go_info gauge +go_info{version="go1.20.14"} 1 +# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use. +# TYPE go_memstats_alloc_bytes gauge +go_memstats_alloc_bytes 1.09818568e+08 +# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. +# TYPE go_memstats_alloc_bytes_total counter +go_memstats_alloc_bytes_total 7.420978933248e+12 +# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. +# TYPE go_memstats_buck_hash_sys_bytes gauge +go_memstats_buck_hash_sys_bytes 3.653156e+06 +# HELP go_memstats_frees_total Total number of frees. +# TYPE go_memstats_frees_total counter +go_memstats_frees_total 1.19996693238e+11 +# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. +# TYPE go_memstats_gc_sys_bytes gauge +go_memstats_gc_sys_bytes 1.6556264e+07 +# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use. +# TYPE go_memstats_heap_alloc_bytes gauge +go_memstats_heap_alloc_bytes 1.09818568e+08 +# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. +# TYPE go_memstats_heap_idle_bytes gauge +go_memstats_heap_idle_bytes 1.8628608e+08 +# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. +# TYPE go_memstats_heap_inuse_bytes gauge +go_memstats_heap_inuse_bytes 1.3860864e+08 +# HELP go_memstats_heap_objects Number of allocated objects. 
+# TYPE go_memstats_heap_objects gauge +go_memstats_heap_objects 738856 +# HELP go_memstats_heap_released_bytes Number of heap bytes released to OS. +# TYPE go_memstats_heap_released_bytes gauge +go_memstats_heap_released_bytes 1.42557184e+08 +# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. +# TYPE go_memstats_heap_sys_bytes gauge +go_memstats_heap_sys_bytes 3.2489472e+08 +# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection. +# TYPE go_memstats_last_gc_time_seconds gauge +go_memstats_last_gc_time_seconds 1.7278073317025118e+09 +# HELP go_memstats_lookups_total Total number of pointer lookups. +# TYPE go_memstats_lookups_total counter +go_memstats_lookups_total 0 +# HELP go_memstats_mallocs_total Total number of mallocs. +# TYPE go_memstats_mallocs_total counter +go_memstats_mallocs_total 1.19997432094e+11 +# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. +# TYPE go_memstats_mcache_inuse_bytes gauge +go_memstats_mcache_inuse_bytes 4800 +# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. +# TYPE go_memstats_mcache_sys_bytes gauge +go_memstats_mcache_sys_bytes 15600 +# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. +# TYPE go_memstats_mspan_inuse_bytes gauge +go_memstats_mspan_inuse_bytes 1.8024e+06 +# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. +# TYPE go_memstats_mspan_sys_bytes gauge +go_memstats_mspan_sys_bytes 3.24768e+06 +# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. +# TYPE go_memstats_next_gc_bytes gauge +go_memstats_next_gc_bytes 1.636618e+08 +# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. 
+# TYPE go_memstats_other_sys_bytes gauge +go_memstats_other_sys_bytes 1.202956e+06 +# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. +# TYPE go_memstats_stack_inuse_bytes gauge +go_memstats_stack_inuse_bytes 2.260992e+06 +# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. +# TYPE go_memstats_stack_sys_bytes gauge +go_memstats_stack_sys_bytes 2.260992e+06 +# HELP go_memstats_sys_bytes Number of bytes obtained from system. +# TYPE go_memstats_sys_bytes gauge +go_memstats_sys_bytes 3.51831368e+08 +# HELP go_threads Number of OS threads created. +# TYPE go_threads gauge +go_threads 12 +# HELP grpc_client_handled_total Total number of RPCs completed by the client, regardless of success or failure. +# TYPE grpc_client_handled_total counter +grpc_client_handled_total{grpc_code="Canceled",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 9 +grpc_client_handled_total{grpc_code="DeadlineExceeded",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 82 +grpc_client_handled_total{grpc_code="Internal",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 4 +grpc_client_handled_total{grpc_code="OK",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.0831867e+07 +grpc_client_handled_total{grpc_code="Unauthenticated",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1 +grpc_client_handled_total{grpc_code="Unavailable",grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 494 +# HELP grpc_client_handling_seconds Histogram of response latency (seconds) of the gRPC until it is finished by the application. 
+# TYPE grpc_client_handling_seconds histogram +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.005"} 0 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.01"} 0 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.025"} 34059 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.05"} 1.127825e+06 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.1"} 9.058302e+06 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.25"} 1.0721886e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="0.5"} 1.0759498e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="1"} 1.0774023e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="2.5"} 1.079026e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="5"} 1.0800098e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="10"} 1.0832159e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="15"} 1.0832261e+07 
+grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="20"} 1.0832299e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="30"} 1.0832376e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="40"} 1.0832457e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="50"} 1.0832457e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="60"} 1.0832457e+07 +grpc_client_handling_seconds_bucket{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary",le="+Inf"} 1.0832457e+07 +grpc_client_handling_seconds_sum{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.2123103039707085e+06 +grpc_client_handling_seconds_count{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.0832457e+07 +# HELP grpc_client_msg_received_total Total number of RPC stream messages received by the client. +# TYPE grpc_client_msg_received_total counter +grpc_client_msg_received_total{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 590 +# HELP grpc_client_msg_sent_total Total number of gRPC stream messages sent by the client. +# TYPE grpc_client_msg_sent_total counter +grpc_client_msg_sent_total{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.0832458e+07 +# HELP grpc_client_started_total Total number of RPCs started on the client. 
+# TYPE grpc_client_started_total counter +grpc_client_started_total{grpc_method="CreateTimeSeries",grpc_service="google.monitoring.v3.MetricService",grpc_type="unary"} 1.0832458e+07 +# HELP net_conntrack_dialer_conn_attempted_total Total number of connections attempted by the given dialer a given name. +# TYPE net_conntrack_dialer_conn_attempted_total counter +net_conntrack_dialer_conn_attempted_total{dialer_name="cadvisor"} 94 +net_conntrack_dialer_conn_attempted_total{dialer_name="default"} 0 +net_conntrack_dialer_conn_attempted_total{dialer_name="kube-state-metrics"} 2 +net_conntrack_dialer_conn_attempted_total{dialer_name="pods"} 179445 +# HELP net_conntrack_dialer_conn_closed_total Total number of connections closed which originated from the dialer of a given name. +# TYPE net_conntrack_dialer_conn_closed_total counter +net_conntrack_dialer_conn_closed_total{dialer_name="cadvisor"} 3 +net_conntrack_dialer_conn_closed_total{dialer_name="default"} 0 +net_conntrack_dialer_conn_closed_total{dialer_name="kube-state-metrics"} 0 +net_conntrack_dialer_conn_closed_total{dialer_name="pods"} 179394 +# HELP net_conntrack_dialer_conn_established_total Total number of connections successfully established by the given dialer a given name. +# TYPE net_conntrack_dialer_conn_established_total counter +net_conntrack_dialer_conn_established_total{dialer_name="cadvisor"} 4 +net_conntrack_dialer_conn_established_total{dialer_name="default"} 0 +net_conntrack_dialer_conn_established_total{dialer_name="kube-state-metrics"} 2 +net_conntrack_dialer_conn_established_total{dialer_name="pods"} 179399 +# HELP net_conntrack_dialer_conn_failed_total Total number of connections failed to dial by the dialer a given name. 
+# TYPE net_conntrack_dialer_conn_failed_total counter +net_conntrack_dialer_conn_failed_total{dialer_name="cadvisor",reason="refused"} 7 +net_conntrack_dialer_conn_failed_total{dialer_name="cadvisor",reason="resolution"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="cadvisor",reason="timeout"} 83 +net_conntrack_dialer_conn_failed_total{dialer_name="cadvisor",reason="unknown"} 90 +net_conntrack_dialer_conn_failed_total{dialer_name="default",reason="refused"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="default",reason="resolution"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="default",reason="timeout"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="default",reason="unknown"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="kube-state-metrics",reason="refused"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="kube-state-metrics",reason="resolution"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="kube-state-metrics",reason="timeout"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="kube-state-metrics",reason="unknown"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="pods",reason="refused"} 4 +net_conntrack_dialer_conn_failed_total{dialer_name="pods",reason="resolution"} 0 +net_conntrack_dialer_conn_failed_total{dialer_name="pods",reason="timeout"} 42 +net_conntrack_dialer_conn_failed_total{dialer_name="pods",reason="unknown"} 46 +# HELP net_conntrack_listener_conn_accepted_total Total number of connections opened to the listener of a given name. +# TYPE net_conntrack_listener_conn_accepted_total counter +net_conntrack_listener_conn_accepted_total{listener_name="http"} 8 +# HELP net_conntrack_listener_conn_closed_total Total number of connections closed that were made to the listener of a given name. 
+# TYPE net_conntrack_listener_conn_closed_total counter +net_conntrack_listener_conn_closed_total{listener_name="http"} 3 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. +# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 64026.65 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 105 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 2.81624576e+08 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.72511698039e+09 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 2.8450332672e+10 +# HELP process_virtual_memory_max_bytes Maximum amount of virtual memory available in bytes. +# TYPE process_virtual_memory_max_bytes gauge +process_virtual_memory_max_bytes 1.8446744073709552e+19 +# HELP prometheus_api_remote_read_queries The current number of remote read queries being executed or waiting. +# TYPE prometheus_api_remote_read_queries gauge +prometheus_api_remote_read_queries 0 +# HELP prometheus_build_info A metric with a constant '1' value labeled by version, revision, branch, goversion from which prometheus was built, and the goos and goarch for the build. +# TYPE prometheus_build_info gauge +prometheus_build_info{branch="",goarch="amd64",goos="linux",goversion="go1.20.14",revision="d7b199739aa7e0d00e7ebd0792339dd4b167a269-modified",tags="builtinassets",version="2.45.3"} 1 +# HELP prometheus_config_last_reload_success_timestamp_seconds Timestamp of the last successful configuration reload. 
+# TYPE prometheus_config_last_reload_success_timestamp_seconds gauge +prometheus_config_last_reload_success_timestamp_seconds 1.725116982549508e+09 +# HELP prometheus_config_last_reload_successful Whether the last configuration reload attempt was successful. +# TYPE prometheus_config_last_reload_successful gauge +prometheus_config_last_reload_successful 1 +# HELP prometheus_engine_queries The current number of queries being executed or waiting. +# TYPE prometheus_engine_queries gauge +prometheus_engine_queries 0 +# HELP prometheus_engine_queries_concurrent_max The max number of concurrent queries. +# TYPE prometheus_engine_queries_concurrent_max gauge +prometheus_engine_queries_concurrent_max 20 +# HELP prometheus_engine_query_duration_seconds Query timings +# TYPE prometheus_engine_query_duration_seconds summary +prometheus_engine_query_duration_seconds{slice="inner_eval",quantile="0.5"} NaN +prometheus_engine_query_duration_seconds{slice="inner_eval",quantile="0.9"} NaN +prometheus_engine_query_duration_seconds{slice="inner_eval",quantile="0.99"} NaN +prometheus_engine_query_duration_seconds_sum{slice="inner_eval"} 0 +prometheus_engine_query_duration_seconds_count{slice="inner_eval"} 0 +prometheus_engine_query_duration_seconds{slice="prepare_time",quantile="0.5"} NaN +prometheus_engine_query_duration_seconds{slice="prepare_time",quantile="0.9"} NaN +prometheus_engine_query_duration_seconds{slice="prepare_time",quantile="0.99"} NaN +prometheus_engine_query_duration_seconds_sum{slice="prepare_time"} 0 +prometheus_engine_query_duration_seconds_count{slice="prepare_time"} 0 +prometheus_engine_query_duration_seconds{slice="queue_time",quantile="0.5"} NaN +prometheus_engine_query_duration_seconds{slice="queue_time",quantile="0.9"} NaN +prometheus_engine_query_duration_seconds{slice="queue_time",quantile="0.99"} NaN +prometheus_engine_query_duration_seconds_sum{slice="queue_time"} 0 +prometheus_engine_query_duration_seconds_count{slice="queue_time"} 0 
+prometheus_engine_query_duration_seconds{slice="result_sort",quantile="0.5"} NaN +prometheus_engine_query_duration_seconds{slice="result_sort",quantile="0.9"} NaN +prometheus_engine_query_duration_seconds{slice="result_sort",quantile="0.99"} NaN +prometheus_engine_query_duration_seconds_sum{slice="result_sort"} 0 +prometheus_engine_query_duration_seconds_count{slice="result_sort"} 0 +# HELP prometheus_engine_query_log_enabled State of the query log. +# TYPE prometheus_engine_query_log_enabled gauge +prometheus_engine_query_log_enabled 0 +# HELP prometheus_engine_query_log_failures_total The number of query log failures. +# TYPE prometheus_engine_query_log_failures_total counter +prometheus_engine_query_log_failures_total 0 +# HELP prometheus_engine_query_samples_total The total number of samples loaded by all queries. +# TYPE prometheus_engine_query_samples_total counter +prometheus_engine_query_samples_total 0 +# HELP prometheus_http_request_duration_seconds Histogram of latencies for HTTP requests. 
+# TYPE prometheus_http_request_duration_seconds histogram +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="0.1"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="0.2"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="0.4"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="1"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="3"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="8"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="20"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="60"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="120"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/ready",le="+Inf"} 2 +prometheus_http_request_duration_seconds_sum{handler="/-/ready"} 4.7443999999999995e-05 +prometheus_http_request_duration_seconds_count{handler="/-/ready"} 2 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="0.1"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="0.2"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="0.4"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="1"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="3"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="8"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="20"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="60"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="120"} 1 +prometheus_http_request_duration_seconds_bucket{handler="/-/reload",le="+Inf"} 1 +prometheus_http_request_duration_seconds_sum{handler="/-/reload"} 0.002356799 +prometheus_http_request_duration_seconds_count{handler="/-/reload"} 1 
+prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="0.1"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="0.2"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="0.4"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="1"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="3"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="8"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="20"} 358716 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="60"} 448346 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="120"} 448346 +prometheus_http_request_duration_seconds_bucket{handler="/debug/*subpath",le="+Inf"} 448346 +prometheus_http_request_duration_seconds_sum{handler="/debug/*subpath"} 2.692262582005182e+06 +prometheus_http_request_duration_seconds_count{handler="/debug/*subpath"} 448346 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.1"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.2"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="0.4"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="1"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="3"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="8"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="20"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="60"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="120"} 179357 +prometheus_http_request_duration_seconds_bucket{handler="/metrics",le="+Inf"} 179357 +prometheus_http_request_duration_seconds_sum{handler="/metrics"} 
552.2173053479947 +prometheus_http_request_duration_seconds_count{handler="/metrics"} 179357 +# HELP prometheus_http_requests_total Counter of HTTP requests. +# TYPE prometheus_http_requests_total counter +prometheus_http_requests_total{code="200",handler="/-/ready"} 1 +prometheus_http_requests_total{code="200",handler="/-/reload"} 1 +prometheus_http_requests_total{code="200",handler="/debug/*subpath"} 448346 +prometheus_http_requests_total{code="200",handler="/metrics"} 179357 +prometheus_http_requests_total{code="503",handler="/-/ready"} 1 +# HELP prometheus_http_response_size_bytes Histogram of response size for HTTP requests. +# TYPE prometheus_http_response_size_bytes histogram +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="100"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1000"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="10000"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="100000"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1e+06"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1e+07"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1e+08"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="1e+09"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/ready",le="+Inf"} 2 +prometheus_http_response_size_bytes_sum{handler="/-/ready"} 47 +prometheus_http_response_size_bytes_count{handler="/-/ready"} 2 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="100"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1000"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="10000"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="100000"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1e+06"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1e+07"} 1 
+prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1e+08"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="1e+09"} 1 +prometheus_http_response_size_bytes_bucket{handler="/-/reload",le="+Inf"} 1 +prometheus_http_response_size_bytes_sum{handler="/-/reload"} 0 +prometheus_http_response_size_bytes_count{handler="/-/reload"} 1 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="100"} 0 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1000"} 179358 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="10000"} 269558 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="100000"} 359969 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1e+06"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1e+07"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1e+08"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="1e+09"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/debug/*subpath",le="+Inf"} 448346 +prometheus_http_response_size_bytes_sum{handler="/debug/*subpath"} 1.7640059511e+10 +prometheus_http_response_size_bytes_count{handler="/debug/*subpath"} 448346 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="100"} 0 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1000"} 0 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="10000"} 191 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="100000"} 179357 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1e+06"} 179357 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1e+07"} 179357 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1e+08"} 179357 +prometheus_http_response_size_bytes_bucket{handler="/metrics",le="1e+09"} 179357 
+prometheus_http_response_size_bytes_bucket{handler="/metrics",le="+Inf"} 179357 +prometheus_http_response_size_bytes_sum{handler="/metrics"} 1.895799365e+09 +prometheus_http_response_size_bytes_count{handler="/metrics"} 179357 +# HELP prometheus_notifications_alertmanagers_discovered The number of alertmanagers discovered and active. +# TYPE prometheus_notifications_alertmanagers_discovered gauge +prometheus_notifications_alertmanagers_discovered 0 +# HELP prometheus_notifications_dropped_total Total number of alerts dropped due to errors when sending to Alertmanager. +# TYPE prometheus_notifications_dropped_total counter +prometheus_notifications_dropped_total 0 +# HELP prometheus_notifications_queue_capacity The capacity of the alert notifications queue. +# TYPE prometheus_notifications_queue_capacity gauge +prometheus_notifications_queue_capacity 10000 +# HELP prometheus_notifications_queue_length The number of alert notifications in the queue. +# TYPE prometheus_notifications_queue_length gauge +prometheus_notifications_queue_length 0 +# HELP prometheus_ready Whether Prometheus startup was fully completed and the server is ready for normal operation. +# TYPE prometheus_ready gauge +prometheus_ready 1 +# HELP prometheus_remote_storage_exemplars_in_total Exemplars in to remote storage, compare to exemplars out for queue managers. +# TYPE prometheus_remote_storage_exemplars_in_total counter +prometheus_remote_storage_exemplars_in_total 0 +# HELP prometheus_remote_storage_highest_timestamp_in_seconds Highest timestamp that has come into the remote storage via the Appender interface, in seconds since epoch. +# TYPE prometheus_remote_storage_highest_timestamp_in_seconds gauge +prometheus_remote_storage_highest_timestamp_in_seconds 1.727807345e+09 +# HELP prometheus_remote_storage_histograms_in_total HistogramSamples in to remote storage, compare to histograms out for queue managers. 
+# TYPE prometheus_remote_storage_histograms_in_total counter +prometheus_remote_storage_histograms_in_total 0 +# HELP prometheus_remote_storage_samples_in_total Samples in to remote storage, compare to samples out for queue managers. +# TYPE prometheus_remote_storage_samples_in_total counter +prometheus_remote_storage_samples_in_total 1.966333233e+09 +# HELP prometheus_remote_storage_string_interner_zero_reference_releases_total The number of times release has been called for strings that are not interned. +# TYPE prometheus_remote_storage_string_interner_zero_reference_releases_total counter +prometheus_remote_storage_string_interner_zero_reference_releases_total 0 +# HELP prometheus_rule_evaluation_duration_seconds The duration for a rule to execute. +# TYPE prometheus_rule_evaluation_duration_seconds summary +prometheus_rule_evaluation_duration_seconds{quantile="0.5"} NaN +prometheus_rule_evaluation_duration_seconds{quantile="0.9"} NaN +prometheus_rule_evaluation_duration_seconds{quantile="0.99"} NaN +prometheus_rule_evaluation_duration_seconds_sum 0 +prometheus_rule_evaluation_duration_seconds_count 0 +# HELP prometheus_rule_group_duration_seconds The duration of rule group evaluations. +# TYPE prometheus_rule_group_duration_seconds summary +prometheus_rule_group_duration_seconds{quantile="0.01"} NaN +prometheus_rule_group_duration_seconds{quantile="0.05"} NaN +prometheus_rule_group_duration_seconds{quantile="0.5"} NaN +prometheus_rule_group_duration_seconds{quantile="0.9"} NaN +prometheus_rule_group_duration_seconds{quantile="0.99"} NaN +prometheus_rule_group_duration_seconds_sum 0 +prometheus_rule_group_duration_seconds_count 0 +# HELP prometheus_sd_azure_failures_total Number of Azure service discovery refresh failures. +# TYPE prometheus_sd_azure_failures_total counter +prometheus_sd_azure_failures_total 0 +# HELP prometheus_sd_consul_rpc_duration_seconds The duration of a Consul RPC call in seconds. 
+# TYPE prometheus_sd_consul_rpc_duration_seconds summary +prometheus_sd_consul_rpc_duration_seconds{call="service",endpoint="catalog",quantile="0.5"} NaN +prometheus_sd_consul_rpc_duration_seconds{call="service",endpoint="catalog",quantile="0.9"} NaN +prometheus_sd_consul_rpc_duration_seconds{call="service",endpoint="catalog",quantile="0.99"} NaN +prometheus_sd_consul_rpc_duration_seconds_sum{call="service",endpoint="catalog"} 0 +prometheus_sd_consul_rpc_duration_seconds_count{call="service",endpoint="catalog"} 0 +prometheus_sd_consul_rpc_duration_seconds{call="services",endpoint="catalog",quantile="0.5"} NaN +prometheus_sd_consul_rpc_duration_seconds{call="services",endpoint="catalog",quantile="0.9"} NaN +prometheus_sd_consul_rpc_duration_seconds{call="services",endpoint="catalog",quantile="0.99"} NaN +prometheus_sd_consul_rpc_duration_seconds_sum{call="services",endpoint="catalog"} 0 +prometheus_sd_consul_rpc_duration_seconds_count{call="services",endpoint="catalog"} 0 +# HELP prometheus_sd_consul_rpc_failures_total The number of Consul RPC call failures. +# TYPE prometheus_sd_consul_rpc_failures_total counter +prometheus_sd_consul_rpc_failures_total 0 +# HELP prometheus_sd_discovered_targets Current number of discovered targets. +# TYPE prometheus_sd_discovered_targets gauge +prometheus_sd_discovered_targets{config="cadvisor",name="scrape"} 2 +prometheus_sd_discovered_targets{config="kube-state-metrics",name="scrape"} 9 +prometheus_sd_discovered_targets{config="pods",name="scrape"} 82 +# HELP prometheus_sd_dns_lookup_failures_total The number of DNS-SD lookup failures. +# TYPE prometheus_sd_dns_lookup_failures_total counter +prometheus_sd_dns_lookup_failures_total 0 +# HELP prometheus_sd_dns_lookups_total The number of DNS-SD lookups. +# TYPE prometheus_sd_dns_lookups_total counter +prometheus_sd_dns_lookups_total 0 +# HELP prometheus_sd_failed_configs Current number of service discovery configurations that failed to load. 
+# TYPE prometheus_sd_failed_configs gauge +prometheus_sd_failed_configs{name="notify"} 0 +prometheus_sd_failed_configs{name="scrape"} 0 +# HELP prometheus_sd_file_read_errors_total The number of File-SD read errors. +# TYPE prometheus_sd_file_read_errors_total counter +prometheus_sd_file_read_errors_total 0 +# HELP prometheus_sd_file_scan_duration_seconds The duration of the File-SD scan in seconds. +# TYPE prometheus_sd_file_scan_duration_seconds summary +prometheus_sd_file_scan_duration_seconds{quantile="0.5"} NaN +prometheus_sd_file_scan_duration_seconds{quantile="0.9"} NaN +prometheus_sd_file_scan_duration_seconds{quantile="0.99"} NaN +prometheus_sd_file_scan_duration_seconds_sum 0 +prometheus_sd_file_scan_duration_seconds_count 0 +# HELP prometheus_sd_file_watcher_errors_total The number of File-SD errors caused by filesystem watch failures. +# TYPE prometheus_sd_file_watcher_errors_total counter +prometheus_sd_file_watcher_errors_total 0 +# HELP prometheus_sd_http_failures_total Number of HTTP service discovery refresh failures. +# TYPE prometheus_sd_http_failures_total counter +prometheus_sd_http_failures_total 0 +# HELP prometheus_sd_kubernetes_events_total The number of Kubernetes events handled. 
+# TYPE prometheus_sd_kubernetes_events_total counter +prometheus_sd_kubernetes_events_total{event="add",role="endpoints"} 0 +prometheus_sd_kubernetes_events_total{event="add",role="endpointslice"} 0 +prometheus_sd_kubernetes_events_total{event="add",role="ingress"} 0 +prometheus_sd_kubernetes_events_total{event="add",role="node"} 5 +prometheus_sd_kubernetes_events_total{event="add",role="pod"} 169 +prometheus_sd_kubernetes_events_total{event="add",role="service"} 9 +prometheus_sd_kubernetes_events_total{event="delete",role="endpoints"} 0 +prometheus_sd_kubernetes_events_total{event="delete",role="endpointslice"} 0 +prometheus_sd_kubernetes_events_total{event="delete",role="ingress"} 0 +prometheus_sd_kubernetes_events_total{event="delete",role="node"} 3 +prometheus_sd_kubernetes_events_total{event="delete",role="pod"} 128 +prometheus_sd_kubernetes_events_total{event="delete",role="service"} 2 +prometheus_sd_kubernetes_events_total{event="update",role="endpoints"} 0 +prometheus_sd_kubernetes_events_total{event="update",role="endpointslice"} 0 +prometheus_sd_kubernetes_events_total{event="update",role="ingress"} 0 +prometheus_sd_kubernetes_events_total{event="update",role="node"} 35525 +prometheus_sd_kubernetes_events_total{event="update",role="pod"} 1034 +prometheus_sd_kubernetes_events_total{event="update",role="service"} 29 +# HELP prometheus_sd_kubernetes_http_request_duration_seconds Summary of latencies for HTTP requests to the Kubernetes API by endpoint. 
+# TYPE prometheus_sd_kubernetes_http_request_duration_seconds summary +prometheus_sd_kubernetes_http_request_duration_seconds_sum{endpoint="/api/v1/nodes"} 0.017348603 +prometheus_sd_kubernetes_http_request_duration_seconds_count{endpoint="/api/v1/nodes"} 4 +prometheus_sd_kubernetes_http_request_duration_seconds_sum{endpoint="/api/v1/pods"} 0.038949225999999997 +prometheus_sd_kubernetes_http_request_duration_seconds_count{endpoint="/api/v1/pods"} 4 +prometheus_sd_kubernetes_http_request_duration_seconds_sum{endpoint="/api/v1/services"} 0.014277334000000001 +prometheus_sd_kubernetes_http_request_duration_seconds_count{endpoint="/api/v1/services"} 4 +# HELP prometheus_sd_kubernetes_http_request_total Total number of HTTP requests to the Kubernetes API by status code. +# TYPE prometheus_sd_kubernetes_http_request_total counter +prometheus_sd_kubernetes_http_request_total{status_code="200"} 17957 +prometheus_sd_kubernetes_http_request_total{status_code=""} 83 +# HELP prometheus_sd_kubernetes_workqueue_depth Current depth of the work queue. +# TYPE prometheus_sd_kubernetes_workqueue_depth gauge +prometheus_sd_kubernetes_workqueue_depth{queue_name="node"} 0 +prometheus_sd_kubernetes_workqueue_depth{queue_name="pod"} 0 +prometheus_sd_kubernetes_workqueue_depth{queue_name="service"} 0 +# HELP prometheus_sd_kubernetes_workqueue_items_total Total number of items added to the work queue. +# TYPE prometheus_sd_kubernetes_workqueue_items_total counter +prometheus_sd_kubernetes_workqueue_items_total{queue_name="node"} 35533 +prometheus_sd_kubernetes_workqueue_items_total{queue_name="pod"} 1329 +prometheus_sd_kubernetes_workqueue_items_total{queue_name="service"} 40 +# HELP prometheus_sd_kubernetes_workqueue_latency_seconds How long an item stays in the work queue. 
+# TYPE prometheus_sd_kubernetes_workqueue_latency_seconds summary +prometheus_sd_kubernetes_workqueue_latency_seconds_sum{queue_name="node"} 0.49772388200000356 +prometheus_sd_kubernetes_workqueue_latency_seconds_count{queue_name="node"} 35533 +prometheus_sd_kubernetes_workqueue_latency_seconds_sum{queue_name="pod"} 4.155762530999996 +prometheus_sd_kubernetes_workqueue_latency_seconds_count{queue_name="pod"} 1329 +prometheus_sd_kubernetes_workqueue_latency_seconds_sum{queue_name="service"} 0.8281205150000001 +prometheus_sd_kubernetes_workqueue_latency_seconds_count{queue_name="service"} 40 +# HELP prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds Duration of the longest running processor in the work queue. +# TYPE prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds gauge +prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds{queue_name="node"} 0 +prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds{queue_name="pod"} 0 +prometheus_sd_kubernetes_workqueue_longest_running_processor_seconds{queue_name="service"} 0 +# HELP prometheus_sd_kubernetes_workqueue_unfinished_work_seconds How long an item has remained unfinished in the work queue. +# TYPE prometheus_sd_kubernetes_workqueue_unfinished_work_seconds gauge +prometheus_sd_kubernetes_workqueue_unfinished_work_seconds{queue_name="node"} 0 +prometheus_sd_kubernetes_workqueue_unfinished_work_seconds{queue_name="pod"} 0 +prometheus_sd_kubernetes_workqueue_unfinished_work_seconds{queue_name="service"} 0 +# HELP prometheus_sd_kubernetes_workqueue_work_duration_seconds How long processing an item from the work queue takes. 
+# TYPE prometheus_sd_kubernetes_workqueue_work_duration_seconds summary +prometheus_sd_kubernetes_workqueue_work_duration_seconds_sum{queue_name="node"} 5.840500786999983 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_count{queue_name="node"} 35533 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_sum{queue_name="pod"} 0.034607483000000085 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_count{queue_name="pod"} 1329 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_sum{queue_name="service"} 0.0010254919999999998 +prometheus_sd_kubernetes_workqueue_work_duration_seconds_count{queue_name="service"} 40 +# HELP prometheus_sd_kuma_fetch_duration_seconds The duration of a Kuma MADS fetch call. +# TYPE prometheus_sd_kuma_fetch_duration_seconds summary +prometheus_sd_kuma_fetch_duration_seconds{quantile="0.5"} NaN +prometheus_sd_kuma_fetch_duration_seconds{quantile="0.9"} NaN +prometheus_sd_kuma_fetch_duration_seconds{quantile="0.99"} NaN +prometheus_sd_kuma_fetch_duration_seconds_sum 0 +prometheus_sd_kuma_fetch_duration_seconds_count 0 +# HELP prometheus_sd_kuma_fetch_failures_total The number of Kuma MADS fetch call failures. +# TYPE prometheus_sd_kuma_fetch_failures_total counter +prometheus_sd_kuma_fetch_failures_total 0 +# HELP prometheus_sd_kuma_fetch_skipped_updates_total The number of Kuma MADS fetch calls that result in no updates to the targets. +# TYPE prometheus_sd_kuma_fetch_skipped_updates_total counter +prometheus_sd_kuma_fetch_skipped_updates_total 0 +# HELP prometheus_sd_linode_failures_total Number of Linode service discovery refresh failures. +# TYPE prometheus_sd_linode_failures_total counter +prometheus_sd_linode_failures_total 0 +# HELP prometheus_sd_nomad_failures_total Number of nomad service discovery refresh failures. +# TYPE prometheus_sd_nomad_failures_total counter +prometheus_sd_nomad_failures_total 0 +# HELP prometheus_sd_received_updates_total Total number of update events received from the SD providers. 
+# TYPE prometheus_sd_received_updates_total counter +prometheus_sd_received_updates_total{name="scrape"} 36897 +# HELP prometheus_sd_updates_total Total number of update events sent to the SD consumers. +# TYPE prometheus_sd_updates_total counter +prometheus_sd_updates_total{name="scrape"} 34137 +# HELP prometheus_target_interval_length_seconds Actual intervals between scrapes. +# TYPE prometheus_target_interval_length_seconds summary +prometheus_target_interval_length_seconds{interval="15s",quantile="0.01"} 14.99914058 +prometheus_target_interval_length_seconds{interval="15s",quantile="0.05"} 14.999310634 +prometheus_target_interval_length_seconds{interval="15s",quantile="0.5"} 15.000008779 +prometheus_target_interval_length_seconds{interval="15s",quantile="0.9"} 15.000545764 +prometheus_target_interval_length_seconds{interval="15s",quantile="0.99"} 15.000857257 +prometheus_target_interval_length_seconds_sum{interval="15s"} 2.4210266343189236e+07 +prometheus_target_interval_length_seconds_count{interval="15s"} 1.614017e+06 +# HELP prometheus_target_metadata_cache_bytes The number of bytes that are currently used for storing metric metadata in the cache +# TYPE prometheus_target_metadata_cache_bytes gauge +prometheus_target_metadata_cache_bytes{scrape_job="cadvisor"} 6898 +prometheus_target_metadata_cache_bytes{scrape_job="kube-state-metrics"} 1933 +prometheus_target_metadata_cache_bytes{scrape_job="pods"} 34437 +# HELP prometheus_target_metadata_cache_entries Total number of metric metadata entries in the cache +# TYPE prometheus_target_metadata_cache_entries gauge +prometheus_target_metadata_cache_entries{scrape_job="cadvisor"} 138 +prometheus_target_metadata_cache_entries{scrape_job="kube-state-metrics"} 39 +prometheus_target_metadata_cache_entries{scrape_job="pods"} 583 +# HELP prometheus_target_scrape_pool_exceeded_label_limits_total Total number of times scrape pools hit the label limits, during sync or config reload. 
+# TYPE prometheus_target_scrape_pool_exceeded_label_limits_total counter +prometheus_target_scrape_pool_exceeded_label_limits_total 0 +# HELP prometheus_target_scrape_pool_exceeded_target_limit_total Total number of times scrape pools hit the target limit, during sync or config reload. +# TYPE prometheus_target_scrape_pool_exceeded_target_limit_total counter +prometheus_target_scrape_pool_exceeded_target_limit_total 0 +# HELP prometheus_target_scrape_pool_reloads_failed_total Total number of failed scrape pool reloads. +# TYPE prometheus_target_scrape_pool_reloads_failed_total counter +prometheus_target_scrape_pool_reloads_failed_total 0 +# HELP prometheus_target_scrape_pool_reloads_total Total number of scrape pool reloads. +# TYPE prometheus_target_scrape_pool_reloads_total counter +prometheus_target_scrape_pool_reloads_total 0 +# HELP prometheus_target_scrape_pool_sync_total Total number of syncs that were executed on a scrape pool. +# TYPE prometheus_target_scrape_pool_sync_total counter +prometheus_target_scrape_pool_sync_total{scrape_job="cadvisor"} 34137 +prometheus_target_scrape_pool_sync_total{scrape_job="kube-state-metrics"} 34137 +prometheus_target_scrape_pool_sync_total{scrape_job="pods"} 34137 +# HELP prometheus_target_scrape_pool_target_limit Maximum number of targets allowed in this scrape pool. +# TYPE prometheus_target_scrape_pool_target_limit gauge +prometheus_target_scrape_pool_target_limit{scrape_job="cadvisor"} 0 +prometheus_target_scrape_pool_target_limit{scrape_job="kube-state-metrics"} 0 +prometheus_target_scrape_pool_target_limit{scrape_job="pods"} 0 +# HELP prometheus_target_scrape_pool_targets Current number of targets in this scrape pool. 
+# TYPE prometheus_target_scrape_pool_targets gauge +prometheus_target_scrape_pool_targets{scrape_job="cadvisor"} 2 +prometheus_target_scrape_pool_targets{scrape_job="kube-state-metrics"} 2 +prometheus_target_scrape_pool_targets{scrape_job="pods"} 5 +# HELP prometheus_target_scrape_pools_failed_total Total number of scrape pool creations that failed. +# TYPE prometheus_target_scrape_pools_failed_total counter +prometheus_target_scrape_pools_failed_total 0 +# HELP prometheus_target_scrape_pools_total Total number of scrape pool creation attempts. +# TYPE prometheus_target_scrape_pools_total counter +prometheus_target_scrape_pools_total 3 +# HELP prometheus_target_scrapes_cache_flush_forced_total How many times a scrape cache was flushed due to getting big while scrapes are failing. +# TYPE prometheus_target_scrapes_cache_flush_forced_total counter +prometheus_target_scrapes_cache_flush_forced_total 0 +# HELP prometheus_target_scrapes_exceeded_body_size_limit_total Total number of scrapes that hit the body size limit +# TYPE prometheus_target_scrapes_exceeded_body_size_limit_total counter +prometheus_target_scrapes_exceeded_body_size_limit_total 0 +# HELP prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total Total number of scrapes that hit the native histogram bucket limit and were rejected. +# TYPE prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total counter +prometheus_target_scrapes_exceeded_native_histogram_bucket_limit_total 0 +# HELP prometheus_target_scrapes_exceeded_sample_limit_total Total number of scrapes that hit the sample limit and were rejected. +# TYPE prometheus_target_scrapes_exceeded_sample_limit_total counter +prometheus_target_scrapes_exceeded_sample_limit_total 0 +# HELP prometheus_target_scrapes_exemplar_out_of_order_total Total number of exemplar rejected due to not being out of the expected order. 
+# TYPE prometheus_target_scrapes_exemplar_out_of_order_total counter +prometheus_target_scrapes_exemplar_out_of_order_total 0 +# HELP prometheus_target_scrapes_sample_duplicate_timestamp_total Total number of samples rejected due to duplicate timestamps but different values. +# TYPE prometheus_target_scrapes_sample_duplicate_timestamp_total counter +prometheus_target_scrapes_sample_duplicate_timestamp_total 0 +# HELP prometheus_target_scrapes_sample_out_of_bounds_total Total number of samples rejected due to timestamp falling outside of the time bounds. +# TYPE prometheus_target_scrapes_sample_out_of_bounds_total counter +prometheus_target_scrapes_sample_out_of_bounds_total 0 +# HELP prometheus_target_scrapes_sample_out_of_order_total Total number of samples rejected due to not being out of the expected order. +# TYPE prometheus_target_scrapes_sample_out_of_order_total counter +prometheus_target_scrapes_sample_out_of_order_total 0 +# HELP prometheus_target_sync_failed_total Total number of target sync failures. +# TYPE prometheus_target_sync_failed_total counter +prometheus_target_sync_failed_total{scrape_job="cadvisor"} 0 +prometheus_target_sync_failed_total{scrape_job="kube-state-metrics"} 0 +prometheus_target_sync_failed_total{scrape_job="pods"} 0 +# HELP prometheus_target_sync_length_seconds Actual interval to sync the scrape pool. 
+# TYPE prometheus_target_sync_length_seconds summary +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.01"} 0.00016778 +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.05"} 0.00016778 +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.5"} 0.000201532 +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.9"} 0.000217346 +prometheus_target_sync_length_seconds{scrape_job="cadvisor",quantile="0.99"} 0.000217346 +prometheus_target_sync_length_seconds_sum{scrape_job="cadvisor"} 9.36278804700008 +prometheus_target_sync_length_seconds_count{scrape_job="cadvisor"} 34137 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.01"} 0.000148145 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.05"} 0.000148145 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.5"} 0.000175667 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.9"} 0.000188701 +prometheus_target_sync_length_seconds{scrape_job="kube-state-metrics",quantile="0.99"} 0.000188701 +prometheus_target_sync_length_seconds_sum{scrape_job="kube-state-metrics"} 6.007913164999995 +prometheus_target_sync_length_seconds_count{scrape_job="kube-state-metrics"} 34137 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.01"} 0.000867282 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.05"} 0.000867282 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.5"} 0.000913952 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.9"} 0.001163668 +prometheus_target_sync_length_seconds{scrape_job="pods",quantile="0.99"} 0.001163668 +prometheus_target_sync_length_seconds_sum{scrape_job="pods"} 44.38431514700025 +prometheus_target_sync_length_seconds_count{scrape_job="pods"} 34137 +# HELP prometheus_template_text_expansion_failures_total The total number of template 
text expansion failures. +# TYPE prometheus_template_text_expansion_failures_total counter +prometheus_template_text_expansion_failures_total 0 +# HELP prometheus_template_text_expansions_total The total number of template text expansions. +# TYPE prometheus_template_text_expansions_total counter +prometheus_template_text_expansions_total 0 +# HELP prometheus_treecache_watcher_goroutines The current number of watcher goroutines. +# TYPE prometheus_treecache_watcher_goroutines gauge +prometheus_treecache_watcher_goroutines 0 +# HELP prometheus_treecache_zookeeper_failures_total The total number of ZooKeeper failures. +# TYPE prometheus_treecache_zookeeper_failures_total counter +prometheus_treecache_zookeeper_failures_total 0 +# HELP prometheus_tsdb_blocks_loaded Number of currently loaded data blocks +# TYPE prometheus_tsdb_blocks_loaded gauge +prometheus_tsdb_blocks_loaded 16 +# HELP prometheus_tsdb_checkpoint_creations_failed_total Total number of checkpoint creations that failed. +# TYPE prometheus_tsdb_checkpoint_creations_failed_total counter +prometheus_tsdb_checkpoint_creations_failed_total 0 +# HELP prometheus_tsdb_checkpoint_creations_total Total number of checkpoint creations attempted. +# TYPE prometheus_tsdb_checkpoint_creations_total counter +prometheus_tsdb_checkpoint_creations_total 187 +# HELP prometheus_tsdb_checkpoint_deletions_failed_total Total number of checkpoint deletions that failed. +# TYPE prometheus_tsdb_checkpoint_deletions_failed_total counter +prometheus_tsdb_checkpoint_deletions_failed_total 0 +# HELP prometheus_tsdb_checkpoint_deletions_total Total number of checkpoint deletions attempted. +# TYPE prometheus_tsdb_checkpoint_deletions_total counter +prometheus_tsdb_checkpoint_deletions_total 187 +# HELP prometheus_tsdb_clean_start -1: lockfile is disabled. 0: a lockfile from a previous execution was replaced. 
1: lockfile creation was clean +# TYPE prometheus_tsdb_clean_start gauge +prometheus_tsdb_clean_start -1 +# HELP prometheus_tsdb_compaction_chunk_range_seconds Final time range of chunks on their first compaction +# TYPE prometheus_tsdb_compaction_chunk_range_seconds histogram +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="100"} 673 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="400"} 673 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="1600"} 673 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="6400"} 673 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="25600"} 952 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="102400"} 2954 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="409600"} 11240 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="1.6384e+06"} 34940 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="6.5536e+06"} 1.3837075e+07 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="2.62144e+07"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_range_seconds_bucket{le="+Inf"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_range_seconds_sum 2.9219718662064e+13 +prometheus_tsdb_compaction_chunk_range_seconds_count 1.3837077e+07 +# HELP prometheus_tsdb_compaction_chunk_samples Final number of samples on their first compaction +# TYPE prometheus_tsdb_compaction_chunk_samples histogram +prometheus_tsdb_compaction_chunk_samples_bucket{le="4"} 1813 +prometheus_tsdb_compaction_chunk_samples_bucket{le="6"} 2625 +prometheus_tsdb_compaction_chunk_samples_bucket{le="9"} 5359 +prometheus_tsdb_compaction_chunk_samples_bucket{le="13.5"} 7578 +prometheus_tsdb_compaction_chunk_samples_bucket{le="20.25"} 10695 +prometheus_tsdb_compaction_chunk_samples_bucket{le="30.375"} 14153 +prometheus_tsdb_compaction_chunk_samples_bucket{le="45.5625"} 20641 +prometheus_tsdb_compaction_chunk_samples_bucket{le="68.34375"} 26828 +prometheus_tsdb_compaction_chunk_samples_bucket{le="102.515625"} 37088 
+prometheus_tsdb_compaction_chunk_samples_bucket{le="153.7734375"} 1.3192758e+07 +prometheus_tsdb_compaction_chunk_samples_bucket{le="230.66015625"} 1.3830353e+07 +prometheus_tsdb_compaction_chunk_samples_bucket{le="345.990234375"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_samples_bucket{le="+Inf"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_samples_sum 1.852852608e+09 +prometheus_tsdb_compaction_chunk_samples_count 1.3837077e+07 +# HELP prometheus_tsdb_compaction_chunk_size_bytes Final size of chunks on their first compaction +# TYPE prometheus_tsdb_compaction_chunk_size_bytes histogram +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="32"} 5907 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="48"} 3.717611e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="72"} 3.972949e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="108"} 4.043949e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="162"} 4.106797e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="243"} 4.42655e+06 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="364.5"} 1.075848e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="546.75"} 1.2225892e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="820.125"} 1.3311939e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1230.1875"} 1.3795122e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1845.28125"} 1.3836776e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="2767.921875"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="+Inf"} 1.3837077e+07 +prometheus_tsdb_compaction_chunk_size_bytes_sum 4.281044268e+09 +prometheus_tsdb_compaction_chunk_size_bytes_count 1.3837077e+07 +# HELP prometheus_tsdb_compaction_duration_seconds Duration of compaction runs +# TYPE prometheus_tsdb_compaction_duration_seconds histogram +prometheus_tsdb_compaction_duration_seconds_bucket{le="1"} 540 
+prometheus_tsdb_compaction_duration_seconds_bucket{le="2"} 540 +prometheus_tsdb_compaction_duration_seconds_bucket{le="4"} 554 +prometheus_tsdb_compaction_duration_seconds_bucket{le="8"} 559 +prometheus_tsdb_compaction_duration_seconds_bucket{le="16"} 559 +prometheus_tsdb_compaction_duration_seconds_bucket{le="32"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="64"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="128"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="256"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="512"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="1024"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="2048"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="4096"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="8192"} 561 +prometheus_tsdb_compaction_duration_seconds_bucket{le="+Inf"} 561 +prometheus_tsdb_compaction_duration_seconds_sum 272.2973793669999 +prometheus_tsdb_compaction_duration_seconds_count 561 +# HELP prometheus_tsdb_compaction_populating_block Set to 1 when a block is currently being written to the disk. +# TYPE prometheus_tsdb_compaction_populating_block gauge +prometheus_tsdb_compaction_populating_block 0 +# HELP prometheus_tsdb_compactions_failed_total Total number of compactions that failed for the partition. +# TYPE prometheus_tsdb_compactions_failed_total counter +prometheus_tsdb_compactions_failed_total 0 +# HELP prometheus_tsdb_compactions_skipped_total Total number of skipped compactions due to disabled auto compaction. +# TYPE prometheus_tsdb_compactions_skipped_total counter +prometheus_tsdb_compactions_skipped_total 0 +# HELP prometheus_tsdb_compactions_total Total number of compactions that were executed for the partition. +# TYPE prometheus_tsdb_compactions_total counter +prometheus_tsdb_compactions_total 561 +# HELP prometheus_tsdb_compactions_triggered_total Total number of triggered compactions for the partition. 
+# TYPE prometheus_tsdb_compactions_triggered_total counter +prometheus_tsdb_compactions_triggered_total 44842 +# HELP prometheus_tsdb_data_replay_duration_seconds Time taken to replay the data on disk. +# TYPE prometheus_tsdb_data_replay_duration_seconds gauge +prometheus_tsdb_data_replay_duration_seconds 0.767674068 +# HELP prometheus_tsdb_exemplar_exemplars_appended_total Total number of appended exemplars. +# TYPE prometheus_tsdb_exemplar_exemplars_appended_total counter +prometheus_tsdb_exemplar_exemplars_appended_total 0 +# HELP prometheus_tsdb_exemplar_exemplars_in_storage Number of exemplars currently in circular storage. +# TYPE prometheus_tsdb_exemplar_exemplars_in_storage gauge +prometheus_tsdb_exemplar_exemplars_in_storage 0 +# HELP prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds The timestamp of the oldest exemplar stored in circular storage. Useful to check for what timerange the current exemplar buffer limit allows. This usually means the last timestampfor all exemplars for a typical setup. This is not true though if one of the series timestamp is in future compared to rest series. +# TYPE prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds gauge +prometheus_tsdb_exemplar_last_exemplars_timestamp_seconds 0 +# HELP prometheus_tsdb_exemplar_max_exemplars Total number of exemplars the exemplar storage can store, resizeable. +# TYPE prometheus_tsdb_exemplar_max_exemplars gauge +prometheus_tsdb_exemplar_max_exemplars 0 +# HELP prometheus_tsdb_exemplar_out_of_order_exemplars_total Total number of out of order exemplar ingestion failed attempts. +# TYPE prometheus_tsdb_exemplar_out_of_order_exemplars_total counter +prometheus_tsdb_exemplar_out_of_order_exemplars_total 0 +# HELP prometheus_tsdb_exemplar_series_with_exemplars_in_storage Number of series with exemplars currently in circular storage. 
+# TYPE prometheus_tsdb_exemplar_series_with_exemplars_in_storage gauge +prometheus_tsdb_exemplar_series_with_exemplars_in_storage 0 +# HELP prometheus_tsdb_head_active_appenders Number of currently active appender transactions +# TYPE prometheus_tsdb_head_active_appenders gauge +prometheus_tsdb_head_active_appenders 0 +# HELP prometheus_tsdb_head_chunks Total number of chunks in the head block. +# TYPE prometheus_tsdb_head_chunks gauge +prometheus_tsdb_head_chunks 47276 +# HELP prometheus_tsdb_head_chunks_created_total Total number of chunks created in the head +# TYPE prometheus_tsdb_head_chunks_created_total counter +prometheus_tsdb_head_chunks_created_total 1.3884353e+07 +# HELP prometheus_tsdb_head_chunks_removed_total Total number of chunks removed in the head +# TYPE prometheus_tsdb_head_chunks_removed_total counter +prometheus_tsdb_head_chunks_removed_total 1.3837077e+07 +# HELP prometheus_tsdb_head_chunks_storage_size_bytes Size of the chunks_head directory. +# TYPE prometheus_tsdb_head_chunks_storage_size_bytes gauge +prometheus_tsdb_head_chunks_storage_size_bytes 2.0828256e+07 +# HELP prometheus_tsdb_head_gc_duration_seconds Runtime of garbage collection in the head block. +# TYPE prometheus_tsdb_head_gc_duration_seconds summary +prometheus_tsdb_head_gc_duration_seconds_sum 3.114924039999997 +prometheus_tsdb_head_gc_duration_seconds_count 373 +# HELP prometheus_tsdb_head_max_time Maximum timestamp of the head block. The unit is decided by the library consumer. +# TYPE prometheus_tsdb_head_max_time gauge +prometheus_tsdb_head_max_time 1.727807345546e+12 +# HELP prometheus_tsdb_head_max_time_seconds Maximum timestamp of the head block. +# TYPE prometheus_tsdb_head_max_time_seconds gauge +prometheus_tsdb_head_max_time_seconds 1.727807345e+09 +# HELP prometheus_tsdb_head_min_time Minimum time bound of the head block. The unit is decided by the library consumer. 
+# TYPE prometheus_tsdb_head_min_time gauge +prometheus_tsdb_head_min_time 1.727798400141e+12 +# HELP prometheus_tsdb_head_min_time_seconds Minimum time bound of the head block. +# TYPE prometheus_tsdb_head_min_time_seconds gauge +prometheus_tsdb_head_min_time_seconds 1.7277984e+09 +# HELP prometheus_tsdb_head_out_of_order_samples_appended_total Total number of appended out of order samples. +# TYPE prometheus_tsdb_head_out_of_order_samples_appended_total counter +prometheus_tsdb_head_out_of_order_samples_appended_total 0 +# HELP prometheus_tsdb_head_samples_appended_total Total number of appended samples. +# TYPE prometheus_tsdb_head_samples_appended_total counter +prometheus_tsdb_head_samples_appended_total{type="float"} 1.856200861e+09 +prometheus_tsdb_head_samples_appended_total{type="histogram"} 0 +# HELP prometheus_tsdb_head_series Total number of series in the head block. +# TYPE prometheus_tsdb_head_series gauge +prometheus_tsdb_head_series 10789 +# HELP prometheus_tsdb_head_series_created_total Total number of series created in the head +# TYPE prometheus_tsdb_head_series_created_total counter +prometheus_tsdb_head_series_created_total 42838 +# HELP prometheus_tsdb_head_series_not_found_total Total number of requests for series that were not found. +# TYPE prometheus_tsdb_head_series_not_found_total counter +prometheus_tsdb_head_series_not_found_total 0 +# HELP prometheus_tsdb_head_series_removed_total Total number of series removed in the head +# TYPE prometheus_tsdb_head_series_removed_total counter +prometheus_tsdb_head_series_removed_total 32049 +# HELP prometheus_tsdb_head_truncations_failed_total Total number of head truncations that failed. +# TYPE prometheus_tsdb_head_truncations_failed_total counter +prometheus_tsdb_head_truncations_failed_total 0 +# HELP prometheus_tsdb_head_truncations_total Total number of head truncations attempted. 
+# TYPE prometheus_tsdb_head_truncations_total counter +prometheus_tsdb_head_truncations_total 373 +# HELP prometheus_tsdb_isolation_high_watermark The highest TSDB append ID that has been given out. +# TYPE prometheus_tsdb_isolation_high_watermark gauge +prometheus_tsdb_isolation_high_watermark 1.614044e+06 +# HELP prometheus_tsdb_isolation_low_watermark The lowest TSDB append ID that is still referenced. +# TYPE prometheus_tsdb_isolation_low_watermark gauge +prometheus_tsdb_isolation_low_watermark 1.614044e+06 +# HELP prometheus_tsdb_lowest_timestamp Lowest timestamp value stored in the database. The unit is decided by the library consumer. +# TYPE prometheus_tsdb_lowest_timestamp gauge +prometheus_tsdb_lowest_timestamp 1.711547243455e+12 +# HELP prometheus_tsdb_lowest_timestamp_seconds Lowest timestamp value stored in the database. +# TYPE prometheus_tsdb_lowest_timestamp_seconds gauge +prometheus_tsdb_lowest_timestamp_seconds 1.711547243e+09 +# HELP prometheus_tsdb_mmap_chunk_corruptions_total Total number of memory-mapped chunk corruptions. +# TYPE prometheus_tsdb_mmap_chunk_corruptions_total counter +prometheus_tsdb_mmap_chunk_corruptions_total 0 +# HELP prometheus_tsdb_out_of_bound_samples_total Total number of out of bound samples ingestion failed attempts with out of order support disabled. +# TYPE prometheus_tsdb_out_of_bound_samples_total counter +prometheus_tsdb_out_of_bound_samples_total{type="float"} 0 +# HELP prometheus_tsdb_out_of_order_samples_total Total number of out of order samples ingestion failed attempts due to out of order being disabled. +# TYPE prometheus_tsdb_out_of_order_samples_total counter +prometheus_tsdb_out_of_order_samples_total{type="float"} 0 +prometheus_tsdb_out_of_order_samples_total{type="histogram"} 0 +# HELP prometheus_tsdb_reloads_failures_total Number of times the database failed to reloadBlocks block data from disk. 
+# TYPE prometheus_tsdb_reloads_failures_total counter +prometheus_tsdb_reloads_failures_total 0 +# HELP prometheus_tsdb_reloads_total Number of times the database reloaded block data from disk. +# TYPE prometheus_tsdb_reloads_total counter +prometheus_tsdb_reloads_total 45030 +# HELP prometheus_tsdb_retention_limit_bytes Max number of bytes to be retained in the tsdb blocks, configured 0 means disabled +# TYPE prometheus_tsdb_retention_limit_bytes gauge +prometheus_tsdb_retention_limit_bytes 5.36870912e+11 +# HELP prometheus_tsdb_size_retentions_total The number of times that blocks were deleted because the maximum number of bytes was exceeded. +# TYPE prometheus_tsdb_size_retentions_total counter +prometheus_tsdb_size_retentions_total 0 +# HELP prometheus_tsdb_snapshot_replay_error_total Total number snapshot replays that failed. +# TYPE prometheus_tsdb_snapshot_replay_error_total counter +prometheus_tsdb_snapshot_replay_error_total 0 +# HELP prometheus_tsdb_storage_blocks_bytes The number of bytes that are currently used for local storage by all blocks. +# TYPE prometheus_tsdb_storage_blocks_bytes gauge +prometheus_tsdb_storage_blocks_bytes 2.7078242758e+10 +# HELP prometheus_tsdb_symbol_table_size_bytes Size of symbol table in memory for loaded blocks +# TYPE prometheus_tsdb_symbol_table_size_bytes gauge +prometheus_tsdb_symbol_table_size_bytes 6624 +# HELP prometheus_tsdb_time_retentions_total The number of times that blocks were deleted because the maximum time limit was exceeded. +# TYPE prometheus_tsdb_time_retentions_total counter +prometheus_tsdb_time_retentions_total 0 +# HELP prometheus_tsdb_tombstone_cleanup_seconds The time taken to recompact blocks to remove tombstones. 
+# TYPE prometheus_tsdb_tombstone_cleanup_seconds histogram +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.005"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.01"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.025"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.05"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.1"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.25"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="0.5"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="1"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="2.5"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="5"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="10"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_bucket{le="+Inf"} 0 +prometheus_tsdb_tombstone_cleanup_seconds_sum 0 +prometheus_tsdb_tombstone_cleanup_seconds_count 0 +# HELP prometheus_tsdb_too_old_samples_total Total number of out of order samples ingestion failed attempts with out of support enabled, but sample outside of time window. +# TYPE prometheus_tsdb_too_old_samples_total counter +prometheus_tsdb_too_old_samples_total{type="float"} 0 +# HELP prometheus_tsdb_vertical_compactions_total Total number of compactions done on overlapping blocks. +# TYPE prometheus_tsdb_vertical_compactions_total counter +prometheus_tsdb_vertical_compactions_total 0 +# HELP prometheus_tsdb_wal_completed_pages_total Total number of completed pages. +# TYPE prometheus_tsdb_wal_completed_pages_total counter +prometheus_tsdb_wal_completed_pages_total 397233 +# HELP prometheus_tsdb_wal_corruptions_total Total number of WAL corruptions. +# TYPE prometheus_tsdb_wal_corruptions_total counter +prometheus_tsdb_wal_corruptions_total 0 +# HELP prometheus_tsdb_wal_fsync_duration_seconds Duration of write log fsync. 
+# TYPE prometheus_tsdb_wal_fsync_duration_seconds summary +prometheus_tsdb_wal_fsync_duration_seconds{quantile="0.5"} NaN +prometheus_tsdb_wal_fsync_duration_seconds{quantile="0.9"} NaN +prometheus_tsdb_wal_fsync_duration_seconds{quantile="0.99"} NaN +prometheus_tsdb_wal_fsync_duration_seconds_sum 0.805116427 +prometheus_tsdb_wal_fsync_duration_seconds_count 373 +# HELP prometheus_tsdb_wal_page_flushes_total Total number of page flushes. +# TYPE prometheus_tsdb_wal_page_flushes_total counter +prometheus_tsdb_wal_page_flushes_total 2.011145e+06 +# HELP prometheus_tsdb_wal_segment_current Write log segment index that TSDB is currently writing to. +# TYPE prometheus_tsdb_wal_segment_current gauge +prometheus_tsdb_wal_segment_current 2277 +# HELP prometheus_tsdb_wal_storage_size_bytes Size of the write log directory. +# TYPE prometheus_tsdb_wal_storage_size_bytes gauge +prometheus_tsdb_wal_storage_size_bytes 9.6264943e+07 +# HELP prometheus_tsdb_wal_truncate_duration_seconds Duration of WAL truncation. +# TYPE prometheus_tsdb_wal_truncate_duration_seconds summary +prometheus_tsdb_wal_truncate_duration_seconds_sum 69.80804534300002 +prometheus_tsdb_wal_truncate_duration_seconds_count 187 +# HELP prometheus_tsdb_wal_truncations_failed_total Total number of write log truncations that failed. +# TYPE prometheus_tsdb_wal_truncations_failed_total counter +prometheus_tsdb_wal_truncations_failed_total 0 +# HELP prometheus_tsdb_wal_truncations_total Total number of write log truncations attempted. +# TYPE prometheus_tsdb_wal_truncations_total counter +prometheus_tsdb_wal_truncations_total 187 +# HELP prometheus_tsdb_wal_writes_failed_total Total number of write log writes that failed. +# TYPE prometheus_tsdb_wal_writes_failed_total counter +prometheus_tsdb_wal_writes_failed_total 0 +# HELP prometheus_web_federation_errors_total Total number of errors that occurred while sending federation responses. 
+# TYPE prometheus_web_federation_errors_total counter +prometheus_web_federation_errors_total 0 +# HELP prometheus_web_federation_warnings_total Total number of warnings that occurred while sending federation responses. +# TYPE prometheus_web_federation_warnings_total counter +prometheus_web_federation_warnings_total 0 +# HELP promhttp_metric_handler_requests_in_flight Current number of scrapes being served. +# TYPE promhttp_metric_handler_requests_in_flight gauge +promhttp_metric_handler_requests_in_flight 1 +# HELP promhttp_metric_handler_requests_total Total number of scrapes by HTTP status code. +# TYPE promhttp_metric_handler_requests_total counter +promhttp_metric_handler_requests_total{code="200"} 179357 +promhttp_metric_handler_requests_total{code="500"} 0 +promhttp_metric_handler_requests_total{code="503"} 0 +`