From 541898ef55ab50a51aac22b5dd45b8aab0e654b5 Mon Sep 17 00:00:00 2001 From: bensoer Date: Sun, 21 Apr 2024 21:23:24 -0700 Subject: [PATCH 1/3] added extra notes about the parameters --- NOTES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NOTES.md b/NOTES.md index 6d8bfe8..28ee793 100644 --- a/NOTES.md +++ b/NOTES.md @@ -307,6 +307,8 @@ To get more verbose output, also pass these arguments in the `extraArgs` section ``` Again, `--log.level=debug` and `--print-config-stderr` are pretty useless until you get your `aws.s3` configuration correct. You'll be stuck with generic errors until you get that sorted +**Note:** There is no typo on `-config.expand-env=true`, it only prefixes with 1 dash. Don't ask me why + ## Bonus Garbage Oh, also. A whole bunch of these docs talk about using boltdb_shipper. That thing is deprecated! (https://grafana.com/docs/loki/latest/configure/storage/#boltdb-deprecated) There is a new one (https://grafana.com/docs/loki/latest/configure/storage/#tsdb-recommended), but man...documentation ? Where is it ? Nobody appears to be using this yet either \ No newline at end of file From 791ea13ada1bd829429e1133bedb26253e6c0b31 Mon Sep 17 00:00:00 2001 From: bensoer Date: Sun, 21 Apr 2024 22:04:03 -0700 Subject: [PATCH 2/3] Fixed prometheus-adapter bug where node metrics weren't appearing --- NOTES.md | 24 +++++++++++++++++++ .../res/prometheus-adapter-values.yaml | 4 ---- .../res/kube-prometheus-stack-values.yaml | 14 +++++------ .../modules/promtail/res/promtail-values.yaml | 6 ++--- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/NOTES.md b/NOTES.md index 28ee793..0ee27c0 100644 --- a/NOTES.md +++ b/NOTES.md @@ -211,6 +211,30 @@ Helm ignores this feature, and instead focuses on trying to template out all com ## Prometheus-Adapter has a bug in it, out the gate: https://github.com/kubernetes-sigs/prometheus-adapter/issues/385 +Basically, depending on how you installed prometheus. 
You may not be providing a `node` value to the `node_cpu_seconds_total` - a critical metric with the default configuration of prometheus-adapter. + +There are multiple ways you can fix this issue, depending on whether you would like to relabel the metric in prometheus, or search for wherever the correct one is from the prometheus-adapter. I chose to fix it within prometheus, as it's more helpful to have anyway, and with the prometheus UI, you can debug and prove whether you fixed it or not. + +Basically, within prometheus you need to add the following relabeling rule: +```yaml +prometheus-node-exporter: + monitor: + relabelings: + - sourceLabels: [__meta_kubernetes_pod_node_name] + separator: ; + regex: ^(.*)$ + targetLabel: node + replacement: $1 + action: replace +``` + +Once you've applied the change, test that the label exists within the UI by putting `node_cpu_seconds_total` into the search. You should see your new label in there. + +Now, with the default setup of prometheus-adapter, you should successfully be able to get your top usage nodes from kubectl: +```bash +kubectl top nodes +``` + ## S3 external storage documentation and secure configuration of keys is basically all out of date, scattered around, or broken! The grafana docs are complete shit. I've read it from multiple forums already, but this is my first experience where its truly shown its colors. 
In order to get proper cloud storage setup, i've had to jump between a bunch of forums, blind guess through a whole bunch of possibilities, and then stumble on a makeshift of a couple options in order to get everything working diff --git a/modules/k8config/modules/prometheus-adapter/res/prometheus-adapter-values.yaml b/modules/k8config/modules/prometheus-adapter/res/prometheus-adapter-values.yaml index 851fea0..9efe23f 100644 --- a/modules/k8config/modules/prometheus-adapter/res/prometheus-adapter-values.yaml +++ b/modules/k8config/modules/prometheus-adapter/res/prometheus-adapter-values.yaml @@ -171,8 +171,6 @@ rules: resource: namespace pod: resource: pod - instance: - resource: node containerLabel: container memory: containerQuery: | @@ -187,8 +185,6 @@ rules: ) resources: overrides: - instance: - resource: node node: resource: node namespace: diff --git a/modules/k8config/modules/prometheus/res/kube-prometheus-stack-values.yaml b/modules/k8config/modules/prometheus/res/kube-prometheus-stack-values.yaml index 2edb0ef..4877995 100644 --- a/modules/k8config/modules/prometheus/res/kube-prometheus-stack-values.yaml +++ b/modules/k8config/modules/prometheus/res/kube-prometheus-stack-values.yaml @@ -2141,13 +2141,13 @@ prometheus-node-exporter: ## RelabelConfigs to apply to samples before scraping ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig ## - relabelings: [] - # - sourceLabels: [__meta_kubernetes_pod_node_name] - # separator: ; - # regex: ^(.*)$ - # targetLabel: nodename - # replacement: $1 - # action: replace + relabelings: + - sourceLabels: [__meta_kubernetes_pod_node_name] + separator: ; + regex: ^(.*)$ + targetLabel: node + replacement: $1 + action: replace rbac: ## If true, create PSPs for node-exporter ## diff --git a/modules/k8config/modules/promtail/res/promtail-values.yaml b/modules/k8config/modules/promtail/res/promtail-values.yaml index bb8322e..633b752 100644 --- 
a/modules/k8config/modules/promtail/res/promtail-values.yaml +++ b/modules/k8config/modules/promtail/res/promtail-values.yaml @@ -48,12 +48,12 @@ daemonset: deployment: # -- Deploys Promtail as a Deployment enabled: true - replicaCount: 3 + replicaCount: 1 autoscaling: # -- Creates a HorizontalPodAutoscaler for the deployment enabled: true - minReplicas: 3 - maxReplicas: 10 + minReplicas: 1 + maxReplicas: 3 targetCPUUtilizationPercentage: 80 targetMemoryUtilizationPercentage: # behavior: {} From 81cf9c0d4d3942b5764ee0de0224e0c65c711978 Mon Sep 17 00:00:00 2001 From: bensoer Date: Sun, 21 Apr 2024 22:06:25 -0700 Subject: [PATCH 3/3] completed notes on prometheus-adapter --- NOTES.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/NOTES.md b/NOTES.md index 0ee27c0..2ec2258 100644 --- a/NOTES.md +++ b/NOTES.md @@ -209,7 +209,10 @@ CRDs are meant to be the powerhouse of Kubernetes. To make something Cloud/Kuber Helm ignores this feature, and instead focuses on trying to template out all components. It leave this to working with the Kubernetes primitive, Pod/Service/Secrets services. Which are the basics, but aren't the full capabilities of the framework. They are really just the surface, and Helm encourage people away from those advanced and powerful capabilities with its workflows. ## Prometheus-Adapter has a bug in it, out the gate: -https://github.com/kubernetes-sigs/prometheus-adapter/issues/385 +* https://github.com/kubernetes-sigs/prometheus-adapter/issues/385 +* https://github.com/kubernetes-sigs/prometheus-adapter/issues/398 + +Actually, it was this comment, far down the thread, that gave me a clue as to what the issue could be: https://github.com/kubernetes-sigs/prometheus-adapter/issues/398#issuecomment-1443580236 Basically, depending on how you installed prometheus. You may not be providing a `node` value to the `node_cpu_seconds_total` - a critical metric with the default configuration of prometheus-adapter.