From d0d82e8b4b712857e47f73aa8a18134f597d6c94 Mon Sep 17 00:00:00 2001
From: bensoer
Date: Sun, 21 Apr 2024 21:11:54 -0700
Subject: [PATCH 1/3] removed elasticsearch, reinstated loki. added promtail
 and prometheus-adapter

---
 NOTES.md                                      |  119 +-
 README.md                                     |    7 +-
 main.tf                                       |    9 +-
 modules/k8config/main.tf                      |   42 +-
 .../{ => _archive}/elasticsearch/README.md    |    0
 .../{ => _archive}/elasticsearch/main.tf      |   15 +
 .../res/elasticsearch-values.yaml             |    0
 .../elasticsearch/res/elasticsearch.yaml      |   39 +
 .../{loki => elasticsearch}/variables.tf      |    0
 .../k8config/modules/_archive/grafana/main.tf |    4 +-
 .../res/grafana-agent-operator-values.yaml    |   80 +
 .../_archive/loki/res/loki-values.yaml        |   10 -
 modules/k8config/modules/crds/main.tf         |    4 +-
 modules/k8config/modules/loki/README.md       |   12 +
 modules/k8config/modules/loki/main.tf         |   63 +
 .../loki/res/loki-distributed-values.yaml     | 2111 +++++++++++++++++
 modules/k8config/modules/loki/variables.tf    |   11 +
 .../modules/prometheus-adapter/README.md      |   26 +
 .../modules/prometheus-adapter/main.tf        |   32 +
 .../res/prometheus-adapter-values.yaml        |  296 +++
 .../variables.tf                              |    0
 modules/k8config/modules/prometheus/README.md |    4 +-
 .../res/kube-prometheus-stack-values.yaml     |    7 +-
 .../{_archive/loki => promtail}/main.tf       |   10 +-
 .../modules/promtail/res/promtail-values.yaml |  635 +++++
 .../k8config/modules/promtail/variables.tf    |    0
 modules/k8config/variables.tf                 |   12 +
 modules/k8infra/variables.tf                  |    5 -
 variables.tf                                  |   15 +
 29 files changed, 3523 insertions(+), 45 deletions(-)
 rename modules/k8config/modules/{ => _archive}/elasticsearch/README.md (100%)
 rename modules/k8config/modules/{ => _archive}/elasticsearch/main.tf (69%)
 rename modules/k8config/modules/{ => _archive}/elasticsearch/res/elasticsearch-values.yaml (100%)
 create mode 100644 modules/k8config/modules/_archive/elasticsearch/res/elasticsearch.yaml
 rename modules/k8config/modules/_archive/{loki => elasticsearch}/variables.tf (100%)
 create mode 100644 modules/k8config/modules/_archive/grafana/res/grafana-agent-operator-values.yaml
 delete mode 100644 modules/k8config/modules/_archive/loki/res/loki-values.yaml
 create mode 100644 modules/k8config/modules/loki/README.md
 create mode 100644 modules/k8config/modules/loki/main.tf
 create mode 100644 modules/k8config/modules/loki/res/loki-distributed-values.yaml
 create mode 100644 modules/k8config/modules/loki/variables.tf
 create mode 100644 modules/k8config/modules/prometheus-adapter/README.md
 create mode 100644 modules/k8config/modules/prometheus-adapter/main.tf
 create mode 100644 modules/k8config/modules/prometheus-adapter/res/prometheus-adapter-values.yaml
 rename modules/k8config/modules/{elasticsearch => prometheus-adapter}/variables.tf (100%)
 rename modules/k8config/modules/{_archive/loki => promtail}/main.tf (68%)
 create mode 100644 modules/k8config/modules/promtail/res/promtail-values.yaml
 create mode 100644 modules/k8config/modules/promtail/variables.tf

diff --git a/NOTES.md b/NOTES.md
index 1dd9d3b..6d8bfe8 100644
--- a/NOTES.md
+++ b/NOTES.md
@@ -49,7 +49,7 @@ spec:
 ```
 Notice how this ingress for ArgoCD is created in the `traefik` namespace (metadata.namespace). And then the `services` definition within the `routes` now includes a `namespace` section pointing to `argocd`.
 This allows traefik in the traefik namespace to forward traffic to ArgoCD, which has been set up in the `argocd` namespace
 
-# The Traefik Dashboard Can Be Fully Secured With Basic Auth + Certificates From cert-manager Within its Helm Chart
+## The Traefik Dashboard Can Be Fully Secured With Basic Auth + Certificates From cert-manager Within its Helm Chart
 There's no documentation for this example scenario, and I guess in retrospect it is rather intuitive. But here is an example of how to set it up. With traefik set up via Helm, configure the following in your `values.yaml`:
@@ -109,7 +109,7 @@ You can also create this within `extraObjects` section above, but I did it separately
 Once all of that is applied, the changes may not be immediate, and that's because cert-manager still needs to provision your certificate. During that time, traefik will serve the website using its default built-in certificate. Once the certificate is ready, traefik will be reloaded with it!
 
-# Setup DNS01 validation with non-wildcard domains and sub-domains using cert-manager and CloudFlare
+## Set up DNS01 validation with non-wildcard domains and sub-domains using cert-manager and CloudFlare
 This workflow is not well documented either. But the configuration all exists, it just takes way more digging than it should. At one point there was actually a bug in this workflow, where cert-manager couldn't actually find the domain on cloudflare. If you run into these issues, there are a couple of things you can try to resolve it:
@@ -126,10 +126,10 @@
 dns01RecursiveNameservers: "1.1.1.1:53,1.0.0.1:53,8.8.8.8:53,8.8.4.4:53"
 ```
 As a bonus, I included Google's DNS servers as well. They are always pretty quick to pick up changes
-# Setup Dev and Prod Issuers with LetsEncrypt
+## Set up Dev and Prod Issuers with LetsEncrypt
 Having both is helpful during the debugging process, as it allows you to create certificates end-to-end without running into rate limits. By using LetsEncrypt's dev endpoint, you can save yourself some debugging headaches
-# ArgoCD Needs to be setup with the --insecure flag if you want it to be public facing
+## ArgoCD needs to be set up with the --insecure flag if you want it to be public facing
 ArgoCD has its own certificate to work with when you use the kubectl proxy. But if you want it to be public facing, you'll need to disable this functionality. Otherwise, you will end up with constant redirect loops
 To have ArgoCD run insecure, from Helm, configure the following in your `values.yaml`:
@@ -141,14 +141,14 @@ server:
 Or wherever you run the ArgoCD container, make sure to pass the argument `--insecure` to the binary
-# Terraform Kubernetes provider 'kubernetes_manifest' has a bug for cluster setup workflows
+## Terraform Kubernetes provider 'kubernetes_manifest' has a bug for cluster setup workflows
 You'll need to use the kubectl provider instead, and specifically the fork created by `alekc`, as the original provider also had its own bug and the project is no longer regularly maintained by its owner
-# cert-manager has a bug in how its CRDs are installed
+## cert-manager has a bug in how its CRDs are installed
 The deprecated `installCRDs` variable is actually the only way to install the CRDs via helm. The replacement options of `crds.keep` and `crds.install` do not actually work
-# Install your CRDS seperatly first
+## Install your CRDs separately first
 Not doing this is a pain in the ass when it comes to IaC and wanting to cross-configure various tools within your cluster.
@@ -206,4 +206,107 @@ Oh not to mention, these days there is also an option where people are insisting
 ## Helm is a Design Flaw. It deviates away from Kubernetes design and architecture
 CRDs are meant to be the powerhouse of Kubernetes. To make something Cloud/Kubernetes native, you create CRDs, which are the building blocks to create and configure your application within the cluster.
-Helm ignores this feature, and instead focuses on trying to template out all components. It leave this to working with the Kubernetes primitive, Pod/Service/Secrets services. Which are the basics, but aren't the full capabilities of the framework. They are really just the surface, and Helm encourage people away from those advanced and powerful capabilities with its workflows.
\ No newline at end of file
+Helm ignores this feature, and instead focuses on trying to template out all components. It leaves you working with the Kubernetes primitives: Pods/Services/Secrets. These are the basics, but they aren't the full capabilities of the framework. They are really just the surface, and Helm encourages people away from those advanced and powerful capabilities with its workflows.
+
+## Prometheus-Adapter has a bug in it, right out of the gate:
+https://github.com/kubernetes-sigs/prometheus-adapter/issues/385
+
+## S3 external storage documentation and secure configuration of keys is basically all out of date, scattered around, or broken!
+The grafana docs are complete shit. I've read as much on multiple forums already, but this is my first experience where it's truly shown its colors. In order to get proper cloud storage set up, I've had to jump between a bunch of forums, blind guess through a whole bunch of possibilities, and then stumble on a makeshift combination of a couple of options in order to get everything working
+
+Additionally, Loki's components won't give their additional debugging and help output if your S3 configuration is incorrect. So you're stuck blind debugging until you get it mostly right!
+
+Here are some of the places I looked that ended up completely wrong:
+* https://github.com/grafana/loki/issues/12218
+* https://github.com/grafana/loki/issues/8572
+* https://community.grafana.com/t/provide-s3-credentials-using-environment-variables/100132/2
+
+This one ended up being half right, but the format is out of date with the latest versions and helm charts:
+* https://akyriako.medium.com/kubernetes-logging-with-grafana-loki-promtail-in-under-10-minutes-d2847d526f9e
+
+And this one, for Digital Ocean itself, was a complete mess of outdated information:
+* https://www.digitalocean.com/community/developer-center/how-to-install-loki-stack-in-doks-cluster
+
+And it was only some blind guessing around with this example on Grafana that I found something that accidentally worked: https://grafana.com/docs/loki/latest/configure/storage/#aws-deployment-s3-single-store
+
+Unfortunately, I can't even really tell you why what I have works. But at the very least I can show you what did work for me
+
+
+## Setting Up S3 / Digital Ocean Backed Storage with Loki and Securely Storing Access Keys
+
+I'm installing Loki via Helm using the loki-distributed chart (there are multiple Loki charts, and they seem to differ somewhat in what they can and cannot do). I am using version `0.79.0`.
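+
+For reference, here is roughly what that install looks like from the CLI (a sketch only; I actually drive this through Terraform's helm provider, and the release name, namespace, and `values.yaml` path here are just placeholders):
+```bash
+# Add the Grafana chart repo and install loki-distributed, pinned to the version above
+helm repo add grafana https://grafana.github.io/helm-charts
+helm repo update
+helm upgrade --install loki grafana/loki-distributed \
+  --version 0.79.0 \
+  --namespace loki --create-namespace \
+  -f values.yaml
+```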
+
+My `storageConfig` section was set up like this:
+```yaml
+  storageConfig:
+    boltdb_shipper:
+      shared_store: aws
+      active_index_directory: /var/loki/index
+      cache_location: /var/loki/cache
+      cache_ttl: 1h
+    filesystem:
+      directory: /var/loki/chunks
+# -- Uncomment to configure each storage individually
+# azure: {}
+# gcs: {}
+    aws:
+      s3: s3://${S3_LOKI_ACCESS_KEY}:${S3_LOKI_SECRET_ACCESS_KEY}@nyc3
+      bucketnames: k8stack-resources
+      endpoint: nyc3.digitaloceanspaces.com
+      region: nyc3
+      s3forcepathstyle: false
+      insecure: false
+      http_config:
+        idle_conn_timeout: 90s
+        response_header_timeout: 0s
+        insecure_skip_verify: false
+```
+It seems like using `secretAccessKey` or `accessKeyId` does not resolve the variables that are in the environment. It only appears to work within the `s3` string. And that `s3` string format is custom to this project too! It is not standard AWS S3 connection syntax, from what I have experienced.
+
+Being on Digital Ocean, I had to do a bit of tinkering with the `endpoint` value. Fortunately, the log output spelt that one out for me
+
+Another key piece was to go through this configuration and look for all the `shared_store` values, as these were sometimes set to `s3`. From the Grafana docs I read that `s3` and `aws` are aliases. But I don't trust it. So I'd recommend changing those values to `aws`. I _think_ what is happening here is that this `aws` value used elsewhere is being used to find the `aws` key listed under `storageConfig`, so as to find the access credentials etc
+
+I then configured my secrets within the `extraEnvFrom` section for each component I was deploying:
+```yaml
+  extraEnvFrom:
+    - secretRef:
+        name: loki-s3-credentials
+```
+This is an opaque secret with the following data:
+```
+S3_LOKI_ACCESS_KEY: <your Spaces access key id>
+S3_LOKI_SECRET_ACCESS_KEY: <your Spaces secret access key>
+```
+Don't listen to some of the documentation talking about these values needing to be URL encoded. Pass them in exactly as you received them. Kubernetes will base64 encode them as always, but you don't need to do anything to them yourself. Copy, paste, and let Kubernetes do the rest
+
+
+## Debugging and Post Deployment Checks
+
+Once things appear to have booted successfully for you, I would check your S3 or Digital Ocean bucket. Loki should have filled it with some content. If you have no content, something has _definitely_ gone wrong without you knowing it. Loki doesn't seem to be very forthcoming about any issues
+
+Some helpful commands I used with `kubectl` were:
+```bash
+# Get an overview: are things running, or rebooting and failing?
+kubectl get all -n loki
+
+# Get details of a pod. This includes boot highlights, but also allows you to confirm what environment variables were passed to your container
+kubectl describe pod <pod-name> -n loki
+
+# Finally, print the logs of the container. Again, this will be pretty useless until you have it mostly right!
+kubectl logs <pod-name> -n loki --follow
+```
+These allowed me to deduce what the hell was going on
+
+To get more verbose output, also pass these arguments in the `extraArgs` section of each of the components you are deploying:
+```yaml
+  extraArgs:
+    - -config.expand-env=true # you NEED this in order for environment variables to work in your storageConfig
+    - --log.level=debug
+    - --print-config-stderr
+```
+Again, `--log.level=debug` and `--print-config-stderr` are pretty useless until you get your `aws.s3` configuration correct. You'll be stuck with generic errors until you get that sorted
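+
+As an outside-the-cluster sanity check, something like this can confirm Loki is actually writing objects (a sketch, assuming you have the AWS CLI installed and configured with the same Spaces key pair; the bucket and endpoint are the ones from my config above):
+```bash
+# List the objects Loki has written to the Spaces bucket via its S3-compatible API
+aws s3 ls s3://k8stack-resources --recursive --endpoint-url https://nyc3.digitaloceanspaces.com
+```
+If that listing stays empty after Loki has been running for a few minutes, your storage configuration is almost certainly wrong.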
+
+
+## Bonus Garbage
+Oh, also. A whole bunch of these docs talk about using boltdb_shipper. That thing is deprecated! (https://grafana.com/docs/loki/latest/configure/storage/#boltdb-deprecated) There is a new one (https://grafana.com/docs/loki/latest/configure/storage/#tsdb-recommended), but man... documentation? Where is it? Nobody appears to be using this yet either
\ No newline at end of file
diff --git a/README.md b/README.md
index 27c8ffe..ca66dcc 100644
--- a/README.md
+++ b/README.md
@@ -32,9 +32,10 @@ Below is a table of each piece installed in my cluster at the moment, and what r
 | Traefik | Ingress Controller | |
 | Kyverno | RBAC and Admissions Controller | |
 | Prometheus | Observability - Metrics Server | |
-| Grafana | Observability - Metrics Dashbaord | |
-| Elasticsearch | Observability - Logging Database | |
-| Kibana | Observability - Logging Dashboard | Coming Soon |
+| Prometheus Adapter | Metrics for Kubernetes Metrics API | Replaces metrics-server to work with Prometheus instead |
+| Grafana | Observability - Metrics & Logging Dashboard | |
+| Loki | Observability - Logging Database | |
+| Promtail | Observability - Container Stdout Log Scraping | Forwards to Loki |
 | Vault | Secrets Manager | Coming Soon |
 
 Below now is another table of the tech being used for managing and configuring my Kubernetes cluster:
diff --git a/main.tf b/main.tf
index 50de57d..6124b92 100644
--- a/main.tf
+++ b/main.tf
@@ -1,5 +1,5 @@
 terraform {
-  required_version = "~> 1.7.5"
+  required_version = "~> 1.8.1"
 
   required_providers {
     digitalocean = {
@@ -34,8 +34,7 @@ terraform {
 
 module "k8infra" {
-  source   = "./modules/k8infra"
-  do_token = var.do_token
+  source = "./modules/k8infra"
 
   providers = {
     digitalocean = digitalocean
@@ -58,6 +57,10 @@ module "k8config" {
   cf_token = var.cf_token
   domain   = var.domain
 
+
+  s3_access_key_id     = var.do_spaces_access_key_id
+  s3_secret_access_key = var.do_spaces_secret_access_key
+
   providers = {
     kubernetes = kubernetes
     helm       = helm
diff --git a/modules/k8config/main.tf b/modules/k8config/main.tf
index 3d44a0c..e6268c6 100644
--- a/modules/k8config/main.tf
+++ b/modules/k8config/main.tf
@@ -116,15 +116,45 @@ module "kyverno" {
   ]
 }
 
-module "elasticsearch" {
-  source = "./modules/elasticsearch"
+
+module "loki" {
+  source = "./modules/loki"
+
+  s3_access_key_id     = var.s3_access_key_id
+  s3_secret_access_key = var.s3_secret_access_key
 
   providers = {
-    kubectl = kubectl
-    helm    = helm
+    helm = helm
   }
 
-  depends_on = [
+  depends_on = [
     time_sleep.wait_60_seconds
-  ]
+  ]
 }
+
+module "promtail" {
+  source = "./modules/promtail"
+
+  providers = {
+    helm = helm
+  }
+
+  depends_on = [
+    time_sleep.wait_60_seconds,
+    module.loki
+  ]
+}
+
+
+module "prometheus-adapter" {
+  source = "./modules/prometheus-adapter"
+
+  providers = {
+    helm = helm
+  }
+
+  depends_on = [
+    time_sleep.wait_60_seconds,
+    module.prometheus
+  ]
+}
\ No newline at end of file
diff --git a/modules/k8config/modules/elasticsearch/README.md b/modules/k8config/modules/_archive/elasticsearch/README.md
similarity index 100%
rename from modules/k8config/modules/elasticsearch/README.md
rename to modules/k8config/modules/_archive/elasticsearch/README.md
diff --git a/modules/k8config/modules/elasticsearch/main.tf b/modules/k8config/modules/_archive/elasticsearch/main.tf
similarity index 69%
rename from modules/k8config/modules/elasticsearch/main.tf
rename to modules/k8config/modules/_archive/elasticsearch/main.tf
index a7eeaa9..65e0a2b 100644
--- a/modules/k8config/modules/elasticsearch/main.tf
+++
b/modules/k8config/modules/_archive/elasticsearch/main.tf @@ -33,3 +33,18 @@ resource "helm_release" "elasticsearch" { file("${abspath(path.module)}/res/elasticsearch-values.yaml") ] } + +resource "time_sleep" "wait_60_seconds" { + depends_on = [helm_release.elasticsearch] + create_duration = "60s" +} + +/* +resource "kubectl_manifest" "elasticsearch_cluster" { + yaml_body = file("${abspath(path.module)}/res/elasticsearch.yaml") + + depends_on = [ + time_sleep.wait_60_seconds + ] +} +*/ diff --git a/modules/k8config/modules/elasticsearch/res/elasticsearch-values.yaml b/modules/k8config/modules/_archive/elasticsearch/res/elasticsearch-values.yaml similarity index 100% rename from modules/k8config/modules/elasticsearch/res/elasticsearch-values.yaml rename to modules/k8config/modules/_archive/elasticsearch/res/elasticsearch-values.yaml diff --git a/modules/k8config/modules/_archive/elasticsearch/res/elasticsearch.yaml b/modules/k8config/modules/_archive/elasticsearch/res/elasticsearch.yaml new file mode 100644 index 0000000..b8d05df --- /dev/null +++ b/modules/k8config/modules/_archive/elasticsearch/res/elasticsearch.yaml @@ -0,0 +1,39 @@ +# https://github.com/elastic/cloud-on-k8s/blob/main/config/samples/elasticsearch/elasticsearch.yaml +apiVersion: elasticsearch.k8s.elastic.co/v1 +kind: Elasticsearch +metadata: + name: elasticsearch + namespace: elasticsearch +spec: + version: 8.13.2 + volumeClaimDeletePolicy: DeleteOnScaledownOnly + nodeSets: + - name: default + count: 1 # 3 cluster nodes + volumeClaimTemplates: + - metadata: + name: elasticsearch-data + spec: + storageClassName: do-block-storage + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + podTemplate: + spec: + containers: + - name: elasticsearch + resources: + requests: + memory: 1Gi + cpu: 1 + limits: + memory: 1Gi + cpu: 1 + #config: + # node.store.allow_mmap: false + http: + service: + spec: + type: ClusterIP \ No newline at end of file diff --git a/modules/k8config/modules/_archive/loki/variables.tf b/modules/k8config/modules/_archive/elasticsearch/variables.tf similarity index 100% rename from modules/k8config/modules/_archive/loki/variables.tf rename to modules/k8config/modules/_archive/elasticsearch/variables.tf diff --git a/modules/k8config/modules/_archive/grafana/main.tf b/modules/k8config/modules/_archive/grafana/main.tf index 3f39a9d..ea7f235 100644 --- a/modules/k8config/modules/_archive/grafana/main.tf +++ b/modules/k8config/modules/_archive/grafana/main.tf @@ -19,7 +19,7 @@ resource "helm_release" "grafana" { name = "grafana" repository = "https://grafana.github.io/helm-charts" - chart = "grafana" + chart = "grafana-agent-operator" atomic = true @@ -34,7 +34,7 @@ resource "helm_release" "grafana" { dependency_update = true values = [ - file("${abspath(path.module)}/res/grafana-values.yaml") + file("${abspath(path.module)}/res/grafana-agent-operator-values.yaml") ] depends_on = [ diff --git a/modules/k8config/modules/_archive/grafana/res/grafana-agent-operator-values.yaml b/modules/k8config/modules/_archive/grafana/res/grafana-agent-operator-values.yaml new file mode 100644 index 0000000..af50f32 --- /dev/null +++ b/modules/k8config/modules/_archive/grafana/res/grafana-agent-operator-values.yaml @@ -0,0 +1,80 @@ +# -- Overrides the chart's name +nameOverride: "" + +# -- Overrides the chart's computed fullname +fullnameOverride: "" + +# -- Annotations for the Deployment +annotations: {} + +# -- Annotations for the Deployment Pods +podAnnotations: {} + +# -- Annotations for the Deployment Pods 
+podLabels: {}
+
+# -- Pod security context (runAsUser, etc.)
+podSecurityContext: {}
+
+# -- Container security context (allowPrivilegeEscalation, etc.)
+containerSecurityContext: {}
+
+rbac:
+  # -- Toggle to create ClusterRole and ClusterRoleBinding
+  create: true
+  # -- Name of a PodSecurityPolicy to use in the ClusterRole. If unset, no PodSecurityPolicy is used.
+  podSecurityPolicyName: ''
+
+serviceAccount:
+  # -- Toggle to create ServiceAccount
+  create: true
+  # -- Service account name
+  name:
+
+image:
+  # -- Image registry
+  registry: docker.io
+  # -- Image repo
+  repository: grafana/agent-operator
+  # -- Image tag
+  tag: v0.40.4
+  # -- Image pull policy
+  pullPolicy: IfNotPresent
+  # -- Image pull secrets
+  pullSecrets: []
+
+test:
+  image:
+    # -- Test image registry
+    registry: docker.io
+    # -- Test image repo
+    repository: library/busybox
+    # -- Test image tag
+    tag: latest
+
+# -- hostAliases to add
+hostAliases: []
+# - ip: 1.2.3.4
+#   hostnames:
+#   - domain.tld
+
+# -- If both are set, Agent Operator will create and maintain a service for scraping kubelets
+# https://grafana.com/docs/agent/latest/operator/getting-started/#monitor-kubelets
+kubeletService:
+  namespace: default
+  serviceName: kubelet
+
+# -- List of additional cli arguments to configure agent-operator (example: `--log.level`)
+extraArgs: []
+
+# -- Resource limits and requests config
+resources: {}
+
+# -- nodeSelector configuration
+nodeSelector: {}
+
+# -- Tolerations applied to Pods
+tolerations: []
+
+# -- Pod affinity configuration
+affinity: {}
\ No newline at end of file
diff --git a/modules/k8config/modules/_archive/loki/res/loki-values.yaml b/modules/k8config/modules/_archive/loki/res/loki-values.yaml
deleted file mode 100644
index 79d04a4..0000000
--- a/modules/k8config/modules/_archive/loki/res/loki-values.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-# All Values: https://grafana.com/docs/loki/latest/setup/install/helm/reference/
-
-# Small temp storage option
-minio:
-  enabled: true
-
-monitoring:
-  dashboards:
-    enabled: true
-    namespace: grafana
\ No newline at end of file
diff --git a/modules/k8config/modules/crds/main.tf b/modules/k8config/modules/crds/main.tf
index e3f2a5d..3617fc5 100644
--- a/modules/k8config/modules/crds/main.tf
+++ b/modules/k8config/modules/crds/main.tf
@@ -35,7 +35,9 @@ resource "kubectl_manifest" "kyverno-crds" {
   server_side_apply = true
 }
 
+
 /* Elasticsearch */
+/*
 resource "helm_release" "elasticsearch-crds" {
 
   name       = "eck-operator"
@@ -56,4 +58,4 @@ resource "helm_release" "elasticsearch-crds" {
   #values = [
   #  file("${abspath(path.module)}/res/elasticsearch-crd-values.yaml")
   #]
-}
\ No newline at end of file
+}*/
\ No newline at end of file
diff --git a/modules/k8config/modules/loki/README.md b/modules/k8config/modules/loki/README.md
new file mode 100644
index 0000000..44f526f
--- /dev/null
+++ b/modules/k8config/modules/loki/README.md
@@ -0,0 +1,12 @@
+# loki Module
+This module is configured to work with S3 buckets to provide elastic storage for logs. Digital Ocean's blob storage (Spaces) mimics the AWS S3 API for compatibility and ease of integration.
+
+This module thus is partially hardcoded with configuration settings for S3, but altered to work with Digital Ocean's implementation.
You will need to look through the values files s3 configuration settings to ensure they work with your setup - whether that be Digital Ocean, AWS or another cloud provider that supports S3's blob storage protocols + +# Resources +loki distributed walkthrough: +https://akyriako.medium.com/kubernetes-logging-with-grafana-loki-promtail-in-under-10-minutes-d2847d526f9e + +https://www.digitalocean.com/community/developer-center/how-to-install-loki-stack-in-doks-cluster + + diff --git a/modules/k8config/modules/loki/main.tf b/modules/k8config/modules/loki/main.tf new file mode 100644 index 0000000..00c51b6 --- /dev/null +++ b/modules/k8config/modules/loki/main.tf @@ -0,0 +1,63 @@ +terraform { + required_providers { + helm = { + source = "hashicorp/helm" + version = ">= 2.0.1" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "2.27.0" + } + } +} + +resource "helm_release" "loki" { + name = "loki" + + repository = "https://grafana.github.io/helm-charts" + chart = "loki-distributed" + version = "0.79.0" + + atomic = true + + create_namespace = true + namespace = "loki" + + recreate_pods = true + reuse_values = true + force_update = true + cleanup_on_fail = true + dependency_update = true + + values = [ + file("${abspath(path.module)}/res/loki-distributed-values.yaml") + ] + + depends_on = [ + kubernetes_namespace.loki_namespace, + kubernetes_secret.loki_s3_credentials + ] +} + +resource "kubernetes_secret" "loki_s3_credentials" { + metadata { + name = "loki-s3-credentials" + namespace = "loki" + } + + data = { + S3_LOKI_SECRET_ACCESS_KEY = var.s3_secret_access_key + S3_LOKI_ACCESS_KEY = var.s3_access_key_id + } + + depends_on = [ + kubernetes_namespace.loki_namespace + ] +} + +resource "kubernetes_namespace" "loki_namespace" { + metadata { + name = "loki" + } +} + diff --git a/modules/k8config/modules/loki/res/loki-distributed-values.yaml b/modules/k8config/modules/loki/res/loki-distributed-values.yaml new file mode 100644 index 0000000..040f537 --- /dev/null +++ b/modules/k8config/modules/loki/res/loki-distributed-values.yaml @@ -0,0 +1,2111 @@ +global: + image: + # -- Overrides the Docker registry globally for all images + registry: null + # -- Overrides the priorityClassName for all pods + priorityClassName: null + # -- configures cluster domain ("cluster.local" by default) + clusterDomain: "cluster.local" + # -- configures DNS service name + dnsService: "kube-dns" + # -- configures DNS service namespace + dnsNamespace: "kube-system" + + #extraEnvFrom: + # - name: S3_LOKI_ACCESS_KEY + # valueFrom: + # secretKeyRef: + # name: loki-s3-credentials + # key: S3_LOKI_ACCESS_KEY + # - name: S3_LOKI_SECRET_ACCESS_KEY + # valueFrom: + # secretKeyRef: + # name: loki-s3-credentials + # key: S3_LOKI_SECRET_ACCESS_KEY + +# -- Overrides the chart's name +nameOverride: null + +# -- Overrides the chart's computed fullname +fullnameOverride: null + +# -- Image pull secrets for Docker images +imagePullSecrets: [] + +# -- hostAliases to add +hostAliases: [] +# - ip: 1.2.3.4 +# hostnames: +# - domain.tld + +loki: + # -- If set, these annotations are added to all of the Kubernetes controllers + # (Deployments, StatefulSets, etc) that this chart launches. Use this to + # implement something like the "Wave" controller or another controller that + # is monitoring top level deployment resources. 
+ annotations: {} + # Configures the readiness probe for all of the Loki pods + readinessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 30 + timeoutSeconds: 1 + livenessProbe: + httpGet: + path: /ready + port: http + initialDelaySeconds: 300 + image: + # -- The Docker registry + registry: docker.io + # -- Docker image repository + repository: grafana/loki + # -- Overrides the image tag whose default is the chart's appVersion + tag: null + # -- Docker image pull policy + pullPolicy: IfNotPresent + # -- Common labels for all pods + podLabels: {} + # -- Common annotations for all pods + podAnnotations: {} + # -- Common command override for all pods (except gateway) + command: null + # -- The number of old ReplicaSets to retain to allow rollback + revisionHistoryLimit: 10 + # -- The SecurityContext for Loki pods + podSecurityContext: + fsGroup: 10001 + runAsGroup: 10001 + runAsNonRoot: true + runAsUser: 10001 + # -- The SecurityContext for Loki containers + containerSecurityContext: + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + allowPrivilegeEscalation: false + # -- Specify an existing secret containing loki configuration. If non-empty, overrides `loki.config` + existingSecretForConfig: "" + # -- Store the loki configuration as a secret. + configAsSecret: false + # -- Annotations for the secret with loki configuration. + configSecretAnnotations: {} + # -- Additional labels for the secret with loki configuration. + configSecretLabels: {} + # -- Adds the appProtocol field to the memberlist service. This allows memberlist to work with istio protocol selection. Ex: "http" or "tcp" + appProtocol: "" + # -- Common annotations for all loki services + serviceAnnotations: {} + # Loki server configuration + # Refers to https://grafana.com/docs/loki/latest/configuration/#server + server: + # -- HTTP server listen port + http_listen_port: 3100 + # -- Config file contents for Loki + # @default -- See values.yaml + config: | + auth_enabled: false + + server: + {{- toYaml .Values.loki.server | nindent 6 }} + + common: + compactor_address: http://{{ include "loki.compactorFullname" . }}:3100 + + distributor: + ring: + kvstore: + store: memberlist + + memberlist: + join_members: + - {{ include "loki.fullname" . }}-memberlist + + ingester_client: + grpc_client_config: + grpc_compression: gzip + + ingester: + lifecycler: + ring: + kvstore: + store: memberlist + replication_factor: 1 + chunk_idle_period: 30m + chunk_block_size: 262144 + chunk_encoding: snappy + chunk_retain_period: 1m + max_transfer_retries: 0 + wal: + dir: /var/loki/wal + + limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + max_cache_freshness_per_query: 10m + split_queries_by_interval: 15m + + {{- if .Values.loki.schemaConfig}} + schema_config: + {{- toYaml .Values.loki.schemaConfig | nindent 2}} + {{- end}} + {{- if .Values.loki.storageConfig}} + storage_config: + {{- if .Values.indexGateway.enabled}} + {{- $indexGatewayClient := dict "server_address" (printf "dns:///%s:9095" (include "loki.indexGatewayFullname" .)) }} + {{- $_ := set .Values.loki.storageConfig.boltdb_shipper "index_gateway_client" $indexGatewayClient }} + {{- end}} + {{- toYaml .Values.loki.storageConfig | nindent 2}} + {{- if .Values.memcachedIndexQueries.enabled }} + index_queries_cache_config: + memcached_client: + addresses: dnssrv+_memcached-client._tcp.{{ include "loki.memcachedIndexQueriesFullname" . 
}}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }} + consistent_hash: true + {{- end}} + {{- end}} + + runtime_config: + file: /var/{{ include "loki.name" . }}-runtime/runtime.yaml + + chunk_store_config: + max_look_back_period: 0s + {{- if .Values.memcachedChunks.enabled }} + chunk_cache_config: + embedded_cache: + enabled: false + memcached_client: + consistent_hash: true + addresses: dnssrv+_memcached-client._tcp.{{ include "loki.memcachedChunksFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }} + {{- end }} + {{- if .Values.memcachedIndexWrites.enabled }} + write_dedupe_cache_config: + memcached_client: + consistent_hash: true + addresses: dnssrv+_memcached-client._tcp.{{ include "loki.memcachedIndexWritesFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }} + {{- end }} + + table_manager: + retention_deletes_enabled: false + retention_period: 0s + + query_range: + align_queries_with_step: true + max_retries: 5 + cache_results: true + results_cache: + cache: + {{- if .Values.memcachedFrontend.enabled }} + memcached_client: + addresses: dnssrv+_memcached-client._tcp.{{ include "loki.memcachedFrontendFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }} + consistent_hash: true + {{- else }} + embedded_cache: + enabled: true + ttl: 24h + {{- end }} + + frontend_worker: + {{- if .Values.queryScheduler.enabled }} + scheduler_address: {{ include "loki.querySchedulerFullname" . }}:9095 + {{- else }} + frontend_address: {{ include "loki.queryFrontendFullname" . }}-headless:9095 + {{- end }} + + frontend: + log_queries_longer_than: 5s + compress_responses: true + {{- if .Values.queryScheduler.enabled }} + scheduler_address: {{ include "loki.querySchedulerFullname" . }}:9095 + {{- end }} + tail_proxy_url: http://{{ include "loki.querierFullname" . 
}}:3100 + + compactor: + shared_store: aws + working_directory: /var/loki/compactor + compaction_interval: 2m + + ruler: + storage: + type: local + local: + directory: /etc/loki/rules + ring: + kvstore: + store: memberlist + rule_path: /tmp/loki/scratch + alertmanager_url: https://alertmanager.xx + external_url: https://alertmanager.xx + + # -- Check https://grafana.com/docs/loki/latest/configuration/#schema_config for more info on how to configure schemas + schemaConfig: + configs: + - from: "2020-09-07" + store: boltdb-shipper + object_store: aws + schema: v11 + index: + prefix: loki_index_ + period: 24h + + # -- Check https://grafana.com/docs/loki/latest/configuration/#storage_config for more info on how to configure storages + storageConfig: + boltdb_shipper: + shared_store: aws + active_index_directory: /var/loki/index + cache_location: /var/loki/cache + cache_ttl: 1h + filesystem: + directory: /var/loki/chunks +# -- Uncomment to configure each storage individually +# azure: {} +# gcs: {} + aws: + s3: s3://${S3_LOKI_ACCESS_KEY}:${S3_LOKI_SECRET_ACCESS_KEY}@nyc3 + bucketnames: k8stack-resources + endpoint: nyc3.digitaloceanspaces.com + region: nyc3 + s3forcepathstyle: false + insecure: false + http_config: + idle_conn_timeout: 90s + response_header_timeout: 0s + insecure_skip_verify: false + +# boltdb: {} + + # -- Structured loki configuration, takes precedence over `loki.config`, `loki.schemaConfig`, `loki.storageConfig` + structuredConfig: {} + +# -- Provides a reloadable runtime configuration file for some specific configuration +runtimeConfig: {} + +serviceAccount: + # -- Specifies whether a ServiceAccount should be created + create: true + # -- The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the fullname template + name: null + # -- Image pull secrets for the service account + imagePullSecrets: [] + # -- Labels for the service account + labels: {} + # -- Annotations for the service account + annotations: {} + # -- Set this toggle to false to opt out of automounting API credentials for the service account + automountServiceAccountToken: true + +# RBAC configuration +rbac: + # -- If pspEnabled true, a PodSecurityPolicy is created for K8s that use psp. + pspEnabled: false + # -- For OpenShift set pspEnabled to 'false' and sccEnabled to 'true' to use the SecurityContextConstraints. + sccEnabled: false + +# ServiceMonitor configuration +serviceMonitor: + # -- If enabled, ServiceMonitor resources for Prometheus Operator are created + enabled: true + # -- Alternative namespace for ServiceMonitor resources + namespace: null + # -- Namespace selector for ServiceMonitor resources + namespaceSelector: {} + # -- Optional expressions to match on + matchExpressions: [] + # - key: prometheus.io/service-monitor + # operator: NotIn + # values: + # - "false" + # -- ServiceMonitor annotations + annotations: {} + # -- Additional ServiceMonitor labels + labels: {} + # -- ServiceMonitor scrape interval + interval: null + # -- ServiceMonitor scrape timeout in Go duration format (e.g. 
15s) + scrapeTimeout: null + # -- ServiceMonitor relabel configs to apply to samples before scraping + # https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig + relabelings: [] + # -- ServiceMonitor metric relabel configs to apply to samples before ingestion + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#endpoint + metricRelabelings: [] + # --ServiceMonitor will add labels from the service to the Prometheus metric + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitorspec + targetLabels: [] + # -- ServiceMonitor will use http by default, but you can pick https as well + scheme: http + # -- ServiceMonitor will use these tlsConfig settings to make the health check requests + tlsConfig: null + +# Rules for the Prometheus Operator +prometheusRule: + # -- If enabled, a PrometheusRule resource for Prometheus Operator is created + enabled: false + # -- Alternative namespace for the PrometheusRule resource + namespace: null + # -- PrometheusRule annotations + annotations: {} + # -- Additional PrometheusRule labels + labels: {} + # -- Contents of Prometheus rules file + groups: [] + # - name: loki_rules + # rules: + # - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) + # by (le, cluster, job)) + # record: cluster_job:loki_request_duration_seconds:99quantile + # - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) + # by (le, cluster, job)) + # record: cluster_job:loki_request_duration_seconds:50quantile + # - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) / sum(rate(loki_request_duration_seconds_count[1m])) + # by (cluster, job) + # record: cluster_job:loki_request_duration_seconds:avg + # - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job) + # record: cluster_job:loki_request_duration_seconds_bucket:sum_rate + # - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job) + # record: cluster_job:loki_request_duration_seconds_sum:sum_rate + # - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job) + # record: cluster_job:loki_request_duration_seconds_count:sum_rate + # - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) + # by (le, cluster, job, route)) + # record: cluster_job_route:loki_request_duration_seconds:99quantile + # - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) + # by (le, cluster, job, route)) + # record: cluster_job_route:loki_request_duration_seconds:50quantile + # - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) + # / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) + # record: cluster_job_route:loki_request_duration_seconds:avg + # - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, job, + # route) + # record: cluster_job_route:loki_request_duration_seconds_bucket:sum_rate + # - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, job, route) + # record: cluster_job_route:loki_request_duration_seconds_sum:sum_rate + # - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, job, route) + # record: cluster_job_route:loki_request_duration_seconds_count:sum_rate + # - expr: histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket[1m])) + # by (le, cluster, namespace, job, route)) + # 
record: cluster_namespace_job_route:loki_request_duration_seconds:99quantile + # - expr: histogram_quantile(0.50, sum(rate(loki_request_duration_seconds_bucket[1m])) + # by (le, cluster, namespace, job, route)) + # record: cluster_namespace_job_route:loki_request_duration_seconds:50quantile + # - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, + # job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, + # namespace, job, route) + # record: cluster_namespace_job_route:loki_request_duration_seconds:avg + # - expr: sum(rate(loki_request_duration_seconds_bucket[1m])) by (le, cluster, namespace, + # job, route) + # record: cluster_namespace_job_route:loki_request_duration_seconds_bucket:sum_rate + # - expr: sum(rate(loki_request_duration_seconds_sum[1m])) by (cluster, namespace, + # job, route) + # record: cluster_namespace_job_route:loki_request_duration_seconds_sum:sum_rate + # - expr: sum(rate(loki_request_duration_seconds_count[1m])) by (cluster, namespace, + # job, route) + # record: cluster_namespace_job_route:loki_request_duration_seconds_count:sum_rate + +# Configuration for the ingester +ingester: + # -- Kind of deployment [StatefulSet/Deployment] + kind: StatefulSet + # -- Number of replicas for the ingester + replicas: 1 + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + autoscaling: + # -- Enable autoscaling for the ingester + enabled: true + # -- Minimum autoscaling replicas for the ingester + minReplicas: 1 + # -- Maximum autoscaling replicas for the ingester + maxReplicas: 3 + # -- Target CPU utilisation percentage for the ingester + targetCPUUtilizationPercentage: 60 + # -- Target memory utilisation percentage for the ingester + targetMemoryUtilizationPercentage: null + # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics) + customMetrics: [] + # - type: Pods + # pods: + # metric: + # name: loki_lines_total + # target: + # type: AverageValue + # averageValue: 10k + behavior: + # -- Enable autoscaling behaviours + enabled: false + # -- define scale down policies, must conform to HPAScalingRules + scaleDown: {} + # -- define scale up policies, must conform to HPAScalingRules + scaleUp: {} + image: + # -- The Docker registry for the ingester image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the ingester image. Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the ingester image. 
Overrides `loki.image.tag` + tag: null + # -- Command to execute instead of defined in Docker image + command: null + # -- The name of the PriorityClass for ingester pods + priorityClassName: null + # -- Labels for ingester pods + podLabels: {} + # -- Annotations for ingester pods + podAnnotations: {} + # -- Labels for ingestor service + serviceLabels: {} + # -- Additional CLI args for the ingester + extraArgs: + - -config.expand-env=true + - --log.level=debug + - --print-config-stderr + # -- Environment variables to add to the ingester pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the ingester pods + extraEnvFrom: + - secretRef: + name: loki-s3-credentials + # -- Volume mounts to add to the ingester pods + extraVolumeMounts: [] + # -- Volumes to add to the ingester pods + extraVolumes: [] + # -- Resource requests and limits for the ingester + resources: {} + # -- Containers to add to the ingester pods + extraContainers: [] + # -- Init containers to add to the ingester pods + initContainers: [] + # -- Grace period to allow the ingester to shutdown before it is killed. Especially for the ingestor, + # this must be increased. It must be long enough so ingesters can be gracefully shutdown flushing/transferring + # all data and to successfully leave the member ring on shutdown. + terminationGracePeriodSeconds: 300 + # -- Lifecycle for the ingester container + lifecycle: {} + # -- topologySpread for ingester pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Defaults to allow skew no more then 1 node per AZ + topologySpreadConstraints: | + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "loki.ingesterSelectorLabels" . | nindent 6 }} + # -- Affinity for ingester pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.ingesterSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.ingesterSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Max Surge for ingester pods + maxSurge: 0 + # -- Node selector for ingester pods + nodeSelector: {} + # -- Tolerations for ingester pods + tolerations: [] + # -- readiness probe settings for ingester pods. If empty, use `loki.readinessProbe` + readinessProbe: {} + # -- liveness probe settings for ingester pods. If empty use `loki.livenessProbe` + livenessProbe: {} + persistence: + # -- Enable creating PVCs which is required when using boltdb-shipper + enabled: true + # -- Use emptyDir with ramdisk for storage. **Please note that all data in ingester will be lost on pod restart** + inMemory: false + # -- List of the ingester PVCs + # @notationType -- list + claims: + - name: data + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). 
+ storageClass: do-block-storage + # - name: wal + # size: 150Gi + # -- Enable StatefulSetAutoDeletePVC feature + enableStatefulSetAutoDeletePVC: false + whenDeleted: Retain + whenScaled: Retain + # -- Adds the appProtocol field to the ingester service. This allows ingester to work with istio protocol selection. + appProtocol: + # -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" + grpc: "" + +# Configuration for the distributor +distributor: + # -- Number of replicas for the distributor + replicas: 1 + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + autoscaling: + # -- Enable autoscaling for the distributor + enabled: true + # -- Minimum autoscaling replicas for the distributor + minReplicas: 1 + # -- Maximum autoscaling replicas for the distributor + maxReplicas: 3 + # -- Target CPU utilisation percentage for the distributor + targetCPUUtilizationPercentage: 60 + # -- Target memory utilisation percentage for the distributor + targetMemoryUtilizationPercentage: null + # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics) + customMetrics: [] + # - type: Pods + # pods: + # metric: + # name: loki_lines_total + # target: + # type: AverageValue + # averageValue: 10k + behavior: + # -- Enable autoscaling behaviours + enabled: false + # -- define scale down policies, must conform to HPAScalingRules + scaleDown: {} + # -- define scale up policies, must conform to HPAScalingRules + scaleUp: {} + image: + # -- The Docker registry for the distributor image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the distributor image. Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the distributor image. Overrides `loki.image.tag` + tag: null + # -- Command to execute instead of defined in Docker image + command: null + # -- The name of the PriorityClass for distributor pods + priorityClassName: null + # -- Labels for distributor pods + podLabels: {} + # -- Annotations for distributor pods + podAnnotations: {} + # -- Labels for distributor service + serviceLabels: {} + # -- Additional CLI args for the distributor + extraArgs: + - -config.expand-env=true + - --log.level=debug + - --print-config-stderr + # -- Environment variables to add to the distributor pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the distributor pods + extraEnvFrom: + - secretRef: + name: loki-s3-credentials + # -- Volume mounts to add to the distributor pods + extraVolumeMounts: [] + # -- Volumes to add to the distributor pods + extraVolumes: [] + # -- Resource requests and limits for the distributor + resources: {} + # -- Containers to add to the distributor pods + extraContainers: [] + # -- Grace period to allow the distributor to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for distributor pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.distributorSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.distributorSelectorLabels" . 
| nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Max Surge for distributor pods + maxSurge: 0 + # -- Node selector for distributor pods + nodeSelector: {} + # -- Tolerations for distributor pods + tolerations: [] + # -- Adds the appProtocol field to the distributor service. This allows distributor to work with istio protocol selection. + appProtocol: + # -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" + grpc: "" + +# Configuration for the querier +querier: + # -- Number of replicas for the querier + replicas: 1 + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + autoscaling: + # -- Enable autoscaling for the querier, this is only used if `indexGateway.enabled: true` + enabled: true + # -- Minimum autoscaling replicas for the querier + minReplicas: 1 + # -- Maximum autoscaling replicas for the querier + maxReplicas: 3 + # -- Target CPU utilisation percentage for the querier + targetCPUUtilizationPercentage: 60 + # -- Target memory utilisation percentage for the querier + targetMemoryUtilizationPercentage: null + # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics) + customMetrics: [] + # - type: External + # external: + # metric: + # name: loki_inflight_queries + # target: + # type: AverageValue + # averageValue: 12 + behavior: + # -- Enable autoscaling behaviours + enabled: false + # -- define scale down policies, must conform to HPAScalingRules + scaleDown: {} + # -- define scale up policies, must conform to HPAScalingRules + scaleUp: {} + image: + # -- The Docker registry for the querier image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the querier image. Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the querier image. Overrides `loki.image.tag` + tag: null + # -- Command to execute instead of defined in Docker image + command: null + # -- The name of the PriorityClass for querier pods + priorityClassName: null + # -- Labels for querier pods + podLabels: {} + # -- Annotations for querier pods + podAnnotations: {} + # -- Labels for querier service + serviceLabels: {} + # -- Additional CLI args for the querier + extraArgs: + - -config.expand-env=true + - --log.level=debug + - --print-config-stderr + # -- Environment variables to add to the querier pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the querier pods + extraEnvFrom: + - secretRef: + name: loki-s3-credentials + # -- Volume mounts to add to the querier pods + extraVolumeMounts: [] + # -- Volumes to add to the querier pods + extraVolumes: [] + # -- Resource requests and limits for the querier + resources: {} + # -- Containers to add to the querier pods + extraContainers: [] + # -- Init containers to add to the querier pods + initContainers: [] + # -- Grace period to allow the querier to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- topologySpread for querier pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Defaults to allow skew no more then 1 node per AZ + topologySpreadConstraints: | + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "loki.querierSelectorLabels" . | nindent 6 }} + # -- Affinity for querier pods. 
Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.querierSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.querierSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Max Surge for querier pods + maxSurge: 0 + # -- Node selector for querier pods + nodeSelector: {} + # -- Tolerations for querier pods + tolerations: [] + # -- DNSConfig for querier pods + dnsConfig: {} + persistence: + # -- Enable creating PVCs for the querier cache + enabled: true + # -- Size of persistent disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). + storageClass: do-block-storage + # -- Annotations for querier PVCs + annotations: {} + # -- Adds the appProtocol field to the querier service. This allows querier to work with istio protocol selection. + appProtocol: + # -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" + grpc: "" + +# Configuration for the query-frontend +queryFrontend: + # -- Number of replicas for the query-frontend + replicas: 1 + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + autoscaling: + # -- Enable autoscaling for the query-frontend + enabled: true + # -- Minimum autoscaling replicas for the query-frontend + minReplicas: 1 + # -- Maximum autoscaling replicas for the query-frontend + maxReplicas: 3 + # -- Target CPU utilisation percentage for the query-frontend + targetCPUUtilizationPercentage: 60 + # -- Target memory utilisation percentage for the query-frontend + targetMemoryUtilizationPercentage: null + # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Pods, Object or External metrics) + customMetrics: [] + # - type: Pods + # pods: + # metric: + # name: loki_query_rate + # target: + # type: AverageValue + # averageValue: 100 + behavior: + # -- Enable autoscaling behaviours + enabled: false + # -- define scale down policies, must conform to HPAScalingRules + scaleDown: {} + # -- define scale up policies, must conform to HPAScalingRules + scaleUp: {} + image: + # -- The Docker registry for the query-frontend image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the query-frontend image. Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the query-frontend image. 
Overrides `loki.image.tag` + tag: null + # -- Command to execute instead of defined in Docker image + command: null + # -- The name of the PriorityClass for query-frontend pods + priorityClassName: null + # -- Labels for query-frontend pods + podLabels: {} + # -- Annotations for query-frontend pods + podAnnotations: {} + # -- Labels for query-frontend service + serviceLabels: {} + # -- Additional CLI args for the query-frontend + extraArgs: + - -config.expand-env=true + - --log.level=debug + - --print-config-stderr + # -- Environment variables to add to the query-frontend pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the query-frontend pods + extraEnvFrom: + - secretRef: + name: loki-s3-credentials + # -- Volume mounts to add to the query-frontend pods + extraVolumeMounts: [] + # -- Volumes to add to the query-frontend pods + extraVolumes: [] + # -- Resource requests and limits for the query-frontend + resources: {} + # -- Containers to add to the query-frontend pods + extraContainers: [] + # -- Grace period to allow the query-frontend to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for query-frontend pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.queryFrontendSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.queryFrontendSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Node selector for query-frontend pods + nodeSelector: {} + # -- Tolerations for query-frontend pods + tolerations: [] + # -- Adds the appProtocol field to the queryFrontend service. This allows queryFrontend to work with istio protocol selection. + appProtocol: + # -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" + grpc: "" + +# Configuration for the query-scheduler +queryScheduler: + # -- Specifies whether the query-scheduler should be decoupled from the query-frontend + enabled: false + # -- Number of replicas for the query-scheduler. + # It should be lower than `-querier.max-concurrent` to avoid generating back-pressure in queriers; + # it's also recommended that this value evenly divides the latter + replicas: 2 + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + image: + # -- The Docker registry for the query-scheduler image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the query-scheduler image. Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the query-scheduler image. 
Overrides `loki.image.tag` + tag: null + # -- The name of the PriorityClass for query-scheduler pods + priorityClassName: null + # -- Labels for query-scheduler pods + podLabels: {} + # -- Annotations for query-scheduler pods + podAnnotations: {} + # -- Labels for query-scheduler service + serviceLabels: {} + # -- Additional CLI args for the query-scheduler + extraArgs: [] + # -- Environment variables to add to the query-scheduler pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the query-scheduler pods + extraEnvFrom: [] + # -- Volume mounts to add to the query-scheduler pods + extraVolumeMounts: [] + # -- Volumes to add to the query-scheduler pods + extraVolumes: [] + # -- Resource requests and limits for the query-scheduler + resources: {} + # -- Containers to add to the query-scheduler pods + extraContainers: [] + # -- Grace period to allow the query-scheduler to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for query-scheduler pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.querySchedulerSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.querySchedulerSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: 1 + # -- Node selector for query-scheduler pods + nodeSelector: {} + # -- Tolerations for query-scheduler pods + tolerations: [] + # -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" + appProtocol: + grpc: "" + +# Configuration for the table-manager +tableManager: + # -- Specifies whether the table-manager should be enabled + enabled: false + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + image: + # -- The Docker registry for the table-manager image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the table-manager image. Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the table-manager image. Overrides `loki.image.tag` + tag: null + # -- Command to execute instead of defined in Docker image + command: null + # -- The name of the PriorityClass for table-manager pods + priorityClassName: null + # -- Labels for table-manager pods + podLabels: {} + # -- Annotations for table-manager pods + podAnnotations: {} + # -- Labels for table-manager service + serviceLabels: {} + # -- Additional CLI args for the table-manager + extraArgs: [] + # -- Environment variables to add to the table-manager pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the table-manager pods + extraEnvFrom: [] + # -- Volume mounts to add to the table-manager pods + extraVolumeMounts: [] + # -- Volumes to add to the table-manager pods + extraVolumes: [] + # -- Resource requests and limits for the table-manager + resources: {} + # -- Containers to add to the table-manager pods + extraContainers: [] + # -- Grace period to allow the table-manager to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for table-manager pods. 
Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.tableManagerSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.tableManagerSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Node selector for table-manager pods + nodeSelector: {} + # -- Tolerations for table-manager pods + tolerations: [] + +# Use either this ingress or the gateway, but not both at once. +# If you enable this, make sure to disable the gateway. +# You'll need to supply authn configuration for your ingress controller. +ingress: + enabled: false +# ingressClassName: nginx + annotations: {} +# nginx.ingress.kubernetes.io/auth-type: basic +# nginx.ingress.kubernetes.io/auth-secret: loki-distributed-basic-auth +# nginx.ingress.kubernetes.io/auth-secret-type: auth-map +# nginx.ingress.kubernetes.io/configuration-snippet: | +# proxy_set_header X-Scope-OrgID $remote_user; + paths: + distributor: + - /api/prom/push + - /loki/api/v1/push + querier: + - /api/prom/tail + - /loki/api/v1/tail + query-frontend: + - /loki/api + ruler: + - /api/prom/rules + - /loki/api/v1/rules + - /prometheus/api/v1/rules + - /prometheus/api/v1/alerts + hosts: + - loki.example.com + # tls: + # - secretName: loki-distributed-tls + # hosts: + # - loki.example.com + +# Configuration for the gateway +gateway: + # -- Specifies whether the gateway should be enabled + enabled: true + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + # -- Number of replicas for the gateway + replicas: 1 + # -- Enable logging of 2xx and 3xx HTTP requests + verboseLogging: true + autoscaling: + # -- Enable autoscaling for the gateway + enabled: true + # -- Minimum autoscaling replicas for the gateway + minReplicas: 1 + # -- Maximum autoscaling replicas for the gateway + maxReplicas: 3 + # -- Target CPU utilisation percentage for the gateway + targetCPUUtilizationPercentage: 60 + # -- Target memory utilisation percentage for the gateway + targetMemoryUtilizationPercentage: null + # -- Allows one to define custom metrics using the HPA/v2 schema (for example, Resource, Object or External metrics) + customMetrics: [] + # - type: Object + # object: + # metric: + # name: requests-per-second + # describedObject: + # apiVersion: networking.k8s.io/v1 + # kind: Ingress + # name: main-route + # target: + # type: Values + # averageValue: 10k + behavior: + # -- Enable autoscaling behaviours + enabled: false + # -- define scale down policies, must conform to HPAScalingRules + scaleDown: {} + # -- define scale up policies, must conform to HPAScalingRules + scaleUp: {} + # -- See `kubectl explain deployment.spec.strategy` for more, + # ref: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#strategy + deploymentStrategy: + type: RollingUpdate + image: + # -- The Docker registry for the gateway image + registry: docker.io + # -- The gateway image repository + repository: nginxinc/nginx-unprivileged + # -- The gateway image tag + tag: 1.20.2-alpine + # -- The gateway image pull policy + pullPolicy: IfNotPresent + # -- The name of the PriorityClass for gateway pods + priorityClassName: null + # -- Labels for gateway pods + podLabels: {} + # 
-- Annotations for gateway pods + podAnnotations: {} + # -- Additional CLI args for the gateway + extraArgs: [] + # -- Environment variables to add to the gateway pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the gateway pods + extraEnvFrom: [] + # -- Volumes to add to the gateway pods + extraVolumes: [] + # -- Volume mounts to add to the gateway pods + extraVolumeMounts: [] + # -- The SecurityContext for gateway containers + podSecurityContext: + fsGroup: 101 + runAsGroup: 101 + runAsNonRoot: true + runAsUser: 101 + # -- The SecurityContext for gateway containers + containerSecurityContext: + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + allowPrivilegeEscalation: false + # -- Resource requests and limits for the gateway + resources: {} + # -- Containers to add to the gateway pods + extraContainers: [] + # -- Grace period to allow the gateway to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for gateway pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.gatewaySelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.gatewaySelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Node selector for gateway pods + nodeSelector: {} + # -- Tolerations for gateway pods + tolerations: [] + # -- DNSConfig for gateway pods + dnsConfig: {} + # Gateway service configuration + service: + # -- Port of the gateway service + port: 80 + # -- Type of the gateway service + type: ClusterIP + # -- ClusterIP of the gateway service + clusterIP: null + # -- Node port if service type is NodePort + nodePort: null + # -- Load balancer IPO address if service type is LoadBalancer + loadBalancerIP: null + # -- Load balancer allow traffic from CIDR list if service type is LoadBalancer + loadBalancerSourceRanges: [] + # -- Set appProtocol for the service + appProtocol: null + # -- Annotations for the gateway service + annotations: {} + # -- Labels for gateway service + labels: {} + # Gateway ingress configuration + ingress: + # -- Specifies whether an ingress for the gateway should be created + enabled: false + # -- Ingress Class Name. MAY be required for Kubernetes versions >= 1.18 + # For example: `ingressClassName: nginx` + ingressClassName: '' + + # -- Annotations for the gateway ingress + annotations: {} + # -- Hosts configuration for the gateway ingress + hosts: + - host: gateway.loki.example.com + paths: + - path: / + # -- pathType (e.g. ImplementationSpecific, Prefix, .. etc.) might also be required by some Ingress Controllers + # pathType: Prefix + # -- TLS configuration for the gateway ingress + tls: [] + # tls: + # - secretName: loki-gateway-tls + # hosts: + # - gateway.loki.example.com + + # Basic auth configuration + basicAuth: + # -- Enables basic authentication for the gateway + enabled: false + # -- The basic auth username for the gateway + username: null + # -- The basic auth password for the gateway + password: null + # -- Uses the specified username and password to compute a htpasswd using Sprig's `htpasswd` function. + # The value is templated using `tpl`. 
Override this to use a custom htpasswd, e.g. in case the default causes + # high CPU load. + # @default -- See values.yaml + htpasswd: >- + {{ htpasswd (required "'gateway.basicAuth.username' is required" .Values.gateway.basicAuth.username) (required "'gateway.basicAuth.password' is required" .Values.gateway.basicAuth.password) }} + # -- Existing basic auth secret to use. Must contain '.htpasswd' + existingSecret: null + # Configures the readiness probe for the gateway + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 15 + timeoutSeconds: 1 + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + nginxConfig: + # -- NGINX log format + # @default -- See values.yaml + logFormat: |- + main '$remote_addr - $remote_user [$time_local] $status ' + '"$request" $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + # -- Allows appending custom configuration to the server block + serverSnippet: "" + # -- Allows appending custom configuration to the http block + httpSnippet: "" + # -- Allows overriding the DNS resolver address nginx will use. + resolver: "" + # -- Config file contents for Nginx. Passed through the `tpl` function to allow templating + # @default -- See values.yaml + file: | + worker_processes 5; ## Default: 1 + error_log /dev/stderr; + pid /tmp/nginx.pid; + worker_rlimit_nofile 8192; + + events { + worker_connections 4096; ## Default: 1024 + } + + http { + client_body_temp_path /tmp/client_temp; + proxy_temp_path /tmp/proxy_temp_path; + fastcgi_temp_path /tmp/fastcgi_temp; + uwsgi_temp_path /tmp/uwsgi_temp; + scgi_temp_path /tmp/scgi_temp; + + proxy_http_version 1.1; + + default_type application/octet-stream; + log_format {{ .Values.gateway.nginxConfig.logFormat }} + + {{- if .Values.gateway.verboseLogging }} + access_log /dev/stderr main; + {{- else }} + + map $status $loggable { + ~^[23] 0; + default 1; + } + access_log /dev/stderr main if=$loggable; + {{- end }} + + sendfile on; + tcp_nopush on; + {{- if .Values.gateway.nginxConfig.resolver }} + resolver {{ .Values.gateway.nginxConfig.resolver }}; + {{- else }} + resolver {{ .Values.global.dnsService }}.{{ .Values.global.dnsNamespace }}.svc.{{ .Values.global.clusterDomain }}; + {{- end }} + + {{- with .Values.gateway.nginxConfig.httpSnippet }} + {{ . | nindent 2 }} + {{- end }} + + server { + listen 8080; + + {{- if .Values.gateway.basicAuth.enabled }} + auth_basic "Loki"; + auth_basic_user_file /etc/nginx/secrets/.htpasswd; + {{- end }} + + location = / { + return 200 'OK'; + auth_basic off; + access_log off; + } + + location = /api/prom/push { + set $api_prom_push_backend http://{{ include "loki.distributorFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}; + proxy_pass $api_prom_push_backend:3100$request_uri; + proxy_http_version 1.1; + } + + location = /api/prom/tail { + set $api_prom_tail_backend http://{{ include "loki.querierFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}; + proxy_pass $api_prom_tail_backend:3100$request_uri; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_http_version 1.1; + } + + # Ruler + location ~ /prometheus/api/v1/alerts.* { + proxy_pass http://{{ include "loki.rulerFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}:3100$request_uri; + } + location ~ /prometheus/api/v1/rules.* { + proxy_pass http://{{ include "loki.rulerFullname" . 
}}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}:3100$request_uri; + } + location ~ /api/prom/rules.* { + proxy_pass http://{{ include "loki.rulerFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}:3100$request_uri; + } + location ~ /api/prom/alerts.* { + proxy_pass http://{{ include "loki.rulerFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}:3100$request_uri; + } + + location ~ /api/prom/.* { + set $api_prom_backend http://{{ include "loki.queryFrontendFullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}; + proxy_pass $api_prom_backend:3100$request_uri; + proxy_http_version 1.1; + } + + location = /loki/api/v1/push { + set $loki_api_v1_push_backend http://{{ include "loki.distributorFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}; + proxy_pass $loki_api_v1_push_backend:3100$request_uri; + proxy_http_version 1.1; + } + + location = /loki/api/v1/tail { + set $loki_api_v1_tail_backend http://{{ include "loki.querierFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}; + proxy_pass $loki_api_v1_tail_backend:3100$request_uri; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_http_version 1.1; + } + + location ~ /loki/api/.* { + set $loki_api_backend http://{{ include "loki.queryFrontendFullname" . }}-headless.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}; + proxy_pass $loki_api_backend:3100$request_uri; + proxy_http_version 1.1; + } + + {{- with .Values.gateway.nginxConfig.serverSnippet }} + {{ . | nindent 4 }} + {{- end }} + } + } + +# Configuration for the compactor +compactor: + # -- Kind of deployment [StatefulSet/Deployment] + kind: StatefulSet + # -- Number of replicas for the compactor + replicas: 1 + # -- Specifies whether compactor should be enabled + enabled: true + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + image: + # -- The Docker registry for the compactor image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the compactor image. Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the compactor image. Overrides `loki.image.tag` + tag: null + # -- Command to execute instead of defined in Docker image + command: null + # -- The name of the PriorityClass for compactor pods + priorityClassName: null + # -- Labels for compactor pods + podLabels: {} + # -- Annotations for compactor pods + podAnnotations: {} + # -- Affinity for compactor pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.compactorSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.compactorSelectorLabels" . 
| nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Labels for compactor service + serviceLabels: {} + # -- Additional CLI args for the compactor + extraArgs: + - -config.expand-env=true + - --log.level=debug + - --print-config-stderr + # -- Environment variables to add to the compactor pods + extraEnv: + # -- Environment variables from secrets or configmaps to add to the compactor pods + extraEnvFrom: + - secretRef: + name: loki-s3-credentials + # -- Volume mounts to add to the compactor pods + extraVolumeMounts: [] + # -- Volumes to add to the compactor pods + extraVolumes: [] + # -- readiness probe settings for ingester pods. If empty, use `loki.readinessProbe` + readinessProbe: {} + # -- liveness probe settings for ingester pods. If empty use `loki.livenessProbe` + livenessProbe: {} + # -- Resource requests and limits for the compactor + resources: {} + # -- Containers to add to the compactor pods + extraContainers: [] + # -- Init containers to add to the compactor pods + initContainers: [] + # -- Grace period to allow the compactor to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Node selector for compactor pods + nodeSelector: {} + # -- Tolerations for compactor pods + tolerations: [] + # -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" + appProtocol: + grpc: "" + persistence: + # -- Enable creating PVCs for the compactor + enabled: true + # -- Size of persistent disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). + storageClass: do-block-storage + # -- Annotations for compactor PVCs + annotations: {} + # -- List of the compactor PVCs + # @notationType -- list + claims: + - name: data + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). + storageClass: do-block-storage + # - name: wal + # size: 150Gi + # -- Enable StatefulSetAutoDeletePVC feature + enableStatefulSetAutoDeletePVC: false + whenDeleted: Retain + whenScaled: Retain + + serviceAccount: + create: false + # -- The name of the ServiceAccount to use for the compactor. + # If not set and create is true, a name is generated by appending + # "-compactor" to the common ServiceAccount. + name: null + # -- Image pull secrets for the compactor service account + imagePullSecrets: [] + # -- Annotations for the compactor service account + annotations: {} + # -- Set this toggle to false to opt out of automounting API credentials for the service account + automountServiceAccountToken: true + +# Configuration for the ruler +ruler: + # -- Specifies whether the ruler should be enabled + enabled: false + # -- Kind of deployment [StatefulSet/Deployment] + kind: Deployment + # -- Number of replicas for the ruler + replicas: 1 + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + image: + # -- The Docker registry for the ruler image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the ruler image. 
Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the ruler image. Overrides `loki.image.tag` + tag: null + # -- Command to execute instead of defined in Docker image + command: null + # -- The name of the PriorityClass for ruler pods + priorityClassName: null + # -- Labels for compactor pods + podLabels: {} + # -- Annotations for ruler pods + podAnnotations: {} + # -- Labels for ruler service + serviceLabels: {} + # -- Additional CLI args for the ruler + extraArgs: [] + # -- Environment variables to add to the ruler pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the ruler pods + extraEnvFrom: [] + # -- Volume mounts to add to the ruler pods + extraVolumeMounts: [] + # -- Volumes to add to the ruler pods + extraVolumes: [] + # -- Resource requests and limits for the ruler + resources: {} + # -- Containers to add to the ruler pods + extraContainers: [] + # -- Init containers to add to the ruler pods + initContainers: [] + # -- Grace period to allow the ruler to shutdown before it is killed + terminationGracePeriodSeconds: 300 + # -- Affinity for ruler pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.rulerSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.rulerSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Node selector for ruler pods + nodeSelector: {} + # -- Tolerations for ruler pods + tolerations: [] + # -- DNSConfig for ruler pods + dnsConfig: {} + persistence: + # -- Enable creating PVCs which is required when using recording rules + enabled: false + # -- Size of persistent disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). + storageClass: null + # -- Annotations for ruler PVCs + annotations: {} + # -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" + appProtocol: + grpc: "" + # -- Directories containing rules files + directories: {} + # tenant_foo: + # rules1.txt: | + # groups: + # - name: should_fire + # rules: + # - alert: HighPercentageError + # expr: | + # sum(rate({app="foo", env="production"} |= "error" [5m])) by (job) + # / + # sum(rate({app="foo", env="production"}[5m])) by (job) + # > 0.05 + # for: 10m + # labels: + # severity: warning + # annotations: + # summary: High error rate + # - name: credentials_leak + # rules: + # - alert: http-credentials-leaked + # annotations: + # message: "{{ $labels.job }} is leaking http basic auth credentials." 
+ # expr: 'sum by (cluster, job, pod) (count_over_time({namespace="prod"} |~ "http(s?)://(\\w+):(\\w+)@" [5m]) > 0)' + # for: 10m + # labels: + # severity: critical + # rules2.txt: | + # groups: + # - name: example + # rules: + # - alert: HighThroughputLogStreams + # expr: sum by(container) (rate({job=~"loki-dev/.*"}[1m])) > 1000 + # for: 2m + # tenant_bar: + # rules1.txt: | + # groups: + # - name: should_fire + # rules: + # - alert: HighPercentageError + # expr: | + # sum(rate({app="foo", env="production"} |= "error" [5m])) by (job) + # / + # sum(rate({app="foo", env="production"}[5m])) by (job) + # > 0.05 + # for: 10m + # labels: + # severity: warning + # annotations: + # summary: High error rate + # - name: credentials_leak + # rules: + # - alert: http-credentials-leaked + # annotations: + # message: "{{ $labels.job }} is leaking http basic auth credentials." + # expr: 'sum by (cluster, job, pod) (count_over_time({namespace="prod"} |~ "http(s?)://(\\w+):(\\w+)@" [5m]) > 0)' + # for: 10m + # labels: + # severity: critical + # rules2.txt: | + # groups: + # - name: example + # rules: + # - alert: HighThroughputLogStreams + # expr: sum by(container) (rate({job=~"loki-dev/.*"}[1m])) > 1000 + # for: 2m + +# Configuration for the index-gateway +indexGateway: + # -- Specifies whether the index-gateway should be enabled + enabled: false + # -- Number of replicas for the index-gateway + replicas: 1 + # -- Whether the index gateway should join the memberlist hashring + joinMemberlist: true + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + image: + # -- The Docker registry for the index-gateway image. Overrides `loki.image.registry` + registry: null + # -- Docker image repository for the index-gateway image. Overrides `loki.image.repository` + repository: null + # -- Docker image tag for the index-gateway image. Overrides `loki.image.tag` + tag: null + # -- The name of the PriorityClass for index-gateway pods + priorityClassName: null + # -- Labels for index-gateway pods + podLabels: {} + # -- Annotations for index-gateway pods + podAnnotations: {} + # -- Labels for index-gateway service + serviceLabels: {} + # -- Additional CLI args for the index-gateway + extraArgs: [] + # -- Environment variables to add to the index-gateway pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the index-gateway pods + extraEnvFrom: [] + # -- Volume mounts to add to the index-gateway pods + extraVolumeMounts: [] + # -- Volumes to add to the index-gateway pods + extraVolumes: [] + # -- Resource requests and limits for the index-gateway + resources: {} + # -- Containers to add to the index-gateway pods + extraContainers: [] + # -- Init containers to add to the index-gateway pods + initContainers: [] + # -- Grace period to allow the index-gateway to shutdown before it is killed. + terminationGracePeriodSeconds: 300 + # -- Affinity for index-gateway pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.indexGatewaySelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.indexGatewaySelectorLabels" . 
| nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Node selector for index-gateway pods + nodeSelector: {} + # -- Tolerations for index-gateway pods + tolerations: [] + persistence: + # -- Enable creating PVCs which is required when using boltdb-shipper + enabled: false + # -- Use emptyDir with ramdisk for storage. **Please note that all data in indexGateway will be lost on pod restart** + inMemory: false + # -- Size of persistent or memory disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). + storageClass: null + # -- Annotations for index gateway PVCs + annotations: {} + # -- Enable StatefulSetAutoDeletePVC feature + enableStatefulSetAutoDeletePVC: false + whenDeleted: Retain + whenScaled: Retain + # -- Set the optional grpc service protocol. Ex: "grpc", "http2" or "https" + appProtocol: + grpc: "" + +memcached: + readinessProbe: + tcpSocket: + port: http + initialDelaySeconds: 5 + timeoutSeconds: 1 + livenessProbe: + tcpSocket: + port: http + initialDelaySeconds: 10 + image: + # -- The Docker registry for the memcached + registry: docker.io + # -- Memcached Docker image repository + repository: memcached + # -- Memcached Docker image tag + tag: 1.6.21-alpine + # -- Memcached Docker image pull policy + pullPolicy: IfNotPresent + # -- Labels for memcached pods + podLabels: {} + # -- The SecurityContext for memcached pods + podSecurityContext: + fsGroup: 11211 + runAsGroup: 11211 + runAsNonRoot: true + runAsUser: 11211 + # -- The SecurityContext for memcached containers + containerSecurityContext: + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + allowPrivilegeEscalation: false + # -- Common annotations for all memcached services + serviceAnnotations: {} + # -- Adds the appProtocol field to the memcached services. This allows memcached to work with istio protocol selection. 
Ex: "http" or "tcp" + appProtocol: "" + +memcachedExporter: + # -- Specifies whether the Memcached Exporter should be enabled + enabled: false + image: + # -- The Docker registry for the Memcached Exporter + registry: docker.io + # -- Memcached Exporter Docker image repository + repository: prom/memcached-exporter + # -- Memcached Exporter Docker image tag + tag: v0.13.0 + # -- Memcached Exporter Docker image pull policy + pullPolicy: IfNotPresent + # -- Labels for memcached-exporter pods + podLabels: {} + # -- Memcached Exporter resource requests and limits + resources: {} + # -- The SecurityContext for memcachedExporter containers + containerSecurityContext: + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + allowPrivilegeEscalation: false + +memcachedChunks: + # -- Specifies whether the Memcached chunks cache should be enabled + enabled: false + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + # -- Number of replicas for memcached-chunks + replicas: 1 + # -- The name of the PriorityClass for memcached-chunks pods + priorityClassName: null + # -- Labels for memcached-chunks pods + podLabels: {} + # -- Annotations for memcached-chunks pods + podAnnotations: {} + # -- Labels for memcached-chunks service + serviceLabels: {} + # -- Additional CLI args for memcached-chunks + extraArgs: + - -I 32m + # -- Environment variables to add to memcached-chunks pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to memcached-chunks pods + extraEnvFrom: [] + # -- Resource requests and limits for memcached-chunks + resources: {} + # -- Containers to add to the memcached-chunks pods + extraContainers: [] + # -- Grace period to allow memcached-chunks to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for memcached-chunks pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.memcachedChunksSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.memcachedChunksSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Node selector for memcached-chunks pods + nodeSelector: {} + # -- Tolerations for memcached-chunks pods + tolerations: [] + persistence: + # -- Enable creating PVCs which will persist cached data through restarts + enabled: false + # -- Size of persistent or memory disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). 
+ storageClass: null + # -- List of additional PVCs to be created for the memcached-chunks statefulset + volumeClaimTemplates: [] + # -- List of additional volumes to be mounted for the memcached-chunks statefulset + extraVolumeMounts: [] + +memcachedFrontend: + # -- Specifies whether the Memcached frontend cache should be enabled + enabled: false + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + # -- Number of replicas for memcached-frontend + replicas: 1 + # -- The name of the PriorityClass for memcached-frontend pods + priorityClassName: null + # -- Labels for memcached-frontend pods + podLabels: {} + # -- Annotations for memcached-frontend pods + podAnnotations: {} + # -- Labels for memcached-frontend service + serviceLabels: {} + # -- Additional CLI args for memcached-frontend + extraArgs: + - -I 32m + # -- Environment variables to add to memcached-frontend pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to memcached-frontend pods + extraEnvFrom: [] + # -- Resource requests and limits for memcached-frontend + resources: {} + # -- Containers to add to the memcached-frontend pods + extraContainers: [] + # -- Grace period to allow memcached-frontend to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for memcached-frontend pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.memcachedFrontendSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.memcachedFrontendSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: 1 + # -- Node selector for memcached-frontend pods + nodeSelector: {} + # -- Tolerations for memcached-frontend pods + tolerations: [] + persistence: + # -- Enable creating PVCs which will persist cached data through restarts + enabled: false + # -- Size of persistent or memory disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). 
+ storageClass: null + +memcachedIndexQueries: + # -- Specifies whether the Memcached index queries cache should be enabled + enabled: false + # -- Number of replicas for memcached-index-queries + replicas: 1 + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + # -- The name of the PriorityClass for memcached-index-queries pods + priorityClassName: null + # -- Labels for memcached-index-queries pods + podLabels: {} + # -- Annotations for memcached-index-queries pods + podAnnotations: {} + # -- Labels for memcached-index-queries service + serviceLabels: {} + # -- Additional CLI args for memcached-index-queries + extraArgs: + - -I 32m + # -- Environment variables to add to memcached-index-queries pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to memcached-index-queries pods + extraEnvFrom: [] + # -- Resource requests and limits for memcached-index-queries + resources: {} + # -- Containers to add to the memcached-index-queries pods + extraContainers: [] + # -- Grace period to allow memcached-index-queries to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for memcached-index-queries pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.memcachedIndexQueriesSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.memcachedIndexQueriesSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Node selector for memcached-index-queries pods + nodeSelector: {} + # -- Tolerations for memcached-index-queries pods + tolerations: [] + persistence: + # -- Enable creating PVCs which will persist cached data through restarts + enabled: false + # -- Size of persistent or memory disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). 
+ storageClass: null + +memcachedIndexWrites: + # -- Specifies whether the Memcached index writes cache should be enabled + enabled: false + # -- Number of replicas for memcached-index-writes + replicas: 1 + # -- hostAliases to add + hostAliases: [] + # - ip: 1.2.3.4 + # hostnames: + # - domain.tld + # -- The name of the PriorityClass for memcached-index-writes pods + priorityClassName: null + # -- Labels for memcached-index-writes pods + podLabels: {} + # -- Annotations for memcached-index-writes pods + podAnnotations: {} + # -- Labels for memcached-index-writes service + serviceLabels: {} + # -- Additional CLI args for memcached-index-writes + extraArgs: + - -I 32m + # -- Environment variables to add to memcached-index-writes pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to memcached-index-writes pods + extraEnvFrom: [] + # -- Resource requests and limits for memcached-index-writes + resources: {} + # -- Containers to add to the memcached-index-writes pods + extraContainers: [] + # -- Grace period to allow memcached-index-writes to shutdown before it is killed + terminationGracePeriodSeconds: 30 + # -- Affinity for memcached-index-writes pods. Passed through `tpl` and, thus, to be configured as string + # @default -- Hard node and soft zone anti-affinity + affinity: | + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + {{- include "loki.memcachedIndexWritesSelectorLabels" . | nindent 10 }} + topologyKey: kubernetes.io/hostname + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + {{- include "loki.memcachedIndexWritesSelectorLabels" . | nindent 12 }} + topologyKey: failure-domain.beta.kubernetes.io/zone + # -- Pod Disruption Budget maxUnavailable + maxUnavailable: null + # -- Node selector for memcached-index-writes pods + nodeSelector: {} + # -- Tolerations for memcached-index-writes pods + tolerations: [] + persistence: + # -- Enable creating PVCs which will persist cached data through restarts + enabled: false + # -- Size of persistent or memory disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: . + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). + storageClass: null + +networkPolicy: + # -- Specifies whether Network Policies should be created + enabled: false + metrics: + # -- Specifies the Pods which are allowed to access the metrics port. + # As this is cross-namespace communication, you also need the namespaceSelector. + podSelector: {} + # -- Specifies the namespaces which are allowed to access the metrics port + namespaceSelector: {} + # -- Specifies specific network CIDRs which are allowed to access the metrics port. + # In case you use namespaceSelector, you also have to specify your kubelet networks here. + # The metrics ports are also used for probes. + cidrs: [] + ingress: + # -- Specifies the Pods which are allowed to access the http port. + # As this is cross-namespace communication, you also need the namespaceSelector. + podSelector: {} + # -- Specifies the namespaces which are allowed to access the http port + namespaceSelector: {} + alertmanager: + # -- Specify the alertmanager port used for alerting + port: 9093 + # -- Specifies the alertmanager Pods. 
+    # As this is cross-namespace communication, you also need the namespaceSelector.
+    podSelector: {}
+    # -- Specifies the namespace the alertmanager is running in
+    namespaceSelector: {}
+  externalStorage:
+    # -- Specify the port used for external storage, e.g. AWS S3
+    ports: []
+    # -- Specifies specific network CIDRs you want to limit access to
+    cidrs: []
+  discovery:
+    # -- Specify the port used for discovery
+    port: null
+    # -- Specifies the Pods labels used for discovery.
+    # As this is cross-namespace communication, you also need the namespaceSelector.
+    podSelector: {}
+    # -- Specifies the namespace the discovery Pods are running in
+    namespaceSelector: {}
\ No newline at end of file
diff --git a/modules/k8config/modules/loki/variables.tf b/modules/k8config/modules/loki/variables.tf
new file mode 100644
index 0000000..c4d8841
--- /dev/null
+++ b/modules/k8config/modules/loki/variables.tf
@@ -0,0 +1,11 @@
+variable "s3_access_key_id" {
+  description = "S3 Access Key Id"
+  sensitive = true
+  type = string
+}
+
+variable "s3_secret_access_key" {
+  description = "S3 Secret Access Key"
+  sensitive = true
+  type = string
+}
\ No newline at end of file
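
These two variables feed the `loki-s3-credentials` secret that the Loki values above reference via `extraEnvFrom`. The secret itself is created elsewhere in the module (not shown in this hunk); a minimal sketch of what that wiring could look like, assuming the `hashicorp/kubernetes` provider is configured against this cluster and that the Loki storage config expands `AWS_ACCESS_KEY_ID`/`AWS_SECRET_ACCESS_KEY` (the components run with `-config.expand-env=true`):

```terraform
# Sketch only: the secret name and "loki" namespace mirror the chart values;
# the key names are an assumption about what the Loki config expands.
resource "kubernetes_secret" "loki_s3_credentials" {
  metadata {
    name      = "loki-s3-credentials"
    namespace = "loki"
  }

  data = {
    AWS_ACCESS_KEY_ID     = var.s3_access_key_id
    AWS_SECRET_ACCESS_KEY = var.s3_secret_access_key
  }
}
```
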
diff --git a/modules/k8config/modules/prometheus-adapter/README.md b/modules/k8config/modules/prometheus-adapter/README.md
new file mode 100644
index 0000000..908a9be
--- /dev/null
+++ b/modules/k8config/modules/prometheus-adapter/README.md
@@ -0,0 +1,26 @@
+# prometheus-adapter Module
+This module installs the prometheus-adapter project into the cluster under the `prometheus-adapter` namespace. The adapter replaces the metrics-server (https://github.com/kubernetes-sigs/metrics-server) that is most commonly deployed in Kubernetes clusters. `prometheus-adapter` is used here instead because Prometheus is already installed; see the prometheus module for details on that component.
+
+With `prometheus-adapter`, the metrics Prometheus already collects can be queried and supplied to the Kubernetes Metrics API, instead of having to collect them a second time ourselves.
+
+# Usage
+The primary purpose of this module is to supply the Metrics API with metrics, so that CPU and memory usage of nodes and pods can be viewed quickly from the command line. You can access these metrics with `kubectl`.
+
+## Top CPU and Memory Consuming Pods
+Run the following command to view an ordered list of the top consuming pods:
+```bash
+kubectl top pod --all-namespaces
+```
+
+## Top CPU and Memory Consuming Nodes
+Run the following command to view a list of the top consuming nodes (nodes are not namespaced, so there is no `--all-namespaces` flag here):
+```bash
+kubectl top node
+```
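+
+Both commands read from the `metrics.k8s.io` API that prometheus-adapter registers. As a quick sanity check that the adapter is actually serving data, you can also query that API directly (`jq` is optional here and assumed only for readability):
+```bash
+kubectl get --raw "/apis/metrics.k8s.io/v1beta1/nodes" | jq
+kubectl get --raw "/apis/metrics.k8s.io/v1beta1/pods" | jq
+```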
+
+# Resources
+
+Helm Chart: https://artifacthub.io/packages/helm/prometheus-community/prometheus-adapter
+Configuration Related Issues and Help: https://github.com/prometheus-community/helm-charts/issues/1974
+
+prometheus-adapter also ships with a known bug out of the box: https://github.com/kubernetes-sigs/prometheus-adapter/issues/385
diff --git a/modules/k8config/modules/prometheus-adapter/main.tf b/modules/k8config/modules/prometheus-adapter/main.tf
new file mode 100644
index 0000000..42dcdd4
--- /dev/null
+++ b/modules/k8config/modules/prometheus-adapter/main.tf
@@ -0,0 +1,32 @@
+terraform {
+  required_providers {
+    helm = {
+      source = "hashicorp/helm"
+      version = ">= 2.0.1"
+    }
+  }
+}
+
+resource "helm_release" "prometheus-adapter" {
+  name = "prometheus-adapter"
+
+  repository = "https://prometheus-community.github.io/helm-charts"
+  chart = "prometheus-adapter"
+  version = "4.10.0"
+
+  atomic = true
+
+  create_namespace = true
+  namespace = "prometheus-adapter"
+
+  recreate_pods = true
+  reuse_values = true
+  force_update = true
+  cleanup_on_fail = true
+  dependency_update = true
+
+  values = [
+    file("${abspath(path.module)}/res/prometheus-adapter-values.yaml")
+  ]
+
+}
\ No newline at end of file
diff --git a/modules/k8config/modules/prometheus-adapter/res/prometheus-adapter-values.yaml b/modules/k8config/modules/prometheus-adapter/res/prometheus-adapter-values.yaml
new file mode 100644
index 0000000..851fea0
--- /dev/null
+++ b/modules/k8config/modules/prometheus-adapter/res/prometheus-adapter-values.yaml
@@ -0,0 +1,296 @@
+affinity: {}
+
+topologySpreadConstraints: []
+
+image:
+  repository: registry.k8s.io/prometheus-adapter/prometheus-adapter
+  # if not set appVersion field from Chart.yaml is used
+  tag: ""
+  pullPolicy: IfNotPresent
+
+logLevel: 4
+
+metricsRelistInterval: 1m
+
+listenPort: 6443
+
+nodeSelector: {}
+
+priorityClassName: ""
+
+## Override the release namespace (for multi-namespace deployments in combined charts)
+namespaceOverride: ""
+
+## Additional annotations to add to all resources
+customAnnotations: {}
+  # role: custom-metrics
+
+## Additional labels to add to all resources
+customLabels: {}
+  # monitoring: prometheus-adapter
+
+# URL to access prometheus
+prometheus:
+  # Value is templated
+  url: http://kube-prometheus-stack-prometheus.prometheus.svc
+  port: 9090
+  path: ""
+
+replicas: 1
+
+# k8s 1.21 needs fsGroup to be set for non root deployments
+# ref: https://github.com/kubernetes/kubernetes/issues/70679
+podSecurityContext:
+  fsGroup: 10001
+
+# SecurityContext of the container
+# ref. https://kubernetes.io/docs/tasks/configure-pod-container/security-context
+securityContext:
+  allowPrivilegeEscalation: false
+  capabilities:
+    drop: ["ALL"]
+  readOnlyRootFilesystem: true
+  runAsNonRoot: true
+  runAsUser: 10001
+  seccompProfile:
+    type: RuntimeDefault
+
+rbac:
+  # Specifies whether RBAC resources should be created
+  create: true
+  # Specifies if a Cluster Role should be used for the Auth Reader
+  useAuthReaderClusterRole: false
+  externalMetrics:
+    resources: ["*"]
+  customMetrics:
+    resources: ["*"]
+
+psp:
+  # Specifies whether PSP resources should be created
+  create: false
+  # Annotations added to the pod security policy
+  annotations: {}
+  ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
+  ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
+  ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
+
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name:
+  # ServiceAccount annotations.
+  # Use case: AWS EKS IAM roles for service accounts
+  # ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html
+  annotations: {}
+
+# Custom DNS configuration to be added to prometheus-adapter pods
+dnsConfig: {}
+  # nameservers:
+  #   - 1.2.3.4
+  # searches:
+  #   - ns1.svc.cluster-domain.example
+  #   - my.dns.search.suffix
+  # options:
+  #   - name: ndots
+  #     value: "2"
+  #   - name: edns0
+
+resources:
+# requests:
+#   cpu: 100m
+#   memory: 128Mi
+# limits:
+#   cpu: 100m
+#   memory: 128Mi
+
+# Configure liveness probe
+# https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Probe
+livenessProbe:
+  httpGet:
+    path: /healthz
+    port: https
+    scheme: HTTPS
+  initialDelaySeconds: 30
+  timeoutSeconds: 5
+
+# Configure readiness probe
+readinessProbe:
+  httpGet:
+    path: /healthz
+    port: https
+    scheme: HTTPS
+  initialDelaySeconds: 30
+  timeoutSeconds: 5
+
+# Configure startup probe
+# Use if prometheus-adapter takes a long time to finish startup e.g. polling a lot of API versions in cluster
+startupProbe: {}
+
+rules:
+  default: true
+
+  custom: []
+  # - seriesQuery: '{__name__=~"^some_metric_count$"}'
+  #   resources:
+  #     template: <<.Resource>>
+  #   name:
+  #     matches: ""
+  #     as: "my_custom_metric"
+  #   metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
+
+  # Mounts a configMap with pre-generated rules for use. 
Overrides the + # default, custom, external and resource entries + existing: + + external: [] + # - seriesQuery: '{__name__=~"^some_metric_count$"}' + # resources: + # template: <<.Resource>> + # name: + # matches: "" + # as: "my_external_metric" + # metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) + + resource: + cpu: + containerQuery: | + sum by (<<.GroupBy>>) ( + rate(container_cpu_usage_seconds_total{container!="",<<.LabelMatchers>>}[3m]) + ) + nodeQuery: | + sum by (<<.GroupBy>>) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",<<.LabelMatchers>>}[3m]) + ) + resources: + overrides: + node: + resource: node + namespace: + resource: namespace + pod: + resource: pod + instance: + resource: node + containerLabel: container + memory: + containerQuery: | + sum by (<<.GroupBy>>) ( + avg_over_time(container_memory_working_set_bytes{container!="",<<.LabelMatchers>>}[3m]) + ) + nodeQuery: | + sum by (<<.GroupBy>>) ( + avg_over_time(node_memory_MemTotal_bytes{<<.LabelMatchers>>}[3m]) + - + avg_over_time(node_memory_MemAvailable_bytes{<<.LabelMatchers>>}[3m]) + ) + resources: + overrides: + instance: + resource: node + node: + resource: node + namespace: + resource: namespace + pod: + resource: pod + containerLabel: container + window: 3m + +service: + annotations: {} + port: 443 + type: ClusterIP + # clusterIP: 1.2.3.4 + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" +tls: + enable: false + ca: |- + # Public CA file that signed the APIService + key: |- + # Private key of the APIService + certificate: |- + # Public key of the APIService + +# Set environment variables from secrets, configmaps or by setting them as name/value +env: [] + # - name: TMP_DIR + # value: /tmp + # - name: PASSWORD + # valueFrom: + # secretKeyRef: + # name: mysecret + # key: password + # optional: false + +# Any extra arguments +extraArguments: [] + # - --tls-private-key-file=/etc/tls/tls.key + # - --tls-cert-file=/etc/tls/tls.crt + +# Additional containers to add to the pod +extraContainers: [] + +# Any extra volumes +extraVolumes: [] + # - name: example-name + # hostPath: + # path: /path/on/host + # type: DirectoryOrCreate + # - name: ssl-certs + # hostPath: + # path: /etc/ssl/certs/ca-bundle.crt + # type: File + +# Any extra volume mounts +extraVolumeMounts: [] + # - name: example-name + # mountPath: /path/in/container + # - name: ssl-certs + # mountPath: /etc/ssl/certs/ca-certificates.crt + # readOnly: true + +tolerations: [] + +# Labels added to the pod +podLabels: {} + +# Annotations added to the pod +podAnnotations: {} + +# Annotations added to the deployment +deploymentAnnotations: {} + +hostNetwork: + # Specifies if prometheus-adapter should be started in hostNetwork mode. + # + # You would require this enabled if you use alternate overlay networking for pods and + # API server unable to communicate with metrics-server. As an example, this is required + # if you use Weave network on EKS. See also dnsPolicy + enabled: false + +# When hostNetwork is enabled, you probably want to set this to ClusterFirstWithHostNet +# dnsPolicy: ClusterFirstWithHostNet + +# Deployment strategy type +strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 25% + maxSurge: 25% + +podDisruptionBudget: + # Specifies if PodDisruptionBudget should be enabled + # When enabled, minAvailable or maxUnavailable should also be defined. 
+  enabled: false +  minAvailable: +  maxUnavailable: 1 + +certManager: +  enabled: false +  caCertDuration: 43800h0m0s +  certDuration: 8760h0m0s \ No newline at end of file diff --git a/modules/k8config/modules/elasticsearch/variables.tf b/modules/k8config/modules/prometheus-adapter/variables.tf similarity index 100% rename from modules/k8config/modules/elasticsearch/variables.tf rename to modules/k8config/modules/prometheus-adapter/variables.tf diff --git a/modules/k8config/modules/prometheus/README.md b/modules/k8config/modules/prometheus/README.md index 943c97c..7ce373c 100644 --- a/modules/k8config/modules/prometheus/README.md +++ b/modules/k8config/modules/prometheus/README.md @@ -7,6 +7,8 @@ # Configuration Help For Scraping Metris * https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus/README.md#configuration +https://github.com/prometheus-operator/kube-prometheus -https://github.com/prometheus-operator/kube-prometheus \ No newline at end of file +# Grafana Dashboards: +* https://grafana.com/grafana/dashboards/15141-kubernetes-service-logs/ \ No newline at end of file diff --git a/modules/k8config/modules/prometheus/res/kube-prometheus-stack-values.yaml b/modules/k8config/modules/prometheus/res/kube-prometheus-stack-values.yaml index aa96290..2edb0ef 100644 --- a/modules/k8config/modules/prometheus/res/kube-prometheus-stack-values.yaml +++ b/modules/k8config/modules/prometheus/res/kube-prometheus-stack-values.yaml @@ -1107,7 +1107,12 @@ grafana: ## Configure additional grafana datasources (passed through tpl) ## ref: http://docs.grafana.org/administration/provisioning/#datasources - additionalDataSources: [] + additionalDataSources: + - name: Loki + type: loki + editable: false + url: http://loki-loki-distributed-gateway.loki.svc.cluster.local + # - name: prometheus-sample # access: proxy # basicAuth: true diff --git a/modules/k8config/modules/_archive/loki/main.tf b/modules/k8config/modules/promtail/main.tf similarity index 68% rename from modules/k8config/modules/_archive/loki/main.tf rename to modules/k8config/modules/promtail/main.tf index f163b27..5d205f4 100644 --- a/modules/k8config/modules/_archive/loki/main.tf +++ b/modules/k8config/modules/promtail/main.tf @@ -7,16 +7,16 @@ terraform { } } -resource "helm_release" "loki" { - name = "loki" +resource "helm_release" "promtail" { + name = "promtail" repository = "https://grafana.github.io/helm-charts" - chart = "loki" + chart = "promtail" atomic = true create_namespace = true - namespace = "loki" + namespace = "promtail" recreate_pods = true reuse_values = true @@ -25,7 +25,7 @@ dependency_update = true values = [ - file("${abspath(path.module)}/res/loki-values.yaml") + file("${abspath(path.module)}/res/promtail-values.yaml") ] } \ No newline at end of file diff --git a/modules/k8config/modules/promtail/res/promtail-values.yaml b/modules/k8config/modules/promtail/res/promtail-values.yaml new file mode 100644 index 0000000..bb8322e --- /dev/null +++ b/modules/k8config/modules/promtail/res/promtail-values.yaml @@ -0,0 +1,635 @@ +# -- Overrides the chart's name +nameOverride: null + +# -- Overrides the chart's computed fullname +fullnameOverride: null + +global: + # -- Allow parent charts to override registry hostname + imageRegistry: "" + # -- Allow parent charts to override registry credentials + imagePullSecrets: [] + +daemonset: + # -- Deploys Promtail as a DaemonSet + enabled: true + autoscaling: + # -- 
Creates a VerticalPodAutoscaler for the daemonset + enabled: true + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: + # - name: custom-recommender-performance + + # -- List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # -- Defines the max allowed resources for the pod + maxAllowed: + cpu: 200m + memory: 100Mi + # -- Defines the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + # updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". + # updateMode: Auto + +deployment: + # -- Deploys Promtail as a Deployment + enabled: true + replicaCount: 3 + autoscaling: + # -- Creates a HorizontalPodAutoscaler for the deployment + enabled: true + minReplicas: 3 + maxReplicas: 10 + targetCPUUtilizationPercentage: 80 + targetMemoryUtilizationPercentage: + # behavior: {} + + # -- Set deployment object update strategy + strategy: + type: RollingUpdate + +secret: + # -- Labels for the Secret + labels: {} + # -- Annotations for the Secret + annotations: {} + +configmap: + # -- If enabled, promtail config will be created as a ConfigMap instead of a secret + enabled: false + +initContainer: [] + # # -- Specifies whether the init container for setting inotify max user instances is to be enabled + # - name: init + # # -- Docker registry, image and tag for the init container image + # image: docker.io/busybox:1.33 + # # -- Docker image pull policy for the init container image + # imagePullPolicy: IfNotPresent + # # -- The inotify max user instances to configure + # command: + # - sh + # - -c + # - sysctl -w fs.inotify.max_user_instances=128 + # securityContext: + # privileged: true + +image: + # -- The Docker registry + registry: docker.io + # -- Docker image repository + repository: grafana/promtail + # -- Overrides the image tag whose default is the chart's appVersion + tag: null + # -- Docker image pull policy + pullPolicy: IfNotPresent + +# -- Image pull secrets for Docker images +imagePullSecrets: [] + +# -- hostAliases to add +hostAliases: [] +# - ip: 1.2.3.4 +# hostnames: +# - domain.tld + +# -- Controls whether the pod has the `hostNetwork` flag set. 
+hostNetwork: null
+
+# -- Annotations for the DaemonSet
+annotations: {}
+
+# -- Number of old revisions to retain to allow rollback (If not set, default Kubernetes value is set to 10)
+# revisionHistoryLimit: 1
+
+# -- The update strategy for the DaemonSet
+updateStrategy: {}
+
+# -- Pod labels
+podLabels: {}
+
+# -- Pod annotations
+podAnnotations: {}
+#  prometheus.io/scrape: "true"
+#  prometheus.io/port: "http-metrics"
+
+# -- The name of the PriorityClass
+priorityClassName: null
+
+# -- Liveness probe
+livenessProbe: {}
+
+# -- Readiness probe
+# @default -- See `values.yaml`
+readinessProbe:
+  failureThreshold: 5
+  httpGet:
+    path: "{{ printf `%s/ready` .Values.httpPathPrefix }}"
+    port: http-metrics
+  initialDelaySeconds: 10
+  periodSeconds: 10
+  successThreshold: 1
+  timeoutSeconds: 1
+
+# -- Resource requests and limits
+resources:
+  limits:
+    cpu: 200m
+    memory: 128Mi
+  requests:
+    cpu: 100m
+    memory: 128Mi
+
+# -- The security context for pods
+podSecurityContext:
+  runAsUser: 0
+  runAsGroup: 0
+
+# -- The security context for containers
+containerSecurityContext:
+  readOnlyRootFilesystem: true
+  capabilities:
+    drop:
+      - ALL
+  allowPrivilegeEscalation: false
+
+rbac:
+  # -- Specifies whether RBAC resources are to be created
+  create: true
+  # -- Specifies whether a PodSecurityPolicy is to be created
+  pspEnabled: false
+
+# -- The name of the Namespace to deploy
+# If not set, `.Release.Namespace` is used
+namespace: null
+
+serviceAccount:
+  # -- Specifies whether a ServiceAccount should be created
+  create: true
+  # -- The name of the ServiceAccount to use.
+  # If not set and `create` is true, a name is generated using the fullname template
+  name: null
+  # -- Image pull secrets for the service account
+  imagePullSecrets: []
+  # -- Annotations for the service account
+  annotations: {}
+
+# -- Node selector for pods
+nodeSelector: {}
+
+# -- Affinity configuration for pods
+affinity: {}
+
+# -- Tolerations for pods. By default, pods will be scheduled on master/control-plane nodes.
+tolerations:
+  - key: node-role.kubernetes.io/master
+    operator: Exists
+    effect: NoSchedule
+  - key: node-role.kubernetes.io/control-plane
+    operator: Exists
+    effect: NoSchedule
+
+# -- Default volumes that are mounted into pods. In most cases, these should not be changed.
+# Use `extraVolumes`/`extraVolumeMounts` for additional custom volumes.
+# @default -- See `values.yaml`
+defaultVolumes:
+  - name: run
+    hostPath:
+      path: /run/promtail
+  - name: containers
+    hostPath:
+      path: /var/lib/docker/containers
+  - name: pods
+    hostPath:
+      path: /var/log/pods
+
+# -- Default volume mounts. Corresponds to `volumes`.
+# @default -- See `values.yaml`
+defaultVolumeMounts:
+  - name: run
+    mountPath: /run/promtail
+  - name: containers
+    mountPath: /var/lib/docker/containers
+    readOnly: true
+  - name: pods
+    mountPath: /var/log/pods
+    readOnly: true
+
+# Extra volumes to be added in addition to those specified under `defaultVolumes`.
+extraVolumes: []
+
+# Extra volume mounts, corresponding to `extraVolumes`.
+extraVolumeMounts: []
+
+# Extra args for the Promtail container.
+extraArgs: []
+# -- Example:
+# -- extraArgs:
+# --   - -client.external-labels=hostname=$(HOSTNAME)
+
+# -- Extra environment variables. Set up tracing environment variables here if .Values.config.enableTracing is true.
+# Tracing can currently only be configured via environment variables. See:
+# https://grafana.com/docs/loki/latest/clients/promtail/configuration/#tracing_config
+# https://www.jaegertracing.io/docs/1.16/client-features/
+extraEnv: []
+
+# -- Extra environment variables from secrets or configmaps
+extraEnvFrom: []
+
+# -- Configure enableServiceLinks in pod
+enableServiceLinks: true
+
+# ServiceMonitor configuration
+serviceMonitor:
+  # -- If enabled, ServiceMonitor resources for Prometheus Operator are created
+  enabled: true
+  # -- Alternative namespace for ServiceMonitor resources
+  namespace: null
+  # -- Namespace selector for ServiceMonitor resources
+  namespaceSelector: {}
+  # -- ServiceMonitor annotations
+  annotations: {}
+  # -- Additional ServiceMonitor labels
+  labels: {}
+  # -- ServiceMonitor scrape interval
+  interval: null
+  # -- ServiceMonitor scrape timeout in Go duration format (e.g. 15s)
+  scrapeTimeout: null
+  # -- ServiceMonitor relabel configs to apply to samples before scraping
+  # https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
+  # (defines `relabel_configs`)
+  relabelings: []
+  # -- ServiceMonitor relabel configs to apply to samples as the last
+  # step before ingestion
+  # https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
+  # (defines `metric_relabel_configs`)
+  metricRelabelings: []
+  # -- ServiceMonitor will add labels from the service to the Prometheus metric
+  # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitorspec
+  targetLabels: []
+  # -- ServiceMonitor will use http by default, but you can pick https as well
+  scheme: http
+  # -- ServiceMonitor will use these tlsConfig settings to make the health check requests
+  tlsConfig: null
+  # -- Prometheus rules will be deployed for alerting purposes
+  prometheusRule:
+    enabled: false
+    additionalLabels: {}
+    # namespace:
+    rules: []
+    #  - alert: PromtailRequestErrors
+    #    expr: 100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m])) by (namespace, job, route, instance) / sum(rate(promtail_request_duration_seconds_count[1m])) by (namespace, job, route, instance) > 10
+    #    for: 5m
+    #    labels:
+    #      severity: critical
+    #    annotations:
+    #      description: |
+    #        The {{ $labels.job }} {{ $labels.route }} is experiencing
+    #        {{ printf \"%.2f\" $value }} errors.
+    #        VALUE = {{ $value }}
+    #        LABELS = {{ $labels }}
+    #      summary: Promtail request errors (instance {{ $labels.instance }})
+    #  - alert: PromtailRequestLatency
+    #    expr: histogram_quantile(0.99, sum(rate(promtail_request_duration_seconds_bucket[5m])) by (le)) > 1
+    #    for: 5m
+    #    labels:
+    #      severity: critical
+    #    annotations:
+    #      summary: Promtail request latency (instance {{ $labels.instance }})
+    #      description: |
+    #        The {{ $labels.job }} {{ $labels.route }} is experiencing
+    #        {{ printf \"%.2f\" $value }}s 99th percentile latency.
+    #        VALUE = {{ $value }}
+    #        LABELS = {{ $labels }}
+
+# Extra containers created as part of a Promtail Deployment resource
+# - spec for Container:
+#   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.23/#container-v1-core
+#
+# Note that the key is used as the `name` field, i.e. below will create a
+# container named `promtail-proxy`.
+extraContainers: {}
+  # promtail-proxy:
+  #   image: nginx
+  #   ...
+
+# -- Configure additional ports and services. For each configured port, a corresponding service is created.
+# See values.yaml for details
+extraPorts: {}
+#  syslog:
+#    name: tcp-syslog
+#    annotations: {}
+#    labels: {}
+#    containerPort: 1514
+#    protocol: TCP
+#    service:
+#      type: ClusterIP
+#      clusterIP: null
+#      port: 1514
+#      externalIPs: []
+#      nodePort: null
+#      loadBalancerIP: null
+#      loadBalancerSourceRanges: []
+#      externalTrafficPolicy: null
+#    ingress:
+#      # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName
+#      # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress
+#      # ingressClassName: nginx
+#      # Values can be templated
+#      annotations: {}
+#      # kubernetes.io/ingress.class: nginx
+#      # kubernetes.io/tls-acme: "true"
+#      paths: "/"
+#      hosts:
+#        - chart-example.local
+#
+#      tls: []
+#      #  - secretName: chart-example-tls
+#      #    hosts:
+#      #      - chart-example.local
+
+
+# -- PodSecurityPolicy configuration.
+# @default -- See `values.yaml`
+podSecurityPolicy:
+  privileged: true
+  allowPrivilegeEscalation: true
+  volumes:
+    - 'secret'
+    - 'hostPath'
+    - 'downwardAPI'
+  hostNetwork: false
+  hostIPC: false
+  hostPID: false
+  runAsUser:
+    rule: 'RunAsAny'
+  seLinux:
+    rule: 'RunAsAny'
+  supplementalGroups:
+    rule: 'RunAsAny'
+  fsGroup:
+    rule: 'RunAsAny'
+  readOnlyRootFilesystem: true
+  requiredDropCapabilities:
+    - ALL
+
+# -- Section for crafting Promtail's config file. The only directly relevant value is `config.file`
+# which is a templated string that references the other values and snippets below this key.
+# @default -- See `values.yaml`
+config:
+  # -- Enable Promtail config from Helm chart
+  # Set `configmap.enabled: true` and this to `false` to manage your own Promtail config
+  # See default config in `values.yaml`
+  enabled: true
+  # -- The log level of the Promtail server
+  # Must be referenced in `config.file` to configure `server.log_level`
+  # See default config in `values.yaml`
+  logLevel: info
+  # -- The log format of the Promtail server
+  # Must be referenced in `config.file` to configure `server.log_format`
+  # Valid formats: `logfmt, json`
+  # See default config in `values.yaml`
+  logFormat: logfmt
+  # -- The port of the Promtail server
+  # Must be referenced in `config.file` to configure `server.http_listen_port`
+  # See default config in `values.yaml`
+  serverPort: 3101
+  # -- The config of clients of the Promtail server
+  # Must be referenced in `config.file` to configure `clients`
+  # @default -- See `values.yaml`
+  clients:
+    - url: http://loki-loki-distributed-gateway.loki.svc.cluster.local/loki/api/v1/push
+  # -- Configures where Promtail will save its positions file, to resume reading after restarts.
+  # Must be referenced in `config.file` to configure `positions`
+  positions:
+    filename: /run/promtail/positions.yaml
+  # -- The config to enable tracing
+  enableTracing: false
+  # -- A section of reusable snippets that can be referenced in `config.file`.
+  # Custom snippets may be added in order to reduce redundancy.
+  # This is especially helpful when multiple `kubernetes_sd_configs` are used, which usually have large parts in common.
+  # @default -- See `values.yaml`
+  snippets:
+    pipelineStages:
+      - cri: {}
+    common:
+      - action: replace
+        source_labels:
+          - __meta_kubernetes_pod_node_name
+        target_label: node_name
+      - action: replace
+        source_labels:
+          - __meta_kubernetes_namespace
+        target_label: namespace
+      - action: replace
+        replacement: $1
+        separator: /
+        source_labels:
+          - namespace
+          - app
+        target_label: job
+      - action: replace
+        source_labels:
+          - __meta_kubernetes_pod_name
+        target_label: pod
+      - action: replace
+        source_labels:
+          - __meta_kubernetes_pod_container_name
+        target_label: container
+      - action: replace
+        replacement: /var/log/pods/*$1/*.log
+        separator: /
+        source_labels:
+          - __meta_kubernetes_pod_uid
+          - __meta_kubernetes_pod_container_name
+        target_label: __path__
+      - action: replace
+        replacement: /var/log/pods/*$1/*.log
+        regex: true/(.*)
+        separator: /
+        source_labels:
+          - __meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash
+          - __meta_kubernetes_pod_annotation_kubernetes_io_config_hash
+          - __meta_kubernetes_pod_container_name
+        target_label: __path__
+
+    # If set to true, adds an additional label for the scrape job.
+    # This helps debug the Promtail config.
+    addScrapeJobLabel: false
+
+    # -- You can put here any keys that will be directly added to the config file's 'limits_config' block.
+    # @default -- empty
+    extraLimitsConfig: ""
+
+    # -- You can put here any keys that will be directly added to the config file's 'server' block.
+    # @default -- empty
+    extraServerConfigs: ""
+
+    # -- You can put here any additional scrape configs you want to add to the config file.
+    # @default -- empty
+    extraScrapeConfigs: ""
+
+    # -- You can put here any additional relabel_configs for the "kubernetes-pods" job
+    extraRelabelConfigs: []
+
+    scrapeConfigs: |
+      # See also https://github.com/grafana/loki/blob/master/production/ksonnet/promtail/scrape_config.libsonnet for reference
+      - job_name: kubernetes-pods
+        pipeline_stages:
+          {{- toYaml .Values.config.snippets.pipelineStages | nindent 4 }}
+        kubernetes_sd_configs:
+          - role: pod
+        relabel_configs:
+          - source_labels:
+              - __meta_kubernetes_pod_controller_name
+            regex: ([0-9a-z-.]+?)(-[0-9a-f]{8,10})?
+            action: replace
+            target_label: __tmp_controller_name
+          - source_labels:
+              - __meta_kubernetes_pod_label_app_kubernetes_io_name
+              - __meta_kubernetes_pod_label_app
+              - __tmp_controller_name
+              - __meta_kubernetes_pod_name
+            regex: ^;*([^;]+)(;.*)?$
+            action: replace
+            target_label: app
+          - source_labels:
+              - __meta_kubernetes_pod_label_app_kubernetes_io_instance
+              - __meta_kubernetes_pod_label_instance
+            regex: ^;*([^;]+)(;.*)?$
+            action: replace
+            target_label: instance
+          - source_labels:
+              - __meta_kubernetes_pod_label_app_kubernetes_io_component
+              - __meta_kubernetes_pod_label_component
+            regex: ^;*([^;]+)(;.*)?$
+            action: replace
+            target_label: component
+          {{- if .Values.config.snippets.addScrapeJobLabel }}
+          - replacement: kubernetes-pods
+            target_label: scrape_job
+          {{- end }}
+          {{- toYaml .Values.config.snippets.common | nindent 4 }}
+          {{- with .Values.config.snippets.extraRelabelConfigs }}
+          {{- toYaml . | nindent 4 }}
+          {{- end }}
+
+  # -- Config file contents for Promtail.
+  # Must be configured as a string.
+  # It is templated so it can be assembled from reusable snippets in order to avoid redundancy.
+  # @default -- See `values.yaml`
+  file: |
+    server:
+      log_level: {{ .Values.config.logLevel }}
+      log_format: {{ .Values.config.logFormat }}
+      http_listen_port: {{ .Values.config.serverPort }}
+      {{- with .Values.httpPathPrefix }}
+      http_path_prefix: {{ . }}
+      {{- end }}
+      {{- tpl .Values.config.snippets.extraServerConfigs . | nindent 2 }}
+
+    clients:
+      {{- tpl (toYaml .Values.config.clients) . | nindent 2 }}
+
+    positions:
+      {{- tpl (toYaml .Values.config.positions) . | nindent 2 }}
+
+    scrape_configs:
+      {{- tpl .Values.config.snippets.scrapeConfigs . | nindent 2 }}
+      {{- tpl .Values.config.snippets.extraScrapeConfigs . | nindent 2 }}
+
+    limits_config:
+      {{- tpl .Values.config.snippets.extraLimitsConfig . | nindent 2 }}
+
+    tracing:
+      enabled: {{ .Values.config.enableTracing }}
+
+networkPolicy:
+  # -- Specifies whether Network Policies should be created
+  enabled: false
+  metrics:
+    # -- Specifies the Pods which are allowed to access the metrics port.
+    # As this is cross-namespace communication, you also need the namespaceSelector.
+    podSelector: {}
+    # -- Specifies the namespaces which are allowed to access the metrics port
+    namespaceSelector: {}
+    # -- Specifies specific network CIDRs which are allowed to access the metrics port.
+    # In case you use namespaceSelector, you also have to specify your kubelet networks here.
+    # The metrics ports are also used for probes.
+    cidrs: []
+  k8sApi:
+    # -- Specify the k8s API endpoint port
+    port: 8443
+    # -- Specifies specific network CIDRs you want to limit access to
+    cidrs: []
+
+# -- Base path to serve all API routes from
+httpPathPrefix: ""
+
+sidecar:
+  configReloader:
+    enabled: true
+    image:
+      # -- The Docker registry for sidecar config-reloader
+      registry: docker.io
+      # -- Docker image repository for sidecar config-reloader
+      repository: jimmidyson/configmap-reload
+      # -- Docker image tag for sidecar config-reloader
+      tag: v0.8.0
+      # -- Docker image pull policy for sidecar config-reloader
+      pullPolicy: IfNotPresent
+    # Extra args for the config-reloader container.
+ extraArgs: [] + # -- Extra environment variables for sidecar config-reloader + extraEnv: [] + # -- Extra environment variables from secrets or configmaps for sidecar config-reloader + extraEnvFrom: [] + # -- The security context for containers for sidecar config-reloader + containerSecurityContext: + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + allowPrivilegeEscalation: false + # -- Readiness probe for sidecar config-reloader + readinessProbe: {} + # -- Liveness probe for sidecar config-reloader + livenessProbe: {} + # -- Resource requests and limits for sidecar config-reloader + resources: {} + # limits: + # cpu: 200m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + config: + # -- The port of the config-reloader server + serverPort: 9533 + serviceMonitor: + enabled: true + +# -- Extra K8s manifests to deploy +extraObjects: [] + # - apiVersion: "kubernetes-client.io/v1" + # kind: ExternalSecret + # metadata: + # name: promtail-secrets + # spec: + # backendType: gcpSecretsManager + # data: + # - key: promtail-oauth2-creds + # name: client_secret \ No newline at end of file diff --git a/modules/k8config/modules/promtail/variables.tf b/modules/k8config/modules/promtail/variables.tf new file mode 100644 index 0000000..e69de29 diff --git a/modules/k8config/variables.tf b/modules/k8config/variables.tf index a551cc3..143df8c 100644 --- a/modules/k8config/variables.tf +++ b/modules/k8config/variables.tf @@ -30,4 +30,16 @@ variable "domain" { description = "Root Domain For Service" sensitive = true type = string +} + +variable "s3_access_key_id" { + description = "S3 Access Key Id" + sensitive = true + type = string +} + +variable "s3_secret_access_key" { + description = "S3 Secret Access Key" + sensitive = true + type = string } \ No newline at end of file diff --git a/modules/k8infra/variables.tf b/modules/k8infra/variables.tf index fd79a4a..e69de29 100644 --- a/modules/k8infra/variables.tf +++ b/modules/k8infra/variables.tf @@ -1,5 +0,0 @@ -variable "do_token" { - description = "Digital Ocean API Auth Token" - sensitive = true - type = string -} \ No newline at end of file diff --git a/variables.tf b/variables.tf index 8e7f7cd..36ed64f 100644 --- a/variables.tf +++ b/variables.tf @@ -4,6 +4,19 @@ variable "do_token" { type = string } +variable "do_spaces_access_key_id" { + description = "Digital Ocean Spaces Access Key Id" + sensitive = true + type = string +} + +variable "do_spaces_secret_access_key" { + description = "Digital Ocean Spaces Secret Access Key" + sensitive = true + type = string +} + + variable "cf_token" { description = "CloudFlare API Auth Token" @@ -22,3 +35,5 @@ variable "domain" { sensitive = true type = string } + + From e902edcb593ba3eb2968d803bc553b004d450ad5 Mon Sep 17 00:00:00 2001 From: bensoer Date: Sun, 21 Apr 2024 21:13:02 -0700 Subject: [PATCH 2/3] terraform formatting fix --- modules/k8config/main.tf | 12 ++++++------ modules/k8config/modules/loki/main.tf | 8 ++++---- modules/k8config/modules/prometheus-adapter/main.tf | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/k8config/main.tf b/modules/k8config/main.tf index e6268c6..3ef5f45 100644 --- a/modules/k8config/main.tf +++ b/modules/k8config/main.tf @@ -120,16 +120,16 @@ module "kyverno" { module "loki" { source = "./modules/loki" - s3_access_key_id = var.s3_access_key_id + s3_access_key_id = var.s3_access_key_id s3_secret_access_key = var.s3_secret_access_key providers = { helm = helm } - depends_on = [ + depends_on = [ time_sleep.wait_60_seconds 
-  ]
+  ]
 }
 
 module "promtail" {
@@ -139,10 +139,10 @@ module "promtail" {
     helm = helm
   }
 
-  depends_on = [
+  depends_on = [
     time_sleep.wait_60_seconds,
     module.loki
-  ]
+  ]
 }
 
 
@@ -153,7 +153,7 @@ module "prometheus-adapter" {
     helm = helm
   }
 
-  depends_on = [
+  depends_on = [
     time_sleep.wait_60_seconds,
     module.prometheus
   ]
diff --git a/modules/k8config/modules/loki/main.tf b/modules/k8config/modules/loki/main.tf
index 00c51b6..e87f866 100644
--- a/modules/k8config/modules/loki/main.tf
+++ b/modules/k8config/modules/loki/main.tf
@@ -16,7 +16,7 @@ resource "helm_release" "loki" {
   repository = "https://grafana.github.io/helm-charts"
   chart      = "loki-distributed"
 
-  version = "0.79.0"
+  version    = "0.79.0"
 
   atomic = true
 
@@ -33,10 +33,10 @@ resource "helm_release" "loki" {
     file("${abspath(path.module)}/res/loki-distributed-values.yaml")
   ]
 
-  depends_on = [
+  depends_on = [
     kubernetes_namespace.loki_namespace,
     kubernetes_secret.loki_s3_credentials
-  ]
+  ]
 }
 
 resource "kubernetes_secret" "loki_s3_credentials" {
@@ -47,7 +47,7 @@ resource "kubernetes_secret" "loki_s3_credentials" {
 
   data = {
     S3_LOKI_SECRET_ACCESS_KEY = var.s3_secret_access_key
-    S3_LOKI_ACCESS_KEY = var.s3_access_key_id
+    S3_LOKI_ACCESS_KEY        = var.s3_access_key_id
   }
 
   depends_on = [
diff --git a/modules/k8config/modules/prometheus-adapter/main.tf b/modules/k8config/modules/prometheus-adapter/main.tf
index 42dcdd4..83e9073 100644
--- a/modules/k8config/modules/prometheus-adapter/main.tf
+++ b/modules/k8config/modules/prometheus-adapter/main.tf
@@ -12,7 +12,7 @@ resource "helm_release" "prometheus-adapter" {
   repository = "https://prometheus-community.github.io/helm-charts"
   chart      = "prometheus-adapter"
 
-  version = "4.10.0"
+  version    = "4.10.0"
 
   atomic = true
 
From 4951726ecc8862eac0220e053a6d24d4e8d276f9 Mon Sep 17 00:00:00 2001
From: bensoer
Date: Sun, 21 Apr 2024 21:16:54 -0700
Subject: [PATCH 3/3] updated terraform linting to use same version

---
 .github/workflows/terraform-linting.yml | 2 +-
 README.md                               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/terraform-linting.yml b/.github/workflows/terraform-linting.yml
index 3cd4ce8..8b24a5a 100644
--- a/.github/workflows/terraform-linting.yml
+++ b/.github/workflows/terraform-linting.yml
@@ -12,7 +12,7 @@ on:
   workflow_dispatch:
 
 env:
-  TF_VERSION: "1.7.5"
+  TF_VERSION: "1.8.1"
   GITHUB_TOKEN: ${{ github.token }}
 
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
diff --git a/README.md b/README.md
index ca66dcc..38d7718 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ Below is a table of each piece installed in my cluster at the moment, and what r
 | Prometheus Adapter | Metrics for Kubernetes Metrics API | Replaces metrics-server to work with Prometheus instead |
 | Grafana | Observability - Metrics & Logging Dashboard | |
 | Loki | Observability - Logging Database | |
-| Promtail | Observability - Container Stdout Log Scraping | Forwards to Loki |
+| Promtail | Observability - Container Stdout/Stderr Log Scraping | Forwards to Loki |
 | Vault | Secrets Manager | Coming Soon |
 
 Below is another table of the tech being used for managing and configuring my Kubernetes cluster:
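To tie the series together: the Spaces keys enter at the root as `do_spaces_access_key_id` / `do_spaces_secret_access_key`, and `modules/k8config` now expects them as `s3_access_key_id` / `s3_secret_access_key`. The root `main.tf` hunk is not included above, so the module block below is only a minimal sketch of that wiring. The two S3 inputs and `domain` come from the variables files shown in the patch; any other inputs are assumptions:

```hcl
# Minimal sketch, not part of the patch: how the root module could map the
# Digital Ocean Spaces credentials onto the k8config module's new inputs.
module "k8config" {
  source = "./modules/k8config"

  domain               = var.domain                      # existing input
  s3_access_key_id     = var.do_spaces_access_key_id     # new in this series
  s3_secret_access_key = var.do_spaces_secret_access_key # new in this series

  # ...remaining k8config inputs elided
}
```

Keeping the `do_spaces_*` names at the root and the generic `s3_*` names inside `k8config` leaves the module provider-agnostic, so any other S3-compatible store could be swapped in later without renaming the module inputs.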
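Likewise, the `kubernetes_secret.loki_s3_credentials` resource only pays off once `loki-distributed-values.yaml` actually reads the `S3_LOKI_ACCESS_KEY` / `S3_LOKI_SECRET_ACCESS_KEY` keys. That values file is not shown in this part of the diff, so the snippet below is a hedged sketch of one common pattern: exposing the secret to the Loki pods as environment variables and expanding them with Loki's `-config.expand-env=true` flag. The bucket name and Spaces endpoint here are placeholders:

```yaml
# Sketch only, assuming the secret is injected as env vars (for example via
# the chart's extraEnvFrom) and that -config.expand-env=true is passed to Loki.
loki:
  storageConfig:
    aws:
      endpoint: nyc3.digitaloceanspaces.com   # placeholder Spaces endpoint
      bucketnames: loki-chunks                # placeholder bucket name
      access_key_id: ${S3_LOKI_ACCESS_KEY}             # from the Terraform secret
      secret_access_key: ${S3_LOKI_SECRET_ACCESS_KEY}  # from the Terraform secret
      s3forcepathstyle: true
```

If that expansion is wired up, logs should flow end to end: Promtail pushes to the `loki-loki-distributed-gateway` service configured in its `clients` block, Loki persists chunks to Spaces, and Grafana reads them back through the `Loki` datasource added to `kube-prometheus-stack-values.yaml` above.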