From 82adb42a25a6fc79a2fac971193ec5b61bd0b8c6 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Wed, 27 Sep 2023 19:44:32 -0700 Subject: [PATCH 01/21] Add template generation script and template files for each relevant non-helm file --- .../deploy-agent-operator-resources.md.templ | 434 ++++++++++++ .../sources/operator/getting-started.md.templ | 155 +++++ .../node-exporter-config.md.templ | 433 ++++++++++++ .../process-exporter-config.md.templ | 184 +++++ .../install/install-agent-docker.md.templ | 76 +++ pkg/operator/defaults.go.templ | 15 + production/kubernetes/agent-bare.yaml.templ | 115 ++++ production/kubernetes/agent-loki.yaml.templ | 100 +++ production/kubernetes/agent-traces.yaml.templ | 154 +++++ .../build/lib/version.libsonnet.templ | 1 + .../templates/operator/main.jsonnet.templ | 160 +++++ production/kubernetes/install-bare.sh.templ | 34 + .../templates/agent-operator.yaml.templ | 645 ++++++++++++++++++ .../grafana-agent/v1/main.libsonnet.templ | 142 ++++ .../v2/internal/base.libsonnet.templ | 56 ++ .../v2/internal/syncer.libsonnet.templ | 62 ++ tools/generate-version-files.bash | 12 + 17 files changed, 2778 insertions(+) create mode 100644 docs/sources/operator/deploy-agent-operator-resources.md.templ create mode 100644 docs/sources/operator/getting-started.md.templ create mode 100644 docs/sources/static/configuration/integrations/node-exporter-config.md.templ create mode 100644 docs/sources/static/configuration/integrations/process-exporter-config.md.templ create mode 100644 docs/sources/static/set-up/install/install-agent-docker.md.templ create mode 100644 pkg/operator/defaults.go.templ create mode 100644 production/kubernetes/agent-bare.yaml.templ create mode 100644 production/kubernetes/agent-loki.yaml.templ create mode 100644 production/kubernetes/agent-traces.yaml.templ create mode 100644 production/kubernetes/build/lib/version.libsonnet.templ create mode 100644 production/kubernetes/build/templates/operator/main.jsonnet.templ create mode 100644 production/kubernetes/install-bare.sh.templ create mode 100644 production/operator/templates/agent-operator.yaml.templ create mode 100644 production/tanka/grafana-agent/v1/main.libsonnet.templ create mode 100644 production/tanka/grafana-agent/v2/internal/base.libsonnet.templ create mode 100644 production/tanka/grafana-agent/v2/internal/syncer.libsonnet.templ create mode 100755 tools/generate-version-files.bash diff --git a/docs/sources/operator/deploy-agent-operator-resources.md.templ b/docs/sources/operator/deploy-agent-operator-resources.md.templ new file mode 100644 index 000000000000..b390ec8e1712 --- /dev/null +++ b/docs/sources/operator/deploy-agent-operator-resources.md.templ @@ -0,0 +1,434 @@ +--- +aliases: +- /docs/grafana-cloud/agent/operator/deploy-agent-operator-resources/ +- /docs/grafana-cloud/monitor-infrastructure/agent/operator/deploy-agent-operator-resources/ +- /docs/grafana-cloud/monitor-infrastructure/integrations/agent/operator/deploy-agent-operator-resources/ +- custom-resource-quickstart/ +canonical: https://grafana.com/docs/agent/latest/operator/deploy-agent-operator-resources/ +title: Deploy Operator resources +description: Learn how to deploy Operator resources +weight: 120 +--- +# Deploy Operator resources + +To start collecting telemetry data, you need to roll out Grafana Agent Operator custom resources into your Kubernetes cluster. Before you can create the custom resources, you must first apply the Agent Custom Resource Definitions (CRDs) and install Agent Operator, with or without Helm. 
If you haven't yet taken these steps, follow the instructions in one of the following topics: + +- [Install Agent Operator]({{< relref "./getting-started/" >}}) +- [Install Agent Operator with Helm]({{< relref "./helm-getting-started/" >}}) + +Follow the steps in this guide to roll out the Grafana Agent Operator custom resources to: + +- Scrape and ship cAdvisor and kubelet metrics to a Prometheus-compatible metrics endpoint. +- Collect and ship your Pods’ container logs to a Loki-compatible logs endpoint. + +The hierarchy of custom resources is as follows: + +- `GrafanaAgent` + - `MetricsInstance` + - `PodMonitor` + - `Probe` + - `ServiceMonitor` + - `LogsInstance` + - `PodLogs` + +To learn more about the custom resources Agent Operator provides and their hierarchy, see [Grafana Agent Operator architecture]({{< relref "./architecture/" >}}). + +{{% admonition type="note" %}} +Agent Operator is currently in [beta]({{< relref "../stability.md#beta" >}}) and its custom resources are subject to change. +{{% /admonition %}} + +## Before you begin + +Before you begin, make sure that you have deployed the Grafana Agent Operator CRDs and installed Agent Operator into your cluster. See [Install Grafana Agent Operator with Helm]({{< relref "./helm-getting-started.md" >}}) or [Install Grafana Agent Operator]({{< relref "./getting-started.md" >}}) for instructions. + +## Deploy the GrafanaAgent resource + +In this section, you'll roll out a `GrafanaAgent` resource. See [Grafana Agent Operator architecture]({{< relref "./architecture.md" >}}) for a discussion of the resources in the `GrafanaAgent` resource hierarchy. + +{{% admonition type="note" %}} +Due to the variety of possible deployment architectures, the official Agent Operator Helm chart does not provide built-in templates for the custom resources described in this guide. You must configure and deploy these manually as described in this section. We recommend templating and adding the following manifests to your own in-house Helm charts and GitOps flows. +{{% /admonition %}} + +To deploy the `GrafanaAgent` resource: + +1. 
Copy the following manifests to a file: + + ```yaml + apiVersion: monitoring.grafana.com/v1alpha1 + kind: GrafanaAgent + metadata: + name: grafana-agent + namespace: default + labels: + app: grafana-agent + spec: + image: grafana/agent:$AGENT_VERSION + integrations: + selector: + matchLabels: + agent: grafana-agent-integrations + logLevel: info + serviceAccountName: grafana-agent + metrics: + instanceSelector: + matchLabels: + agent: grafana-agent-metrics + externalLabels: + cluster: cloud + + logs: + instanceSelector: + matchLabels: + agent: grafana-agent-logs + + --- + + apiVersion: v1 + kind: ServiceAccount + metadata: + name: grafana-agent + namespace: default + + --- + + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: grafana-agent + rules: + - apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - nodes/metrics + - services + - endpoints + - pods + - events + verbs: + - get + - list + - watch + - apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch + - nonResourceURLs: + - /metrics + - /metrics/cadvisor + verbs: + - get + + --- + + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + name: grafana-agent + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent + subjects: + - kind: ServiceAccount + name: grafana-agent + namespace: default + ``` + + In the first manifest, the `GrafanaAgent` resource: + + - Specifies an Agent image version. + - Specifies `MetricsInstance` and `LogsInstance` selectors. These search for `MetricsInstances` and `LogsInstances` in the same namespace with labels matching `agent: grafana-agent-metrics` and `agent: grafana-agent-logs`, respectively. + - Sets a `cluster: cloud` label for all metrics shipped to your Prometheus-compatible endpoint. Change this label to your cluster name. To search for `MetricsInstances` or `LogsInstances` in a *different* namespace, use the `instanceNamespaceSelector` field. To learn more about this field, see the `GrafanaAgent` [CRD specification](https://github.com/grafana/agent/blob/main/production/operator/crds/monitoring.grafana.com_grafanaagents.yaml). + +1. Customize the manifests as needed and roll them out to your cluster using `kubectl apply -f` followed by the filename. + + This step creates a `ServiceAccount`, `ClusterRole`, and `ClusterRoleBinding` for the `GrafanaAgent` resource. + + Deploying a `GrafanaAgent` resource on its own does not spin up Agent Pods. Agent Operator creates Agent Pods once `MetricsInstance` and `LogsIntance` resources have been created. Follow the instructions in the [Deploy a MetricsInstance resource](#deploy-a-metricsinstance-resource) and [Deploy LogsInstance and PodLogs resources](#deploy-logsinstance-and-podlogs-resources) sections to create these resources. + +### Disable feature flags reporting + +To disable the [reporting]({{< relref "../static/configuration/flags.md#report-information-usage" >}}) usage of feature flags to Grafana, set `disableReporting` field to `true`. + +### Disable support bundle generation + +To disable the [support bundles functionality]({{< relref "../static/configuration/flags.md#support-bundles" >}}), set the `disableSupportBundle` field to `true`. + +## Deploy a MetricsInstance resource + +Next, you'll roll out a `MetricsInstance` resource. `MetricsInstance` resources define a `remote_write` sink for metrics and configure one or more selectors to watch for creation and updates to `*Monitor` objects. 
These objects allow you to define Agent scrape targets via Kubernetes manifests: + +- [ServiceMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) +- [PodMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#podmonitor) +- [Probes](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#probe) + +To deploy a `MetricsInstance` resource: + +1. Copy the following manifest to a file: + + ```yaml + apiVersion: monitoring.grafana.com/v1alpha1 + kind: MetricsInstance + metadata: + name: primary + namespace: default + labels: + agent: grafana-agent-metrics + spec: + remoteWrite: + - url: your_remote_write_URL + basicAuth: + username: + name: primary-credentials-metrics + key: username + password: + name: primary-credentials-metrics + key: password + + # Supply an empty namespace selector to look in all namespaces. Remove + # this to only look in the same namespace as the MetricsInstance CR + serviceMonitorNamespaceSelector: {} + serviceMonitorSelector: + matchLabels: + instance: primary + + # Supply an empty namespace selector to look in all namespaces. Remove + # this to only look in the same namespace as the MetricsInstance CR. + podMonitorNamespaceSelector: {} + podMonitorSelector: + matchLabels: + instance: primary + + # Supply an empty namespace selector to look in all namespaces. Remove + # this to only look in the same namespace as the MetricsInstance CR. + probeNamespaceSelector: {} + probeSelector: + matchLabels: + instance: primary + ``` + +1. Replace the `remote_write` URL and customize the namespace and label configuration as necessary. + + This step associates the `MetricsInstance` resource with the `agent: grafana-agent` `GrafanaAgent` resource deployed in the previous step. The `MetricsInstance` resource watches for creation and updates to `*Monitors` with the `instance: primary` label. + +1. Once you've rolled out the manifest, create the `basicAuth` credentials [using a Kubernetes Secret](https://kubernetes.io/docs/tasks/configmap-secret/managing-secret-using-config-file/): + + ```yaml + apiVersion: v1 + kind: Secret + metadata: + name: primary-credentials-metrics + namespace: default + stringData: + username: 'your_cloud_prometheus_username' + password: 'your_cloud_prometheus_API_key' + ``` + +If you're using Grafana Cloud, you can find your hosted Loki endpoint username and password by clicking **Details** on the Loki tile on the [Grafana Cloud Portal](https://grafana.com/profile/org). If you want to base64-encode these values yourself, use `data` instead of `stringData`. + +Once you've rolled out the `MetricsInstance` and its Secret, you can confirm that the `MetricsInstance` Agent is up and running using `kubectl get pod`. Since you haven't defined any monitors yet, this Agent doesn't have any scrape targets defined. In the next section, you'll create scrape targets for the cAdvisor and kubelet endpoints exposed by the `kubelet` service in the cluster. + +## Create ServiceMonitors for kubelet and cAdvisor endpoints + +Next, you'll create ServiceMonitors for kubelet and cAdvisor metrics exposed by the `kubelet` service. Every Node in your cluster exposes kubelet and cAdvisor metrics at `/metrics` and `/metrics/cadvisor`, respectively. Agent Operator creates a `kubelet` service that exposes these Node endpoints so that they can be scraped using ServiceMonitors. + +To scrape the kubelet and cAdvisor endpoints: + +1. 
Copy the following kubelet ServiceMonitor manifest to a file, then roll it out in your cluster using `kubectl apply -f` followed by the filename. + + ```yaml + apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + labels: + instance: primary + name: kubelet-monitor + namespace: default + spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 60s + metricRelabelings: + - action: keep + regex: kubelet_cgroup_manager_duration_seconds_count|go_goroutines|kubelet_pod_start_duration_seconds_count|kubelet_runtime_operations_total|kubelet_pleg_relist_duration_seconds_bucket|volume_manager_total_volumes|kubelet_volume_stats_capacity_bytes|container_cpu_usage_seconds_total|container_network_transmit_bytes_total|kubelet_runtime_operations_errors_total|container_network_receive_bytes_total|container_memory_swap|container_network_receive_packets_total|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|kubelet_running_pod_count|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate|container_memory_working_set_bytes|storage_operation_errors_total|kubelet_pleg_relist_duration_seconds_count|kubelet_running_pods|rest_client_request_duration_seconds_bucket|process_resident_memory_bytes|storage_operation_duration_seconds_count|kubelet_running_containers|kubelet_runtime_operations_duration_seconds_bucket|kubelet_node_config_error|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_running_container_count|kubelet_volume_stats_available_bytes|kubelet_volume_stats_inodes|container_memory_rss|kubelet_pod_worker_duration_seconds_count|kubelet_node_name|kubelet_pleg_relist_interval_seconds_bucket|container_network_receive_packets_dropped_total|kubelet_pod_worker_duration_seconds_bucket|container_start_time_seconds|container_network_transmit_packets_dropped_total|process_cpu_seconds_total|storage_operation_duration_seconds_bucket|container_memory_cache|container_network_transmit_packets_total|kubelet_volume_stats_inodes_used|up|rest_client_requests_total + sourceLabels: + - __name__ + port: https-metrics + relabelings: + - sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + - action: replace + targetLabel: job + replacement: integrations/kubernetes/kubelet + scheme: https + tlsConfig: + insecureSkipVerify: true + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + app.kubernetes.io/name: kubelet + ``` + +1. Copy the following cAdvisor ServiceMonitor manifest to a file, then roll it out in your cluster using `kubectl apply -f` followed by the filename. 
+ + ```yaml + apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + labels: + instance: primary + name: cadvisor-monitor + namespace: default + spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + honorTimestamps: false + interval: 60s + metricRelabelings: + - action: keep + regex: kubelet_cgroup_manager_duration_seconds_count|go_goroutines|kubelet_pod_start_duration_seconds_count|kubelet_runtime_operations_total|kubelet_pleg_relist_duration_seconds_bucket|volume_manager_total_volumes|kubelet_volume_stats_capacity_bytes|container_cpu_usage_seconds_total|container_network_transmit_bytes_total|kubelet_runtime_operations_errors_total|container_network_receive_bytes_total|container_memory_swap|container_network_receive_packets_total|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|kubelet_running_pod_count|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate|container_memory_working_set_bytes|storage_operation_errors_total|kubelet_pleg_relist_duration_seconds_count|kubelet_running_pods|rest_client_request_duration_seconds_bucket|process_resident_memory_bytes|storage_operation_duration_seconds_count|kubelet_running_containers|kubelet_runtime_operations_duration_seconds_bucket|kubelet_node_config_error|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_running_container_count|kubelet_volume_stats_available_bytes|kubelet_volume_stats_inodes|container_memory_rss|kubelet_pod_worker_duration_seconds_count|kubelet_node_name|kubelet_pleg_relist_interval_seconds_bucket|container_network_receive_packets_dropped_total|kubelet_pod_worker_duration_seconds_bucket|container_start_time_seconds|container_network_transmit_packets_dropped_total|process_cpu_seconds_total|storage_operation_duration_seconds_bucket|container_memory_cache|container_network_transmit_packets_total|kubelet_volume_stats_inodes_used|up|rest_client_requests_total + sourceLabels: + - __name__ + path: /metrics/cadvisor + port: https-metrics + relabelings: + - sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + - action: replace + targetLabel: job + replacement: integrations/kubernetes/cadvisor + scheme: https + tlsConfig: + insecureSkipVerify: true + namespaceSelector: + matchNames: + - default + selector: + matchLabels: + app.kubernetes.io/name: kubelet + ``` + +These two ServiceMonitors configure Agent to scrape all the kubelet and cAdvisor endpoints in your Kubernetes cluster (one of each per Node). In addition, it defines a `job` label which you can update (it is preset here for compatibility with Grafana Cloud's Kubernetes integration). It also provides an allowlist containing a core set of Kubernetes metrics to reduce remote metrics usage. If you don't need this allowlist, you can omit it, however, your metrics usage will increase significantly. + + When you're done, Agent should now be shipping kubelet and cAdvisor metrics to your remote Prometheus endpoint. To check this in Grafana Cloud, go to your dashboards, select **Integration - Kubernetes**, then select **Kubernetes / Kubelet**. + +## Deploy LogsInstance and PodLogs resources + +Next, you'll deploy a `LogsInstance` resource to collect logs from your cluster Nodes and ship these to your remote Loki endpoint. Agent Operator deploys a DaemonSet of Agents in your cluster that will tail log files defined in `PodLogs` resources. + +To deploy the `LogsInstance` resource into your cluster: + +1. 
Copy the following manifest to a file, then roll it out in your cluster using `kubectl apply -f` followed by the filename. + + ```yaml + apiVersion: monitoring.grafana.com/v1alpha1 + kind: LogsInstance + metadata: + name: primary + namespace: default + labels: + agent: grafana-agent-logs + spec: + clients: + - url: your_remote_logs_URL + basicAuth: + username: + name: primary-credentials-logs + key: username + password: + name: primary-credentials-logs + key: password + + # Supply an empty namespace selector to look in all namespaces. Remove + # this to only look in the same namespace as the LogsInstance CR + podLogsNamespaceSelector: {} + podLogsSelector: + matchLabels: + instance: primary + ``` + + This `LogsInstance` picks up `PodLogs` resources with the `instance: primary` label. Be sure to set the Loki URL to the correct push endpoint. For Grafana Cloud, this will look similar to `logs-prod-us-central1.grafana.net/loki/api/v1/push`, however check the [Grafana Cloud Portal](https://grafana.com/profile/org) to confirm by clicking **Details** on the Loki tile. + + Also note that this example uses the `agent: grafana-agent-logs` label, which associates this `LogsInstance` with the `GrafanaAgent` resource defined earlier. This means that it will inherit requests, limits, affinities and other properties defined in the `GrafanaAgent` custom resource. + +1. To create the Secret for the `LogsInstance` resource, copy the following Secret manifest to a file, then roll it out in your cluster using `kubectl apply -f` followed by the filename. + + ```yaml + apiVersion: v1 + kind: Secret + metadata: + name: primary-credentials-logs + namespace: default + stringData: + username: 'your_username_here' + password: 'your_password_here' + ``` + + If you're using Grafana Cloud, you can find your hosted Loki endpoint username and password by clicking **Details** on the Loki tile on the [Grafana Cloud Portal](https://grafana.com/profile/org). If you want to base64-encode these values yourself, use `data` instead of `stringData`. + +1. Copy the following `PodLogs` manifest to a file, then roll it to your cluster using `kubectl apply -f` followed by the filename. The manifest defines your logging targets. Agent Operator turns this into Agent configuration for the logs subsystem, and rolls it out to the DaemonSet of logging Agents. + + {{% admonition type="note" %}} + The following is a minimal working example which you should adapt to your production needs. + {{% /admonition %}} + + ```yaml + apiVersion: monitoring.grafana.com/v1alpha1 + kind: PodLogs + metadata: + labels: + instance: primary + name: kubernetes-pods + namespace: default + spec: + pipelineStages: + - docker: {} + namespaceSelector: + matchNames: + - default + selector: + matchLabels: {} + ``` + + This example tails container logs for all Pods in the `default` namespace. You can restrict the set of matched Pods by using the `matchLabels` selector. You can also set additional `pipelineStages` and create `relabelings` to add or modify log line labels. To learn more about the `PodLogs` specification and available resource fields, see the [PodLogs CRD](https://github.com/grafana/agent/blob/main/production/operator/crds/monitoring.grafana.com_podlogs.yaml). 
+ + The above `PodLogs` resource adds the following labels to log lines: + + - `namespace` + - `service` + - `pod` + - `container` + - `job` (set to `PodLogs_namespace/PodLogs_name`) + - `__path__` (the path to log files, set to `/var/log/pods/*$1/*.log` where `$1` is `__meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name`) + + To learn more about this configuration format and other available labels, see the [Promtail Scraping](https://grafana.com/docs/loki/latest/clients/promtail/scraping/#promtail-scraping-service-discovery) documentation. Agent Operator loads this configuration into the `LogsInstance` agents automatically. + +The DaemonSet of logging agents should be tailing your container logs, applying default labels to the log lines, and shipping them to your remote Loki endpoint. + +## Summary + +You've now rolled out the following into your cluster: + +- A `GrafanaAgent` resource that discovers one or more `MetricsInstance` and `LogsInstances` resources. +- A `MetricsInstance` resource that defines where to ship collected metrics. +- A `ServiceMonitor` resource to collect cAdvisor and kubelet metrics. +- A `LogsInstance` resource that defines where to ship collected logs. +- A `PodLogs` resource to collect container logs from Kubernetes Pods. + +## What's next + +You can verify that everything is working correctly by navigating to your Grafana instance and querying your Loki and Prometheus data sources. + +> Tip: You can deploy multiple GrafanaAgent resources to isolate allocated resources to the agent pods. By default, the GrafanaAgent resource determines the resources of all deployed agent containers. However, you might want different memory limits for metrics versus logs. diff --git a/docs/sources/operator/getting-started.md.templ b/docs/sources/operator/getting-started.md.templ new file mode 100644 index 000000000000..56265faf33e6 --- /dev/null +++ b/docs/sources/operator/getting-started.md.templ @@ -0,0 +1,155 @@ +--- +aliases: +- /docs/grafana-cloud/agent/operator/getting-started/ +- /docs/grafana-cloud/monitor-infrastructure/agent/operator/getting-started/ +- /docs/grafana-cloud/monitor-infrastructure/integrations/agent/operator/getting-started/ +canonical: https://grafana.com/docs/agent/latest/operator/getting-started/ +title: Install the Operator +description: Learn how to install the Operator +weight: 110 +--- + +# Install the Operator + +In this guide, you'll learn how to deploy [Grafana Agent Operator]({{< relref "./_index.md" >}}) into your Kubernetes cluster. This guide does not use Helm. To learn how to deploy Agent Operator using the [grafana-agent-operator Helm chart](https://github.com/grafana/helm-charts/tree/main/charts/agent-operator), see [Install Grafana Agent Operator with Helm]({{< relref "./helm-getting-started.md" >}}). + +> **Note**: If you are shipping your data to Grafana Cloud, use [Kubernetes Monitoring](https://grafana.com/docs/grafana-cloud/kubernetes-monitoring/) to set up Agent Operator. Kubernetes Monitoring provides a simplified approach and preconfigured dashboards and alerts. +## Before you begin + +To deploy Agent Operator, make sure that you have the following: + +- A Kubernetes cluster +- The `kubectl` command-line client installed and configured on your machine + +> **Note:** Agent Operator is currently in beta and its custom resources are subject to change. 
+ +## Deploy the Agent Operator Custom Resource Definitions (CRDs) + +Before you can create the custom resources for a Grafana Agent deployment, +you need to deploy the +[Custom Resource Definitions](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/) +to the cluster. These definitions describe the schema that the custom +resources will conform to. This is also required for Grafana Agent Operator to run; it +will fail if it can't find the Custom Resource Definitions of objects it is +looking to use. To learn more about the custom resources Agent Operator provides and their hierarchy, see [Grafana Agent Operator architecture]({{< relref "./architecture/" >}}). + +You can find the set of Custom Resource Definitions for Grafana Agent Operator in the Grafana Agent repository under +[production/operator/crds](https://github.com/grafana/agent/tree/main/production/operator/crds). + +To deploy the CRDs: + +1. Clone the agent repo and then apply the CRDs from the root of the agent repository: + ``` + kubectl apply -f production/operator/crds + ``` + + This step _must_ be completed before installing Agent Operator—it will +fail to start if the CRDs do not exist. + +2. To check that the CRDs are deployed to your Kubernetes cluster and to access documentation for each resource, use `kubectl explain `. + + For example, `kubectl explain GrafanaAgent` describes the GrafanaAgent CRD, and `kubectl explain GrafanaAgent.spec` gives you information on its spec field. + +## Install Grafana Agent Operator + +Next, install Agent Operator by applying the Agent Operator deployment schema. + +To install Agent Operator: + +1. Copy the following deployment schema to a file, updating the namespace if needed: + + ```yaml + apiVersion: apps/v1 + kind: Deployment + metadata: + name: grafana-agent-operator + namespace: default + labels: + app: grafana-agent-operator + spec: + replicas: 1 + selector: + matchLabels: + app: grafana-agent-operator + template: + metadata: + labels: + app: grafana-agent-operator + spec: + serviceAccountName: grafana-agent-operator + containers: + - name: operator + image: grafana/agent-operator:$AGENT_VERSION + args: + - --kubelet-service=default/kubelet + --- + + apiVersion: v1 + kind: ServiceAccount + metadata: + name: grafana-agent-operator + namespace: default + + --- + + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: grafana-agent-operator + rules: + - apiGroups: [monitoring.grafana.com] + resources: + - grafanaagents + - metricsinstances + - logsinstances + - podlogs + - integrations + verbs: [get, list, watch] + - apiGroups: [monitoring.coreos.com] + resources: + - podmonitors + - probes + - servicemonitors + verbs: [get, list, watch] + - apiGroups: [""] + resources: + - namespaces + - nodes + verbs: [get, list, watch] + - apiGroups: [""] + resources: + - secrets + - services + - configmaps + - endpoints + verbs: [get, list, watch, create, update, patch, delete] + - apiGroups: ["apps"] + resources: + - statefulsets + - daemonsets + - deployments + verbs: [get, list, watch, create, update, patch, delete] + + --- + + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + name: grafana-agent-operator + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent-operator + subjects: + - kind: ServiceAccount + name: grafana-agent-operator + namespace: default + ``` + +2. 
Roll out the deployment in your cluster using `kubectl apply -f` followed by your deployment filename. + +> **Note**: If you want to run Agent Operator locally, make sure your kubectl context is correct. Running locally uses your current kubectl context. If it is set to your production environment, you could accidentally deploy a new Grafana Agent to production. Install CRDs on the cluster prior to running locally. Afterwards, you can run Agent Operator using `go run ./cmd/grafana-agent-operator`. + +## Deploy the Grafana Agent Operator resources + +Agent Operator is now up and running. Next, you need to install a Grafana Agent for Agent Operator to run for you. To do so, follow the instructions in the [Deploy the Grafana Agent Operator resources]({{< relref "./deploy-agent-operator-resources.md" >}}) topic. diff --git a/docs/sources/static/configuration/integrations/node-exporter-config.md.templ b/docs/sources/static/configuration/integrations/node-exporter-config.md.templ new file mode 100644 index 000000000000..ac84dacf6194 --- /dev/null +++ b/docs/sources/static/configuration/integrations/node-exporter-config.md.templ @@ -0,0 +1,433 @@ +--- +aliases: +- ../../../configuration/integrations/node-exporter-config/ +canonical: https://grafana.com/docs/agent/latest/static/configuration/integrations/node-exporter-config/ +title: node_exporter_config +description: Learn about node_exporter_config +--- + +# node_exporter_config + +The `node_exporter_config` block configures the `node_exporter` integration, +which is an embedded version of +[`node_exporter`](https://github.com/prometheus/node_exporter) +and allows for collecting metrics from the UNIX system that `node_exporter` is +running on. It provides a significant amount of collectors that are responsible +for monitoring various aspects of the host system. + +Note that if running the Agent in a container, you will need to bind mount +folders from the host system so the integration can monitor them. 
You can use +the example below, making sure to replace `/path/to/config.yaml` with a path on +your host machine where an Agent configuration file is: + +``` +docker run \ + --net="host" \ + --pid="host" \ + --cap-add=SYS_TIME \ + -v "/:/host/root:ro,rslave" \ + -v "/sys:/host/sys:ro,rslave" \ + -v "/proc:/host/proc:ro,rslave" \ + -v /tmp/agent:/etc/agent \ + -v /path/to/config.yaml:/etc/agent-config/agent.yaml \ + grafana/agent:$AGENT_VERSION \ + --config.file=/etc/agent-config/agent.yaml +``` + +Use this configuration file for testing out `node_exporter` support, replacing +the `remote_write` settings with settings appropriate for you: + +```yaml +server: + log_level: info + +metrics: + wal_directory: /tmp/agent + global: + scrape_interval: 60s + remote_write: + - url: https://prometheus-us-central1.grafana.net/api/prom/push + basic_auth: + username: user-id + password: api-token + +integrations: + node_exporter: + enabled: true + rootfs_path: /host/root + sysfs_path: /host/sys + procfs_path: /host/proc + udev_data_path: /host/root/run/udev/data +``` + +For running on Kubernetes, ensure to set the equivalent mounts and capabilities +there as well: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: agent +spec: + containers: + - image: grafana/agent:$AGENT_VERSION + name: agent + args: + - --config.file=/etc/agent-config/agent.yaml + securityContext: + capabilities: + add: ["SYS_TIME"] + privileged: true + runAsUser: 0 + volumeMounts: + - name: rootfs + mountPath: /host/root + readOnly: true + - name: sysfs + mountPath: /host/sys + readOnly: true + - name: procfs + mountPath: /host/proc + readOnly: true + hostPID: true + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + volumes: + - name: rootfs + hostPath: + path: / + - name: sysfs + hostPath: + path: /sys + - name: procfs + hostPath: + path: /proc +``` + +The manifest and Tanka configs provided by this repository do not have the +mounts or capabilities required for running this integration. + +Some collectors only work on specific operating systems, documented in the +table below. Enabling a collector that is not supported by the operating system +the Agent is running on is a no-op. + +| Name | Description | OS | Enabled by default | +| ---------------- | ----------- | -- | ------------------ | +| arp | Exposes ARP statistics from /proc/net/arp. | Linux | yes | +| bcache | Exposes bcache statistics from /sys/fs/bcache. | Linux | yes | +| bonding | Exposes the number of configured and active slaves of Linux bonding interfaces. | Linux | yes | +| boottime | Exposes system boot time derived from the kern.boottime sysctl. | Darwin, Dragonfly, FreeBSD, NetBSD, OpenBSD, Solaris | yes | +| btrfs | Exposes statistics on btrfs. | Linux | yes | +| buddyinfo | Exposes statistics of memory fragments as reported by /proc/buddyinfo. | Linux | no | +| cgroups | Exposes number of active and enabled cgroups. | Linux | no | +| conntrack | Shows conntrack statistics (does nothing if no /proc/sys/net/netfilter/ present). | Linux | yes | +| cpu | Exposes CPU statistics. | Darwin, Dragonfly, FreeBSD, Linux, Solaris, NetBSD | yes | +| cpufreq | Exposes CPU frequency statistics. | Linux, Solaris | yes | +| devstat | Exposes device statistics. | Dragonfly, FreeBSD | no | +| diskstats | Exposes disk I/O statistics. | Darwin, Linux, OpenBSD | yes | +| dmi | Exposes DMI information. | Linux | yes | +| drbd | Exposes Distributed Replicated Block Device statistics (to version 8.4). 
| Linux | no | +| drm | Exposes GPU card info from /sys/class/drm/card?/device | Linux | no | +| edac | Exposes error detection and correction statistics. | Linux | yes | +| entropy | Exposes available entropy. | Linux | yes | +| ethtool | Exposes ethtool stats | Linux | no | +| exec | Exposes execution statistics. | Dragonfly, FreeBSD | yes | +| fibrechannel | Exposes FibreChannel statistics. | Linux | yes | +| filefd | Exposes file descriptor statistics from /proc/sys/fs/file-nr. | Linux | yes | +| filesystem | Exposes filesystem statistics, such as disk space used. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD | yes | +| hwmon | Exposes hardware monitoring and sensor data from /sys/class/hwmon. | Linux | yes | +| infiniband | Exposes network statistics specific to InfiniBand and Intel OmniPath configurations. | Linux | yes | +| interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD | no | +| ipvs | Exposes IPVS status from /proc/net/ip_vs and stats from /proc/net/ip_vs_stats. | Linux | yes | +| ksmd | Exposes kernel and system statistics from /sys/kernel/mm/ksm. | Linux | no | +| lnstat | Exposes Linux network cache stats | Linux | no | +| loadavg | Exposes load average. | Darwin, Dragonfly, FreeBSD, Linux, NetBSD, OpenBSD, Solaris | yes | +| logind | Exposes session counts from logind. | Linux | no | +| mdadm | Exposes statistics about devices in /proc/mdstat (does nothing if no /proc/mdstat present). | Linux | yes | +| meminfo | Exposes memory statistics. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD, NetBSD | yes | +| meminfo_numa | Exposes memory statistics from /proc/meminfo_numa. | Linux | no | +| mountstats | Exposes filesystem statistics from /proc/self/mountstats. Exposes detailed NFS client statistics. | Linux | no | +| netclass | Exposes network interface info from /sys/class/net. | Linux | yes | +| netisr | Exposes netisr statistics. | FreeBSD | yes | +| netdev | Exposes network interface statistics such as bytes transferred. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD | yes | +| netstat | Exposes network statistics from /proc/net/netstat. This is the same information as netstat -s. | Linux | yes | +| network_route | Exposes network route statistics. | Linux | no | +| nfs | Exposes NFS client statistics from /proc/net/rpc/nfs. This is the same information as nfsstat -c. | Linux | yes | +| nfsd | Exposes NFS kernel server statistics from /proc/net/rpc/nfsd. This is the same information as nfsstat -s. | Linux | yes | +| ntp | Exposes local NTP daemon health to check time. | any | no | +| nvme | Exposes NVMe statistics. | Linux | yes | +| os | Exposes os-release information. | Linux | yes | +| perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel configuration and settings). | Linux | no | +| powersupplyclass | Collects information on power supplies. | any | yes | +| pressure | Exposes pressure stall statistics from /proc/pressure/. | Linux (kernel 4.20+ and/or CONFIG_PSI) | yes | +| processes | Exposes aggregate process statistics from /proc. | Linux | no | +| qdisc | Exposes queuing discipline statistics. | Linux | no | +| rapl | Exposes various statistics from /sys/class/powercap. | Linux | yes | +| runit | Exposes service status from runit. | any | no | +| schedstat | Exposes task scheduler statistics from /proc/schedstat. | Linux | yes | +| selinux | Exposes SELinux statistics. | Linux | yes | +| slabinfo | Exposes slab statistics from `/proc/slabinfo`. 
| Linux | no | +| softirqs | Exposes detailed softirq statistics from `/proc/softirqs`. | Linux | no | +| sockstat | Exposes various statistics from /proc/net/sockstat. | Linux | yes | +| softnet | Exposes statistics from /proc/net/softnet_stat. | Linux | yes | +| stat | Exposes various statistics from /proc/stat. This includes boot time, forks and interrupts. | Linux | yes | +| supervisord | Exposes service status from supervisord. | any | no | +| sysctl | Expose sysctl values from `/proc/sys`. | Linux | no | +| systemd | Exposes service and system status from systemd. | Linux | no | +| tapestats | Exposes tape device stats. | Linux | yes | +| tcpstat | Exposes TCP connection status information from /proc/net/tcp and /proc/net/tcp6. (Warning: the current version has potential performance issues in high load situations). | Linux | no | +| textfile | Collects metrics from files in a directory matching the filename pattern *.prom. The files must be using the text format defined here: https://prometheus.io/docs/instrumenting/exposition_formats/ | any | yes | +| thermal | Exposes thermal statistics. | Darwin | yes | +| thermal_zone | Exposes thermal zone & cooling device statistics from /sys/class/thermal. | Linux | yes | +| time | Exposes the current system time. | any | yes | +| timex | Exposes selected adjtimex(2) system call stats. | Linux | yes | +| udp_queues | Exposes UDP total lengths of the rx_queue and tx_queue from /proc/net/udp and /proc/net/udp6. | Linux | yes | +| uname | Exposes system information as provided by the uname system call. | Darwin, FreeBSD, Linux, OpenBSD, NetBSD | yes | +| vmstat | Exposes statistics from /proc/vmstat. | Linux | yes | +| wifi | Exposes WiFi device and station statistics. | Linux | no | +| xfs | Exposes XFS runtime statistics. | Linux (kernel 4.4+) | yes | +| zfs | Exposes ZFS performance statistics. | Linux, Solaris | yes | +| zoneinfo | Exposes zone stats. | Linux | no | + +```yaml + # Enables the node_exporter integration, allowing the Agent to automatically + # collect system metrics from the host UNIX system. + [enabled: | default = false] + + # Sets an explicit value for the instance label when the integration is + # self-scraped. Overrides inferred values. + # + # The default value for this integration is inferred from the agent hostname + # and HTTP listen port, delimited by a colon. + [instance: ] + + # Automatically collect metrics from this integration. If disabled, + # the node_exporter integration will be run but not scraped and thus not remote-written. Metrics for the + # integration will be exposed at /integrations/node_exporter/metrics and can + # be scraped by an external process. + [scrape_integration: | default = ] + + # How often should the metrics be collected? Defaults to + # prometheus.global.scrape_interval. + [scrape_interval: | default = ] + + # The timtout before considering the scrape a failure. Defaults to + # prometheus.global.scrape_timeout. + [scrape_timeout: | default = ] + + # Allows for relabeling labels on the target. + relabel_configs: + [- ... ] + + # Relabel metrics coming from the integration, allowing to drop series + # from the integration that you don't care about. + metric_relabel_configs: + [ - ... ] + + # How frequent to truncate the WAL for this integration. + [wal_truncate_frequency: | default = "60m"] + + # Monitor the exporter itself and include those metrics in the results. + [include_exporter_metrics: | default = false] + + # Optionally defines the list of enabled-by-default collectors. 
+ # Anything not provided in the list below will be disabled by default, + # but requires at least one element to be treated as defined. + # + # This is useful if you have a very explicit set of collectors you wish + # to run. + set_collectors: + - [] + + # Additional collectors to enable on top of the default set of enabled + # collectors or on top of the list provided by set_collectors. + # + # This is useful if you have a few collectors you wish to run that are + # not enabled by default, but do not want to explicitly provide an entire + # list through set_collectors. + enable_collectors: + - [] + + # Additional collectors to disable on top of the default set of disabled + # collectors. Takes precedence over enable_collectors. + # + # This is useful if you have a few collectors you do not want to run that + # are enabled by default, but do not want to explicitly provide an entire + # list through set_collectors. + disable_collectors: + - [] + + # procfs mountpoint. + [procfs_path: | default = "/proc"] + + # sysfs mountpoint. + [sysfs_path: | default = "/sys"] + + # rootfs mountpoint. If running in docker, the root filesystem of the host + # machine should be mounted and this value should be changed to the mount + # directory. + [rootfs_path: | default = "/"] + + # udev data path needed for diskstats from Node exporter. When running + # in Kubernetes it should be set to /host/root/run/udev/data. + [udev_data_path: | default = "/run/udev/data"] + + # Expose expensive bcache priority stats. + [enable_bcache_priority_stats: ] + + # Regexp of `bugs` field in cpu info to filter. + [cpu_bugs_include: ] + + # Enable the node_cpu_guest_seconds_total metric. + [enable_cpu_guest_seconds_metric: | default = true] + + # Enable the cpu_info metric for the cpu collector. + [enable_cpu_info_metric: | default = true] + + # Regexp of `flags` field in cpu info to filter. + [cpu_flags_include: ] + + # Regexp of devices to ignore for diskstats. + [diskstats_device_exclude: | default = "^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"] + + # Regexp of devices to include for diskstats. If set, the diskstat_device_exclude field is ignored. + [diskstats_device_include: ] + + # Regexp of ethtool devices to exclude (mutually exclusive with ethtool_device_include) + [ethtool_device_exclude: ] + + # Regexp of ethtool devices to include (mutually exclusive with ethtool_device_exclude) + [ethtool_device_include: ] + + # Regexp of ethtool stats to include. + [ethtool_metrics_include: | default = ".*"] + + # Regexp of mount points to ignore for filesystem collector. + [filesystem_mount_points_exclude: | default = "^/(dev|proc|sys|var/lib/docker/.+)($|/)"] + + # Regexp of filesystem types to ignore for filesystem collector. + [filesystem_fs_types_exclude: | default = "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"] + + # How long to wait for a mount to respond before marking it as stale. + [filesystem_mount_timeout: | default = "5s"] + + # Array of IPVS backend stats labels. + # + # The default is [local_address, local_port, remote_address, remote_port, proto, local_mark]. + ipvs_backend_labels: + [- ] + + # NTP server to use for ntp collector + [ntp_server: | default = "127.0.0.1"] + + # NTP protocol version + [ntp_protocol_version: | default = 4] + + # Certify that the server address is not a public ntp server. 
+ [ntp_server_is_local: | default = false] + + # IP TTL to use wile sending NTP query. + [ntp_ip_ttl: | default = 1] + + # Max accumulated distance to the root. + [ntp_max_distance: | default = "3466080us"] + + # Offset between local clock and local ntpd time to tolerate. + [ntp_local_offset_tolerance: | default = "1ms"] + + # Regexp of net devices to ignore for netclass collector. + [netclass_ignored_devices: | default = "^$"] + + # Ignore net devices with invalid speed values. This will default to true in + # node_exporter 2.0. + [netclass_ignore_invalid_speed_device: | default = false] + + # Enable collecting address-info for every device. + [netdev_address_info: ] + + # Regexp of net devices to exclude (mutually exclusive with include) + [netdev_device_exclude: | default = ""] + + # Regexp of net devices to include (mutually exclusive with exclude) + [netdev_device_include: | default = ""] + + # Regexp of fields to return for netstat collector. + [netstat_fields: | default = "^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*|TCPSynRetrans|TCPTimeouts)|Tcp_(ActiveOpens|InSegs|OutSegs|OutRsts|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts|RcvbufErrors|SndbufErrors))$"] + + # List of CPUs from which perf metrics should be collected. + [perf_cpus: | default = ""] + + # Array of perf tracepoints that should be collected. + perf_tracepoint: + [- ] + + # Disable perf hardware profilers. + [perf_disable_hardware_profilers: | default = false] + + # Perf hardware profilers that should be collected. + perf_hardware_profilers: + [- ] + + # Disable perf software profilers. + [perf_disable_software_profilers: | default = false] + + # Perf software profilers that should be collected. + perf_software_profilers: + [- ] + + # Disable perf cache profilers. + [perf_disable_cache_profilers: | default = false] + + # Perf cache profilers that should be collected. + perf_cache_profilers: + [- ] + + # Regexp of power supplies to ignore for the powersupplyclass collector. + [powersupply_ignored_supplies: | default = "^$"] + + # Path to runit service directory. + [runit_service_dir: | default = "/etc/service"] + + # XML RPC endpoint for the supervisord collector. + # + # Setting SUPERVISORD_URL in the environment will override the default value. + # An explicit value in the YAML config takes precedence over the environment + # variable. + [supervisord_url: | default = "http://localhost:9001/RPC2"] + + # Numeric sysctl values to expose. + # For sysctl with multiple numeric values, + # an optional mapping can be given to expose each value as its own metric. + sysctl_include: + [- ] + + # String sysctl values to expose. + sysctl_include_info: + [- ] + + # Regexp of systemd units to include. Units must both match include and not + # match exclude to be collected. + [systemd_unit_include: | default = ".+"] + + # Regexp of systemd units to exclude. Units must both match include and not + # match exclude to be collected. + [systemd_unit_exclude: | default = ".+\\.(automount|device|mount|scope|slice)"] + + # Enables service unit tasks metrics unit_tasks_current and unit_tasks_max + [systemd_enable_task_metrics: | default = false] + + # Enables service unit metric service_restart_total + [systemd_enable_restarts_metrics: | default = false] + + # Enables service unit metric unit_start_time_seconds + [systemd_enable_start_time_metrics: | default = false] + + # Regexp of tapestats devices to ignore. 
+ [tapestats_ignored_devices: | default = "^$"] + + # Directory to read *.prom files from for the textfile collector. + [textfile_directory: | default = ""] + + # Regexp of fields to return for the vmstat collector. + [vmstat_fields: | default = "^(oom_kill|pgpg|pswp|pg.*fault).*"] +``` diff --git a/docs/sources/static/configuration/integrations/process-exporter-config.md.templ b/docs/sources/static/configuration/integrations/process-exporter-config.md.templ new file mode 100644 index 000000000000..3f417b7b3faa --- /dev/null +++ b/docs/sources/static/configuration/integrations/process-exporter-config.md.templ @@ -0,0 +1,184 @@ +--- +aliases: +- ../../../configuration/integrations/process-exporter-config/ +canonical: https://grafana.com/docs/agent/latest/static/configuration/integrations/process-exporter-config/ +title: process_exporter_config +description: Learn about process_exporter_config +--- + +# process_exporter_config + +The `process_exporter_config` block configures the `process_exporter` integration, +which is an embedded version of +[`process-exporter`](https://github.com/ncabatoff/process-exporter) +and allows for collection metrics based on the /proc filesystem on Linux +systems. Note that on non-Linux systems, enabling this exporter is a no-op. + +Note that if running the Agent in a container, you will need to bind mount +folders from the host system so the integration can monitor them: + +``` +docker run \ + -v "/proc:/proc:ro" \ + -v /tmp/agent:/etc/agent \ + -v /path/to/config.yaml:/etc/agent-config/agent.yaml \ + grafana/agent:$AGENT_VERSION \ + --config.file=/etc/agent-config/agent.yaml +``` + +Replace `/path/to/config.yaml` with the appropriate path on your host system +where an Agent config file can be found. + +For running on Kubernetes, ensure to set the equivalent mounts and capabilities +there as well: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: agent +spec: + containers: + - image: grafana/agent:$AGENT_VERSION + name: agent + args: + - --config.file=/etc/agent-config/agent.yaml + volumeMounts: + - name: procfs + mountPath: /proc + readOnly: true + volumes: + - name: procfs + hostPath: + path: /proc +``` + +The manifest and Tanka configs provided by this repository do not have the +mounts or capabilities required for running this integration. + +An example config for `process_exporter_config` that tracks all processes is the +following: + +``` +enabled: true +process_names: +- name: "{{.Comm}}" + cmdline: + - '.+' +``` + +Full reference of options: + +```yaml + # Enables the process_exporter integration, allowing the Agent to automatically + # collect system metrics from the host UNIX system. + [enabled: | default = false] + + # Sets an explicit value for the instance label when the integration is + # self-scraped. Overrides inferred values. + # + # The default value for this integration is inferred from the agent hostname + # and HTTP listen port, delimited by a colon. + [instance: ] + + # Automatically collect metrics from this integration. If disabled, + # the process_exporter integration will be run but not scraped and thus not + # remote-written. Metrics for the integration will be exposed at + # /integrations/process_exporter/metrics and can be scraped by an external + # process. + [scrape_integration: | default = ] + + # How often should the metrics be collected? Defaults to + # prometheus.global.scrape_interval. + [scrape_interval: | default = ] + + # The timeout before considering the scrape a failure. 
Defaults to + # prometheus.global.scrape_timeout. + [scrape_timeout: | default = ] + + # Allows for relabeling labels on the target. + relabel_configs: + [- ... ] + + # Relabel metrics coming from the integration, allowing to drop series + # from the integration that you don't care about. + metric_relabel_configs: + [ - ... ] + + # How frequent to truncate the WAL for this integration. + [wal_truncate_frequency: | default = "60m"] + + # procfs mountpoint. + [procfs_path: | default = "/proc"] + + # If a proc is tracked, track with it any children that aren't a part of their + # own group. + [track_children: | default = true] + + # Report on per-threadname metrics as well. + [track_threads: | default = true] + + # Gather metrics from smaps file, which contains proportional resident memory + # size. + [gather_smaps: | default = true] + + # Recheck process names on each scrape. + [recheck_on_scrape: | default = false] + + # A collection of matching rules to use for deciding which processes to + # monitor. Each config can match multiple processes to be tracked as a single + # process "group." + process_names: + [- ] +``` + +## process_matcher_config + +```yaml +# The name to use for identifying the process group name in the metric. By +# default, it uses the base path of the executable. +# +# The following template variables are available: +# +# - {{.Comm}}: Basename of the original executable from /proc//stat +# - {{.ExeBase}}: Basename of the executable from argv[0] +# - {{.ExeFull}}: Fully qualified path of the executable +# - {{.Username}}: Username of the effective user +# - {{.Matches}}: Map containing all regex capture groups resulting from +# matching a process with the cmdline rule group. +# - {{.PID}}: PID of the process. Note that the PID is copied from the +# first executable found. +# - {{.StartTime}}: The start time of the process. This is useful when combined +# with PID as PIDS get reused over time. +# - `{{.Cgroups}}`: The cgroups, if supported, of the process (`/proc/self/cgroup`). This is particularly useful for identifying to which container a process belongs. +# +# **NOTE**: Using `PID` or `StartTime` is discouraged, as it is almost never what you want, and is likely to result in high cardinality metrics. + + +[name: | default = "{{.ExeBase}}"] + +# A list of strings that match the base executable name for a process, truncated +# at 15 characters. It is derived from reading the second field of +# /proc//stat minus the parens. +# +# If any of the strings match, the process will be tracked. +comm: + [- ] + +# A list of strings that match argv[0] for a process. If there are no slashes, +# only the basename of argv[0] needs to match. Otherwise the name must be an +# exact match. For example, "postgres" may match any postgres binary but +# "/usr/local/bin/postgres" can only match a postgres at that path exactly. +# +# If any of the strings match, the process will be tracked. +exe: + [- ] + +# A list of regular expressions applied to the argv of the process. Each +# regex here must match the corresponding argv for the process to be tracked. +# The first element that is matched is argv[1]. +# +# Regex Captures are added to the .Matches map for use in the name. 
+cmdline: + [- ] +``` diff --git a/docs/sources/static/set-up/install/install-agent-docker.md.templ b/docs/sources/static/set-up/install/install-agent-docker.md.templ new file mode 100644 index 000000000000..536ef3fc5978 --- /dev/null +++ b/docs/sources/static/set-up/install/install-agent-docker.md.templ @@ -0,0 +1,76 @@ +--- +aliases: +- ../../set-up/install-agent-docker/ +- ../set-up/install-agent-docker/ +canonical: https://grafana.com/docs/agent/latest/static/set-up/install/install-agent-docker/ +menuTitle: Docker +title: Run Grafana Agent in static mode in a Docker container +description: Learn how to run Grafana Agent in static mode in a Docker container +weight: 200 +--- + +# Run Grafana Agent in static mode in a Docker container + +Grafana Agent is available as a Docker container image on the following platforms: + +* [Linux containers][] for AMD64 and ARM64. +* [Windows containers][] for AMD64. + +[Linux containers]: #run-a-linux-docker-container +[Windows containers]: #run-a-windows-docker-container + +## Before you begin + +* Install [Docker][] on your computer. +* Create and save a Grafana Agent YAML [configuration file][configure] on your computer. + +[Docker]: https://docker.io + +## Run a Linux Docker container + +To run a Grafana Agent Docker container on Linux, run the following command in a terminal window: + +```shell +docker run \ + -v WAL_DATA_DIRECTORY:/etc/agent/data \ + -v CONFIG_FILE_PATH:/etc/agent/agent.yaml \ + grafana/agent:$AGENT_VERSION +``` + +Replace `CONFIG_FILE_PATH` with the configuration file path on your Linux host system. + +{{% admonition type="note" %}} +For the flags to work correctly, you must expose the paths on your Linux host to the Docker container through a bind mount. +{{%/admonition %}} + +## Run a Windows Docker container + +To run a Grafana Agent Docker container on Windows, run the following command in a Windows command prompt: + +```shell +docker run ^ + -v WAL_DATA_DIRECTORY:C:\etc\grafana-agent\data ^ + -v CONFIG_FILE_PATH:C:\etc\grafana-agent ^ + grafana/agent:$AGENT_VERSION-windows +``` + +Replace the following: + +* `CONFIG_FILE_PATH`: The configuration file path on your Windows host system. +* `WAL_DATA_DIRECTORY`: the directory used to store your metrics before sending them to Prometheus. Old WAL data is cleaned up every hour and is used for recovery if the process crashes. + +{{% admonition type="note" %}} +For the flags to work correctly, you must expose the paths on your Windows host to the Docker container through a bind mount. +{{%/admonition %}} + +## Next steps + +- [Start Grafana Agent][start] +- [Configure Grafana Agent][configure] + +{{% docs/reference %}} +[start]: "/docs/agent/ -> /docs/agent//static/set-up/start-agent" +[start]: "/docs/grafana-cloud/ -> ../start-agent" +[configure]: "/docs/agent/ -> /docs/agent//static/configuration/create-config-file" +[configure]: "/docs/grafana-cloud/ -> ../../configuration/create-config-file" +{{% /docs/reference %}} diff --git a/pkg/operator/defaults.go.templ b/pkg/operator/defaults.go.templ new file mode 100644 index 000000000000..fe5c2b2b70b6 --- /dev/null +++ b/pkg/operator/defaults.go.templ @@ -0,0 +1,15 @@ +package operator + +// Supported versions of the Grafana Agent. +var ( + DefaultAgentVersion = "$AGENT_VERSION" + DefaultAgentBaseImage = "grafana/agent" + DefaultAgentImage = DefaultAgentBaseImage + ":" + DefaultAgentVersion +) + +// Defaults for Prometheus Config Reloader. 
+var ( + DefaultConfigReloaderVersion = "v0.67.1" + DefaultConfigReloaderBaseImage = "quay.io/prometheus-operator/prometheus-config-reloader" + DefaultConfigReloaderImage = DefaultConfigReloaderBaseImage + ":" + DefaultConfigReloaderVersion +) diff --git a/production/kubernetes/agent-bare.yaml.templ b/production/kubernetes/agent-bare.yaml.templ new file mode 100644 index 000000000000..e306191ee170 --- /dev/null +++ b/production/kubernetes/agent-bare.yaml.templ @@ -0,0 +1,115 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana-agent + namespace: ${NAMESPACE} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: grafana-agent +rules: +- apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + - events + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: ${NAMESPACE} +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: grafana-agent + name: grafana-agent + namespace: ${NAMESPACE} +spec: + clusterIP: None + ports: + - name: grafana-agent-http-metrics + port: 80 + targetPort: 80 + selector: + name: grafana-agent +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: grafana-agent + namespace: ${NAMESPACE} +spec: + replicas: 1 + selector: + matchLabels: + name: grafana-agent + serviceName: grafana-agent + template: + metadata: + labels: + name: grafana-agent + spec: + containers: + - args: + - -config.expand-env=true + - -config.file=/etc/agent/agent.yaml + - -enable-features=integrations-next + - -server.http.address=0.0.0.0:80 + env: + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + image: grafana/agent:$AGENT_VERSION + imagePullPolicy: IfNotPresent + name: grafana-agent + ports: + - containerPort: 80 + name: http-metrics + volumeMounts: + - mountPath: /var/lib/agent + name: agent-wal + - mountPath: /etc/agent + name: grafana-agent + serviceAccountName: grafana-agent + volumes: + - configMap: + name: grafana-agent + name: grafana-agent + updateStrategy: + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: agent-wal + namespace: ${NAMESPACE} + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi diff --git a/production/kubernetes/agent-loki.yaml.templ b/production/kubernetes/agent-loki.yaml.templ new file mode 100644 index 000000000000..5e279d5d9e84 --- /dev/null +++ b/production/kubernetes/agent-loki.yaml.templ @@ -0,0 +1,100 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana-agent-logs + namespace: ${NAMESPACE} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: grafana-agent-logs +rules: +- apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + - events + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-agent-logs +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent-logs +subjects: +- kind: ServiceAccount + name: grafana-agent-logs + namespace: ${NAMESPACE} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: 
grafana-agent-logs + namespace: ${NAMESPACE} +spec: + minReadySeconds: 10 + selector: + matchLabels: + name: grafana-agent-logs + template: + metadata: + labels: + name: grafana-agent-logs + spec: + containers: + - args: + - -config.expand-env=true + - -config.file=/etc/agent/agent.yaml + - -server.http.address=0.0.0.0:80 + env: + - name: HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + image: grafana/agent:$AGENT_VERSION + imagePullPolicy: IfNotPresent + name: grafana-agent-logs + ports: + - containerPort: 80 + name: http-metrics + securityContext: + privileged: true + runAsUser: 0 + volumeMounts: + - mountPath: /etc/agent + name: grafana-agent-logs + - mountPath: /var/log + name: varlog + - mountPath: /var/lib/docker/containers + name: varlibdockercontainers + readOnly: true + serviceAccountName: grafana-agent-logs + tolerations: + - effect: NoSchedule + operator: Exists + volumes: + - configMap: + name: grafana-agent-logs + name: grafana-agent-logs + - hostPath: + path: /var/log + name: varlog + - hostPath: + path: /var/lib/docker/containers + name: varlibdockercontainers + updateStrategy: + type: RollingUpdate diff --git a/production/kubernetes/agent-traces.yaml.templ b/production/kubernetes/agent-traces.yaml.templ new file mode 100644 index 000000000000..f77edbd855dc --- /dev/null +++ b/production/kubernetes/agent-traces.yaml.templ @@ -0,0 +1,154 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana-agent-traces + namespace: ${NAMESPACE} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: grafana-agent-traces +rules: +- apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + - events + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-agent-traces +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent-traces +subjects: +- kind: ServiceAccount + name: grafana-agent-traces + namespace: ${NAMESPACE} +--- +apiVersion: v1 +kind: Service +metadata: + labels: + name: grafana-agent-traces + name: grafana-agent-traces + namespace: ${NAMESPACE} +spec: + ports: + - name: grafana-agent-traces-http-metrics + port: 80 + targetPort: 80 + - name: grafana-agent-traces-thrift-compact + port: 6831 + protocol: UDP + targetPort: 6831 + - name: grafana-agent-traces-thrift-binary + port: 6832 + protocol: UDP + targetPort: 6832 + - name: grafana-agent-traces-thrift-http + port: 14268 + protocol: TCP + targetPort: 14268 + - name: grafana-agent-traces-thrift-grpc + port: 14250 + protocol: TCP + targetPort: 14250 + - name: grafana-agent-traces-zipkin + port: 9411 + protocol: TCP + targetPort: 9411 + - name: grafana-agent-traces-otlp-grpc + port: 4317 + protocol: TCP + targetPort: 4317 + - name: grafana-agent-traces-otlp-http + port: 4318 + protocol: TCP + targetPort: 4318 + - name: grafana-agent-traces-opencensus + port: 55678 + protocol: TCP + targetPort: 55678 + selector: + name: grafana-agent-traces +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana-agent-traces + namespace: ${NAMESPACE} +spec: + minReadySeconds: 10 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: grafana-agent-traces + template: + metadata: + labels: + name: grafana-agent-traces + spec: + containers: + - args: + - -config.expand-env=true + - -config.file=/etc/agent/agent.yaml + - -server.http.address=0.0.0.0:80 + env: + - name: 
HOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + image: grafana/agent:$AGENT_VERSION + imagePullPolicy: IfNotPresent + name: grafana-agent-traces + ports: + - containerPort: 80 + name: http-metrics + - containerPort: 6831 + name: thrift-compact + protocol: UDP + - containerPort: 6832 + name: thrift-binary + protocol: UDP + - containerPort: 14268 + name: thrift-http + protocol: TCP + - containerPort: 14250 + name: thrift-grpc + protocol: TCP + - containerPort: 9411 + name: zipkin + protocol: TCP + - containerPort: 4317 + name: otlp-grpc + protocol: TCP + - containerPort: 4318 + name: otlp-http + protocol: TCP + - containerPort: 55678 + name: opencensus + protocol: TCP + volumeMounts: + - mountPath: /etc/agent + name: grafana-agent-traces + serviceAccountName: grafana-agent-traces + volumes: + - configMap: + name: grafana-agent-traces + name: grafana-agent-traces diff --git a/production/kubernetes/build/lib/version.libsonnet.templ b/production/kubernetes/build/lib/version.libsonnet.templ new file mode 100644 index 000000000000..2c54521fbc08 --- /dev/null +++ b/production/kubernetes/build/lib/version.libsonnet.templ @@ -0,0 +1 @@ +'grafana/agent:$AGENT_VERSION' diff --git a/production/kubernetes/build/templates/operator/main.jsonnet.templ b/production/kubernetes/build/templates/operator/main.jsonnet.templ new file mode 100644 index 000000000000..0a769df58975 --- /dev/null +++ b/production/kubernetes/build/templates/operator/main.jsonnet.templ @@ -0,0 +1,160 @@ +local k = import 'ksonnet-util/kausal.libsonnet'; +local secret = k.core.v1.secret; +local pvc = k.core.v1.persistentVolumeClaim; + +local gen = import 'agent-operator-gen/main.libsonnet'; +local ga = gen.monitoring.v1alpha1.grafanaAgent; +local mi = gen.monitoring.v1alpha1.metricsInstance; +local li = gen.monitoring.v1alpha1.logsInstance; +local pl = gen.monitoring.v1alpha1.podLogs; +local int = gen.monitoring.v1alpha1.integration; + +local op = import 'grafana-agent-operator/operator.libsonnet'; +local ga_util = import 'grafana-agent-operator/util/grafana-agent.libsonnet'; +local mi_util = import 'grafana-agent-operator/util/metricsinstance.libsonnet'; +local li_util = import 'grafana-agent-operator/util/logsinstance.libsonnet'; +local pl_util = import 'grafana-agent-operator/util/k8slogs.libsonnet'; +local mon_util = import 'grafana-agent-operator/util/k8smonitors.libsonnet'; +local int_util = import 'grafana-agent-operator/util/integrations.libsonnet'; + +local ksm = import 'kube-state-metrics/kube-state-metrics.libsonnet'; + +{ + local this = self, + + _images:: { + agent: 'grafana/agent:$AGENT_VERSION', + agent_operator: 'grafana/agent-operator:$AGENT_VERSION', + ksm: 'registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.5.0', + }, + + _config:: { + namespace: '${NAMESPACE}', + metrics_url: '${METRICS_URL}', + metrics_user: '${METRICS_USER}', + metrics_key: '${METRICS_KEY}', + logs_url: '${LOGS_URL}', + logs_user: '${LOGS_USER}', + logs_key: '${LOGS_KEY}', + cluster_label: { cluster: '${CLUSTER}' }, + kubelet_job: 'kubelet', + cadvisor_job: 'cadvisor', + ksm_job: 'kube-state-metrics', + ksm_version: '2.5.0', + }, + + operator: + op.new(name='grafana-agent-operator', namespace=this._config.namespace, image=this._images.agent_operator, serviceAccount='grafana-agent-operator') + + op.withRbac(name='grafana-agent-operator', namespace=this._config.namespace), + + grafana_agent: + ga.new(name='grafana-agent') + + ga.metadata.withNamespace(this._config.namespace) + + ga.spec.withServiceAccountName('grafana-agent') + + 
ga.spec.withImage(this._images.agent) + + ga.spec.metrics.instanceSelector.withMatchLabels({ agent: 'grafana-agent' }) + + ga.spec.logs.instanceSelector.withMatchLabels({ agent: 'grafana-agent' }) + + ga.spec.integrations.selector.withMatchLabels({ agent: 'grafana-agent' }) + + ga.spec.metrics.withExternalLabels(this._config.cluster_label), + rbac: + ga_util.withRbac(name='grafana-agent', namespace=this._config.namespace), + + metrics_instance: + mi.new(name='grafana-agent-metrics') + + mi.metadata.withNamespace(this._config.namespace) + + mi.metadata.withLabels({ agent: 'grafana-agent' }) + + mi.spec.serviceMonitorSelector.withMatchLabels({ instance: 'primary' }) + + mi_util.withRemoteWrite(secretName='metrics-secret', metricsUrl=this._config.metrics_url) + + mi_util.withNilServiceMonitorNamespace(), + metrics_secret: + secret.new('metrics-secret', {}) + + secret.withStringData({ + username: this._config.metrics_user, + password: this._config.metrics_key, + }) + secret.mixin.metadata.withNamespace(this._config.namespace), + + logs_instance: + li.new(name='grafana-agent-logs') + + li.metadata.withNamespace(this._config.namespace) + + li.metadata.withLabels({ agent: 'grafana-agent' }) + + li.spec.podLogsSelector.withMatchLabels({ instance: 'primary' }) + + li_util.withLogsClient(secretName='logs-secret', logsUrl=this._config.logs_url, externalLabels=this._config.cluster_label) + + li_util.withNilPodLogsNamespace(), + logs_secret: + secret.new('logs-secret', {}) + + secret.withStringData({ + username: this._config.logs_user, + password: this._config.logs_key, + }) + secret.mixin.metadata.withNamespace(this._config.namespace), + + pod_logs: + pl.new('kubernetes-logs') + + pl.metadata.withNamespace(this._config.namespace) + + pl.metadata.withLabels({ instance: 'primary' }) + + pl.spec.withPipelineStages(pl.spec.pipelineStages.withCri({})) + + pl.spec.namespaceSelector.withAny(true) + + pl.spec.selector.withMatchLabels({}) + + pl.spec.withRelabelings(pl_util.withK8sLogsRelabeling()), + + k8s_monitors: [ + mon_util.newKubernetesMonitor( + name='kubelet-monitor', + namespace=this._config.namespace, + monitorLabels={ instance: 'primary' }, + targetNamespace=this._config.namespace, + targetLabels={ 'app.kubernetes.io/name': 'kubelet' }, + jobLabel=this._config.kubelet_job, + metricsPath='/metrics', + allowlist=false, + allowlistMetrics=[] + ), + mon_util.newKubernetesMonitor( + name='cadvisor-monitor', + namespace='default', + monitorLabels={ instance: 'primary' }, + targetNamespace=this._config.namespace, + targetLabels={ 'app.kubernetes.io/name': 'kubelet' }, + jobLabel=this._config.cadvisor_job, + metricsPath='/metrics/cadvisor', + allowlist=false, + allowlistMetrics=[] + ), + mon_util.newServiceMonitor( + name='ksm-monitor', + namespace=this._config.namespace, + monitorLabels={ instance: 'primary' }, + targetNamespace=this._config.namespace, + targetLabels={ 'app.kubernetes.io/name': 'kube-state-metrics' }, + jobLabel=this._config.ksm_job, + metricsPath='/metrics', + allowlist=false, + allowlistMetrics=[] + ), + ], + + kube_state_metrics: + ksm { + name:: 'kube-state-metrics', + namespace:: this._config.namespace, + version:: this._config.ksm_version, + image:: this._images.ksm, + }, + + events: + int.new('agent-eventhandler') + + int.metadata.withNamespace(this._config.namespace) + + int.metadata.withLabels({ agent: 'grafana-agent' }) + + int.spec.withName('eventhandler') + + int.spec.type.withUnique(true) + + int.spec.withConfig({ + logs_instance: this._config.namespace + '/' + 
'grafana-agent-logs', + cache_path: '/etc/eventhandler/eventhandler.cache', + }) + + int_util.withPVC('agent-eventhandler'), + pvc: + pvc.new('agent-eventhandler') + + pvc.mixin.metadata.withNamespace(this._config.namespace) + + pvc.mixin.spec.withAccessModes('ReadWriteOnce') + + pvc.mixin.spec.resources.withRequests({ storage: '1Gi' }), + +} diff --git a/production/kubernetes/install-bare.sh.templ b/production/kubernetes/install-bare.sh.templ new file mode 100644 index 000000000000..c13b3cb97cd6 --- /dev/null +++ b/production/kubernetes/install-bare.sh.templ @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# shellcheck shell=bash + +# +# install-bare.sh is an installer for the Agent without a ConfigMap. It is +# used during the Grafana Cloud integrations wizard and is not recommended +# to be used directly. Instead of calling this script directly, please +# make a copy of ./agent-bare.yaml and modify it for your needs. +# +# Note that agent-bare.yaml does not have a ConfigMap, so the Grafana Agent +# will not launch until one is created. For more information on setting up +# a ConfigMap, please refer to: +# +# Metrics quickstart: https://grafana.com/docs/grafana-cloud/quickstart/agent-k8s/k8s_agent_metrics/ +# Logs quickstart: https://grafana.com/docs/grafana-cloud/quickstart/agent-k8s/k8s_agent_logs/ +# + +check_installed() { + if ! type "$1" >/dev/null 2>&1; then + echo "error: $1 not installed" >&2 + exit 1 + fi +} + +check_installed curl +check_installed envsubst + +MANIFEST_BRANCH=$AGENT_VERSION +MANIFEST_URL=${MANIFEST_URL:-https://raw.githubusercontent.com/grafana/agent/${MANIFEST_BRANCH}/production/kubernetes/agent-bare.yaml} +NAMESPACE=${NAMESPACE:-default} + +export NAMESPACE + +curl -fsSL "$MANIFEST_URL" | envsubst diff --git a/production/operator/templates/agent-operator.yaml.templ b/production/operator/templates/agent-operator.yaml.templ new file mode 100644 index 000000000000..449ac6acefea --- /dev/null +++ b/production/operator/templates/agent-operator.yaml.templ @@ -0,0 +1,645 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana-agent + namespace: ${NAMESPACE} +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: grafana-agent-operator + namespace: ${NAMESPACE} +--- +apiVersion: v1 +automountServiceAccountToken: false +kind: ServiceAccount +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.5.0 + name: kube-state-metrics + namespace: ${NAMESPACE} +--- +apiVersion: v1 +data: {} +kind: Secret +metadata: + name: logs-secret + namespace: ${NAMESPACE} +stringData: + password: ${LOGS_KEY} + username: ${LOGS_USER} +type: Opaque +--- +apiVersion: v1 +data: {} +kind: Secret +metadata: + name: metrics-secret + namespace: ${NAMESPACE} +stringData: + password: ${METRICS_KEY} + username: ${METRICS_USER} +type: Opaque +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: agent-eventhandler + namespace: ${NAMESPACE} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: grafana-agent +rules: +- apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - nodes/metrics + - services + - endpoints + - pods + - events + verbs: + - get + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch +- nonResourceURLs: + - /metrics + - /metrics/cadvisor + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 
+kind: ClusterRole +metadata: + name: grafana-agent-operator +rules: +- apiGroups: + - monitoring.grafana.com + resources: + - grafanaagents + - metricsinstances + - logsinstances + - podlogs + - integrations + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.grafana.com + resources: + - grafanaagents/finalizers + - metricsinstances/finalizers + - logsinstances/finalizers + - podlogs/finalizers + - integrations/finalizers + verbs: + - get + - list + - watch + - update +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors + - probes + - servicemonitors + verbs: + - get + - list + - watch +- apiGroups: + - monitoring.coreos.com + resources: + - podmonitors/finalizers + - probes/finalizers + - servicemonitors/finalizers + verbs: + - get + - list + - watch + - update +- apiGroups: + - "" + resources: + - namespaces + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - secrets + - services + - configmaps + - endpoints + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.5.0 + name: kube-state-metrics +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + - nodes + - pods + - services + - resourcequotas + - replicationcontrollers + - limitranges + - persistentvolumeclaims + - persistentvolumes + - namespaces + - endpoints + verbs: + - list + - watch +- apiGroups: + - apps + resources: + - statefulsets + - daemonsets + - deployments + - replicasets + verbs: + - list + - watch +- apiGroups: + - batch + resources: + - cronjobs + - jobs + verbs: + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - list + - watch +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - list + - watch +- apiGroups: + - certificates.k8s.io + resources: + - certificatesigningrequests + verbs: + - list + - watch +- apiGroups: + - storage.k8s.io + resources: + - storageclasses + - volumeattachments + verbs: + - list + - watch +- apiGroups: + - admissionregistration.k8s.io + resources: + - mutatingwebhookconfigurations + - validatingwebhookconfigurations + verbs: + - list + - watch +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + - ingresses + verbs: + - list + - watch +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent +subjects: +- kind: ServiceAccount + name: grafana-agent + namespace: ${NAMESPACE} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: grafana-agent-operator +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: grafana-agent-operator +subjects: +- kind: ServiceAccount + name: grafana-agent-operator + namespace: ${NAMESPACE} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: 
ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.5.0 + name: kube-state-metrics +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-state-metrics +subjects: +- kind: ServiceAccount + name: kube-state-metrics + namespace: ${NAMESPACE} +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.5.0 + name: kube-state-metrics + namespace: ${NAMESPACE} +spec: + clusterIP: None + ports: + - name: http-metrics + port: 8080 + targetPort: http-metrics + - name: telemetry + port: 8081 + targetPort: telemetry + selector: + app.kubernetes.io/name: kube-state-metrics +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana-agent-operator + namespace: ${NAMESPACE} +spec: + minReadySeconds: 10 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + name: grafana-agent-operator + template: + metadata: + labels: + name: grafana-agent-operator + spec: + containers: + - args: + - --kubelet-service=default/kubelet + image: grafana/agent-operator:$AGENT_VERSION + imagePullPolicy: IfNotPresent + name: grafana-agent-operator + serviceAccount: grafana-agent-operator +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.5.0 + name: kube-state-metrics + namespace: ${NAMESPACE} +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics + template: + metadata: + labels: + app.kubernetes.io/component: exporter + app.kubernetes.io/name: kube-state-metrics + app.kubernetes.io/version: 2.5.0 + spec: + automountServiceAccountToken: true + containers: + - image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.5.0 + livenessProbe: + httpGet: + path: /healthz + port: 8080 + initialDelaySeconds: 5 + timeoutSeconds: 5 + name: kube-state-metrics + ports: + - containerPort: 8080 + name: http-metrics + - containerPort: 8081 + name: telemetry + readinessProbe: + httpGet: + path: / + port: 8081 + initialDelaySeconds: 5 + timeoutSeconds: 5 + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + runAsUser: 65534 + nodeSelector: + kubernetes.io/os: linux + serviceAccountName: kube-state-metrics +--- +apiVersion: monitoring.grafana.com/v1alpha1 +kind: GrafanaAgent +metadata: + name: grafana-agent + namespace: ${NAMESPACE} +spec: + image: grafana/agent:$AGENT_VERSION + integrations: + selector: + matchLabels: + agent: grafana-agent + logs: + instanceSelector: + matchLabels: + agent: grafana-agent + metrics: + externalLabels: + cluster: ${CLUSTER} + instanceSelector: + matchLabels: + agent: grafana-agent + serviceAccountName: grafana-agent +--- +apiVersion: monitoring.grafana.com/v1alpha1 +kind: Integration +metadata: + labels: + agent: grafana-agent + name: agent-eventhandler + namespace: ${NAMESPACE} +spec: + config: + cache_path: /etc/eventhandler/eventhandler.cache + logs_instance: ${NAMESPACE}/grafana-agent-logs + name: eventhandler + type: + unique: true + volumeMounts: + - mountPath: /etc/eventhandler + name: agent-eventhandler + volumes: + - name: agent-eventhandler + persistentVolumeClaim: + claimName: agent-eventhandler +--- +apiVersion: monitoring.grafana.com/v1alpha1 +kind: LogsInstance +metadata: + labels: + agent: 
grafana-agent + name: grafana-agent-logs + namespace: ${NAMESPACE} +spec: + clients: + - basicAuth: + password: + key: password + name: logs-secret + username: + key: username + name: logs-secret + externalLabels: + cluster: ${CLUSTER} + url: ${LOGS_URL} + podLogsNamespaceSelector: {} + podLogsSelector: + matchLabels: + instance: primary +--- +apiVersion: monitoring.grafana.com/v1alpha1 +kind: MetricsInstance +metadata: + labels: + agent: grafana-agent + name: grafana-agent-metrics + namespace: ${NAMESPACE} +spec: + remoteWrite: + - basicAuth: + password: + key: password + name: metrics-secret + username: + key: username + name: metrics-secret + url: ${METRICS_URL} + serviceMonitorNamespaceSelector: {} + serviceMonitorSelector: + matchLabels: + instance: primary +--- +apiVersion: monitoring.grafana.com/v1alpha1 +kind: PodLogs +metadata: + labels: + instance: primary + name: kubernetes-logs + namespace: ${NAMESPACE} +spec: + namespaceSelector: + any: true + pipelineStages: + - cri: {} + relabelings: + - sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: __host__ + - action: replace + sourceLabels: + - __meta_kubernetes_namespace + targetLabel: namespace + - action: replace + sourceLabels: + - __meta_kubernetes_pod_name + targetLabel: pod + - action: replace + sourceLabels: + - __meta_kubernetes_pod_container_name + targetLabel: container + - replacement: /var/log/pods/*$1/*.log + separator: / + sourceLabels: + - __meta_kubernetes_pod_uid + - __meta_kubernetes_pod_container_name + targetLabel: __path__ + selector: + matchLabels: {} +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + instance: primary + name: ksm-monitor + namespace: ${NAMESPACE} +spec: + endpoints: + - honorLabels: true + interval: 60s + path: /metrics + port: http-metrics + relabelings: + - action: replace + replacement: kube-state-metrics + targetLabel: job + namespaceSelector: + matchNames: + - ${NAMESPACE} + selector: + matchLabels: + app.kubernetes.io/name: kube-state-metrics +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + instance: primary + name: kubelet-monitor + namespace: ${NAMESPACE} +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 60s + path: /metrics + port: https-metrics + relabelings: + - sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + - action: replace + replacement: kubelet + targetLabel: job + scheme: https + tlsConfig: + insecureSkipVerify: true + namespaceSelector: + matchNames: + - ${NAMESPACE} + selector: + matchLabels: + app.kubernetes.io/name: kubelet +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + instance: primary + name: cadvisor-monitor + namespace: default +spec: + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + honorLabels: true + interval: 60s + path: /metrics/cadvisor + port: https-metrics + relabelings: + - sourceLabels: + - __metrics_path__ + targetLabel: metrics_path + - action: replace + replacement: cadvisor + targetLabel: job + scheme: https + tlsConfig: + insecureSkipVerify: true + namespaceSelector: + matchNames: + - ${NAMESPACE} + selector: + matchLabels: + app.kubernetes.io/name: kubelet diff --git a/production/tanka/grafana-agent/v1/main.libsonnet.templ b/production/tanka/grafana-agent/v1/main.libsonnet.templ new file mode 100644 index 000000000000..21ae76113f24 --- /dev/null +++ 
b/production/tanka/grafana-agent/v1/main.libsonnet.templ @@ -0,0 +1,142 @@ +local agent = import './internal/agent.libsonnet'; +local utils = import './internal/utils.libsonnet'; +local k = import 'ksonnet-util/kausal.libsonnet'; + +local container = k.core.v1.container; +local configMap = k.core.v1.configMap; +local service = k.core.v1.service; + +// Merge all of our libraries to create the final exposed library. +(import './lib/deployment.libsonnet') + +(import './lib/integrations.libsonnet') + +(import './lib/metrics.libsonnet') + +(import './lib/scraping_service.libsonnet') + +(import './lib/logs.libsonnet') + +(import './lib/traces.libsonnet') + +{ + _images:: { + agent: 'grafana/agent:$AGENT_VERSION', + agentctl: 'grafana/agentctl:$AGENT_VERSION', + }, + + // new creates a new DaemonSet deployment of the grafana-agent. By default, + // the deployment will do no collection. You must merge the result of this + // function with one or more of the following: + // + // - withMetricsConfig, withMetricsInstances (and optionally withRemoteWrite) + // - withLogsConfig + // + // When using withMetricsInstances, a [name]-etc deployment + // with one replica will be created alongside the DaemonSet. This deployment + // is responsible for handling scrape configs that will not work on the host + // machine. + // + // For example, if a scrape_config scrapes the Kubernetes API, that must be + // handled by the [name]-etc deployment as the Kubernetes API does not run + // on any node in the cluster. + // + // scrapeInstanceKubernetes provides the default + // MetricsInstanceConfig Grafana Labs uses in production. + new(name='grafana-agent', namespace='default'):: { + local this = self, + + _mode:: 'daemonset', + _images:: $._images, + _config_hash:: true, + + local has_logs_config = std.objectHasAll(self, '_logs_config'), + local has_trace_config = std.objectHasAll(self, '_trace_config'), + local has_metrics_config = std.objectHasAll(self, '_metrics_config'), + local has_metrics_instances = std.objectHasAll(self, '_metrics_instances'), + local has_integrations = std.objectHasAll(self, '_integrations'), + local has_sampling_strategies = std.objectHasAll(self, '_traces_sampling_strategies'), + + local metrics_instances = + if has_metrics_instances then this._metrics_instances else [], + local host_filter_instances = utils.transformInstances(metrics_instances, true), + local etc_instances = utils.transformInstances(metrics_instances, false), + + config:: { + server: { + log_level: 'info', + }, + } + ( + if has_metrics_config + then { metrics: this._metrics_config { configs: host_filter_instances } } + else {} + ) + ( + if has_logs_config then { + logs: { + positions_directory: '/tmp/positions', + configs: [this._logs_config { + name: 'default', + }], + }, + } else {} + ) + ( + if has_trace_config then { + traces: { + configs: [this._trace_config { + name: 'default', + }], + }, + } + else {} + ) + ( + if has_integrations then { integrations: this._integrations } else {} + ), + + etc_config:: if has_metrics_config then this.config { + // Hide logs and integrations from our extra configs, we just want the + // scrape configs that wouldn't work for the DaemonSet. 
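+    // Descriptive note: host_filter is flipped off below so the single-replica
+    // "-etc" Deployment scrapes matching targets anywhere in the cluster,
+    // rather than only targets colocated on its own node.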
+ metrics+: { + configs: std.map(function(cfg) cfg { host_filter: false }, etc_instances), + }, + logs:: {}, + traces:: {}, + integrations:: {}, + }, + + agent: + agent.newAgent(name, namespace, self._images.agent, self.config, use_daemonset=true) + + agent.withConfigHash(self._config_hash) + { + // If sampling strategies were defined, we need to mount them as a JSON + // file. + config_map+: + if has_sampling_strategies + then configMap.withDataMixin({ + 'strategies.json': std.toString(this._traces_sampling_strategies), + }) + else {}, + + // If we're deploying for tracing, applications will want to write to + // a service for load balancing span delivery. + service: + if has_trace_config + then k.util.serviceFor(self.agent) + service.mixin.metadata.withNamespace(namespace) + else {}, + } + ( + if has_logs_config then $.logsPermissionsMixin else {} + ) + ( + if has_integrations && std.objectHas(this._integrations, 'node_exporter') then $.integrationsMixin else {} + ), + + agent_etc: if std.length(etc_instances) > 0 then + agent.newAgent(name + '-etc', namespace, self._images.agent, self.etc_config, use_daemonset=false) + + agent.withConfigHash(self._config_hash), + }, + + // withImages sets the images used for launching the Agent. + // Keys supported: agent, agentctl + withImages(images):: { _images+: images }, + + // Includes or excludes the config hash annotation. + withConfigHash(include=true):: { _config_hash:: include }, + + // withPortsMixin adds extra ports to expose. + withPortsMixin(ports=[]):: { + agent+: { + container+:: container.withPortsMixin(ports), + }, + }, +} diff --git a/production/tanka/grafana-agent/v2/internal/base.libsonnet.templ b/production/tanka/grafana-agent/v2/internal/base.libsonnet.templ new file mode 100644 index 000000000000..a0b01078f994 --- /dev/null +++ b/production/tanka/grafana-agent/v2/internal/base.libsonnet.templ @@ -0,0 +1,56 @@ +function(name='grafana-agent', namespace='') { + local k = (import 'ksonnet-util/kausal.libsonnet') { _config+:: { namespace: namespace } }, + + local container = k.core.v1.container, + local configMap = k.core.v1.configMap, + local containerPort = k.core.v1.containerPort, + local policyRule = k.rbac.v1.policyRule, + local serviceAccount = k.core.v1.serviceAccount, + local envVar = k.core.v1.envVar, + + local this = self, + + _images:: { + agent: 'grafana/agent:$AGENT_VERSION', + agentctl: 'grafana/agentctl:$AGENT_VERSION', + }, + _config:: { + name: name, + namespace: namespace, + config_hash: true, + agent_config: '', + agent_port: 80, + agent_args: { + 'config.file': '/etc/agent/agent.yaml', + 'server.http.address': '0.0.0.0:80', + 'config.expand-env': 'true', + }, + }, + + rbac: k.util.rbac(name, [ + policyRule.withApiGroups(['']) + + policyRule.withResources(['nodes', 'nodes/proxy', 'services', 'endpoints', 'pods', 'events']) + + policyRule.withVerbs(['get', 'list', 'watch']), + + policyRule.withNonResourceUrls('/metrics') + + policyRule.withVerbs(['get']), + ]) { + service_account+: serviceAccount.mixin.metadata.withNamespace(namespace), + }, + + configMap: + configMap.new(name) + + configMap.mixin.metadata.withNamespace(namespace) + + configMap.withData({ + 'agent.yaml': k.util.manifestYaml(this._config.agent_config), + }), + + container:: + container.new(name, this._images.agent) + + container.withPorts(containerPort.new('http-metrics', this._config.agent_port)) + + container.withArgsMixin(k.util.mapToFlags(this._config.agent_args)) + + // `HOSTNAME` is required for promtail (logs) otherwise it will silently do 
nothing + container.withEnvMixin([ + envVar.fromFieldPath('HOSTNAME', 'spec.nodeName'), + ]), +} diff --git a/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.templ b/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.templ new file mode 100644 index 000000000000..7b9fb7fd420b --- /dev/null +++ b/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.templ @@ -0,0 +1,62 @@ +local k = import 'ksonnet-util/kausal.libsonnet'; + +local cronJob = k.batch.v1.cronJob; +local configMap = k.core.v1.configMap; +local container = k.core.v1.container; +local deployment = k.apps.v1.deployment; +local volumeMount = k.core.v1.volumeMount; +local volume = k.core.v1.volume; + +function( + name='grafana-agent-syncer', + namespace='', + config={}, +) { + local _config = { + api: error 'api must be set', + image: 'grafana/agentctl:$AGENT_VERSION', + schedule: '*/5 * * * *', + configs: [], + } + config, + + local this = self, + local _configs = std.foldl( + function(agg, cfg) + // Sanitize the name and remove / so every file goes into the same + // folder. + local name = std.strReplace(cfg.name, '/', '_'); + + agg { ['%s.yml' % name]: k.util.manifestYaml(cfg) }, + _config.configs, + {}, + ), + + configMap: + configMap.new(name) + + configMap.mixin.metadata.withNamespace(namespace) + + configMap.withData(_configs), + + container:: + container.new(name, _config.image) + + container.withArgsMixin([ + 'config-sync', + '--addr=%s' % _config.api, + '/etc/configs', + ]) + + container.withVolumeMounts(volumeMount.new(name, '/etc/configs')), + + job: + cronJob.new(name, _config.schedule, this.container) + + cronJob.mixin.metadata.withNamespace(namespace) + + cronJob.mixin.spec.withSuccessfulJobsHistoryLimit(1) + + cronJob.mixin.spec.withFailedJobsHistoryLimit(3) + + cronJob.mixin.spec.jobTemplate.spec.template.spec.withRestartPolicy('OnFailure') + + cronJob.mixin.spec.jobTemplate.spec.template.spec.withActiveDeadlineSeconds(600) + + cronJob.mixin.spec.jobTemplate.spec.withTtlSecondsAfterFinished(120) + + cronJob.mixin.spec.jobTemplate.spec.template.spec.withVolumes([ + volume.fromConfigMap( + name=name, + configMapName=this.configMap.metadata.name, + ), + ]), +} diff --git a/tools/generate-version-files.bash b/tools/generate-version-files.bash new file mode 100755 index 000000000000..7480b40143fb --- /dev/null +++ b/tools/generate-version-files.bash @@ -0,0 +1,12 @@ +#!/bin/bash + +if [ -z "$AGENT_VERSION" ]; then + echo "AGENT_VERSION env var is not set" + exit 1 +fi + +templates=$(find . -type f -name "*.templ" -not -path "./.git/*") +for template in $templates; do + echo "Generating ${template%.templ}" + envsubst < $template > ${template%.templ} +done From 3e2ced80591a44619872c7b19a07815dc206c9af Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Thu, 28 Sep 2023 07:34:16 -0700 Subject: [PATCH 02/21] Add pattern matching and use sed --- tools/generate-version-files.bash | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/generate-version-files.bash b/tools/generate-version-files.bash index 7480b40143fb..f8a76a3c72f5 100755 --- a/tools/generate-version-files.bash +++ b/tools/generate-version-files.bash @@ -5,8 +5,15 @@ if [ -z "$AGENT_VERSION" ]; then exit 1 fi +versionMatcher='^v[0-9]+\.[0-9]+\.[0-9]+(-rc[0-9]+)?$' + +if [[ ! $AGENT_VERSION =~ $versionMatcher ]]; then + echo "AGENT_VERSION env var is not in the correct format. It should be in the format of vX.Y.Z or vX.Y.Z-rcN" + exit 1 +fi + templates=$(find . 
-type f -name "*.templ" -not -path "./.git/*") for template in $templates; do echo "Generating ${template%.templ}" - envsubst < $template > ${template%.templ} + sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < $template > ${template%.templ} done From 956205c0ba619a736e6c4c4db1a4e1a09ae56f4f Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Thu, 28 Sep 2023 09:41:48 -0700 Subject: [PATCH 03/21] Rename .templ to .t --- ...ources.md.templ => deploy-agent-operator-resources.md.t} | 0 .../{getting-started.md.templ => getting-started.md.t} | 0 ...e-exporter-config.md.templ => node-exporter-config.md.t} | 0 ...xporter-config.md.templ => process-exporter-config.md.t} | 0 ...tall-agent-docker.md.templ => install-agent-docker.md.t} | 0 pkg/operator/{defaults.go.templ => defaults.go.t} | 0 .../kubernetes/{agent-bare.yaml.templ => agent-bare.yaml.t} | 0 .../kubernetes/{agent-loki.yaml.templ => agent-loki.yaml.t} | 0 .../{agent-traces.yaml.templ => agent-traces.yaml.t} | 0 .../lib/{version.libsonnet.templ => version.libsonnet.t} | 0 .../operator/{main.jsonnet.templ => main.jsonnet.t} | 0 .../kubernetes/{install-bare.sh.templ => install-bare.sh.t} | 0 .../{agent-operator.yaml.templ => agent-operator.yaml.t} | 0 .../v1/{main.libsonnet.templ => main.libsonnet.t} | 0 .../v2/internal/{base.libsonnet.templ => base.libsonnet.t} | 0 .../internal/{syncer.libsonnet.templ => syncer.libsonnet.t} | 0 tools/generate-version-files.bash | 6 +++--- 17 files changed, 3 insertions(+), 3 deletions(-) rename docs/sources/operator/{deploy-agent-operator-resources.md.templ => deploy-agent-operator-resources.md.t} (100%) rename docs/sources/operator/{getting-started.md.templ => getting-started.md.t} (100%) rename docs/sources/static/configuration/integrations/{node-exporter-config.md.templ => node-exporter-config.md.t} (100%) rename docs/sources/static/configuration/integrations/{process-exporter-config.md.templ => process-exporter-config.md.t} (100%) rename docs/sources/static/set-up/install/{install-agent-docker.md.templ => install-agent-docker.md.t} (100%) rename pkg/operator/{defaults.go.templ => defaults.go.t} (100%) rename production/kubernetes/{agent-bare.yaml.templ => agent-bare.yaml.t} (100%) rename production/kubernetes/{agent-loki.yaml.templ => agent-loki.yaml.t} (100%) rename production/kubernetes/{agent-traces.yaml.templ => agent-traces.yaml.t} (100%) rename production/kubernetes/build/lib/{version.libsonnet.templ => version.libsonnet.t} (100%) rename production/kubernetes/build/templates/operator/{main.jsonnet.templ => main.jsonnet.t} (100%) rename production/kubernetes/{install-bare.sh.templ => install-bare.sh.t} (100%) rename production/operator/templates/{agent-operator.yaml.templ => agent-operator.yaml.t} (100%) rename production/tanka/grafana-agent/v1/{main.libsonnet.templ => main.libsonnet.t} (100%) rename production/tanka/grafana-agent/v2/internal/{base.libsonnet.templ => base.libsonnet.t} (100%) rename production/tanka/grafana-agent/v2/internal/{syncer.libsonnet.templ => syncer.libsonnet.t} (100%) diff --git a/docs/sources/operator/deploy-agent-operator-resources.md.templ b/docs/sources/operator/deploy-agent-operator-resources.md.t similarity index 100% rename from docs/sources/operator/deploy-agent-operator-resources.md.templ rename to docs/sources/operator/deploy-agent-operator-resources.md.t diff --git a/docs/sources/operator/getting-started.md.templ b/docs/sources/operator/getting-started.md.t similarity index 100% rename from docs/sources/operator/getting-started.md.templ rename to 
docs/sources/operator/getting-started.md.t diff --git a/docs/sources/static/configuration/integrations/node-exporter-config.md.templ b/docs/sources/static/configuration/integrations/node-exporter-config.md.t similarity index 100% rename from docs/sources/static/configuration/integrations/node-exporter-config.md.templ rename to docs/sources/static/configuration/integrations/node-exporter-config.md.t diff --git a/docs/sources/static/configuration/integrations/process-exporter-config.md.templ b/docs/sources/static/configuration/integrations/process-exporter-config.md.t similarity index 100% rename from docs/sources/static/configuration/integrations/process-exporter-config.md.templ rename to docs/sources/static/configuration/integrations/process-exporter-config.md.t diff --git a/docs/sources/static/set-up/install/install-agent-docker.md.templ b/docs/sources/static/set-up/install/install-agent-docker.md.t similarity index 100% rename from docs/sources/static/set-up/install/install-agent-docker.md.templ rename to docs/sources/static/set-up/install/install-agent-docker.md.t diff --git a/pkg/operator/defaults.go.templ b/pkg/operator/defaults.go.t similarity index 100% rename from pkg/operator/defaults.go.templ rename to pkg/operator/defaults.go.t diff --git a/production/kubernetes/agent-bare.yaml.templ b/production/kubernetes/agent-bare.yaml.t similarity index 100% rename from production/kubernetes/agent-bare.yaml.templ rename to production/kubernetes/agent-bare.yaml.t diff --git a/production/kubernetes/agent-loki.yaml.templ b/production/kubernetes/agent-loki.yaml.t similarity index 100% rename from production/kubernetes/agent-loki.yaml.templ rename to production/kubernetes/agent-loki.yaml.t diff --git a/production/kubernetes/agent-traces.yaml.templ b/production/kubernetes/agent-traces.yaml.t similarity index 100% rename from production/kubernetes/agent-traces.yaml.templ rename to production/kubernetes/agent-traces.yaml.t diff --git a/production/kubernetes/build/lib/version.libsonnet.templ b/production/kubernetes/build/lib/version.libsonnet.t similarity index 100% rename from production/kubernetes/build/lib/version.libsonnet.templ rename to production/kubernetes/build/lib/version.libsonnet.t diff --git a/production/kubernetes/build/templates/operator/main.jsonnet.templ b/production/kubernetes/build/templates/operator/main.jsonnet.t similarity index 100% rename from production/kubernetes/build/templates/operator/main.jsonnet.templ rename to production/kubernetes/build/templates/operator/main.jsonnet.t diff --git a/production/kubernetes/install-bare.sh.templ b/production/kubernetes/install-bare.sh.t similarity index 100% rename from production/kubernetes/install-bare.sh.templ rename to production/kubernetes/install-bare.sh.t diff --git a/production/operator/templates/agent-operator.yaml.templ b/production/operator/templates/agent-operator.yaml.t similarity index 100% rename from production/operator/templates/agent-operator.yaml.templ rename to production/operator/templates/agent-operator.yaml.t diff --git a/production/tanka/grafana-agent/v1/main.libsonnet.templ b/production/tanka/grafana-agent/v1/main.libsonnet.t similarity index 100% rename from production/tanka/grafana-agent/v1/main.libsonnet.templ rename to production/tanka/grafana-agent/v1/main.libsonnet.t diff --git a/production/tanka/grafana-agent/v2/internal/base.libsonnet.templ b/production/tanka/grafana-agent/v2/internal/base.libsonnet.t similarity index 100% rename from production/tanka/grafana-agent/v2/internal/base.libsonnet.templ rename 
to production/tanka/grafana-agent/v2/internal/base.libsonnet.t diff --git a/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.templ b/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t similarity index 100% rename from production/tanka/grafana-agent/v2/internal/syncer.libsonnet.templ rename to production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t diff --git a/tools/generate-version-files.bash b/tools/generate-version-files.bash index f8a76a3c72f5..a677d41e249f 100755 --- a/tools/generate-version-files.bash +++ b/tools/generate-version-files.bash @@ -12,8 +12,8 @@ if [[ ! $AGENT_VERSION =~ $versionMatcher ]]; then exit 1 fi -templates=$(find . -type f -name "*.templ" -not -path "./.git/*") +templates=$(find . -type f -name "*.t" -not -path "./.git/*") for template in $templates; do - echo "Generating ${template%.templ}" - sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < $template > ${template%.templ} + echo "Generating ${template%.t}" + sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < $template > ${template%.t} done From ca3d41531ab990691bd1790f5eb18bc209e413c8 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Thu, 28 Sep 2023 13:23:45 -0700 Subject: [PATCH 04/21] Make POSIX compliant --- ...erate-version-files.bash => generate-version-files.sh} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename tools/{generate-version-files.bash => generate-version-files.sh} (72%) diff --git a/tools/generate-version-files.bash b/tools/generate-version-files.sh similarity index 72% rename from tools/generate-version-files.bash rename to tools/generate-version-files.sh index a677d41e249f..b47da1aa1ff5 100755 --- a/tools/generate-version-files.bash +++ b/tools/generate-version-files.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh if [ -z "$AGENT_VERSION" ]; then echo "AGENT_VERSION env var is not set" @@ -7,7 +7,7 @@ fi versionMatcher='^v[0-9]+\.[0-9]+\.[0-9]+(-rc[0-9]+)?$' -if [[ ! $AGENT_VERSION =~ $versionMatcher ]]; then +if echo "$AGENT_VERSION" | grep -Eq "$versionMatcher"; then echo "AGENT_VERSION env var is not in the correct format. It should be in the format of vX.Y.Z or vX.Y.Z-rcN" exit 1 fi @@ -15,5 +15,5 @@ fi templates=$(find . -type f -name "*.t" -not -path "./.git/*") for template in $templates; do echo "Generating ${template%.t}" - sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < $template > ${template%.t} -done + sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < "$template" > "${template%.t}" +done \ No newline at end of file From f0d10785feec97f1a95602aa6747320c9122efa0 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Thu, 28 Sep 2023 13:27:50 -0700 Subject: [PATCH 05/21] Fix condition --- tools/generate-version-files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/generate-version-files.sh b/tools/generate-version-files.sh index b47da1aa1ff5..15e0f5cb9f32 100755 --- a/tools/generate-version-files.sh +++ b/tools/generate-version-files.sh @@ -7,7 +7,7 @@ fi versionMatcher='^v[0-9]+\.[0-9]+\.[0-9]+(-rc[0-9]+)?$' -if echo "$AGENT_VERSION" | grep -Eq "$versionMatcher"; then +if ! echo "$AGENT_VERSION" | grep -Eq "$versionMatcher"; then echo "AGENT_VERSION env var is not in the correct format. 
It should be in the format of vX.Y.Z or vX.Y.Z-rcN" exit 1 fi From 7dcc7972d2d12adabfa484db95cf49332835ec90 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Thu, 28 Sep 2023 13:29:04 -0700 Subject: [PATCH 06/21] Add newline to end of file --- tools/generate-version-files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/generate-version-files.sh b/tools/generate-version-files.sh index 15e0f5cb9f32..be346fbe3bdb 100755 --- a/tools/generate-version-files.sh +++ b/tools/generate-version-files.sh @@ -16,4 +16,4 @@ templates=$(find . -type f -name "*.t" -not -path "./.git/*") for template in $templates; do echo "Generating ${template%.t}" sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < "$template" > "${template%.t}" -done \ No newline at end of file +done From fcd3c99aa7434b285f8b671e3c6a322bae915e87 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Thu, 28 Sep 2023 13:31:56 -0700 Subject: [PATCH 07/21] Update release instructions --- docs/developer/release/3-update-version-in-code.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/developer/release/3-update-version-in-code.md b/docs/developer/release/3-update-version-in-code.md index a5488d2777e5..7507d7ee25af 100644 --- a/docs/developer/release/3-update-version-in-code.md +++ b/docs/developer/release/3-update-version-in-code.md @@ -24,6 +24,12 @@ The project must be updated to reference the upcoming release tag whenever a new 3. Update appropriate places in the codebase that have the previous version with the new version determined above. + You can run the following command to update the version in the codebase: + + ``` + AGENT_VERSION=VERSION ./tools/generate-version-files.sh + ``` + * Do **not** update the `operations/helm` directory. It is updated independently from Agent releases. 3. Create a PR to merge to main (must be merged before continuing). From ceaecf034044a78c64c22ef1191df200846dc6b1 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Fri, 29 Sep 2023 12:31:05 -0700 Subject: [PATCH 08/21] Add agent-version.txt file and use that to make it easier to use GitHub actions --- tools/gen-versioned-files/agent-version.txt | 1 + .../gen-versioned-files.sh} | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 tools/gen-versioned-files/agent-version.txt rename tools/{generate-version-files.sh => gen-versioned-files/gen-versioned-files.sh} (76%) diff --git a/tools/gen-versioned-files/agent-version.txt b/tools/gen-versioned-files/agent-version.txt new file mode 100644 index 000000000000..434c18794f78 --- /dev/null +++ b/tools/gen-versioned-files/agent-version.txt @@ -0,0 +1 @@ +v0.36.2 \ No newline at end of file diff --git a/tools/generate-version-files.sh b/tools/gen-versioned-files/gen-versioned-files.sh similarity index 76% rename from tools/generate-version-files.sh rename to tools/gen-versioned-files/gen-versioned-files.sh index be346fbe3bdb..39d25a006d07 100755 --- a/tools/generate-version-files.sh +++ b/tools/gen-versioned-files/gen-versioned-files.sh @@ -1,7 +1,8 @@ #!/bin/sh +AGENT_VERSION=$(cat ./tools/gen-versioned-files/agent-version.txt | tr -d '\n') if [ -z "$AGENT_VERSION" ]; then - echo "AGENT_VERSION env var is not set" + echo "AGENT_VERSION can't be found. Are you running this from the repo root?" 
exit 1 fi From eeb363386c8e2965bb76441193649828681ff717 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Fri, 29 Sep 2023 12:31:50 -0700 Subject: [PATCH 09/21] Add generate-versioned-files make target --- Makefile | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index bc568f81e332..937a95ab993a 100644 --- a/Makefile +++ b/Makefile @@ -50,16 +50,17 @@ ## ## Targets for generating assets: ## -## generate Generate everything. -## generate-crds Generate Grafana Agent Operator CRDs ands its documentation. -## generate-drone Generate the Drone YAML from Jsonnet. -## generate-helm-docs Generate Helm chart documentation. -## generate-helm-tests Generate Helm chart tests. -## generate-manifests Generate production/kubernetes YAML manifests. -## generate-dashboards Generate dashboards in example/docker-compose after -## changing Jsonnet. -## generate-protos Generate protobuf files. -## generate-ui Generate the UI assets. +## generate Generate everything. +## generate-crds Generate Grafana Agent Operator CRDs ands its documentation. +## generate-drone Generate the Drone YAML from Jsonnet. +## generate-helm-docs Generate Helm chart documentation. +## generate-helm-tests Generate Helm chart tests. +## generate-manifests Generate production/kubernetes YAML manifests. +## generate-dashboards Generate dashboards in example/docker-compose after +## changing Jsonnet. +## generate-protos Generate protobuf files. +## generate-ui Generate the UI assets. +## generate-versioned-files Generate versioned files. ## ## Other targets: ## @@ -280,8 +281,8 @@ smoke-image: # Targets for generating assets # -.PHONY: generate generate-crds generate-drone generate-helm-docs generate-helm-tests generate-manifests generate-dashboards generate-protos generate-ui -generate: generate-crds generate-drone generate-helm-docs generate-helm-tests generate-manifests generate-dashboards generate-protos generate-ui +.PHONY: generate generate-crds generate-drone generate-helm-docs generate-helm-tests generate-manifests generate-dashboards generate-protos generate-ui generate-versioned-files +generate: generate-crds generate-drone generate-helm-docs generate-helm-tests generate-manifests generate-dashboards generate-protos generate-ui generate-versioned-files generate-crds: ifeq ($(USE_CONTAINER),1) @@ -337,6 +338,13 @@ else cd ./web/ui && yarn --network-timeout=1200000 && yarn run build endif +generate-versioned-files: +ifeq ($(USE_CONTAINER),1) + $(RERUN_IN_CONTAINER) +else + sh ./tools/gen-versioned-files/gen-versioned-files.sh +endif + # # Other targets # From 772eff21f71e896fee4cc1f738120c848c902a4e Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Fri, 29 Sep 2023 13:16:16 -0700 Subject: [PATCH 10/21] Add workflow check to check that templates and generated files are in sync --- .github/workflows/check-versioned-files.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .github/workflows/check-versioned-files.yml diff --git a/.github/workflows/check-versioned-files.yml b/.github/workflows/check-versioned-files.yml new file mode 100644 index 000000000000..a29b4de8e6cf --- /dev/null +++ b/.github/workflows/check-versioned-files.yml @@ -0,0 +1,16 @@ +name: Test Versioned Files +on: pull_request +jobs: + regenerate-docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Regenerate versioned files + run: | + make generate-versioned-files + if ! 
git diff --exit-code; then + echo "Newly generated versioned files differ from those checked in. Make sure to only update the templates manually and run 'make generate-versioned-files'!" >&2 + exit 1 + fi From b1d5683a2e3129a7f9c17380f4aadd1483549b34 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Fri, 29 Sep 2023 13:17:51 -0700 Subject: [PATCH 11/21] Update developer docs --- docs/developer/release/3-update-version-in-code.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/developer/release/3-update-version-in-code.md b/docs/developer/release/3-update-version-in-code.md index 7507d7ee25af..b3de34187392 100644 --- a/docs/developer/release/3-update-version-in-code.md +++ b/docs/developer/release/3-update-version-in-code.md @@ -24,12 +24,14 @@ The project must be updated to reference the upcoming release tag whenever a new 3. Update appropriate places in the codebase that have the previous version with the new version determined above. - You can run the following command to update the version in the codebase: + First update `tools/gen-versioned-files/agent-version.txt` with the new `VERSION` and run: ``` - AGENT_VERSION=VERSION ./tools/generate-version-files.sh + make generate-versioned-files ``` + Next, commit the changes (including those to `tools/gen-versioned-files/agent-version.txt`, as a workflow will use this version to ensure that the templates and generated files are in sync). + * Do **not** update the `operations/helm` directory. It is updated independently from Agent releases. 3. Create a PR to merge to main (must be merged before continuing). From fd46e019e47d2b55492c64a7ae4506138b348545 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Fri, 29 Sep 2023 13:28:12 -0700 Subject: [PATCH 12/21] Update templates --- .../deploy-agent-operator-resources.md.t | 18 +++++++++--------- docs/sources/operator/getting-started.md.t | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/sources/operator/deploy-agent-operator-resources.md.t b/docs/sources/operator/deploy-agent-operator-resources.md.t index b390ec8e1712..341e8ad96cb4 100644 --- a/docs/sources/operator/deploy-agent-operator-resources.md.t +++ b/docs/sources/operator/deploy-agent-operator-resources.md.t @@ -13,8 +13,8 @@ weight: 120 To start collecting telemetry data, you need to roll out Grafana Agent Operator custom resources into your Kubernetes cluster. Before you can create the custom resources, you must first apply the Agent Custom Resource Definitions (CRDs) and install Agent Operator, with or without Helm. If you haven't yet taken these steps, follow the instructions in one of the following topics: -- [Install Agent Operator]({{< relref "./getting-started/" >}}) -- [Install Agent Operator with Helm]({{< relref "./helm-getting-started/" >}}) +- [Install Agent Operator]({{< relref "./getting-started" >}}) +- [Install Agent Operator with Helm]({{< relref "./helm-getting-started" >}}) Follow the steps in this guide to roll out the Grafana Agent Operator custom resources to: @@ -31,7 +31,7 @@ The hierarchy of custom resources is as follows: - `LogsInstance` - `PodLogs` -To learn more about the custom resources Agent Operator provides and their hierarchy, see [Grafana Agent Operator architecture]({{< relref "./architecture/" >}}). +To learn more about the custom resources Agent Operator provides and their hierarchy, see [Grafana Agent Operator architecture]({{< relref "./architecture" >}}). 
{{% admonition type="note" %}} Agent Operator is currently in [beta]({{< relref "../stability.md#beta" >}}) and its custom resources are subject to change. @@ -39,11 +39,11 @@ Agent Operator is currently in [beta]({{< relref "../stability.md#beta" >}}) and ## Before you begin -Before you begin, make sure that you have deployed the Grafana Agent Operator CRDs and installed Agent Operator into your cluster. See [Install Grafana Agent Operator with Helm]({{< relref "./helm-getting-started.md" >}}) or [Install Grafana Agent Operator]({{< relref "./getting-started.md" >}}) for instructions. +Before you begin, make sure that you have deployed the Grafana Agent Operator CRDs and installed Agent Operator into your cluster. See [Install Grafana Agent Operator with Helm]({{< relref "./helm-getting-started" >}}) or [Install Grafana Agent Operator]({{< relref "./getting-started" >}}) for instructions. ## Deploy the GrafanaAgent resource -In this section, you'll roll out a `GrafanaAgent` resource. See [Grafana Agent Operator architecture]({{< relref "./architecture.md" >}}) for a discussion of the resources in the `GrafanaAgent` resource hierarchy. +In this section, you'll roll out a `GrafanaAgent` resource. See [Grafana Agent Operator architecture]({{< relref "./architecture" >}}) for a discussion of the resources in the `GrafanaAgent` resource hierarchy. {{% admonition type="note" %}} Due to the variety of possible deployment architectures, the official Agent Operator Helm chart does not provide built-in templates for the custom resources described in this guide. You must configure and deploy these manually as described in this section. We recommend templating and adding the following manifests to your own in-house Helm charts and GitOps flows. @@ -230,7 +230,7 @@ To deploy a `MetricsInstance` resource: password: 'your_cloud_prometheus_API_key' ``` -If you're using Grafana Cloud, you can find your hosted Loki endpoint username and password by clicking **Details** on the Loki tile on the [Grafana Cloud Portal](https://grafana.com/profile/org). If you want to base64-encode these values yourself, use `data` instead of `stringData`. +If you're using Grafana Cloud, you can find your hosted Loki endpoint username and password by clicking **Details** on the Loki tile on the [Grafana Cloud Portal](/profile/org). If you want to base64-encode these values yourself, use `data` instead of `stringData`. Once you've rolled out the `MetricsInstance` and its Secret, you can confirm that the `MetricsInstance` Agent is up and running using `kubectl get pod`. Since you haven't defined any monitors yet, this Agent doesn't have any scrape targets defined. In the next section, you'll create scrape targets for the cAdvisor and kubelet endpoints exposed by the `kubelet` service in the cluster. @@ -359,7 +359,7 @@ To deploy the `LogsInstance` resource into your cluster: instance: primary ``` - This `LogsInstance` picks up `PodLogs` resources with the `instance: primary` label. Be sure to set the Loki URL to the correct push endpoint. For Grafana Cloud, this will look similar to `logs-prod-us-central1.grafana.net/loki/api/v1/push`, however check the [Grafana Cloud Portal](https://grafana.com/profile/org) to confirm by clicking **Details** on the Loki tile. + This `LogsInstance` picks up `PodLogs` resources with the `instance: primary` label. Be sure to set the Loki URL to the correct push endpoint. 
For Grafana Cloud, this will look similar to `logs-prod-us-central1.grafana.net/loki/api/v1/push`, however check the [Grafana Cloud Portal](/profile/org) to confirm by clicking **Details** on the Loki tile. Also note that this example uses the `agent: grafana-agent-logs` label, which associates this `LogsInstance` with the `GrafanaAgent` resource defined earlier. This means that it will inherit requests, limits, affinities and other properties defined in the `GrafanaAgent` custom resource. @@ -376,7 +376,7 @@ To deploy the `LogsInstance` resource into your cluster: password: 'your_password_here' ``` - If you're using Grafana Cloud, you can find your hosted Loki endpoint username and password by clicking **Details** on the Loki tile on the [Grafana Cloud Portal](https://grafana.com/profile/org). If you want to base64-encode these values yourself, use `data` instead of `stringData`. + If you're using Grafana Cloud, you can find your hosted Loki endpoint username and password by clicking **Details** on the Loki tile on the [Grafana Cloud Portal](/profile/org). If you want to base64-encode these values yourself, use `data` instead of `stringData`. 1. Copy the following `PodLogs` manifest to a file, then roll it to your cluster using `kubectl apply -f` followed by the filename. The manifest defines your logging targets. Agent Operator turns this into Agent configuration for the logs subsystem, and rolls it out to the DaemonSet of logging Agents. @@ -413,7 +413,7 @@ To deploy the `LogsInstance` resource into your cluster: - `job` (set to `PodLogs_namespace/PodLogs_name`) - `__path__` (the path to log files, set to `/var/log/pods/*$1/*.log` where `$1` is `__meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name`) - To learn more about this configuration format and other available labels, see the [Promtail Scraping](https://grafana.com/docs/loki/latest/clients/promtail/scraping/#promtail-scraping-service-discovery) documentation. Agent Operator loads this configuration into the `LogsInstance` agents automatically. + To learn more about this configuration format and other available labels, see the [Promtail Scraping](/docs/loki/latest/clients/promtail/scraping/#promtail-scraping-service-discovery) documentation. Agent Operator loads this configuration into the `LogsInstance` agents automatically. The DaemonSet of logging agents should be tailing your container logs, applying default labels to the log lines, and shipping them to your remote Loki endpoint. diff --git a/docs/sources/operator/getting-started.md.t b/docs/sources/operator/getting-started.md.t index 56265faf33e6..ad8721ad27b0 100644 --- a/docs/sources/operator/getting-started.md.t +++ b/docs/sources/operator/getting-started.md.t @@ -13,7 +13,7 @@ weight: 110 In this guide, you'll learn how to deploy [Grafana Agent Operator]({{< relref "./_index.md" >}}) into your Kubernetes cluster. This guide does not use Helm. To learn how to deploy Agent Operator using the [grafana-agent-operator Helm chart](https://github.com/grafana/helm-charts/tree/main/charts/agent-operator), see [Install Grafana Agent Operator with Helm]({{< relref "./helm-getting-started.md" >}}). -> **Note**: If you are shipping your data to Grafana Cloud, use [Kubernetes Monitoring](https://grafana.com/docs/grafana-cloud/kubernetes-monitoring/) to set up Agent Operator. Kubernetes Monitoring provides a simplified approach and preconfigured dashboards and alerts. 
+> **Note**: If you are shipping your data to Grafana Cloud, use [Kubernetes Monitoring](/docs/grafana-cloud/kubernetes-monitoring/) to set up Agent Operator. Kubernetes Monitoring provides a simplified approach and preconfigured dashboards and alerts. ## Before you begin To deploy Agent Operator, make sure that you have the following: @@ -31,7 +31,7 @@ you need to deploy the to the cluster. These definitions describe the schema that the custom resources will conform to. This is also required for Grafana Agent Operator to run; it will fail if it can't find the Custom Resource Definitions of objects it is -looking to use. To learn more about the custom resources Agent Operator provides and their hierarchy, see [Grafana Agent Operator architecture]({{< relref "./architecture/" >}}). +looking to use. To learn more about the custom resources Agent Operator provides and their hierarchy, see [Grafana Agent Operator architecture]({{< relref "./architecture" >}}). You can find the set of Custom Resource Definitions for Grafana Agent Operator in the Grafana Agent repository under [production/operator/crds](https://github.com/grafana/agent/tree/main/production/operator/crds). @@ -152,4 +152,4 @@ To install Agent Operator: ## Deploy the Grafana Agent Operator resources -Agent Operator is now up and running. Next, you need to install a Grafana Agent for Agent Operator to run for you. To do so, follow the instructions in the [Deploy the Grafana Agent Operator resources]({{< relref "./deploy-agent-operator-resources.md" >}}) topic. +Agent Operator is now up and running. Next, you need to install a Grafana Agent for Agent Operator to run for you. To do so, follow the instructions in the [Deploy the Grafana Agent Operator resources]({{< relref "./deploy-agent-operator-resources" >}}) topic. 
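
The Makefile target, CI workflow, and release documentation added in the patches above together define the release flow for version-stamped files: record the new tag in `tools/gen-versioned-files/agent-version.txt`, regenerate the checked-in files from their `*.t` templates, and let CI fail if anything generated is stale. The following is a minimal local sketch of that flow, assuming `v0.37.0` is a hypothetical release tag and the commands are run from the repository root:

```bash
# Record the release tag; the generation tooling and the CI check both key off this file.
echo "v0.37.0" > tools/gen-versioned-files/agent-version.txt

# Re-render every *.t template, substituting the version into the generated files.
make generate-versioned-files

# Same check as .github/workflows/check-versioned-files.yml: stale generated files fail the build.
git diff --exit-code || echo "Generated files are out of sync with their templates" >&2
```

Only the templates should ever be edited by hand; the generated files are overwritten on every run, which is why the workflow's error message points contributors back to the templates.
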
From b6868c43726344ddd60785619371dba171f19a39 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Fri, 29 Sep 2023 19:08:25 -0700 Subject: [PATCH 13/21] Rename files to be FILENAME.t.EXTENSION so that editors will still detect the right syntax --- ....md.t => deploy-agent-operator-resources.t.md} | 0 ...{getting-started.md.t => getting-started.t.md} | 0 ...rter-config.md.t => node-exporter-config.t.md} | 0 ...r-config.md.t => process-exporter-config.t.md} | 0 ...gent-docker.md.t => install-agent-docker.t.md} | 0 pkg/operator/{defaults.go.t => defaults.t.go} | 0 .../{agent-bare.yaml.t => agent-bare.t.yaml} | 0 .../{agent-loki.yaml.t => agent-loki.t.yaml} | 0 .../{agent-traces.yaml.t => agent-traces.t.yaml} | 0 .../{version.libsonnet.t => version.t.libsonnet} | 0 .../operator/{main.jsonnet.t => main.t.jsonnet} | 0 .../{install-bare.sh.t => install-bare.t.sh} | 0 ...gent-operator.yaml.t => agent-operator.t.yaml} | 0 .../v1/{main.libsonnet.t => main.t.libsonnet} | 0 .../{base.libsonnet.t => base.t.libsonnet} | 0 .../{syncer.libsonnet.t => syncer.t.libsonnet} | 0 tools/gen-versioned-files/gen-versioned-files.sh | 15 ++++++++++++--- 17 files changed, 12 insertions(+), 3 deletions(-) rename docs/sources/operator/{deploy-agent-operator-resources.md.t => deploy-agent-operator-resources.t.md} (100%) rename docs/sources/operator/{getting-started.md.t => getting-started.t.md} (100%) rename docs/sources/static/configuration/integrations/{node-exporter-config.md.t => node-exporter-config.t.md} (100%) rename docs/sources/static/configuration/integrations/{process-exporter-config.md.t => process-exporter-config.t.md} (100%) rename docs/sources/static/set-up/install/{install-agent-docker.md.t => install-agent-docker.t.md} (100%) rename pkg/operator/{defaults.go.t => defaults.t.go} (100%) rename production/kubernetes/{agent-bare.yaml.t => agent-bare.t.yaml} (100%) rename production/kubernetes/{agent-loki.yaml.t => agent-loki.t.yaml} (100%) rename production/kubernetes/{agent-traces.yaml.t => agent-traces.t.yaml} (100%) rename production/kubernetes/build/lib/{version.libsonnet.t => version.t.libsonnet} (100%) rename production/kubernetes/build/templates/operator/{main.jsonnet.t => main.t.jsonnet} (100%) rename production/kubernetes/{install-bare.sh.t => install-bare.t.sh} (100%) rename production/operator/templates/{agent-operator.yaml.t => agent-operator.t.yaml} (100%) rename production/tanka/grafana-agent/v1/{main.libsonnet.t => main.t.libsonnet} (100%) rename production/tanka/grafana-agent/v2/internal/{base.libsonnet.t => base.t.libsonnet} (100%) rename production/tanka/grafana-agent/v2/internal/{syncer.libsonnet.t => syncer.t.libsonnet} (100%) diff --git a/docs/sources/operator/deploy-agent-operator-resources.md.t b/docs/sources/operator/deploy-agent-operator-resources.t.md similarity index 100% rename from docs/sources/operator/deploy-agent-operator-resources.md.t rename to docs/sources/operator/deploy-agent-operator-resources.t.md diff --git a/docs/sources/operator/getting-started.md.t b/docs/sources/operator/getting-started.t.md similarity index 100% rename from docs/sources/operator/getting-started.md.t rename to docs/sources/operator/getting-started.t.md diff --git a/docs/sources/static/configuration/integrations/node-exporter-config.md.t b/docs/sources/static/configuration/integrations/node-exporter-config.t.md similarity index 100% rename from docs/sources/static/configuration/integrations/node-exporter-config.md.t rename to docs/sources/static/configuration/integrations/node-exporter-config.t.md 
diff --git a/docs/sources/static/configuration/integrations/process-exporter-config.md.t b/docs/sources/static/configuration/integrations/process-exporter-config.t.md similarity index 100% rename from docs/sources/static/configuration/integrations/process-exporter-config.md.t rename to docs/sources/static/configuration/integrations/process-exporter-config.t.md diff --git a/docs/sources/static/set-up/install/install-agent-docker.md.t b/docs/sources/static/set-up/install/install-agent-docker.t.md similarity index 100% rename from docs/sources/static/set-up/install/install-agent-docker.md.t rename to docs/sources/static/set-up/install/install-agent-docker.t.md diff --git a/pkg/operator/defaults.go.t b/pkg/operator/defaults.t.go similarity index 100% rename from pkg/operator/defaults.go.t rename to pkg/operator/defaults.t.go diff --git a/production/kubernetes/agent-bare.yaml.t b/production/kubernetes/agent-bare.t.yaml similarity index 100% rename from production/kubernetes/agent-bare.yaml.t rename to production/kubernetes/agent-bare.t.yaml diff --git a/production/kubernetes/agent-loki.yaml.t b/production/kubernetes/agent-loki.t.yaml similarity index 100% rename from production/kubernetes/agent-loki.yaml.t rename to production/kubernetes/agent-loki.t.yaml diff --git a/production/kubernetes/agent-traces.yaml.t b/production/kubernetes/agent-traces.t.yaml similarity index 100% rename from production/kubernetes/agent-traces.yaml.t rename to production/kubernetes/agent-traces.t.yaml diff --git a/production/kubernetes/build/lib/version.libsonnet.t b/production/kubernetes/build/lib/version.t.libsonnet similarity index 100% rename from production/kubernetes/build/lib/version.libsonnet.t rename to production/kubernetes/build/lib/version.t.libsonnet diff --git a/production/kubernetes/build/templates/operator/main.jsonnet.t b/production/kubernetes/build/templates/operator/main.t.jsonnet similarity index 100% rename from production/kubernetes/build/templates/operator/main.jsonnet.t rename to production/kubernetes/build/templates/operator/main.t.jsonnet diff --git a/production/kubernetes/install-bare.sh.t b/production/kubernetes/install-bare.t.sh similarity index 100% rename from production/kubernetes/install-bare.sh.t rename to production/kubernetes/install-bare.t.sh diff --git a/production/operator/templates/agent-operator.yaml.t b/production/operator/templates/agent-operator.t.yaml similarity index 100% rename from production/operator/templates/agent-operator.yaml.t rename to production/operator/templates/agent-operator.t.yaml diff --git a/production/tanka/grafana-agent/v1/main.libsonnet.t b/production/tanka/grafana-agent/v1/main.t.libsonnet similarity index 100% rename from production/tanka/grafana-agent/v1/main.libsonnet.t rename to production/tanka/grafana-agent/v1/main.t.libsonnet diff --git a/production/tanka/grafana-agent/v2/internal/base.libsonnet.t b/production/tanka/grafana-agent/v2/internal/base.t.libsonnet similarity index 100% rename from production/tanka/grafana-agent/v2/internal/base.libsonnet.t rename to production/tanka/grafana-agent/v2/internal/base.t.libsonnet diff --git a/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t b/production/tanka/grafana-agent/v2/internal/syncer.t.libsonnet similarity index 100% rename from production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t rename to production/tanka/grafana-agent/v2/internal/syncer.t.libsonnet diff --git a/tools/gen-versioned-files/gen-versioned-files.sh b/tools/gen-versioned-files/gen-versioned-files.sh index 
39d25a006d07..54bcda426af2 100755 --- a/tools/gen-versioned-files/gen-versioned-files.sh +++ b/tools/gen-versioned-files/gen-versioned-files.sh @@ -13,8 +13,17 @@ if ! echo "$AGENT_VERSION" | grep -Eq "$versionMatcher"; then exit 1 fi -templates=$(find . -type f -name "*.t" -not -path "./.git/*") +templates=$(find . -type f -name "*.t.*" -not -path "./.git/*") for template in $templates; do - echo "Generating ${template%.t}" - sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < "$template" > "${template%.t}" + # Extract the original file extension + file_extension="${template##*.}" + + # Extract the file name without the extension + file_name_without_ext="${template%.*}" + file_name_without_t="${file_name_without_ext%.*}" + + # Construct the new file path by the extension to the stripped file name + new_file="${file_name_without_t}.${file_extension}" + echo "Generating $new_file" + sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < "$template" > "$new_file" done From 250ba4afb4d4b4342ec8f309c31f95b18b283727 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Mon, 2 Oct 2023 07:48:23 -0700 Subject: [PATCH 14/21] Revert "Rename files to be FILENAME.t.EXTENSION so that editors will still detect the right syntax" This could cause problems, since the template files may be picked up by other tools. This reverts commit b6868c43726344ddd60785619371dba171f19a39. --- ....t.md => deploy-agent-operator-resources.md.t} | 0 ...{getting-started.t.md => getting-started.md.t} | 0 ...rter-config.t.md => node-exporter-config.md.t} | 0 ...r-config.t.md => process-exporter-config.md.t} | 0 ...gent-docker.t.md => install-agent-docker.md.t} | 0 pkg/operator/{defaults.t.go => defaults.go.t} | 0 .../{agent-bare.t.yaml => agent-bare.yaml.t} | 0 .../{agent-loki.t.yaml => agent-loki.yaml.t} | 0 .../{agent-traces.t.yaml => agent-traces.yaml.t} | 0 .../{version.t.libsonnet => version.libsonnet.t} | 0 .../operator/{main.t.jsonnet => main.jsonnet.t} | 0 .../{install-bare.t.sh => install-bare.sh.t} | 0 ...gent-operator.t.yaml => agent-operator.yaml.t} | 0 .../v1/{main.t.libsonnet => main.libsonnet.t} | 0 .../{base.t.libsonnet => base.libsonnet.t} | 0 .../{syncer.t.libsonnet => syncer.libsonnet.t} | 0 tools/gen-versioned-files/gen-versioned-files.sh | 15 +++------------ 17 files changed, 3 insertions(+), 12 deletions(-) rename docs/sources/operator/{deploy-agent-operator-resources.t.md => deploy-agent-operator-resources.md.t} (100%) rename docs/sources/operator/{getting-started.t.md => getting-started.md.t} (100%) rename docs/sources/static/configuration/integrations/{node-exporter-config.t.md => node-exporter-config.md.t} (100%) rename docs/sources/static/configuration/integrations/{process-exporter-config.t.md => process-exporter-config.md.t} (100%) rename docs/sources/static/set-up/install/{install-agent-docker.t.md => install-agent-docker.md.t} (100%) rename pkg/operator/{defaults.t.go => defaults.go.t} (100%) rename production/kubernetes/{agent-bare.t.yaml => agent-bare.yaml.t} (100%) rename production/kubernetes/{agent-loki.t.yaml => agent-loki.yaml.t} (100%) rename production/kubernetes/{agent-traces.t.yaml => agent-traces.yaml.t} (100%) rename production/kubernetes/build/lib/{version.t.libsonnet => version.libsonnet.t} (100%) rename production/kubernetes/build/templates/operator/{main.t.jsonnet => main.jsonnet.t} (100%) rename production/kubernetes/{install-bare.t.sh => install-bare.sh.t} (100%) rename production/operator/templates/{agent-operator.t.yaml => agent-operator.yaml.t} (100%) rename 
production/tanka/grafana-agent/v1/{main.t.libsonnet => main.libsonnet.t} (100%) rename production/tanka/grafana-agent/v2/internal/{base.t.libsonnet => base.libsonnet.t} (100%) rename production/tanka/grafana-agent/v2/internal/{syncer.t.libsonnet => syncer.libsonnet.t} (100%) diff --git a/docs/sources/operator/deploy-agent-operator-resources.t.md b/docs/sources/operator/deploy-agent-operator-resources.md.t similarity index 100% rename from docs/sources/operator/deploy-agent-operator-resources.t.md rename to docs/sources/operator/deploy-agent-operator-resources.md.t diff --git a/docs/sources/operator/getting-started.t.md b/docs/sources/operator/getting-started.md.t similarity index 100% rename from docs/sources/operator/getting-started.t.md rename to docs/sources/operator/getting-started.md.t diff --git a/docs/sources/static/configuration/integrations/node-exporter-config.t.md b/docs/sources/static/configuration/integrations/node-exporter-config.md.t similarity index 100% rename from docs/sources/static/configuration/integrations/node-exporter-config.t.md rename to docs/sources/static/configuration/integrations/node-exporter-config.md.t diff --git a/docs/sources/static/configuration/integrations/process-exporter-config.t.md b/docs/sources/static/configuration/integrations/process-exporter-config.md.t similarity index 100% rename from docs/sources/static/configuration/integrations/process-exporter-config.t.md rename to docs/sources/static/configuration/integrations/process-exporter-config.md.t diff --git a/docs/sources/static/set-up/install/install-agent-docker.t.md b/docs/sources/static/set-up/install/install-agent-docker.md.t similarity index 100% rename from docs/sources/static/set-up/install/install-agent-docker.t.md rename to docs/sources/static/set-up/install/install-agent-docker.md.t diff --git a/pkg/operator/defaults.t.go b/pkg/operator/defaults.go.t similarity index 100% rename from pkg/operator/defaults.t.go rename to pkg/operator/defaults.go.t diff --git a/production/kubernetes/agent-bare.t.yaml b/production/kubernetes/agent-bare.yaml.t similarity index 100% rename from production/kubernetes/agent-bare.t.yaml rename to production/kubernetes/agent-bare.yaml.t diff --git a/production/kubernetes/agent-loki.t.yaml b/production/kubernetes/agent-loki.yaml.t similarity index 100% rename from production/kubernetes/agent-loki.t.yaml rename to production/kubernetes/agent-loki.yaml.t diff --git a/production/kubernetes/agent-traces.t.yaml b/production/kubernetes/agent-traces.yaml.t similarity index 100% rename from production/kubernetes/agent-traces.t.yaml rename to production/kubernetes/agent-traces.yaml.t diff --git a/production/kubernetes/build/lib/version.t.libsonnet b/production/kubernetes/build/lib/version.libsonnet.t similarity index 100% rename from production/kubernetes/build/lib/version.t.libsonnet rename to production/kubernetes/build/lib/version.libsonnet.t diff --git a/production/kubernetes/build/templates/operator/main.t.jsonnet b/production/kubernetes/build/templates/operator/main.jsonnet.t similarity index 100% rename from production/kubernetes/build/templates/operator/main.t.jsonnet rename to production/kubernetes/build/templates/operator/main.jsonnet.t diff --git a/production/kubernetes/install-bare.t.sh b/production/kubernetes/install-bare.sh.t similarity index 100% rename from production/kubernetes/install-bare.t.sh rename to production/kubernetes/install-bare.sh.t diff --git a/production/operator/templates/agent-operator.t.yaml 
b/production/operator/templates/agent-operator.yaml.t similarity index 100% rename from production/operator/templates/agent-operator.t.yaml rename to production/operator/templates/agent-operator.yaml.t diff --git a/production/tanka/grafana-agent/v1/main.t.libsonnet b/production/tanka/grafana-agent/v1/main.libsonnet.t similarity index 100% rename from production/tanka/grafana-agent/v1/main.t.libsonnet rename to production/tanka/grafana-agent/v1/main.libsonnet.t diff --git a/production/tanka/grafana-agent/v2/internal/base.t.libsonnet b/production/tanka/grafana-agent/v2/internal/base.libsonnet.t similarity index 100% rename from production/tanka/grafana-agent/v2/internal/base.t.libsonnet rename to production/tanka/grafana-agent/v2/internal/base.libsonnet.t diff --git a/production/tanka/grafana-agent/v2/internal/syncer.t.libsonnet b/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t similarity index 100% rename from production/tanka/grafana-agent/v2/internal/syncer.t.libsonnet rename to production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t diff --git a/tools/gen-versioned-files/gen-versioned-files.sh b/tools/gen-versioned-files/gen-versioned-files.sh index 54bcda426af2..39d25a006d07 100755 --- a/tools/gen-versioned-files/gen-versioned-files.sh +++ b/tools/gen-versioned-files/gen-versioned-files.sh @@ -13,17 +13,8 @@ if ! echo "$AGENT_VERSION" | grep -Eq "$versionMatcher"; then exit 1 fi -templates=$(find . -type f -name "*.t.*" -not -path "./.git/*") +templates=$(find . -type f -name "*.t" -not -path "./.git/*") for template in $templates; do - # Extract the original file extension - file_extension="${template##*.}" - - # Extract the file name without the extension - file_name_without_ext="${template%.*}" - file_name_without_t="${file_name_without_ext%.*}" - - # Construct the new file path by the extension to the stripped file name - new_file="${file_name_without_t}.${file_extension}" - echo "Generating $new_file" - sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < "$template" > "$new_file" + echo "Generating ${template%.t}" + sed -e "s/\$AGENT_VERSION/$AGENT_VERSION/g" < "$template" > "${template%.t}" done From d161b91e0eaad397788b43c9aeb44440858c9844 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Wed, 4 Oct 2023 12:16:21 -0700 Subject: [PATCH 15/21] Remove unnecessary templates and update docs pages --- docs/sources/_index.md.t | 110 +++++ .../deploy-agent-operator-resources.md | 2 +- .../deploy-agent-operator-resources.md.t | 434 ------------------ docs/sources/operator/getting-started.md | 2 +- docs/sources/operator/getting-started.md.t | 155 ------- .../integrations/node-exporter-config.md | 4 +- .../integrations/node-exporter-config.md.t | 433 ----------------- .../integrations/process-exporter-config.md | 4 +- .../integrations/process-exporter-config.md.t | 184 -------- .../set-up/install/install-agent-docker.md | 4 +- .../set-up/install/install-agent-docker.md.t | 76 --- 11 files changed, 118 insertions(+), 1290 deletions(-) create mode 100644 docs/sources/_index.md.t delete mode 100644 docs/sources/operator/deploy-agent-operator-resources.md.t delete mode 100644 docs/sources/operator/getting-started.md.t delete mode 100644 docs/sources/static/configuration/integrations/node-exporter-config.md.t delete mode 100644 docs/sources/static/configuration/integrations/process-exporter-config.md.t delete mode 100644 docs/sources/static/set-up/install/install-agent-docker.md.t diff --git a/docs/sources/_index.md.t b/docs/sources/_index.md.t new file mode 100644 index 
000000000000..a494fed2676a --- /dev/null +++ b/docs/sources/_index.md.t @@ -0,0 +1,110 @@ +--- +aliases: +- /docs/grafana-cloud/agent/ +- /docs/grafana-cloud/monitor-infrastructure/agent/ +- /docs/grafana-cloud/monitor-infrastructure/integrations/agent/ +canonical: https://grafana.com/docs/agent/latest/ +title: Grafana Agent +description: Grafana Agent is a flexible, performant, vendor-neutral, telemetry collector +weight: 350 +cascade: + AGENT_RELEASE: $AGENT_VERSION +--- + +# Grafana Agent + +Grafana Agent is a vendor-neutral, batteries-included telemetry collector with +configuration inspired by [Terraform][]. It is designed to be flexible, +performant, and compatible with multiple ecosystems such as Prometheus and +OpenTelemetry. + +Grafana Agent is based around **components**. Components are wired together to +form programmable observability **pipelines** for telemetry collection, +processing, and delivery. + +{{% admonition type="note" %}} +This page focuses mainly on [Flow mode]({{< relref "./flow" >}}), the Terraform-inspired variant of Grafana Agent. + +For information on other variants of Grafana Agent, refer to [Introduction to Grafana Agent]({{< relref "./about.md" >}}). +{{% /admonition %}} + +Grafana Agent can collect, transform, and send data to: + +* The [Prometheus][] ecosystem +* The [OpenTelemetry][] ecosystem +* The Grafana open source ecosystem ([Loki][], [Grafana][], [Tempo][], [Mimir][], [Pyroscope][]) + +[Terraform]: https://terraform.io +[Prometheus]: https://prometheus.io +[OpenTelemetry]: https://opentelemetry.io +[Loki]: https://github.com/grafana/loki +[Grafana]: https://github.com/grafana/grafana +[Tempo]: https://github.com/grafana/tempo +[Mimir]: https://github.com/grafana/mimir +[Pyroscope]: https://github.com/grafana/pyroscope + +## Why use Grafana Agent? + +* **Vendor-neutral**: Fully compatible with the Prometheus, OpenTelemetry, and + Grafana open source ecosystems. +* **Every signal**: Collect telemetry data for metrics, logs, traces, and + continuous profiles. +* **Scalable**: Deploy on any number of machines to collect millions of active + series and terabytes of logs. +* **Battle-tested**: Grafana Agent extends the existing battle-tested code from + the Prometheus and OpenTelemetry Collector projects. +* **Powerful**: Write programmable pipelines with ease, and debug them using a + [built-in UI][UI]. +* **Batteries included**: Integrate with systems like MySQL, Kubernetes, and + Apache to get telemetry that's immediately useful. + +[UI]: {{< relref "./flow/monitoring/debugging.md#grafana-agent-flow-ui" >}} + +## Getting started + +* Choose a [variant][variants] of Grafana Agent to run. +* Refer to the documentation for the variant to use: + * [Static mode][] + * [Static mode Kubernetes operator][] + * [Flow mode][] + +[variants]: {{< relref "./about.md" >}} +[Static mode]: {{< relref "./static" >}} +[Static mode Kubernetes operator]: {{< relref "./operator" >}} +[Flow mode]: {{< relref "./flow" >}} + +## Supported platforms + +* Linux + + * Minimum version: kernel 2.6.32 or later + * Architectures: AMD64, ARM64 + +* Windows + + * Minimum version: Windows Server 2012 or later, or Windows 10 or later. 
+ * Architectures: AMD64 + +* macOS + + * Minimum version: macOS 10.13 or later + * Architectures: AMD64 (Intel), ARM64 (Apple Silicon) + +* FreeBSD + + * Minimum version: FreeBSD 10 or later + * Architectures: AMD64 + +## Release cadence + +A new minor release is planned every six weeks for the entire Grafana Agent +project, including Static mode, the Static mode Kubernetes operator, and Flow +mode. + +The release cadence is best-effort: releases may be moved forwards or backwards +if needed. The planned release dates for future minor releases do not change if +one minor release is moved. + +Patch and security releases may be created at any time. + +[Milestones]: https://github.com/grafana/agent/milestones diff --git a/docs/sources/operator/deploy-agent-operator-resources.md b/docs/sources/operator/deploy-agent-operator-resources.md index 6c7bf9467540..09132e0d448c 100644 --- a/docs/sources/operator/deploy-agent-operator-resources.md +++ b/docs/sources/operator/deploy-agent-operator-resources.md @@ -62,7 +62,7 @@ To deploy the `GrafanaAgent` resource: labels: app: grafana-agent spec: - image: grafana/agent:v0.36.2 + image: grafana/agent:{{< param "AGENT_RELEASE" >}} integrations: selector: matchLabels: diff --git a/docs/sources/operator/deploy-agent-operator-resources.md.t b/docs/sources/operator/deploy-agent-operator-resources.md.t deleted file mode 100644 index 341e8ad96cb4..000000000000 --- a/docs/sources/operator/deploy-agent-operator-resources.md.t +++ /dev/null @@ -1,434 +0,0 @@ ---- -aliases: -- /docs/grafana-cloud/agent/operator/deploy-agent-operator-resources/ -- /docs/grafana-cloud/monitor-infrastructure/agent/operator/deploy-agent-operator-resources/ -- /docs/grafana-cloud/monitor-infrastructure/integrations/agent/operator/deploy-agent-operator-resources/ -- custom-resource-quickstart/ -canonical: https://grafana.com/docs/agent/latest/operator/deploy-agent-operator-resources/ -title: Deploy Operator resources -description: Learn how to deploy Operator resources -weight: 120 ---- -# Deploy Operator resources - -To start collecting telemetry data, you need to roll out Grafana Agent Operator custom resources into your Kubernetes cluster. Before you can create the custom resources, you must first apply the Agent Custom Resource Definitions (CRDs) and install Agent Operator, with or without Helm. If you haven't yet taken these steps, follow the instructions in one of the following topics: - -- [Install Agent Operator]({{< relref "./getting-started" >}}) -- [Install Agent Operator with Helm]({{< relref "./helm-getting-started" >}}) - -Follow the steps in this guide to roll out the Grafana Agent Operator custom resources to: - -- Scrape and ship cAdvisor and kubelet metrics to a Prometheus-compatible metrics endpoint. -- Collect and ship your Pods’ container logs to a Loki-compatible logs endpoint. - -The hierarchy of custom resources is as follows: - -- `GrafanaAgent` - - `MetricsInstance` - - `PodMonitor` - - `Probe` - - `ServiceMonitor` - - `LogsInstance` - - `PodLogs` - -To learn more about the custom resources Agent Operator provides and their hierarchy, see [Grafana Agent Operator architecture]({{< relref "./architecture" >}}). - -{{% admonition type="note" %}} -Agent Operator is currently in [beta]({{< relref "../stability.md#beta" >}}) and its custom resources are subject to change. -{{% /admonition %}} - -## Before you begin - -Before you begin, make sure that you have deployed the Grafana Agent Operator CRDs and installed Agent Operator into your cluster. 
See [Install Grafana Agent Operator with Helm]({{< relref "./helm-getting-started" >}}) or [Install Grafana Agent Operator]({{< relref "./getting-started" >}}) for instructions. - -## Deploy the GrafanaAgent resource - -In this section, you'll roll out a `GrafanaAgent` resource. See [Grafana Agent Operator architecture]({{< relref "./architecture" >}}) for a discussion of the resources in the `GrafanaAgent` resource hierarchy. - -{{% admonition type="note" %}} -Due to the variety of possible deployment architectures, the official Agent Operator Helm chart does not provide built-in templates for the custom resources described in this guide. You must configure and deploy these manually as described in this section. We recommend templating and adding the following manifests to your own in-house Helm charts and GitOps flows. -{{% /admonition %}} - -To deploy the `GrafanaAgent` resource: - -1. Copy the following manifests to a file: - - ```yaml - apiVersion: monitoring.grafana.com/v1alpha1 - kind: GrafanaAgent - metadata: - name: grafana-agent - namespace: default - labels: - app: grafana-agent - spec: - image: grafana/agent:$AGENT_VERSION - integrations: - selector: - matchLabels: - agent: grafana-agent-integrations - logLevel: info - serviceAccountName: grafana-agent - metrics: - instanceSelector: - matchLabels: - agent: grafana-agent-metrics - externalLabels: - cluster: cloud - - logs: - instanceSelector: - matchLabels: - agent: grafana-agent-logs - - --- - - apiVersion: v1 - kind: ServiceAccount - metadata: - name: grafana-agent - namespace: default - - --- - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRole - metadata: - name: grafana-agent - rules: - - apiGroups: - - "" - resources: - - nodes - - nodes/proxy - - nodes/metrics - - services - - endpoints - - pods - - events - verbs: - - get - - list - - watch - - apiGroups: - - networking.k8s.io - resources: - - ingresses - verbs: - - get - - list - - watch - - nonResourceURLs: - - /metrics - - /metrics/cadvisor - verbs: - - get - - --- - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRoleBinding - metadata: - name: grafana-agent - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: grafana-agent - subjects: - - kind: ServiceAccount - name: grafana-agent - namespace: default - ``` - - In the first manifest, the `GrafanaAgent` resource: - - - Specifies an Agent image version. - - Specifies `MetricsInstance` and `LogsInstance` selectors. These search for `MetricsInstances` and `LogsInstances` in the same namespace with labels matching `agent: grafana-agent-metrics` and `agent: grafana-agent-logs`, respectively. - - Sets a `cluster: cloud` label for all metrics shipped to your Prometheus-compatible endpoint. Change this label to your cluster name. To search for `MetricsInstances` or `LogsInstances` in a *different* namespace, use the `instanceNamespaceSelector` field. To learn more about this field, see the `GrafanaAgent` [CRD specification](https://github.com/grafana/agent/blob/main/production/operator/crds/monitoring.grafana.com_grafanaagents.yaml). - -1. Customize the manifests as needed and roll them out to your cluster using `kubectl apply -f` followed by the filename. - - This step creates a `ServiceAccount`, `ClusterRole`, and `ClusterRoleBinding` for the `GrafanaAgent` resource. - - Deploying a `GrafanaAgent` resource on its own does not spin up Agent Pods. Agent Operator creates Agent Pods once `MetricsInstance` and `LogsIntance` resources have been created. 
Follow the instructions in the [Deploy a MetricsInstance resource](#deploy-a-metricsinstance-resource) and [Deploy LogsInstance and PodLogs resources](#deploy-logsinstance-and-podlogs-resources) sections to create these resources. - -### Disable feature flags reporting - -To disable the [reporting]({{< relref "../static/configuration/flags.md#report-information-usage" >}}) usage of feature flags to Grafana, set `disableReporting` field to `true`. - -### Disable support bundle generation - -To disable the [support bundles functionality]({{< relref "../static/configuration/flags.md#support-bundles" >}}), set the `disableSupportBundle` field to `true`. - -## Deploy a MetricsInstance resource - -Next, you'll roll out a `MetricsInstance` resource. `MetricsInstance` resources define a `remote_write` sink for metrics and configure one or more selectors to watch for creation and updates to `*Monitor` objects. These objects allow you to define Agent scrape targets via Kubernetes manifests: - -- [ServiceMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) -- [PodMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#podmonitor) -- [Probes](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#probe) - -To deploy a `MetricsInstance` resource: - -1. Copy the following manifest to a file: - - ```yaml - apiVersion: monitoring.grafana.com/v1alpha1 - kind: MetricsInstance - metadata: - name: primary - namespace: default - labels: - agent: grafana-agent-metrics - spec: - remoteWrite: - - url: your_remote_write_URL - basicAuth: - username: - name: primary-credentials-metrics - key: username - password: - name: primary-credentials-metrics - key: password - - # Supply an empty namespace selector to look in all namespaces. Remove - # this to only look in the same namespace as the MetricsInstance CR - serviceMonitorNamespaceSelector: {} - serviceMonitorSelector: - matchLabels: - instance: primary - - # Supply an empty namespace selector to look in all namespaces. Remove - # this to only look in the same namespace as the MetricsInstance CR. - podMonitorNamespaceSelector: {} - podMonitorSelector: - matchLabels: - instance: primary - - # Supply an empty namespace selector to look in all namespaces. Remove - # this to only look in the same namespace as the MetricsInstance CR. - probeNamespaceSelector: {} - probeSelector: - matchLabels: - instance: primary - ``` - -1. Replace the `remote_write` URL and customize the namespace and label configuration as necessary. - - This step associates the `MetricsInstance` resource with the `agent: grafana-agent` `GrafanaAgent` resource deployed in the previous step. The `MetricsInstance` resource watches for creation and updates to `*Monitors` with the `instance: primary` label. - -1. Once you've rolled out the manifest, create the `basicAuth` credentials [using a Kubernetes Secret](https://kubernetes.io/docs/tasks/configmap-secret/managing-secret-using-config-file/): - - ```yaml - apiVersion: v1 - kind: Secret - metadata: - name: primary-credentials-metrics - namespace: default - stringData: - username: 'your_cloud_prometheus_username' - password: 'your_cloud_prometheus_API_key' - ``` - -If you're using Grafana Cloud, you can find your hosted Loki endpoint username and password by clicking **Details** on the Loki tile on the [Grafana Cloud Portal](/profile/org). 
If you want to base64-encode these values yourself, use `data` instead of `stringData`. - -Once you've rolled out the `MetricsInstance` and its Secret, you can confirm that the `MetricsInstance` Agent is up and running using `kubectl get pod`. Since you haven't defined any monitors yet, this Agent doesn't have any scrape targets defined. In the next section, you'll create scrape targets for the cAdvisor and kubelet endpoints exposed by the `kubelet` service in the cluster. - -## Create ServiceMonitors for kubelet and cAdvisor endpoints - -Next, you'll create ServiceMonitors for kubelet and cAdvisor metrics exposed by the `kubelet` service. Every Node in your cluster exposes kubelet and cAdvisor metrics at `/metrics` and `/metrics/cadvisor`, respectively. Agent Operator creates a `kubelet` service that exposes these Node endpoints so that they can be scraped using ServiceMonitors. - -To scrape the kubelet and cAdvisor endpoints: - -1. Copy the following kubelet ServiceMonitor manifest to a file, then roll it out in your cluster using `kubectl apply -f` followed by the filename. - - ```yaml - apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - labels: - instance: primary - name: kubelet-monitor - namespace: default - spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - honorLabels: true - interval: 60s - metricRelabelings: - - action: keep - regex: kubelet_cgroup_manager_duration_seconds_count|go_goroutines|kubelet_pod_start_duration_seconds_count|kubelet_runtime_operations_total|kubelet_pleg_relist_duration_seconds_bucket|volume_manager_total_volumes|kubelet_volume_stats_capacity_bytes|container_cpu_usage_seconds_total|container_network_transmit_bytes_total|kubelet_runtime_operations_errors_total|container_network_receive_bytes_total|container_memory_swap|container_network_receive_packets_total|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|kubelet_running_pod_count|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate|container_memory_working_set_bytes|storage_operation_errors_total|kubelet_pleg_relist_duration_seconds_count|kubelet_running_pods|rest_client_request_duration_seconds_bucket|process_resident_memory_bytes|storage_operation_duration_seconds_count|kubelet_running_containers|kubelet_runtime_operations_duration_seconds_bucket|kubelet_node_config_error|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_running_container_count|kubelet_volume_stats_available_bytes|kubelet_volume_stats_inodes|container_memory_rss|kubelet_pod_worker_duration_seconds_count|kubelet_node_name|kubelet_pleg_relist_interval_seconds_bucket|container_network_receive_packets_dropped_total|kubelet_pod_worker_duration_seconds_bucket|container_start_time_seconds|container_network_transmit_packets_dropped_total|process_cpu_seconds_total|storage_operation_duration_seconds_bucket|container_memory_cache|container_network_transmit_packets_total|kubelet_volume_stats_inodes_used|up|rest_client_requests_total - sourceLabels: - - __name__ - port: https-metrics - relabelings: - - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - - action: replace - targetLabel: job - replacement: integrations/kubernetes/kubelet - scheme: https - tlsConfig: - insecureSkipVerify: true - namespaceSelector: - matchNames: - - default - selector: - matchLabels: - app.kubernetes.io/name: kubelet - ``` - -1. 
Copy the following cAdvisor ServiceMonitor manifest to a file, then roll it out in your cluster using `kubectl apply -f` followed by the filename. - - ```yaml - apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - labels: - instance: primary - name: cadvisor-monitor - namespace: default - spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - honorLabels: true - honorTimestamps: false - interval: 60s - metricRelabelings: - - action: keep - regex: kubelet_cgroup_manager_duration_seconds_count|go_goroutines|kubelet_pod_start_duration_seconds_count|kubelet_runtime_operations_total|kubelet_pleg_relist_duration_seconds_bucket|volume_manager_total_volumes|kubelet_volume_stats_capacity_bytes|container_cpu_usage_seconds_total|container_network_transmit_bytes_total|kubelet_runtime_operations_errors_total|container_network_receive_bytes_total|container_memory_swap|container_network_receive_packets_total|container_cpu_cfs_periods_total|container_cpu_cfs_throttled_periods_total|kubelet_running_pod_count|node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate|container_memory_working_set_bytes|storage_operation_errors_total|kubelet_pleg_relist_duration_seconds_count|kubelet_running_pods|rest_client_request_duration_seconds_bucket|process_resident_memory_bytes|storage_operation_duration_seconds_count|kubelet_running_containers|kubelet_runtime_operations_duration_seconds_bucket|kubelet_node_config_error|kubelet_cgroup_manager_duration_seconds_bucket|kubelet_running_container_count|kubelet_volume_stats_available_bytes|kubelet_volume_stats_inodes|container_memory_rss|kubelet_pod_worker_duration_seconds_count|kubelet_node_name|kubelet_pleg_relist_interval_seconds_bucket|container_network_receive_packets_dropped_total|kubelet_pod_worker_duration_seconds_bucket|container_start_time_seconds|container_network_transmit_packets_dropped_total|process_cpu_seconds_total|storage_operation_duration_seconds_bucket|container_memory_cache|container_network_transmit_packets_total|kubelet_volume_stats_inodes_used|up|rest_client_requests_total - sourceLabels: - - __name__ - path: /metrics/cadvisor - port: https-metrics - relabelings: - - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - - action: replace - targetLabel: job - replacement: integrations/kubernetes/cadvisor - scheme: https - tlsConfig: - insecureSkipVerify: true - namespaceSelector: - matchNames: - - default - selector: - matchLabels: - app.kubernetes.io/name: kubelet - ``` - -These two ServiceMonitors configure Agent to scrape all the kubelet and cAdvisor endpoints in your Kubernetes cluster (one of each per Node). In addition, it defines a `job` label which you can update (it is preset here for compatibility with Grafana Cloud's Kubernetes integration). It also provides an allowlist containing a core set of Kubernetes metrics to reduce remote metrics usage. If you don't need this allowlist, you can omit it, however, your metrics usage will increase significantly. - - When you're done, Agent should now be shipping kubelet and cAdvisor metrics to your remote Prometheus endpoint. To check this in Grafana Cloud, go to your dashboards, select **Integration - Kubernetes**, then select **Kubernetes / Kubelet**. - -## Deploy LogsInstance and PodLogs resources - -Next, you'll deploy a `LogsInstance` resource to collect logs from your cluster Nodes and ship these to your remote Loki endpoint. 
Agent Operator deploys a DaemonSet of Agents in your cluster that will tail log files defined in `PodLogs` resources. - -To deploy the `LogsInstance` resource into your cluster: - -1. Copy the following manifest to a file, then roll it out in your cluster using `kubectl apply -f` followed by the filename. - - ```yaml - apiVersion: monitoring.grafana.com/v1alpha1 - kind: LogsInstance - metadata: - name: primary - namespace: default - labels: - agent: grafana-agent-logs - spec: - clients: - - url: your_remote_logs_URL - basicAuth: - username: - name: primary-credentials-logs - key: username - password: - name: primary-credentials-logs - key: password - - # Supply an empty namespace selector to look in all namespaces. Remove - # this to only look in the same namespace as the LogsInstance CR - podLogsNamespaceSelector: {} - podLogsSelector: - matchLabels: - instance: primary - ``` - - This `LogsInstance` picks up `PodLogs` resources with the `instance: primary` label. Be sure to set the Loki URL to the correct push endpoint. For Grafana Cloud, this will look similar to `logs-prod-us-central1.grafana.net/loki/api/v1/push`, however check the [Grafana Cloud Portal](/profile/org) to confirm by clicking **Details** on the Loki tile. - - Also note that this example uses the `agent: grafana-agent-logs` label, which associates this `LogsInstance` with the `GrafanaAgent` resource defined earlier. This means that it will inherit requests, limits, affinities and other properties defined in the `GrafanaAgent` custom resource. - -1. To create the Secret for the `LogsInstance` resource, copy the following Secret manifest to a file, then roll it out in your cluster using `kubectl apply -f` followed by the filename. - - ```yaml - apiVersion: v1 - kind: Secret - metadata: - name: primary-credentials-logs - namespace: default - stringData: - username: 'your_username_here' - password: 'your_password_here' - ``` - - If you're using Grafana Cloud, you can find your hosted Loki endpoint username and password by clicking **Details** on the Loki tile on the [Grafana Cloud Portal](/profile/org). If you want to base64-encode these values yourself, use `data` instead of `stringData`. - -1. Copy the following `PodLogs` manifest to a file, then roll it to your cluster using `kubectl apply -f` followed by the filename. The manifest defines your logging targets. Agent Operator turns this into Agent configuration for the logs subsystem, and rolls it out to the DaemonSet of logging Agents. - - {{% admonition type="note" %}} - The following is a minimal working example which you should adapt to your production needs. - {{% /admonition %}} - - ```yaml - apiVersion: monitoring.grafana.com/v1alpha1 - kind: PodLogs - metadata: - labels: - instance: primary - name: kubernetes-pods - namespace: default - spec: - pipelineStages: - - docker: {} - namespaceSelector: - matchNames: - - default - selector: - matchLabels: {} - ``` - - This example tails container logs for all Pods in the `default` namespace. You can restrict the set of matched Pods by using the `matchLabels` selector. You can also set additional `pipelineStages` and create `relabelings` to add or modify log line labels. To learn more about the `PodLogs` specification and available resource fields, see the [PodLogs CRD](https://github.com/grafana/agent/blob/main/production/operator/crds/monitoring.grafana.com_podlogs.yaml). 
- - The above `PodLogs` resource adds the following labels to log lines: - - - `namespace` - - `service` - - `pod` - - `container` - - `job` (set to `PodLogs_namespace/PodLogs_name`) - - `__path__` (the path to log files, set to `/var/log/pods/*$1/*.log` where `$1` is `__meta_kubernetes_pod_uid/__meta_kubernetes_pod_container_name`) - - To learn more about this configuration format and other available labels, see the [Promtail Scraping](/docs/loki/latest/clients/promtail/scraping/#promtail-scraping-service-discovery) documentation. Agent Operator loads this configuration into the `LogsInstance` agents automatically. - -The DaemonSet of logging agents should be tailing your container logs, applying default labels to the log lines, and shipping them to your remote Loki endpoint. - -## Summary - -You've now rolled out the following into your cluster: - -- A `GrafanaAgent` resource that discovers one or more `MetricsInstance` and `LogsInstances` resources. -- A `MetricsInstance` resource that defines where to ship collected metrics. -- A `ServiceMonitor` resource to collect cAdvisor and kubelet metrics. -- A `LogsInstance` resource that defines where to ship collected logs. -- A `PodLogs` resource to collect container logs from Kubernetes Pods. - -## What's next - -You can verify that everything is working correctly by navigating to your Grafana instance and querying your Loki and Prometheus data sources. - -> Tip: You can deploy multiple GrafanaAgent resources to isolate allocated resources to the agent pods. By default, the GrafanaAgent resource determines the resources of all deployed agent containers. However, you might want different memory limits for metrics versus logs. diff --git a/docs/sources/operator/getting-started.md b/docs/sources/operator/getting-started.md index f148d399b297..e78a79bb1f62 100644 --- a/docs/sources/operator/getting-started.md +++ b/docs/sources/operator/getting-started.md @@ -79,7 +79,7 @@ To install Agent Operator: serviceAccountName: grafana-agent-operator containers: - name: operator - image: grafana/agent-operator:v0.36.2 + image: grafana/agent-operator:{{< param "AGENT_RELEASE" >}} args: - --kubelet-service=default/kubelet --- diff --git a/docs/sources/operator/getting-started.md.t b/docs/sources/operator/getting-started.md.t deleted file mode 100644 index ad8721ad27b0..000000000000 --- a/docs/sources/operator/getting-started.md.t +++ /dev/null @@ -1,155 +0,0 @@ ---- -aliases: -- /docs/grafana-cloud/agent/operator/getting-started/ -- /docs/grafana-cloud/monitor-infrastructure/agent/operator/getting-started/ -- /docs/grafana-cloud/monitor-infrastructure/integrations/agent/operator/getting-started/ -canonical: https://grafana.com/docs/agent/latest/operator/getting-started/ -title: Install the Operator -description: Learn how to install the Operator -weight: 110 ---- - -# Install the Operator - -In this guide, you'll learn how to deploy [Grafana Agent Operator]({{< relref "./_index.md" >}}) into your Kubernetes cluster. This guide does not use Helm. To learn how to deploy Agent Operator using the [grafana-agent-operator Helm chart](https://github.com/grafana/helm-charts/tree/main/charts/agent-operator), see [Install Grafana Agent Operator with Helm]({{< relref "./helm-getting-started.md" >}}). - -> **Note**: If you are shipping your data to Grafana Cloud, use [Kubernetes Monitoring](/docs/grafana-cloud/kubernetes-monitoring/) to set up Agent Operator. Kubernetes Monitoring provides a simplified approach and preconfigured dashboards and alerts. 
-## Before you begin - -To deploy Agent Operator, make sure that you have the following: - -- A Kubernetes cluster -- The `kubectl` command-line client installed and configured on your machine - -> **Note:** Agent Operator is currently in beta and its custom resources are subject to change. - -## Deploy the Agent Operator Custom Resource Definitions (CRDs) - -Before you can create the custom resources for a Grafana Agent deployment, -you need to deploy the -[Custom Resource Definitions](https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/) -to the cluster. These definitions describe the schema that the custom -resources will conform to. This is also required for Grafana Agent Operator to run; it -will fail if it can't find the Custom Resource Definitions of objects it is -looking to use. To learn more about the custom resources Agent Operator provides and their hierarchy, see [Grafana Agent Operator architecture]({{< relref "./architecture" >}}). - -You can find the set of Custom Resource Definitions for Grafana Agent Operator in the Grafana Agent repository under -[production/operator/crds](https://github.com/grafana/agent/tree/main/production/operator/crds). - -To deploy the CRDs: - -1. Clone the agent repo and then apply the CRDs from the root of the agent repository: - ``` - kubectl apply -f production/operator/crds - ``` - - This step _must_ be completed before installing Agent Operator—it will -fail to start if the CRDs do not exist. - -2. To check that the CRDs are deployed to your Kubernetes cluster and to access documentation for each resource, use `kubectl explain `. - - For example, `kubectl explain GrafanaAgent` describes the GrafanaAgent CRD, and `kubectl explain GrafanaAgent.spec` gives you information on its spec field. - -## Install Grafana Agent Operator - -Next, install Agent Operator by applying the Agent Operator deployment schema. - -To install Agent Operator: - -1. 
Copy the following deployment schema to a file, updating the namespace if needed: - - ```yaml - apiVersion: apps/v1 - kind: Deployment - metadata: - name: grafana-agent-operator - namespace: default - labels: - app: grafana-agent-operator - spec: - replicas: 1 - selector: - matchLabels: - app: grafana-agent-operator - template: - metadata: - labels: - app: grafana-agent-operator - spec: - serviceAccountName: grafana-agent-operator - containers: - - name: operator - image: grafana/agent-operator:$AGENT_VERSION - args: - - --kubelet-service=default/kubelet - --- - - apiVersion: v1 - kind: ServiceAccount - metadata: - name: grafana-agent-operator - namespace: default - - --- - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRole - metadata: - name: grafana-agent-operator - rules: - - apiGroups: [monitoring.grafana.com] - resources: - - grafanaagents - - metricsinstances - - logsinstances - - podlogs - - integrations - verbs: [get, list, watch] - - apiGroups: [monitoring.coreos.com] - resources: - - podmonitors - - probes - - servicemonitors - verbs: [get, list, watch] - - apiGroups: [""] - resources: - - namespaces - - nodes - verbs: [get, list, watch] - - apiGroups: [""] - resources: - - secrets - - services - - configmaps - - endpoints - verbs: [get, list, watch, create, update, patch, delete] - - apiGroups: ["apps"] - resources: - - statefulsets - - daemonsets - - deployments - verbs: [get, list, watch, create, update, patch, delete] - - --- - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRoleBinding - metadata: - name: grafana-agent-operator - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: grafana-agent-operator - subjects: - - kind: ServiceAccount - name: grafana-agent-operator - namespace: default - ``` - -2. Roll out the deployment in your cluster using `kubectl apply -f` followed by your deployment filename. - -> **Note**: If you want to run Agent Operator locally, make sure your kubectl context is correct. Running locally uses your current kubectl context. If it is set to your production environment, you could accidentally deploy a new Grafana Agent to production. Install CRDs on the cluster prior to running locally. Afterwards, you can run Agent Operator using `go run ./cmd/grafana-agent-operator`. - -## Deploy the Grafana Agent Operator resources - -Agent Operator is now up and running. Next, you need to install a Grafana Agent for Agent Operator to run for you. To do so, follow the instructions in the [Deploy the Grafana Agent Operator resources]({{< relref "./deploy-agent-operator-resources" >}}) topic. 
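
From this patch onward, the checked-in docs pages read the release through Hugo's `{{< param "AGENT_RELEASE" >}}` shortcode, and only the new `docs/sources/_index.md.t` template carries a literal `$AGENT_VERSION`, exposed to every page via its front-matter `cascade` block. A quick sanity check after regenerating is sketched below, assuming a POSIX shell and that the generated `docs/sources/_index.md` exists:

```bash
# The cascade value in the generated index page should match the recorded release exactly.
version="$(cat tools/gen-versioned-files/agent-version.txt)"
if grep -qF "AGENT_RELEASE: ${version}" docs/sources/_index.md; then
  echo "docs cascade is set to ${version}"
else
  echo "docs cascade does not match ${version}" >&2
fi
```

Keeping the version in a single cascading front-matter value is what allows this patch to delete the other per-page docs templates: only one docs file needs regeneration per release.
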
diff --git a/docs/sources/static/configuration/integrations/node-exporter-config.md b/docs/sources/static/configuration/integrations/node-exporter-config.md index a3be4a248e4f..e8d4eff7f0ab 100644 --- a/docs/sources/static/configuration/integrations/node-exporter-config.md +++ b/docs/sources/static/configuration/integrations/node-exporter-config.md @@ -30,7 +30,7 @@ docker run \ -v "/proc:/host/proc:ro,rslave" \ -v /tmp/agent:/etc/agent \ -v /path/to/config.yaml:/etc/agent-config/agent.yaml \ - grafana/agent:v0.36.2 \ + grafana/agent:{{< param "AGENT_RELEASE" >}} \ --config.file=/etc/agent-config/agent.yaml ``` @@ -70,7 +70,7 @@ metadata: name: agent spec: containers: - - image: grafana/agent:v0.36.2 + - image: grafana/agent:{{< param "AGENT_RELEASE" >}} name: agent args: - --config.file=/etc/agent-config/agent.yaml diff --git a/docs/sources/static/configuration/integrations/node-exporter-config.md.t b/docs/sources/static/configuration/integrations/node-exporter-config.md.t deleted file mode 100644 index ac84dacf6194..000000000000 --- a/docs/sources/static/configuration/integrations/node-exporter-config.md.t +++ /dev/null @@ -1,433 +0,0 @@ ---- -aliases: -- ../../../configuration/integrations/node-exporter-config/ -canonical: https://grafana.com/docs/agent/latest/static/configuration/integrations/node-exporter-config/ -title: node_exporter_config -description: Learn about node_exporter_config ---- - -# node_exporter_config - -The `node_exporter_config` block configures the `node_exporter` integration, -which is an embedded version of -[`node_exporter`](https://github.com/prometheus/node_exporter) -and allows for collecting metrics from the UNIX system that `node_exporter` is -running on. It provides a significant amount of collectors that are responsible -for monitoring various aspects of the host system. - -Note that if running the Agent in a container, you will need to bind mount -folders from the host system so the integration can monitor them. 
You can use -the example below, making sure to replace `/path/to/config.yaml` with a path on -your host machine where an Agent configuration file is: - -``` -docker run \ - --net="host" \ - --pid="host" \ - --cap-add=SYS_TIME \ - -v "/:/host/root:ro,rslave" \ - -v "/sys:/host/sys:ro,rslave" \ - -v "/proc:/host/proc:ro,rslave" \ - -v /tmp/agent:/etc/agent \ - -v /path/to/config.yaml:/etc/agent-config/agent.yaml \ - grafana/agent:$AGENT_VERSION \ - --config.file=/etc/agent-config/agent.yaml -``` - -Use this configuration file for testing out `node_exporter` support, replacing -the `remote_write` settings with settings appropriate for you: - -```yaml -server: - log_level: info - -metrics: - wal_directory: /tmp/agent - global: - scrape_interval: 60s - remote_write: - - url: https://prometheus-us-central1.grafana.net/api/prom/push - basic_auth: - username: user-id - password: api-token - -integrations: - node_exporter: - enabled: true - rootfs_path: /host/root - sysfs_path: /host/sys - procfs_path: /host/proc - udev_data_path: /host/root/run/udev/data -``` - -For running on Kubernetes, ensure to set the equivalent mounts and capabilities -there as well: - -```yaml -apiVersion: v1 -kind: Pod -metadata: - name: agent -spec: - containers: - - image: grafana/agent:$AGENT_VERSION - name: agent - args: - - --config.file=/etc/agent-config/agent.yaml - securityContext: - capabilities: - add: ["SYS_TIME"] - privileged: true - runAsUser: 0 - volumeMounts: - - name: rootfs - mountPath: /host/root - readOnly: true - - name: sysfs - mountPath: /host/sys - readOnly: true - - name: procfs - mountPath: /host/proc - readOnly: true - hostPID: true - hostNetwork: true - dnsPolicy: ClusterFirstWithHostNet - volumes: - - name: rootfs - hostPath: - path: / - - name: sysfs - hostPath: - path: /sys - - name: procfs - hostPath: - path: /proc -``` - -The manifest and Tanka configs provided by this repository do not have the -mounts or capabilities required for running this integration. - -Some collectors only work on specific operating systems, documented in the -table below. Enabling a collector that is not supported by the operating system -the Agent is running on is a no-op. - -| Name | Description | OS | Enabled by default | -| ---------------- | ----------- | -- | ------------------ | -| arp | Exposes ARP statistics from /proc/net/arp. | Linux | yes | -| bcache | Exposes bcache statistics from /sys/fs/bcache. | Linux | yes | -| bonding | Exposes the number of configured and active slaves of Linux bonding interfaces. | Linux | yes | -| boottime | Exposes system boot time derived from the kern.boottime sysctl. | Darwin, Dragonfly, FreeBSD, NetBSD, OpenBSD, Solaris | yes | -| btrfs | Exposes statistics on btrfs. | Linux | yes | -| buddyinfo | Exposes statistics of memory fragments as reported by /proc/buddyinfo. | Linux | no | -| cgroups | Exposes number of active and enabled cgroups. | Linux | no | -| conntrack | Shows conntrack statistics (does nothing if no /proc/sys/net/netfilter/ present). | Linux | yes | -| cpu | Exposes CPU statistics. | Darwin, Dragonfly, FreeBSD, Linux, Solaris, NetBSD | yes | -| cpufreq | Exposes CPU frequency statistics. | Linux, Solaris | yes | -| devstat | Exposes device statistics. | Dragonfly, FreeBSD | no | -| diskstats | Exposes disk I/O statistics. | Darwin, Linux, OpenBSD | yes | -| dmi | Exposes DMI information. | Linux | yes | -| drbd | Exposes Distributed Replicated Block Device statistics (to version 8.4). 
| Linux | no | -| drm | Exposes GPU card info from /sys/class/drm/card?/device | Linux | no | -| edac | Exposes error detection and correction statistics. | Linux | yes | -| entropy | Exposes available entropy. | Linux | yes | -| ethtool | Exposes ethtool stats | Linux | no | -| exec | Exposes execution statistics. | Dragonfly, FreeBSD | yes | -| fibrechannel | Exposes FibreChannel statistics. | Linux | yes | -| filefd | Exposes file descriptor statistics from /proc/sys/fs/file-nr. | Linux | yes | -| filesystem | Exposes filesystem statistics, such as disk space used. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD | yes | -| hwmon | Exposes hardware monitoring and sensor data from /sys/class/hwmon. | Linux | yes | -| infiniband | Exposes network statistics specific to InfiniBand and Intel OmniPath configurations. | Linux | yes | -| interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD | no | -| ipvs | Exposes IPVS status from /proc/net/ip_vs and stats from /proc/net/ip_vs_stats. | Linux | yes | -| ksmd | Exposes kernel and system statistics from /sys/kernel/mm/ksm. | Linux | no | -| lnstat | Exposes Linux network cache stats | Linux | no | -| loadavg | Exposes load average. | Darwin, Dragonfly, FreeBSD, Linux, NetBSD, OpenBSD, Solaris | yes | -| logind | Exposes session counts from logind. | Linux | no | -| mdadm | Exposes statistics about devices in /proc/mdstat (does nothing if no /proc/mdstat present). | Linux | yes | -| meminfo | Exposes memory statistics. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD, NetBSD | yes | -| meminfo_numa | Exposes memory statistics from /proc/meminfo_numa. | Linux | no | -| mountstats | Exposes filesystem statistics from /proc/self/mountstats. Exposes detailed NFS client statistics. | Linux | no | -| netclass | Exposes network interface info from /sys/class/net. | Linux | yes | -| netisr | Exposes netisr statistics. | FreeBSD | yes | -| netdev | Exposes network interface statistics such as bytes transferred. | Darwin, Dragonfly, FreeBSD, Linux, OpenBSD | yes | -| netstat | Exposes network statistics from /proc/net/netstat. This is the same information as netstat -s. | Linux | yes | -| network_route | Exposes network route statistics. | Linux | no | -| nfs | Exposes NFS client statistics from /proc/net/rpc/nfs. This is the same information as nfsstat -c. | Linux | yes | -| nfsd | Exposes NFS kernel server statistics from /proc/net/rpc/nfsd. This is the same information as nfsstat -s. | Linux | yes | -| ntp | Exposes local NTP daemon health to check time. | any | no | -| nvme | Exposes NVMe statistics. | Linux | yes | -| os | Exposes os-release information. | Linux | yes | -| perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel configuration and settings). | Linux | no | -| powersupplyclass | Collects information on power supplies. | any | yes | -| pressure | Exposes pressure stall statistics from /proc/pressure/. | Linux (kernel 4.20+ and/or CONFIG_PSI) | yes | -| processes | Exposes aggregate process statistics from /proc. | Linux | no | -| qdisc | Exposes queuing discipline statistics. | Linux | no | -| rapl | Exposes various statistics from /sys/class/powercap. | Linux | yes | -| runit | Exposes service status from runit. | any | no | -| schedstat | Exposes task scheduler statistics from /proc/schedstat. | Linux | yes | -| selinux | Exposes SELinux statistics. | Linux | yes | -| slabinfo | Exposes slab statistics from `/proc/slabinfo`. 
| Linux | no | -| softirqs | Exposes detailed softirq statistics from `/proc/softirqs`. | Linux | no | -| sockstat | Exposes various statistics from /proc/net/sockstat. | Linux | yes | -| softnet | Exposes statistics from /proc/net/softnet_stat. | Linux | yes | -| stat | Exposes various statistics from /proc/stat. This includes boot time, forks and interrupts. | Linux | yes | -| supervisord | Exposes service status from supervisord. | any | no | -| sysctl | Expose sysctl values from `/proc/sys`. | Linux | no | -| systemd | Exposes service and system status from systemd. | Linux | no | -| tapestats | Exposes tape device stats. | Linux | yes | -| tcpstat | Exposes TCP connection status information from /proc/net/tcp and /proc/net/tcp6. (Warning: the current version has potential performance issues in high load situations). | Linux | no | -| textfile | Collects metrics from files in a directory matching the filename pattern *.prom. The files must be using the text format defined here: https://prometheus.io/docs/instrumenting/exposition_formats/ | any | yes | -| thermal | Exposes thermal statistics. | Darwin | yes | -| thermal_zone | Exposes thermal zone & cooling device statistics from /sys/class/thermal. | Linux | yes | -| time | Exposes the current system time. | any | yes | -| timex | Exposes selected adjtimex(2) system call stats. | Linux | yes | -| udp_queues | Exposes UDP total lengths of the rx_queue and tx_queue from /proc/net/udp and /proc/net/udp6. | Linux | yes | -| uname | Exposes system information as provided by the uname system call. | Darwin, FreeBSD, Linux, OpenBSD, NetBSD | yes | -| vmstat | Exposes statistics from /proc/vmstat. | Linux | yes | -| wifi | Exposes WiFi device and station statistics. | Linux | no | -| xfs | Exposes XFS runtime statistics. | Linux (kernel 4.4+) | yes | -| zfs | Exposes ZFS performance statistics. | Linux, Solaris | yes | -| zoneinfo | Exposes zone stats. | Linux | no | - -```yaml - # Enables the node_exporter integration, allowing the Agent to automatically - # collect system metrics from the host UNIX system. - [enabled: | default = false] - - # Sets an explicit value for the instance label when the integration is - # self-scraped. Overrides inferred values. - # - # The default value for this integration is inferred from the agent hostname - # and HTTP listen port, delimited by a colon. - [instance: ] - - # Automatically collect metrics from this integration. If disabled, - # the node_exporter integration will be run but not scraped and thus not remote-written. Metrics for the - # integration will be exposed at /integrations/node_exporter/metrics and can - # be scraped by an external process. - [scrape_integration: | default = ] - - # How often should the metrics be collected? Defaults to - # prometheus.global.scrape_interval. - [scrape_interval: | default = ] - - # The timtout before considering the scrape a failure. Defaults to - # prometheus.global.scrape_timeout. - [scrape_timeout: | default = ] - - # Allows for relabeling labels on the target. - relabel_configs: - [- ... ] - - # Relabel metrics coming from the integration, allowing to drop series - # from the integration that you don't care about. - metric_relabel_configs: - [ - ... ] - - # How frequent to truncate the WAL for this integration. - [wal_truncate_frequency: | default = "60m"] - - # Monitor the exporter itself and include those metrics in the results. - [include_exporter_metrics: | default = false] - - # Optionally defines the list of enabled-by-default collectors. 
- # Anything not provided in the list below will be disabled by default, - # but requires at least one element to be treated as defined. - # - # This is useful if you have a very explicit set of collectors you wish - # to run. - set_collectors: - - [] - - # Additional collectors to enable on top of the default set of enabled - # collectors or on top of the list provided by set_collectors. - # - # This is useful if you have a few collectors you wish to run that are - # not enabled by default, but do not want to explicitly provide an entire - # list through set_collectors. - enable_collectors: - - [] - - # Additional collectors to disable on top of the default set of disabled - # collectors. Takes precedence over enable_collectors. - # - # This is useful if you have a few collectors you do not want to run that - # are enabled by default, but do not want to explicitly provide an entire - # list through set_collectors. - disable_collectors: - - [] - - # procfs mountpoint. - [procfs_path: | default = "/proc"] - - # sysfs mountpoint. - [sysfs_path: | default = "/sys"] - - # rootfs mountpoint. If running in docker, the root filesystem of the host - # machine should be mounted and this value should be changed to the mount - # directory. - [rootfs_path: | default = "/"] - - # udev data path needed for diskstats from Node exporter. When running - # in Kubernetes it should be set to /host/root/run/udev/data. - [udev_data_path: | default = "/run/udev/data"] - - # Expose expensive bcache priority stats. - [enable_bcache_priority_stats: ] - - # Regexp of `bugs` field in cpu info to filter. - [cpu_bugs_include: ] - - # Enable the node_cpu_guest_seconds_total metric. - [enable_cpu_guest_seconds_metric: | default = true] - - # Enable the cpu_info metric for the cpu collector. - [enable_cpu_info_metric: | default = true] - - # Regexp of `flags` field in cpu info to filter. - [cpu_flags_include: ] - - # Regexp of devices to ignore for diskstats. - [diskstats_device_exclude: | default = "^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"] - - # Regexp of devices to include for diskstats. If set, the diskstat_device_exclude field is ignored. - [diskstats_device_include: ] - - # Regexp of ethtool devices to exclude (mutually exclusive with ethtool_device_include) - [ethtool_device_exclude: ] - - # Regexp of ethtool devices to include (mutually exclusive with ethtool_device_exclude) - [ethtool_device_include: ] - - # Regexp of ethtool stats to include. - [ethtool_metrics_include: | default = ".*"] - - # Regexp of mount points to ignore for filesystem collector. - [filesystem_mount_points_exclude: | default = "^/(dev|proc|sys|var/lib/docker/.+)($|/)"] - - # Regexp of filesystem types to ignore for filesystem collector. - [filesystem_fs_types_exclude: | default = "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"] - - # How long to wait for a mount to respond before marking it as stale. - [filesystem_mount_timeout: | default = "5s"] - - # Array of IPVS backend stats labels. - # - # The default is [local_address, local_port, remote_address, remote_port, proto, local_mark]. - ipvs_backend_labels: - [- ] - - # NTP server to use for ntp collector - [ntp_server: | default = "127.0.0.1"] - - # NTP protocol version - [ntp_protocol_version: | default = 4] - - # Certify that the server address is not a public ntp server. 
- [ntp_server_is_local: | default = false] - - # IP TTL to use wile sending NTP query. - [ntp_ip_ttl: | default = 1] - - # Max accumulated distance to the root. - [ntp_max_distance: | default = "3466080us"] - - # Offset between local clock and local ntpd time to tolerate. - [ntp_local_offset_tolerance: | default = "1ms"] - - # Regexp of net devices to ignore for netclass collector. - [netclass_ignored_devices: | default = "^$"] - - # Ignore net devices with invalid speed values. This will default to true in - # node_exporter 2.0. - [netclass_ignore_invalid_speed_device: | default = false] - - # Enable collecting address-info for every device. - [netdev_address_info: ] - - # Regexp of net devices to exclude (mutually exclusive with include) - [netdev_device_exclude: | default = ""] - - # Regexp of net devices to include (mutually exclusive with exclude) - [netdev_device_include: | default = ""] - - # Regexp of fields to return for netstat collector. - [netstat_fields: | default = "^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*|TCPSynRetrans|TCPTimeouts)|Tcp_(ActiveOpens|InSegs|OutSegs|OutRsts|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts|RcvbufErrors|SndbufErrors))$"] - - # List of CPUs from which perf metrics should be collected. - [perf_cpus: | default = ""] - - # Array of perf tracepoints that should be collected. - perf_tracepoint: - [- ] - - # Disable perf hardware profilers. - [perf_disable_hardware_profilers: | default = false] - - # Perf hardware profilers that should be collected. - perf_hardware_profilers: - [- ] - - # Disable perf software profilers. - [perf_disable_software_profilers: | default = false] - - # Perf software profilers that should be collected. - perf_software_profilers: - [- ] - - # Disable perf cache profilers. - [perf_disable_cache_profilers: | default = false] - - # Perf cache profilers that should be collected. - perf_cache_profilers: - [- ] - - # Regexp of power supplies to ignore for the powersupplyclass collector. - [powersupply_ignored_supplies: | default = "^$"] - - # Path to runit service directory. - [runit_service_dir: | default = "/etc/service"] - - # XML RPC endpoint for the supervisord collector. - # - # Setting SUPERVISORD_URL in the environment will override the default value. - # An explicit value in the YAML config takes precedence over the environment - # variable. - [supervisord_url: | default = "http://localhost:9001/RPC2"] - - # Numeric sysctl values to expose. - # For sysctl with multiple numeric values, - # an optional mapping can be given to expose each value as its own metric. - sysctl_include: - [- ] - - # String sysctl values to expose. - sysctl_include_info: - [- ] - - # Regexp of systemd units to include. Units must both match include and not - # match exclude to be collected. - [systemd_unit_include: | default = ".+"] - - # Regexp of systemd units to exclude. Units must both match include and not - # match exclude to be collected. - [systemd_unit_exclude: | default = ".+\\.(automount|device|mount|scope|slice)"] - - # Enables service unit tasks metrics unit_tasks_current and unit_tasks_max - [systemd_enable_task_metrics: | default = false] - - # Enables service unit metric service_restart_total - [systemd_enable_restarts_metrics: | default = false] - - # Enables service unit metric unit_start_time_seconds - [systemd_enable_start_time_metrics: | default = false] - - # Regexp of tapestats devices to ignore. 
- [tapestats_ignored_devices: | default = "^$"] - - # Directory to read *.prom files from for the textfile collector. - [textfile_directory: | default = ""] - - # Regexp of fields to return for the vmstat collector. - [vmstat_fields: | default = "^(oom_kill|pgpg|pswp|pg.*fault).*"] -``` diff --git a/docs/sources/static/configuration/integrations/process-exporter-config.md b/docs/sources/static/configuration/integrations/process-exporter-config.md index 944d65803ce7..a0ba6235148d 100644 --- a/docs/sources/static/configuration/integrations/process-exporter-config.md +++ b/docs/sources/static/configuration/integrations/process-exporter-config.md @@ -22,7 +22,7 @@ docker run \ -v "/proc:/proc:ro" \ -v /tmp/agent:/etc/agent \ -v /path/to/config.yaml:/etc/agent-config/agent.yaml \ - grafana/agent:v0.36.2 \ + grafana/agent:{{< param "AGENT_RELEASE" >}} \ --config.file=/etc/agent-config/agent.yaml ``` @@ -39,7 +39,7 @@ metadata: name: agent spec: containers: - - image: grafana/agent:v0.36.2 + - image: grafana/agent:{{< param "AGENT_RELEASE" >}} name: agent args: - --config.file=/etc/agent-config/agent.yaml diff --git a/docs/sources/static/configuration/integrations/process-exporter-config.md.t b/docs/sources/static/configuration/integrations/process-exporter-config.md.t deleted file mode 100644 index 3f417b7b3faa..000000000000 --- a/docs/sources/static/configuration/integrations/process-exporter-config.md.t +++ /dev/null @@ -1,184 +0,0 @@ ---- -aliases: -- ../../../configuration/integrations/process-exporter-config/ -canonical: https://grafana.com/docs/agent/latest/static/configuration/integrations/process-exporter-config/ -title: process_exporter_config -description: Learn about process_exporter_config ---- - -# process_exporter_config - -The `process_exporter_config` block configures the `process_exporter` integration, -which is an embedded version of -[`process-exporter`](https://github.com/ncabatoff/process-exporter) -and allows for collection metrics based on the /proc filesystem on Linux -systems. Note that on non-Linux systems, enabling this exporter is a no-op. - -Note that if running the Agent in a container, you will need to bind mount -folders from the host system so the integration can monitor them: - -``` -docker run \ - -v "/proc:/proc:ro" \ - -v /tmp/agent:/etc/agent \ - -v /path/to/config.yaml:/etc/agent-config/agent.yaml \ - grafana/agent:$AGENT_VERSION \ - --config.file=/etc/agent-config/agent.yaml -``` - -Replace `/path/to/config.yaml` with the appropriate path on your host system -where an Agent config file can be found. - -For running on Kubernetes, ensure to set the equivalent mounts and capabilities -there as well: - -```yaml -apiVersion: v1 -kind: Pod -metadata: - name: agent -spec: - containers: - - image: grafana/agent:$AGENT_VERSION - name: agent - args: - - --config.file=/etc/agent-config/agent.yaml - volumeMounts: - - name: procfs - mountPath: /proc - readOnly: true - volumes: - - name: procfs - hostPath: - path: /proc -``` - -The manifest and Tanka configs provided by this repository do not have the -mounts or capabilities required for running this integration. - -An example config for `process_exporter_config` that tracks all processes is the -following: - -``` -enabled: true -process_names: -- name: "{{.Comm}}" - cmdline: - - '.+' -``` - -Full reference of options: - -```yaml - # Enables the process_exporter integration, allowing the Agent to automatically - # collect system metrics from the host UNIX system. 
- [enabled: | default = false] - - # Sets an explicit value for the instance label when the integration is - # self-scraped. Overrides inferred values. - # - # The default value for this integration is inferred from the agent hostname - # and HTTP listen port, delimited by a colon. - [instance: ] - - # Automatically collect metrics from this integration. If disabled, - # the process_exporter integration will be run but not scraped and thus not - # remote-written. Metrics for the integration will be exposed at - # /integrations/process_exporter/metrics and can be scraped by an external - # process. - [scrape_integration: | default = ] - - # How often should the metrics be collected? Defaults to - # prometheus.global.scrape_interval. - [scrape_interval: | default = ] - - # The timeout before considering the scrape a failure. Defaults to - # prometheus.global.scrape_timeout. - [scrape_timeout: | default = ] - - # Allows for relabeling labels on the target. - relabel_configs: - [- ... ] - - # Relabel metrics coming from the integration, allowing to drop series - # from the integration that you don't care about. - metric_relabel_configs: - [ - ... ] - - # How frequent to truncate the WAL for this integration. - [wal_truncate_frequency: | default = "60m"] - - # procfs mountpoint. - [procfs_path: | default = "/proc"] - - # If a proc is tracked, track with it any children that aren't a part of their - # own group. - [track_children: | default = true] - - # Report on per-threadname metrics as well. - [track_threads: | default = true] - - # Gather metrics from smaps file, which contains proportional resident memory - # size. - [gather_smaps: | default = true] - - # Recheck process names on each scrape. - [recheck_on_scrape: | default = false] - - # A collection of matching rules to use for deciding which processes to - # monitor. Each config can match multiple processes to be tracked as a single - # process "group." - process_names: - [- ] -``` - -## process_matcher_config - -```yaml -# The name to use for identifying the process group name in the metric. By -# default, it uses the base path of the executable. -# -# The following template variables are available: -# -# - {{.Comm}}: Basename of the original executable from /proc//stat -# - {{.ExeBase}}: Basename of the executable from argv[0] -# - {{.ExeFull}}: Fully qualified path of the executable -# - {{.Username}}: Username of the effective user -# - {{.Matches}}: Map containing all regex capture groups resulting from -# matching a process with the cmdline rule group. -# - {{.PID}}: PID of the process. Note that the PID is copied from the -# first executable found. -# - {{.StartTime}}: The start time of the process. This is useful when combined -# with PID as PIDS get reused over time. -# - `{{.Cgroups}}`: The cgroups, if supported, of the process (`/proc/self/cgroup`). This is particularly useful for identifying to which container a process belongs. -# -# **NOTE**: Using `PID` or `StartTime` is discouraged, as it is almost never what you want, and is likely to result in high cardinality metrics. - - -[name: | default = "{{.ExeBase}}"] - -# A list of strings that match the base executable name for a process, truncated -# at 15 characters. It is derived from reading the second field of -# /proc//stat minus the parens. -# -# If any of the strings match, the process will be tracked. -comm: - [- ] - -# A list of strings that match argv[0] for a process. If there are no slashes, -# only the basename of argv[0] needs to match. 
Otherwise the name must be an -# exact match. For example, "postgres" may match any postgres binary but -# "/usr/local/bin/postgres" can only match a postgres at that path exactly. -# -# If any of the strings match, the process will be tracked. -exe: - [- ] - -# A list of regular expressions applied to the argv of the process. Each -# regex here must match the corresponding argv for the process to be tracked. -# The first element that is matched is argv[1]. -# -# Regex Captures are added to the .Matches map for use in the name. -cmdline: - [- ] -``` diff --git a/docs/sources/static/set-up/install/install-agent-docker.md b/docs/sources/static/set-up/install/install-agent-docker.md index 41029ab77198..7f32cc4e6d7f 100644 --- a/docs/sources/static/set-up/install/install-agent-docker.md +++ b/docs/sources/static/set-up/install/install-agent-docker.md @@ -34,7 +34,7 @@ To run a Grafana Agent Docker container on Linux, run the following command in a docker run \ -v WAL_DATA_DIRECTORY:/etc/agent/data \ -v CONFIG_FILE_PATH:/etc/agent/agent.yaml \ - grafana/agent:v0.36.2 + grafana/agent:{{< param "AGENT_RELEASE" >}} ``` Replace `CONFIG_FILE_PATH` with the configuration file path on your Linux host system. @@ -51,7 +51,7 @@ To run a Grafana Agent Docker container on Windows, run the following command in docker run ^ -v WAL_DATA_DIRECTORY:C:\etc\grafana-agent\data ^ -v CONFIG_FILE_PATH:C:\etc\grafana-agent ^ - grafana/agent:v0.36.2-windows + grafana/agent:{{< param "AGENT_RELEASE" >}}-windows ``` Replace the following: diff --git a/docs/sources/static/set-up/install/install-agent-docker.md.t b/docs/sources/static/set-up/install/install-agent-docker.md.t deleted file mode 100644 index 536ef3fc5978..000000000000 --- a/docs/sources/static/set-up/install/install-agent-docker.md.t +++ /dev/null @@ -1,76 +0,0 @@ ---- -aliases: -- ../../set-up/install-agent-docker/ -- ../set-up/install-agent-docker/ -canonical: https://grafana.com/docs/agent/latest/static/set-up/install/install-agent-docker/ -menuTitle: Docker -title: Run Grafana Agent in static mode in a Docker container -description: Learn how to run Grafana Agent in static mode in a Docker container -weight: 200 ---- - -# Run Grafana Agent in static mode in a Docker container - -Grafana Agent is available as a Docker container image on the following platforms: - -* [Linux containers][] for AMD64 and ARM64. -* [Windows containers][] for AMD64. - -[Linux containers]: #run-a-linux-docker-container -[Windows containers]: #run-a-windows-docker-container - -## Before you begin - -* Install [Docker][] on your computer. -* Create and save a Grafana Agent YAML [configuration file][configure] on your computer. - -[Docker]: https://docker.io - -## Run a Linux Docker container - -To run a Grafana Agent Docker container on Linux, run the following command in a terminal window: - -```shell -docker run \ - -v WAL_DATA_DIRECTORY:/etc/agent/data \ - -v CONFIG_FILE_PATH:/etc/agent/agent.yaml \ - grafana/agent:$AGENT_VERSION -``` - -Replace `CONFIG_FILE_PATH` with the configuration file path on your Linux host system. - -{{% admonition type="note" %}} -For the flags to work correctly, you must expose the paths on your Linux host to the Docker container through a bind mount. 
-{{%/admonition %}} - -## Run a Windows Docker container - -To run a Grafana Agent Docker container on Windows, run the following command in a Windows command prompt: - -```shell -docker run ^ - -v WAL_DATA_DIRECTORY:C:\etc\grafana-agent\data ^ - -v CONFIG_FILE_PATH:C:\etc\grafana-agent ^ - grafana/agent:$AGENT_VERSION-windows -``` - -Replace the following: - -* `CONFIG_FILE_PATH`: The configuration file path on your Windows host system. -* `WAL_DATA_DIRECTORY`: the directory used to store your metrics before sending them to Prometheus. Old WAL data is cleaned up every hour and is used for recovery if the process crashes. - -{{% admonition type="note" %}} -For the flags to work correctly, you must expose the paths on your Windows host to the Docker container through a bind mount. -{{%/admonition %}} - -## Next steps - -- [Start Grafana Agent][start] -- [Configure Grafana Agent][configure] - -{{% docs/reference %}} -[start]: "/docs/agent/ -> /docs/agent//static/set-up/start-agent" -[start]: "/docs/grafana-cloud/ -> ../start-agent" -[configure]: "/docs/agent/ -> /docs/agent//static/configuration/create-config-file" -[configure]: "/docs/grafana-cloud/ -> ../../configuration/create-config-file" -{{% /docs/reference %}} From 24e5df1fbf4c5b7f7185e030794c941bcc43322b Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Wed, 4 Oct 2023 19:44:41 -0700 Subject: [PATCH 16/21] Update docs/sources/_index.md.t --- docs/sources/_index.md.t | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/docs/sources/_index.md.t b/docs/sources/_index.md.t index a494fed2676a..ade2db655994 100644 --- a/docs/sources/_index.md.t +++ b/docs/sources/_index.md.t @@ -23,7 +23,7 @@ form programmable observability **pipelines** for telemetry collection, processing, and delivery. {{% admonition type="note" %}} -This page focuses mainly on [Flow mode]({{< relref "./flow" >}}), the Terraform-inspired variant of Grafana Agent. +This page focuses mainly on [Flow mode][], the Terraform-inspired variant of Grafana Agent. For information on other variants of Grafana Agent, refer to [Introduction to Grafana Agent]({{< relref "./about.md" >}}). {{% /admonition %}} @@ -58,8 +58,6 @@ Grafana Agent can collect, transform, and send data to: * **Batteries included**: Integrate with systems like MySQL, Kubernetes, and Apache to get telemetry that's immediately useful. -[UI]: {{< relref "./flow/monitoring/debugging.md#grafana-agent-flow-ui" >}} - ## Getting started * Choose a [variant][variants] of Grafana Agent to run. @@ -68,11 +66,6 @@ Grafana Agent can collect, transform, and send data to: * [Static mode Kubernetes operator][] * [Flow mode][] -[variants]: {{< relref "./about.md" >}} -[Static mode]: {{< relref "./static" >}} -[Static mode Kubernetes operator]: {{< relref "./operator" >}} -[Flow mode]: {{< relref "./flow" >}} - ## Supported platforms * Linux @@ -108,3 +101,20 @@ one minor release is moved. Patch and security releases may be created at any time. 
[Milestones]: https://github.com/grafana/agent/milestones + +{{% docs/reference %}} +[variants]: "/docs/agent/ -> /docs/agent//about" +[variants]: "/docs/grafana-cloud/ -> /docs/grafana-cloud/monitor-infrastructure/agent/about" + +[Static mode]: "/docs/agent/ -> /docs/agent//static" +[Static mode]: "/docs/grafana-cloud/ -> /docs/grafana-cloud/monitor-infrastructure/agent/static" + +[Static mode Kubernetes operator]: "/docs/agent/ -> /docs/agent//operator" +[Static mode Kubernetes operator]: "/docs/grafana-cloud/ -> /docs/grafana-cloud/monitor-infrastructure/agent/operator" + +[Flow mode]: "/docs/agent/ -> /docs/agent//flow" +[Flow mode]: "/docs/grafana-cloud/ -> /docs/agent//flow" + +[UI]: "/docs/agent/ -> /docs/agent//flow/monitoring/debugging.md#grafana-agent-flow-ui" +[UI]: "/docs/grafana-cloud/ -> /docs/agent//flow/monitoring/debugging.md#grafana-agent-flow-ui" +{{% /docs/reference %}} From 26372d031a49338421079bdd5793f7881a14ec8d Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Wed, 4 Oct 2023 19:47:27 -0700 Subject: [PATCH 17/21] Fix missing cascade --- docs/sources/_index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sources/_index.md b/docs/sources/_index.md index a589b6545f2e..3778e454f3cd 100644 --- a/docs/sources/_index.md +++ b/docs/sources/_index.md @@ -7,6 +7,8 @@ canonical: https://grafana.com/docs/agent/latest/ title: Grafana Agent description: Grafana Agent is a flexible, performant, vendor-neutral, telemetry collector weight: 350 +cascade: + AGENT_RELEASE: v0.36.2 --- # Grafana Agent From a21004fea2eb6cf0897983d44ba8653ebe4379ee Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Thu, 5 Oct 2023 12:49:06 -0700 Subject: [PATCH 18/21] Remove templates from already generated or unnecessary files --- production/kubernetes/agent-bare.yaml.t | 115 ---- production/kubernetes/agent-loki.yaml.t | 100 --- production/kubernetes/agent-traces.yaml.t | 154 ----- .../kubernetes/build/lib/version.libsonnet.t | 1 - .../build/templates/operator/main.jsonnet.t | 160 ----- production/kubernetes/install-bare.sh.t | 34 - .../operator/templates/agent-operator.yaml.t | 645 ------------------ .../tanka/grafana-agent/v1/main.libsonnet.t | 142 ---- .../v2/internal/base.libsonnet.t | 56 -- .../v2/internal/syncer.libsonnet.t | 62 -- 10 files changed, 1469 deletions(-) delete mode 100644 production/kubernetes/agent-bare.yaml.t delete mode 100644 production/kubernetes/agent-loki.yaml.t delete mode 100644 production/kubernetes/agent-traces.yaml.t delete mode 100644 production/kubernetes/build/lib/version.libsonnet.t delete mode 100644 production/kubernetes/build/templates/operator/main.jsonnet.t delete mode 100644 production/kubernetes/install-bare.sh.t delete mode 100644 production/operator/templates/agent-operator.yaml.t delete mode 100644 production/tanka/grafana-agent/v1/main.libsonnet.t delete mode 100644 production/tanka/grafana-agent/v2/internal/base.libsonnet.t delete mode 100644 production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t diff --git a/production/kubernetes/agent-bare.yaml.t b/production/kubernetes/agent-bare.yaml.t deleted file mode 100644 index e306191ee170..000000000000 --- a/production/kubernetes/agent-bare.yaml.t +++ /dev/null @@ -1,115 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: grafana-agent - namespace: ${NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: grafana-agent -rules: -- apiGroups: - - "" - resources: - - nodes - - nodes/proxy - - services - - endpoints - - pods - - events - 
verbs: - - get - - list - - watch -- nonResourceURLs: - - /metrics - verbs: - - get ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: grafana-agent -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: grafana-agent -subjects: -- kind: ServiceAccount - name: grafana-agent - namespace: ${NAMESPACE} ---- -apiVersion: v1 -kind: Service -metadata: - labels: - name: grafana-agent - name: grafana-agent - namespace: ${NAMESPACE} -spec: - clusterIP: None - ports: - - name: grafana-agent-http-metrics - port: 80 - targetPort: 80 - selector: - name: grafana-agent ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: grafana-agent - namespace: ${NAMESPACE} -spec: - replicas: 1 - selector: - matchLabels: - name: grafana-agent - serviceName: grafana-agent - template: - metadata: - labels: - name: grafana-agent - spec: - containers: - - args: - - -config.expand-env=true - - -config.file=/etc/agent/agent.yaml - - -enable-features=integrations-next - - -server.http.address=0.0.0.0:80 - env: - - name: HOSTNAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - image: grafana/agent:$AGENT_VERSION - imagePullPolicy: IfNotPresent - name: grafana-agent - ports: - - containerPort: 80 - name: http-metrics - volumeMounts: - - mountPath: /var/lib/agent - name: agent-wal - - mountPath: /etc/agent - name: grafana-agent - serviceAccountName: grafana-agent - volumes: - - configMap: - name: grafana-agent - name: grafana-agent - updateStrategy: - type: RollingUpdate - volumeClaimTemplates: - - apiVersion: v1 - kind: PersistentVolumeClaim - metadata: - name: agent-wal - namespace: ${NAMESPACE} - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 5Gi diff --git a/production/kubernetes/agent-loki.yaml.t b/production/kubernetes/agent-loki.yaml.t deleted file mode 100644 index 5e279d5d9e84..000000000000 --- a/production/kubernetes/agent-loki.yaml.t +++ /dev/null @@ -1,100 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: grafana-agent-logs - namespace: ${NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: grafana-agent-logs -rules: -- apiGroups: - - "" - resources: - - nodes - - nodes/proxy - - services - - endpoints - - pods - - events - verbs: - - get - - list - - watch -- nonResourceURLs: - - /metrics - verbs: - - get ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: grafana-agent-logs -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: grafana-agent-logs -subjects: -- kind: ServiceAccount - name: grafana-agent-logs - namespace: ${NAMESPACE} ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: grafana-agent-logs - namespace: ${NAMESPACE} -spec: - minReadySeconds: 10 - selector: - matchLabels: - name: grafana-agent-logs - template: - metadata: - labels: - name: grafana-agent-logs - spec: - containers: - - args: - - -config.expand-env=true - - -config.file=/etc/agent/agent.yaml - - -server.http.address=0.0.0.0:80 - env: - - name: HOSTNAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - image: grafana/agent:$AGENT_VERSION - imagePullPolicy: IfNotPresent - name: grafana-agent-logs - ports: - - containerPort: 80 - name: http-metrics - securityContext: - privileged: true - runAsUser: 0 - volumeMounts: - - mountPath: /etc/agent - name: grafana-agent-logs - - mountPath: /var/log - name: varlog - - mountPath: /var/lib/docker/containers - name: varlibdockercontainers - readOnly: true - 
serviceAccountName: grafana-agent-logs - tolerations: - - effect: NoSchedule - operator: Exists - volumes: - - configMap: - name: grafana-agent-logs - name: grafana-agent-logs - - hostPath: - path: /var/log - name: varlog - - hostPath: - path: /var/lib/docker/containers - name: varlibdockercontainers - updateStrategy: - type: RollingUpdate diff --git a/production/kubernetes/agent-traces.yaml.t b/production/kubernetes/agent-traces.yaml.t deleted file mode 100644 index f77edbd855dc..000000000000 --- a/production/kubernetes/agent-traces.yaml.t +++ /dev/null @@ -1,154 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: grafana-agent-traces - namespace: ${NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: grafana-agent-traces -rules: -- apiGroups: - - "" - resources: - - nodes - - nodes/proxy - - services - - endpoints - - pods - - events - verbs: - - get - - list - - watch -- nonResourceURLs: - - /metrics - verbs: - - get ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: grafana-agent-traces -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: grafana-agent-traces -subjects: -- kind: ServiceAccount - name: grafana-agent-traces - namespace: ${NAMESPACE} ---- -apiVersion: v1 -kind: Service -metadata: - labels: - name: grafana-agent-traces - name: grafana-agent-traces - namespace: ${NAMESPACE} -spec: - ports: - - name: grafana-agent-traces-http-metrics - port: 80 - targetPort: 80 - - name: grafana-agent-traces-thrift-compact - port: 6831 - protocol: UDP - targetPort: 6831 - - name: grafana-agent-traces-thrift-binary - port: 6832 - protocol: UDP - targetPort: 6832 - - name: grafana-agent-traces-thrift-http - port: 14268 - protocol: TCP - targetPort: 14268 - - name: grafana-agent-traces-thrift-grpc - port: 14250 - protocol: TCP - targetPort: 14250 - - name: grafana-agent-traces-zipkin - port: 9411 - protocol: TCP - targetPort: 9411 - - name: grafana-agent-traces-otlp-grpc - port: 4317 - protocol: TCP - targetPort: 4317 - - name: grafana-agent-traces-otlp-http - port: 4318 - protocol: TCP - targetPort: 4318 - - name: grafana-agent-traces-opencensus - port: 55678 - protocol: TCP - targetPort: 55678 - selector: - name: grafana-agent-traces ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: grafana-agent-traces - namespace: ${NAMESPACE} -spec: - minReadySeconds: 10 - replicas: 1 - revisionHistoryLimit: 10 - selector: - matchLabels: - name: grafana-agent-traces - template: - metadata: - labels: - name: grafana-agent-traces - spec: - containers: - - args: - - -config.expand-env=true - - -config.file=/etc/agent/agent.yaml - - -server.http.address=0.0.0.0:80 - env: - - name: HOSTNAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - image: grafana/agent:$AGENT_VERSION - imagePullPolicy: IfNotPresent - name: grafana-agent-traces - ports: - - containerPort: 80 - name: http-metrics - - containerPort: 6831 - name: thrift-compact - protocol: UDP - - containerPort: 6832 - name: thrift-binary - protocol: UDP - - containerPort: 14268 - name: thrift-http - protocol: TCP - - containerPort: 14250 - name: thrift-grpc - protocol: TCP - - containerPort: 9411 - name: zipkin - protocol: TCP - - containerPort: 4317 - name: otlp-grpc - protocol: TCP - - containerPort: 4318 - name: otlp-http - protocol: TCP - - containerPort: 55678 - name: opencensus - protocol: TCP - volumeMounts: - - mountPath: /etc/agent - name: grafana-agent-traces - serviceAccountName: grafana-agent-traces - volumes: 
- - configMap: - name: grafana-agent-traces - name: grafana-agent-traces diff --git a/production/kubernetes/build/lib/version.libsonnet.t b/production/kubernetes/build/lib/version.libsonnet.t deleted file mode 100644 index 2c54521fbc08..000000000000 --- a/production/kubernetes/build/lib/version.libsonnet.t +++ /dev/null @@ -1 +0,0 @@ -'grafana/agent:$AGENT_VERSION' diff --git a/production/kubernetes/build/templates/operator/main.jsonnet.t b/production/kubernetes/build/templates/operator/main.jsonnet.t deleted file mode 100644 index 0a769df58975..000000000000 --- a/production/kubernetes/build/templates/operator/main.jsonnet.t +++ /dev/null @@ -1,160 +0,0 @@ -local k = import 'ksonnet-util/kausal.libsonnet'; -local secret = k.core.v1.secret; -local pvc = k.core.v1.persistentVolumeClaim; - -local gen = import 'agent-operator-gen/main.libsonnet'; -local ga = gen.monitoring.v1alpha1.grafanaAgent; -local mi = gen.monitoring.v1alpha1.metricsInstance; -local li = gen.monitoring.v1alpha1.logsInstance; -local pl = gen.monitoring.v1alpha1.podLogs; -local int = gen.monitoring.v1alpha1.integration; - -local op = import 'grafana-agent-operator/operator.libsonnet'; -local ga_util = import 'grafana-agent-operator/util/grafana-agent.libsonnet'; -local mi_util = import 'grafana-agent-operator/util/metricsinstance.libsonnet'; -local li_util = import 'grafana-agent-operator/util/logsinstance.libsonnet'; -local pl_util = import 'grafana-agent-operator/util/k8slogs.libsonnet'; -local mon_util = import 'grafana-agent-operator/util/k8smonitors.libsonnet'; -local int_util = import 'grafana-agent-operator/util/integrations.libsonnet'; - -local ksm = import 'kube-state-metrics/kube-state-metrics.libsonnet'; - -{ - local this = self, - - _images:: { - agent: 'grafana/agent:$AGENT_VERSION', - agent_operator: 'grafana/agent-operator:$AGENT_VERSION', - ksm: 'registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.5.0', - }, - - _config:: { - namespace: '${NAMESPACE}', - metrics_url: '${METRICS_URL}', - metrics_user: '${METRICS_USER}', - metrics_key: '${METRICS_KEY}', - logs_url: '${LOGS_URL}', - logs_user: '${LOGS_USER}', - logs_key: '${LOGS_KEY}', - cluster_label: { cluster: '${CLUSTER}' }, - kubelet_job: 'kubelet', - cadvisor_job: 'cadvisor', - ksm_job: 'kube-state-metrics', - ksm_version: '2.5.0', - }, - - operator: - op.new(name='grafana-agent-operator', namespace=this._config.namespace, image=this._images.agent_operator, serviceAccount='grafana-agent-operator') + - op.withRbac(name='grafana-agent-operator', namespace=this._config.namespace), - - grafana_agent: - ga.new(name='grafana-agent') + - ga.metadata.withNamespace(this._config.namespace) + - ga.spec.withServiceAccountName('grafana-agent') + - ga.spec.withImage(this._images.agent) + - ga.spec.metrics.instanceSelector.withMatchLabels({ agent: 'grafana-agent' }) + - ga.spec.logs.instanceSelector.withMatchLabels({ agent: 'grafana-agent' }) + - ga.spec.integrations.selector.withMatchLabels({ agent: 'grafana-agent' }) + - ga.spec.metrics.withExternalLabels(this._config.cluster_label), - rbac: - ga_util.withRbac(name='grafana-agent', namespace=this._config.namespace), - - metrics_instance: - mi.new(name='grafana-agent-metrics') + - mi.metadata.withNamespace(this._config.namespace) + - mi.metadata.withLabels({ agent: 'grafana-agent' }) + - mi.spec.serviceMonitorSelector.withMatchLabels({ instance: 'primary' }) + - mi_util.withRemoteWrite(secretName='metrics-secret', metricsUrl=this._config.metrics_url) + - mi_util.withNilServiceMonitorNamespace(), - metrics_secret: 
- secret.new('metrics-secret', {}) + - secret.withStringData({ - username: this._config.metrics_user, - password: this._config.metrics_key, - }) + secret.mixin.metadata.withNamespace(this._config.namespace), - - logs_instance: - li.new(name='grafana-agent-logs') + - li.metadata.withNamespace(this._config.namespace) + - li.metadata.withLabels({ agent: 'grafana-agent' }) + - li.spec.podLogsSelector.withMatchLabels({ instance: 'primary' }) + - li_util.withLogsClient(secretName='logs-secret', logsUrl=this._config.logs_url, externalLabels=this._config.cluster_label) + - li_util.withNilPodLogsNamespace(), - logs_secret: - secret.new('logs-secret', {}) + - secret.withStringData({ - username: this._config.logs_user, - password: this._config.logs_key, - }) + secret.mixin.metadata.withNamespace(this._config.namespace), - - pod_logs: - pl.new('kubernetes-logs') + - pl.metadata.withNamespace(this._config.namespace) + - pl.metadata.withLabels({ instance: 'primary' }) + - pl.spec.withPipelineStages(pl.spec.pipelineStages.withCri({})) + - pl.spec.namespaceSelector.withAny(true) + - pl.spec.selector.withMatchLabels({}) + - pl.spec.withRelabelings(pl_util.withK8sLogsRelabeling()), - - k8s_monitors: [ - mon_util.newKubernetesMonitor( - name='kubelet-monitor', - namespace=this._config.namespace, - monitorLabels={ instance: 'primary' }, - targetNamespace=this._config.namespace, - targetLabels={ 'app.kubernetes.io/name': 'kubelet' }, - jobLabel=this._config.kubelet_job, - metricsPath='/metrics', - allowlist=false, - allowlistMetrics=[] - ), - mon_util.newKubernetesMonitor( - name='cadvisor-monitor', - namespace='default', - monitorLabels={ instance: 'primary' }, - targetNamespace=this._config.namespace, - targetLabels={ 'app.kubernetes.io/name': 'kubelet' }, - jobLabel=this._config.cadvisor_job, - metricsPath='/metrics/cadvisor', - allowlist=false, - allowlistMetrics=[] - ), - mon_util.newServiceMonitor( - name='ksm-monitor', - namespace=this._config.namespace, - monitorLabels={ instance: 'primary' }, - targetNamespace=this._config.namespace, - targetLabels={ 'app.kubernetes.io/name': 'kube-state-metrics' }, - jobLabel=this._config.ksm_job, - metricsPath='/metrics', - allowlist=false, - allowlistMetrics=[] - ), - ], - - kube_state_metrics: - ksm { - name:: 'kube-state-metrics', - namespace:: this._config.namespace, - version:: this._config.ksm_version, - image:: this._images.ksm, - }, - - events: - int.new('agent-eventhandler') + - int.metadata.withNamespace(this._config.namespace) + - int.metadata.withLabels({ agent: 'grafana-agent' }) + - int.spec.withName('eventhandler') + - int.spec.type.withUnique(true) + - int.spec.withConfig({ - logs_instance: this._config.namespace + '/' + 'grafana-agent-logs', - cache_path: '/etc/eventhandler/eventhandler.cache', - }) + - int_util.withPVC('agent-eventhandler'), - pvc: - pvc.new('agent-eventhandler') + - pvc.mixin.metadata.withNamespace(this._config.namespace) + - pvc.mixin.spec.withAccessModes('ReadWriteOnce') + - pvc.mixin.spec.resources.withRequests({ storage: '1Gi' }), - -} diff --git a/production/kubernetes/install-bare.sh.t b/production/kubernetes/install-bare.sh.t deleted file mode 100644 index c13b3cb97cd6..000000000000 --- a/production/kubernetes/install-bare.sh.t +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash -# shellcheck shell=bash - -# -# install-bare.sh is an installer for the Agent without a ConfigMap. It is -# used during the Grafana Cloud integrations wizard and is not recommended -# to be used directly. 
Instead of calling this script directly, please -# make a copy of ./agent-bare.yaml and modify it for your needs. -# -# Note that agent-bare.yaml does not have a ConfigMap, so the Grafana Agent -# will not launch until one is created. For more information on setting up -# a ConfigMap, please refer to: -# -# Metrics quickstart: https://grafana.com/docs/grafana-cloud/quickstart/agent-k8s/k8s_agent_metrics/ -# Logs quickstart: https://grafana.com/docs/grafana-cloud/quickstart/agent-k8s/k8s_agent_logs/ -# - -check_installed() { - if ! type "$1" >/dev/null 2>&1; then - echo "error: $1 not installed" >&2 - exit 1 - fi -} - -check_installed curl -check_installed envsubst - -MANIFEST_BRANCH=$AGENT_VERSION -MANIFEST_URL=${MANIFEST_URL:-https://raw.githubusercontent.com/grafana/agent/${MANIFEST_BRANCH}/production/kubernetes/agent-bare.yaml} -NAMESPACE=${NAMESPACE:-default} - -export NAMESPACE - -curl -fsSL "$MANIFEST_URL" | envsubst diff --git a/production/operator/templates/agent-operator.yaml.t b/production/operator/templates/agent-operator.yaml.t deleted file mode 100644 index 449ac6acefea..000000000000 --- a/production/operator/templates/agent-operator.yaml.t +++ /dev/null @@ -1,645 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: grafana-agent - namespace: ${NAMESPACE} ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: grafana-agent-operator - namespace: ${NAMESPACE} ---- -apiVersion: v1 -automountServiceAccountToken: false -kind: ServiceAccount -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 2.5.0 - name: kube-state-metrics - namespace: ${NAMESPACE} ---- -apiVersion: v1 -data: {} -kind: Secret -metadata: - name: logs-secret - namespace: ${NAMESPACE} -stringData: - password: ${LOGS_KEY} - username: ${LOGS_USER} -type: Opaque ---- -apiVersion: v1 -data: {} -kind: Secret -metadata: - name: metrics-secret - namespace: ${NAMESPACE} -stringData: - password: ${METRICS_KEY} - username: ${METRICS_USER} -type: Opaque ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: agent-eventhandler - namespace: ${NAMESPACE} -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: grafana-agent -rules: -- apiGroups: - - "" - resources: - - nodes - - nodes/proxy - - nodes/metrics - - services - - endpoints - - pods - - events - verbs: - - get - - list - - watch -- apiGroups: - - networking.k8s.io - resources: - - ingresses - verbs: - - get - - list - - watch -- nonResourceURLs: - - /metrics - - /metrics/cadvisor - verbs: - - get ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: grafana-agent-operator -rules: -- apiGroups: - - monitoring.grafana.com - resources: - - grafanaagents - - metricsinstances - - logsinstances - - podlogs - - integrations - verbs: - - get - - list - - watch -- apiGroups: - - monitoring.grafana.com - resources: - - grafanaagents/finalizers - - metricsinstances/finalizers - - logsinstances/finalizers - - podlogs/finalizers - - integrations/finalizers - verbs: - - get - - list - - watch - - update -- apiGroups: - - monitoring.coreos.com - resources: - - podmonitors - - probes - - servicemonitors - verbs: - - get - - list - - watch -- apiGroups: - - monitoring.coreos.com - resources: - - podmonitors/finalizers - - probes/finalizers - - servicemonitors/finalizers - verbs: - - get - - list - - watch - - update -- apiGroups: - - "" 
- resources: - - namespaces - - nodes - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - secrets - - services - - configmaps - - endpoints - verbs: - - get - - list - - watch - - create - - update - - patch - - delete -- apiGroups: - - apps - resources: - - statefulsets - - daemonsets - - deployments - verbs: - - get - - list - - watch - - create - - update - - patch - - delete ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 2.5.0 - name: kube-state-metrics -rules: -- apiGroups: - - "" - resources: - - configmaps - - secrets - - nodes - - pods - - services - - resourcequotas - - replicationcontrollers - - limitranges - - persistentvolumeclaims - - persistentvolumes - - namespaces - - endpoints - verbs: - - list - - watch -- apiGroups: - - apps - resources: - - statefulsets - - daemonsets - - deployments - - replicasets - verbs: - - list - - watch -- apiGroups: - - batch - resources: - - cronjobs - - jobs - verbs: - - list - - watch -- apiGroups: - - autoscaling - resources: - - horizontalpodautoscalers - verbs: - - list - - watch -- apiGroups: - - authentication.k8s.io - resources: - - tokenreviews - verbs: - - create -- apiGroups: - - authorization.k8s.io - resources: - - subjectaccessreviews - verbs: - - create -- apiGroups: - - policy - resources: - - poddisruptionbudgets - verbs: - - list - - watch -- apiGroups: - - certificates.k8s.io - resources: - - certificatesigningrequests - verbs: - - list - - watch -- apiGroups: - - storage.k8s.io - resources: - - storageclasses - - volumeattachments - verbs: - - list - - watch -- apiGroups: - - admissionregistration.k8s.io - resources: - - mutatingwebhookconfigurations - - validatingwebhookconfigurations - verbs: - - list - - watch -- apiGroups: - - networking.k8s.io - resources: - - networkpolicies - - ingresses - verbs: - - list - - watch -- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - list - - watch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: grafana-agent -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: grafana-agent -subjects: -- kind: ServiceAccount - name: grafana-agent - namespace: ${NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: grafana-agent-operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: grafana-agent-operator -subjects: -- kind: ServiceAccount - name: grafana-agent-operator - namespace: ${NAMESPACE} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 2.5.0 - name: kube-state-metrics -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kube-state-metrics -subjects: -- kind: ServiceAccount - name: kube-state-metrics - namespace: ${NAMESPACE} ---- -apiVersion: v1 -kind: Service -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 2.5.0 - name: kube-state-metrics - namespace: ${NAMESPACE} -spec: - clusterIP: None - ports: - - name: http-metrics - port: 8080 - targetPort: http-metrics - - name: telemetry - port: 8081 - targetPort: telemetry - selector: - app.kubernetes.io/name: kube-state-metrics ---- -apiVersion: 
apps/v1 -kind: Deployment -metadata: - name: grafana-agent-operator - namespace: ${NAMESPACE} -spec: - minReadySeconds: 10 - replicas: 1 - revisionHistoryLimit: 10 - selector: - matchLabels: - name: grafana-agent-operator - template: - metadata: - labels: - name: grafana-agent-operator - spec: - containers: - - args: - - --kubelet-service=default/kubelet - image: grafana/agent-operator:$AGENT_VERSION - imagePullPolicy: IfNotPresent - name: grafana-agent-operator - serviceAccount: grafana-agent-operator ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 2.5.0 - name: kube-state-metrics - namespace: ${NAMESPACE} -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: kube-state-metrics - template: - metadata: - labels: - app.kubernetes.io/component: exporter - app.kubernetes.io/name: kube-state-metrics - app.kubernetes.io/version: 2.5.0 - spec: - automountServiceAccountToken: true - containers: - - image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.5.0 - livenessProbe: - httpGet: - path: /healthz - port: 8080 - initialDelaySeconds: 5 - timeoutSeconds: 5 - name: kube-state-metrics - ports: - - containerPort: 8080 - name: http-metrics - - containerPort: 8081 - name: telemetry - readinessProbe: - httpGet: - path: / - port: 8081 - initialDelaySeconds: 5 - timeoutSeconds: 5 - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - readOnlyRootFilesystem: true - runAsUser: 65534 - nodeSelector: - kubernetes.io/os: linux - serviceAccountName: kube-state-metrics ---- -apiVersion: monitoring.grafana.com/v1alpha1 -kind: GrafanaAgent -metadata: - name: grafana-agent - namespace: ${NAMESPACE} -spec: - image: grafana/agent:$AGENT_VERSION - integrations: - selector: - matchLabels: - agent: grafana-agent - logs: - instanceSelector: - matchLabels: - agent: grafana-agent - metrics: - externalLabels: - cluster: ${CLUSTER} - instanceSelector: - matchLabels: - agent: grafana-agent - serviceAccountName: grafana-agent ---- -apiVersion: monitoring.grafana.com/v1alpha1 -kind: Integration -metadata: - labels: - agent: grafana-agent - name: agent-eventhandler - namespace: ${NAMESPACE} -spec: - config: - cache_path: /etc/eventhandler/eventhandler.cache - logs_instance: ${NAMESPACE}/grafana-agent-logs - name: eventhandler - type: - unique: true - volumeMounts: - - mountPath: /etc/eventhandler - name: agent-eventhandler - volumes: - - name: agent-eventhandler - persistentVolumeClaim: - claimName: agent-eventhandler ---- -apiVersion: monitoring.grafana.com/v1alpha1 -kind: LogsInstance -metadata: - labels: - agent: grafana-agent - name: grafana-agent-logs - namespace: ${NAMESPACE} -spec: - clients: - - basicAuth: - password: - key: password - name: logs-secret - username: - key: username - name: logs-secret - externalLabels: - cluster: ${CLUSTER} - url: ${LOGS_URL} - podLogsNamespaceSelector: {} - podLogsSelector: - matchLabels: - instance: primary ---- -apiVersion: monitoring.grafana.com/v1alpha1 -kind: MetricsInstance -metadata: - labels: - agent: grafana-agent - name: grafana-agent-metrics - namespace: ${NAMESPACE} -spec: - remoteWrite: - - basicAuth: - password: - key: password - name: metrics-secret - username: - key: username - name: metrics-secret - url: ${METRICS_URL} - serviceMonitorNamespaceSelector: {} - serviceMonitorSelector: - matchLabels: - instance: primary ---- -apiVersion: monitoring.grafana.com/v1alpha1 -kind: PodLogs 
-metadata: - labels: - instance: primary - name: kubernetes-logs - namespace: ${NAMESPACE} -spec: - namespaceSelector: - any: true - pipelineStages: - - cri: {} - relabelings: - - sourceLabels: - - __meta_kubernetes_pod_node_name - targetLabel: __host__ - - action: replace - sourceLabels: - - __meta_kubernetes_namespace - targetLabel: namespace - - action: replace - sourceLabels: - - __meta_kubernetes_pod_name - targetLabel: pod - - action: replace - sourceLabels: - - __meta_kubernetes_pod_container_name - targetLabel: container - - replacement: /var/log/pods/*$1/*.log - separator: / - sourceLabels: - - __meta_kubernetes_pod_uid - - __meta_kubernetes_pod_container_name - targetLabel: __path__ - selector: - matchLabels: {} ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - instance: primary - name: ksm-monitor - namespace: ${NAMESPACE} -spec: - endpoints: - - honorLabels: true - interval: 60s - path: /metrics - port: http-metrics - relabelings: - - action: replace - replacement: kube-state-metrics - targetLabel: job - namespaceSelector: - matchNames: - - ${NAMESPACE} - selector: - matchLabels: - app.kubernetes.io/name: kube-state-metrics ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - instance: primary - name: kubelet-monitor - namespace: ${NAMESPACE} -spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - honorLabels: true - interval: 60s - path: /metrics - port: https-metrics - relabelings: - - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - - action: replace - replacement: kubelet - targetLabel: job - scheme: https - tlsConfig: - insecureSkipVerify: true - namespaceSelector: - matchNames: - - ${NAMESPACE} - selector: - matchLabels: - app.kubernetes.io/name: kubelet ---- -apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor -metadata: - labels: - instance: primary - name: cadvisor-monitor - namespace: default -spec: - endpoints: - - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token - honorLabels: true - interval: 60s - path: /metrics/cadvisor - port: https-metrics - relabelings: - - sourceLabels: - - __metrics_path__ - targetLabel: metrics_path - - action: replace - replacement: cadvisor - targetLabel: job - scheme: https - tlsConfig: - insecureSkipVerify: true - namespaceSelector: - matchNames: - - ${NAMESPACE} - selector: - matchLabels: - app.kubernetes.io/name: kubelet diff --git a/production/tanka/grafana-agent/v1/main.libsonnet.t b/production/tanka/grafana-agent/v1/main.libsonnet.t deleted file mode 100644 index 21ae76113f24..000000000000 --- a/production/tanka/grafana-agent/v1/main.libsonnet.t +++ /dev/null @@ -1,142 +0,0 @@ -local agent = import './internal/agent.libsonnet'; -local utils = import './internal/utils.libsonnet'; -local k = import 'ksonnet-util/kausal.libsonnet'; - -local container = k.core.v1.container; -local configMap = k.core.v1.configMap; -local service = k.core.v1.service; - -// Merge all of our libraries to create the final exposed library. -(import './lib/deployment.libsonnet') + -(import './lib/integrations.libsonnet') + -(import './lib/metrics.libsonnet') + -(import './lib/scraping_service.libsonnet') + -(import './lib/logs.libsonnet') + -(import './lib/traces.libsonnet') + -{ - _images:: { - agent: 'grafana/agent:$AGENT_VERSION', - agentctl: 'grafana/agentctl:$AGENT_VERSION', - }, - - // new creates a new DaemonSet deployment of the grafana-agent. By default, - // the deployment will do no collection. 
You must merge the result of this - // function with one or more of the following: - // - // - withMetricsConfig, withMetricsInstances (and optionally withRemoteWrite) - // - withLogsConfig - // - // When using withMetricsInstances, a [name]-etc deployment - // with one replica will be created alongside the DaemonSet. This deployment - // is responsible for handling scrape configs that will not work on the host - // machine. - // - // For example, if a scrape_config scrapes the Kubernetes API, that must be - // handled by the [name]-etc deployment as the Kubernetes API does not run - // on any node in the cluster. - // - // scrapeInstanceKubernetes provides the default - // MetricsInstanceConfig Grafana Labs uses in production. - new(name='grafana-agent', namespace='default'):: { - local this = self, - - _mode:: 'daemonset', - _images:: $._images, - _config_hash:: true, - - local has_logs_config = std.objectHasAll(self, '_logs_config'), - local has_trace_config = std.objectHasAll(self, '_trace_config'), - local has_metrics_config = std.objectHasAll(self, '_metrics_config'), - local has_metrics_instances = std.objectHasAll(self, '_metrics_instances'), - local has_integrations = std.objectHasAll(self, '_integrations'), - local has_sampling_strategies = std.objectHasAll(self, '_traces_sampling_strategies'), - - local metrics_instances = - if has_metrics_instances then this._metrics_instances else [], - local host_filter_instances = utils.transformInstances(metrics_instances, true), - local etc_instances = utils.transformInstances(metrics_instances, false), - - config:: { - server: { - log_level: 'info', - }, - } + ( - if has_metrics_config - then { metrics: this._metrics_config { configs: host_filter_instances } } - else {} - ) + ( - if has_logs_config then { - logs: { - positions_directory: '/tmp/positions', - configs: [this._logs_config { - name: 'default', - }], - }, - } else {} - ) + ( - if has_trace_config then { - traces: { - configs: [this._trace_config { - name: 'default', - }], - }, - } - else {} - ) + ( - if has_integrations then { integrations: this._integrations } else {} - ), - - etc_config:: if has_metrics_config then this.config { - // Hide logs and integrations from our extra configs, we just want the - // scrape configs that wouldn't work for the DaemonSet. - metrics+: { - configs: std.map(function(cfg) cfg { host_filter: false }, etc_instances), - }, - logs:: {}, - traces:: {}, - integrations:: {}, - }, - - agent: - agent.newAgent(name, namespace, self._images.agent, self.config, use_daemonset=true) + - agent.withConfigHash(self._config_hash) + { - // If sampling strategies were defined, we need to mount them as a JSON - // file. - config_map+: - if has_sampling_strategies - then configMap.withDataMixin({ - 'strategies.json': std.toString(this._traces_sampling_strategies), - }) - else {}, - - // If we're deploying for tracing, applications will want to write to - // a service for load balancing span delivery. 
- service: - if has_trace_config - then k.util.serviceFor(self.agent) + service.mixin.metadata.withNamespace(namespace) - else {}, - } + ( - if has_logs_config then $.logsPermissionsMixin else {} - ) + ( - if has_integrations && std.objectHas(this._integrations, 'node_exporter') then $.integrationsMixin else {} - ), - - agent_etc: if std.length(etc_instances) > 0 then - agent.newAgent(name + '-etc', namespace, self._images.agent, self.etc_config, use_daemonset=false) + - agent.withConfigHash(self._config_hash), - }, - - // withImages sets the images used for launching the Agent. - // Keys supported: agent, agentctl - withImages(images):: { _images+: images }, - - // Includes or excludes the config hash annotation. - withConfigHash(include=true):: { _config_hash:: include }, - - // withPortsMixin adds extra ports to expose. - withPortsMixin(ports=[]):: { - agent+: { - container+:: container.withPortsMixin(ports), - }, - }, -} diff --git a/production/tanka/grafana-agent/v2/internal/base.libsonnet.t b/production/tanka/grafana-agent/v2/internal/base.libsonnet.t deleted file mode 100644 index a0b01078f994..000000000000 --- a/production/tanka/grafana-agent/v2/internal/base.libsonnet.t +++ /dev/null @@ -1,56 +0,0 @@ -function(name='grafana-agent', namespace='') { - local k = (import 'ksonnet-util/kausal.libsonnet') { _config+:: { namespace: namespace } }, - - local container = k.core.v1.container, - local configMap = k.core.v1.configMap, - local containerPort = k.core.v1.containerPort, - local policyRule = k.rbac.v1.policyRule, - local serviceAccount = k.core.v1.serviceAccount, - local envVar = k.core.v1.envVar, - - local this = self, - - _images:: { - agent: 'grafana/agent:$AGENT_VERSION', - agentctl: 'grafana/agentctl:$AGENT_VERSION', - }, - _config:: { - name: name, - namespace: namespace, - config_hash: true, - agent_config: '', - agent_port: 80, - agent_args: { - 'config.file': '/etc/agent/agent.yaml', - 'server.http.address': '0.0.0.0:80', - 'config.expand-env': 'true', - }, - }, - - rbac: k.util.rbac(name, [ - policyRule.withApiGroups(['']) + - policyRule.withResources(['nodes', 'nodes/proxy', 'services', 'endpoints', 'pods', 'events']) + - policyRule.withVerbs(['get', 'list', 'watch']), - - policyRule.withNonResourceUrls('/metrics') + - policyRule.withVerbs(['get']), - ]) { - service_account+: serviceAccount.mixin.metadata.withNamespace(namespace), - }, - - configMap: - configMap.new(name) + - configMap.mixin.metadata.withNamespace(namespace) + - configMap.withData({ - 'agent.yaml': k.util.manifestYaml(this._config.agent_config), - }), - - container:: - container.new(name, this._images.agent) + - container.withPorts(containerPort.new('http-metrics', this._config.agent_port)) + - container.withArgsMixin(k.util.mapToFlags(this._config.agent_args)) + - // `HOSTNAME` is required for promtail (logs) otherwise it will silently do nothing - container.withEnvMixin([ - envVar.fromFieldPath('HOSTNAME', 'spec.nodeName'), - ]), -} diff --git a/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t b/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t deleted file mode 100644 index 7b9fb7fd420b..000000000000 --- a/production/tanka/grafana-agent/v2/internal/syncer.libsonnet.t +++ /dev/null @@ -1,62 +0,0 @@ -local k = import 'ksonnet-util/kausal.libsonnet'; - -local cronJob = k.batch.v1.cronJob; -local configMap = k.core.v1.configMap; -local container = k.core.v1.container; -local deployment = k.apps.v1.deployment; -local volumeMount = k.core.v1.volumeMount; -local volume = 
k.core.v1.volume; - -function( - name='grafana-agent-syncer', - namespace='', - config={}, -) { - local _config = { - api: error 'api must be set', - image: 'grafana/agentctl:$AGENT_VERSION', - schedule: '*/5 * * * *', - configs: [], - } + config, - - local this = self, - local _configs = std.foldl( - function(agg, cfg) - // Sanitize the name and remove / so every file goes into the same - // folder. - local name = std.strReplace(cfg.name, '/', '_'); - - agg { ['%s.yml' % name]: k.util.manifestYaml(cfg) }, - _config.configs, - {}, - ), - - configMap: - configMap.new(name) + - configMap.mixin.metadata.withNamespace(namespace) + - configMap.withData(_configs), - - container:: - container.new(name, _config.image) + - container.withArgsMixin([ - 'config-sync', - '--addr=%s' % _config.api, - '/etc/configs', - ]) + - container.withVolumeMounts(volumeMount.new(name, '/etc/configs')), - - job: - cronJob.new(name, _config.schedule, this.container) + - cronJob.mixin.metadata.withNamespace(namespace) + - cronJob.mixin.spec.withSuccessfulJobsHistoryLimit(1) + - cronJob.mixin.spec.withFailedJobsHistoryLimit(3) + - cronJob.mixin.spec.jobTemplate.spec.template.spec.withRestartPolicy('OnFailure') + - cronJob.mixin.spec.jobTemplate.spec.template.spec.withActiveDeadlineSeconds(600) + - cronJob.mixin.spec.jobTemplate.spec.withTtlSecondsAfterFinished(120) + - cronJob.mixin.spec.jobTemplate.spec.template.spec.withVolumes([ - volume.fromConfigMap( - name=name, - configMapName=this.configMap.metadata.name, - ), - ]), -} From 84665d8ff526dc449159acaf4c75035a6406b91a Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Thu, 5 Oct 2023 12:50:58 -0700 Subject: [PATCH 19/21] Update version and regenerate --- docs/sources/_index.md | 2 +- pkg/operator/defaults.go | 2 +- tools/gen-versioned-files/agent-version.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sources/_index.md b/docs/sources/_index.md index 3778e454f3cd..ee7f78ecffe5 100644 --- a/docs/sources/_index.md +++ b/docs/sources/_index.md @@ -8,7 +8,7 @@ title: Grafana Agent description: Grafana Agent is a flexible, performant, vendor-neutral, telemetry collector weight: 350 cascade: - AGENT_RELEASE: v0.36.2 + AGENT_RELEASE: v0.37.0-rc0 --- # Grafana Agent diff --git a/pkg/operator/defaults.go b/pkg/operator/defaults.go index b67671a3ad77..bfb1be187f6e 100644 --- a/pkg/operator/defaults.go +++ b/pkg/operator/defaults.go @@ -2,7 +2,7 @@ package operator // Supported versions of the Grafana Agent. 
var ( - DefaultAgentVersion = "v0.37.0-rc.0" + DefaultAgentVersion = "v0.37.0-rc0" DefaultAgentBaseImage = "grafana/agent" DefaultAgentImage = DefaultAgentBaseImage + ":" + DefaultAgentVersion ) diff --git a/tools/gen-versioned-files/agent-version.txt b/tools/gen-versioned-files/agent-version.txt index 434c18794f78..e3c37a06fc38 100644 --- a/tools/gen-versioned-files/agent-version.txt +++ b/tools/gen-versioned-files/agent-version.txt @@ -1 +1 @@ -v0.36.2 \ No newline at end of file +v0.37.0-rc0 \ No newline at end of file From ea3533b006672e9df42205dcd11d2ece50a9dcb6 Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Fri, 6 Oct 2023 08:56:24 -0700 Subject: [PATCH 20/21] Update version in gen-versioned-files and regenerate --- docs/sources/_index.md | 2 +- tools/gen-versioned-files/agent-version.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sources/_index.md b/docs/sources/_index.md index ee7f78ecffe5..44a296be55e7 100644 --- a/docs/sources/_index.md +++ b/docs/sources/_index.md @@ -8,7 +8,7 @@ title: Grafana Agent description: Grafana Agent is a flexible, performant, vendor-neutral, telemetry collector weight: 350 cascade: - AGENT_RELEASE: v0.37.0-rc0 + AGENT_RELEASE: v0.37.0-rc1 --- # Grafana Agent diff --git a/tools/gen-versioned-files/agent-version.txt b/tools/gen-versioned-files/agent-version.txt index e3c37a06fc38..2f881371110e 100644 --- a/tools/gen-versioned-files/agent-version.txt +++ b/tools/gen-versioned-files/agent-version.txt @@ -1 +1 @@ -v0.37.0-rc0 \ No newline at end of file +v0.37.0-rc1 \ No newline at end of file From a25e6c4bee472941a4c67d4d7e97238d08b321fc Mon Sep 17 00:00:00 2001 From: Mischa Thompson Date: Tue, 10 Oct 2023 13:53:16 -0700 Subject: [PATCH 21/21] Fix missing cascade params and update to latest version --- docs/sources/_index.md | 2 +- .../static/configuration/integrations/node-exporter-config.md | 2 +- docs/sources/static/set-up/install/install-agent-docker.md | 2 +- tools/gen-versioned-files/agent-version.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/sources/_index.md b/docs/sources/_index.md index 44a296be55e7..2411cd963a0c 100644 --- a/docs/sources/_index.md +++ b/docs/sources/_index.md @@ -8,7 +8,7 @@ title: Grafana Agent description: Grafana Agent is a flexible, performant, vendor-neutral, telemetry collector weight: 350 cascade: - AGENT_RELEASE: v0.37.0-rc1 + AGENT_RELEASE: v0.37.1 --- # Grafana Agent diff --git a/docs/sources/static/configuration/integrations/node-exporter-config.md b/docs/sources/static/configuration/integrations/node-exporter-config.md index 6a9581ce3585..eb65fb51d91f 100644 --- a/docs/sources/static/configuration/integrations/node-exporter-config.md +++ b/docs/sources/static/configuration/integrations/node-exporter-config.md @@ -70,7 +70,7 @@ metadata: name: agent spec: containers: - - image: grafana/agent:v0.37.0-rc.0 + - image: {{< param "AGENT_RELEASE" >}} name: agent args: - --config.file=/etc/agent-config/agent.yaml diff --git a/docs/sources/static/set-up/install/install-agent-docker.md b/docs/sources/static/set-up/install/install-agent-docker.md index a42ae0bb54f9..7f32cc4e6d7f 100644 --- a/docs/sources/static/set-up/install/install-agent-docker.md +++ b/docs/sources/static/set-up/install/install-agent-docker.md @@ -51,7 +51,7 @@ To run a Grafana Agent Docker container on Windows, run the following command in docker run ^ -v WAL_DATA_DIRECTORY:C:\etc\grafana-agent\data ^ -v CONFIG_FILE_PATH:C:\etc\grafana-agent ^ - grafana/agent:v0.37.0-rc.0-windows + 
grafana/agent:{{< param "AGENT_RELEASE" >}}-windows ``` Replace the following: diff --git a/tools/gen-versioned-files/agent-version.txt b/tools/gen-versioned-files/agent-version.txt index 2f881371110e..283d4a015533 100644 --- a/tools/gen-versioned-files/agent-version.txt +++ b/tools/gen-versioned-files/agent-version.txt @@ -1 +1 @@ -v0.37.0-rc1 \ No newline at end of file +v0.37.1 \ No newline at end of file