From 4ee18975ba3c9f6838b4949468aef68e2763808b Mon Sep 17 00:00:00 2001 From: Howard Cheung Date: Mon, 24 Jul 2023 10:33:06 +0800 Subject: [PATCH 1/7] feat: initial commit for k8slog receiver --- .chloggen/k8slog_receiver_setup.yaml | 20 + .github/CODEOWNERS | 1 + .github/ISSUE_TEMPLATE/bug_report.yaml | 1 + .github/ISSUE_TEMPLATE/feature_request.yaml | 1 + .github/ISSUE_TEMPLATE/other.yaml | 1 + cmd/githubgen/allowlist.txt | 1 + receiver/k8slogreceiver/Makefile | 1 + receiver/k8slogreceiver/README.md | 173 +++++ receiver/k8slogreceiver/config.go | 217 +++++++ receiver/k8slogreceiver/config_test.go | 102 +++ receiver/k8slogreceiver/design.md | 29 + receiver/k8slogreceiver/doc.go | 6 + receiver/k8slogreceiver/factory.go | 68 ++ .../generated_component_test.go | 61 ++ receiver/k8slogreceiver/go.mod | 84 +++ receiver/k8slogreceiver/go.sum | 596 ++++++++++++++++++ .../internal/metadata/generated_status.go | 25 + receiver/k8slogreceiver/metadata.yaml | 8 + receiver/k8slogreceiver/receiver.go | 31 + receiver/k8slogreceiver/runtime_api_config.go | 121 ++++ receiver/k8slogreceiver/testdata/config.yaml | 26 + versions.yaml | 1 + 22 files changed, 1574 insertions(+) create mode 100644 .chloggen/k8slog_receiver_setup.yaml create mode 100644 receiver/k8slogreceiver/Makefile create mode 100644 receiver/k8slogreceiver/README.md create mode 100644 receiver/k8slogreceiver/config.go create mode 100644 receiver/k8slogreceiver/config_test.go create mode 100644 receiver/k8slogreceiver/design.md create mode 100644 receiver/k8slogreceiver/doc.go create mode 100644 receiver/k8slogreceiver/factory.go create mode 100644 receiver/k8slogreceiver/generated_component_test.go create mode 100644 receiver/k8slogreceiver/go.mod create mode 100644 receiver/k8slogreceiver/go.sum create mode 100644 receiver/k8slogreceiver/internal/metadata/generated_status.go create mode 100644 receiver/k8slogreceiver/metadata.yaml create mode 100644 receiver/k8slogreceiver/receiver.go create mode 100644 
receiver/k8slogreceiver/runtime_api_config.go create mode 100644 receiver/k8slogreceiver/testdata/config.yaml diff --git a/.chloggen/k8slog_receiver_setup.yaml b/.chloggen/k8slog_receiver_setup.yaml new file mode 100644 index 000000000000..53e63beaaadb --- /dev/null +++ b/.chloggen/k8slog_receiver_setup.yaml @@ -0,0 +1,20 @@ +# Use this changelog template to create an entry for release notes. +# If your change doesn't affect end users, such as a test fix or a tooling change, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: new_component + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: k8slogreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Add the skeleton for the new k8slogreceiver in development." + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [23339] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: \ No newline at end of file diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 48fa4d869fe8..3d42e002c27c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -250,6 +250,7 @@ receiver/jmxreceiver/ @open-telemetry receiver/journaldreceiver/ @open-telemetry/collector-contrib-approvers @sumo-drosiek @djaglowski receiver/k8sclusterreceiver/ @open-telemetry/collector-contrib-approvers @dmitryax @TylerHelmuth @povilasv @ChrsMark receiver/k8seventsreceiver/ @open-telemetry/collector-contrib-approvers @dmitryax @TylerHelmuth @ChrsMark +receiver/k8slogreceiver/ @open-telemetry/collector-contrib-approvers @h0cheung @TylerHelmuth receiver/k8sobjectsreceiver/ @open-telemetry/collector-contrib-approvers @dmitryax @hvaghani221 @TylerHelmuth @ChrsMark receiver/kafkametricsreceiver/ @open-telemetry/collector-contrib-approvers @dmitryax receiver/kafkareceiver/ @open-telemetry/collector-contrib-approvers @pavolloffay @MovieStoreGuy diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index 95b022b70897..bd04a7e3b99e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -252,6 +252,7 @@ body: - receiver/journald - receiver/k8scluster - receiver/k8sevents + - receiver/k8slog - receiver/k8sobjects - receiver/kafka - receiver/kafkametrics diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml index acf39e99429e..363306042e23 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yaml +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -246,6 +246,7 @@ body: - receiver/journald - receiver/k8scluster - receiver/k8sevents + - receiver/k8slog - receiver/k8sobjects - receiver/kafka - receiver/kafkametrics diff --git a/.github/ISSUE_TEMPLATE/other.yaml b/.github/ISSUE_TEMPLATE/other.yaml index a99efb30d46b..0548e4f160c9 100644 --- a/.github/ISSUE_TEMPLATE/other.yaml +++ b/.github/ISSUE_TEMPLATE/other.yaml @@ -246,6 +246,7 @@ body: - 
receiver/journald - receiver/k8scluster - receiver/k8sevents + - receiver/k8slog - receiver/k8sobjects - receiver/kafka - receiver/kafkametrics diff --git a/cmd/githubgen/allowlist.txt b/cmd/githubgen/allowlist.txt index 838360612c7b..ba5608fc601f 100644 --- a/cmd/githubgen/allowlist.txt +++ b/cmd/githubgen/allowlist.txt @@ -18,3 +18,4 @@ shazlehu swar8080 thmshmm zpzhuSplunk +h0cheung diff --git a/receiver/k8slogreceiver/Makefile b/receiver/k8slogreceiver/Makefile new file mode 100644 index 000000000000..ded7a36092dc --- /dev/null +++ b/receiver/k8slogreceiver/Makefile @@ -0,0 +1 @@ +include ../../Makefile.Common diff --git a/receiver/k8slogreceiver/README.md b/receiver/k8slogreceiver/README.md new file mode 100644 index 000000000000..7f97b885f927 --- /dev/null +++ b/receiver/k8slogreceiver/README.md @@ -0,0 +1,173 @@ +# K8slog Receiver + + +| Status | | +| ------------- |-----------| +| Stability | [development]: logs | +| Distributions | [] | +| Issues | [![Open issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aopen%20label%3Areceiver%2Fk8slog%20&label=open&color=orange&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Areceiver%2Fk8slog) [![Closed issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aclosed%20label%3Areceiver%2Fk8slog%20&label=closed&color=blue&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aclosed+is%3Aissue+label%3Areceiver%2Fk8slog) | +| [Code Owners](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/CONTRIBUTING.md#becoming-a-code-owner) | [@h0cheung](https://www.github.com/h0cheung), [@TylerHelmuth](https://www.github.com/TylerHelmuth) | + +[development]: https://github.com/open-telemetry/opentelemetry-collector#development + + +Tails and parses logs 
in k8s environment. + +There is only one mode of discovery for now; it is specified by the `discovery.mode` configuration option: +- `daemonset-stdout`: (default) Deployed as a DaemonSet, the receiver will read logs from the stdout of pods in the same node. + +Two modes of discovery are planned to be supported in the future: + +- `daemonset-file`: Deployed as a DaemonSet, the receiver will read logs from files inside pods in the same node. +- `sidecar`: Deployed as a sidecar container, the receiver will read logs from files. + +## Configuration + +The following settings are common to all discovery modes: + +| Field | Default | Description | +|-------------------------------------|--------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `discovery.mode` | `daemonset-stdout` | The mode of discovery. Options are `daemonset-stdout`, `daemonset-file` or `sidecar`. | +| `extract` | | The rules to extract metadata from pods and containers. TODO default values. | +| `start_at` | `end` | At startup, where to start reading logs from the file. Options are `beginning` or `end` | +| `force_flush_period` | `500ms` | Time since last read of data from file, after which the currently buffered log should be sent to the pipeline. Takes `time.Duration` (e.g. `10s`, `1m`, or `500ms`) as value. Zero means waiting for new data forever | +| `encoding` | `utf-8` | The encoding of the file being read. See the list of supported encodings below for available options | +| `preserve_leading_whitespaces` | `true` | Whether to preserve leading whitespaces. | +| `preserve_trailing_whitespaces` | `false` | Whether to preserve trailing whitespaces. | +| `include_file_name` | `true` | Whether to add the file name as the attribute `log.file.name`. 
| +| `include_file_path` | `true` | Whether to add the file path as the attribute `log.file.path`. | +| `poll_interval` | 200ms | The duration between filesystem polls | +| `max_log_size` | `128kb` | The maximum size of a log entry to read. A log entry will be truncated if it is larger than `max_log_size`. Protects against reading large amounts of data into memory | +| `max_readers` | 1024 | The maximum number of readers running at the same time. | +| `attributes` | {} | A map of `key: value` pairs to add to the entry's attributes | +| `resource` | {} | A map of `key: value` pairs to add to the entry's resource | +| `operators` | [] | An array of [operators](../../pkg/stanza/docs/operators/README.md#what-operators-are-available). See below for more details | +| `storage` | none | The ID of a storage extension to be used to store file checkpoints. File checkpoints allow the receiver to pick up where it left off in the case of a collector restart. If no storage extension is used, the receiver will manage checkpoints in memory only. | +| `retry_on_failure.enabled` | `false` | If `true`, the receiver will pause reading a file and attempt to resend the current batch of logs if it encounters an error from downstream components. | +| `retry_on_failure.initial_interval` | `1 second` | Time to wait after the first failure before retrying. | +| `retry_on_failure.max_interval` | `30 seconds` | Upper bound on retry backoff interval. Once this value is reached the delay between consecutive retries will remain constant at the specified value. | +| `retry_on_failure.max_elapsed_time` | `5 minutes` | Maximum amount of time (including retries) spent trying to send a logs batch to a downstream consumer. Once this value is reached, the data is discarded. Retrying never stops if set to `0`. 
| + +When `discovery.mode` is not `sidecar`, there are additional configuration options: + +| Field | Default | Description | +|-------------------------------|------------------|--------------------------------------------------------------------------------------------------------------------------------------------------| +| `discovery.k8s_api.auth_type` | `serviceAccount` | The authentication type for the k8s API. Options are `serviceAccount` or `kubeConfig`. | +| `discovery.host_root` | `/host_root` | The directory on which the root filesystem of the host is mounted. | +| `discovery.runtime_apis` | | The runtime apis used to get log file paths. docker and cri are supported now. By default, it will try to automatically detect the runtime apis. | +| `discovery.node_from_env` | `KUBE_NODE_NAME` | The name of the environment variable that holds the node name. | +| `discovery.filter` | [] | The filter used to filter pods and containers. By default, all pods and containers will be collected. | + +When `discovery.mode` is not `daemonset-stdout`, there are additional configuration options (will be useful when another mode is supported): + +| Field | Default | Description | +|--------------------|----------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `include` | required | A list of file glob patterns that match the file paths to be read. | +| `exclude` | [] | A list of file glob patterns to exclude from reading. | +| `fingerprint_size` | `1kb` | The number of bytes with which to identify a file. The first bytes in the file are used as the fingerprint. 
Decreasing this value at any point will cause existing fingerprints to be forgotten, meaning that all files will be read from the beginning (one time) | + +Note that _by default_, no logs will be read from a file that is not actively being written to because `start_at` defaults to `end`. + +### Operators + +Each operator performs a simple responsibility, such as parsing a timestamp or JSON. Chain together operators to process logs into a desired format. + +- Every operator has a `type`. +- Every operator can be given a unique `id`. If you use the same type of operator more than once in a pipeline, you must specify an `id`. Otherwise, the `id` defaults to the value of `type`. +- Operators will output to the next operator in the pipeline. The last operator in the pipeline will emit from the receiver. Optionally, the `output` parameter can be used to specify the `id` of another operator to which logs will be passed directly. +- Only parsers and general purpose operators should be used. + +### Filters + +When `discovery.mode` is not `sidecar`, the `discovery.filter` field can be used to filter pods and containers. The filter is a list of rules. Each rule is a map with the following fields: + +| Field | Description | +|---------------|--------------------------------------------------------------| +| `annotations` | MapFilters that filters pods by annotations. | +| `labels` | MapFilters that filters pods by labels. | +| `env` | MapFilters that filters containers by environment variables. | +| `containers` | ValueFilters that filters containers by name. | +| `namespaces` | ValueFilters that filters pods by namespace. | +| `pods` | ValueFilters that filters pods by name. | +| `uids` | ValueFilters that filters pods by uid. | + +#### MapFilter + +A MapFilter can be used to filter pods by maps, such as annotations or labels. 
It has the following fields: + +| Field | Description | +|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `op` | The operation to perform. Options are:
- "equals": (default) the value must be equal to the specified value.
- "not-equals": the value must not be equal to the specified value.
- "exists": the value must exist.
- "not-exists": the value must not exist.
- "matches": the value must match the specified regular expression.
- "not-matches": the value must not match the specified regular expression. | +| `key` | The key of the map. | +| `value` | The value to match. Only used for "equals", "not-equals", "matches", and "not-matches" operations. | + +#### ValueFilter + +A ValueFilter can be used to filter pods by string values, such as container names or namespaces. It has the following fields: + +| Field | Description | +|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `op` | The operation to perform. Options are:
- "equals": (default) the value must be equal to the specified value.
- "not-equals": the value must not be equal to the specified value.
- "matches": the value must match the specified regular expression.
- "not-matches": the value must not match the specified regular expression. | +| `value` | The value to match. | + +### Extract + +The `extract` field can be used to extract metadata from pods and containers. It has the following fields: + +| Field | Description | +|---------------|--------------------------------------------------------------------------------------| +| `metadata` | A string slice of metadata to extract from the pods and containers. | +| `env` | A FieldExtractConfig that extracts fields from environment variables of containers. | +| `otel_env` | A FieldExtractConfig that extracts fields from environment variables of otel itself. | +| `annotations` | A FieldExtractConfig that extracts fields from annotations of pods. | +| `labels` | A FieldExtractConfig that extracts fields from labels of pods. | + +#### FieldExtractConfig + +A FieldExtractConfig can be used to extract fields from maps, such as annotations or labels. It has the following fields: + +| Field | Description | +|-------------|------------------------------------------------------------------------------------------------------| +| `tag_name` | Required. The name of the extracted attributes. | +| `key` | The key of the map (annotation, label, etc.). Exactly one of `key` or `key_regex` must be specified. | +| `key_regex` | The regular expression of the key. Exactly one of `key` or `key_regex` must be specified. | +| `regex` | Optional. The regular expression to extract a submatch from the value. | + +### Supported encodings + +| Key | Description | +|------------|------------------------------------------------------------------| +| `nop` | No encoding validation. 
Treats the file as a stream of raw bytes | +| `utf-8` | UTF-8 encoding | +| `utf-16le` | UTF-16 encoding with little-endian byte order | +| `utf-16be` | UTF-16 encoding with big-endian byte order | +| `ascii` | ASCII encoding | +| `big5` | The Big5 Chinese character encoding | + +Other less common encodings are supported on a best-effort basis. See [https://www.iana.org/assignments/character-sets/character-sets.xhtml](https://www.iana.org/assignments/character-sets/character-sets.xhtml) for other encodings available. + +## Additional Terminology and Features + +- An [entry](../../pkg/stanza/docs/types/entry.md) is the base representation of log data as it moves through a pipeline. All operators either create, modify, or consume entries. +- A [field](../../pkg/stanza/docs/types/field.md) is used to reference values in an entry. +- A common [expression](../../pkg/stanza/docs/types/expression.md) syntax is used in several operators. For example, expressions can be used to [filter](../../pkg/stanza/docs/operators/filter.md) or [route](../../pkg/stanza/docs/operators/router.md) entries. + +### Parsers with Embedded Operations + +Many parser operators can be configured to embed certain follow-up operations such as timestamp and severity parsing. For more information, see [complex parsers](../../pkg/stanza/docs/types/parsers.md#complex-parsers). 
+ +## Example - Collect logs from stdout of all containers + +Receiver Configuration +```yaml +receivers: + k8slog: + discovery: + mode: daemonset-stdout + operators: + - type: recombine + combine_field: body + is_first_entry: body matches "^\\d{4}-\\d{2}-\\d{2}" + max_log_size: 128kb + source_identifier: attributes["k8s.pod.uid"] +``` diff --git a/receiver/k8slogreceiver/config.go b/receiver/k8slogreceiver/config.go new file mode 100644 index 000000000000..3a2c0242e1d1 --- /dev/null +++ b/receiver/k8slogreceiver/config.go @@ -0,0 +1,217 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package k8slogreceiver // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8slogreceiver" + +import ( + "fmt" + + "go.uber.org/multierr" + + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig" +) + +const ( + ModeDaemonSetStdout = "daemonset-stdout" +) + +const ( + DefaultMode = ModeDaemonSetStdout + DefaultHostRoot = "/host_root" + DefaultNodeFromEnv = "KUBE_NODE_NAME" +) + +// Config is the configuration of a k8slog receiver +type Config struct { + Discovery SourceConfig `mapstructure:"discovery"` + Extract ExtractConfig `mapstructure:"extract"` + + // TODO: refactor fileconsumer and add it's config of k8s implementation here. +} + +// ExtractConfig allows specifying how to extract resource attributes from pod. +type ExtractConfig struct { + // Metadata represents the list of metadata fields to extract from pod. + // TODO: supported metadata fields and default values. + Metadata []string `mapstructure:"metadata"` + + // Annotations represents the rules to extract from pod annotations. + Annotations []FieldExtractConfig `mapstructure:"annotations"` + + // Labels represents the rules to extract from pod labels. + Labels []FieldExtractConfig `mapstructure:"labels"` + + // Env represents the rules to extract from container environment variables. 
+ Env []FieldExtractConfig `mapstructure:"env"` +} + +// FieldExtractConfig allows specifying an extraction rule to extract a resource attribute from pod (or namespace) +// annotations (or labels). +// This is a copy of the config from the k8sattributes processor. +type FieldExtractConfig struct { + // TagName represents the name of the resource attribute that will be added to logs, metrics or spans. + // When not specified, a default tag name will be used of the format: + // - k8s.pod.annotations. + // - k8s.pod.labels.