From a9ca8c533865630cec75f3d711698f0c6cc0d22c Mon Sep 17 00:00:00 2001 From: Julien Clarysse Date: Tue, 3 Dec 2024 18:29:42 +0100 Subject: [PATCH] update(MirrorMaker): improve configuration concepts Explained the different configuration layers and moved monitoring instructions to howto section. [DOC-1169] --- .../concepts/mirrormaker2-tuning.md | 67 +++++++++---------- .../howto/monitor-replication-execution.md | 28 ++++++++ sidebars.ts | 1 + 3 files changed, 62 insertions(+), 34 deletions(-) create mode 100644 docs/products/kafka/kafka-mirrormaker/howto/monitor-replication-execution.md diff --git a/docs/products/kafka/kafka-mirrormaker/concepts/mirrormaker2-tuning.md b/docs/products/kafka/kafka-mirrormaker/concepts/mirrormaker2-tuning.md index 19c7a339c..ad4826921 100644 --- a/docs/products/kafka/kafka-mirrormaker/concepts/mirrormaker2-tuning.md +++ b/docs/products/kafka/kafka-mirrormaker/concepts/mirrormaker2-tuning.md @@ -1,12 +1,34 @@ --- -title: MirrorMaker 2 common parameters +title: Configuration parameters --- -MirrorMaker 2 (MM2) offers a suite of parameters to help with data replication and monitoring within Apache Kafka® ecosystems. -This topic outlines common parameters you can adjust, along with tips for -validating MM2's performance. +Apache Kafka® MirrorMaker 2 provides a suite of configuration parameters +to help with data replication within Apache Kafka® ecosystems. -1. Increase the value of `kafka_mirrormaker.tasks_max_per_cpu` in the +## Configuration layers + +1. **Service** configurations apply to the nodes and workers of the Apache Kafka® MirrorMaker 2 cluster. + - They are documented under [Advanced parameters for Aiven for Apache Kafka® MirrorMaker 2](/docs/products/kafka/kafka-mirrormaker/reference/advanced-params). + - An example of service configuration is `kafka_mirrormaker.emit_checkpoints_enabled`: Whether to emit consumer group offset checkpoints to target cluster periodically. 
+ - Changing the value of such a parameter leads to a restart of the workers (along with their connectors and tasks). +1. **Replication-flow** configurations apply to the connectors (Source, Sink, Checkpoint, Heartbeat). + - They are documented under [Aiven Terraform provider mirrormaker_replication_flow resource documentation](https://registry.terraform.io/providers/aiven/aiven/latest/docs/resources/mirrormaker_replication_flow). + - An example of replication-flow configuration is `topics`: List of topics and/or regular expressions to replicate (see [topics included in a replication flow](/docs/products/kafka/kafka-mirrormaker/concepts/replication-flow-topics-regex)). + - Changing the value of such a parameter leads to the restart of impacted connectors (along with their tasks). +1. **Integration** configurations apply to the consumers and producers of the connectors. + - They are documented under [Aiven Terraform provider service_integration resource documentation](https://registry.terraform.io/providers/aiven/aiven/latest/docs/resources/service_integration#nested-schema-for-kafka_mirrormaker_user_configkafka_mirrormaker). + - An example of integration configuration is `consumer_fetch_min_bytes`: The minimum amount of data the server should return for a fetch request. + - Changing the value of such a parameter leads to a restart of the workers (along with their connectors and tasks). + +:::note +Most configurations are directly inherited from the upstream [KIP-382: MirrorMaker 2.0 - Configuration Properties](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=95650722#KIP382:MirrorMaker2.0-ConnectorConfigurationProperties). +::: + +## Common parameters + +This section outlines common parameters which you can adjust. + +1. Increase the value of `kafka_mirrormaker.tasks_max_per_cpu` in the advanced options. Setting this to match the number of partitions can enhance performance. 1. Ensure the interval seconds for the following settings match. 
You @@ -19,32 +41,9 @@ validating MM2's performance. 1. To exclude internal topics, add these patterns to your topic blacklist: - `.*[\-\.]internal` `.*\.replica` `__.*` `connect.*` -1. Depending on your use case, consider adjusting these parameters: - - `kafka_mirrormaker.consumer_fetch_min_bytes` - - `kafka_mirrormaker.producer_batch_size` - - `kafka_mirrormaker.producer_buffer_memory` - - `kafka_mirrormaker.producer_linger_ms` - - `kafka_mirrormaker.producer_max_request_size` - -## MirrorMaker 2 validation tips - -To ensure MirrorMaker 2 is up-to-date with message processing, monitor -these: - -1. **Consumer lag metric**: Monitor the `kafka.consumer_lag` metric. - -1. **Dashboard metrics**: If MirrorMaker 2 stops adding records to a - topic, the `jmx.kafka.connect.mirror.record_count` metric stops - increasing, showing a flat line on the dashboard. - -1. **Retrieve latest messages with \`kt\`**: Use - [kt](https://github.com/fgeller/kt) to retrieve the latest messages - from all partitions with the following command: - - ``` - kt consume -auth ./mykafka.conf \ - -brokers SERVICE-PROJECT.aivencloud.com:PORT \ - -topic topicname -offsets all=newest:newest | \ - jq -c -s 'sort_by(.partition) | .[] | \ - {partition: .partition, value: .value, timestamp: .timestamp}' - ``` +1. Depending on your use case, consider adjusting these integration parameters: + - `consumer_fetch_min_bytes` + - `producer_batch_size` + - `producer_buffer_memory` + - `producer_linger_ms` + - `producer_max_request_size` diff --git a/docs/products/kafka/kafka-mirrormaker/howto/monitor-replication-execution.md b/docs/products/kafka/kafka-mirrormaker/howto/monitor-replication-execution.md new file mode 100644 index 000000000..d6f473676 --- /dev/null +++ b/docs/products/kafka/kafka-mirrormaker/howto/monitor-replication-execution.md @@ -0,0 +1,28 @@ +--- +title: Monitor replication execution +--- + +Apache Kafka® MirrorMaker 2 leverages Kafka Connect to help with state management +and monitoring. 
+ +## Tips + +To ensure that the replication is up-to-date with message processing, check the following: + +1. **Consumer lag metric**: Monitor the `kafka.consumer_lag` metric. + +1. **Dashboard metrics**: If MirrorMaker 2 stops adding records to a + topic, the `jmx.kafka.connect.mirror.record_count` metric stops + increasing, showing a flat line on the dashboard. + +1. **Retrieve latest messages with \`kt\`**: Use + [kt](https://github.com/fgeller/kt) to retrieve the latest messages + from all partitions with the following command: + + ``` + kt consume -auth ./mykafka.conf \ + -brokers SERVICE-PROJECT.aivencloud.com:PORT \ + -topic topicname -offsets all=newest:newest | \ + jq -c -s 'sort_by(.partition) | .[] | \ + {partition: .partition, value: .value, timestamp: .timestamp}' + ``` diff --git a/sidebars.ts b/sidebars.ts index 2bbdfbf7a..9d1505c2e 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -1058,6 +1058,7 @@ const sidebars: SidebarsConfig = { items: [ 'products/kafka/kafka-mirrormaker/howto/integrate-external-kafka-cluster', 'products/kafka/kafka-mirrormaker/howto/setup-replication-flow', + 'products/kafka/kafka-mirrormaker/howto/monitor-replication-execution', 'products/kafka/kafka-mirrormaker/howto/remove-mirrormaker-prefix', 'products/kafka/kafka-mirrormaker/howto/datadog-customised-metrics', 'products/kafka/kafka-mirrormaker/howto/log-analysis-offset-sync-tool',