From 63a6d2d24aff11c72a3750e228027cf5fd1b86b9 Mon Sep 17 00:00:00 2001 From: urfreespace Date: Sun, 20 Oct 2024 17:35:01 +0000 Subject: [PATCH] Docs sync 20241020 --- .../io-activemq-sink.md | 2 +- .../v4.0.0.1/io-activemq-sink.md | 172 ++++++++ .../io-activemq-source.md | 2 +- .../v4.0.0.1/io-activemq-source.md | 161 ++++++++ .../{v3.0.6 => v3.0.7}/aerospike-sink.md | 4 +- .../aerospike-sink/v4.0.0/aerospike-sink.md | 42 ++ .../{v3.0.6.8 => v3.0.7.1}/amqp-1-0-sink.md | 4 +- .../amqp-1-0-sink/v4.0.0.1/amqp-1-0-sink.md | 184 +++++++++ .../{v3.0.6.8 => v3.0.7.1}/amqp-1-0-source.md | 4 +- .../v4.0.0.1/amqp-1-0-source.md | 170 ++++++++ .../aws-eventbridge-sink.md | 2 +- .../v4.0.0.1/aws-eventbridge-sink.md | 339 ++++++++++++++++ .../{v3.0.6.8 => v3.0.7.1}/aws-lambda-sink.md | 28 +- .../v4.0.0.1/aws-lambda-sink.md | 372 ++++++++++++++++++ .../{v3.0.6.8 => v3.0.7.1}/aws-s3-sink.md | 4 +- .../aws-s3-sink/v4.0.0.1/aws-s3-sink.md | 277 +++++++++++++ .../azure-blob-storage-sink.md | 4 +- .../v4.0.0.1/azure-blob-storage-sink.md | 262 ++++++++++++ .../{v3.0.6 => v3.0.7}/elasticsearch-sink.md | 4 +- .../v4.0.0/elasticsearch-sink.md | 165 ++++++++ .../{v3.0.6 => v3.0.7}/hbase-sink.md | 4 +- connectors/hbase-sink/v4.0.0/hbase-sink.md | 79 ++++ .../{v3.0.6 => v3.0.7}/hdfs2-sink.md | 4 +- connectors/hdfs2-sink/v4.0.0/hdfs2-sink.md | 70 ++++ .../{v3.0.6 => v3.0.7}/hdfs3-sink.md | 4 +- connectors/hdfs3-sink/v4.0.0/hdfs3-sink.md | 69 ++++ .../{v3.0.6 => v3.0.7}/influxdb-sink.md | 4 +- .../influxdb-sink/v4.0.0/influxdb-sink.md | 123 ++++++ .../{v3.0.6 => v3.0.7}/kafka-sink.md | 4 +- connectors/kafka-sink/v4.0.0/kafka-sink.md | 124 ++++++ .../{v3.0.6 => v3.0.7}/kafka-source.md | 4 +- .../kafka-source/v4.0.0/kafka-source.md | 109 +++++ .../{v3.0.6 => v3.0.7}/kinesis-sink.md | 4 +- .../kinesis-sink/v4.0.0/kinesis-sink.md | 145 +++++++ .../{v3.0.6 => v3.0.7}/kinesis-source.md | 4 +- .../kinesis-source/v4.0.0/kinesis-source.md | 195 +++++++++ .../{v3.0.6 => v3.0.7}/mongodb-sink.md | 4 +- .../mongodb-sink/v4.0.0/mongodb-sink.md | 67 ++++ .../{v3.0.6 => v3.0.7}/redis-sink.md | 4 +- connectors/redis-sink/v4.0.0/redis-sink.md | 80 ++++ .../twitter-firehose-source.md | 4 +- .../v4.0.0/twitter-firehose-source.md | 43 ++ 42 files changed, 3299 insertions(+), 51 deletions(-) rename connectors/activemq-sink/{v3.0.6.8 => v3.0.7.1}/io-activemq-sink.md (99%) create mode 100644 connectors/activemq-sink/v4.0.0.1/io-activemq-sink.md rename connectors/activemq-source/{v3.0.6.8 => v3.0.7.1}/io-activemq-source.md (99%) create mode 100644 connectors/activemq-source/v4.0.0.1/io-activemq-source.md rename connectors/aerospike-sink/{v3.0.6 => v3.0.7}/aerospike-sink.md (91%) create mode 100644 connectors/aerospike-sink/v4.0.0/aerospike-sink.md rename connectors/amqp-1-0-sink/{v3.0.6.8 => v3.0.7.1}/amqp-1-0-sink.md (99%) create mode 100644 connectors/amqp-1-0-sink/v4.0.0.1/amqp-1-0-sink.md rename connectors/amqp-1-0-source/{v3.0.6.8 => v3.0.7.1}/amqp-1-0-source.md (99%) create mode 100644 connectors/amqp-1-0-source/v4.0.0.1/amqp-1-0-source.md rename connectors/aws-eventbridge-sink/{v3.0.6.8 => v3.0.7.1}/aws-eventbridge-sink.md (99%) create mode 100644 connectors/aws-eventbridge-sink/v4.0.0.1/aws-eventbridge-sink.md rename connectors/aws-lambda-sink/{v3.0.6.8 => v3.0.7.1}/aws-lambda-sink.md (94%) create mode 100644 connectors/aws-lambda-sink/v4.0.0.1/aws-lambda-sink.md rename connectors/aws-s3-sink/{v3.0.6.8 => v3.0.7.1}/aws-s3-sink.md (99%) create mode 100644 connectors/aws-s3-sink/v4.0.0.1/aws-s3-sink.md rename 
connectors/azure-blob-storage-sink/{v3.0.6.8 => v3.0.7.1}/azure-blob-storage-sink.md (99%) create mode 100644 connectors/azure-blob-storage-sink/v4.0.0.1/azure-blob-storage-sink.md rename connectors/elasticsearch-sink/{v3.0.6 => v3.0.7}/elasticsearch-sink.md (99%) create mode 100644 connectors/elasticsearch-sink/v4.0.0/elasticsearch-sink.md rename connectors/hbase-sink/{v3.0.6 => v3.0.7}/hbase-sink.md (94%) create mode 100644 connectors/hbase-sink/v4.0.0/hbase-sink.md rename connectors/hdfs2-sink/{v3.0.6 => v3.0.7}/hdfs2-sink.md (95%) create mode 100644 connectors/hdfs2-sink/v4.0.0/hdfs2-sink.md rename connectors/hdfs3-sink/{v3.0.6 => v3.0.7}/hdfs3-sink.md (95%) create mode 100644 connectors/hdfs3-sink/v4.0.0/hdfs3-sink.md rename connectors/influxdb-sink/{v3.0.6 => v3.0.7}/influxdb-sink.md (96%) create mode 100644 connectors/influxdb-sink/v4.0.0/influxdb-sink.md rename connectors/kafka-sink/{v3.0.6 => v3.0.7}/kafka-sink.md (97%) create mode 100644 connectors/kafka-sink/v4.0.0/kafka-sink.md rename connectors/kafka-source/{v3.0.6 => v3.0.7}/kafka-source.md (97%) create mode 100644 connectors/kafka-source/v4.0.0/kafka-source.md rename connectors/kinesis-sink/{v3.0.6 => v3.0.7}/kinesis-sink.md (99%) create mode 100644 connectors/kinesis-sink/v4.0.0/kinesis-sink.md rename connectors/kinesis-source/{v3.0.6 => v3.0.7}/kinesis-source.md (99%) create mode 100644 connectors/kinesis-source/v4.0.0/kinesis-source.md rename connectors/mongodb-sink/{v3.0.6 => v3.0.7}/mongodb-sink.md (92%) create mode 100644 connectors/mongodb-sink/v4.0.0/mongodb-sink.md rename connectors/redis-sink/{v3.0.6 => v3.0.7}/redis-sink.md (94%) create mode 100644 connectors/redis-sink/v4.0.0/redis-sink.md rename connectors/twitter-firehose-source/{v3.0.6 => v3.0.7}/twitter-firehose-source.md (92%) create mode 100644 connectors/twitter-firehose-source/v4.0.0/twitter-firehose-source.md diff --git a/connectors/activemq-sink/v3.0.6.8/io-activemq-sink.md b/connectors/activemq-sink/v3.0.7.1/io-activemq-sink.md similarity index 99% rename from connectors/activemq-sink/v3.0.6.8/io-activemq-sink.md rename to connectors/activemq-sink/v3.0.7.1/io-activemq-sink.md index 55219b94..a06c078c 100644 --- a/connectors/activemq-sink/v3.0.6.8/io-activemq-sink.md +++ b/connectors/activemq-sink/v3.0.7.1/io-activemq-sink.md @@ -11,7 +11,7 @@ tags: apache-pulsar,pulsar-io,source-connector,sink-connector,apache-activemq alias: ActiveMQ sink connector features: ["ActiveMQ Connector integrates Apache Pulsar with Apache ActiveMQ. "] icon: "/images/connectors/activemq_logo_white_vertical.jpg" -download: https://api.github.com/repos/streamnative/pulsar-io-activemq/tarball/refs/tags/v3.0.6.8 +download: https://api.github.com/repos/streamnative/pulsar-io-activemq/tarball/refs/tags/v3.0.7.1 support: streamnative support_link: https://github.com/streamnative/pulsar-io-activemq support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" diff --git a/connectors/activemq-sink/v4.0.0.1/io-activemq-sink.md b/connectors/activemq-sink/v4.0.0.1/io-activemq-sink.md new file mode 100644 index 00000000..e9491824 --- /dev/null +++ b/connectors/activemq-sink/v4.0.0.1/io-activemq-sink.md @@ -0,0 +1,172 @@ +--- +description: ActiveMQ Connector integrates Apache Pulsar with Apache ActiveMQ. 
+author: StreamNative +contributors: gaoran10,shibd,dependabot[bot],sijie +language: Java,Shell,Dockerfile +document: +source: https://github.com/streamnative/pulsar-io-activemq +license: Apache License 2.0 +license_link: https://github.com/streamnative/pulsar-io-activemq/blob/master/LICENSE +tags: apache-pulsar,pulsar-io,source-connector,sink-connector,apache-activemq +alias: ActiveMQ sink connector +features: ["ActiveMQ Connector integrates Apache Pulsar with Apache ActiveMQ. "] +icon: "/images/connectors/activemq_logo_white_vertical.jpg" +download: https://api.github.com/repos/streamnative/pulsar-io-activemq/tarball/refs/tags/v4.0.0.1 +support: streamnative +support_link: https://github.com/streamnative/pulsar-io-activemq +support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +owner_name: "streamnative" +owner_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +dockerfile: https://hub.docker.com/r/streamnative/pulsar-io-activemq +sn_available: "" +id: "io-activemq-sink" +--- + + +The ActiveMQ sink connector pulls messages from Pulsar topics and persist messages to ActiveMQ. + +# Installation + +``` +git clone https://github.com/streamnative/pulsar-io-activemq.git +cd pulsar-io-activemq/ +mvn clean install -DskipTests +cp target/pulsar-io-activemq-0.0.1.nar $PULSAR_HOME/pulsar-io-activemq-0.0.1.nar +``` + +# Configuration + +The configuration of the ActiveMQ sink connector has the following properties. + +## ActiveMQ sink connector configuration + +| Name | Type | Required | Sensitive | Default | Description | +|---------------------|--------|----------|-----------|--------------------|--------------------------------------------------------------------------| +| `protocol` | String | true | false | "tcp" | The ActiveMQ protocol. | +| `host` | String | true | false | " " (empty string) | The ActiveMQ host. | +| `port` | int | true | false | 5672 | The ActiveMQ port. | +| `username` | String | false | true | " " (empty string) | The username used to authenticate to ActiveMQ. | +| `password` | String | false | true | " " (empty string) | The password used to authenticate to ActiveMQ. | +| `queueName` | String | false | false | " " (empty string) | The ActiveMQ queue name that messages should be read from or written to. | +| `topicName` | String | false | false | " " (empty string) | The ActiveMQ topic name that messages should be read from or written to. | +| `activeMessageType` | String | false | false | 0 | The ActiveMQ message simple class name. | + +## Configure ActiveMQ sink connector + +Before using the ActiveMQ sink connector, you need to create a configuration file through one of the following methods. + +* JSON + + ```json + { + "tenant": "public", + "namespace": "default", + "name": "activemq-sink", + "inputs": ["user-op-queue-topic"], + "archive": "connectors/pulsar-io-activemq-2.5.1.nar", + "parallelism": 1, + "configs": + { + "protocol": "tcp", + "host": "localhost", + "port": "61616", + "username": "admin", + "password": "admin", + "queueName": "user-op-queue-pulsar" + } + } + ``` + +* YAML + + ```yaml + tenant: "public" + namespace: "default" + name: "activemq-sink" + inputs: + - "user-op-queue-topic" + archive: "connectors/pulsar-io-activemq-2.5.1.nar" + parallelism: 1 + + configs: + protocol: "tcp" + host: "localhost" + port: "61616" + username: "admin" + password: "admin" + queueName: "user-op-queue-pulsar" + ``` + +# Usage + +1. Prepare ActiveMQ service. 
+ + ``` + docker pull rmohr/activemq + docker run -p 61616:61616 -p 8161:8161 rmohr/activemq + ``` + +2. Put the `pulsar-io-activemq-2.5.1.nar` in the pulsar connectors catalog. + + ``` + cp pulsar-io-activemq-2.5.1.nar $PULSAR_HOME/connectors/pulsar-io-activemq-2.5.1.nar + ``` + +3. Start Pulsar in standalone mode. + + ``` + $PULSAR_HOME/bin/pulsar standalone + ``` + +4. Run ActiveMQ sink locally. + + ``` + $PULSAR_HOME/bin/pulsar-admin sink localrun --sink-config-file activemq-sink-config.yaml + ``` + +5. Send Pulsar messages. + + ``` + $PULSAR_HOME/bin/pulsar-client produce public/default/user-op-queue-topic --messages hello -n 10 + ``` + +6. Consume ActiveMQ messages. + + Use the test method `receiveMessage` of the class `org.apache.pulsar.ecosystem.io.activemq.ActiveMQDemo` +to consume ActiveMQ messages. + + ``` + @Test + private void receiveMessage() throws JMSException, InterruptedException { + + ActiveMQConnectionFactory connectionFactory = new ActiveMQConnectionFactory("tcp://localhost:61616"); + + @Cleanup + Connection connection = connectionFactory.createConnection(); + connection.start(); + + @Cleanup + Session session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE); + + Destination destination = session.createQueue("user-op-queue-pulsar"); + + @Cleanup + MessageConsumer consumer = session.createConsumer(destination); + + consumer.setMessageListener(new MessageListener() { + @Override + public void onMessage(Message message) { + if (message instanceof ActiveMQTextMessage) { + try { + System.out.println("get message ----------------- "); + System.out.println("receive: " + ((ActiveMQTextMessage) message).getText()); + } catch (JMSException e) { + e.printStackTrace(); + } + } + } + }); + } + ``` + + diff --git a/connectors/activemq-source/v3.0.6.8/io-activemq-source.md b/connectors/activemq-source/v3.0.7.1/io-activemq-source.md similarity index 99% rename from connectors/activemq-source/v3.0.6.8/io-activemq-source.md rename to connectors/activemq-source/v3.0.7.1/io-activemq-source.md index afeccca4..1d4f17f4 100644 --- a/connectors/activemq-source/v3.0.6.8/io-activemq-source.md +++ b/connectors/activemq-source/v3.0.7.1/io-activemq-source.md @@ -11,7 +11,7 @@ tags: apache-pulsar,pulsar-io,source-connector,sink-connector,apache-activemq alias: ActiveMQ source connector features: ["ActiveMQ Connector integrates Apache Pulsar with Apache ActiveMQ. "] icon: "/images/connectors/activemq_logo_white_vertical.jpg" -download: https://api.github.com/repos/streamnative/pulsar-io-activemq/tarball/refs/tags/v3.0.6.8 +download: https://api.github.com/repos/streamnative/pulsar-io-activemq/tarball/refs/tags/v3.0.7.1 support: streamnative support_link: https://github.com/streamnative/pulsar-io-activemq support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" diff --git a/connectors/activemq-source/v4.0.0.1/io-activemq-source.md b/connectors/activemq-source/v4.0.0.1/io-activemq-source.md new file mode 100644 index 00000000..cd49c4fa --- /dev/null +++ b/connectors/activemq-source/v4.0.0.1/io-activemq-source.md @@ -0,0 +1,161 @@ +--- +description: ActiveMQ Connector integrates Apache Pulsar with Apache ActiveMQ. 
+author: StreamNative +contributors: gaoran10,shibd,dependabot[bot],sijie +language: Java,Shell,Dockerfile +document: +source: https://github.com/streamnative/pulsar-io-activemq +license: Apache License 2.0 +license_link: https://github.com/streamnative/pulsar-io-activemq/blob/master/LICENSE +tags: apache-pulsar,pulsar-io,source-connector,sink-connector,apache-activemq +alias: ActiveMQ source connector +features: ["ActiveMQ Connector integrates Apache Pulsar with Apache ActiveMQ. "] +icon: "/images/connectors/activemq_logo_white_vertical.jpg" +download: https://api.github.com/repos/streamnative/pulsar-io-activemq/tarball/refs/tags/v4.0.0.1 +support: streamnative +support_link: https://github.com/streamnative/pulsar-io-activemq +support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +owner_name: "streamnative" +owner_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +dockerfile: https://hub.docker.com/r/streamnative/pulsar-io-activemq +sn_available: "" +id: "io-activemq-source" +--- + + +The ActiveMQ source connector receives messages from ActiveMQ clusters and writes messages to Pulsar topics. + +# Installation + +``` +git clone https://github.com/streamnative/pulsar-io-activemq.git +cd pulsar-io-activemq/ +mvn clean install -DskipTests +cp target/pulsar-io-activemq-0.0.1.nar $PULSAR_HOME/pulsar-io-activemq-0.0.1.nar +``` + +# Configuration + +The configuration of the ActiveMQ source connector has the following properties. + +## ActiveMQ source connector configuration + +| Name | Type | Required | Sensitive | Default | Description | +|-------------|--------|----------|-----------|--------------------|--------------------------------------------------------------------------| +| `protocol` | String | true | false | "tcp" | The ActiveMQ protocol. | +| `host` | String | true | false | " " (empty string) | The ActiveMQ host. | +| `port` | int | true | false | 5672 | The ActiveMQ port. | +| `username` | String | false | true | " " (empty string) | The username used to authenticate to ActiveMQ. | +| `password` | String | false | true | " " (empty string) | The password used to authenticate to ActiveMQ. | +| `queueName` | String | false | false | " " (empty string) | The ActiveMQ queue name that messages should be read from or written to. | +| `topicName` | String | false | false | " " (empty string) | The ActiveMQ topic name that messages should be read from or written to. | + +## Configure ActiveMQ source connector + +Before using the ActiveMQ source connector, you need to create a configuration file through one of the following methods. + +* JSON + + ```json + { + "tenant": "public", + "namespace": "default", + "name": "activemq-source", + "topicName": "user-op-queue-topic", + "archive": "connectors/pulsar-io-activemq-2.5.1.nar", + "parallelism": 1, + "configs": { + "protocol": "tcp", + "host": "localhost", + "port": "61616", + "username": "admin", + "password": "admin", + "queueName": "user-op-queue" + } + } + ``` + +* YAML + + ```yaml + tenant: "public" + namespace: "default" + name: "activemq-source" + topicName: "user-op-queue-topic" + archive: "connectors/pulsar-io-activemq-2.5.1.nar" + parallelism: 1 + + configs: + protocol: "tcp" + host: "localhost" + port: "61616" + username: "admin" + password: "admin" + queueName: "user-op-queue" + ``` + +1. Prepare ActiveMQ service. + + ``` + docker pull rmohr/activemq + docker run -p 61616:61616 -p 8161:8161 rmohr/activemq + ``` + +2. Put the `pulsar-io-activemq-2.5.1.nar` in the pulsar connectors catalog. 
+ + ``` + cp pulsar-io-activemq-2.5.1.nar $PULSAR_HOME/connectors/pulsar-io-activemq-2.5.1.nar + ``` + +3. Start Pulsar in standalone mode. + + ``` + $PULSAR_HOME/bin/pulsar standalone + ``` + +4. Run ActiveMQ source locally. + + ``` + $PULSAR_HOME/bin/pulsar-admin source localrun --source-config-file activemq-source-config.yaml + ``` + +5. Consume Pulsar messages. + + ``` + bin/pulsar-client consume -s "sub-products" public/default/user-op-queue-topic -n 0 + ``` + +6. Send ActiveMQ messages. + + Use the test method `sendMessage` of the `class org.apache.pulsar.ecosystem.io.activemq.ActiveMQDemo` +to send ActiveMQ messages. + + ``` + @Test + private void sendMessage() throws JMSException { + + ActiveMQConnectionFactory connectionFactory = new ActiveMQConnectionFactory("tcp://localhost:61616"); + + @Cleanup + Connection connection = connectionFactory.createConnection(); + connection.start(); + + @Cleanup + Session session = connection.createSession(false, Session.CLIENT_ACKNOWLEDGE); + + Destination destination = session.createQueue("user-op-queue"); + + @Cleanup + MessageProducer producer = session.createProducer(destination); + producer.setDeliveryMode(DeliveryMode.NON_PERSISTENT); + + for (int i = 0; i < 10; i++) { + String msgContent = "Hello ActiveMQ - " + i; + ActiveMQTextMessage message = new ActiveMQTextMessage(); + message.setText(msgContent); + producer.send(message); + } + } + ``` + + diff --git a/connectors/aerospike-sink/v3.0.6/aerospike-sink.md b/connectors/aerospike-sink/v3.0.7/aerospike-sink.md similarity index 91% rename from connectors/aerospike-sink/v3.0.6/aerospike-sink.md rename to connectors/aerospike-sink/v3.0.7/aerospike-sink.md index fc39e99b..887a8d65 100644 --- a/connectors/aerospike-sink/v3.0.6/aerospike-sink.md +++ b/connectors/aerospike-sink/v3.0.7/aerospike-sink.md @@ -4,14 +4,14 @@ author: ["ASF"] contributors: ["ASF"] language: Java document: sss -source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/aerospike" +source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/aerospike" license: Apache License 2.0 tags: ["Pulsar IO", "Aerospike", "Sink"] alias: Aerospike Sink features: ["Use Aerospike sink connector to sync data from Pulsar"] license_link: "https://www.apache.org/licenses/LICENSE-2.0" icon: "/images/connectors/aerospike-sink.png" -download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-aerospike-3.0.6.nar" +download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-aerospike-3.0.7.nar" support: StreamNative support_link: https://streamnative.io support_img: "/images/streamnative.png" diff --git a/connectors/aerospike-sink/v4.0.0/aerospike-sink.md b/connectors/aerospike-sink/v4.0.0/aerospike-sink.md new file mode 100644 index 00000000..aab56776 --- /dev/null +++ b/connectors/aerospike-sink/v4.0.0/aerospike-sink.md @@ -0,0 +1,42 @@ +--- +description: The Aerospike sink connector pulls messages from Pulsar topics to Aerospike clusters +author: ["ASF"] +contributors: ["ASF"] +language: Java +document: sss +source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/aerospike" +license: Apache License 2.0 +tags: ["Pulsar IO", "Aerospike", "Sink"] +alias: Aerospike Sink +features: ["Use Aerospike sink connector to sync data from Pulsar"] +license_link: "https://www.apache.org/licenses/LICENSE-2.0" +icon: "/images/connectors/aerospike-sink.png" +download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-aerospike-4.0.0.nar" +support: StreamNative 
+support_link: https://streamnative.io +support_img: "/images/streamnative.png" +owner_name: "" +owner_img: "" +dockerfile: +id: "aerospike-sink" +--- + +The Aerospike sink connector pulls messages from Pulsar topics to Aerospike clusters. + +# Configuration + +The configuration of the Aerospike sink connector has the following properties. + +## Property + +| Name | Type|Required | Default | Description +|------|----------|----------|---------|-------------| +| `seedHosts` |String| true | No default value| The comma-separated list of one or more Aerospike cluster hosts.
Each host can be specified as a valid IP address or hostname followed by an optional port number. | +| `keyspace` | String| true |No default value |The Aerospike namespace. | +| `columnName` | String | true| No default value|The Aerospike column name. | +|`userName`|String|false|NULL|The Aerospike username.| +|`password`|String|false|NULL|The Aerospike password.| +| `keySet` | String|false |NULL | The Aerospike set name. | +| `maxConcurrentRequests` |int| false | 100 | The maximum number of concurrent Aerospike transactions that a sink can open. | +| `timeoutMs` | int|false | 100 | This property controls `socketTimeout` and `totalTimeout` for Aerospike transactions. | +| `retries` | int|false | 1 |The maximum number of retries before aborting a write transaction to Aerospike. | diff --git a/connectors/amqp-1-0-sink/v3.0.6.8/amqp-1-0-sink.md b/connectors/amqp-1-0-sink/v3.0.7.1/amqp-1-0-sink.md similarity index 99% rename from connectors/amqp-1-0-sink/v3.0.6.8/amqp-1-0-sink.md rename to connectors/amqp-1-0-sink/v3.0.7.1/amqp-1-0-sink.md index 8dc77ea4..770cb408 100644 --- a/connectors/amqp-1-0-sink/v3.0.6.8/amqp-1-0-sink.md +++ b/connectors/amqp-1-0-sink/v3.0.7.1/amqp-1-0-sink.md @@ -11,7 +11,7 @@ tags: alias: AMQP 1.0 Sink Connector features: ["support sink/source for AMQP version 1.0.0"] icon: "/images/connectors/amqp-logo.png" -download: https://api.github.com/repos/streamnative/pulsar-io-amqp-1-0/tarball/refs/tags/v3.0.6.8 +download: https://api.github.com/repos/streamnative/pulsar-io-amqp-1-0/tarball/refs/tags/v3.0.7.1 support: streamnative support_link: https://github.com/streamnative/pulsar-io-amqp-1-0 support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" @@ -27,7 +27,7 @@ id: "amqp-1-0-sink" The AMQP 1.0 sink connector pulls messages from Pulsar topics and persists messages to [AMQP 1.0](https://www.amqp.org/). -![](https://raw.githubusercontent.com/streamnative/pulsar-io-amqp-1-0/v3.0.6.8/docs/amqp-1-0-sink.png) +![](https://raw.githubusercontent.com/streamnative/pulsar-io-amqp-1-0/v3.0.7.1/docs/amqp-1-0-sink.png) ## Quick start diff --git a/connectors/amqp-1-0-sink/v4.0.0.1/amqp-1-0-sink.md b/connectors/amqp-1-0-sink/v4.0.0.1/amqp-1-0-sink.md new file mode 100644 index 00000000..7903762e --- /dev/null +++ b/connectors/amqp-1-0-sink/v4.0.0.1/amqp-1-0-sink.md @@ -0,0 +1,184 @@ +--- +description: support sink/source for AMQP version 1.0.0 +author: StreamNative +contributors: gaoran10,Anonymitaet,shibd,dependabot[bot] +language: Java,Shell,Dockerfile,Python +document: +source: https://github.com/streamnative/pulsar-io-amqp-1-0 +license: Apache License 2.0 +license_link: https://github.com/streamnative/pulsar-io-amqp-1-0/blob/master/LICENSE +tags: +alias: AMQP 1.0 Sink Connector +features: ["support sink/source for AMQP version 1.0.0"] +icon: "/images/connectors/amqp-logo.png" +download: https://api.github.com/repos/streamnative/pulsar-io-amqp-1-0/tarball/refs/tags/v4.0.0.1 +support: streamnative +support_link: https://github.com/streamnative/pulsar-io-amqp-1-0 +support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +owner_name: "streamnative" +owner_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +dockerfile: https://hub.docker.com/r/streamnative/pulsar-io-amqp1_0 +sn_available: "true" +id: "amqp-1-0-sink" +--- + + +# AMQP 1.0 sink connector + +The AMQP 1.0 sink connector pulls messages from Pulsar topics and persists messages to [AMQP 1.0](https://www.amqp.org/). 
+ +![](https://raw.githubusercontent.com/streamnative/pulsar-io-amqp-1-0/v4.0.0.1/docs/amqp-1-0-sink.png) + +## Quick start + +### 1. Start AMQP 1.0 service + +Start a service that supports the AMQP 1.0 protocol, such as [Solace](https://docs.solace.com/index.html). +```bash +docker run -d -p 8080:8080 -p:8008:8008 -p:1883:1883 -p:8000:8000 -p:5672:5672 -p:9000:9000 -p:2222:2222 --shm-size=2g --env username_admin_globalaccesslevel=admin --env username_admin_password=admin --name=solace solace/solace-pubsub-standard +``` + +### 2. Create a connector + +The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector, +you need to replace `--sink-type amqp1_0` with `--archive /path/to/pulsar-io-amqp1_0.nar`. You can find the button to download the `nar` package at the beginning of the document. + +{% callout title="For StreamNative Cloud User" type="note" %} +If you are a StreamNative Cloud user, you need [set up your environment](https://docs.streamnative.io/docs/connector-setup) first. +{% /callout %} + +```bash +pulsarctl sinks create \ + --sink-type amqp1_0 \ + --name amqp1_0-sink \ + --tenant public \ + --namespace default \ + --inputs "Your topic name" \ + --parallelism 1 \ + --sink-config \ + '{ + "connection": { + "failover": { + "useFailover": true + }, + "uris": [ + { + "protocol": "amqp", + "host": "localhost", + "port": 5672, + "urlOptions": [ + "transport.tcpKeepAlive=true" + ] + } + ] + }, + "username": "guest", + "password": "guest", + "queue": "user-op-queue-pulsar" + }' +``` + +The `--sink-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own. +If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference. + +{% callout title="Note" type="note" %} +You can also choose to use a variety of other tools to create a connector: +- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector ). +- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Terraform](https://github.com/hashicorp/terraform): You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document. +{% /callout %} + +### 2. Send messages to the topic + +{% callout title="Note" type="note" %} +- If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information. +- The following sample code uses the **Apache qpid** library. 
+{% /callout %} + +``` java + public static void main(String[] args) { + PulsarClient pulsarClient = PulsarClient.builder().serviceUrl("{{Your Pulsar URL}}").build(); + Producer producer = pulsarClient.newProducer(Schema.BYTEBUFFER) + .topic("{{The topic name that you specified when you created the connector}}") + .create(); + + JmsConnectionFactory jmsConnectionFactory = new JmsConnectionFactory(); + JMSContext jmsContext = jmsConnectionFactory.createContext(); + + for (int i = 0; i < 10; i++) { + JmsTextMessage textMessage = (JmsTextMessage) jmsContext.createTextMessage("text message - " + i); + ByteBuf byteBuf = (ByteBuf) textMessage.getFacade().encodeMessage(); + producer.send(byteBuf.nioBuffer()); + } + System.out.println("finish send messages."); + jmsContext.close(); + pulsarClient.close(); + } +``` + +### 3. Consume data from AMQP 1.0 service + +``` java + public static void main(String[] args) { + ConnectionFactory connectionFactory = new JmsConnectionFactory("guest", "guest", "amqp://localhost:5672"); + Connection connection = connectionFactory.createConnection(); + connection.start(); + Session session = connection.createSession(); + MessageConsumer consumer = session.createConsumer(new JmsQueue("user-op-queue-pulsar")); + for (int i = 0; i < 10; i++) { + JmsTextMessage textMessage = (JmsTextMessage) consumer.receive(); + System.out.println("receive msg content: " + textMessage.getText()); + textMessage.acknowledge(); + } + consumer.close(); + session.close(); + connection.close(); + } +``` + +## Configuration Properties + +Before using the AMQP 1.0 sink connector, you need to configure it. + +You can create a configuration file (JSON or YAML) to set the following properties. + +| Name | Type | Required | Sensitive | Default | Description | +|---------------------|------------|----------------------------------------------|-----------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------| +| `protocol` | String | required if connection is not used | false | "amqp" | [deprecated: use connection instead] The AMQP protocol. | +| `host` | String | required if connection is not used | false | " " (empty string) | [deprecated: use connection instead] The AMQP service host. | +| `port` | int | required if connection is not used | false | 5672 | [deprecated: use connection instead] The AMQP service port. | +| `connection` | Connection | required if protocol, host, port is not used | false | " " (empty string) | The connection details. | +| `username` | String | false | true | " " (empty string) | The username used to authenticate to ActiveMQ. | +| `password` | String | false | true | " " (empty string) | The password used to authenticate to ActiveMQ. | +| `queue` | String | false | false | " " (empty string) | The queue name that messages should be read from or written to. | +| `topic` | String | false | false | " " (empty string) | The topic name that messages should be read from or written to. | +| `activeMessageType` | String | false | false | 0 | The ActiveMQ message simple class name. | +| `onlyTextMessage` | boolean | false | false | false | If it is set to `true`, the AMQP message type must be set to `TextMessage`. Pulsar consumers can consume the messages with schema ByteBuffer. 
| + +A `Connection` object can be specified as follows: + +| Name | Type | Required | Default | Description | +|------------|-----------------------|----------|--------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `failover` | Failover | false | " " (empty string) | The configuration for a failover connection. | +| `uris` | list of ConnectionUri | true | " " (empty string) | A list of ConnectionUri objects. When useFailover is set to true 1 or more should be provided. Currently only 1 uri is supported when useFailover is set to false | + +A `Failover` object can be specified as follows: + +| Name | Type | Required | Default | Description | +|--------------------------------|----------------|--------------------------------------------------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `useFailover` | boolean | true | false | If it is set to true, the connection will be created from the uris provided under uris, using qpid's failover connection factory. | +| `jmsClientId` | String | required if failoverConfigurationOptions is used | " " (empty string) | Identifying name for the jms Client | +| `failoverConfigurationOptions` | List of String | required if jmsClientId is used | " " (empty string) | A list of options (e.g. ). The options wil be joined using an '&', prefixed with a the jmsClientId and added to the end of the failoverUri. see also: https://qpid.apache.org/releases/qpid-jms-2.2.0/docs/index.html#failover-configuration-options | + +A `ConnectionUri` object can be specified as follows: + +| Name | Type | Required | Default | Description | +|--------------|----------------|----------|--------------------|-------------------------------------------------------------------------------------------------------------------------------------------| +| `protocol` | String | true | " " (empty string) | The AMQP protocol. | +| `host` | String | true | " " (empty string) | The AMQP service host. | +| `port` | int | true | 0 | The AMQP service port. | +| `urlOptions` | List of String | false | " " (empty string) | A list of url-options (e.g. ). The url options wil be joined using an '&', prefixed with a '?' 
and added to the end of the uri | + + + diff --git a/connectors/amqp-1-0-source/v3.0.6.8/amqp-1-0-source.md b/connectors/amqp-1-0-source/v3.0.7.1/amqp-1-0-source.md similarity index 99% rename from connectors/amqp-1-0-source/v3.0.6.8/amqp-1-0-source.md rename to connectors/amqp-1-0-source/v3.0.7.1/amqp-1-0-source.md index 4d8ab266..6e4ffbb4 100644 --- a/connectors/amqp-1-0-source/v3.0.6.8/amqp-1-0-source.md +++ b/connectors/amqp-1-0-source/v3.0.7.1/amqp-1-0-source.md @@ -11,7 +11,7 @@ tags: alias: AMQP 1.0 Source Connector features: ["support sink/source for AMQP version 1.0.0"] icon: "/images/connectors/amqp-logo.png" -download: https://api.github.com/repos/streamnative/pulsar-io-amqp-1-0/tarball/refs/tags/v3.0.6.8 +download: https://api.github.com/repos/streamnative/pulsar-io-amqp-1-0/tarball/refs/tags/v3.0.7.1 support: streamnative support_link: https://github.com/streamnative/pulsar-io-amqp-1-0 support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" @@ -27,7 +27,7 @@ id: "amqp-1-0-source" The AMQP 1.0 source connector receives messages from [AMQP 1.0](https://www.amqp.org/) and writes messages to Pulsar topics. -![](https://raw.githubusercontent.com/streamnative/pulsar-io-amqp-1-0/v3.0.6.8/docs/amqp-1-0-source.png) +![](https://raw.githubusercontent.com/streamnative/pulsar-io-amqp-1-0/v3.0.7.1/docs/amqp-1-0-source.png) ## Quick start diff --git a/connectors/amqp-1-0-source/v4.0.0.1/amqp-1-0-source.md b/connectors/amqp-1-0-source/v4.0.0.1/amqp-1-0-source.md new file mode 100644 index 00000000..6d4d842d --- /dev/null +++ b/connectors/amqp-1-0-source/v4.0.0.1/amqp-1-0-source.md @@ -0,0 +1,170 @@ +--- +description: support sink/source for AMQP version 1.0.0 +author: StreamNative +contributors: gaoran10,Anonymitaet,shibd,dependabot[bot] +language: Java,Shell,Dockerfile,Python +document: +source: https://github.com/streamnative/pulsar-io-amqp-1-0 +license: Apache License 2.0 +license_link: https://github.com/streamnative/pulsar-io-amqp-1-0/blob/master/LICENSE +tags: +alias: AMQP 1.0 Source Connector +features: ["support sink/source for AMQP version 1.0.0"] +icon: "/images/connectors/amqp-logo.png" +download: https://api.github.com/repos/streamnative/pulsar-io-amqp-1-0/tarball/refs/tags/v4.0.0.1 +support: streamnative +support_link: https://github.com/streamnative/pulsar-io-amqp-1-0 +support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +owner_name: "streamnative" +owner_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +dockerfile: https://hub.docker.com/r/streamnative/pulsar-io-amqp1_0 +sn_available: "true" +id: "amqp-1-0-source" +--- + + +# AMQP 1.0 source connector + +The AMQP 1.0 source connector receives messages from [AMQP 1.0](https://www.amqp.org/) and writes messages to Pulsar topics. + +![](https://raw.githubusercontent.com/streamnative/pulsar-io-amqp-1-0/v4.0.0.1/docs/amqp-1-0-source.png) + +## Quick start + +### 1. Start AMQP 1.0 service + +Start a service that supports the AMQP 1.0 protocol, such as [Solace](https://docs.solace.com/index.html). +```bash +docker run -d -p 8080:8080 -p:8008:8008 -p:1883:1883 -p:8000:8000 -p:5672:5672 -p:9000:9000 -p:2222:2222 --shm-size=2g --env username_admin_globalaccesslevel=admin --env username_admin_password=admin --name=solace solace/solace-pubsub-standard +``` + +### 2. Create a connector + +The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. 
If you want to create a `non-builtin` connector, +you need to replace `--source-type amqp1_0` with `--archive /path/to/pulsar-io-amqp1_0.nar`. You can find the button to download the `nar` package at the beginning of the document. + +{% callout title="For StreamNative Cloud User" type="note" %} +If you are a StreamNative Cloud user, you need [set up your environment](https://docs.streamnative.io/docs/connector-setup) first. +{% /callout %} + +```bash +pulsarctl sources create \ + --source-type amqp1_0 \ + --name amqp1_0-source \ + --tenant public \ + --namespace default \ + --destination-topic-name "Your topic name" \ + --parallelism 1 \ + --source-config \ + '{ + "connection": { + "failover": { + "useFailover": true + }, + "uris": [ + { + "protocol": "amqp", + "host": "localhost", + "port": 5672, + "urlOptions": [ + "transport.tcpKeepAlive=true" + ] + } + ] + }, + "username": "guest", + "password": "guest", + "queue": "user-op-queue-pulsar" + }' +``` + +The `--source-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own. +If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference. + +{% callout title="Note" type="note" %} +You can also choose to use a variety of other tools to create a connector: +- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector ). +- [RestAPI](https://pulsar.apache.org/source-rest-api/?version=3.1.1): You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Terraform](https://github.com/hashicorp/terraform): You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document. +{% /callout %} + +### 2. Send messages to the AMQP 1.0 service + +{% callout title="Note" type="note" %} +- The following sample code uses the **Apache qpid** library. +{% /callout %} + + +``` java + public static void main(String[] args) { + ConnectionFactory connectionFactory = new JmsConnectionFactory("amqp://localhost:5672"); + Connection connection = connectionFactory.createConnection(); + connection.start(); + JMSProducer producer = connectionFactory.createContext().createProducer(); + producer.setDeliveryMode(DeliveryMode.NON_PERSISTENT); + Destination destination = new JmsQueue("user-op-queue"); + for (int i = 0; i < 10; i++) { + producer.send(destination, "Hello AMQP 1.0 - " + i); + } + connection.close(); + } +``` + +### 3. Consume data from Pulsar + +{% callout title="Note" type="note" %} +- If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information. +{% /callout %} + +``` java +bin/pulsar-client \ +--url "Your Pulsar serviceUrl" \ +consume "The topic that you specified when you created the connector" -s "test-sub" -n 10 -p Earliest +``` + +## Configuration Properties + +Before using the AMQP 1.0 sink connector, you need to configure it. 
+ +You can create a configuration file (JSON or YAML) to set the following properties. + +| Name | Type | Required | Sensitive | Default | Description | +|---------------------|------------|----------------------------------------------|-----------|---------------------|-----------------------------------------------------------------------------------------------------------------------------------------------| +| `protocol` | String | required if connection is not used | false | "amqp" | [deprecated: use connection instead] The AMQP protocol. | +| `host` | String | required if connection is not used | false | " " (empty string) | [deprecated: use connection instead] The AMQP service host. | +| `port` | int | required if connection is not used | false | 5672 | [deprecated: use connection instead] The AMQP service port. | +| `connection` | Connection | required if protocol, host, port is not used | false | " " (empty string) | The connection details. | +| `username` | String | false | true | " " (empty string) | The username used to authenticate to ActiveMQ. | +| `password` | String | false | true | " " (empty string) | The password used to authenticate to ActiveMQ. | +| `queue` | String | false | false | " " (empty string) | The queue name that messages should be read from or written to. | +| `topic` | String | false | false | " " (empty string) | The topic name that messages should be read from or written to. | +| `activeMessageType` | String | false | false | 0 | The ActiveMQ message simple class name. | +| `onlyTextMessage` | boolean | false | false | false | If it is set to `true`, the AMQP message type must be set to `TextMessage`. Pulsar consumers can consume the messages with schema ByteBuffer. | + +A `Connection` object can be specified as follows: + +| Name | Type | Required | Default | Description | +|------------|-----------------------|----------|--------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `failover` | Failover | false | " " (empty string) | The configuration for a failover connection. | +| `uris` | list of ConnectionUri | true | " " (empty string) | A list of ConnectionUri objects. When useFailover is set to true 1 or more should be provided. Currently only 1 uri is supported when useFailover is set to false | + +A `Failover` object can be specified as follows: + +| Name | Type | Required | Default | Description | +|--------------------------------|----------------|--------------------------------------------------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `useFailover` | boolean | true | false | If it is set to true, the connection will be created from the uris provided under uris, using qpid's failover connection factory. | +| `jmsClientId` | String | required if failoverConfigurationOptions is used | " " (empty string) | Identifying name for the jms Client | +| `failoverConfigurationOptions` | List of String | required if jmsClientId is used | " " (empty string) | A list of options (e.g. ). The options wil be joined using an '&', prefixed with a the jmsClientId and added to the end of the failoverUri. 
see also: https://qpid.apache.org/releases/qpid-jms-2.2.0/docs/index.html#failover-configuration-options | + +A `ConnectionUri` object can be specified as follows: + +| Name | Type | Required | Default | Description | +|--------------|----------------|----------|--------------------|-------------------------------------------------------------------------------------------------------------------------------------------| +| `protocol` | String | true | " " (empty string) | The AMQP protocol. | +| `host` | String | true | " " (empty string) | The AMQP service host. | +| `port` | int | true | 0 | The AMQP service port. | +| `urlOptions` | List of String | false | " " (empty string) | A list of url-options (e.g. ). The url options wil be joined using an '&', prefixed with a '?' and added to the end of the uri | + + + diff --git a/connectors/aws-eventbridge-sink/v3.0.6.8/aws-eventbridge-sink.md b/connectors/aws-eventbridge-sink/v3.0.7.1/aws-eventbridge-sink.md similarity index 99% rename from connectors/aws-eventbridge-sink/v3.0.6.8/aws-eventbridge-sink.md rename to connectors/aws-eventbridge-sink/v3.0.7.1/aws-eventbridge-sink.md index a3c7419b..9a2e713c 100644 --- a/connectors/aws-eventbridge-sink/v3.0.6.8/aws-eventbridge-sink.md +++ b/connectors/aws-eventbridge-sink/v3.0.7.1/aws-eventbridge-sink.md @@ -26,7 +26,7 @@ id: "aws-eventbridge-sink" The [Amazon EventBridge](https://aws.amazon.com/eventbridge/) sink connector pulls data from Pulsar topics and persists data to Amazon EventBridge. -![](https://raw.githubusercontent.com/streamnative/pulsar-io-aws-eventbridge/v3.0.6.8/docs/aws-eventbridge-sink.png) +![](https://raw.githubusercontent.com/streamnative/pulsar-io-aws-eventbridge/v3.0.7.1/docs/aws-eventbridge-sink.png) ## Quick start diff --git a/connectors/aws-eventbridge-sink/v4.0.0.1/aws-eventbridge-sink.md b/connectors/aws-eventbridge-sink/v4.0.0.1/aws-eventbridge-sink.md new file mode 100644 index 00000000..7ac8ce6a --- /dev/null +++ b/connectors/aws-eventbridge-sink/v4.0.0.1/aws-eventbridge-sink.md @@ -0,0 +1,339 @@ +--- +description: This connector allows you to make sink connections from Pulsar to AWS EventBridge. +author: StreamNative +contributors: shibd,sijie,Anonymitaet,nlu90 +language: Java,Shell,Dockerfile +document: +source: Private source +license: StreamNative, Inc.. All Rights Reserved +license_link: +tags: +alias: AWS EventBridge Sink Connector +features: ["This connector allows you to make sink connections from Pulsar to AWS EventBridge."] +icon: "/images/connectors/aws-eventbridge.png" +download: +support: streamnative +support_link: https://streamnative.io +support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +owner_name: "streamnative" +owner_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +dockerfile: +sn_available: "true" +id: "aws-eventbridge-sink" +--- + + +The [Amazon EventBridge](https://aws.amazon.com/eventbridge/) sink connector pulls data from Pulsar topics and persists +data to Amazon EventBridge. + +![](https://raw.githubusercontent.com/streamnative/pulsar-io-aws-eventbridge/v4.0.0.1/docs/aws-eventbridge-sink.png) + +## Quick start + +### Prerequisites + +The prerequisites for connecting an AWS EventBridge sink connector to external systems include: + +1. Create EventBridge and EventBus in AWS. +2. Create the [AWS User](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html) and create `AccessKey`(Please record `AccessKey` and `SecretAccessKey`). +3. 
Assign permissions to the AWS user, and ensure it has the `PutEvents` permission on the AWS EventBus. For details, see [permissions for event buses](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-event-bus-perms.html).
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "AllowAccountToPutEvents",
+      "Effect": "Allow",
+      "Principal": {
+        "AWS": ""
+      },
+      "Action": "events:PutEvents",
+      "Resource": "{EventBusArn}"
+    }
+  ]
+}
+```
+- You can set permissions directly for this user. With this method, when you create a connector, you only need to configure `accessKey` and `secretAccessKey`.
+- Alternatively, you can use the [Security Token Service](https://docs.aws.amazon.com/STS/latest/APIReference/welcome.html); this [video](https://www.youtube.com/watch?v=dqF4VJCska4) explains how to use STS on AWS.
+  With this method, when you create a connector, in addition to configuring `accessKey` and `secretAccessKey`, you also need to configure `role` and `roleSessionName`.
+
+4. Create a [Rule](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-create-rule.html) in EventBridge.
+- The data structure sent to EventBridge is described in the [Metadata mapping](#metadata-mapping) section, and you can create an **event pattern** based on this structure.
+- Set the target according to your needs. If you're testing this connector, you can set the target to [CloudWatch](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/WhatIsCloudWatch.html).
+
+
+### 1. Create a connector
+
+The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector,
+you need to replace `--sink-type aws-eventbridge` with `--archive /path/to/pulsar-io-aws-eventbridge.nar`. You can find the button to download the `nar` package at the beginning of the document.
+
+{% callout title="For StreamNative Cloud User" type="note" %}
+If you are a StreamNative Cloud user, you need to [set up your environment](https://docs.streamnative.io/docs/connector-setup) first.
+{% /callout %}
+
+```bash
+pulsarctl sinks create \
+  --sink-type aws-eventbridge \
+  --name aws-eventbridge-sink \
+  --tenant public \
+  --namespace default \
+  --inputs "Your topic name" \
+  --parallelism 1 \
+  --sink-config \
+  '{
+    "accessKeyId": "Your AWS access key",
+    "secretAccessKey": "Your AWS secret access key",
+    "region": "Your event bridge region",
+    "eventBusName": "Your eventbus name"
+  }'
+```
+
+The `--sink-config` parameter is the minimum configuration required to start this connector, and it is a JSON string. You need to substitute the relevant parameters with your own.
+If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference.
+
+{% callout title="Note" type="note" %}
+You can also choose to use a variety of other tools to create a connector:
+- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Terraform](https://github.com/hashicorp/terraform): You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document. +{% /callout %} + +### 2. Send messages to the topic + +{% callout title="Note" type="note" %} +If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information. +{% /callout %} + +``` java + PulsarClient client = PulsarClient.builder() + .serviceUrl("{{Your Pulsar URL}}") + .build(); + + Producer producer = client.newProducer(Schema.STRING) + .topic("{{Your topic name}}") + .create(); + + String message = "{\"msg\": \"msg-data\"}"; + MessageId msgID = producer.send(message); + System.out.println("Publish " + message + " and message ID " + msgID); + + producer.flush(); + producer.close(); + client.close(); +``` + +### 3. Show data on AWS EventBridge +The connector will send the following format of JSON event to EventBridge. +```json +{ + "version": "0", + "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", + "detail-type": "{{Your topic name}}", + "source": "{{Your connector name}}", + "account": "111122223333", + "time": "2017-12-22T18:43:48Z", + "region": "us-west-1", + "resources": [ + "arn:aws:ec2:us-west-1:123456789012:instance/i-1234567890abcdef0" + ], + "detail": { + "data": { + "msg": "msg-data" + }, + "message_id": "124:191:0" + } +} +``` + +## Configuration Properties + +Before using the AWS EventBridge sink connector, you need to configure it. This table outlines the properties and the +descriptions. + +| Name | Type | Required | Sensitive | Default | Description | +|-------------------------|--------|----------|-----------|-------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `accessKeyId` | String | yes | true | "" (empty string) | The AWS EventBridge [access key ID.](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) | +| `secretAccessKey` | String | yes | true | "" (empty string) | The AWS EventBridge [secret access key.](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) | +| `region` | String | yes | false | "" (empty string) | The region where AWS EventBridge service is located. [All AWS region](https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/regions/Region.html) | +| `eventBusName` | String | yes | false | "" (empty string) | The Event Bus name. | +| `role` | String | false | false | "" (empty string) | The AWS STS [roleArn](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html). Example: arn:aws:iam::598203581484:role/test-role | +| `roleSessionName` | String | false | false | "" (empty string) | The AWS role session name, Name it yourself. | +| `stsEndpoint` | String | false | false | "" (empty string) | The AWS STS endpoint. By default, the default STS endpoint: https://sts.amazonaws.com is used. See [Amazon documentation](https://docs.aws.amazon.com/STS/latest/APIReference/welcome.html) for more details. 
| +| `stsRegion` | String | false | false | "" (empty string) | The AWS STS region, By default, the 'region' config or env region is used. | +| `eventBusResourceName` | String | no | false | "" (empty string) | The Event Bus ARN (AWS Resource Name). Example: `arn:aws:events:ap-northeast-1:598263551484:event-bus/my_eventbus` | +| `metaDataField` | String | no | false | "" (empty string) | The metadata fields added to the event. Multiple fields are separated with commas. Optional values: `schema_version`, `partition`, `event_time`, `publish_time`, `message_id`, `sequence_id`, `producer_name`, `key`, and `properties`. | +| `batchPendingQueueSize` | int | no | false | 1000 | Pending queue size. This value must be greater than `batchMaxSize`. | +| `batchMaxSize` | int | no | false | 10 | Maximum number of batch messages. The number must be less than or equal to 10 (AWS EventBridge required). | +| `batchMaxBytesSize` | long | no | false | 640 | Maximum number of batch bytes payload size. This value cannot be greater than 512KB. | +| `batchMaxTimeMs` | long | no | false | 5000 | Batch max wait time: milliseconds. | +| `maxRetryCount` | long | no | false | 100 | Maximum number of retries to send events, when put events failed. | +| `intervalRetryTimeMs` | long | no | false | 1000 | The interval time(milliseconds) for each retry, when the put events failed. | + +> For details about this connector's advanced features and configurations, see [Advanced features](#advanced-features). + +## Advanced features + +### Delivery guarantees + +The AWS EventBridge sink connector provides two delivery guarantees: **at-most-once** and **at-least-once**. + +{% callout title="Note" type="note" %} +Currently, the **effectively-once** delivery guarantee is not supported, because Amazon EventBridge cannot offer the support of the Sink downstream system. +{% /callout %} + +### Data convert + +In AWS EventBridge, all events +is [JSON format](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-events.html). + +Pulsar supports multiple schema types. When receiving the data from Pulsar, the AWS EventBridge sink connectors +recognize it and convert it to a JSON string according to the following table: + +| Pulsar Schema | Convert to JSON | Note | +|----------------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------| +| Primitive | ✔* | Just support primitive type is string and data is JSON format. | +| Avro | ✔ | Take advantage of toolkit conversions | +| Json | ✔ | Just send it directly | +| Protobuf | X | The Protobuf schema is based on the Avro schema. It uses Avro as an intermediate format, so it may not provide the best effort conversion. | +| ProtobufNative | ✔ | Take advantage of toolkit conversions | + +In EventBridge, the user data is in the `detail$data` field. + +```json +{ + "version": "0", + "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", + "detail": { + "data": { + "instance-id": " i-1234567890abcdef0", + "state": "terminated" + } + } +} +``` + +### Metadata mapping + +In EventBridge, a complete event contains +many [system fields](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-events.html#eb-custom-event). These +system fields can help you to configure the rule. 
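+
+For example, these fields can be matched in a rule's [event pattern](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-event-patterns.html). The following is a minimal illustrative sketch (not taken from the connector docs) that matches all events written from the sample topic `topic_name_test_1` used below; note that event pattern values must be arrays:
+
+```json
+{
+  "detail-type": ["topic_name_test_1"]
+}
+```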
+
+An **Event** containing event data:
+
+```json
+{
+  "version": "0",
+  "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718",
+  "source-type": "test-aws-event-bridge-sink-connector",
+  "detail-type": "topic_name_test_1",
+  "source": "aws.ec2",
+  "account": "111122223333",
+  "time": "2017-12-22T18:43:48Z",
+  "region": "us-west-1",
+  "resources": [
+    "arn:aws:ec2:us-west-1:123456789012:instance/i-1234567890abcdef0"
+  ],
+  "detail": {
+    "data": {
+      "instance-id": " i-1234567890abcdef0",
+      "state": "terminated"
+    }
+  }
+}
+```
+
+This connector maps the following fields:
+
+- `sourceType`: The default value is `${{Connector Name}}`.
+- `detailType`: The default value is `${{Topic Name}}`.
+
+This connector also supports adding Pulsar metadata to every **Event** (written into the **detail** field).
+
+You can select the desired metadata through the following configuration:
+
+```properties
+# Optional values: schema_version | partition | event_time | publish_time
+#                  | message_id | sequence_id | producer_name | key | properties
+metaDataField=event_time,message_id
+```
+
+An **Event** containing metadata:
+
+```json
+{
+  "version": "0",
+  "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718",
+  "source-type": "test-aws-event-bridge-sink-connector",
+  "detail-type": "topic_name_test_1",
+  "source": "aws.ec2",
+  "account": "111122223333",
+  "time": "2017-12-22T18:43:48Z",
+  "region": "us-west-1",
+  "resources": [
+    "arn:aws:ec2:us-west-1:123456789012:instance/i-1234567890abcdef0"
+  ],
+  "detail": {
+    "data": {
+      "instance-id": " i-1234567890abcdef0",
+      "state": "terminated"
+    },
+    "event_time": 789894645625,
+    "message_id": "1,1,1"
+  }
+}
+```
+
+### Parallelism
+
+You can configure the parallelism of sink execution through the Function scheduling mechanism: multiple
+sink instances are scheduled to run on different worker nodes, and they consume messages together
+according to the configured subscription mode.
+
+Since the Event Bus does not need ordering guarantees, the connector supports the `shared` subscription mode.
+
+To increase the write throughput, you can configure the following:
+
+```properties
+parallelism=4
+```
+
+> When `retainOrdering` is set to `false`, the `Shared` subscription mode is used.
+
+### Batch Put
+
+The AWS EventBridge connector supports putting events in batches, which is controlled mainly by the following three parameters:
+
+- **batchMaxSize**: When the number of buffered messages exceeds `batchMaxSize`, a flush (put) of events is triggered. `0` means no
+  trigger.
+- **batchMaxBytesSize**: When the buffered message data size exceeds `batchMaxBytesSize`, a flush of pending events is triggered.
+  This value should be greater than 0 and less than 256000. The default value is 640.
+- **batchMaxTimeMs**: When the interval since the last flush exceeds `batchMaxTimeMs`, a flush of pending
+  events is triggered. `0` means no trigger.
+
+In addition to these three parameters that control flush behavior,
+[AWS EventBridge](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-putevent-size.html) does not allow batches
+larger than 256KB per write. So, when the buffered messages exceed 256KB, a flush is also triggered.
+
+### Retry Put
+
+For handling failures with `PutEvents`, AWS EventBridge suggests retrying each failed event
+[until it succeeds](https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-putevents.html).
+
+This connector provides two configuration options to control the retry strategy:
+
+```properties
+# The maximum number of retries when sending an event fails.
+maxRetryCount=100
+# The interval (in milliseconds) between retries when sending an event fails.
+intervalRetryTimeMs=1000
+```
+
+## More Links
+- [GitHub Repo](https://github.com/streamnative/pulsar-io-aws-eventbridge)
+- [Announcing the Amazon EventBridge Sink Connector for Apache Pulsar](https://streamnative.io/blog/announcing-the-amazon-eventbridge-sink-connector-for-apache-pulsar)
+- [Amazon EventBridge connector is now integrated with StreamNative Cloud](https://streamnative.io/blog/amazon-eventbridge-connector-is-now-integrated-with-streamnative-cloud)
+
+
diff --git a/connectors/aws-lambda-sink/v3.0.6.8/aws-lambda-sink.md b/connectors/aws-lambda-sink/v3.0.7.1/aws-lambda-sink.md
similarity index 94%
rename from connectors/aws-lambda-sink/v3.0.6.8/aws-lambda-sink.md
rename to connectors/aws-lambda-sink/v3.0.7.1/aws-lambda-sink.md
index dc999d8b..510d9ce2 100644
--- a/connectors/aws-lambda-sink/v3.0.6.8/aws-lambda-sink.md
+++ b/connectors/aws-lambda-sink/v3.0.7.1/aws-lambda-sink.md
@@ -25,7 +25,7 @@ id: "aws-lambda-sink"
 The [AWS Lambda](https://aws.amazon.com/lambda/) sink connector is a [Pulsar IO connector](http://pulsar.apache.org/docs/en/next/io-overview/) for pulling messages from Pulsar topics to AWS Lambda to invoke Lambda functions.
 
-![](https://raw.githubusercontent.com/streamnative/pulsar-io-aws-lambda/v3.0.6.8/docs/lambda-sink.png)
+![](https://raw.githubusercontent.com/streamnative/pulsar-io-aws-lambda/v3.0.7.1/docs/lambda-sink.png)
 
 # How to get
 
@@ -35,7 +35,7 @@ This section describes how to build the AWS Lambda sink connector.
 
 You can get the AWS Lambda sink connector using one of the following methods if you use [Pulsar Function Worker](https://pulsar.apache.org/docs/en/functions-worker/) to run connectors in a cluster.
 
-- Download the NAR package from [the download page](https://github.com/streamnative/pulsar-io-aws-lambda/releases/download/v3.0.6.8/pulsar-io-google-pubsub-3.0.6.8.nar).
+- Download the NAR package from [the download page](https://github.com/streamnative/pulsar-io-aws-lambda/releases/download/v3.0.7.1/pulsar-io-aws-lambda-3.0.7.1.nar).
 
 - Build it from the source code.
 
@@ -57,7 +57,7 @@ To build the AWS Lambda sink connector from the source code, follow these steps.
```bash ls target - pulsar-io-aws-lambda-3.0.6.8.nar + pulsar-io-aws-lambda-3.0.7.1.nar ``` ## Work with Function Mesh @@ -93,7 +93,7 @@ You can create a configuration file (JSON or YAML) to set the properties if you "inputs": [ "test-aws-lambda-topic" ], - "archive": "connectors/pulsar-io-aws-lambda-3.0.6.8.nar", + "archive": "connectors/pulsar-io-aws-lambda-3.0.7.1.nar", "parallelism": 1, "configs": { @@ -115,7 +115,7 @@ You can create a configuration file (JSON or YAML) to set the properties if you name: "aws-lambda-sink" inputs: - "test-aws-lambda-topic" - archive: "connectors/pulsar-io-aws-lambda-3.0.6.8.nar" + archive: "connectors/pulsar-io-aws-lambda-3.0.7.1.nar" parallelism: 1 configs: @@ -139,7 +139,7 @@ kind: Sink metadata: name: aws-lambda-sink-sample spec: - image: streamnative/pulsar-io-aws-lambda:3.0.6.8 + image: streamnative/pulsar-io-aws-lambda:3.0.7.1 className: org.apache.pulsar.ecosystem.io.aws.lambda.AWSLambdaBytesSink replicas: 1 input: @@ -163,7 +163,7 @@ spec: cpu: "0.1" memory: 1G java: - jar: connectors/pulsar-io-aws-lambda-3.0.6.8.nar + jar: connectors/pulsar-io-aws-lambda-3.0.7.1.nar clusterName: test-pulsar autoAck: true ``` @@ -186,7 +186,7 @@ This example shows how to create an AWS Lambda sink connector on a Pulsar cluste ```bash PULSAR_HOME/bin/pulsar-admin sinks create \ ---archive pulsar-io-aws-lambda-3.0.6.8.nar \ +--archive pulsar-io-aws-lambda-3.0.7.1.nar \ --sink-config-file aws-lambda-sink-config.yaml \ --classname org.apache.pulsar.ecosystem.io.aws.lambda.AWSLambdaBytesSink \ --name aws-lambda-sink @@ -209,8 +209,8 @@ This example describes how to use the AWS Lambda sink connector to pull messages 2. Copy the NAR package of the AWS Lambda connector to the Pulsar connectors directory. ``` - cp pulsar-io-aws-lambda-3.0.6.8.nar - PULSAR_HOME/connectors/pulsar-io-aws-lambda-3.0.6.8.nar + cp pulsar-io-aws-lambda-3.0.7.1.nar + PULSAR_HOME/connectors/pulsar-io-aws-lambda-3.0.7.1.nar ``` 3. Start Pulsar in standalone mode. @@ -244,8 +244,8 @@ This example explains how to create an AWS Lambda sink connector in an on-premis 1. Copy the NAR package of the AWS Lambda connector to the Pulsar connectors directory. ``` - cp pulsar-io-aws-lambda-3.0.6.8.nar - PULSAR_HOME/connectors/pulsar-io-aws-lambda-3.0.6.8.nar + cp pulsar-io-aws-lambda-3.0.7.1.nar + PULSAR_HOME/connectors/pulsar-io-aws-lambda-3.0.7.1.nar ``` 2. Reload all [built-in connectors](https://pulsar.apache.org/docs/en/next/io-connectors/). @@ -297,7 +297,7 @@ This example describes how to create an AWS Lambda sink connector for a Kuberbet metadata: name: aws-lambda-sink-sample spec: - image: streamnative/pulsar-io-aws-lambda:3.0.6.8 + image: streamnative/pulsar-io-aws-lambda:3.0.7.1 className: org.apache.pulsar.ecosystem.io.aws.lambda.AWSLambdaBytesSink replicas: 1 input: @@ -321,7 +321,7 @@ This example describes how to create an AWS Lambda sink connector for a Kuberbet cpu: "0.1" memory: 1G java: - jar: connectors/pulsar-io-aws-lambda-3.0.6.8.nar + jar: connectors/pulsar-io-aws-lambda-3.0.7.1.nar clusterName: test-pulsar autoAck: true ``` diff --git a/connectors/aws-lambda-sink/v4.0.0.1/aws-lambda-sink.md b/connectors/aws-lambda-sink/v4.0.0.1/aws-lambda-sink.md new file mode 100644 index 00000000..dc6523d2 --- /dev/null +++ b/connectors/aws-lambda-sink/v4.0.0.1/aws-lambda-sink.md @@ -0,0 +1,372 @@ +--- +description: The AWS Lambda sink connector allows you to send messages from Apache Pulsar to AWS Lambda. 
+author: StreamNative
+contributors: freeznet,RobertIndie,nlu90,shibd
+language: Java,Shell,Python,Dockerfile
+document:
+source: Private source
+license: StreamNative, Inc. All Rights Reserved
+license_link:
+tags:
+alias: AWS Lambda Sink
+features: ["The AWS Lambda sink connector allows you to send messages from Apache Pulsar to AWS Lambda."]
+icon: "/images/connectors/lambda-logo.png"
+download:
+support: streamnative
+support_link: https://streamnative.io
+support_img: "https://avatars.githubusercontent.com/u/44651383?v=4"
+owner_name: "streamnative"
+owner_img: "https://avatars.githubusercontent.com/u/44651383?v=4"
+dockerfile:
+sn_available: "true"
+id: "aws-lambda-sink"
+---
+
+
+The [AWS Lambda](https://aws.amazon.com/lambda/) sink connector is
+a [Pulsar IO connector](http://pulsar.apache.org/docs/en/next/io-overview/) for pulling messages from Pulsar topics to
+AWS Lambda to invoke Lambda functions.
+
+![](https://raw.githubusercontent.com/streamnative/pulsar-io-aws-lambda/v4.0.0.1/docs/lambda-sink.png)
+
+## Quick start
+
+### Prerequisites
+
+The prerequisites for connecting an AWS Lambda sink connector to external systems include:
+
+1. Create an AWS Lambda function in AWS: https://docs.aws.amazon.com/lambda/latest/dg/getting-started.html
+2. Create an [AWS user](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html) and create an `AccessKey` (record the `AccessKey` and `SecretAccessKey`).
+3. Assign permissions to the AWS user, ensuring it has the following permissions to AWS Lambda. For details,
+   see [permissions for AWS Lambda](https://docs.aws.amazon.com/lambda/latest/dg/lambda-permissions.html).
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "lambda:InvokeFunction",
+        "lambda:GetFunction"
+      ],
+      "Resource": "*"
+    }
+  ]
+}
+```
+
+### 1. Create a connector
+
+The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector,
+you need to replace `--sink-type aws-lambda` with `--archive /path/to/pulsar-io-aws-lambda.nar`. You can find the button to download the `nar` package at the beginning of the document.
+
+{% callout title="For StreamNative Cloud User" type="note" %}
+If you are a StreamNative Cloud user, you need to [set up your environment](https://docs.streamnative.io/docs/connector-setup) first.
+{% /callout %}
+
+```bash
+pulsarctl sinks create \
+  --sink-type aws-lambda \
+  --name aws-lambda-sink \
+  --tenant public \
+  --namespace default \
+  --inputs "Your topic name" \
+  --parallelism 1 \
+  --sink-config \
+  '{
+    "awsAccessKey": "Your AWS access key",
+    "awsSecretKey": "Your AWS secret key",
+    "awsRegion": "Your AWS region",
+    "lambdaFunctionName": "Your AWS function name",
+    "payloadFormat": "V2"
+  }'
+```
+
+The `--sink-config` is the minimum necessary and recommended configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own.
+If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference.
+
+{% callout title="Note" type="note" %}
+You can also choose to use a variety of other tools to create a connector:
+- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`.
You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector ). +- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Terraform](https://github.com/hashicorp/terraform): You can find an example for [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document. +{% /callout %} + +### 2. Send messages to the topic + +{% callout title="Note" type="note" %} +If your connector is created on StreamNative Cloud, you need to authenticate your clients. +See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for +more information. +{% /callout %} + +``` java + PulsarClient client = PulsarClient.builder() + .serviceUrl("{{Your Pulsar URL}}") + .build(); + + Producer producer = client.newProducer(Schema.STRING) + .topic("{{Your topic name}}") + .create(); + + String message = "Hello, AWS Lambda"; + MessageId msgID = producer.send(message); + System.out.println("Publish " + message + " and message ID " + msgID); + + producer.flush(); + producer.close(); + client.close(); +``` + +You can also send the message using the command line: + +```sh +$ bin/pulsar-client produce pulsar-topic-name --messages "Hello, AWS Lambda" +``` + +### 3. Inspect messages in AWS Lambda + +Once you have sent messages to your Pulsar topic, the AWS Lambda sink connector should automatically forward them to the +specified AWS Lambda function. To verify that your messages have been correctly received by AWS Lambda, you can inspect +the logs in the AWS Management Console. + +Here are the steps to inspect messages in AWS Lambda: + +1. Log in to your AWS Management Console. +2. Navigate to the AWS Lambda service by clicking on "Services" at the top of the page and then typing "Lambda" into the + search bar. +3. Once you're in the AWS Lambda service, locate and click on the name of the Lambda function you specified when setting + up your connector. +4. Once you've opened your function, click on the "Monitoring" tab. +5. In the "Monitoring" tab, click on "View logs in CloudWatch". This will redirect you to the AWS CloudWatch service, + where you can view the log streams for your function. +6. In CloudWatch, select the most recent log stream to view the most recent logs. If your connector is correctly + forwarding messages, you should see log entries corresponding to the execution of your function with the messages you + sent. + +Remember, the logs may take a few minutes to appear in CloudWatch due to the nature of distributed systems and potential +network latencies. + +{% callout title="Note" type="note" %} +If you do not see your messages in the logs, make sure that your AWS Lambda function is correctly logging incoming +events. You may need to modify your function to explicitly log the event data it receives. +{% /callout %} + +By regularly checking the CloudWatch logs for your AWS Lambda function, you can ensure that your Pulsar AWS Lambda sink +connector is correctly forwarding messages and troubleshoot any issues that may arise. + +## Configuration Properties + +Before using the AWS Lambda sink connector, you need to configure it. 
This table outlines the properties and their
+descriptions.
+
+| Name | Type | Required | Sensitive | Default | Description |
+|----------------------------|---------|----------|-----------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `awsEndpoint` | String | false | false | " " (empty string) | The AWS Lambda endpoint URL. It can be found at [AWS Lambda endpoints and quotas](https://docs.aws.amazon.com/general/latest/gr/lambda-service.html). |
+| `awsRegion` | String | true | false | " " (empty string) | The supported AWS region. For example, `us-west-1`, `us-west-2`. |
+| `awsAccessKey` | String | false | true | " " (empty string) | The AWS access key. See [Managing Access Keys for IAM Users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) for how to get it. |
+| `awsSecretKey` | String | false | true | " " (empty string) | The AWS secret key. See [Managing Access Keys for IAM Users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) for how to get it. |
+| `lambdaFunctionName` | String | true | false | " " (empty string) | The name of the Lambda function to invoke for the messages. |
+| `awsCredentialPluginName` | String | false | false | " " (empty string) | The fully-qualified class name of the `AwsCredentialProviderPlugin` implementation. |
+| `awsCredentialPluginParam` | String | false | true | " " (empty string) | The JSON parameters to initialize `AwsCredentialsProviderPlugin`. |
+| `synchronousInvocation` | Boolean | false | false | true | `true`: invoke the Lambda function synchronously.<br>`false`: invoke the Lambda function asynchronously. |
+| `payloadFormat` | String | false | false | "V1" | The format of the payload to be sent to the Lambda function. Valid values are "V1" and "V2". "V1" is the default value. |
+| `metadataFields` | String | false | false | "topic,key,partitionIndex,sequence,properties,eventTime" | The metadata fields to be sent to the Lambda function. This configuration only takes effect when using the V2 data format (payloadFormat=V2). |
+| `batchMaxSize` | Integer | false | false | 10 | The maximum number of records to send to the Lambda function in a single batch. This configuration only takes effect when using the V2 data format (payloadFormat=V2). |
+| `batchMaxBytesSize` | Integer | false | false | 262144 | The maximum size of the payload to send to the Lambda function in a single batch. This configuration only takes effect when using the V2 data format (payloadFormat=V2). |
+| `batchMaxTimeMs` | Integer | false | false | 5000 | The maximum wait time for batching in milliseconds. This configuration only takes effect when using the V2 data format (payloadFormat=V2). |
+
+## Advanced features
+
+### Payload Format
+
+The payload refers to the actual data that the AWS Lambda sink connector sends to the AWS Lambda function. The AWS
+Lambda sink connector supports two payload formats: `V1` and `V2`. It is strongly recommended that you use
+the `V2` payload format.
+
+The `V2` payload format provides a more standardized method for managing message data, with added support for schema
+conversion and batching.
+
+#### V1 Payload Format
+
+The `V1` payload format is the default payload format. It incorporates three types of data formats, all of which are
+represented as JSON objects.
+
+- **Serialization of the Record Object**: Initially, the sink connector attempts to convert the Record object into a
+  JSON object.
+- **Conversion from the Message Value**: If the conversion of the `Record` object into a JSON object encounters an
+  exception, the connector attempts to convert the message value itself into a JSON object. The format of this data
+  entirely depends on how the user has set the message value. It can take any form specified by the user.
+- **The JSON Object Containing Metadata and the Value with String Type**: If the message value is not valid JSON, the
+  connector constructs a JSON object that includes the message metadata and the message value. If a metadata field
+  does not exist, it is not included in the JSON object.
+
+Here is an example of the V1 format payload:
+
+```json
+{
+  sourceRecord: {
+    ...
+    value: 'aGVsbG8=',
+    key: {
+      empty: true,
+      present: false
+    },
+    ...
+  },
+  value: 'aGVsbG8=',
+  schema: {
+    schemaInfo: {
+      name: 'Bytes',
+      schema: '',
+      type: 'BYTES',
+      timestamp: 0,
+      properties: {},
+      schemaDefinition: ''
+    },
+    nativeSchema: {
+      empty: true,
+      present: false
+    }
+  },
+  ...
+}
+```
+
+#### V2 Payload Format
+
+The payload in the `V2` format consists of an array of JSON objects, each representing a message. Each message includes
+metadata fields and a value, with the value being either a JSON object or a primitive JSON value.
+
+Here is an example of the V2 payload format:
+
+```json
+[
+  {
+    "topic": "my-topic-1",
+    "key": "my-key",
+    ...
+    "value": {
+      "my-field": 123
+    }
+  },
+  {
+    "topic": "my-topic-2",
+    "key": "my-key",
+    ...
+    "value": "test-value"
+  }
+]
+```
+
+### Schema Support
+
+The AWS Lambda sink connector supports the following schema types: `Primitive Schema`, `Avro Schema`, and `JSON Schema`.
+
+#### Primitive Schema
+
+For the primitive type, the payload format is as follows:
+
+```json
+[
+  {
+    "topic": "my-topic-1",
+    "key": "my-key",
+    ...
+    "value": 123
+  },
+  {
+    "topic": "my-topic-2",
+    "key": "my-key",
+    ...
+    "value": "test-value"
+  },
+  {
+    "topic": "my-topic-3",
+    "key": "my-key",
+    ...
+    "value": true
+  }
+]
+```
+
+The value types include: Number, Boolean, and String. Here's a table indicating the conversion type for each primitive
+schema type:
+
+| Primitive Schema Type | JSON Conversion Type | Example |
+|------------------------------------------|------------------------------------------------------|-----------------------------------------------------------|
+| Boolean | Boolean | true |
+| INT8, INT16, INT32, INT64, FLOAT, DOUBLE | Number | 1234 |
+| STRING | String | "Hello" |
+| BYTES | Base64-encoded String | "SGVsbG8=" (base64-encoded version of the string "Hello") |
+| DATE, TIME, TIMESTAMP | ISO 8601 String (yyyy-MM-dd'T'HH:mm:ss.SSSXXX) | '2023-10-30T06:13:48.123+08:00' |
+| LocalDate | ISO 8601 String (yyyy-MM-dd) | '2023-10-17' |
+| LocalTime | ISO 8601 String (HH:mm:ss.SSSSSSSSS) | '04:30:33.123456789' |
+| LocalDateTime | ISO 8601 String (yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS) | '2023-10-17T04:30:33.123456789' |
+| Instant | ISO 8601 String (yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSXXX) | '2023-10-30T06:13:48.123456789+08:00' |
+
+#### Struct Schema (Avro Schema and JSON Schema)
+
+For the struct schema types `JSON` and `AVRO`, the value is converted into a JSON object. The conversion rules outlined
+in the Primitive Schema section are applied to all primitive type fields within this value object. Nested objects are
+also supported.
+
+Here is an example:
+
+```json
+[
+  {
+    "topic": "my-topic",
+    "key": "my-key",
+    ...
+    "value": {
+      "message": "hello",
+      "time": "2023-10-17T08:22:11.263Z"
+    }
+  }
+]
+```
+
+Here are the rules for handling the logical types of the Avro-based struct schemas (`AVRO` and `JSON`):
+
+| Logical Type | JSON Conversion Type | Example |
+|------------------------------|---------------------------------------------------|------------------------------------|
+| `time-millis`, `time-micros` | ISO 8601 String (HH:mm:ss.SSS) | '13:48:41.123' |
+| `timestamp-millis` | ISO 8601 String (yyyy-MM-dd'T'HH:mm:ss.SSSXXX) | '2023-10-30T06:13:48.123+08:00' |
+| `timestamp-micros` | ISO 8601 String (yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX) | '2023-10-30T06:13:48.123456+08:00' |
+| `local-timestamp-millis` | ISO 8601 String (yyyy-MM-dd'T'HH:mm:ss.SSS) | '2023-10-29T22:13:48.123' |
+| `local-timestamp-micros` | ISO 8601 String (yyyy-MM-dd'T'HH:mm:ss.SSSSSS) | '2023-10-29T22:13:48.123456' |
+
+#### Metadata
+
+You can select the metadata fields through the `metadataFields` configuration. The supported metadata fields include:
+
+- `topic`: The source topic name
+- `key`: The string type key
+- `partitionIndex`: The partition index of the topic
+- `sequence`: The sequence ID of the message
+- `properties`: The String-to-String map
+- `eventTime`: The event time of the message in the [ISO 8601 format](https://www.w3.org/TR/NOTE-datetime)
+- `messageId`: The string representation of a message ID, e.g., '1:1:-1:-1'
+
+#### Batch Support
+
+The AWS Lambda sink connector supports combining multiple messages into a single Lambda invocation for delivery.
Each +batch is a V2 format payload that contains multiple messages. The batching support only works when using the V2 format. + +You can use the following configurations to control the batch sink policy: + +- `batchMaxSize`: The maximum number of records to send to the Lambda function in a single batch. +- `batchMaxBytesSize`: The maximum size of the payload to send to the Lambda function in a single batch. +- `batchMaxTimeMs`: The maximum wait time for batching in milliseconds. + +You can simply set `batchMaxSize` to `1` to disable batching. + +Please note that AWS Lambda has a payload quota +limit: [AWS Lambda Payload Quotas](https://docs.aws.amazon.com/lambda/latest/dg/gettingstarted-limits.html). The byte +size of a batch should not exceed this quota limit. + + diff --git a/connectors/aws-s3-sink/v3.0.6.8/aws-s3-sink.md b/connectors/aws-s3-sink/v3.0.7.1/aws-s3-sink.md similarity index 99% rename from connectors/aws-s3-sink/v3.0.6.8/aws-s3-sink.md rename to connectors/aws-s3-sink/v3.0.7.1/aws-s3-sink.md index 6a508d1f..26eb2847 100644 --- a/connectors/aws-s3-sink/v3.0.6.8/aws-s3-sink.md +++ b/connectors/aws-s3-sink/v3.0.7.1/aws-s3-sink.md @@ -11,7 +11,7 @@ tags: alias: AWS S3 Sink Connector features: ["Cloud Storage Connector integrates Apache Pulsar with cloud storage."] icon: "/images/connectors/aws-s3-logo.png" -download: https://api.github.com/repos/streamnative/pulsar-io-cloud-storage/tarball/refs/tags/v3.0.6.8 +download: https://api.github.com/repos/streamnative/pulsar-io-cloud-storage/tarball/refs/tags/v3.0.7.1 support: streamnative support_link: https://github.com/streamnative/pulsar-io-cloud-storage support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" @@ -25,7 +25,7 @@ id: "aws-s3-sink" The [AWS S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html) sink connector pulls data from Pulsar topics and persists data to AWS S3 buckets. -![](https://raw.githubusercontent.com/streamnative/pulsar-io-cloud-storage/v3.0.6.8/docs/aws-s3-sink.png) +![](https://raw.githubusercontent.com/streamnative/pulsar-io-cloud-storage/v3.0.7.1/docs/aws-s3-sink.png) ## Quick start diff --git a/connectors/aws-s3-sink/v4.0.0.1/aws-s3-sink.md b/connectors/aws-s3-sink/v4.0.0.1/aws-s3-sink.md new file mode 100644 index 00000000..53868301 --- /dev/null +++ b/connectors/aws-s3-sink/v4.0.0.1/aws-s3-sink.md @@ -0,0 +1,277 @@ +--- +description: Cloud Storage Connector integrates Apache Pulsar with cloud storage. 
+author: StreamNative +contributors: freeznet,jianyun8023,shibd,RobertIndie +language: Java,Shell,Dockerfile +document: +source: https://github.com/streamnative/pulsar-io-cloud-storage +license: Apache License 2.0 +license_link: https://github.com/streamnative/pulsar-io-cloud-storage/blob/master/LICENSE +tags: +alias: AWS S3 Sink Connector +features: ["Cloud Storage Connector integrates Apache Pulsar with cloud storage."] +icon: "/images/connectors/aws-s3-logo.png" +download: https://api.github.com/repos/streamnative/pulsar-io-cloud-storage/tarball/refs/tags/v4.0.0.1 +support: streamnative +support_link: https://github.com/streamnative/pulsar-io-cloud-storage +support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +owner_name: "streamnative" +owner_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +dockerfile: https://hub.docker.com/r/streamnative/pulsar-io-cloud-storage +sn_available: "true" +id: "aws-s3-sink" +--- + + +The [AWS S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html) sink connector pulls data from Pulsar topics and persists data to AWS S3 buckets. + +![](https://raw.githubusercontent.com/streamnative/pulsar-io-cloud-storage/v4.0.0.1/docs/aws-s3-sink.png) + +## Quick start + +### Prerequisites + +The prerequisites for connecting an AWS S3 sink connector to external systems include: + +1. Create S3 buckets in AWS. +2. Create the [AWS User](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html) and create `AccessKey`(Please record `AccessKey` and `SecretAccessKey`). +3. Assign permissions to AWS User, and ensure they have the following permissions to the AWS S3. +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "VisualEditor0", + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:AbortMultipartUpload" + ], + "Resource": "{Your bucket arn}/*" + } + ] +} +``` + +### 1. Create a connector + +The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector, +you need to replace `--sink-type cloud-storage-s3` with `--archive /path/to/pulsar-io-cloud-storage.nar`. You can find the button to download the `nar` package at the beginning of the document. + +{% callout title="For StreamNative Cloud User" type="note" %} +If you are a StreamNative Cloud user, you need [set up your environment](https://docs.streamnative.io/docs/connector-setup) first. +{% /callout %} + +```bash +pulsarctl sinks create \ + --sink-type cloud-storage-s3 \ + --name aws-s3-sink \ + --tenant public \ + --namespace default \ + --inputs "Your topic name" \ + --parallelism 1 \ + --sink-config \ + '{ + "accessKeyId": "Your AWS access key", + "secretAccessKey": "Your AWS secret access key", + "provider": "s3v2", + "bucket": "Your bucket name", + "region": "Your AWS S3 region", + "formatType": "json", + "partitioner": "topic" + }' +``` + +The `--sink-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own. +If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference. + +{% callout title="Note" type="note" %} +You can also choose to use a variety of other tools to create a connector: +- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. 
You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Terraform](https://github.com/hashicorp/terraform): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document.
+{% /callout %}
+
+### 2. Send messages to the topic
+
+{% callout title="Note" type="note" %}
+If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information.
+{% /callout %}
+
+``` java
+    public static void main(String[] args) throws Exception {
+        PulsarClient client = PulsarClient.builder()
+                .serviceUrl("{{Your Pulsar URL}}")
+                .build();
+
+        Producer<String> producer = client.newProducer(Schema.STRING)
+                .topic("{{Your topic name}}")
+                .create();
+
+        for (int i = 0; i < 10; i++) {
+            // send a small JSON object as the message payload
+            String message = "{\"test-message\": \"test-value\"}";
+            producer.send(message);
+        }
+
+        producer.close();
+        client.close();
+    }
+```
+
+### 3. Display data on AWS S3 console
+
+You can see the object at `public/default/{{Your topic name}}-partition-0/xxxx.json` on the AWS S3 console. Download and open it; the content is:
+
+```text
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+```
+
+## Configuration Properties
+
+Before using the AWS S3 sink connector, you need to configure it. This table outlines the properties and their descriptions.
+
+| Name | Type | Required | Sensitive | Default | Description |
+|---------------------------------|---------|----------|-----------|--------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `provider` | String | True | false | null | The AWS S3 client type, such as `aws-s3` or `s3v2` (`s3v2` uses the AWS client rather than the JClouds client). |
+| `accessKeyId` | String | True | true | null | The AWS access key ID. It requires permission to write objects. |
+| `secretAccessKey` | String | True | true | null | The AWS secret access key. |
+| `bucket` | String | True | false | null | The AWS S3 bucket. |
+| `formatType` | String | True | false | "json" | The data format type. Available options are `json`, `avro`, `bytes`, or `parquet`. By default, it is set to `json`. |
+| `partitioner` | String | False | false | null | The partitioner for partitioning the resulting files. Available options are `topic`, `time` or `legacy`. By default, it's set to `legacy`. Please see [Partitioner](#partitioner) for more details.
| +| `partitionerType` | String | False | false | null | The legacy partitioning type. It can be configured by topic partitions or by time. By default, the partition type is configured by topic partitions. It only works when the partitioner is set to `legacy`. | +| `region` | String | False | false | null | The AWS S3 region. Either the endpoint or region must be set. | +| `endpoint` | String | False | false | null | The AWS S3 endpoint. Either the endpoint or region must be set. | +| `role` | String | False | false | null | The AWS role. | +| `roleSessionName` | String | False | false | null | The AWS role session name. | +| `timePartitionPattern` | String | False | false | "yyyy-MM-dd" | The format pattern of the time-based partitioning. For details, refer to the Java date and time format. | +| `timePartitionDuration` | String | False | false | "86400000" | The time interval for time-based partitioning. Support formatted interval string, such as `30d`, `24h`, `30m`, `10s`, and also support number in milliseconds precision, such as `86400000` refers to `24h` or `1d`. | +| `partitionerUseIndexAsOffset` | Boolean | False | false | false | Whether to use the Pulsar's message index as offset or the record sequence. It's recommended if the incoming messages may be batched. The brokers may or not expose the index metadata and, if it's not present on the record, the sequence will be used. See [PIP-70](https://github.com/apache/pulsar/wiki/PIP-70%3A-Introduce-lightweight-broker-entry-metadata) for more details. | +| `batchSize` | int | False | false | 10 | The number of records submitted in batch. | +| `batchTimeMs` | long | False | false | 1000 | The interval for batch submission. | +| `maxBatchBytes` | long | False | false | 10000000 | The maximum number of bytes in a batch. | +| `sliceTopicPartitionPath` | Boolean | False | false | false | When it is set to `true`, split the partitioned topic name into separate folders in the bucket path. | +| `withMetadata` | Boolean | False | false | false | Save message attributes to metadata. | +| `useHumanReadableMessageId` | Boolean | False | false | false | Use a human-readable format string for messageId in message metadata. The messageId is in a format like `ledgerId:entryId:partitionIndex:batchIndex`. Otherwise, the messageId is a Hex-encoded string. | +| `withTopicPartitionNumber` | Boolean | False | false | true | When it is set to `true`, include the topic partition number to the object path. | +| `bytesFormatTypeSeparator` | String | False | false | "0x10" | It is inserted between records for the `formatType` of bytes. By default, it is set to '0x10'. An input record that contains the line separator looks like multiple records in the output object. | +| `pendingQueueSize` | int | False | false | 10 | The number of records buffered in queue. By default, it is equal to `batchSize`. You can set it manually. | +| `useHumanReadableSchemaVersion` | Boolean | False | false | false | Use a human-readable format string for the schema version in the message metadata. If it is set to `true`, the schema version is in plain string format. Otherwise, the schema version is in hex-encoded string format. | +| `skipFailedMessages` | Boolean | False | false | false | Configure whether to skip a message which it fails to be processed. If it is set to `true`, the connector will skip the failed messages by `ack` it. Otherwise, the connector will `fail` the message. 
| +| `pathPrefix` | String | False | false | false | If it is set, the output files are stored in a folder under the given bucket path. The `pathPrefix` must be in the format of `xx/xxx/`. | +| `avroCodec` | String | False | false | snappy | Compression codec used when formatType=`avro`. Available compression types are: none (no compression), deflate, bzip2, xz, zstandard, snappy. | +| `parquetCodec` | String | False | false | gzip | Compression codec used when formatType=`parquet`. Available compression types are: none (no compression), snappy, gzip, lzo, brotli, lz4, zstd. | +| `jsonAllowNaN` | Boolean | False | false | false | Recognize 'NaN', 'INF', '-INF' as legal floating number values when formatType=`json`. Since JSON specification does not allow such values this is a non-standard feature and disabled by default. | +| `includeTopicToMetadata` | Boolean | False | false | false | Include the topic name to the metadata. | + +## Advanced features + +### Data format types + +AWS S3 Sink Connector provides multiple output format options, including JSON, Avro, Bytes, or Parquet. The default format is JSON. +With current implementation, there are some limitations for different formats: + +This table lists the Pulsar Schema types supported by the writers. + +| Pulsar Schema | Writer: Avro | Writer: JSON | Writer: Parquet | Writer: Bytes | +|----------------|--------------|--------------|-----------------|---------------| +| Primitive | ✗ | ✔ * | ✗ | ✔ | +| Avro | ✔ | ✔ | ✔ | ✔ | +| Json | ✔ | ✔ | ✔ | ✔ | +| Protobuf ** | ✔ | ✔ | ✔ | ✔ | +| ProtobufNative | ✔ *** | ✗ | ✔ | ✔ | + +> *: The JSON writer will try to convert the data with a `String` or `Bytes` schema to JSON-format data if convertable. +> +> **: The Protobuf schema is based on the Avro schema. It uses Avro as an intermediate format, so it may not provide the best effort conversion. +> +> ***: The ProtobufNative record holds the Protobuf descriptor and the message. When writing to Avro format, the connector uses [avro-protobuf](https://github.com/apache/avro/tree/master/lang/java/protobuf) to do the conversion. + +This table lists the support of `withMetadata` configurations for different writer formats: + +| Writer Format | `withMetadata` | +|---------------|----------------| +| Avro | ✔ | +| JSON | ✔ | +| Parquet | ✔ * | +| Bytes | ✗ | + +> *: When using `Parquet` with `PROTOBUF_NATIVE` format, the connector will write the messages with `DynamicMessage` format. When `withMetadata` is set to `true`, the connector will add `__message_metadata__` to the messages with `PulsarIOCSCProtobufMessageMetadata` format. +> +> For example, if a message `User` has the following schema: +> ```protobuf +> syntax = "proto3"; +> message User { +> string name = 1; +> int32 age = 2; +> } +> ``` +> +> When `withMetadata` is set to `true`, the connector will write the message `DynamicMessage` with the following schema: +> ```protobuf +> syntax = "proto3"; +> message PulsarIOCSCProtobufMessageMetadata { +> map properties = 1; +> string schema_version = 2; +> string message_id = 3; +> } +> message User { +> string name = 1; +> int32 age = 2; +> PulsarIOCSCProtobufMessageMetadata __message_metadata__ = 3; +> } +> ``` +> + + +### Dead-letter topics + +To use a dead-letter topic, you need to set `skipFailedMessages` to `false`, and set `--max-redeliver-count` and `--dead-letter-topic` when submit the connector with the `pulsar-admin` CLI tool. 
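+
+For instance, a sketch of such a submission with `pulsar-admin` (the topic names, redelivery count, and config file below are placeholders):
+
+```bash
+pulsar-admin sinks create \
+  --sink-type cloud-storage-s3 \
+  --name aws-s3-sink \
+  --inputs my-input-topic \
+  --max-redeliver-count 3 \
+  --dead-letter-topic my-input-topic-dlq \
+  --sink-config-file aws-s3-sink-config.yaml
+```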
For more info about dead-letter topics, see the [Pulsar documentation](https://pulsar.apache.org/docs/en/concepts-messaging/#dead-letter-topic). +If a message fails to be sent to the AWS S3 and there is a dead-letter topic, the connector will send the message to the dead-letter topic. + +### Sink flushing only after batchTimeMs elapses + +There is a scenario where the sink is only flushing whenever the `batchTimeMs` has elapsed, even though there are many messages waiting to be processed. +The reason for this is that the sink will only acknowledge messages after they are flushed to AWS S3 but the broker stops sending messages when it reaches a certain limit of unacknowledged messages. +If this limit is lower or close to `batchSize`, the sink never receives enough messages to trigger a flush based on the amount of messages. +In this case please ensure the `maxUnackedMessagesPerConsumer` set in the broker configuration is sufficiently larger than the `batchSize` setting of the sink. + +### Partitioner + +The partitioner is used for partitioning the data into different files in the cloud storage. +There are three types of partitioner: + +- **Topic Partitioner**: Messages are partitioned according to the pre-existing partitions in the Pulsar topics. For + instance, a message for the topic `public/default/my-topic-partition-0` would be directed to the + file `public/default/my-topic-partition-0/xxx.json`, where `xxx` signifies the earliest message offset in this file. +- **Time Partitioner**: Messages are partitioned based on the timestamp at the time of flushing. For the aforementioned + message, it would be directed to the file `1703037311.json`, where `1703037311` represents the flush timestamp of the + first message in this file. +- **Legacy Partitioner**: This type reverts to the old partitioner behavior. The legacy configuration `partitionerType` would be respected. + +#### Legacy Partitioner + +There are two types of legacy partitioner: + +- **Simple partitioner**: This is the default partitioning method based on Pulsar partitions. In other words, data is + partitioned according to the pre-existing partitions in Pulsar topics. For instance, a message for the + topic `public/default/my-topic-partition-0` would be directed to the + file `public/default/my-topic-partition-0/xxx.json`, where `xxx` signifies the earliest message offset in this file. + +- **Time partitioner**: Data is partitioned according to the time it was flushed. Using the previous message as an + example, if it was received on 2023-12-20, it would be directed + to `public/default/my-topic-partition-0/2023-12-20/xxx.json`, where `xxx` also denotes the earliest message offset in + this file. 
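+
+For illustration, a hypothetical `--sink-config` fragment that opts into the time partitioner might look like the following sketch (the keys are the `partitioner`, `timePartitionPattern`, and `timePartitionDuration` options from the table above; the values are placeholders):
+
+```json
+{
+  "partitioner": "time",
+  "timePartitionPattern": "yyyy-MM-dd",
+  "timePartitionDuration": "1d"
+}
+```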
+ + diff --git a/connectors/azure-blob-storage-sink/v3.0.6.8/azure-blob-storage-sink.md b/connectors/azure-blob-storage-sink/v3.0.7.1/azure-blob-storage-sink.md similarity index 99% rename from connectors/azure-blob-storage-sink/v3.0.6.8/azure-blob-storage-sink.md rename to connectors/azure-blob-storage-sink/v3.0.7.1/azure-blob-storage-sink.md index 22482d1e..62784d32 100644 --- a/connectors/azure-blob-storage-sink/v3.0.6.8/azure-blob-storage-sink.md +++ b/connectors/azure-blob-storage-sink/v3.0.7.1/azure-blob-storage-sink.md @@ -11,7 +11,7 @@ tags: alias: Azure Blob Storage Sink Connector features: ["Cloud Storage Connector integrates Apache Pulsar with cloud storage."] icon: "/images/connectors/azure-blob-storage-logo.png" -download: https://api.github.com/repos/streamnative/pulsar-io-cloud-storage/tarball/refs/tags/v3.0.6.8 +download: https://api.github.com/repos/streamnative/pulsar-io-cloud-storage/tarball/refs/tags/v3.0.7.1 support: streamnative support_link: https://github.com/streamnative/pulsar-io-cloud-storage support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" @@ -25,7 +25,7 @@ id: "azure-blob-storage-sink" The [Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-overview) sink connector pulls data from Pulsar topics and persists data to Azure Blob Storage containers. -![](https://raw.githubusercontent.com/streamnative/pulsar-io-cloud-storage/v3.0.6.8/docs/azure-blob-storage-sink.png) +![](https://raw.githubusercontent.com/streamnative/pulsar-io-cloud-storage/v3.0.7.1/docs/azure-blob-storage-sink.png) ## Quick start diff --git a/connectors/azure-blob-storage-sink/v4.0.0.1/azure-blob-storage-sink.md b/connectors/azure-blob-storage-sink/v4.0.0.1/azure-blob-storage-sink.md new file mode 100644 index 00000000..e993641b --- /dev/null +++ b/connectors/azure-blob-storage-sink/v4.0.0.1/azure-blob-storage-sink.md @@ -0,0 +1,262 @@ +--- +description: Cloud Storage Connector integrates Apache Pulsar with cloud storage. +author: StreamNative +contributors: freeznet,jianyun8023,shibd,RobertIndie +language: Java,Shell,Dockerfile +document: +source: https://github.com/streamnative/pulsar-io-cloud-storage +license: Apache License 2.0 +license_link: https://github.com/streamnative/pulsar-io-cloud-storage/blob/master/LICENSE +tags: +alias: Azure Blob Storage Sink Connector +features: ["Cloud Storage Connector integrates Apache Pulsar with cloud storage."] +icon: "/images/connectors/azure-blob-storage-logo.png" +download: https://api.github.com/repos/streamnative/pulsar-io-cloud-storage/tarball/refs/tags/v4.0.0.1 +support: streamnative +support_link: https://github.com/streamnative/pulsar-io-cloud-storage +support_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +owner_name: "streamnative" +owner_img: "https://avatars.githubusercontent.com/u/44651383?v=4" +dockerfile: https://hub.docker.com/r/streamnative/pulsar-io-cloud-storage +sn_available: "true" +id: "azure-blob-storage-sink" +--- + + +The [Azure Blob Storage](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-overview) sink connector pulls data from Pulsar topics and persists data to Azure Blob Storage containers. + +![](https://raw.githubusercontent.com/streamnative/pulsar-io-cloud-storage/v4.0.0.1/docs/azure-blob-storage-sink.png) + +## Quick start + +### Prerequisites + +The prerequisites for connecting an Azure Blob Storage sink connector to external systems include: + +1. Create Blob Storage container in Azure Cloud. +2. 
Get the Storage account `Connection string`.
+
+
+### 1. Create a connector
+
+The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector,
+you need to replace `--sink-type cloud-storage-azure-blob` with `--archive /path/to/pulsar-io-cloud-storage.nar`. You can find the button to download the `nar` package at the beginning of the document.
+
+{% callout title="For StreamNative Cloud User" type="note" %}
+If you are a StreamNative Cloud user, you need to [set up your environment](https://docs.streamnative.io/docs/connector-setup) first.
+{% /callout %}
+
+```bash
+pulsarctl sinks create \
+  --sink-type cloud-storage-azure-blob \
+  --name azure-blob-sink \
+  --tenant public \
+  --namespace default \
+  --inputs "Your topic name" \
+  --parallelism 1 \
+  --sink-config \
+  '{
+    "azureStorageAccountConnectionString": "Your azure blob storage account connection string",
+    "provider": "azure-blob-storage",
+    "bucket": "Your container name",
+    "formatType": "json",
+    "partitioner": "topic"
+  }'
+```
+
+The `--sink-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own.
+If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference.
+
+{% callout title="Note" type="note" %}
+You can also choose to use a variety of other tools to create a connector:
+- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Terraform](https://github.com/hashicorp/terraform): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document.
+{% /callout %}
+
+### 2. Send messages to the topic
+
+{% callout title="Note" type="note" %}
+If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information.
+{% /callout %}
+
+``` java
+    public static void main(String[] args) throws Exception {
+        PulsarClient client = PulsarClient.builder()
+                .serviceUrl("{{Your Pulsar URL}}")
+                .build();
+
+        Producer<String> producer = client.newProducer(Schema.STRING)
+                .topic("{{Your topic name}}")
+                .create();
+
+        for (int i = 0; i < 10; i++) {
+            // send a small JSON object as the message payload
+            String message = "{\"test-message\": \"test-value\"}";
+            producer.send(message);
+        }
+
+        producer.close();
+        client.close();
+    }
+```
+
+### 3. Display data on Azure Blob Storage console
+
+You can see the object at `public/default/{{Your topic name}}-partition-0/xxxx.json` on the Azure Blob Storage console.
Download and open it; the content is:
+
+```text
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+{"test-message":"test-value"}
+```
+
+## Configuration Properties
+
+Before using the Azure Blob Storage sink connector, you need to configure it. This table outlines the properties and their descriptions.
+
+| Name | Type | Required | Sensitive | Default | Description |
+|---------------------------------|---------|----------|-----------|--------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `provider` | String | True | false | null | The Cloud Storage type. Azure Blob Storage only supports the `azure-blob-storage` provider. |
+| `bucket` | String | True | false | null | The Azure Blob Storage container name. |
+| `formatType` | String | True | false | "json" | The data format type. Available options are `json`, `avro`, `bytes`, or `parquet`. By default, it is set to `json`. |
+| `partitioner` | String | False | false | null | The partitioner for partitioning the resulting files. Available options are `topic`, `time` or `legacy`. By default, it's set to `legacy`. Please see [Partitioner](#partitioner) for more details. |
+| `partitionerType` | String | False | false | null | The legacy partitioning type. It can be configured by topic partitions or by time. By default, the partition type is configured by topic partitions. It only works when the partitioner is set to `legacy`. |
+| `azureStorageAccountConnectionString` | String | False | true | "" | The Azure Blob Storage connection string. Required when authenticating via connection string. |
+| `azureStorageAccountSASToken` | String | False | true | "" | The Azure Blob Storage account SAS token. Required when authenticating via SAS token. |
+| `azureStorageAccountName` | String | False | true | "" | The Azure Blob Storage account name. Required when authenticating via account name and account key. |
+| `azureStorageAccountKey` | String | False | true | "" | The Azure Blob Storage account key. Required when authenticating via account name and account key. |
+| `endpoint` | String | False | false | null | The Azure Blob Storage endpoint. Required when authenticating via account name or SAS token. |
+| `timePartitionPattern` | String | False | false | "yyyy-MM-dd" | The format pattern of the time-based partitioning. For details, refer to the Java date and time format. |
+| `timePartitionDuration` | String | False | false | "86400000" | The time interval for time-based partitioning. Supports a formatted interval string, such as `30d`, `24h`, `30m`, `10s`, as well as a number in milliseconds, such as `86400000` for `24h` or `1d`. |
+| `partitionerUseIndexAsOffset` | Boolean | False | false | false | Whether to use the Pulsar message index as the offset, instead of the record sequence. It's recommended if the incoming messages may be batched. The brokers may or may not expose the index metadata and, if it's not present on the record, the sequence will be used.
See [PIP-70](https://github.com/apache/pulsar/wiki/PIP-70%3A-Introduce-lightweight-broker-entry-metadata) for more details. | +| `batchSize` | int | False | false | 10 | The number of records submitted in batch. | +| `batchTimeMs` | long | False | false | 1000 | The interval for batch submission. | +| `maxBatchBytes` | long | False | false | 10000000 | The maximum number of bytes in a batch. | +| `sliceTopicPartitionPath` | Boolean | False | false | false | When it is set to `true`, split the partitioned topic name into separate folders in the bucket path. | +| `withMetadata` | Boolean | False | false | false | Save message attributes to metadata. | +| `useHumanReadableMessageId` | Boolean | False | false | false | Use a human-readable format string for messageId in message metadata. The messageId is in a format like `ledgerId:entryId:partitionIndex:batchIndex`. Otherwise, the messageId is a Hex-encoded string. | +| `withTopicPartitionNumber` | Boolean | False | false | true | When it is set to `true`, include the topic partition number to the object path. | +| `bytesFormatTypeSeparator` | String | False | false | "0x10" | It is inserted between records for the `formatType` of bytes. By default, it is set to '0x10'. An input record that contains the line separator looks like multiple records in the output object. | +| `pendingQueueSize` | int | False | false | 10 | The number of records buffered in queue. By default, it is equal to `batchSize`. You can set it manually. | +| `useHumanReadableSchemaVersion` | Boolean | False | false | false | Use a human-readable format string for the schema version in the message metadata. If it is set to `true`, the schema version is in plain string format. Otherwise, the schema version is in hex-encoded string format. | +| `skipFailedMessages` | Boolean | False | false | false | Configure whether to skip a message which it fails to be processed. If it is set to `true`, the connector will skip the failed messages by `ack` it. Otherwise, the connector will `fail` the message. | +| `pathPrefix` | String | False | false | false | If it is set, the output files are stored in a folder under the given bucket path. The `pathPrefix` must be in the format of `xx/xxx/`. | +| `avroCodec` | String | False | false | snappy | Compression codec used when formatType=`avro`. Available compression types are: none (no compression), deflate, bzip2, xz, zstandard, snappy. | +| `parquetCodec` | String | False | false | gzip | Compression codec used when formatType=`parquet`. Available compression types are: none (no compression), snappy, gzip, lzo, brotli, lz4, zstd. | +| `jsonAllowNaN` | Boolean | False | false | false | Recognize 'NaN', 'INF', '-INF' as legal floating number values when formatType=`json`. Since JSON specification does not allow such values this is a non-standard feature and disabled by default. | +| `includeTopicToMetadata` | Boolean | False | false | false | Include the topic name to the metadata. | + +There are three methods to authenticate with Azure Blob Storage: +1. `azureStorageAccountConnectionString`: This method involves using the Azure Blob Storage connection string for authentication. It's the simplest method as it only requires the connection string. +2. `azureStorageAccountSASToken`: This method uses a Shared Access Signature (SAS) token for the Azure Blob Storage account. When using this method, you must also set the `endpoint`. +3. 
+3. `azureStorageAccountName` and `azureStorageAccountKey`: This method uses the Azure Blob Storage account name and account key for authentication. As with the SAS token method, you must also set the `endpoint` when using this method.
+
+
+## Advanced features
+
+### Data format types
+
+The Azure Blob Storage sink connector provides multiple output format options, including JSON, Avro, Bytes, and Parquet. The default format is JSON.
+With the current implementation, there are some limitations for the different formats.
+
+This table lists the Pulsar Schema types supported by the writers.
+
+| Pulsar Schema | Writer: Avro | Writer: JSON | Writer: Parquet | Writer: Bytes |
+|----------------|--------------|--------------|-----------------|---------------|
+| Primitive | ✗ | ✔ * | ✗ | ✔ |
+| Avro | ✔ | ✔ | ✔ | ✔ |
+| Json | ✔ | ✔ | ✔ | ✔ |
+| Protobuf ** | ✔ | ✔ | ✔ | ✔ |
+| ProtobufNative | ✔ *** | ✗ | ✔ | ✔ |
+
+> *: The JSON writer will try to convert data with a `String` or `Bytes` schema to JSON-format data if it is convertible.
+>
+> **: The Protobuf schema is based on the Avro schema. It uses Avro as an intermediate format, so it may not provide the best-effort conversion.
+>
+> ***: The ProtobufNative record holds the Protobuf descriptor and the message. When writing to Avro format, the connector uses [avro-protobuf](https://github.com/apache/avro/tree/master/lang/java/protobuf) to do the conversion.
+
+This table lists the support of `withMetadata` configurations for different writer formats:
+
+| Writer Format | `withMetadata` |
+|---------------|----------------|
+| Avro | ✔ |
+| JSON | ✔ |
+| Parquet | ✔ * |
+| Bytes | ✗ |
+
+> *: When using `Parquet` with `PROTOBUF_NATIVE` format, the connector writes the messages in `DynamicMessage` format. When `withMetadata` is set to `true`, the connector adds `__message_metadata__` to the messages in `PulsarIOCSCProtobufMessageMetadata` format.
+>
+> For example, if a message `User` has the following schema:
+> ```protobuf
+> syntax = "proto3";
+> message User {
+>   string name = 1;
+>   int32 age = 2;
+> }
+> ```
+>
+> When `withMetadata` is set to `true`, the connector writes the message `DynamicMessage` with the following schema:
+> ```protobuf
+> syntax = "proto3";
+> message PulsarIOCSCProtobufMessageMetadata {
+>   map<string, string> properties = 1;
+>   string schema_version = 2;
+>   string message_id = 3;
+> }
+> message User {
+>   string name = 1;
+>   int32 age = 2;
+>   PulsarIOCSCProtobufMessageMetadata __message_metadata__ = 3;
+> }
+> ```
+>
+
+
+### Dead-letter topics
+
+To use a dead-letter topic, set `skipFailedMessages` to `false`, and set `--max-redeliver-count` and `--dead-letter-topic` when submitting the connector with the `pulsar-admin` CLI tool. For more information about dead-letter topics, see the [Pulsar documentation](https://pulsar.apache.org/docs/en/concepts-messaging/#dead-letter-topic).
+If a message fails to be sent to Azure Blob Storage and a dead-letter topic is configured, the connector sends the message to the dead-letter topic.
+
+### Sink flushing only after batchTimeMs elapses
+
+There is a scenario where the sink only flushes when `batchTimeMs` has elapsed, even though many messages are waiting to be processed.
+The reason for this is that the sink only acknowledges messages after they are flushed to Azure Blob Storage, but the broker stops sending messages when it reaches a certain limit of unacknowledged messages.
+If this limit is lower than or close to `batchSize`, the sink never receives enough messages to trigger a flush based on the number of messages.
+In this case, ensure that the `maxUnackedMessagesPerConsumer` set in the broker configuration is sufficiently larger than the `batchSize` setting of the sink.
+
+### Partitioner
+
+The partitioner is used for partitioning the data into different files in the cloud storage.
+There are three types of partitioners:
+
+- **Topic Partitioner**: Messages are partitioned according to the pre-existing partitions in the Pulsar topics. For
+  instance, a message for the topic `public/default/my-topic-partition-0` would be directed to the
+  file `public/default/my-topic-partition-0/xxx.json`, where `xxx` signifies the earliest message offset in this file.
+- **Time Partitioner**: Messages are partitioned based on the timestamp at the time of flushing. For the aforementioned
+  message, it would be directed to the file `1703037311.json`, where `1703037311` represents the flush timestamp of the
+  first message in this file.
+- **Legacy Partitioner**: This type reverts to the old partitioner behavior. The legacy configuration `partitionerType` is respected.
+
+#### Legacy Partitioner
+
+There are two types of legacy partitioners:
+
+- **Simple partitioner**: This is the default partitioning method based on Pulsar partitions. In other words, data is
+  partitioned according to the pre-existing partitions in Pulsar topics. For instance, a message for the
+  topic `public/default/my-topic-partition-0` would be directed to the
+  file `public/default/my-topic-partition-0/xxx.json`, where `xxx` signifies the earliest message offset in this file.
+
+- **Time partitioner**: Data is partitioned according to the time it was flushed. Using the previous message as an
+  example, if it was received on 2023-12-20, it would be directed
+  to `public/default/my-topic-partition-0/2023-12-20/xxx.json`, where `xxx` also denotes the earliest message offset in
+  this file.
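+
+To make the three partitioner styles concrete, here is a minimal, illustrative Java sketch of how each one could resolve an object path for a flushed file. This is not the connector's actual implementation; the class and method names are hypothetical, and only `java.time` from the standard library is used:
+
+``` java
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+// Illustrative sketch only: resolves object paths the way the partitioners
+// described above are documented to behave.
+public class PartitionerPathSketch {
+
+    // Topic partitioner: the path follows the Pulsar topic partition, suffixed
+    // with the earliest message offset in the file.
+    static String topicPartitionerPath(String topic, long firstOffset) {
+        return topic + "/" + firstOffset + ".json"; // e.g. public/default/my-topic-partition-0/123.json
+    }
+
+    // Time partitioner: the file name is the flush timestamp (seconds) of the
+    // first message in the file.
+    static String timePartitionerPath(long flushTimestampMillis) {
+        return (flushTimestampMillis / 1000) + ".json"; // e.g. 1703037311.json
+    }
+
+    // Legacy time partitioner: a date folder (timePartitionPattern, default
+    // "yyyy-MM-dd") is inserted under the topic path.
+    static String legacyTimePartitionerPath(String topic, long flushTimestampMillis, long firstOffset) {
+        String day = DateTimeFormatter.ofPattern("yyyy-MM-dd")
+                .withZone(ZoneOffset.UTC)
+                .format(Instant.ofEpochMilli(flushTimestampMillis));
+        return topic + "/" + day + "/" + firstOffset + ".json"; // e.g. .../2023-12-20/123.json
+    }
+
+    public static void main(String[] args) {
+        String topic = "public/default/my-topic-partition-0";
+        System.out.println(topicPartitionerPath(topic, 123));
+        System.out.println(timePartitionerPath(1703037311000L));
+        System.out.println(legacyTimePartitionerPath(topic, 1703037311000L, 123));
+    }
+}
+```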
+
diff --git a/connectors/elasticsearch-sink/v3.0.6/elasticsearch-sink.md b/connectors/elasticsearch-sink/v3.0.7/elasticsearch-sink.md
similarity index 99%
rename from connectors/elasticsearch-sink/v3.0.6/elasticsearch-sink.md
rename to connectors/elasticsearch-sink/v3.0.7/elasticsearch-sink.md
index 01f9830c..98712f14 100644
--- a/connectors/elasticsearch-sink/v3.0.6/elasticsearch-sink.md
+++ b/connectors/elasticsearch-sink/v3.0.7/elasticsearch-sink.md
@@ -4,14 +4,14 @@ author: ["ASF"]
 contributors: ["ASF"]
 language: Java
 document:
-source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/elastic-search"
+source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/elastic-search"
 license: Apache License 2.0
 tags: ["Pulsar IO", "Elasticsearch", "Sink"]
 alias: Elasticsearch Sink Connector
 features: ["Use Elasticsearch sink connector to sync data from Pulsar"]
 license_link: "https://www.apache.org/licenses/LICENSE-2.0"
 icon: "/images/connectors/elasticsearch.png"
-download: "https://github.com/streamnative/pulsar/releases/download/v3.0.6.1/pulsar-io-elastic-search-3.0.6.1.nar"
+download: "https://github.com/streamnative/pulsar/releases/download/v3.0.7.1/pulsar-io-elastic-search-3.0.7.1.nar"
 support: StreamNative
 support_link: https://streamnative.io
 support_img: "/images/streamnative.png"
diff --git a/connectors/elasticsearch-sink/v4.0.0/elasticsearch-sink.md b/connectors/elasticsearch-sink/v4.0.0/elasticsearch-sink.md
new file mode 100644
index 00000000..62601396
--- /dev/null
+++ b/connectors/elasticsearch-sink/v4.0.0/elasticsearch-sink.md
@@ -0,0 +1,165 @@
+---
+description: The Elasticsearch sink connector pulls messages from Pulsar topics and persists the messages to indexes
+author: ["ASF"]
+contributors: ["ASF"]
+language: Java
+document:
+source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/elastic-search"
+license: Apache License 2.0
+tags: ["Pulsar IO", "Elasticsearch", "Sink"]
+alias: Elasticsearch Sink Connector
+features: ["Use Elasticsearch sink connector to sync data from Pulsar"]
+license_link: "https://www.apache.org/licenses/LICENSE-2.0"
+icon: "/images/connectors/elasticsearch.png"
+download: "https://github.com/streamnative/pulsar/releases/download/v4.0.0.2/pulsar-io-elastic-search-4.0.0.2.nar"
+support: StreamNative
+support_link: https://streamnative.io
+support_img: "/images/streamnative.png"
+owner_name: ""
+owner_img: ""
+dockerfile:
+sn_available: true
+id: "elasticsearch-sink"
+---
+
+The [Elasticsearch](https://www.elastic.co/elasticsearch/) sink connector pulls messages from Pulsar topics and persists the messages to indexes. For more information about connectors, see [Connector Overview](https://docs.streamnative.io/docs/connector-overview).
+
+![](/images/connectors/elasticsearch-sink.png)
+
+This document introduces how to get started with creating an Elasticsearch sink connector and getting it up and running.
+
+## Quick start
+
+### Prerequisites
+
+The prerequisites for connecting an Elasticsearch sink connector to external systems include:
+
+Create an Elasticsearch cluster. You can create a single-node Elasticsearch cluster by executing this command:
+
+```bash
+docker run -p 9200:9200 -p 9300:9300 \
+  -e "discovery.type=single-node" \
+  -e "ELASTIC_PASSWORD=pulsar-sink-test" \
+  docker.elastic.co/elasticsearch/elasticsearch:7.17.13
+```
+
+### 1. Create a connector
+
+The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector.
+If you want to create a `non-builtin` connector,
+you need to replace `--sink-type elastic-search` with `--archive /path/to/pulsar-io-elastic-search.nar`. You can find the button to download the `nar` package at the beginning of the document.
+
+{% callout title="For StreamNative Cloud User" type="note" %}
+If you are a StreamNative Cloud user, you need to [set up your environment](https://docs.streamnative.io/docs/connector-setup) first.
+{% /callout %}
+
+```bash
+pulsarctl sinks create \
+  --sink-type elastic-search \
+  --name es-sink \
+  --tenant public \
+  --namespace default \
+  --inputs "Your topic name" \
+  --parallelism 1 \
+  --sink-config \
+  '{
+    "elasticSearchUrl": "http://localhost:9200",
+    "indexName": "my_index",
+    "typeName": "doc",
+    "username": "elastic",
+    "password": "pulsar-sink-test"
+  }'
+```
+
+The `--sink-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own.
+If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference.
+
+{% callout title="Note" type="note" %}
+You can also choose to use a variety of other tools to create a connector:
+- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Terraform](https://github.com/hashicorp/terraform): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document.
+{% /callout %}
+
+### 2. Send messages to the topic
+
+{% callout title="Note" type="note" %}
+If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information.
+{% /callout %}
+
+``` java
+    PulsarClient client = PulsarClient.builder()
+        .serviceUrl("{{Your Pulsar URL}}")
+        .build();
+
+    Producer<String> producer = client.newProducer(Schema.STRING)
+        .topic("{{Your topic name}}")
+        .create();
+
+    String message = "{\"a\":1}";
+    MessageId msgID = producer.send(message);
+    System.out.println("Publish " + message + " and message ID " + msgID);
+
+    producer.flush();
+    producer.close();
+    client.close();
+```
+
+### 3. Check documents in Elasticsearch
+
+- Refresh the index
+
+```bash
+curl -s http://localhost:9200/my_index/_refresh
+```
+
+- Search documents
+
+```bash
+curl -s http://localhost:9200/my_index/_search
+```
+
+- You can see that the record published earlier has been successfully written into Elasticsearch.
+
+```json
+{"took":2,"timed_out":false,"_shards":{"total":1,"successful":1,"skipped":0,"failed":0},"hits":{"total":{"value":1,"relation":"eq"},"max_score":1.0,"hits":[{"_index":"my_index","_type":"_doc","_id":"FSxemm8BLjG_iC0EeTYJ","_score":1.0,"_source":{"a":1}}]}}
+```
+
+## Configuration Properties
+
+This table outlines the properties of an Elasticsearch sink connector.
+
+| Name | Type | Required | Sensitive | Default | Description |
+|------|------|----------|-----------|---------|-------------|
+| `elasticSearchUrl` | String | true | false | " " (empty string) | The URL of the Elasticsearch cluster to which the connector connects. |
+| `indexName` | String | false | false | " " (empty string) | The index name to which the connector writes messages. The default value is the topic name. It accepts date formats in the name to support event-time-based indexes with the pattern `%{+}`. For example, suppose the event time of the record is 1645182000000L and the indexName is `logs-%{+yyyy-MM-dd}`; then the formatted index name would be `logs-2022-02-18`. |
+| `schemaEnable` | Boolean | false | false | false | Turn on the Schema Aware mode. |
+| `createIndexIfNeeded` | Boolean | false | false | false | Manage index if missing. |
+| `maxRetries` | Integer | false | false | 1 | The maximum number of retries for Elasticsearch requests. Use -1 to disable it. |
+| `retryBackoffInMs` | Integer | false | false | 100 | The base time to wait when retrying an Elasticsearch request (in milliseconds). |
+| `maxRetryTimeInSec` | Integer | false | false | 86400 | The maximum retry time interval in seconds for retrying an Elasticsearch request. |
+| `bulkEnabled` | Boolean | false | false | false | Enable the Elasticsearch bulk processor to flush write requests based on the number or size of requests, or after a given period. |
+| `bulkActions` | Integer | false | false | 1000 | The maximum number of actions per Elasticsearch bulk request. Use -1 to disable it. |
+| `bulkSizeInMb` | Integer | false | false | 5 | The maximum size in megabytes of Elasticsearch bulk requests. Use -1 to disable it. |
+| `bulkConcurrentRequests` | Integer | false | false | 0 | The maximum number of in-flight Elasticsearch bulk requests. The default 0 allows the execution of a single request. A value of 1 means 1 concurrent request is allowed to be executed while accumulating new bulk requests. |
+| `bulkFlushIntervalInMs` | Long | false | false | 1000 | The maximum period of time to wait for flushing pending writes when bulk writes are enabled. -1 or zero means the scheduled flushing is disabled. |
+| `compressionEnabled` | Boolean | false | false | false | Enable Elasticsearch request compression. |
+| `connectTimeoutInMs` | Integer | false | false | 5000 | The Elasticsearch client connection timeout in milliseconds. |
+| `connectionRequestTimeoutInMs` | Integer | false | false | 1000 | The time in milliseconds for getting a connection from the Elasticsearch connection pool. |
+| `connectionIdleTimeoutInMs` | Integer | false | false | 5 | Idle connection timeout to prevent a read timeout. |
+| `keyIgnore` | Boolean | false | false | true | Whether to ignore the record key when building the Elasticsearch document `_id`. If `primaryFields` is defined, the connector extracts the primary fields from the payload to build the document `_id`. If no `primaryFields` are provided, Elasticsearch auto-generates a random document `_id`. |
+| `primaryFields` | String | false | false | "id" | The comma-separated ordered list of field names used to build the Elasticsearch document `_id` from the record value. If this list is a singleton, the field is converted as a string. If this list has 2 or more fields, the generated `_id` is a string representation of a JSON array of the field values. |
+| `nullValueAction` | enum (IGNORE,DELETE,FAIL) | false | false | IGNORE | How to handle records with null values. Possible options are IGNORE, DELETE, or FAIL. The default is to IGNORE the message. |
+| `malformedDocAction` | enum (IGNORE,WARN,FAIL) | false | false | FAIL | How to handle documents rejected by Elasticsearch due to some malformation. Possible options are IGNORE, WARN, or FAIL. The default is to FAIL the Elasticsearch document. |
+| `stripNulls` | Boolean | false | false | true | If stripNulls is false, the Elasticsearch _source includes 'null' for empty fields (for example {"foo": null}); otherwise, null fields are stripped. |
+| `socketTimeoutInMs` | Integer | false | false | 60000 | The socket timeout in milliseconds waiting to read the Elasticsearch response. |
+| `typeName` | String | false | false | "_doc" | The type name to which the connector writes messages.<br><br>The value should be set explicitly to a valid type name other than "_doc" for Elasticsearch versions before 6.2, and left as the default otherwise. |
+| `indexNumberOfShards` | int | false | false | 1 | The number of shards of the index. |
+| `indexNumberOfReplicas` | int | false | false | 1 | The number of replicas of the index. |
+| `username` | String | false | true | " " (empty string) | The username used by the connector to connect to the Elasticsearch cluster.<br><br>If `username` is set, then `password` should also be provided. |
+| `password` | String | false | true | " " (empty string) | The password used by the connector to connect to the Elasticsearch cluster.<br><br>If `username` is set, then `password` should also be provided. |
+| `ssl` | ElasticSearchSslConfig | false | false | | Configuration for TLS encrypted communication. |
+| `compatibilityMode` | enum (AUTO,ELASTICSEARCH,ELASTICSEARCH_7,OPENSEARCH) | false | false | AUTO | Specify the compatibility mode with the Elasticsearch cluster. The `AUTO` value tries to auto-detect the correct compatibility mode. Use `ELASTICSEARCH_7` if the target cluster is running Elasticsearch 7 or prior. Use `ELASTICSEARCH` if the target cluster is running Elasticsearch 8 or higher. Use `OPENSEARCH` if the target cluster is running OpenSearch. |
+| `token` | String | false | true | " " (empty string) | The token used by the connector to connect to the Elasticsearch cluster. Only one of the basic (username/password), token, and apiKey authentication modes may be configured. |
+| `apiKey` | String | false | true | " " (empty string) | The apiKey used by the connector to connect to the Elasticsearch cluster. Only one of the basic (username/password), token, and apiKey authentication modes may be configured. |
+| `canonicalKeyFields` | Boolean | false | false | false | Whether to sort the key fields for JSON and Avro or not. If it is set to `true` and the record key schema is `JSON` or `AVRO`, the serialized object does not consider the order of properties. |
+| `stripNonPrintableCharacters` | Boolean | false | false | true | Whether to remove all non-printable characters from the document or not. If it is set to true, all non-printable characters are removed from the document. |
+| `idHashingAlgorithm` | enum(NONE,SHA256,SHA512) | false | false | NONE | Hashing algorithm to use for the document id. This is useful in order to be compliant with the Elasticsearch _id hard limit of 512 bytes. |
+| `conditionalIdHashing` | Boolean | false | false | false | This option only works if idHashingAlgorithm is set. If enabled, the hashing is performed only when the id is larger than 512 bytes; otherwise, the hashing is performed on every document. |
+| `copyKeyFields` | Boolean | false | false | false | If the message key schema is AVRO or JSON, the message key fields are copied into the Elasticsearch document. |
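+
+One property in the table above that deserves a closer look is `indexName`: it can embed an event-time date pattern such as `logs-%{+yyyy-MM-dd}`. The following is a minimal, illustrative Java sketch of how such a pattern resolves, using plain `java.time`; this is not the connector's internal code, and the class and method names are hypothetical:
+
+``` java
+import java.time.Instant;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+
+// Illustrative sketch: resolve an event-time-based index name such as
+// "logs-%{+yyyy-MM-dd}" for a record whose event time is 1645182000000L.
+public class IndexNameSketch {
+    static String resolveIndexName(String pattern, long eventTimeMillis) {
+        int start = pattern.indexOf("%{+");
+        if (start < 0) {
+            return pattern; // no date pattern, use the name as-is
+        }
+        int end = pattern.indexOf('}', start);
+        String dateFormat = pattern.substring(start + 3, end);
+        String formatted = DateTimeFormatter.ofPattern(dateFormat)
+                .withZone(ZoneOffset.UTC)
+                .format(Instant.ofEpochMilli(eventTimeMillis));
+        return pattern.substring(0, start) + formatted + pattern.substring(end + 1);
+    }
+
+    public static void main(String[] args) {
+        // Prints "logs-2022-02-18", matching the example in the table above.
+        System.out.println(resolveIndexName("logs-%{+yyyy-MM-dd}", 1645182000000L));
+    }
+}
+```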
diff --git a/connectors/hbase-sink/v3.0.6/hbase-sink.md b/connectors/hbase-sink/v3.0.7/hbase-sink.md
similarity index 94%
rename from connectors/hbase-sink/v3.0.6/hbase-sink.md
rename to connectors/hbase-sink/v3.0.7/hbase-sink.md
index a1ace2f2..54480285 100644
--- a/connectors/hbase-sink/v3.0.6/hbase-sink.md
+++ b/connectors/hbase-sink/v3.0.7/hbase-sink.md
@@ -4,14 +4,14 @@ author: ["ASF"]
 contributors: ["ASF"]
 language: Java
 document:
-source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/hbase"
+source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/hbase"
 license: Apache License 2.0
 tags: ["Pulsar IO", "HBase", "Sink"]
 alias: HBase Sink
 features: ["Use HBase sink connector to sync data from Pulsar"]
 license_link: "https://www.apache.org/licenses/LICENSE-2.0"
 icon: "/images/connectors/hbase.png"
-download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-hbase-3.0.6.nar"
+download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-hbase-3.0.7.nar"
 support: StreamNative
 support_link: https://streamnative.io
 support_img: "/images/streamnative.png"
diff --git a/connectors/hbase-sink/v4.0.0/hbase-sink.md b/connectors/hbase-sink/v4.0.0/hbase-sink.md
new file mode 100644
index 00000000..7d61431c
--- /dev/null
+++ b/connectors/hbase-sink/v4.0.0/hbase-sink.md
@@ -0,0 +1,79 @@
+---
+description: The HBase sink connector pulls the messages from Pulsar topics and persists the messages to HBase tables
+author: ["ASF"]
+contributors: ["ASF"]
+language: Java
+document:
+source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/hbase"
+license: Apache License 2.0
+tags: ["Pulsar IO", "HBase", "Sink"]
+alias: HBase Sink
+features: ["Use HBase sink connector to sync data from Pulsar"]
+license_link: "https://www.apache.org/licenses/LICENSE-2.0"
+icon: "/images/connectors/hbase.png"
+download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-hbase-4.0.0.nar"
+support: StreamNative
+support_link: https://streamnative.io
+support_img: "/images/streamnative.png"
+owner_name: ""
+owner_img: ""
+dockerfile:
+id: "hbase-sink"
+---
+
+The HBase sink connector pulls the messages from Pulsar topics and persists the messages to HBase tables.
+
+# Configuration
+
+The configuration of the HBase sink connector has the following properties.
+
+## Property
+
+| Name | Type | Default | Required | Description |
+|------|------|---------|----------|-------------|
+| `hbaseConfigResources` | String | None | false | HBase system configuration `hbase-site.xml` file. |
+| `zookeeperQuorum` | String | None | true | HBase system configuration `hbase.zookeeper.quorum` value. |
+| `zookeeperClientPort` | String | 2181 | false | HBase system configuration `hbase.zookeeper.property.clientPort` value. |
+| `zookeeperZnodeParent` | String | /hbase | false | HBase system configuration `zookeeper.znode.parent` value. |
+| `tableName` | String | None | true | HBase table; the value is `namespace:tableName`. |
+| `rowKeyName` | String | None | true | HBase table rowkey name. |
+| `familyName` | String | None | true | HBase table column family name. |
+| `qualifierNames` | String | None | true | HBase table column qualifier names. |
+| `batchTimeMs` | Long | 1000 | false | HBase table operation timeout in milliseconds. |
+| `batchSize` | int | 200 | false | Batch size of updates made to the HBase table. |
+
+## Example
+
+Before using the HBase sink connector, you need to create a configuration file through one of the following methods.
+
+* JSON
+
+  ```json
+  {
+     "hbaseConfigResources": "hbase-site.xml",
+     "zookeeperQuorum": "localhost",
+     "zookeeperClientPort": "2181",
+     "zookeeperZnodeParent": "/hbase",
+     "tableName": "pulsar_hbase",
+     "rowKeyName": "rowKey",
+     "familyName": "info",
+     "qualifierNames": ["name", "address", "age"]
+  }
+  ```
+
+
+* YAML
+
+  ```yaml
+  configs:
+     hbaseConfigResources: "hbase-site.xml"
+     zookeeperQuorum: "localhost"
+     zookeeperClientPort: "2181"
+     zookeeperZnodeParent: "/hbase"
+     tableName: "pulsar_hbase"
+     rowKeyName: "rowKey"
+     familyName: "info"
+     qualifierNames: ["name", "address", "age"]
+  ```
+
+
\ No newline at end of file
diff --git a/connectors/hdfs2-sink/v3.0.6/hdfs2-sink.md b/connectors/hdfs2-sink/v3.0.7/hdfs2-sink.md
similarity index 95%
rename from connectors/hdfs2-sink/v3.0.6/hdfs2-sink.md
rename to connectors/hdfs2-sink/v3.0.7/hdfs2-sink.md
index 878b2277..469bfe16 100644
--- a/connectors/hdfs2-sink/v3.0.6/hdfs2-sink.md
+++ b/connectors/hdfs2-sink/v3.0.7/hdfs2-sink.md
@@ -4,14 +4,14 @@ author: ["ASF"]
 contributors: ["ASF"]
 language: Java
 document:
-source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/hdfs2"
+source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/hdfs2"
 license: Apache License 2.0
 tags: ["Pulsar IO", "HDFS2", "Sink"]
 alias: HDFS2 Sink
 features: ["Use HDFS2 sink connector to sync data from Pulsar"]
 license_link: "https://www.apache.org/licenses/LICENSE-2.0"
 icon: "/images/connectors/hadoop.png"
-download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-hdfs2-3.0.6.nar"
+download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-hdfs2-3.0.7.nar"
 support: StreamNative
 support_link: https://streamnative.io
 support_img: "/images/streamnative.png"
diff --git a/connectors/hdfs2-sink/v4.0.0/hdfs2-sink.md b/connectors/hdfs2-sink/v4.0.0/hdfs2-sink.md
new file mode 100644
index 00000000..9a1c3626
--- /dev/null
+++ b/connectors/hdfs2-sink/v4.0.0/hdfs2-sink.md
@@ -0,0 +1,70 @@
+---
+description: The HDFS2 sink connector pulls the messages from Pulsar topics and persists the messages to HDFS files
+author: ["ASF"]
+contributors: ["ASF"]
+language: Java
+document:
+source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/hdfs2"
+license: Apache License 2.0
+tags: ["Pulsar IO", "HDFS2", "Sink"]
+alias: HDFS2 Sink
+features: ["Use HDFS2 sink connector to sync data from Pulsar"]
+license_link: "https://www.apache.org/licenses/LICENSE-2.0"
+icon: "/images/connectors/hadoop.png"
+download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-hdfs2-4.0.0.nar"
+support: StreamNative
+support_link: https://streamnative.io
+support_img: "/images/streamnative.png"
+owner_name: ""
+owner_img: ""
+dockerfile:
+id: "hdfs2-sink"
+---
+
+The HDFS2 sink connector pulls the messages from Pulsar topics and persists the messages to HDFS files.
+
+# Configuration
+
+The configuration of the HDFS2 sink connector has the following properties.
+
+## Property
+
+| Name | Type | Required | Default | Description |
+|------|------|----------|---------|-------------|
+| `hdfsConfigResources` | String | true | None | A file or a comma-separated list of files containing the Hadoop file system configuration.<br><br>**Example**<br>'core-site.xml'<br>'hdfs-site.xml' |
+| `directory` | String | true | None | The HDFS directory from which files are read or to which files are written. |
+| `encoding` | String | false | None | The character encoding for the files.<br><br>**Example**<br>UTF-8<br>ASCII |
+| `compression` | Compression | false | None | The compression codec used to compress or de-compress the files on HDFS.<br><br>Below are the available options:<br>• BZIP2<br>• DEFLATE<br>• GZIP<br>• LZ4<br>• SNAPPY |
+| `kerberosUserPrincipal` | String | false | None | The principal account of the Kerberos user used for authentication. |
+| `keytab` | String | false | None | The full pathname of the Kerberos keytab file used for authentication. |
+| `filenamePrefix` | String | false | None | The prefix of the files created inside the HDFS directory.<br><br>**Example**<br>The value of topicA results in files named topicA-. |
+| `fileExtension` | String | false | None | The extension added to the files written to HDFS.<br><br>**Example**<br>'.txt'<br>'.seq' |
+| `separator` | char | false | None | The character used to separate records in a text file.<br><br>If no value is provided, the contents of all records are concatenated together into one continuous byte array. |
+| `syncInterval` | long | false | 0 | The interval, in milliseconds, between calls to flush data to HDFS disk. |
+| `maxPendingRecords` | int | false | Integer.MAX_VALUE | The maximum number of records held in memory before acking.<br><br>Setting this property to 1 makes every record be sent to disk before the record is acked.<br><br>Setting this property to a higher value allows buffering records before flushing them to disk. |
+
+## Example
+
+Before using the HDFS2 sink connector, you need to create a configuration file through one of the following methods.
+
+* JSON
+
+  ```json
+  {
+    "hdfsConfigResources": "core-site.xml",
+    "directory": "/foo/bar",
+    "filenamePrefix": "prefix",
+    "compression": "SNAPPY"
+  }
+  ```
+
+* YAML
+
+  ```yaml
+  configs:
+    hdfsConfigResources: "core-site.xml"
+    directory: "/foo/bar"
+    filenamePrefix: "prefix"
+    compression: "SNAPPY"
+  ```
+
diff --git a/connectors/hdfs3-sink/v3.0.6/hdfs3-sink.md b/connectors/hdfs3-sink/v3.0.7/hdfs3-sink.md
similarity index 95%
rename from connectors/hdfs3-sink/v3.0.6/hdfs3-sink.md
rename to connectors/hdfs3-sink/v3.0.7/hdfs3-sink.md
index ae1e2050..81c07fa3 100644
--- a/connectors/hdfs3-sink/v3.0.6/hdfs3-sink.md
+++ b/connectors/hdfs3-sink/v3.0.7/hdfs3-sink.md
@@ -4,14 +4,14 @@ author: ["ASF"]
 contributors: ["ASF"]
 language: Java
 document:
-source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/hdfs3"
+source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/hdfs3"
 license: Apache License 2.0
 tags: ["Pulsar IO", "HDFS3", "Sink"]
 alias: HDFS3 Sink
 features: ["Use HDFS3 sink connector to sync data from Pulsar"]
 license_link: "https://www.apache.org/licenses/LICENSE-2.0"
 icon: "/images/connectors/hadoop.png"
-download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-hdfs3-3.0.6.nar"
+download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-hdfs3-3.0.7.nar"
 support: StreamNative
 support_link: https://streamnative.io
 support_img: "/images/streamnative.png"
diff --git a/connectors/hdfs3-sink/v4.0.0/hdfs3-sink.md b/connectors/hdfs3-sink/v4.0.0/hdfs3-sink.md
new file mode 100644
index 00000000..34765592
--- /dev/null
+++ b/connectors/hdfs3-sink/v4.0.0/hdfs3-sink.md
@@ -0,0 +1,69 @@
+---
+description: The HDFS3 sink connector pulls the messages from Pulsar topics and persists the messages to HDFS files.
+author: ["ASF"]
+contributors: ["ASF"]
+language: Java
+document:
+source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/hdfs3"
+license: Apache License 2.0
+tags: ["Pulsar IO", "HDFS3", "Sink"]
+alias: HDFS3 Sink
+features: ["Use HDFS3 sink connector to sync data from Pulsar"]
+license_link: "https://www.apache.org/licenses/LICENSE-2.0"
+icon: "/images/connectors/hadoop.png"
+download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-hdfs3-4.0.0.nar"
+support: StreamNative
+support_link: https://streamnative.io
+support_img: "/images/streamnative.png"
+owner_name: ""
+owner_img: ""
+dockerfile:
+id: "hdfs3-sink"
+---
+
+The HDFS3 sink connector pulls the messages from Pulsar topics and persists the messages to HDFS files.
+
+# Configuration
+
+The configuration of the HDFS3 sink connector has the following properties.
+
+## Property
+
+| Name | Type | Required | Default | Description |
+|------|------|----------|---------|-------------|
+| `hdfsConfigResources` | String | true | None | A file or a comma-separated list of files containing the Hadoop file system configuration.<br><br>**Example**<br>'core-site.xml'<br>'hdfs-site.xml' |
+| `directory` | String | true | None | The HDFS directory from which files are read or to which files are written. |
+| `encoding` | String | false | None | The character encoding for the files.<br><br>**Example**<br>UTF-8<br>ASCII |
+| `compression` | Compression | false | None | The compression codec used to compress or de-compress the files on HDFS.<br><br>Below are the available options:<br>• BZIP2<br>• DEFLATE<br>• GZIP<br>• LZ4<br>• SNAPPY |
+| `kerberosUserPrincipal` | String | false | None | The principal account of the Kerberos user used for authentication. |
+| `keytab` | String | false | None | The full pathname of the Kerberos keytab file used for authentication. |
+| `filenamePrefix` | String | false | None | The prefix of the files created inside the HDFS directory.<br><br>**Example**<br>The value of topicA results in files named topicA-. |
+| `fileExtension` | String | false | None | The extension added to the files written to HDFS.<br><br>**Example**<br>'.txt'<br>'.seq' |
+| `separator` | char | false | None | The character used to separate records in a text file.<br><br>If no value is provided, the contents of all records are concatenated together into one continuous byte array. |
+| `syncInterval` | long | false | 0 | The interval, in milliseconds, between calls to flush data to HDFS disk. |
+| `maxPendingRecords` | int | false | Integer.MAX_VALUE | The maximum number of records held in memory before acking.<br><br>Setting this property to 1 makes every record be sent to disk before the record is acked.<br><br>Setting this property to a higher value allows buffering records before flushing them to disk. |
+
+## Example
+
+Before using the HDFS3 sink connector, you need to create a configuration file through one of the following methods.
+
+* JSON
+
+  ```json
+  {
+    "hdfsConfigResources": "core-site.xml",
+    "directory": "/foo/bar",
+    "filenamePrefix": "prefix",
+    "compression": "SNAPPY"
+  }
+  ```
+
+* YAML
+
+  ```yaml
+  configs:
+    hdfsConfigResources: "core-site.xml"
+    directory: "/foo/bar"
+    filenamePrefix: "prefix"
+    compression: "SNAPPY"
+  ```
diff --git a/connectors/influxdb-sink/v3.0.6/influxdb-sink.md b/connectors/influxdb-sink/v3.0.7/influxdb-sink.md
similarity index 96%
rename from connectors/influxdb-sink/v3.0.6/influxdb-sink.md
rename to connectors/influxdb-sink/v3.0.7/influxdb-sink.md
index f3f51b74..92e8b41c 100644
--- a/connectors/influxdb-sink/v3.0.6/influxdb-sink.md
+++ b/connectors/influxdb-sink/v3.0.7/influxdb-sink.md
@@ -4,14 +4,14 @@ author: ["ASF"]
 contributors: ["ASF"]
 language: Java
 document:
-source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/influxdb"
+source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/influxdb"
 license: Apache License 2.0
 tags: ["Pulsar IO", "InfluxDB", "Sink"]
 alias: InfluxDB Sink
 features: ["Use InfluxDB sink connector to sync data from Pulsar"]
 license_link: "https://www.apache.org/licenses/LICENSE-2.0"
 icon: "/images/connectors/influxdb.png"
-download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-influxdb-3.0.6.nar"
+download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-influxdb-3.0.7.nar"
 support: StreamNative
 support_link: https://streamnative.io
 support_img: "/images/streamnative.png"
diff --git a/connectors/influxdb-sink/v4.0.0/influxdb-sink.md b/connectors/influxdb-sink/v4.0.0/influxdb-sink.md
new file mode 100644
index 00000000..8e06d66e
--- /dev/null
+++ b/connectors/influxdb-sink/v4.0.0/influxdb-sink.md
@@ -0,0 +1,123 @@
+---
+description: The InfluxDB sink connector pulls messages from Pulsar topics and persists the messages to InfluxDB.
+author: ["ASF"]
+contributors: ["ASF"]
+language: Java
+document:
+source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/influxdb"
+license: Apache License 2.0
+tags: ["Pulsar IO", "InfluxDB", "Sink"]
+alias: InfluxDB Sink
+features: ["Use InfluxDB sink connector to sync data from Pulsar"]
+license_link: "https://www.apache.org/licenses/LICENSE-2.0"
+icon: "/images/connectors/influxdb.png"
+download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-influxdb-4.0.0.nar"
+support: StreamNative
+support_link: https://streamnative.io
+support_img: "/images/streamnative.png"
+owner_name: ""
+owner_img: ""
+dockerfile:
+id: "influxdb-sink"
+---
+
+The InfluxDB sink connector pulls messages from Pulsar topics and persists the messages to InfluxDB.
+
+The InfluxDB sink provides different configurations for InfluxDB v1 and v2, respectively.
+
+# Configuration
+
+The configuration of the InfluxDB sink connector has the following properties.
+
+## Property
+### InfluxDBv2
+| Name | Type | Required | Sensitive | Default | Description |
+|------|------|----------|-----------|---------|-------------|
+| `influxdbUrl` | String | true | false | " " (empty string) | The URL of the InfluxDB instance. |
+| `token` | String | true | true | " " (empty string) | The authentication token used to authenticate to InfluxDB. |
+| `organization` | String | true | false | " " (empty string) | The InfluxDB organization to write to. |
+| `bucket` | String | true | false | " " (empty string) | The InfluxDB bucket to write to. |
+| `precision` | String | false | false | ns | The timestamp precision for writing data to InfluxDB.<br><br>Below are the available options:<br>• ns<br>• us<br>• ms<br>• s |
+| `logLevel` | String | false | false | NONE | The log level for InfluxDB requests and responses.<br><br>Below are the available options:<br>• NONE<br>• BASIC<br>• HEADERS<br>• FULL |
+| `gzipEnable` | boolean | false | false | false | Whether to enable gzip or not. |
+| `batchTimeMs` | long | false | false | 1000L | The InfluxDB operation time in milliseconds. |
+| `batchSize` | int | false | false | 200 | The batch size of writing to InfluxDB. |
+
+### InfluxDBv1
+| Name | Type | Required | Sensitive | Default | Description |
+|------|------|----------|-----------|---------|-------------|
+| `influxdbUrl` | String | true | false | " " (empty string) | The URL of the InfluxDB instance. |
+| `username` | String | false | true | " " (empty string) | The username used to authenticate to InfluxDB. |
+| `password` | String | false | true | " " (empty string) | The password used to authenticate to InfluxDB. |
+| `database` | String | true | false | " " (empty string) | The InfluxDB database to which messages are written. |
+| `consistencyLevel` | String | false | false | ONE | The consistency level for writing data to InfluxDB.<br><br>Below are the available options:<br>• ALL<br>• ANY<br>• ONE<br>• QUORUM |
+| `logLevel` | String | false | false | NONE | The log level for InfluxDB requests and responses.<br><br>Below are the available options:<br>• NONE<br>• BASIC<br>• HEADERS<br>• FULL |
+| `retentionPolicy` | String | false | false | autogen | The retention policy for InfluxDB. |
+| `gzipEnable` | boolean | false | false | false | Whether to enable gzip or not. |
+| `batchTimeMs` | long | false | false | 1000L | The InfluxDB operation time in milliseconds. |
+| `batchSize` | int | false | false | 200 | The batch size of writing to InfluxDB. |
+
+## Example
+Before using the InfluxDB sink connector, you need to create a configuration file through one of the following methods.
+### InfluxDBv2
+* JSON
+  ```json
+  {
+   "influxdbUrl": "http://localhost:9999",
+   "organization": "example-org",
+   "bucket": "example-bucket",
+   "token": "xxxx",
+   "precision": "ns",
+   "logLevel": "NONE",
+   "gzipEnable": false,
+   "batchTimeMs": 1000,
+   "batchSize": 100
+  }
+  ```
+
+* YAML
+  ```yaml
+  influxdbUrl: "http://localhost:9999"
+  organization: "example-org"
+  bucket: "example-bucket"
+  token: "xxxx"
+  precision: "ns"
+  logLevel: "NONE"
+  gzipEnable: false
+  batchTimeMs: 1000
+  batchSize: 100
+  ```
+
+### InfluxDBv1
+
+* JSON
+
+  ```json
+  {
+    "influxdbUrl": "http://localhost:8086",
+    "database": "test_db",
+    "consistencyLevel": "ONE",
+    "logLevel": "NONE",
+    "retentionPolicy": "autogen",
+    "gzipEnable": false,
+    "batchTimeMs": 1000,
+    "batchSize": 100
+  }
+  ```
+
+* YAML
+
+  ```yaml
+  influxdbUrl: "http://localhost:8086"
+  database: "test_db"
+  consistencyLevel: "ONE"
+  logLevel: "NONE"
+  retentionPolicy: "autogen"
+  gzipEnable: false
+  batchTimeMs: 1000
+  batchSize: 100
+  ```
diff --git a/connectors/kafka-sink/v3.0.6/kafka-sink.md b/connectors/kafka-sink/v3.0.7/kafka-sink.md
similarity index 97%
rename from connectors/kafka-sink/v3.0.6/kafka-sink.md
rename to connectors/kafka-sink/v3.0.7/kafka-sink.md
index 95d6d880..4edeb47a 100644
--- a/connectors/kafka-sink/v3.0.6/kafka-sink.md
+++ b/connectors/kafka-sink/v3.0.7/kafka-sink.md
@@ -4,14 +4,14 @@ author: ["ASF"]
 contributors: ["ASF"]
 language: Java
 document:
-source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/kafka"
+source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/kafka"
 license: Apache License 2.0
 tags: ["Pulsar IO", "Kafka", "Sink"]
 alias: Kafka Sink
 features: ["Use Kafka sink connector to sync data from Pulsar"]
 license_link: "https://www.apache.org/licenses/LICENSE-2.0"
 icon: "/images/connectors/kafka-logo.png"
-download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-kafka-3.0.6.nar"
+download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-kafka-3.0.7.nar"
 support: StreamNative
 support_link: https://streamnative.io
 support_img: "/images/streamnative.png"
diff --git a/connectors/kafka-sink/v4.0.0/kafka-sink.md b/connectors/kafka-sink/v4.0.0/kafka-sink.md
new file mode 100644
index 00000000..8bc43070
--- /dev/null
+++ b/connectors/kafka-sink/v4.0.0/kafka-sink.md
@@ -0,0 +1,124 @@
+---
+description: The Kafka sink connector pulls messages from Pulsar topics and persists the messages to Kafka topics.
+author: ["ASF"]
+contributors: ["ASF"]
+language: Java
+document:
+source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/kafka"
+license: Apache License 2.0
+tags: ["Pulsar IO", "Kafka", "Sink"]
+alias: Kafka Sink
+features: ["Use Kafka sink connector to sync data from Pulsar"]
+license_link: "https://www.apache.org/licenses/LICENSE-2.0"
+icon: "/images/connectors/kafka-logo.png"
+download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-kafka-4.0.0.nar"
+support: StreamNative
+support_link: https://streamnative.io
+support_img: "/images/streamnative.png"
+owner_name: ""
+owner_img: ""
+dockerfile:
+id: "kafka-sink"
+---
+
+The [Kafka](https://kafka.apache.org/) sink connector pulls messages from Pulsar topics and persists the messages to Kafka topics. For more information about connectors, see [Connector Overview](https://docs.streamnative.io/docs/connector-overview).
+
+This document introduces how to get started with creating a Kafka sink connector and getting it up and running.
+
+## Quick start
+
+### Prerequisites
+
+The prerequisites for connecting a Kafka sink connector to external systems include:
+
+Apache Kafka: Ensure you have a running Kafka instance. You can follow the official Kafka [Quickstart guide](https://kafka.apache.org/quickstart) to set up a Kafka instance if you don't have one already.
+
+### 1. Create a connector
+
+The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector,
+you need to replace `--sink-type kafka` with `--archive /path/to/pulsar-io-kafka.nar`. You can find the button to download the `nar` package at the beginning of the document.
+
+{% callout title="For StreamNative Cloud User" type="note" %}
+If you are a StreamNative Cloud user, you need to [set up your environment](https://docs.streamnative.io/docs/connector-setup) first.
+{% /callout %}
+
+```bash
+pulsarctl sinks create \
+  --sink-type kafka \
+  --name kafka-sink \
+  --tenant public \
+  --namespace default \
+  --inputs "Your topic name" \
+  --parallelism 1 \
+  --sink-config \
+  '{
+    "bootstrapServers": "localhost:9092",
+    "topic": "kafka-topic-name",
+    "acks": "1"
+  }'
+```
+
+The `--sink-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own.
+If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference.
+
+{% callout title="Note" type="note" %}
+You can also choose to use a variety of other tools to create a connector:
+- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Terraform](https://github.com/hashicorp/terraform): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document.
+{% /callout %}
+
+### 2. Send messages to the topic
+
+{% callout title="Note" type="note" %}
+If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information.
+{% /callout %}
+
+``` java
+    PulsarClient client = PulsarClient.builder()
+        .serviceUrl("{{Your Pulsar URL}}")
+        .build();
+
+    Producer<String> producer = client.newProducer(Schema.STRING)
+        .topic("{{Your topic name}}")
+        .create();
+
+    String message = "hello kafka";
+    MessageId msgID = producer.send(message);
+    System.out.println("Publish " + message + " and message ID " + msgID);
+
+    producer.flush();
+    producer.close();
+    client.close();
+```
+
+You can also send the message using the command line:
+```sh
+$ bin/pulsar-client produce pulsar-topic-name --messages "hello kafka"
+```
+
+### 3. Check the data on the Kafka topic
+
+You can consume the data from the Kafka topic using the command:
+```sh
+$ bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic kafka-topic-name --from-beginning
+```
+
+If everything is set up correctly, you should see the message "hello kafka" in the Kafka consumer.
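+
+Alternatively, you can verify the output programmatically. The following is a minimal, illustrative Java sketch using the Kafka consumer API (not part of the connector; the bootstrap server and topic mirror the placeholder values above, and the group id is an arbitrary assumption):
+
+``` java
+    import java.time.Duration;
+    import java.util.Collections;
+    import java.util.Properties;
+    import org.apache.kafka.clients.consumer.ConsumerRecord;
+    import org.apache.kafka.clients.consumer.ConsumerRecords;
+    import org.apache.kafka.clients.consumer.KafkaConsumer;
+
+    // Illustrative verification sketch: read back what the sink wrote to Kafka.
+    Properties props = new Properties();
+    props.put("bootstrap.servers", "localhost:9092");
+    props.put("group.id", "verify-group"); // arbitrary assumption
+    props.put("auto.offset.reset", "earliest");
+    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
+    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
+
+    try (KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props)) {
+        consumer.subscribe(Collections.singletonList("kafka-topic-name"));
+        ConsumerRecords<String, byte[]> records = consumer.poll(Duration.ofSeconds(10));
+        for (ConsumerRecord<String, byte[]> record : records) {
+            System.out.println("Received: " + new String(record.value()));
+        }
+    }
+```
+
+## Configuration Properties
+
+This table outlines the properties of a Kafka sink connector.
+
+| Name | Type| Required | Default | Description
+|------|----------|---------|-------------|-------------|
+| `bootstrapServers` |String| true | " " (empty string) | A comma-separated list of host and port pairs for establishing the initial connection to the Kafka cluster. |
+|`acks`|String|true|" " (empty string) |The number of acknowledgments that the producer requires the leader to receive before a request completes.<br><br>This controls the durability of the sent records.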
+|`batchsize`|long|false|16384L|The maximum batch size, in bytes, that the Kafka producer attempts to collect before sending records to the brokers.
+|`maxRequestSize`|long|false|1048576L|The maximum size of a Kafka request in bytes.
+|`topic`|String|true|" " (empty string) |The Kafka topic which receives messages from Pulsar.
+| `keyDeserializationClass` | String|false | org.apache.kafka.common.serialization.StringSerializer | The serializer class for Kafka producers to serialize keys.
+| `valueDeserializationClass` | String|false | org.apache.kafka.common.serialization.ByteArraySerializer | The serializer class for Kafka producers to serialize values.<br><br>The serializer is set by a specific implementation of [`KafkaAbstractSink`](https://github.com/apache/pulsar/blob/master/pulsar-io/kafka/src/main/java/org/apache/pulsar/io/kafka/KafkaAbstractSink.java).
+|`producerConfigProperties`|Map|false|" " (empty string)|The producer configuration properties to be passed to producers.<br><br>**Note: other properties specified in the connector configuration file take precedence over this configuration**.
+
diff --git a/connectors/kafka-source/v3.0.6/kafka-source.md b/connectors/kafka-source/v3.0.7/kafka-source.md
similarity index 97%
rename from connectors/kafka-source/v3.0.6/kafka-source.md
rename to connectors/kafka-source/v3.0.7/kafka-source.md
index 211c2aa0..41c83210 100644
--- a/connectors/kafka-source/v3.0.6/kafka-source.md
+++ b/connectors/kafka-source/v3.0.7/kafka-source.md
@@ -4,14 +4,14 @@ author: ["ASF"]
 contributors: ["ASF"]
 language: Java
 document:
-source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/kafka"
+source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/kafka"
 license: Apache License 2.0
 tags: ["Pulsar IO", "Kafka", "Source"]
 alias: Kafka Source
 features: ["Use Kafka source connector to sync data to Pulsar"]
 license_link: "https://www.apache.org/licenses/LICENSE-2.0"
 icon: "/images/connectors/kafka-logo.png"
-download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-kafka-3.0.6.nar"
+download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-kafka-3.0.7.nar"
 support: StreamNative
 support_link: https://streamnative.io
 support_img: "/images/streamnative.png"
diff --git a/connectors/kafka-source/v4.0.0/kafka-source.md b/connectors/kafka-source/v4.0.0/kafka-source.md
new file mode 100644
index 00000000..c4c3c862
--- /dev/null
+++ b/connectors/kafka-source/v4.0.0/kafka-source.md
@@ -0,0 +1,109 @@
+---
+description: The Kafka source connector pulls messages from Kafka topics and persists the messages to Pulsar topics.
+author: ["ASF"]
+contributors: ["ASF"]
+language: Java
+document:
+source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/kafka"
+license: Apache License 2.0
+tags: ["Pulsar IO", "Kafka", "Source"]
+alias: Kafka Source
+features: ["Use Kafka source connector to sync data to Pulsar"]
+license_link: "https://www.apache.org/licenses/LICENSE-2.0"
+icon: "/images/connectors/kafka-logo.png"
+download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-kafka-4.0.0.nar"
+support: StreamNative
+support_link: https://streamnative.io
+support_img: "/images/streamnative.png"
+owner_name: ""
+owner_img: ""
+dockerfile:
+sn_available: true
+id: "kafka-source"
+---
+
+The [Kafka](https://kafka.apache.org/) source connector pulls messages from Kafka topics and persists the messages to Pulsar topics. For more information about connectors, see [Connector Overview](https://docs.streamnative.io/docs/connector-overview).
+
+This document introduces how to get started with creating a Kafka source connector and getting it up and running.
+
+![](/images/connectors/kafka-source.png)
+
+## Quick start
+
+### Prerequisites
+
+The prerequisites for connecting a Kafka source connector to external systems include:
+
+Apache Kafka: Ensure you have a running Kafka instance. You can follow the official Kafka [Quickstart guide](https://kafka.apache.org/quickstart) to set up a Kafka instance if you don't have one already.
+
+### 1. Create a connector
+
+The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector,
+you need to replace `--source-type kafka` with `--archive /path/to/pulsar-io-kafka.nar`. You can find the button to download the `nar` package at the beginning of the document.
+
+{% callout title="For StreamNative Cloud User" type="note" %}
+If you are a StreamNative Cloud user, you need to [set up your environment](https://docs.streamnative.io/docs/connector-setup) first.
+{% /callout %}
+
+```bash
+pulsarctl sources create \
+  --source-type kafka \
+  --name kafka-source \
+  --tenant public \
+  --namespace default \
+  --destination-topic-name "Your topic name" \
+  --parallelism 1 \
+  --source-config \
+  '{
+    "bootstrapServers": "localhost:9092",
+    "topic": "kafka-topic-name",
+    "groupId": "group-id"
+  }'
+```
+
+The `--source-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own.
+If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference.
+
+{% callout title="Note" type="note" %}
+You can also choose to use a variety of other tools to create a connector:
+- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Terraform](https://github.com/hashicorp/terraform): You can find an example in the [StreamNative Cloud Doc](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The docker image can be found at the beginning of the document.
+{% /callout %}
+
+### 2. Send messages to the Kafka topic
+
+You can send the message using the command line:
+```sh
+$ bin/kafka-console-producer.sh --broker-list localhost:9092 --topic kafka-topic-name
+> hello pulsar
+```
+
+### 3. Check the data on the Pulsar topic
+
+You can consume the data from the Pulsar topic using the command:
+```sh
+$ bin/pulsar-client consume --subscription-name my-subscription pulsar-topic-name -n 0
+```
+
+If everything is set up correctly, you should see the message "hello pulsar" in the Pulsar consumer.
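+
+Equivalently, you can read the topic from a Java application. The following is a minimal, illustrative sketch using the Pulsar client API (the topic and subscription names are the same placeholders as above):
+
+``` java
+    PulsarClient client = PulsarClient.builder()
+        .serviceUrl("{{Your Pulsar URL}}")
+        .build();
+
+    Consumer<byte[]> consumer = client.newConsumer()
+        .topic("{{Your topic name}}")
+        .subscriptionName("my-subscription")
+        .subscribe();
+
+    // Block until one message arrives, print it, and acknowledge it.
+    Message<byte[]> msg = consumer.receive();
+    System.out.println("Received: " + new String(msg.getData()));
+    consumer.acknowledge(msg);
+
+    consumer.close();
+    client.close();
+```
+
+## Configuration Properties
+
+This table outlines the properties of a Kafka source connector.
+
+| Name | Type| Required | Default | Description
+|------|----------|---------|-------------|-------------|
+| `bootstrapServers` |String| true | " " (empty string) | A comma-separated list of host and port pairs for establishing the initial connection to the Kafka cluster. |
+| `groupId` |String| true | " " (empty string) | A unique string that identifies the group of consumer processes to which this consumer belongs. |
+| `fetchMinBytes` | long|false | 1 | The minimum byte expected for each fetch response. |
+| `autoCommitEnabled` | boolean |false | true | If set to true, the consumer's offset is periodically committed in the background.<br><br>This committed offset is used as the position from which a new consumer resumes if the process fails. |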
+| `autoCommitIntervalMs` | long|false | 5000 | The frequency in milliseconds at which the consumer offsets are auto-committed to Kafka if `autoCommitEnabled` is set to true. |
+| `heartbeatIntervalMs` | long| false | 3000 | The interval between heartbeats to the consumer coordinator when using Kafka's group management facilities.<br><br>**Note: `heartbeatIntervalMs` must be smaller than `sessionTimeoutMs`**.|
+| `sessionTimeoutMs` | long|false | 30000 | The timeout used to detect consumer failures when using Kafka's group management facility. |
+| `topic` | String|true | " " (empty string)| The Kafka topic that sends messages to Pulsar. |
+| `consumerConfigProperties` | Map| false | " " (empty string) | The consumer configuration properties to be passed to consumers.<br><br>**Note: other properties specified in the connector configuration file take precedence over this configuration**. |
+| `keyDeserializationClass` | String|false | org.apache.kafka.common.serialization.StringDeserializer | The deserializer class for Kafka consumers to deserialize keys.<br>The deserializer is set by a specific implementation of [`KafkaAbstractSource`](https://github.com/apache/pulsar/blob/master/pulsar-io/kafka/src/main/java/org/apache/pulsar/io/kafka/KafkaAbstractSource.java). |
+| `valueDeserializationClass` | String|false | org.apache.kafka.common.serialization.ByteArrayDeserializer | The deserializer class for Kafka consumers to deserialize values. |
+| `autoOffsetReset` | String | false | earliest | The default offset reset policy. |
\ No newline at end of file
diff --git a/connectors/kinesis-sink/v3.0.6/kinesis-sink.md b/connectors/kinesis-sink/v3.0.7/kinesis-sink.md
similarity index 99%
rename from connectors/kinesis-sink/v3.0.6/kinesis-sink.md
rename to connectors/kinesis-sink/v3.0.7/kinesis-sink.md
index 92c74210..21f50baf 100644
--- a/connectors/kinesis-sink/v3.0.6/kinesis-sink.md
+++ b/connectors/kinesis-sink/v3.0.7/kinesis-sink.md
@@ -16,8 +16,8 @@ support_img: "/images/streamnative.png"
 owner_name: ""
 owner_img: ""
 dockerfile:
-source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/kinesis"
-download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-kinesis-3.0.6.nar"
+source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/kinesis"
+download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-kinesis-3.0.7.nar"
 sn_available: true
 id: "kinesis-sink"
 ---
diff --git a/connectors/kinesis-sink/v4.0.0/kinesis-sink.md b/connectors/kinesis-sink/v4.0.0/kinesis-sink.md
new file mode 100644
index 00000000..a89708ad
--- /dev/null
+++ b/connectors/kinesis-sink/v4.0.0/kinesis-sink.md
@@ -0,0 +1,145 @@
+---
+description: The Kinesis sink connector pulls data from Pulsar and persists data into Amazon Kinesis.
+author: ["ASF"]
+contributors: ["ASF"]
+language: Java
+document:
+license: Apache License 2.0
+tags: ["Pulsar IO", "Kinesis", "Sink"]
+alias: Kinesis Sink
+features: ["Use Kinesis sink connector to sync data from Pulsar"]
+license_link: "https://www.apache.org/licenses/LICENSE-2.0"
+icon: "/images/connectors/aws-kinesis.png"
+support: StreamNative
+support_link: https://streamnative.io
+support_img: "/images/streamnative.png"
+owner_name: ""
+owner_img: ""
+dockerfile:
+source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/kinesis"
+download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-kinesis-4.0.0.nar"
+sn_available: true
+id: "kinesis-sink"
+---
+
+The AWS Kinesis sink connector pulls data from Pulsar and persists data into Amazon Kinesis. For more information about connectors, see [Connector Overview](https://docs.streamnative.io/docs/connector-overview).
+
+![](/images/connectors/kinesis-sink.png)
+
+This document introduces how to get started with creating an AWS Kinesis sink connector and getting it up and running.
+
+## Quick start
+
+### Prerequisites
+
+The prerequisites for connecting an AWS Kinesis sink connector to external systems include:
+
+1. Create a Kinesis data stream in AWS.
+2. Create an [AWS User](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html) and an `AccessKey` (please record the value of the `AccessKey` and its `SecretKey`).
\ No newline at end of file diff --git a/connectors/kinesis-sink/v3.0.6/kinesis-sink.md b/connectors/kinesis-sink/v3.0.7/kinesis-sink.md similarity index 99% rename from connectors/kinesis-sink/v3.0.6/kinesis-sink.md rename to connectors/kinesis-sink/v3.0.7/kinesis-sink.md index 92c74210..21f50baf 100644 --- a/connectors/kinesis-sink/v3.0.6/kinesis-sink.md +++ b/connectors/kinesis-sink/v3.0.7/kinesis-sink.md @@ -16,8 +16,8 @@ support_img: "/images/streamnative.png" owner_name: "" owner_img: "" dockerfile: -source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/kinesis" -download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-kinesis-3.0.6.nar" +source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/kinesis" +download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-kinesis-3.0.7.nar" sn_available: true id: "kinesis-sink" --- diff --git a/connectors/kinesis-sink/v4.0.0/kinesis-sink.md b/connectors/kinesis-sink/v4.0.0/kinesis-sink.md new file mode 100644 index 00000000..a89708ad --- /dev/null +++ b/connectors/kinesis-sink/v4.0.0/kinesis-sink.md @@ -0,0 +1,145 @@ +--- +description: The Kinesis sink connector pulls data from Pulsar and persists data into Amazon Kinesis. +author: ["ASF"] +contributors: ["ASF"] +language: Java +document: +license: Apache License 2.0 +tags: ["Pulsar IO", "Kinesis", "Sink"] +alias: Kinesis Sink +features: ["Use Kinesis sink connector to sync data from Pulsar"] +license_link: "https://www.apache.org/licenses/LICENSE-2.0" +icon: "/images/connectors/aws-kinesis.png" +support: StreamNative +support_link: https://streamnative.io +support_img: "/images/streamnative.png" +owner_name: "" +owner_img: "" +dockerfile: +source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/kinesis" +download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-kinesis-4.0.0.nar" +sn_available: true +id: "kinesis-sink" +--- + +The AWS Kinesis sink connector pulls data from Pulsar and persists data into Amazon Kinesis. For more information about connectors, see [Connector Overview](https://docs.streamnative.io/docs/connector-overview). + +![](/images/connectors/kinesis-sink.png) + +This document introduces how to get started with creating an AWS Kinesis sink connector and getting it up and running. +## Quick start + +### Prerequisites + +The prerequisites for connecting an AWS Kinesis sink connector to external systems include: + +1. Create a Kinesis data stream in AWS. +2. Create an [AWS User](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html) and an `AccessKey` (record the values of both the `AccessKey` and the `SecretKey`). +3. Assign the following permissions to the AWS User: +- [AmazonKinesisFullAccess](https://docs.aws.amazon.com/aws-managed-policy/latest/reference/AmazonKinesisFullAccess.html) +- [CloudWatch:PutMetricData](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_PutMetricData.html): this permission is required because the AWS Kinesis producer periodically [sends metrics to CloudWatch](https://docs.aws.amazon.com/streams/latest/dev/monitoring-with-cloudwatch.html). + +### 1. Create a connector + +The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector, +you need to replace `--sink-type kinesis` with `--archive /path/to/pulsar-io-kinesis.nar`. You can find the button to download the `nar` package at the beginning of the document. + +{% callout title="For StreamNative Cloud User" type="note" %} +If you are a StreamNative Cloud user, you need to [set up your environment](https://docs.streamnative.io/docs/connector-setup) first. +{% /callout %} + +```bash +pulsarctl sinks create \ + --sink-type kinesis \ + --name kinesis-sink \ + --tenant public \ + --namespace default \ + --inputs "Your topic name" \ + --parallelism 1 \ + --sink-config \ + '{ + "awsRegion": "Your aws kinesis region", + "awsKinesisStreamName": "Your kinesis stream name", + "awsCredentialPluginParam": "{\"accessKey\":\"Your AWS access key\",\"secretKey\":\"Your AWS secret access key\"}" + }' +``` + +The `--sink-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own. +If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference. + +{% callout title="Note" type="note" %} +You can also choose to use a variety of other tools to create a connector: +- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example in the [StreamNative Cloud documentation](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [RestAPI](https://pulsar.apache.org/sink-rest-api/?version=3.1.1): You can find an example in the [StreamNative Cloud documentation](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Terraform](https://github.com/hashicorp/terraform): You can find an example in the [StreamNative Cloud documentation](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The Docker image can be found at the beginning of the document. +{% /callout %} + +### 2. Send messages to the topic + +{% callout title="Note" type="note" %} +If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information. +{% /callout %} + +``` java + PulsarClient client = PulsarClient.builder() + .serviceUrl("{{Your Pulsar URL}}") + .build(); + + Producer<String> producer = client.newProducer(Schema.STRING) + .topic("{{Your topic name}}") + .create(); + + String message = "test-message"; + MessageId msgID = producer.send(message); + System.out.println("Publish " + message + " and message ID " + msgID); + + producer.flush(); + producer.close(); + client.close(); +``` + +### 3. 
Show data on AWS Kinesis console +You can use the AWS Kinesis `Data Viewer` to view the data. ![](/images/connectors/kinesis-sink-show-data.png) + +## Configuration Properties + +This table outlines the properties of an AWS Kinesis sink connector. + +| Name | Type | Required | Sensitive | Default | Description | +|-----------------------------|---------------|----------|-----------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `awsKinesisStreamName` | String | true | false | " " (empty string) | The Kinesis stream name. | +| `awsRegion` | String | true | false | " " (empty string) | The AWS Kinesis [region](https://www.aws-services.info/regions.html).

    **Example:**
    us-west-1, us-west-2. | +| `awsCredentialPluginName` | String | false | false | " " (empty string) | The fully-qualified class name of the implementation of [AwsCredentialProviderPlugin](https://github.com/apache/pulsar/blob/master/pulsar-io/aws/src/main/java/org/apache/pulsar/io/aws/AwsCredentialProviderPlugin.java). Please refer to [Configure AwsCredentialProviderPlugin](#configure-awscredentialproviderplugin). | +| `awsCredentialPluginParam` | String | false | true | " " (empty string) | The JSON parameter to initialize `awsCredentialsProviderPlugin`. Please refer to [Configure AwsCredentialProviderPlugin](#configure-awscredentialproviderplugin). | +| `awsEndpoint` | String | false | false | " " (empty string) | A custom Kinesis endpoint. For more information, see [AWS documentation](https://docs.aws.amazon.com/general/latest/gr/rande.html). | +| `retainOrdering` | Boolean | false | false | false | Whether Pulsar connectors retain the ordering when moving messages from Pulsar to Kinesis. | +| `messageFormat` | MessageFormat | false | false | ONLY_RAW_PAYLOAD | Message format in which Kinesis sink converts Pulsar messages and publishes them to Kinesis streams.

    Available options include:

  • `ONLY_RAW_PAYLOAD`: Kinesis sink directly publishes Pulsar message payload as a message into the configured Kinesis stream.

  • `FULL_MESSAGE_IN_JSON`: Kinesis sink creates a JSON payload with Pulsar message payload, properties, and encryptionCtx, and publishes JSON payload into the configured Kinesis stream.

  • `FULL_MESSAGE_IN_FB`: Kinesis sink creates a flatbuffers serialized payload with Pulsar message payload, properties, and encryptionCtx, and publishes flatbuffers payload into the configured Kinesis stream.

  • `FULL_MESSAGE_IN_JSON_EXPAND_VALUE`: Kinesis sink sends a JSON structure containing the record topic name, key, payload, properties, and event time. The record schema is used to convert the value to JSON. | +| `jsonIncludeNonNulls` | Boolean | false | false | true | Only the properties with non-null values are included when the message format is `FULL_MESSAGE_IN_JSON_EXPAND_VALUE`. | +| `jsonFlatten` | Boolean | false | false | false | When it is set to `true` and the message format is `FULL_MESSAGE_IN_JSON_EXPAND_VALUE`, the output JSON is flattened. | +| `retryInitialDelayInMillis` | Long | false | false | 100 | The initial delay (in milliseconds) between retries. | +| `retryMaxDelayInMillis` | Long | false | false | 60000 | The maximum delay (in milliseconds) between retries. | + +### Configure AwsCredentialProviderPlugin + +The AWS Kinesis sink connector supports three ways of connecting to AWS Kinesis, selected by configuring `awsCredentialPluginName`. + +- Leave `awsCredentialPluginName` empty to get the connector authenticated by passing `accessKey` and `secretKey` in `awsCredentialPluginParam`. + + ```json + {"accessKey":"Your access key","secretKey":"Your secret key"} + ``` + +- Set `awsCredentialPluginName` to `org.apache.pulsar.io.aws.AwsDefaultProviderChainPlugin` to use the default AWS provider chain. With this option, you don’t need to configure `awsCredentialPluginParam`. For more information, see [AWS documentation](https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default). + +- Set `awsCredentialPluginName` to `org.apache.pulsar.io.aws.STSAssumeRoleProviderPlugin` to use the [default AWS provider chain](https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default), and configure `roleArn` and `roleSessionName` in `awsCredentialPluginParam`. For more information, see [AWS documentation](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html). + + ```json + {"roleArn": "arn...", "roleSessionName": "name"} + ```
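+
+Putting the pieces together, a complete `--sink-config` that uses the STS plugin might look like the sketch below. It is illustrative only: the region, stream name, role ARN, and session name are placeholders, and `messageFormat` is included merely to show a non-default option from the table above.
+
+```json
+{
+  "awsRegion": "us-west-2",
+  "awsKinesisStreamName": "my-stream",
+  "awsCredentialPluginName": "org.apache.pulsar.io.aws.STSAssumeRoleProviderPlugin",
+  "awsCredentialPluginParam": "{\"roleArn\": \"arn:aws:iam::123456789012:role/my-kinesis-role\", \"roleSessionName\": \"pulsar-kinesis-sink\"}",
+  "messageFormat": "FULL_MESSAGE_IN_JSON_EXPAND_VALUE"
+}
+```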
+ + + + + + diff --git a/connectors/kinesis-source/v3.0.6/kinesis-source.md b/connectors/kinesis-source/v3.0.7/kinesis-source.md similarity index 99% rename from connectors/kinesis-source/v3.0.6/kinesis-source.md rename to connectors/kinesis-source/v3.0.7/kinesis-source.md index 1f5cd14f..fc0a9361 100644 --- a/connectors/kinesis-source/v3.0.6/kinesis-source.md +++ b/connectors/kinesis-source/v3.0.7/kinesis-source.md @@ -16,8 +16,8 @@ support_img: "/images/streamnative.png" owner_name: "" owner_img: "" dockerfile: -source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/kinesis" -download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-kinesis-3.0.6.nar" +source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/kinesis" +download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-kinesis-3.0.7.nar" sn_available: true id: "kinesis-source" --- diff --git a/connectors/kinesis-source/v4.0.0/kinesis-source.md b/connectors/kinesis-source/v4.0.0/kinesis-source.md new file mode 100644 index 00000000..2f718c2c --- /dev/null +++ b/connectors/kinesis-source/v4.0.0/kinesis-source.md @@ -0,0 +1,195 @@ +--- +description: The Kinesis source connector pulls data from Amazon Kinesis and persists data into Pulsar +author: ["ASF"] +contributors: ["ASF"] +language: Java +document: +license: Apache License 2.0 +tags: ["Pulsar IO", "Kinesis", "Source"] +alias: Kinesis Source +features: ["Use Kinesis source connector to sync data to Pulsar"] +license_link: "https://www.apache.org/licenses/LICENSE-2.0" +icon: "/images/connectors/aws-kinesis.png" +support: StreamNative +support_link: https://streamnative.io +support_img: "/images/streamnative.png" +owner_name: "" +owner_img: "" +dockerfile: +source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/kinesis" +download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-kinesis-4.0.0.nar" +sn_available: true +id: "kinesis-source" +--- + +The Kinesis source connector pulls data from Amazon Kinesis and persists data into Pulsar. For more information about connectors, see [Connector Overview](https://docs.streamnative.io/docs/connector-overview). + +![](/images/connectors/kinesis-source.png) + +This connector uses the [Kinesis Client Library](https://github.com/awslabs/amazon-kinesis-client) (KCL) to consume messages. The KCL uses [DynamoDB](https://docs.aws.amazon.com/streams/latest/dev/shared-throughput-kcl-consumers.html) to track checkpoints for consumers, +and uses [CloudWatch](https://docs.aws.amazon.com/streams/latest/dev/monitoring-with-cloudwatch.html) to track metrics for consumers. + +This document introduces how to get started with creating an AWS Kinesis source connector and getting it up and running. + +{% callout title="Note" type="note" %} +Currently, the Kinesis source connector only supports raw messages. If you use [AWS Key Management Service (KMS)](https://docs.aws.amazon.com/streams/latest/dev/server-side-encryption.html) encrypted messages, the encrypted messages are sent to Pulsar directly, and you need to [manually decrypt](https://aws.amazon.com/blogs/big-data/encrypt-and-decrypt-amazon-kinesis-records-using-aws-kms/) the data on the consumer side of Pulsar.
+{% /callout %} + +## Quick start + +### Prerequisites + +The prerequisites for connecting an AWS Kinesis source connector to external systems include: + +1. Create a Kinesis data stream in AWS. +2. Create an [AWS User](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_users_create.html) and an `AccessKey` (record the values of both the `AccessKey` and the `SecretKey`). +3. Assign the following permissions to the AWS User: +- [AmazonKinesisFullAccess](https://docs.aws.amazon.com/aws-managed-policy/latest/reference/AmazonKinesisFullAccess.html) +- [CloudWatch:PutMetricData](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_PutMetricData.html): this permission is required because the AWS Kinesis client periodically [sends metrics to CloudWatch](https://docs.aws.amazon.com/streams/latest/dev/monitoring-with-cloudwatch.html). +- [AmazonDynamoDBFullAccess](https://docs.aws.amazon.com/aws-managed-policy/latest/reference/AmazonDynamoDBFullAccess.html): this permission is required because the AWS Kinesis client uses DynamoDB to [store checkpoint status](https://docs.aws.amazon.com/streams/latest/dev/shared-throughput-kcl-consumers.html#shared-throughput-kcl-consumers-what-is-leasetable). + +### 1. Create a connector + +The following command shows how to use [pulsarctl](https://github.com/streamnative/pulsarctl) to create a `builtin` connector. If you want to create a `non-builtin` connector, +you need to replace `--source-type kinesis` with `--archive /path/to/pulsar-io-kinesis.nar`. You can find the button to download the `nar` package at the beginning of the document. + +{% callout title="For StreamNative Cloud User" type="note" %} +If you are a StreamNative Cloud user, you need to [set up your environment](https://docs.streamnative.io/docs/connector-setup) first. +{% /callout %} + +```bash +pulsarctl sources create \ + --source-type kinesis \ + --name kinesis-source \ + --tenant public \ + --namespace default \ + --destination-topic-name "Your topic name" \ + --parallelism 1 \ + --source-config \ + '{ + "awsRegion": "Your aws kinesis region", + "awsKinesisStreamName": "Your kinesis stream name", + "awsCredentialPluginParam": "{\"accessKey\":\"Your AWS access key\",\"secretKey\":\"Your AWS secret access key\"}", + "applicationName": "Your application name, which will be used as the table name for DynamoDB. E.g.: pulsar-io-kinesis" + }' +``` + +The `--source-config` is the minimum necessary configuration for starting this connector, and it is a JSON string. You need to substitute the relevant parameters with your own. +If you want to configure more parameters, see [Configuration Properties](#configuration-properties) for reference. + +{% callout title="Note" type="note" %} +You can also choose to use a variety of other tools to create a connector: +- [pulsar-admin](https://pulsar.apache.org/docs/3.1.x/io-use/): The command arguments for `pulsar-admin` are similar to those of `pulsarctl`. You can find an example in the [StreamNative Cloud documentation](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [RestAPI](https://pulsar.apache.org/source-rest-api/?version=3.1.1): You can find an example in the [StreamNative Cloud documentation](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector). +- [Terraform](https://github.com/hashicorp/terraform): You can find an example in the [StreamNative Cloud documentation](https://docs.streamnative.io/docs/connector-create#create-a-built-in-connector).
+- [Function Mesh](https://functionmesh.io/docs/connectors/run-connector): The Docker image can be found at the beginning of the document. +{% /callout %} + +### 2. Send messages to Kinesis + +{% callout title="Note" type="note" %} +The following example uses KPL to send data to Kinesis. For more details, see [Writing to your Kinesis Data Stream Using the KPL](https://docs.aws.amazon.com/streams/latest/dev/kinesis-kpl-writing.html). +{% /callout %} + +``` java +public static void main(String[] args) throws Exception { + + AWSCredentialsProvider credentialsProvider = + new AWSStaticCredentialsProvider(new BasicAWSCredentials("Your access key", "Your secret key")); + + KinesisProducerConfiguration kinesisConfig = new KinesisProducerConfiguration(); + kinesisConfig.setRegion("Your aws kinesis region"); + kinesisConfig.setCredentialsProvider(credentialsProvider); + KinesisProducer kinesis = new KinesisProducer(kinesisConfig); + // Put some records + for (int i = 0; i < 10; ++i) { + ByteBuffer data = ByteBuffer.wrap("test-kinesis-data".getBytes("UTF-8")); + // addUserRecord is asynchronous and doesn't block + kinesis.addUserRecord("Your kinesis stream name", "myPartitionKey", data); + } + kinesis.flush(); + Thread.sleep(60000); +} +``` + +### 3. Show data using Pulsar client + +{% callout title="Note" type="note" %} +If your connector is created on StreamNative Cloud, you need to authenticate your clients. See [Build applications using Pulsar clients](https://docs.streamnative.io/docs/qs-connect#jumpstart-for-beginners) for more information. +{% /callout %} + +``` +bin/pulsar-client \ +--url "Your Pulsar serviceUrl" \ +consume "The topic that you specified when you created the connector" -s "test-sub" -n 10 -p Earliest + +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450427674751028642409795813410], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450430092602667871668145225762], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450430006528487486297319931938], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450432510454307100926494638114], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450433719380126715555669344290], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450434928305946330184844050466], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450436130006765944814018756642], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450437346157585559443193462818], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450438555083405174072368168994], content:test-kinesis-data +----- got message ----- +key:[myPartitionKey], properties:[=49643665543143983613442895450439764009224788701542875170], content:test-kinesis-data + +``` + +## Configuration Properties + 
+ +| Name | Type | Required | Sensitive | Default | Description | +|----------------------------|-------------------------|----------|-----------|---------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `awsKinesisStreamName` | String | true | false | " " (empty string) | The Kinesis stream name. | +| `awsRegion` | String | false | false | " " (empty string) | The AWS region.

    **Example:**
    us-west-1, us-west-2. | +| `awsCredentialPluginName` | String | false | false | " " (empty string) | The fully-qualified class name of the implementation of [AwsCredentialProviderPlugin](https://github.com/apache/pulsar/blob/master/pulsar-io/aws/src/main/java/org/apache/pulsar/io/aws/AwsCredentialProviderPlugin.java). For more information, see [Configure AwsCredentialProviderPlugin](#configure-awscredentialproviderplugin). | +| `awsCredentialPluginParam` | String | false | true | " " (empty string) | The JSON parameter to initialize `awsCredentialsProviderPlugin`. For more information, see [Configure AwsCredentialProviderPlugin](#configure-awscredentialproviderplugin). | +| `awsEndpoint` | String | false | false | " " (empty string) | The Kinesis endpoint URL. For more information, see [AWS documentation](https://docs.aws.amazon.com/general/latest/gr/rande.html). | +| `dynamoEndpoint` | String | false | false | " " (empty string) | The DynamoDB endpoint URL. For more information, see [AWS documentation](https://docs.aws.amazon.com/general/latest/gr/rande.html). | +| `cloudwatchEndpoint` | String | false | false | " " (empty string) | The CloudWatch endpoint URL. For more information, see [Amazon documentation](https://docs.aws.amazon.com/general/latest/gr/rande.html). | +| `applicationName` | String | false | false | Pulsar IO connector | The name of the Amazon Kinesis application, which will be used as the table name for DynamoDB. | +| `initialPositionInStream` | InitialPositionInStream | false | false | LATEST | The position where the connector starts from.

    Below are the available options:

  • `AT_TIMESTAMP`: start from the record at or after the specified timestamp.

  • `LATEST`: start after the most recent data record.

  • `TRIM_HORIZON`: start from the oldest available data record. | +| `startAtTime` | Date | false | false | " " (empty string) | If `initialPositionInStream` is set to `AT_TIMESTAMP`, this specifies the point in time from which to start consumption. | +| `checkpointInterval` | Long | false | false | 60000 | The frequency of the Kinesis stream checkpoint in milliseconds. | +| `backoffTime` | Long | false | false | 3000 | The amount of time (in milliseconds) to delay between requests when the connector encounters a throttling exception from AWS Kinesis. | +| `numRetries` | int | false | false | 3 | The number of re-attempts when the connector encounters an exception while trying to set a checkpoint. | +| `receiveQueueSize` | int | false | false | 1000 | The maximum number of AWS records that can be buffered inside the connector.

    Once the `receiveQueueSize` is reached, the connector does not consume any messages from Kinesis until some messages in the queue are successfully consumed. | +| `useEnhancedFanOut` | boolean | false | false | true | If set to true, it uses Kinesis enhanced fan-out.

    If set to false, it uses polling. |
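+
+For reference, the options above combine into a `--source-config` like the following sketch. It is illustrative only: the region, stream, and application names are placeholders, and the `initialPositionInStream`, `checkpointInterval`, and `useEnhancedFanOut` values are simply non-default choices from the table. Credential settings are omitted here; they are covered in the next section.
+
+```json
+{
+  "awsRegion": "us-west-2",
+  "awsKinesisStreamName": "my-stream",
+  "applicationName": "pulsar-io-kinesis",
+  "initialPositionInStream": "TRIM_HORIZON",
+  "checkpointInterval": 30000,
+  "useEnhancedFanOut": false
+}
+```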
+ +### Configure AwsCredentialProviderPlugin + +The AWS Kinesis source connector supports three ways of connecting to AWS Kinesis, selected by configuring `awsCredentialPluginName`. + +- Leave `awsCredentialPluginName` empty to get the connector authenticated by passing `accessKey` and `secretKey` in `awsCredentialPluginParam`. + + ```json + {"accessKey":"Your access key","secretKey":"Your secret key"} + ``` + +- Set `awsCredentialPluginName` to `org.apache.pulsar.io.aws.AwsDefaultProviderChainPlugin` to use the default AWS provider chain. With this option, you don’t need to configure `awsCredentialPluginParam`. For more information, see [AWS documentation](https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default). + +- Set `awsCredentialPluginName` to `org.apache.pulsar.io.aws.STSAssumeRoleProviderPlugin` to use the [default AWS provider chain](https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default), and configure `roleArn` and `roleSessionName` in `awsCredentialPluginParam`. For more information, see [AWS documentation](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html). + + ```json + {"roleArn": "arn...", "roleSessionName": "name"} + ``` + + + + + + + + \ No newline at end of file diff --git a/connectors/mongodb-sink/v3.0.6/mongodb-sink.md b/connectors/mongodb-sink/v3.0.7/mongodb-sink.md similarity index 92% rename from connectors/mongodb-sink/v3.0.6/mongodb-sink.md rename to connectors/mongodb-sink/v3.0.7/mongodb-sink.md index 53b923bb..78a75e5b 100644 --- a/connectors/mongodb-sink/v3.0.6/mongodb-sink.md +++ b/connectors/mongodb-sink/v3.0.7/mongodb-sink.md @@ -4,14 +4,14 @@ author: ["ASF"] contributors: ["ASF"] language: Java document: -source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/mongo" +source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/mongo" license: Apache License 2.0 tags: ["Pulsar IO", "MongoDB", "Sink"] alias: MongoDB Sink features: ["Use MongoDB sink connector to sync data from Pulsar"] license_link: "https://www.apache.org/licenses/LICENSE-2.0" icon: "/images/connectors/mongodb.png" -download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-mongo-3.0.6.nar" +download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-mongo-3.0.7.nar" support: StreamNative support_link: https://streamnative.io support_img: "/images/streamnative.png" diff --git a/connectors/mongodb-sink/v4.0.0/mongodb-sink.md b/connectors/mongodb-sink/v4.0.0/mongodb-sink.md new file mode 100644 index 00000000..ed4223be --- /dev/null +++ b/connectors/mongodb-sink/v4.0.0/mongodb-sink.md @@ -0,0 +1,67 @@ +--- +description: The MongoDB sink connector pulls messages from Pulsar topics and persists the messages to collections.
+author: ["ASF"] +contributors: ["ASF"] +language: Java +document: +source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/mongo" +license: Apache License 2.0 +tags: ["Pulsar IO", "MongoDB", "Sink"] +alias: MongoDB Sink +features: ["Use MongoDB sink connector to sync data from Pulsar"] +license_link: "https://www.apache.org/licenses/LICENSE-2.0" +icon: "/images/connectors/mongodb.png" +download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-mongo-4.0.0.nar" +support: StreamNative +support_link: https://streamnative.io +support_img: "/images/streamnative.png" +owner_name: "" +owner_img: "" +dockerfile: +id: "mongodb-sink" +--- + +The MongoDB sink connector pulls messages from Pulsar topics and persists the messages to collections. + +# Configuration + +The configuration of the MongoDB sink connector has the following properties. + +## Property + +| Name | Type | Required | Sensitive | Default | Description | +|---------------|--------|----------|-----------|--------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `mongoUri` | String | true | true | " " (empty string) | The MongoDB URI to which the connector connects.

    For more information, see [connection string URI format](https://docs.mongodb.com/manual/reference/connection-string/). | +| `database` | String | true | false | " " (empty string) | The database name to which the collection belongs. | +| `collection` | String | true | false | " " (empty string) | The collection name to which the connector writes messages. | +| `batchSize` | int | false | false | 100 | The batch size for writing messages to collections. | +| `batchTimeMs` | long | false | false | 1000 | The batch operation interval in milliseconds. | + + +## Example + +Before using the MongoDB sink connector, you need to create a configuration file through one of the following methods. + +* JSON + + ```json + { + "mongoUri": "mongodb://localhost:27017", + "database": "pulsar", + "collection": "messages", + "batchSize": 2, + "batchTimeMs": 500 + } + ``` + +* YAML + + ```yaml + mongoUri: "mongodb://localhost:27017" + database: "pulsar" + collection: "messages" + batchSize: 2 + batchTimeMs: 500 + ``` diff --git a/connectors/redis-sink/v3.0.6/redis-sink.md b/connectors/redis-sink/v3.0.7/redis-sink.md similarity index 94% rename from connectors/redis-sink/v3.0.6/redis-sink.md rename to connectors/redis-sink/v3.0.7/redis-sink.md index 3d6a3510..e34e7d09 100644 --- a/connectors/redis-sink/v3.0.6/redis-sink.md +++ b/connectors/redis-sink/v3.0.7/redis-sink.md @@ -4,14 +4,14 @@ author: ["ASF"] contributors: ["ASF"] language: Java document: -source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/redis" +source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/redis" license: Apache License 2.0 tags: ["Pulsar IO", "Redis", "Sink"] alias: Redis Sink features: ["Use Redis sink connector to sync data from Pulsar"] license_link: "https://www.apache.org/licenses/LICENSE-2.0" icon: "/images/connectors/redis.png" -download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-redis-3.0.6.nar" +download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-redis-3.0.7.nar" support: StreamNative support_link: https://streamnative.io support_img: "/images/streamnative.png" diff --git a/connectors/redis-sink/v4.0.0/redis-sink.md b/connectors/redis-sink/v4.0.0/redis-sink.md new file mode 100644 index 00000000..494eddae --- /dev/null +++ b/connectors/redis-sink/v4.0.0/redis-sink.md @@ -0,0 +1,80 @@ +--- +description: The Redis sink connector pulls messages from Pulsar topics and persists the messages to a Redis database. +author: ["ASF"] +contributors: ["ASF"] +language: Java +document: +source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/redis" +license: Apache License 2.0 +tags: ["Pulsar IO", "Redis", "Sink"] +alias: Redis Sink +features: ["Use Redis sink connector to sync data from Pulsar"] +license_link: "https://www.apache.org/licenses/LICENSE-2.0" +icon: "/images/connectors/redis.png" +download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-redis-4.0.0.nar" +support: StreamNative +support_link: https://streamnative.io +support_img: "/images/streamnative.png" +owner_name: "" +owner_img: "" +dockerfile: +id: "redis-sink" +--- + +The Redis sink connector pulls messages from Pulsar topics and persists the messages to a Redis database. + +# Configuration + +The configuration of the Redis sink connector has the following properties. 
+ +## Property + +| Name | Type | Required | Sensitive | Default | Description | +|--------------------|---------|----------|-----------|--------------------|---------------------------------------------------------------------------------------------------------------------------------| +| `redisHosts` | String | true | false | " " (empty string) | A comma-separated list of Redis hosts to connect to. | +| `redisPassword` | String | false | true | " " (empty string) | The password used to connect to Redis. | +| `redisDatabase` | int | true | false | 0 | The Redis database to connect to. | +| `clientMode` | String | false | false | Standalone | The client mode when interacting with Redis cluster.

    Below are the available options:
  • Standalone
  • Cluster | +| `autoReconnect` | boolean | false | false | true | Whether the Redis client automatically reconnects or not. | +| `requestQueue` | int | false | false | 2147483647 | The maximum number of queued requests to Redis. | +| `tcpNoDelay` | boolean | false | false | false | Whether to enable TCP with no delay or not. | +| `keepAlive` | boolean | false | false | false | Whether to enable a keepalive to Redis or not. | +| `connectTimeout` | long | false | false | 10000 | The time (in milliseconds) to wait before timing out when connecting. | +| `operationTimeout` | long | false | false | 10000 | The time (in milliseconds) before an operation is marked as timed out. | +| `batchTimeMs` | int | false | false | 1000 | The batch operation interval in milliseconds. | +| `batchSize` | int | false | false | 200 | The batch size for writing to the Redis database. | + + +## Example + +Before using the Redis sink connector, you need to create a configuration file through one of the following methods. + +* JSON + + ```json + { + "redisHosts": "localhost:6379", + "redisPassword": "fake@123", + "redisDatabase": 1, + "clientMode": "Standalone", + "operationTimeout": 2000, + "batchSize": 100, + "batchTimeMs": 1000, + "connectTimeout": 3000 + } + ``` + +* YAML + + ```yaml + redisHosts: "localhost:6379" + redisPassword: "fake@123" + redisDatabase: 1 + clientMode: "Standalone" + operationTimeout: 2000 + batchSize: 100 + batchTimeMs: 1000 + connectTimeout: 3000 + ```
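+
+For a Redis cluster, the same file changes mainly in the connection fields, as in this hypothetical sketch (host names and ports are placeholders; note that Redis Cluster only supports database 0):
+
+```json
+{
+  "redisHosts": "redis-node1:6379,redis-node2:6379,redis-node3:6379",
+  "redisDatabase": 0,
+  "clientMode": "Cluster"
+}
+```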
 diff --git a/connectors/twitter-firehose-source/v3.0.6/twitter-firehose-source.md b/connectors/twitter-firehose-source/v3.0.7/twitter-firehose-source.md similarity index 92% rename from connectors/twitter-firehose-source/v3.0.6/twitter-firehose-source.md rename to connectors/twitter-firehose-source/v3.0.7/twitter-firehose-source.md index 5137cf8c..d1b03413 100644 --- a/connectors/twitter-firehose-source/v3.0.6/twitter-firehose-source.md +++ b/connectors/twitter-firehose-source/v3.0.7/twitter-firehose-source.md @@ -4,14 +4,14 @@ author: ["ASF"] contributors: ["ASF"] language: Java document: "" -source: "https://github.com/apache/pulsar/tree/v3.0.6/pulsar-io/twitter" +source: "https://github.com/apache/pulsar/tree/v3.0.7/pulsar-io/twitter" license: Apache License 2.0 tags: ["Pulsar IO", "Twitter", "Source"] alias: Twitter Source features: ["Use twitter source connector to sync data to Pulsar"] license_link: "https://www.apache.org/licenses/LICENSE-2.0" icon: "/images/connectors/twitter.png" -download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.6/connectors/pulsar-io-twitter-3.0.6.nar" +download: "https://archive.apache.org/dist/pulsar/pulsar-3.0.7/connectors/pulsar-io-twitter-3.0.7.nar" support: StreamNative support_link: https://streamnative.io support_img: "/images/streamnative.png" diff --git a/connectors/twitter-firehose-source/v4.0.0/twitter-firehose-source.md b/connectors/twitter-firehose-source/v4.0.0/twitter-firehose-source.md new file mode 100644 index 00000000..bc80ad32 --- /dev/null +++ b/connectors/twitter-firehose-source/v4.0.0/twitter-firehose-source.md @@ -0,0 +1,43 @@ +--- +description: The Twitter Firehose source connector receives tweets from Twitter Firehose and writes the tweets to Pulsar topics.
+author: ["ASF"] +contributors: ["ASF"] +language: Java +document: "" +source: "https://github.com/apache/pulsar/tree/v4.0.0/pulsar-io/twitter" +license: Apache License 2.0 +tags: ["Pulsar IO", "Twitter", "Source"] +alias: Twitter Source +features: ["Use twitter source connector to sync data to Pulsar"] +license_link: "https://www.apache.org/licenses/LICENSE-2.0" +icon: "/images/connectors/twitter.png" +download: "https://archive.apache.org/dist/pulsar/pulsar-4.0.0/connectors/pulsar-io-twitter-4.0.0.nar" +support: StreamNative +support_link: https://streamnative.io +support_img: "/images/streamnative.png" +owner_name: "" +owner_img: "" +dockerfile: +id: "twitter-firehose-source" +--- + +The Twitter Firehose source connector receives tweets from Twitter Firehose and writes the tweets to Pulsar topics. + +# Configuration + +The configuration of the Twitter Firehose source connector has the following properties. + +## Property + +| Name | Type | Required | Sensitive | Default | Description | +|-----------------------|---------|----------|-----------|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `consumerKey` | String | true | true | " " (empty string) | The Twitter OAuth consumer key.

    For more information, see [Access tokens](https://developer.twitter.com/en/docs/basics/authentication/guides/access-tokens). | +| `consumerSecret` | String | true | true | " " (empty string) | The Twitter OAuth consumer secret. | +| `token` | String | true | true | " " (empty string) | The Twitter OAuth token. | +| `tokenSecret` | String | true | true | " " (empty string) | The Twitter OAuth token secret. | +| `guestimateTweetTime` | Boolean | false | false | false | Most firehose events have a null createdAt time.

    If `guestimateTweetTime` is set to `true`, the connector estimates the `createdAt` time of each firehose event to be the current time. | +| `clientName` | String | false | false | openconnector-twitter-source | The Twitter Firehose client name. | +| `clientHosts` | String | false | false | Constants.STREAM_HOST | The Twitter Firehose hosts to which the client connects. | +| `clientBufferSize` | int | false | false | 50000 | The buffer size for buffering tweets fetched from Twitter Firehose. | + +> For more information about OAuth credentials, see the [Twitter developers portal](https://developer.twitter.com/en.html).
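+
+## Example
+
+Like the MongoDB and Redis sink connectors above, you can collect these properties into a configuration file. The following JSON is an illustrative sketch only; every credential value is a placeholder for your own OAuth keys.
+
+```json
+{
+  "consumerKey": "Your twitter OAuth consumer key",
+  "consumerSecret": "Your twitter OAuth consumer secret",
+  "token": "Your twitter OAuth token",
+  "tokenSecret": "Your twitter OAuth token secret",
+  "clientBufferSize": 50000
+}
+```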