From 68119a1ae4e423bdcdb60d3f0aceae07bbfadf01 Mon Sep 17 00:00:00 2001 From: RongDu7 <151746090+RongDu7@users.noreply.github.com> Date: Sat, 28 Sep 2024 14:07:16 +1000 Subject: [PATCH] update kafka documentation (#113) Co-authored-by: rongdu --- .../kafka_tutorial.md | 211 ++++++++++++++++++ yarn.lock | 59 ++--- 2 files changed, 234 insertions(+), 36 deletions(-) create mode 100644 docs/project-4/Crowd-Monitoring-Detection/kafka_tutorial.md diff --git a/docs/project-4/Crowd-Monitoring-Detection/kafka_tutorial.md b/docs/project-4/Crowd-Monitoring-Detection/kafka_tutorial.md new file mode 100644 index 00000000..ac7ff57d --- /dev/null +++ b/docs/project-4/Crowd-Monitoring-Detection/kafka_tutorial.md @@ -0,0 +1,211 @@ +--- +sidebar_position: 2 +--- + +# Crowd Monitoring & Player Tracking Project Plan: Apache Kafka + +## Introduction +As a member of the Crowd Monitoring & Player Tracking team, my primary task is to develop a system for handling data logistics using a document-based database. The focus of my work is on ensuring that the data generated by our monitoring and tracking systems is efficiently and reliably processed, stored, and made available for analysis and visualization. + +## Specific Focus on Kafka Data Streaming Pipeline +I have chosen to focus on the Kafka data streaming pipeline as a crucial component of our data logistics system. Kafka is well-suited for our needs due to its ability to handle high-throughput, real-time data streams with low latency, which is essential for monitoring and tracking applications where timely data processing is critical. + +## Why Kafka? +Kafka was chosen for several reasons: + +- **Scalability**: Kafka's distributed architecture allows it to scale horizontally, which is vital as the volume of data from player tracking and crowd monitoring can be substantial. 
+- **Reliability**: Kafka's strong durability guarantees ensure that no data is lost during transmission, which is important for maintaining the integrity of our tracking data. +- **Real-time Processing**: Kafka's capability to process data in real-time is a perfect fit for our system's requirement to monitor crowd movement and player tracking as events unfold. + +## Key Components of Kafka + +- **Producers**: Entities that publish data to Kafka topics. They push records (data) into Kafka without concern for how the data is processed downstream. +- **Consumers**: Entities that read records from Kafka topics. They can be independent processes or applications that subscribe to specific topics to process data. +- **Topics**: Categories or feed names to which records are published. Kafka topics are partitioned to allow for parallelism and scalability. +- **Brokers**: Kafka brokers are servers that store and serve data. A Kafka cluster consists of multiple brokers, ensuring fault tolerance and distributed storage. +- **Zookeeper**: Used by Kafka to manage and coordinate the brokers. It handles leader election for partitions and maintains a list of all brokers in the cluster. + +## Installing Apache Kafka + +### On macOS + +To get started with Kafka on a macOS system, you'll need to install both Kafka and its dependency, Zookeeper. Here's a step-by-step guide: + +#### Prerequisites +- **Homebrew**: Ensure that Homebrew is installed on your Mac. Homebrew is a popular package manager for macOS that simplifies the installation of software. + To install Homebrew, open Terminal and enter: + ```bash + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" + ``` +- **Java**: Kafka requires Java to run. + Install it using Homebrew: + ```bash + brew install openjdk@11 + ``` + +#### Step-by-Step Installation + +1. **Install Kafka and Zookeeper** + Install Kafka and Zookeeper using Homebrew: + ```bash + brew install kafka + ``` + +2. 
**Start Zookeeper** + Kafka uses Zookeeper to manage its brokers. Start Zookeeper with the following command: + ```bash + zookeeper-server-start "$(brew --prefix)/etc/kafka/zookeeper.properties" + ``` + +3. **Start Kafka Server** + Once Zookeeper is running, start the Kafka broker: + ```bash + kafka-server-start "$(brew --prefix)/etc/kafka/server.properties" + ``` + +4. **Create a Topic** + To create a Kafka topic, use the following command: + ```bash + kafka-topics --create --topic test-topic --bootstrap-server localhost:9092 --partitions 1 --replication-factor 1 + ``` + +5. **Send and Receive Messages** + Start sending messages to the Kafka topic using a producer: + ```bash + kafka-console-producer --topic test-topic --bootstrap-server localhost:9092 + ``` + To consume messages from the topic, use: + ```bash + kafka-console-consumer --topic test-topic --from-beginning --bootstrap-server localhost:9092 + ``` + +### On Windows + +To install Kafka on a Windows system, follow these steps: + +#### Prerequisites + +- **Java**: Ensure that Java is installed on your machine. You can download and install it from the [Oracle JDK website](https://www.oracle.com/java/technologies/javase-downloads.html). +- **Download Kafka**: Go to the [Apache Kafka download page](https://kafka.apache.org/downloads) and download the latest binary for your operating system. + +#### Step-by-Step Installation + +1. **Extract Kafka** + Extract the downloaded Kafka archive to your desired directory (e.g., `C:\kafka`). + +2. **Configure Environment Variables** + Add the Kafka `bin` directory (e.g., `C:\kafka\bin\windows`) to your system's `PATH` environment variable. + +3. **Start Zookeeper** + Kafka uses Zookeeper to manage its brokers. Start Zookeeper with the following command in a new Command Prompt: + ```bash + zookeeper-server-start.bat C:\kafka\config\zookeeper.properties + ``` + +4. 
**Start Kafka Server** + Once Zookeeper is running, start the Kafka broker in another Command Prompt: + ```bash + kafka-server-start.bat C:\kafka\config\server.properties + ``` + +5. **Create a Topic** + To create a Kafka topic, use the following command: + ```bash + kafka-topics.bat --create --topic test-topic --bootstrap-server localhost:9092 --partitions 1 --replication-factor 1 + ``` + +6. **Send and Receive Messages** + Start sending messages to the Kafka topic using a producer: + ```bash + kafka-console-producer.bat --topic test-topic --bootstrap-server localhost:9092 + ``` + To consume messages from the topic, use: + ```bash + kafka-console-consumer.bat --topic test-topic --from-beginning --bootstrap-server localhost:9092 + ``` + +### Using Docker + +To run Kafka using Docker, follow these steps: + +#### Prerequisites + +- **Docker**: Ensure Docker is installed on your system. You can download Docker from the [Docker website](https://www.docker.com/products/docker-desktop). + +#### Step-by-Step Installation + +1. **Create a Docker Network** + Create a new Docker network for Kafka and Zookeeper: + ```bash + docker network create kafka-network + ``` + +2. **Start Zookeeper Container** + Run a Zookeeper container: + ```bash + docker run -d --name zookeeper --network kafka-network -e ZOOKEEPER_CLIENT_PORT=2181 confluentinc/cp-zookeeper:7.4.0 + ``` + +3. **Start Kafka Container** + Run a Kafka container: + ```bash + docker run -d --name kafka --network kafka-network -e KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 -e KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092 -e KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 confluentinc/cp-kafka:7.4.0 + ``` + +4. **Create a Topic** + To create a Kafka topic, use the following command: + ```bash + docker exec -it kafka kafka-topics --create --topic test-topic --bootstrap-server localhost:9092 --partitions 1 --replication-factor 1 + ``` + +5. 
**Send and Receive Messages** + Send messages to the Kafka topic using a producer: + ```bash + docker exec -it kafka kafka-console-producer --topic test-topic --bootstrap-server localhost:9092 + ``` + To consume messages from the topic, use: + ```bash + docker exec -it kafka kafka-console-consumer --topic test-topic --from-beginning --bootstrap-server localhost:9092 + ``` + +### On Linux + +To install Kafka on a Linux system, follow these steps: + +#### Prerequisites + +- **Java**: Kafka requires Java to run. You can install it using your package manager. For example, on Ubuntu or Debian: + ```bash + sudo apt update + sudo apt install openjdk-11-jdk + ``` +- **Download Kafka**: Go to the [Apache Kafka download page](https://kafka.apache.org/downloads) and download the latest binary for your operating system. + +#### Step-by-Step Installation + +1. **Extract Kafka** + Extract the downloaded Kafka archive to your desired directory (e.g., `/opt/kafka`). + +2. **Start Zookeeper** + Kafka uses Zookeeper to manage its brokers. Start Zookeeper with the following command: + ```bash + /opt/kafka/bin/zookeeper-server-start.sh /opt/kafka/config/zookeeper.properties + ``` + +3. **Start Kafka Server** + Once Zookeeper is running, start the Kafka broker: + ```bash + /opt/kafka/bin/kafka-server-start.sh /opt/kafka/config/server.properties + ``` + +4. **Create a Topic** + To create a Kafka topic, use the following command: + ```bash + /opt/kafka/bin/kafka-topics.sh --create --topic test-topic --bootstrap-server localhost:9092 --partitions 1 --replication-factor 1 + ``` + +5. 
**Send and Receive Messages** + Start sending messages to the Kafka topic using a producer: + ```bash + /opt/kafka/bin/kafka-console-producer.sh --topic test-topic --bootstrap-server localhost:9092 + ``` diff --git a/yarn.lock b/yarn.lock index 2363a994..61fb9ff5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2622,11 +2622,11 @@ ajv@^6.12.2, ajv@^6.12.3, ajv@^6.12.5, ajv@^6.9.1: integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g== dependencies: fast-deep-equal "^3.1.1" - fast-json-stable-stringify "^2.0.0" - json-schema-traverse "^0.4.1" + json-schema-traverse "^1.0.0" + require-from-string "^2.0.2" uri-js "^4.2.2" -ajv@^8.0.0, ajv@^8.8.2, ajv@^8.9.0: +ajv@^8.8.2, ajv@^8.9.0: version "8.12.0" resolved "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz" integrity sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA== @@ -3956,18 +3956,18 @@ console-stream@^0.1.1: resolved "https://registry.npmjs.org/console-stream/-/console-stream-0.1.1.tgz" integrity sha512-QC/8l9e6ofi6nqZ5PawlDgzmMw3OxIXtvolBzap/F4UDBJlDaZRSNbL/lb41C29FcbSJncBFlJFj2WJoNyZRfQ== -content-disposition@^0.5.2, content-disposition@0.5.2: - version "0.5.2" - resolved "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.2.tgz" - integrity sha512-kRGRZw3bLlFISDBgwTSA1TMBFN6J6GWDeubmDE3AF+3+yXL8hTWv8r5rkLbqYXY4RjPk/EzHnClI3zQf1cFmHA== - -content-disposition@0.5.4: +content-disposition@^0.5.2, content-disposition@0.5.4: version "0.5.4" resolved "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz" integrity sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ== dependencies: safe-buffer "5.2.1" +content-disposition@0.5.2: + version "0.5.2" + resolved "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.2.tgz" + integrity sha512-kRGRZw3bLlFISDBgwTSA1TMBFN6J6GWDeubmDE3AF+3+yXL8hTWv8r5rkLbqYXY4RjPk/EzHnClI3zQf1cFmHA== + content-type@~1.0.4, content-type@~1.0.5: 
version "1.0.5" resolved "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz" @@ -4098,7 +4098,16 @@ cross-spawn@^6.0.0: shebang-command "^1.2.0" which "^1.2.9" -cross-spawn@^7.0.3, cross-spawn@7.0.3: +cross-spawn@^7.0.3: + version "7.0.3" + resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz" + integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== + dependencies: + path-key "^3.1.0" + shebang-command "^2.0.0" + which "^2.0.1" + +cross-spawn@7.0.3: version "7.0.3" resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz" integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== @@ -9096,17 +9105,7 @@ micromatch@^4.0.2, micromatch@^4.0.4, micromatch@^4.0.5: braces "^3.0.2" picomatch "^2.3.1" -mime-db@^1.28.0, mime-db@~1.33.0: - version "1.33.0" - resolved "https://registry.npmjs.org/mime-db/-/mime-db-1.33.0.tgz" - integrity sha512-BHJ/EKruNIqJf/QahvxwQZXKygOQ256myeN/Ew+THcAa5q+PjyTTMMeNQC4DZw5AwfvelsUrA6B67NKMqXDbzQ== - -"mime-db@>= 1.43.0 < 2": - version "1.52.0" - resolved "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz" - integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== - -mime-db@1.52.0: +mime-db@^1.28.0, "mime-db@>= 1.43.0 < 2", mime-db@1.52.0: version "1.52.0" resolved "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz" integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg== @@ -11564,7 +11563,7 @@ safe-array-concat@^1.0.0, safe-array-concat@^1.1.0: has-symbols "^1.0.3" isarray "^2.0.5" -safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@^5.1.2, safe-buffer@>=5.1.0, safe-buffer@~5.2.0, safe-buffer@5.2.1: +safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@^5.1.2, safe-buffer@>=5.1.0, safe-buffer@5.2.1: version "5.2.1" resolved 
"https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz" integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== @@ -12284,14 +12283,7 @@ strict-uri-encode@^1.0.0: resolved "https://registry.npmjs.org/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz" integrity sha512-R3f198pcvnB+5IpnBlRkphuE9n46WyVl8I39W/ZUTZLz4nqSP/oLYUrcnJrw462Ds8he4YKMov2efsTIw1BDGQ== -string_decoder@^1.1.1: - version "1.3.0" - resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz" - integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== - dependencies: - safe-buffer "~5.2.0" - -string_decoder@~1.1.1: +string_decoder@^1.1.1, string_decoder@~1.1.1: version "1.1.1" resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz" integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg== @@ -13044,12 +13036,7 @@ unist-util-find-after@^5.0.0: "@types/unist" "^3.0.0" unist-util-is "^6.0.0" -unist-util-is@^4.0.0: - version "4.1.0" - resolved "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.1.0.tgz" - integrity sha512-ZOQSsnce92GrxSqlnEEseX0gi7GH9zTJZ0p9dtu87WRb/37mMPO2Ilx1s/t9vBHrFhbgweUwb+t7cIn5dxPhZg== - -unist-util-is@^4.0.2: +unist-util-is@^4.0.0, unist-util-is@^4.0.2: version "4.1.0" resolved "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.1.0.tgz" integrity sha512-ZOQSsnce92GrxSqlnEEseX0gi7GH9zTJZ0p9dtu87WRb/37mMPO2Ilx1s/t9vBHrFhbgweUwb+t7cIn5dxPhZg==