cd docker
docker-machine up docker-vm
docker-machine env docker-vm
Use this at relevant places.
docker-compose up -d mysql
docker exec -it my-mysql bash -c "mysql -uroot -p -e \"GRANT ALL on demo.* to 'demo'@'%' identified by 'demo';CREATE DATABASE demo;SELECT User FROM mysql.user;\""
docker exec -it my-mysql bash -c "mysql -uroot -p -e \"GRANT ALL on maxwell.* to 'maxwell'@'%' identified by 'maxwell';GRANT SELECT, REPLICATION CLIENT, REPLICATION SLAVE on *.* to 'maxwell'@'%';SELECT User FROM mysql.user;\""
docker-compose up -d kafka
docker-compose run maxwell bash -c "bin/maxwell --user=maxwell --password=maxwell --host=mysql --producer=kafka --kafka.bootstrap.servers=kafka:9092"
docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-topics.sh --zookeeper localhost:2181 --list"
docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic maxwell"
cd data-generators/ruby
bundle exec ruby create_users.rb
You will have created 1000 records each on demo.A and demo.B
Now, let us look at how we can do DB denormalization while stream processing
docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-topics.sh --create --topic events --zookeeper localhost:2181 --partitions 1 --replication-factor 1"
bundle exec ruby create_events.rb
Everytime you run create_events.rb
1000 arbitrary events will be logged of type {"aid":803,"bid":205}
.
cd data-processors/stream-processors/samza
bin/grid install yarn
bin/grid start yarn
Once YARN is up, you should be able to access http://localhost:8088/cluster.
mvn clean package
mkdir -p deploy/samza
tar -xvf ./target/db-caching-0.0.1-dist.tar.gz -C deploy/samza
deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/stream-db-denormalization.properties
Wait for job to get into RUNNING status on YARN.
docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic db-de-normalized-events
Let this shell remain open.
bundle exec ruby create_events.rb
deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/db-to-changelog.properties
docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic stream-de-normalized-events"
docker exec -it my-kafka bash -c "/opt/kafka_2.11-0.8.2.1/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic db-de-normalized-events"
bundle exec ruby create_events.rb
Note the difference in speed.
Enjoy!