diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..35aa824 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +language: ruby +rvm: + - jruby +script: + - bundle install + - rake install_jars + - jgem build logstash-output-pravega.gemspec diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..dcffa4a --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,76 @@ +# Docker container for Pravega +FROM ubuntu:xenial +LABEL maintainer="Lida He https://github.com/hldnova" + + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + ca-certificates wget supervisor curl net-tools \ + apt-transport-https \ + software-properties-common && \ + rm -rf /var/cache/apt/* && rm -rf /var/lib/apt/lists/* && rm -rf ~/.cache && rm -rf /usr/share/doc + +# Install Java. +RUN \ + echo oracle-java8-installer shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \ + add-apt-repository -y ppa:webupd8team/java && \ + apt-get update && \ + apt-get install -y --no-install-recommends oracle-java8-installer && \ + rm -rf /var/cache/apt/* && rm -rf /var/lib/apt/lists/* && rm -rf ~/.cache && rm -rf /usr/share/doc && \ + rm -rf /var/cache/oracle-jdk8-installer + +# Install logstash +RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | apt-key add - && \ + echo "deb https://artifacts.elastic.co/packages/5.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-5.x.list && \ + apt-get update && apt-get install -y --no-install-recommends logstash && \ + rm -rf /var/cache/apt/* && rm -rf /var/lib/apt/lists/* && rm -rf ~/.cache && rm -rf /usr/share/doc + +# Pravega package and version +ARG PRAVEGA_VERSION=0.3.0 + +# TODO: update when Pravega 0.3.0 is released +ARG PRAVEGA_TAG=v0.3.0 +ENV PRAVEGA_PACKAGE=pravega-${PRAVEGA_VERSION} + +# Logstash Pravega output plugin version +ARG PLUGIN_VERSION=0.3.0 + +# Install Pravega +RUN cd /opt && \ + wget 
https://github.com/pravega/logstash-output-pravega/releases/download/${PRAVEGA_TAG}/${PRAVEGA_PACKAGE}.tgz && \ + tar zxvf ${PRAVEGA_PACKAGE}.tgz && \ + ln -s /opt/${PRAVEGA_PACKAGE} /opt/pravega && \ + rm -rf /opt/${PRAVEGA_PACKAGE}.tgz + +# Install logstash Pravega output plugin +RUN cd /opt && \ + wget https://github.com/pravega/logstash-output-pravega/releases/download/v${PLUGIN_VERSION}/logstash-output-pravega-${PLUGIN_VERSION}.gem && \ + /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-${PLUGIN_VERSION}.gem && \ + rm -rf logstash-output-pravega-${PLUGIN_VERSION}.gem + +COPY supervisord.conf /etc/supervisord.conf + +COPY supervisord_pravega.conf /etc/supervisor/conf.d/pravega-standalone.conf + +RUN mkdir -p /var/log/pravega +RUN mkdir -p /opt/data + +COPY logstash.yml /etc/logstash/ +COPY filters/* /etc/logstash/conf.d/ + +COPY entrypoint.sh /opt/ + +# pravega controller port +EXPOSE 9090 +# pravega rest api port +EXPOSE 9091 +# pravega segment store server port +EXPOSE 6000 +# logstash monitoring api port +EXPOSE 9600 + +ENV TERM=linux + +# default command +ENTRYPOINT ["/opt/entrypoint.sh"] +CMD ["supervisord", "-c", "/etc/supervisord.conf"] diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..80d7b07 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,71 @@ +# Pravega Demo In Docker Container + +This is for running the following pipeline in a Docker container: +``` +Apache access logs -> Logstash with Pravega output plugin -> Pravega stream +``` + +Applications, e.g., Flink jobs, can then read the data from Pravega stream and process them. + +Services running inside the container. +- Pravega standalone. See more details [here](http://pravega.io/docs/latest/getting-started/) +- Logstash with [Pravega output plugin](https://github.com/pravega/logstash-output-pravega). 
It is configured to read data from a file that contains Apache access logs and push the logs, by default, to Pravega standalone running inside the container. + +To build, first pick a [pravega standalone version](https://oss.jfrog.org/artifactory/jfrog-dependencies/io/pravega/pravega-standalone), for example, 0.3.0-1870.f56b52d-SNAPSHOT. Then pick a [plugin release](https://github.com/pravega/logstash-output-pravega/releases), e.g., 0.3.0-SNAPSHOT. +``` +$ docker build --build-arg PRAVEGA_VERSION=0.3.0-1870.f56b52d-SNAPSHOT --build-arg PLUGIN_VERSION=0.3.0-SNAPSHOT -t pravega-demo . +``` + +To run the pipeline, first create a file at /tmp/access.log: +``` +$ touch /tmp/access.log +``` + +Then run the script below to start a container from the image. Adjust the parameters to your needs. +``` +#!/bin/sh +set -u + +PRAVEGA_SCOPE=examples +PRAVEGA_STREAM=apacheaccess +CONTAINER_NAME=pravega +IMAGE_NAME=pravega-demo + +docker run -d --name $CONTAINER_NAME \ + -p 9090:9090 \ + -p 9091:9091 \ + -v /tmp/access.log:/opt/data/access.log \ + -v $PWD/logs/:/var/log/pravega/ \ + -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \ + -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \ + ${IMAGE_NAME} +``` +For debugging, the log files can be found under $PWD/logs. + +Add access logs to /tmp/access.log, e.g., by running the command below a few times. +``` +echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log +``` + +The access logs are sent to the Pravega stream as JSON strings, for example: 
+``` +{ + "request" => "/mapping/", + "agent" => "\"python-client\"", + "auth" => "peter", + "ident" => "-", + "verb" => "PUT", + "message" => "10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] \"PUT /mapping/ HTTP/1.1\" 500 182 \"http://example.com/myapp\" \"python-client\"", + "path" => "/opt/data/access.log", + "referrer" => "\"http://example.com/myapp\"", + "@timestamp" => 2018-03-19T06:24:01.000Z, + "response" => "500", + "bytes" => "182", + "clientip" => "10.1.1.11", + "@version" => "1", + "host" => "5e91529a729f", + "httpversion" => "1.1" +} +``` + +You can then start a Pravega reader to read from it, e.g., [Pravega Samples](https://github.com/pravega/pravega-samples) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100755 index 0000000..8ade623 --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +set -ue +PRAVEGA_SCOPE=${PRAVEGA_SCOPE:-myscope} +PRAVEGA_STREAM=${PRAVEGA_STREAM:-apacheaccess} +PRAVEGA_ENDPOINT=${PRAVEGA_ENDPOINT:-tcp://localhost:9090} +sed -i 's|scope =>.*|scope => "'"${PRAVEGA_SCOPE}"'"|' /etc/logstash/conf.d/90-pravega-output.conf +sed -i 's|stream_name =>.*|stream_name => "'"${PRAVEGA_STREAM}"'"|' /etc/logstash/conf.d/90-pravega-output.conf +sed -i 's|pravega_endpoint =>.*|pravega_endpoint => "'"${PRAVEGA_ENDPOINT}"'"|' /etc/logstash/conf.d/90-pravega-output.conf + +exec "$@" diff --git a/docker/filters/01-file-input.conf b/docker/filters/01-file-input.conf new file mode 100644 index 0000000..a8049e1 --- /dev/null +++ b/docker/filters/01-file-input.conf @@ -0,0 +1,17 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +input { + file { + path => "/opt/data/access.log" + start_position => beginning + } +} + diff --git a/docker/filters/10-apache-accesslog-filter.conf b/docker/filters/10-apache-accesslog-filter.conf new file mode 100644 index 0000000..d33b4b9 --- /dev/null +++ b/docker/filters/10-apache-accesslog-filter.conf @@ -0,0 +1,23 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +filter { + grok { + match => { "message" => "%{COMBINEDAPACHELOG}" } + } + date { + match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ] + } + mutate { + remove_field => [ "timestamp" ] + } + +} + diff --git a/docker/filters/90-pravega-output.conf b/docker/filters/90-pravega-output.conf new file mode 100644 index 0000000..9da70a6 --- /dev/null +++ b/docker/filters/90-pravega-output.conf @@ -0,0 +1,18 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +output { + pravega { + pravega_endpoint => "tcp://127.0.0.1:9090" + stream_name => "apacheaccess" + scope => "myscope" + } +} + diff --git a/docker/filters/95-stdout-output.conf b/docker/filters/95-stdout-output.conf new file mode 100644 index 0000000..04986e4 --- /dev/null +++ b/docker/filters/95-stdout-output.conf @@ -0,0 +1,14 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +output { + stdout { codec => rubydebug } +} + diff --git a/docker/hooks/build b/docker/hooks/build new file mode 100644 index 0000000..cc2856a --- /dev/null +++ b/docker/hooks/build @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +set -vuex +echo "------ HOOK START - BUILD -------" +export PRAVEGA_VERSION=`grep pravega-client ../logstash-output-pravega.gemspec | awk -F"'" '{print $4}'` + +export PLUGIN_VERSION=`grep s.version ../logstash-output-pravega.gemspec | awk -F"'" '{print $2}'` + +docker build --build-arg PRAVEGA_VERSION=${PRAVEGA_VERSION} --build-arg PLUGIN_VERSION=${PLUGIN_VERSION} -t "$IMAGE_NAME" . 
+ +echo "------ HOOK END - BUILD -------" diff --git a/docker/logstash.yml b/docker/logstash.yml new file mode 100644 index 0000000..0a37bc7 --- /dev/null +++ b/docker/logstash.yml @@ -0,0 +1,5 @@ + +# bind address for the monitoring api +http.host: "0.0.0.0" + + diff --git a/docker/start.sh b/docker/start.sh new file mode 100755 index 0000000..06b1582 --- /dev/null +++ b/docker/start.sh @@ -0,0 +1,19 @@ +#!/bin/sh +set -u + +PRAVEGA_SCOPE=${PRAVEGA_SCOPE:-examples} +PRAVEGA_STREAM=${PRAVEGA_STREAM:-apacheaccess} +CONTAINER_NAME=pravega +IMAGE_NAME=pravega-demo + +docker rm -f ${CONTAINER_NAME} + +docker run -d --name $CONTAINER_NAME \ + -p 9090:9090 \ + -p 9091:9091 \ + -p 9600:9600 \ + -v ${PWD}/access.log:/opt/data/access.log \ + -v ${PWD}/logs:/var/log/pravega \ + -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \ + -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \ + ${IMAGE_NAME} diff --git a/docker/supervisord.conf b/docker/supervisord.conf new file mode 100644 index 0000000..a951685 --- /dev/null +++ b/docker/supervisord.conf @@ -0,0 +1,5 @@ +[supervisord] +nodaemon=true + +[include] +files = /etc/supervisor/conf.d/*.conf diff --git a/docker/supervisord_pravega.conf b/docker/supervisord_pravega.conf new file mode 100644 index 0000000..cd701aa --- /dev/null +++ b/docker/supervisord_pravega.conf @@ -0,0 +1,15 @@ +[program:pravega] +command=/opt/pravega/bin/pravega-standalone +stderr_logfile=/var/log/pravega/pravega-error.log +stdout_logfile=/var/log/pravega/pravega-out.log +#redirect_stderr=true + +[program:logstash] +# sleep a while for pravega to start +command=bash -c 'sleep 60 && exec /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d --path.settings /etc/logstash' +stderr_logfile=/var/log/pravega/logstash-error.log +stdout_logfile=/var/log/pravega/logstash-out.log +#redirect_stderr=true + + + diff --git a/logstash-output-pravega.gemspec b/logstash-output-pravega.gemspec index e635888..66433e2 100644 --- a/logstash-output-pravega.gemspec +++ b/logstash-output-pravega.gemspec 
@@ -1,6 +1,6 @@ Gem::Specification.new do |s| s.name = 'logstash-output-pravega' - s.version = '0.3.0-SNAPSHOT' + s.version = '0.3.0' s.licenses = ['Apache License (2.0)'] s.summary = 'Output events to a Pravega Stream. This uses the Pravega Writer API to write event to a stream on the Pravega' s.description = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program' @@ -17,7 +17,7 @@ Gem::Specification.new do |s| # Special flag to let us know this is actually a logstash plugin s.metadata = { "logstash_plugin" => "true", "logstash_group" => "output" } - s.requirements << "jar 'io.pravega:pravega-client', '0.3.0-64.d0c8497-SNAPSHOT'" + s.requirements << "jar 'io.pravega:pravega-client', '0.3.0'" # Gem dependencies s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" diff --git a/pom.xml b/pom.xml index 15e43d2..cc777a8 100644 --- a/pom.xml +++ b/pom.xml @@ -8,14 +8,10 @@ io.pravega pravega-client - 0.3.0-64.d0c8497-SNAPSHOT + 0.3.0 - - sonatype - https://oss.sonatype.org/content/repositories/snapshots - jfrog https://oss.jfrog.org/artifactory/jfrog-dependencies