From 502e2e61347f5941d4d32a335a765371b3918aca Mon Sep 17 00:00:00 2001
From: Lida He
Date: Fri, 23 Mar 2018 18:20:51 -0400
Subject: [PATCH 01/48] example to generate alert from apache access log via
 logstash pravega output plugin

Signed-off-by: Lida He

add example to process apache access log and generate high 500 response alert

logstash config and first cut of readme for alert sample

instruction to run high count alerter sample.

instruction to run the high count alerter sample

print output to stdout

Add flink references

update flink references

Steps to use wordCountWriter instead of logstash

Signed-off-by: Lida He

Add link to readme file for high error count alerter

Signed-off-by: Lida He

Update README.md

Signed-off-by: Lida He

add license to conf files

Signed-off-by: Lida He

read access log from file instead of stdin

Signed-off-by: Lida He

Update instruction to read access log from file.

Signed-off-by: Lida He

Update instruction to read from file.

Signed-off-by: Lida He

Update instruction to read access log from file

Signed-off-by: Lida He

Update README.md

Signed-off-by: Lida He

Update README.md

Signed-off-by: Lida He

Signed-off-by: Lida He
---
 flink-examples/README.md                      |   6 +
 flink-examples/build.gradle                   |  11 +
 .../flink-high-error-count-alert/README.md    | 133 +++++++++++
 .../filters/01-file-input.conf                |  17 ++
 .../filters/10-apache-accesslog-filter.conf   |  23 ++
 .../filters/90-pravega-output.conf            |  18 ++
 .../filters/95-stdout-output.conf             |  14 ++
 .../src/main/dist/bin/create-stream.sh        |  22 ++
 .../examples/flink/alert/AccessLog.java       |  97 +++++++++
 .../examples/flink/alert/Constants.java       |  25 +++
 .../flink/alert/HighCountAlerter.java         | 206 ++++++++++++++++++
 11 files changed, 572 insertions(+)
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/README.md
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf
 create mode 100755 flink-examples/src/main/dist/bin/create-stream.sh
 create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
 create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java
 create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java

diff --git a/flink-examples/README.md b/flink-examples/README.md
index ea7b10e8..fd847a18 100644
--- a/flink-examples/README.md
+++ b/flink-examples/README.md
@@ -31,3 +31,9 @@ This example demonstrates how to use the Pravega Flink Connectors to write data
 from an external network stream into a Pravega stream and read the data from the
 Pravega stream.
 See [Flink Word Count Sample](doc/flink-wordcount/README.md) for instructions.
+## High Error Count Alert
+
+This example demonstrates how to use the Pravega Flink connectors to read and
+parse Apache access logs from logstash via the [logstash pravega output plugin](https://github.com/pravega/logstash-output-pravega),
+and how to generate an alert when the error count is high within a time frame.
+See [High Error Count Alert](doc/flink-high-error-count-alert/README.md) for instructions.
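At its core, the alert added by this patch is a sliding-window count plus a threshold test: count the 500 responses seen in the last 30 seconds and alert at 6 or more. As a minimal, dependency-free sketch of that idea (plain Java; the class, constants, and main() are hypothetical illustrations, not part of the patch):

```
import java.util.ArrayDeque;
import java.util.Deque;

// Hypothetical sketch (not part of the patch): sliding-window error counting.
public class SlidingWindowSketch {
    static final long WINDOW_MS = 30_000;  // look at the last 30 seconds
    static final int THRESHOLD = 6;        // alert at 6 or more errors

    private final Deque<Long> errorTimes = new ArrayDeque<>();

    /** Record one 500 response at eventTimeMs; return true if the alert fires. */
    boolean record(long eventTimeMs) {
        errorTimes.addLast(eventTimeMs);
        // evict events that have fallen out of the 30-second window
        while (!errorTimes.isEmpty() && errorTimes.peekFirst() <= eventTimeMs - WINDOW_MS) {
            errorTimes.removeFirst();
        }
        return errorTimes.size() >= THRESHOLD;
    }

    public static void main(String[] args) {
        SlidingWindowSketch counter = new SlidingWindowSketch();
        for (int i = 1; i <= 8; i++) {
            // one 500 response per second; the alert fires at the sixth event
            System.out.println(i + " -> alert=" + counter.record(i * 1000L));
        }
    }
}
```

In HighCountAlerter below, Flink's `timeWindow(Time.seconds(30), Time.seconds(2))` performs the equivalent bookkeeping per response code, with eviction and parallelism handled by the framework.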
diff --git a/flink-examples/build.gradle b/flink-examples/build.gradle
index 9f97e5cf..e55acade 100644
--- a/flink-examples/build.gradle
+++ b/flink-examples/build.gradle
@@ -30,7 +30,10 @@ dependencies {
     compile "io.pravega:pravega-connectors-flink_2.11:${connectorVersion}"
     compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}"
     compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}"
+    compile "org.apache.flink:flink-cep_2.11:${flinkVersion}"
     compile "org.slf4j:slf4j-log4j12:1.7.14"
+    compile "com.google.code.gson:gson:2.3.+"
+    compile "joda-time:joda-time:2.9.+"
 }
 
 shadowJar {
@@ -54,6 +57,13 @@ task scriptWordCountReader(type: CreateStartScripts) {
     classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
 }
 
+task scriptFlinkAlerter(type: CreateStartScripts) {
+    outputDir = file('build/scripts')
+    mainClassName = 'io.pravega.examples.flink.alert.HighCountAlerter'
+    applicationName = 'highCountAlerter'
+    classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
+}
+
 distributions {
     main {
         baseName = archivesBaseName
@@ -67,6 +77,7 @@ distributions {
             into('bin') {
                 from project.scriptWordCountWriter
                 from project.scriptWordCountReader
+                from project.scriptFlinkAlerter
             }
         }
     }

diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md
new file mode 100644
index 00000000..cc63c867
--- /dev/null
+++ b/flink-examples/doc/flink-high-error-count-alert/README.md
@@ -0,0 +1,133 @@
+# High Count Alert #
+
+The application reads Apache access logs from a Pravega stream and, once every 2 seconds,
+counts the number of 500 responses in the last 30 seconds, generating an
+alert when the count of 500 responses reaches 6 or above.
+
+## Prerequisites ##
+
+A Docker image containing Pravega and Logstash has been prepared to simplify the demo. Skip ahead to the **Run in Docker Container** section in this document if you have a Docker environment handy.
+
+Otherwise, proceed to set up Logstash and Pravega:
+
+1. Logstash installed, see [Install logstash](https://www.elastic.co/guide/en/logstash/5.6/installing-logstash.html).
+2. Pravega running, see [here](http://pravega.io/docs/latest/getting-started/) for instructions.
+
+## Start Logstash with Pravega Output Plugin ##
+
+On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.2.0.gem`.
+
+Install the plugin, assuming Logstash is installed at `/usr/share/logstash/`:
+```
+$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.2.0.gem
+```
+
+Copy the contents under flink-examples/doc/flink-high-error-count-alert/filters/ to the Logstash host, e.g., into directory ~/pravega, and
+update **pravega_endpoint** in ~/pravega/90-pravega-output.conf:
+
+```
+output {
+  pravega {
+    pravega_endpoint => "tcp://127.0.0.1:9090"   <- update to point to your Pravega controller
+    stream_name => "apacheaccess"
+    scope => "myscope"
+  }
+}
+```
+
+Start Logstash, assuming it is installed at /usr/share/logstash.
+Note that it may sometimes take a minute or two for Logstash to start. For troubleshooting, the Logstash log files are
+normally at /var/log/logstash. To restart, type Ctrl-C, then re-run the command.
+
+```
+$ sudo /usr/share/logstash/bin/logstash -f ~/pravega
+Sending Logstash's logs to /var/log/logstash which is now configured via log4j2.properties
+```
+
+Normally Logstash is configured to receive data from remote log shippers, such as filebeat. For simplicity, in this demo
+Logstash is configured to read data from /tmp/access.log.
+
+## Run in Docker Container ##
+
+Create a file at /tmp/access.log:
+```
+$ touch /tmp/access.log
+```
+
+Run the script below to start a container from the prebuilt image. Adjust the parameters to your needs.
+```
+#!/bin/sh
+set -u
+
+PRAVEGA_ENDPOINT=tcp://127.0.0.1:9090
+PRAVEGA_SCOPE=myscope
+PRAVEGA_STREAM=apacheaccess
+CONTAINER_NAME=pravega
+IMAGE_NAME=emccorp/pravega-demo
+
+docker run -d --name $CONTAINER_NAME \
+    -p 9090:9090 \
+    -p 9091:9091 \
+    -v /tmp/access.log:/opt/data/access.log \
+    -v /tmp/logs/:/var/log/pravega/ \
+    -e PRAVEGA_ENDPOINT=${PRAVEGA_ENDPOINT} \
+    -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \
+    -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \
+    ${IMAGE_NAME}
+```
+
+More details can be found on GitHub [pravega docker](https://github.com/hldnova/pravega-docker) and on Docker Hub [pravega docker image](https://hub.docker.com/r/emccorp/pravega-demo/)
+
+## Run HighCountAlerter ##
+
+Run the alerter. Adjust the controller and scope/stream if necessary.
+```
+$ cd flink-examples/build/install/pravega-flink-examples
+$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apacheaccess]
+```
+
+## Input Data ##
+
+Add access logs to /tmp/access.log, e.g., by running the command below every one or two seconds.
+```
+echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/accesslog
+```
+
+Logstash will push the data to Pravega as a JSON string, e.g.,
+```
+{
+        "request" => "/mapping/",
+          "agent" => "\"python-client\"",
+           "auth" => "peter",
+          "ident" => "-",
+           "verb" => "PUT",
+        "message" => "10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] \"PUT /mapping/ HTTP/1.1\" 500 182 \"http://example.com/myapp\" \"python-client\"",
+       "referrer" => "\"http://example.com/myapp\"",
+     "@timestamp" => 2018-03-19T06:24:01.000Z,
+       "response" => "500",
+          "bytes" => "182",
+       "clientip" => "10.1.1.11",
+       "@version" => "1",
+           "host" => "lglca061.lss.emc.com",
+    "httpversion" => "1.1"
+}
+```
+
+## View Alert ##
+In the HighCountAlerter window, you should see output like the following. Once the 500 response counts reach 6 or above, it
+should print **High 500 responses** alerts.
+```
+3> Response count: 500 : 1
+3> Response count: 500 : 2
+3> Response count: 500 : 4
+3> Response count: 500 : 6
+2> High 500 responses: 500 : 6
+3> Response count: 500 : 8
+3> High 500 responses: 500 : 8
+3> Response count: 500 : 8
+2> High 500 responses: 500 : 8
+3> Response count: 500 : 7
+3> High 500 responses: 500 : 7
+3> Response count: 500 : 5
+3> Response count: 500 : 3
+3> Response count: 500 : 1
+```

diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf b/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf
new file mode 100644
index 00000000..22ae47fe
--- /dev/null
+++ b/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf
@@ -0,0 +1,17 @@
+#
+# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +input { + file { + path => "/tmp/access.log" + start_position => beginning + } +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf b/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf new file mode 100644 index 00000000..d33b4b95 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf @@ -0,0 +1,23 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +filter { + grok { + match => { "message" => "%{COMBINEDAPACHELOG}" } + } + date { + match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ] + } + mutate { + remove_field => [ "timestamp" ] + } + +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf new file mode 100644 index 00000000..9da70a63 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf @@ -0,0 +1,18 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +output { + pravega { + pravega_endpoint => "tcp://127.0.0.1:9090" + stream_name => "apacheaccess" + scope => "myscope" + } +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf new file mode 100644 index 00000000..04986e41 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf @@ -0,0 +1,14 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+
+output {
+  stdout { codec => rubydebug }
+}
+
diff --git a/flink-examples/src/main/dist/bin/create-stream.sh b/flink-examples/src/main/dist/bin/create-stream.sh
new file mode 100755
index 00000000..9993ee91
--- /dev/null
+++ b/flink-examples/src/main/dist/bin/create-stream.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# sample script to create scope and stream using the Pravega REST API
+#
+host=localhost
+port=9091
+scope=myscope
+stream=apacheaccess
+curl -v -H "Content-Type: application/json" $host:${port}/v1/scopes \
+-d '{
+    "scopeName": "'${scope}'"
+}'
+
+curl -v -H "Content-Type: application/json" $host:${port}/v1/scopes/${scope}/streams \
+-d '{
+    "streamName": "'${stream}'",
+    "scopeName": "'${scope}'",
+    "scalingPolicy": {
+      "type": "FIXED_NUM_SEGMENTS",
+      "minSegments": 1
+    }
+}'

diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
new file mode 100644
index 00000000..4772c6fc
--- /dev/null
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ */
+package io.pravega.examples.flink.alert;
+
+import io.pravega.shaded.com.google.gson.Gson;
+
+/**
+ * Object to process Apache access log
+ */
+public class AccessLog {
+    private String ClientIP;
+    private String Status;
+    private long Timestamp;
+    private String Verb;
+
+    public AccessLog(){
+        Status=Verb=ClientIP="";
+        Timestamp=0L;
+    }
+
+    public String getClientIP() {
+        return ClientIP;
+    }
+
+    public void setClientIP(String clientIP) {
+        ClientIP = clientIP;
+    }
+
+    public String getStatus() {
+        return Status;
+    }
+
+    public void setStatus(String status) {
+        Status = status;
+    }
+
+    public long getTimestamp() {
+        return Timestamp;
+    }
+
+    public void setTimestamp(long timestamp) {
+        this.Timestamp = timestamp;
+    }
+
+    public String getVerb() {
+        return Verb;
+    }
+
+    public void setVerb(String verb) {
+        Verb = verb;
+    }
+
+    /**
+     * The events in the DataStream to which you want to apply pattern matching must
+     * implement proper equals() and hashCode() methods because these are used for
+     * comparing and matching events.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if(this==obj){
+            return true;
+        }
+        if(!(obj instanceof AccessLog)){
+            return false;
+        }
+        AccessLog accessLog =(AccessLog)obj;
+        return accessLog.Verb.equals(Verb) &&
+                accessLog.Status.equals(Status) &&
+                accessLog.Timestamp==Timestamp &&
+                accessLog.ClientIP.equals(ClientIP);
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((Status == null) ? 0 : Status.hashCode());
+        result = prime * result + (int) (Timestamp ^ (Timestamp >>> 32));
+        result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode());
+        result = prime * result + ((Verb == null) ?
0 : Verb.hashCode()); + return result; + } + + @Override + public String toString() { + Gson gson = new Gson(); + return gson.toJson(this); + } +} diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java new file mode 100644 index 00000000..a0acb073 --- /dev/null +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.examples.flink.alert; + +/** + * Defines a handful of constants shared by classes in this package. + * + */ +public class Constants { + protected static final String STREAM_PARAM = "stream"; + protected static final String DEFAULT_STREAM = "myscope/apacheaccess"; + protected static final String CONTROLLER_PARAM = "controller"; + protected static final String DEFAULT_CONTROLLER = "tcp://127.0.0.1:9090"; + protected static final Integer ALERT_THRESHOLD = 6; + protected static final Integer ALERT_WINDOW = 30; + protected static final Integer ALERT_INTERVAL = 2; +} diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java new file mode 100644 index 00000000..bf08fd9a --- /dev/null +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ */
+package io.pravega.examples.flink.alert;
+
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import io.pravega.connectors.flink.FlinkPravegaReader;
+import io.pravega.connectors.flink.util.FlinkPravegaParams;
+import io.pravega.connectors.flink.util.StreamId;
+import io.pravega.shaded.com.google.gson.Gson;
+import org.apache.flink.api.common.functions.FilterFunction;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.java.utils.ParameterTool;
+import org.apache.flink.cep.CEP;
+import org.apache.flink.cep.PatternSelectFunction;
+import org.apache.flink.cep.PatternStream;
+import org.apache.flink.cep.pattern.Pattern;
+import org.apache.flink.cep.pattern.conditions.SimpleCondition;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.windowing.time.Time;
+import org.apache.flink.util.Collector;
+import org.joda.time.DateTime;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+
+/*
+ * This application has the following input parameters
+ *     stream - Pravega stream name to read from
+ *     controller - the Pravega controller URI, e.g., tcp://localhost:9090
+ *                  Note that this parameter is processed by the Pravega Flink connector
+ */
+public class HighCountAlerter {
+
+    // Logger initialization
+    private static final Logger LOG = LoggerFactory.getLogger(HighCountAlerter.class);
+
+    // The application reads data from the specified Pravega stream and, once every ALERT_INTERVAL (2 seconds),
+    // counts the number of 500 responses in the last ALERT_WINDOW (30 seconds), generating an
+    // alert when the count reaches ALERT_THRESHOLD (6).
+
+    public static void main(String[] args) throws Exception {
+        LOG.info("Starting HighCountAlerter...");
+
+        // initialize the parameter utility tool in order to retrieve input parameters
+        ParameterTool params = ParameterTool.fromArgs(args);
+
+        // create a Pravega helper utility for Flink using the input parameters
+        FlinkPravegaParams helper = new FlinkPravegaParams(params);
+
+        // get the Pravega stream from the input parameters
+        StreamId streamId = helper.getStreamFromParam(Constants.STREAM_PARAM,
+                Constants.DEFAULT_STREAM);
+
+        // create the Pravega stream if it does not exist.
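+        // (Assumption, for the reader: createStream is treated as idempotent
+        //  here -- a no-op when the scope/stream already exists -- so it is
+        //  safe to rerun the alerter against an existing stream.)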
+ helper.createStream(streamId); + + // initialize Flink execution environment + final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + // create the Pravega stream reader + long startTime = 0; + FlinkPravegaReader reader = helper.newReader(streamId, startTime, String.class); + + // add the Pravega reader as the data source + DataStream inputStream = env.addSource(reader); + + // create an output sink to stdout for verification + //inputStream.print(); + + // transform logs + DataStream dataStream = inputStream.map(new ParseLogData()); + + // create an output sink to stdout for verification + //dataStream.print(); + + // get responses and their counts + DataStream countStream = + dataStream.flatMap(new FlatMapFunction() { + @Override + public void flatMap(AccessLog value, Collector out) throws Exception { + out.collect(new ResponseCount(value.getStatus(), 1)); + } + }).filter((FilterFunction) count -> { + return !count.response.isEmpty(); + }).keyBy("response") + .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) + .sum("count"); + + // create an output sink to stdout for verification + countStream.print(); + + // create alert pattern + Pattern pattern500 = Pattern.begin("500pattern") + .where(new SimpleCondition() { + @Override + public boolean filter(ResponseCount value) throws Exception { + return value.count >= Constants.ALERT_THRESHOLD && + value.response.equals("500"); + } + }); + + PatternStream patternStream = CEP.pattern(countStream, pattern500); + + DataStream alertStream = patternStream.select( + new PatternSelectFunction() { + @Override + public Alert select(Map> pattern) throws Exception { + ResponseCount count = pattern.get("500pattern").get(0); + return new Alert(count.response, count.count, "High 500 responses"); + } + }); + + // create an output sink to stdout for verification + alertStream.print(); + + + // execute within the Flink environment + env.execute("HighCountAlerter"); + + LOG.info("Ending HighCountAlerter..."); + } + + //Parse the incoming streams & convert into Java PoJos + private static class ParseLogData implements MapFunction{ + public AccessLog map(String record) throws Exception { + // TODO: handle exceptions + Gson gson = new Gson(); + AccessLog accessLog = new AccessLog(); + JsonParser parser = new JsonParser(); + JsonObject obj = parser.parse(record).getAsJsonObject(); + if (obj.has("verb")) { + String verb = obj.get("verb").getAsString(); + accessLog.setVerb(verb); + } + if (obj.has("response")) { + String response = obj.get("response").getAsString(); + accessLog.setStatus(response); + } + if (obj.has("@timestamp")) { + String timestamp = obj.get("@timestamp").getAsString(); + + DateTime dateTime = new DateTime(timestamp); + accessLog.setTimestamp(dateTime.getMillis()); + } + if (obj.has("clientip")) { + String client = obj.get("clientip").getAsString(); + accessLog.setClientIP(client); + } + return accessLog; + } + } + + // Data type access status count + public static class ResponseCount { + + public String response; + public long count; + + public ResponseCount() {} + + public ResponseCount(String status, long count) { + this.response = status; + this.count = count; + } + + @Override + public String toString() { + return "Response count: " + response + " : " + count; + } + } + + // Data type access status count + public static class Alert { + + private String response; + private long count; + private String description; + + public Alert() {} + + public Alert(String 
response, long count, String description) { + this.response = response; + this.count = count; + this.description = description; + } + + @Override + public String toString() { + return description + ": " + response + " : " + count; + } + } + +} From 75c48155085f13a3c059babf22f8b778f1cc81e3 Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 17:11:07 -0400 Subject: [PATCH 02/48] correct typo Signed-off-by: Lida He --- flink-examples/doc/flink-high-error-count-alert/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index cc63c867..6d9cb105 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -89,7 +89,7 @@ $ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apa Add access logs to /tmp/access.log, e.g., by running command below every one or two seconds. ``` -echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/accesslog +echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log ``` Logstash will push the data to Pravega in json string, e.g., From 27e07926f4175fb3107b7f04a13479758cff3b0c Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 17:13:17 -0400 Subject: [PATCH 03/48] apply filter earlier to let in just 500 responses Signed-off-by: Lida He --- .../io/pravega/examples/flink/alert/HighCountAlerter.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index bf08fd9a..59211d13 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -94,7 +94,7 @@ public void flatMap(AccessLog value, Collector out) throws Except out.collect(new ResponseCount(value.getStatus(), 1)); } }).filter((FilterFunction) count -> { - return !count.response.isEmpty(); + return !count.response.equals("500"); }).keyBy("response") .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) .sum("count"); @@ -107,8 +107,7 @@ public void flatMap(AccessLog value, Collector out) throws Except .where(new SimpleCondition() { @Override public boolean filter(ResponseCount value) throws Exception { - return value.count >= Constants.ALERT_THRESHOLD && - value.response.equals("500"); + return value.count >= Constants.ALERT_THRESHOLD; } }); From 7a01f4b28d30587f35f20859698c1c7d9a82baeb Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 21:43:44 -0400 Subject: [PATCH 04/48] use jackson to covert object to/from json Signed-off-by: Lida He --- flink-examples/build.gradle | 1 - .../examples/flink/alert/AccessLog.java | 39 +++++++++++++------ .../flink/alert/HighCountAlerter.java | 35 +++-------------- 3 files changed, 32 insertions(+), 43 deletions(-) diff --git a/flink-examples/build.gradle b/flink-examples/build.gradle index e55acade..784d2c60 100644 --- a/flink-examples/build.gradle +++ b/flink-examples/build.gradle @@ -32,7 +32,6 @@ dependencies { compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" compile 
"org.apache.flink:flink-cep_2.11:${flinkVersion}" compile "org.slf4j:slf4j-log4j12:1.7.14" - compile "com.google.code.gson:gson:2.3.+" compile "joda-time:joda-time:2.9.+" } diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java index 4772c6fc..a57c6c3e 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -10,22 +10,30 @@ */ package io.pravega.examples.flink.alert; -import io.pravega.shaded.com.google.gson.Gson; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.joda.time.DateTime; /** * Object to process Apache access log */ +@JsonIgnoreProperties(ignoreUnknown = true) public class AccessLog { private String ClientIP; + private String Status; - private long Timestamp; + private String Verb; + private String TimestampStr; + public AccessLog(){ Status=Verb=ClientIP=""; - Timestamp=0L; } + @JsonProperty("clientip") public String getClientIP() { return ClientIP; } @@ -34,6 +42,7 @@ public void setClientIP(String clientIP) { ClientIP = clientIP; } + @JsonProperty("response") public String getStatus() { return Status; } @@ -42,14 +51,16 @@ public void setStatus(String status) { Status = status; } - public long getTimestamp() { - return Timestamp; - } + @JsonProperty("@timestamp") + public String getTimestampStr() { return TimestampStr; } + + public void setTimestampStr(String timestampStr) { TimestampStr = timestampStr; } - public void setTimestamp(long timestamp) { - this.Timestamp = timestamp; + public long getTimestampMillis() { + return new DateTime(getTimestampStr()).getMillis(); } + @JsonProperty("verb") public String getVerb() { return Verb; } @@ -74,7 +85,7 @@ public boolean equals(Object obj) { AccessLog accessLog =(AccessLog)obj; return accessLog.Verb.equals(Verb) && accessLog.Status.equals(Status) && - accessLog.Timestamp==Timestamp && + accessLog.TimestampStr.equals(TimestampStr) && accessLog.ClientIP.equals(ClientIP); } @@ -83,15 +94,19 @@ public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((Status == null) ? 0 : Status.hashCode()); - result = prime * result + (int) (Timestamp ^ (Timestamp >>> 32)); result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode()); + result = prime * result + ((TimestampStr == null) ? 0 : TimestampStr.hashCode()); result = prime * result + ((Verb == null) ? 
0 : Verb.hashCode()); return result; } @Override public String toString() { - Gson gson = new Gson(); - return gson.toJson(this); + ObjectMapper mapper = new ObjectMapper(); + try { + return mapper.writeValueAsString(this); + } catch (JsonProcessingException e) { + return "AccessLog: Timestamp=" + getTimestampStr() +", ClientIP=" + getClientIP() + ", Verb=" + getVerb() + ", Status=" + getStatus(); + } } } diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index 59211d13..b03d7963 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -10,12 +10,9 @@ */ package io.pravega.examples.flink.alert; -import com.google.gson.JsonObject; -import com.google.gson.JsonParser; import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.util.FlinkPravegaParams; import io.pravega.connectors.flink.util.StreamId; -import io.pravega.shaded.com.google.gson.Gson; import org.apache.flink.api.common.functions.FilterFunction; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.functions.MapFunction; @@ -25,11 +22,11 @@ import org.apache.flink.cep.PatternStream; import org.apache.flink.cep.pattern.Pattern; import org.apache.flink.cep.pattern.conditions.SimpleCondition; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.windowing.time.Time; import org.apache.flink.util.Collector; -import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -94,7 +91,7 @@ public void flatMap(AccessLog value, Collector out) throws Except out.collect(new ResponseCount(value.getStatus(), 1)); } }).filter((FilterFunction) count -> { - return !count.response.equals("500"); + return count.response.equals("500"); }).keyBy("response") .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) .sum("count"); @@ -134,31 +131,9 @@ public Alert select(Map> pattern) throws Exception { //Parse the incoming streams & convert into Java PoJos private static class ParseLogData implements MapFunction{ - public AccessLog map(String record) throws Exception { - // TODO: handle exceptions - Gson gson = new Gson(); - AccessLog accessLog = new AccessLog(); - JsonParser parser = new JsonParser(); - JsonObject obj = parser.parse(record).getAsJsonObject(); - if (obj.has("verb")) { - String verb = obj.get("verb").getAsString(); - accessLog.setVerb(verb); - } - if (obj.has("response")) { - String response = obj.get("response").getAsString(); - accessLog.setStatus(response); - } - if (obj.has("@timestamp")) { - String timestamp = obj.get("@timestamp").getAsString(); - - DateTime dateTime = new DateTime(timestamp); - accessLog.setTimestamp(dateTime.getMillis()); - } - if (obj.has("clientip")) { - String client = obj.get("clientip").getAsString(); - accessLog.setClientIP(client); - } - return accessLog; + public AccessLog map(String value) throws Exception { + ObjectMapper mapper = new ObjectMapper(); + return mapper.readValue(value, AccessLog.class); } } From 44842c03f734ba5579179f90f7b694535fd265fa Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 
22:35:40 -0400 Subject: [PATCH 05/48] merge with develop branch Signed-off-by: Lida He --- flink-examples/build.gradle | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/flink-examples/build.gradle b/flink-examples/build.gradle index ad03f315..549534d0 100644 --- a/flink-examples/build.gradle +++ b/flink-examples/build.gradle @@ -31,6 +31,8 @@ dependencies { compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}" compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" compile "org.slf4j:slf4j-log4j12:1.7.25" + compile "org.apache.flink:flink-cep_2.11:${flinkVersion}" + compile "joda-time:joda-time:2.9.+" } shadowJar { @@ -54,6 +56,13 @@ task scriptWordCountReader(type: CreateStartScripts) { classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } +task scriptFlinkAlerter(type: CreateStartScripts) { + outputDir = file('build/scripts') + mainClassName = 'io.pravega.examples.flink.alert.HighCountAlerter' + applicationName = 'highCountAlerter' + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath +} + distributions { main { baseName = archivesBaseName @@ -67,6 +76,7 @@ distributions { into('bin') { from project.scriptWordCountWriter from project.scriptWordCountReader + from project.scriptFlinkAlerter } } } From e59f71d33c676c109621371b3979b0a56d59236c Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 23 Mar 2018 18:20:51 -0400 Subject: [PATCH 06/48] example to generate alert from apache access log via logstash pravega output plugin Signed-off-by: Lida He add example to process apache access log and generate high 500 response alert logstash config and first cut of readme for alert sample instruction to run high count alerter sample. instruction to run the high count alerter sample print output to stdout Add flink references update flink references Steps to use wordCountWriter instead of logstash Signed-off-by: Lida He Add link to readme file for high error count alerter Signed-off-by: Lida He Update README.md Signed-off-by: Lida He add license to conf files Signed-off-by: Lida He read access log from file instead of stdin Signed-off-by: Lida He Update instruction to read access log from file. Signed-off-by: Lida He Update instruction to read from file. 
Signed-off-by: Lida He

Update instruction to read access log from file

Signed-off-by: Lida He

Update README.md

Signed-off-by: Lida He

Update README.md

Signed-off-by: Lida He

Signed-off-by: Lida He
---
 flink-examples/README.md                      |   6 +
 .../flink-high-error-count-alert/README.md    | 133 +++++++++++
 .../filters/01-file-input.conf                |  17 ++
 .../filters/10-apache-accesslog-filter.conf   |  23 ++
 .../filters/90-pravega-output.conf            |  18 ++
 .../filters/95-stdout-output.conf             |  14 ++
 .../src/main/dist/bin/create-stream.sh        |  22 ++
 .../examples/flink/alert/AccessLog.java       |  97 +++++++++
 .../examples/flink/alert/Constants.java       |  25 +++
 .../flink/alert/HighCountAlerter.java         | 206 ++++++++++++++++++
 10 files changed, 561 insertions(+)
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/README.md
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf
 create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf
 create mode 100755 flink-examples/src/main/dist/bin/create-stream.sh
 create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
 create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java
 create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java

diff --git a/flink-examples/README.md b/flink-examples/README.md
index ea7b10e8..fd847a18 100644
--- a/flink-examples/README.md
+++ b/flink-examples/README.md
@@ -31,3 +31,9 @@ This example demonstrates how to use the Pravega Flink Connectors to write data
 from an external network stream into a Pravega stream and read the data from the
 Pravega stream.
 See [Flink Word Count Sample](doc/flink-wordcount/README.md) for instructions.
+## High Error Count Alert
+
+This example demonstrates how to use the Pravega Flink connectors to read and
+parse Apache access logs from logstash via the [logstash pravega output plugin](https://github.com/pravega/logstash-output-pravega),
+and how to generate an alert when the error count is high within a time frame.
+See [High Error Count Alert](doc/flink-high-error-count-alert/README.md) for instructions.

diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md
new file mode 100644
index 00000000..cc63c867
--- /dev/null
+++ b/flink-examples/doc/flink-high-error-count-alert/README.md
@@ -0,0 +1,133 @@
+# High Count Alert #
+
+The application reads Apache access logs from a Pravega stream and, once every 2 seconds,
+counts the number of 500 responses in the last 30 seconds, generating an
+alert when the count of 500 responses reaches 6 or above.
+
+## Prerequisites ##
+
+A Docker image containing Pravega and Logstash has been prepared to simplify the demo. Skip ahead to the **Run in Docker Container** section in this document if you have a Docker environment handy.
+
+Otherwise, proceed to set up Logstash and Pravega:
+
+1. Logstash installed, see [Install logstash](https://www.elastic.co/guide/en/logstash/5.6/installing-logstash.html).
+2. Pravega running, see [here](http://pravega.io/docs/latest/getting-started/) for instructions (a quick connectivity check is sketched below).
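+Before going further, you can optionally verify that the controller is reachable. The probe below is only a sketch, not part of the sample: it assumes the controller REST API listens on port 9091 (the port used by create-stream.sh later in this patch) and exposes GET /v1/scopes.
+
+```
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.HttpURLConnection;
+import java.net.URL;
+
+// Hypothetical helper: probes the Pravega controller REST API.
+public class PravegaRestProbe {
+    public static void main(String[] args) throws Exception {
+        HttpURLConnection conn =
+                (HttpURLConnection) new URL("http://localhost:9091/v1/scopes").openConnection();
+        System.out.println("HTTP " + conn.getResponseCode());  // expect 200 when Pravega is up
+        try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
+            String line;
+            while ((line = in.readLine()) != null) {
+                System.out.println(line);  // JSON list of existing scopes
+            }
+        }
+    }
+}
+```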
+
+## Start Logstash with Pravega Output Plugin ##
+
+On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.2.0.gem`.
+
+Install the plugin, assuming Logstash is installed at `/usr/share/logstash/`:
+```
+$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.2.0.gem
+```
+
+Copy the contents under flink-examples/doc/flink-high-error-count-alert/filters/ to the Logstash host, e.g., into directory ~/pravega, and
+update **pravega_endpoint** in ~/pravega/90-pravega-output.conf:
+
+```
+output {
+  pravega {
+    pravega_endpoint => "tcp://127.0.0.1:9090"   <- update to point to your Pravega controller
+    stream_name => "apacheaccess"
+    scope => "myscope"
+  }
+}
+```
+
+Start Logstash, assuming it is installed at /usr/share/logstash.
+Note that it may sometimes take a minute or two for Logstash to start. For troubleshooting, the Logstash log files are
+normally at /var/log/logstash. To restart, type Ctrl-C, then re-run the command.
+
+```
+$ sudo /usr/share/logstash/bin/logstash -f ~/pravega
+Sending Logstash's logs to /var/log/logstash which is now configured via log4j2.properties
+```
+
+Normally Logstash is configured to receive data from remote log shippers, such as filebeat. For simplicity, in this demo
+Logstash is configured to read data from /tmp/access.log.
+
+## Run in Docker Container ##
+
+Create a file at /tmp/access.log:
+```
+$ touch /tmp/access.log
+```
+
+Run the script below to start a container from the prebuilt image. Adjust the parameters to your needs.
+```
+#!/bin/sh
+set -u
+
+PRAVEGA_ENDPOINT=tcp://127.0.0.1:9090
+PRAVEGA_SCOPE=myscope
+PRAVEGA_STREAM=apacheaccess
+CONTAINER_NAME=pravega
+IMAGE_NAME=emccorp/pravega-demo
+
+docker run -d --name $CONTAINER_NAME \
+    -p 9090:9090 \
+    -p 9091:9091 \
+    -v /tmp/access.log:/opt/data/access.log \
+    -v /tmp/logs/:/var/log/pravega/ \
+    -e PRAVEGA_ENDPOINT=${PRAVEGA_ENDPOINT} \
+    -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \
+    -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \
+    ${IMAGE_NAME}
+```
+
+More details can be found on GitHub [pravega docker](https://github.com/hldnova/pravega-docker) and on Docker Hub [pravega docker image](https://hub.docker.com/r/emccorp/pravega-demo/)
+
+## Run HighCountAlerter ##
+
+Run the alerter. Adjust the controller and scope/stream if necessary.
+```
+$ cd flink-examples/build/install/pravega-flink-examples
+$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apacheaccess]
+```
+
+## Input Data ##
+
+Add access logs to /tmp/access.log, e.g., by running the command below every one or two seconds.
+```
+echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/accesslog
+```
+
+Logstash will push the data to Pravega as a JSON string, e.g.,
+```
+{
+        "request" => "/mapping/",
+          "agent" => "\"python-client\"",
+           "auth" => "peter",
+          "ident" => "-",
+           "verb" => "PUT",
+        "message" => "10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] \"PUT /mapping/ HTTP/1.1\" 500 182 \"http://example.com/myapp\" \"python-client\"",
+       "referrer" => "\"http://example.com/myapp\"",
+     "@timestamp" => 2018-03-19T06:24:01.000Z,
+       "response" => "500",
+          "bytes" => "182",
+       "clientip" => "10.1.1.11",
+       "@version" => "1",
+           "host" => "lglca061.lss.emc.com",
+    "httpversion" => "1.1"
+}
+```
+
+## View Alert ##
+In the HighCountAlerter window, you should see output like the following.
Once the 500 response counts reach 6 or above, it +should print **High 500 responses** alerts. +``` +3> Response count: 500 : 1 +3> Response count: 500 : 2 +3> Response count: 500 : 4 +3> Response count: 500 : 6 +2> High 500 responses: 500 : 6 +3> Response count: 500 : 8 +3> High 500 responses: 500 : 8 +3> Response count: 500 : 8 +2> High 500 responses: 500 : 8 +3> Response count: 500 : 7 +3> High 500 responses: 500 : 7 +3> Response count: 500 : 5 +3> Response count: 500 : 3 +3> Response count: 500 : 1 +``` diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf b/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf new file mode 100644 index 00000000..22ae47fe --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf @@ -0,0 +1,17 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +input { + file { + path => "/tmp/access.log" + start_position => beginning + } +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf b/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf new file mode 100644 index 00000000..d33b4b95 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf @@ -0,0 +1,23 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +filter { + grok { + match => { "message" => "%{COMBINEDAPACHELOG}" } + } + date { + match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ] + } + mutate { + remove_field => [ "timestamp" ] + } + +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf new file mode 100644 index 00000000..9da70a63 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf @@ -0,0 +1,18 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +output { + pravega { + pravega_endpoint => "tcp://127.0.0.1:9090" + stream_name => "apacheaccess" + scope => "myscope" + } +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf new file mode 100644 index 00000000..04986e41 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf @@ -0,0 +1,14 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+
+output {
+  stdout { codec => rubydebug }
+}
+
diff --git a/flink-examples/src/main/dist/bin/create-stream.sh b/flink-examples/src/main/dist/bin/create-stream.sh
new file mode 100755
index 00000000..9993ee91
--- /dev/null
+++ b/flink-examples/src/main/dist/bin/create-stream.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# sample script to create scope and stream using the Pravega REST API
+#
+host=localhost
+port=9091
+scope=myscope
+stream=apacheaccess
+curl -v -H "Content-Type: application/json" $host:${port}/v1/scopes \
+-d '{
+    "scopeName": "'${scope}'"
+}'
+
+curl -v -H "Content-Type: application/json" $host:${port}/v1/scopes/${scope}/streams \
+-d '{
+    "streamName": "'${stream}'",
+    "scopeName": "'${scope}'",
+    "scalingPolicy": {
+      "type": "FIXED_NUM_SEGMENTS",
+      "minSegments": 1
+    }
+}'

diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
new file mode 100644
index 00000000..4772c6fc
--- /dev/null
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ */
+package io.pravega.examples.flink.alert;
+
+import io.pravega.shaded.com.google.gson.Gson;
+
+/**
+ * Object to process Apache access log
+ */
+public class AccessLog {
+    private String ClientIP;
+    private String Status;
+    private long Timestamp;
+    private String Verb;
+
+    public AccessLog(){
+        Status=Verb=ClientIP="";
+        Timestamp=0L;
+    }
+
+    public String getClientIP() {
+        return ClientIP;
+    }
+
+    public void setClientIP(String clientIP) {
+        ClientIP = clientIP;
+    }
+
+    public String getStatus() {
+        return Status;
+    }
+
+    public void setStatus(String status) {
+        Status = status;
+    }
+
+    public long getTimestamp() {
+        return Timestamp;
+    }
+
+    public void setTimestamp(long timestamp) {
+        this.Timestamp = timestamp;
+    }
+
+    public String getVerb() {
+        return Verb;
+    }
+
+    public void setVerb(String verb) {
+        Verb = verb;
+    }
+
+    /**
+     * The events in the DataStream to which you want to apply pattern matching must
+     * implement proper equals() and hashCode() methods because these are used for
+     * comparing and matching events.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if(this==obj){
+            return true;
+        }
+        if(!(obj instanceof AccessLog)){
+            return false;
+        }
+        AccessLog accessLog =(AccessLog)obj;
+        return accessLog.Verb.equals(Verb) &&
+                accessLog.Status.equals(Status) &&
+                accessLog.Timestamp==Timestamp &&
+                accessLog.ClientIP.equals(ClientIP);
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((Status == null) ? 0 : Status.hashCode());
+        result = prime * result + (int) (Timestamp ^ (Timestamp >>> 32));
+        result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode());
+        result = prime * result + ((Verb == null) ?
0 : Verb.hashCode()); + return result; + } + + @Override + public String toString() { + Gson gson = new Gson(); + return gson.toJson(this); + } +} diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java new file mode 100644 index 00000000..a0acb073 --- /dev/null +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.examples.flink.alert; + +/** + * Defines a handful of constants shared by classes in this package. + * + */ +public class Constants { + protected static final String STREAM_PARAM = "stream"; + protected static final String DEFAULT_STREAM = "myscope/apacheaccess"; + protected static final String CONTROLLER_PARAM = "controller"; + protected static final String DEFAULT_CONTROLLER = "tcp://127.0.0.1:9090"; + protected static final Integer ALERT_THRESHOLD = 6; + protected static final Integer ALERT_WINDOW = 30; + protected static final Integer ALERT_INTERVAL = 2; +} diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java new file mode 100644 index 00000000..bf08fd9a --- /dev/null +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ */
+package io.pravega.examples.flink.alert;
+
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import io.pravega.connectors.flink.FlinkPravegaReader;
+import io.pravega.connectors.flink.util.FlinkPravegaParams;
+import io.pravega.connectors.flink.util.StreamId;
+import io.pravega.shaded.com.google.gson.Gson;
+import org.apache.flink.api.common.functions.FilterFunction;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.java.utils.ParameterTool;
+import org.apache.flink.cep.CEP;
+import org.apache.flink.cep.PatternSelectFunction;
+import org.apache.flink.cep.PatternStream;
+import org.apache.flink.cep.pattern.Pattern;
+import org.apache.flink.cep.pattern.conditions.SimpleCondition;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.windowing.time.Time;
+import org.apache.flink.util.Collector;
+import org.joda.time.DateTime;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+
+/*
+ * This application has the following input parameters
+ *     stream - Pravega stream name to read from
+ *     controller - the Pravega controller URI, e.g., tcp://localhost:9090
+ *                  Note that this parameter is processed by the Pravega Flink connector
+ */
+public class HighCountAlerter {
+
+    // Logger initialization
+    private static final Logger LOG = LoggerFactory.getLogger(HighCountAlerter.class);
+
+    // The application reads data from the specified Pravega stream and, once every ALERT_INTERVAL (2 seconds),
+    // counts the number of 500 responses in the last ALERT_WINDOW (30 seconds), generating an
+    // alert when the count reaches ALERT_THRESHOLD (6).
+
+    public static void main(String[] args) throws Exception {
+        LOG.info("Starting HighCountAlerter...");
+
+        // initialize the parameter utility tool in order to retrieve input parameters
+        ParameterTool params = ParameterTool.fromArgs(args);
+
+        // create a Pravega helper utility for Flink using the input parameters
+        FlinkPravegaParams helper = new FlinkPravegaParams(params);
+
+        // get the Pravega stream from the input parameters
+        StreamId streamId = helper.getStreamFromParam(Constants.STREAM_PARAM,
+                Constants.DEFAULT_STREAM);
+
+        // create the Pravega stream if it does not exist.
+ helper.createStream(streamId); + + // initialize Flink execution environment + final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + // create the Pravega stream reader + long startTime = 0; + FlinkPravegaReader reader = helper.newReader(streamId, startTime, String.class); + + // add the Pravega reader as the data source + DataStream inputStream = env.addSource(reader); + + // create an output sink to stdout for verification + //inputStream.print(); + + // transform logs + DataStream dataStream = inputStream.map(new ParseLogData()); + + // create an output sink to stdout for verification + //dataStream.print(); + + // get responses and their counts + DataStream countStream = + dataStream.flatMap(new FlatMapFunction() { + @Override + public void flatMap(AccessLog value, Collector out) throws Exception { + out.collect(new ResponseCount(value.getStatus(), 1)); + } + }).filter((FilterFunction) count -> { + return !count.response.isEmpty(); + }).keyBy("response") + .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) + .sum("count"); + + // create an output sink to stdout for verification + countStream.print(); + + // create alert pattern + Pattern pattern500 = Pattern.begin("500pattern") + .where(new SimpleCondition() { + @Override + public boolean filter(ResponseCount value) throws Exception { + return value.count >= Constants.ALERT_THRESHOLD && + value.response.equals("500"); + } + }); + + PatternStream patternStream = CEP.pattern(countStream, pattern500); + + DataStream alertStream = patternStream.select( + new PatternSelectFunction() { + @Override + public Alert select(Map> pattern) throws Exception { + ResponseCount count = pattern.get("500pattern").get(0); + return new Alert(count.response, count.count, "High 500 responses"); + } + }); + + // create an output sink to stdout for verification + alertStream.print(); + + + // execute within the Flink environment + env.execute("HighCountAlerter"); + + LOG.info("Ending HighCountAlerter..."); + } + + //Parse the incoming streams & convert into Java PoJos + private static class ParseLogData implements MapFunction{ + public AccessLog map(String record) throws Exception { + // TODO: handle exceptions + Gson gson = new Gson(); + AccessLog accessLog = new AccessLog(); + JsonParser parser = new JsonParser(); + JsonObject obj = parser.parse(record).getAsJsonObject(); + if (obj.has("verb")) { + String verb = obj.get("verb").getAsString(); + accessLog.setVerb(verb); + } + if (obj.has("response")) { + String response = obj.get("response").getAsString(); + accessLog.setStatus(response); + } + if (obj.has("@timestamp")) { + String timestamp = obj.get("@timestamp").getAsString(); + + DateTime dateTime = new DateTime(timestamp); + accessLog.setTimestamp(dateTime.getMillis()); + } + if (obj.has("clientip")) { + String client = obj.get("clientip").getAsString(); + accessLog.setClientIP(client); + } + return accessLog; + } + } + + // Data type access status count + public static class ResponseCount { + + public String response; + public long count; + + public ResponseCount() {} + + public ResponseCount(String status, long count) { + this.response = status; + this.count = count; + } + + @Override + public String toString() { + return "Response count: " + response + " : " + count; + } + } + + // Data type access status count + public static class Alert { + + private String response; + private long count; + private String description; + + public Alert() {} + + public Alert(String 
response, long count, String description) {
+            this.response = response;
+            this.count = count;
+            this.description = description;
+        }
+
+        @Override
+        public String toString() {
+            return description + ": " + response + " : " + count;
+        }
+    }
+
+}

From b3728a409b784ba4cc1d96bcf785940a821a68da Mon Sep 17 00:00:00 2001
From: Lida He
Date: Thu, 17 May 2018 17:11:07 -0400
Subject: [PATCH 07/48] correct typo

Signed-off-by: Lida He
---
 flink-examples/doc/flink-high-error-count-alert/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md
index cc63c867..6d9cb105 100644
--- a/flink-examples/doc/flink-high-error-count-alert/README.md
+++ b/flink-examples/doc/flink-high-error-count-alert/README.md
@@ -89,7 +89,7 @@ $ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apa
 
 Add access logs to /tmp/access.log, e.g., by running command below every one or two seconds.
 ```
-echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/accesslog
+echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log
 ```
 
 Logstash will push the data to Pravega in json string, e.g.,

From 90a94e4c139584ddb1b536d12a6c85386fa58d25 Mon Sep 17 00:00:00 2001
From: Lida He
Date: Thu, 17 May 2018 17:13:17 -0400
Subject: [PATCH 08/48] apply filter earlier to let in just 500 responses

Signed-off-by: Lida He
---
 .../io/pravega/examples/flink/alert/HighCountAlerter.java | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
index bf08fd9a..59211d13 100644
--- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
@@ -94,7 +94,7 @@ public void flatMap(AccessLog value, Collector<ResponseCount> out) throws Except
                     out.collect(new ResponseCount(value.getStatus(), 1));
                 }
             }).filter((FilterFunction<ResponseCount>) count -> {
-                return !count.response.isEmpty();
+                return !count.response.equals("500");
             }).keyBy("response")
             .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL))
             .sum("count");
@@ -107,8 +107,7 @@
             .where(new SimpleCondition<ResponseCount>() {
                 @Override
                 public boolean filter(ResponseCount value) throws Exception {
-                    return value.count >= Constants.ALERT_THRESHOLD &&
-                           value.response.equals("500");
+                    return value.count >= Constants.ALERT_THRESHOLD;
                 }
             });

From d2de98670d54fa526a72ec61b15cf4b2044b5094 Mon Sep 17 00:00:00 2001
From: Lida He
Date: Thu, 17 May 2018 22:36:50 -0400
Subject: [PATCH 09/48] merge with develop branch

Signed-off-by: Lida He
---
 flink-examples/build.gradle | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flink-examples/build.gradle b/flink-examples/build.gradle
index 549534d0..4f7e3a1d 100644
--- a/flink-examples/build.gradle
+++ b/flink-examples/build.gradle
@@ -32,6 +32,7 @@ dependencies {
     compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}"
     compile "org.slf4j:slf4j-log4j12:1.7.25"
     compile "org.apache.flink:flink-cep_2.11:${flinkVersion}"
+    compile "org.slf4j:slf4j-log4j12:1.7.14"
     compile "joda-time:joda-time:2.9.+"
 }
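As an aside (not part of any patch in this series): a quick sanity check of the alert arithmetic configured above. With ALERT_WINDOW = 30, ALERT_INTERVAL = 2 and ALERT_THRESHOLD = 6, each window covers 30 seconds of data and a new windowed count is emitted every 2 seconds, so a single 500 response is counted by 15 overlapping windows, and a sustained rate of one 500 every 5 seconds is just enough to keep the alert firing. A minimal, hypothetical plain-Java sketch of that arithmetic (the class name is invented for illustration):

```java
// Hypothetical sketch, not taken from the patches: back-of-the-envelope
// check of the alert constants used by HighCountAlerter.
public class AlertMath {
    public static void main(String[] args) {
        final int alertWindowSec = 30;   // Constants.ALERT_WINDOW
        final int alertIntervalSec = 2;  // Constants.ALERT_INTERVAL
        final int alertThreshold = 6;    // Constants.ALERT_THRESHOLD

        // Each 500 response falls into window/slide overlapping windows.
        System.out.println("overlapping windows per event: "
                + (alertWindowSec / alertIntervalSec));            // 15

        // Slowest sustained 500 rate that still reaches the threshold:
        // alertThreshold events spread evenly over one full window.
        System.out.println("one 500 every "
                + (alertWindowSec / alertThreshold)
                + " seconds sustains the alert");                  // every 5 seconds
    }
}
```

This also explains the README's suggestion to append a 500-response log line every one or two seconds: that yields roughly 15 to 30 events per window, comfortably above the threshold of 6.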
From 95343a6455fe2bf616cb67c172adff01bcc3638e Mon Sep 17 00:00:00 2001
From: Lida He
Date: Thu, 17 May 2018 21:43:44 -0400
Subject: [PATCH 10/48] use jackson to convert object to/from json

Signed-off-by: Lida He
---
 .../examples/flink/alert/AccessLog.java       | 39 +++++++++++++------
 .../flink/alert/HighCountAlerter.java         | 35 +++--------
 2 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
index 4772c6fc..a57c6c3e 100644
--- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java
@@ -10,22 +10,30 @@
  */
 package io.pravega.examples.flink.alert;
 
-import io.pravega.shaded.com.google.gson.Gson;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
+import org.joda.time.DateTime;
 
 /**
  * Object to process Apache access log
 */
+@JsonIgnoreProperties(ignoreUnknown = true)
 public class AccessLog {
     private String ClientIP;
+
     private String Status;
-    private long Timestamp;
+
     private String Verb;
+
+    private String TimestampStr;
+
     public AccessLog(){
         Status=Verb=ClientIP="";
-        Timestamp=0L;
     }
 
+    @JsonProperty("clientip")
     public String getClientIP() {
         return ClientIP;
     }
@@ -34,6 +42,7 @@ public void setClientIP(String clientIP) {
         ClientIP = clientIP;
     }
 
+    @JsonProperty("response")
     public String getStatus() {
         return Status;
     }
@@ -42,14 +51,16 @@ public void setStatus(String status) {
         Status = status;
     }
 
-    public long getTimestamp() {
-        return Timestamp;
-    }
+    @JsonProperty("@timestamp")
+    public String getTimestampStr() { return TimestampStr; }
+
+    public void setTimestampStr(String timestampStr) { TimestampStr = timestampStr; }
 
-    public void setTimestamp(long timestamp) {
-        this.Timestamp = timestamp;
+    public long getTimestampMillis() {
+        return new DateTime(getTimestampStr()).getMillis();
     }
 
+    @JsonProperty("verb")
     public String getVerb() {
         return Verb;
     }
@@ -74,7 +85,7 @@ public boolean equals(Object obj) {
         AccessLog accessLog =(AccessLog)obj;
         return accessLog.Verb.equals(Verb) &&
                accessLog.Status.equals(Status) &&
-               accessLog.Timestamp==Timestamp &&
+               accessLog.TimestampStr.equals(TimestampStr) &&
               accessLog.ClientIP.equals(ClientIP);
     }
 
@@ -83,15 +94,19 @@ public int hashCode() {
         final int prime = 31;
         int result = 1;
         result = prime * result + ((Status == null) ? 0 : Status.hashCode());
-        result = prime * result + (int) (Timestamp ^ (Timestamp >>> 32));
         result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode());
+        result = prime * result + ((TimestampStr == null) ? 0 : TimestampStr.hashCode());
         result = prime * result + ((Verb == null) ?
0 : Verb.hashCode());
         return result;
     }
 
     @Override
     public String toString() {
-        Gson gson = new Gson();
-        return gson.toJson(this);
+        ObjectMapper mapper = new ObjectMapper();
+        try {
+            return mapper.writeValueAsString(this);
+        } catch (JsonProcessingException e) {
+            return "AccessLog: Timestamp=" + getTimestampStr() +", ClientIP=" + getClientIP() + ", Verb=" + getVerb() + ", Status=" + getStatus();
+        }
     }
 }
diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
index 59211d13..b03d7963 100644
--- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
@@ -10,12 +10,9 @@
  */
 package io.pravega.examples.flink.alert;
 
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
 import io.pravega.connectors.flink.FlinkPravegaReader;
 import io.pravega.connectors.flink.util.FlinkPravegaParams;
 import io.pravega.connectors.flink.util.StreamId;
-import io.pravega.shaded.com.google.gson.Gson;
 import org.apache.flink.api.common.functions.FilterFunction;
 import org.apache.flink.api.common.functions.FlatMapFunction;
 import org.apache.flink.api.common.functions.MapFunction;
@@ -25,11 +22,11 @@
 import org.apache.flink.cep.PatternStream;
 import org.apache.flink.cep.pattern.Pattern;
 import org.apache.flink.cep.pattern.conditions.SimpleCondition;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.flink.streaming.api.datastream.DataStream;
 import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
 import org.apache.flink.streaming.api.windowing.time.Time;
 import org.apache.flink.util.Collector;
-import org.joda.time.DateTime;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -94,7 +91,7 @@ public void flatMap(AccessLog value, Collector<ResponseCount> out) throws Except
                     out.collect(new ResponseCount(value.getStatus(), 1));
                 }
             }).filter((FilterFunction<ResponseCount>) count -> {
-                return !count.response.equals("500");
+                return count.response.equals("500");
             }).keyBy("response")
             .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL))
             .sum("count");
@@ -134,31 +131,9 @@ public Alert select(Map<String, List<ResponseCount>> pattern) throws Exception {
 
     //Parse the incoming streams & convert into Java PoJos
     private static class ParseLogData implements MapFunction<String, AccessLog> {
-        public AccessLog map(String record) throws Exception {
-            // TODO: handle exceptions
-            Gson gson = new Gson();
-            AccessLog accessLog = new AccessLog();
-            JsonParser parser = new JsonParser();
-            JsonObject obj = parser.parse(record).getAsJsonObject();
-            if (obj.has("verb")) {
-                String verb = obj.get("verb").getAsString();
-                accessLog.setVerb(verb);
-            }
-            if (obj.has("response")) {
-                String response = obj.get("response").getAsString();
-                accessLog.setStatus(response);
-            }
-            if (obj.has("@timestamp")) {
-                String timestamp = obj.get("@timestamp").getAsString();
-
-                DateTime dateTime = new DateTime(timestamp);
-                accessLog.setTimestamp(dateTime.getMillis());
-            }
-            if (obj.has("clientip")) {
-                String client = obj.get("clientip").getAsString();
-                accessLog.setClientIP(client);
-            }
-            return accessLog;
+        public AccessLog map(String value) throws Exception {
+            ObjectMapper mapper = new ObjectMapper();
+            return mapper.readValue(value, AccessLog.class);
         }
     }
 

From f18c900158cac3e6e616c6d7be1636eb8810a987 Mon Sep 17 00:00:00 2001
From: Lida He
Date: Fri, 18 May 2018
01:03:25 -0400 Subject: [PATCH 11/48] use builder api Signed-off-by: Lida He --- .../flink-high-error-count-alert/README.md | 4 +-- .../filters/90-pravega-output.conf | 2 +- .../examples/flink/alert/Constants.java | 5 ++- .../flink/alert/HighCountAlerter.java | 32 +++++++++++-------- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index 6d9cb105..5b3ad4c4 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -30,7 +30,7 @@ output { pravega { pravega_endpoint => "tcp://127.0.0.1:9090" <- update to point to your Pravega controller stream_name => "apacheaccess" - scope => "myscope" + scope => "examples" } } ``` @@ -82,7 +82,7 @@ More details can be found on github [pravega docker](https://github.com/hldnova/ Run the alerter. Adjust the controller and scope/stream if necessary. ``` $ cd flink-examples/build/install/pravega-flink-examples -$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apacheaccess] +$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream examples] [--stream apacheaccess] ``` ## Input Data ## diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf index 9da70a63..86119beb 100644 --- a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf +++ b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf @@ -12,7 +12,7 @@ output { pravega { pravega_endpoint => "tcp://127.0.0.1:9090" stream_name => "apacheaccess" - scope => "myscope" + scope => "examples" } } diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java index a0acb073..09951d1d 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java @@ -16,9 +16,8 @@ */ public class Constants { protected static final String STREAM_PARAM = "stream"; - protected static final String DEFAULT_STREAM = "myscope/apacheaccess"; - protected static final String CONTROLLER_PARAM = "controller"; - protected static final String DEFAULT_CONTROLLER = "tcp://127.0.0.1:9090"; + protected static final String DEFAULT_SCOPE = "examples"; + protected static final String DEFAULT_STREAM = "apacheaccess"; protected static final Integer ALERT_THRESHOLD = 6; protected static final Integer ALERT_WINDOW = 30; protected static final Integer ALERT_INTERVAL = 2; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index b03d7963..3a2b3730 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -10,9 +10,11 @@ */ package io.pravega.examples.flink.alert; +import io.pravega.client.stream.Stream; import io.pravega.connectors.flink.FlinkPravegaReader; -import io.pravega.connectors.flink.util.FlinkPravegaParams; -import io.pravega.connectors.flink.util.StreamId; +import io.pravega.connectors.flink.PravegaConfig; +import io.pravega.connectors.flink.serialization.PravegaSerialization; 
+import io.pravega.examples.flink.Utils;
 import org.apache.flink.api.common.functions.FilterFunction;
 import org.apache.flink.api.common.functions.FlatMapFunction;
 import org.apache.flink.api.common.functions.MapFunction;
@@ -54,22 +56,24 @@ public static void main(String[] args) throws Exception {
         // initialize the parameter utility tool in order to retrieve input parameters
         ParameterTool params = ParameterTool.fromArgs(args);
 
-        // create Pravega helper utility for Flink using the input parameters
-        FlinkPravegaParams helper = new FlinkPravegaParams(params);
+        PravegaConfig pravegaConfig = PravegaConfig
+                .fromParams(params)
+                .withDefaultScope(Constants.DEFAULT_SCOPE);
 
-        // get the Pravega stream from the input parameters
-        StreamId streamId = helper.getStreamFromParam(Constants.STREAM_PARAM,
-                Constants.DEFAULT_STREAM);
+        // create the Pravega input stream (if necessary)
+        Stream stream = Utils.createStream(
+                pravegaConfig,
+                params.get(Constants.STREAM_PARAM, Constants.DEFAULT_STREAM));
 
-        // create the Pravega stream if it does not exist.
-        helper.createStream(streamId);
-
-        // initialize Flink execution environment
+        // initialize the Flink execution environment
         final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
 
-        // create the Pravega stream reader
-        long startTime = 0;
-        FlinkPravegaReader<String> reader = helper.newReader(streamId, startTime, String.class);
+        // create the Pravega source to read a stream of text
+        FlinkPravegaReader<String> reader = FlinkPravegaReader.<String>builder()
+                .withPravegaConfig(pravegaConfig)
+                .forStream(stream)
+                .withDeserializationSchema(PravegaSerialization.deserializationFor(String.class))
+                .build();
 
         // add the Pravega reader as the data source
         DataStream<String> inputStream = env.addSource(reader);

From 6a5297700148f07b83cb80e1092de482483c7000 Mon Sep 17 00:00:00 2001
From: Lida He
Date: Fri, 18 May 2018 01:11:47 -0400
Subject: [PATCH 12/48] update README

Signed-off-by: Lida He
---
 flink-examples/doc/flink-high-error-count-alert/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md
index 5b3ad4c4..e020d2b7 100644
--- a/flink-examples/doc/flink-high-error-count-alert/README.md
+++ b/flink-examples/doc/flink-high-error-count-alert/README.md
@@ -82,7 +82,7 @@ More details can be found on github [pravega docker](https://github.com/hldnova/
 
 Run the alerter. Adjust the controller and scope/stream if necessary.
``` $ cd flink-examples/build/install/pravega-flink-examples -$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream examples] [--stream apacheaccess] +$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--scope examples] [--stream apacheaccess] ``` ## Input Data ## From b85510cb4fc15fbf09a55df31f21ea4840783357 Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 18 May 2018 11:15:39 -0400 Subject: [PATCH 13/48] update tostring method Signed-off-by: Lida He --- .../java/io/pravega/examples/flink/alert/AccessLog.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java index a57c6c3e..e2e9b706 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -15,6 +15,7 @@ import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.joda.time.DateTime; +import io.pravega.shaded.com.google.gson.Gson; /** * Object to process Apache access log @@ -102,11 +103,7 @@ public int hashCode() { @Override public String toString() { - ObjectMapper mapper = new ObjectMapper(); - try { - return mapper.writeValueAsString(this); - } catch (JsonProcessingException e) { - return "AccessLog: Timestamp=" + getTimestampStr() +", ClientIP=" + getClientIP() + ", Verb=" + getVerb() + ", Status=" + getStatus(); - } + Gson gson = new Gson(); + return gson.toJson(this); } } From b9c4acfea263d769fe2017206096afc69f3faff5 Mon Sep 17 00:00:00 2001 From: Lida He Date: Tue, 22 May 2018 01:24:59 -0400 Subject: [PATCH 14/48] change class member to follow java bean naming convention Signed-off-by: Lida He --- .../examples/flink/alert/AccessLog.java | 87 +++++++++++-------- .../examples/flink/alert/Constants.java | 3 +- .../flink/alert/HighCountAlerter.java | 6 +- 3 files changed, 54 insertions(+), 42 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java index e2e9b706..926d8ca1 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -10,64 +10,76 @@ */ package io.pravega.examples.flink.alert; +import io.pravega.shaded.com.google.type.Date; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.joda.time.DateTime; -import io.pravega.shaded.com.google.gson.Gson; + +import java.io.IOException; /** * Object to process Apache access log */ @JsonIgnoreProperties(ignoreUnknown = true) public class AccessLog { - private String ClientIP; + private static final ObjectMapper mapper = new ObjectMapper(); + + @JsonProperty("clientip") + private String clientIp; - private String Status; + @JsonProperty("response") + private String status; - private String Verb; + @JsonProperty("verb") + private String verb; - private String TimestampStr; + @JsonProperty("@timestamp") + 
private String timestamp; - public AccessLog(){ - Status=Verb=ClientIP=""; + public static AccessLog toAccessLog(String value) throws IOException { + return mapper.readValue(value, AccessLog.class); } - @JsonProperty("clientip") - public String getClientIP() { - return ClientIP; + public String getClientIp() { + return clientIp; } - public void setClientIP(String clientIP) { - ClientIP = clientIP; + public void setClientIp(String clientIp) { + this.clientIp = clientIp; } - @JsonProperty("response") - public String getStatus() { - return Status; + public String getStatus() + { + return status; } - public void setStatus(String status) { - Status = status; + public void setStatus(String status) + { + this.status = status; } - @JsonProperty("@timestamp") - public String getTimestampStr() { return TimestampStr; } + public String getTimestamp() { + return timestamp; + } - public void setTimestampStr(String timestampStr) { TimestampStr = timestampStr; } + public void setTimestamp(String timestampStr) { + this.timestamp = timestampStr; + } - public long getTimestampMillis() { - return new DateTime(getTimestampStr()).getMillis(); + public long getTimestampMillis() + { + return new DateTime(getTimestamp()).getMillis(); } - @JsonProperty("verb") - public String getVerb() { - return Verb; + public String getVerb() + { + return verb; } - public void setVerb(String verb) { - Verb = verb; + public void setVerb(String verb) + { + this.verb = verb; } /** @@ -84,26 +96,25 @@ public boolean equals(Object obj) { return false; } AccessLog accessLog =(AccessLog)obj; - return accessLog.Verb.equals(Verb) && - accessLog.Status.equals(Status) && - accessLog.TimestampStr.equals(TimestampStr) && - accessLog.ClientIP.equals(ClientIP); + return accessLog.verb.equals(verb) && + accessLog.status.equals(status) && + accessLog.timestamp.equals(timestamp) && + accessLog.clientIp.equals(clientIp); } @Override public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + ((Status == null) ? 0 : Status.hashCode()); - result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode()); - result = prime * result + ((TimestampStr == null) ? 0 : TimestampStr.hashCode()); - result = prime * result + ((Verb == null) ? 0 : Verb.hashCode()); + result = prime * result + ((status == null) ? 0 : status.hashCode()); + result = prime * result + ((clientIp == null) ? 0 : clientIp.hashCode()); + result = prime * result + ((timestamp == null) ? 0 : timestamp.hashCode()); + result = prime * result + ((verb == null) ? 
0 : verb.hashCode());
         return result;
     }
 
     @Override
     public String toString() {
-        Gson gson = new Gson();
-        return gson.toJson(this);
+        return "AccessLog [timestamp = "+timestamp+", verb = "+verb+", status = "+status+", clientIp = "+clientIp+"]";
     }
 }
diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java
index 09951d1d..c23c4a07 100644
--- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java
@@ -15,8 +15,9 @@
  *
  */
 public class Constants {
-    protected static final String STREAM_PARAM = "stream";
+    protected static final String SCOPE_PARAM = "scope";
     protected static final String DEFAULT_SCOPE = "examples";
+    protected static final String STREAM_PARAM = "stream";
     protected static final String DEFAULT_STREAM = "apacheaccess";
     protected static final Integer ALERT_THRESHOLD = 6;
     protected static final Integer ALERT_WINDOW = 30;
diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
index 3a2b3730..3ca7fb88 100644
--- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
@@ -32,6 +32,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -58,7 +59,7 @@ public static void main(String[] args) throws Exception {
 
         PravegaConfig pravegaConfig = PravegaConfig
                 .fromParams(params)
-                .withDefaultScope(Constants.DEFAULT_SCOPE);
+                .withDefaultScope(params.get(Constants.SCOPE_PARAM, Constants.DEFAULT_SCOPE));
 
         // create the Pravega input stream (if necessary)
         Stream stream = Utils.createStream(
@@ -136,8 +137,7 @@ public Alert select(Map<String, List<ResponseCount>> pattern) throws Exception {
     //Parse the incoming streams & convert into Java PoJos
     private static class ParseLogData implements MapFunction<String, AccessLog> {
         public AccessLog map(String value) throws Exception {
-            ObjectMapper mapper = new ObjectMapper();
-            return mapper.readValue(value, AccessLog.class);
+            return AccessLog.toAccessLog(value);
         }
     }
 

From 78165e3ad312d65438eae219e81de1d523668e2e Mon Sep 17 00:00:00 2001
From: Lida He
Date: Wed, 23 May 2018 22:01:01 -0400
Subject: [PATCH 15/48] support secure connection to pravega

Signed-off-by: Lida He
---
 .../main/java/io/pravega/examples/flink/alert/Constants.java | 2 ++
 .../io/pravega/examples/flink/alert/HighCountAlerter.java    | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java
index c23c4a07..c3acb421 100644
--- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java
@@ -19,6 +19,8 @@ public class Constants {
     protected static final String DEFAULT_SCOPE = "examples";
     protected static final String STREAM_PARAM = "stream";
     protected static final String DEFAULT_STREAM = "apacheaccess";
+    protected static final String USERNAME_PARAM = "username";
+    protected static final String PASSWORD_PARAM = "password";
     protected static final Integer ALERT_THRESHOLD = 6;
     protected static final Integer ALERT_WINDOW = 30;
     protected static final Integer ALERT_INTERVAL = 2;
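As an aside (not part of the patch): together with the change to HighCountAlerter in the next hunk, which feeds these two parameters into DefaultCredentials, a run against a security-enabled Pravega cluster might look like the following. The credential values here are placeholders for illustration, not defaults shipped with Pravega.

```
$ cd flink-examples/build/install/pravega-flink-examples
$ bin/highCountAlerter --controller tcp://127.0.0.1:9090 \
    --scope examples --stream apacheaccess \
    --username <username> --password <password>
```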
diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index 3ca7fb88..58eb6a8b 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -11,6 +11,7 @@ package io.pravega.examples.flink.alert; import io.pravega.client.stream.Stream; +import io.pravega.client.stream.impl.DefaultCredentials; import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.PravegaConfig; import io.pravega.connectors.flink.serialization.PravegaSerialization; @@ -57,8 +58,12 @@ public static void main(String[] args) throws Exception { // initialize the parameter utility tool in order to retrieve input parameters ParameterTool params = ParameterTool.fromArgs(args); + String username = params.get(Constants.USERNAME_PARAM, ""); + String password = params.get(Constants.PASSWORD_PARAM, ""); + PravegaConfig pravegaConfig = PravegaConfig .fromParams(params) + .withCredentials(new DefaultCredentials(password, username)) .withDefaultScope(params.get(Constants.SCOPE_PARAM, Constants.DEFAULT_SCOPE)); // create the Pravega input stream (if necessary) From 2508e10c3ff822c0152357a205e70301d5e67b73 Mon Sep 17 00:00:00 2001 From: Lida He Date: Mon, 4 Jun 2018 20:05:08 -0400 Subject: [PATCH 16/48] Add steps to check state of pravega and logstash Singed-off-by: Lida He Signed-off-by: Lida He --- .../flink-high-error-count-alert/README.md | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index e020d2b7..bcdc2bd8 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -15,11 +15,11 @@ Otherwise proceed to set up Logstash and Pravega ## Start Logstash with Pravega Output Plugin ## -On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.2.0.gem`. +On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.3.0.gem`. Install the plugin, assuming Logstash is installed at `/usr/share/logstash/` ``` -$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.2.0.gem +$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.3.0.gem ``` Copy the contents under flink-examples/doc/flink-high-error-count-alert/filters/ to the Logstash host, e.g., in directory ~/pravega. @@ -59,22 +59,40 @@ Run script below to start container from prebuilt image. 
Adjust parameters to yo
 #!/bin/sh
 set -u
 
-PRAVEGA_SCOPE=myscope
-PRAVEGA_STREAM=apacheaccess
+PRAVEGA_SCOPE=${PRAVEGA_SCOPE:examples}
+PRAVEGA_STREAM=${PRAVEGA_STREAM:apacheaccess}
 CONTAINER_NAME=pravega
 IMAGE_NAME=emccorp/pravega-demo
 
 docker run -d --name $CONTAINER_NAME \
   -p 9090:9090 \
   -p 9091:9091 \
+  -p 9600:9600 \
   -v /tmp/access.log:/opt/data/access.log \
-  -v /tmp/logs/:/var/log/pravega/ \
-  -e PRAVEGA_ENDPOINT=${PRAVEGA_ENDPOINT} \
+  -v $PWD/logs/:/var/log/pravega/ \
   -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \
   -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \
   ${IMAGE_NAME}
 ```
+You can skip **PRAVEGA_SCOPE** and **PRAVEGA_STREAM** if you want to use the defaults.
+
+To check Pravega:
+```
+$ curl localhost:9091/v1/scopes
+```
+
+To check Logstash and the Pravega output plugin, use the Logstash monitoring API running on port 9600. Logstash may take one or two minutes to start, as a delay is introduced to wait for Pravega to start.
+```
+# The output should contain the name and the version (your version may differ) of the plugin
+#       "name" : "logstash-output-pravega",
+#       "version" : "0.3.0.pre.SNAPSHOT"
+
+$ curl localhost:9600/_node/plugins?pretty
+```
+
+The log files for Pravega and Logstash should be in the **logs** directory under the current directory if you used the script above to start the container.
+
 More details can be found on github [pravega docker](https://github.com/hldnova/pravega-docker) and on dockerhub [pravega docker image](https://hub.docker.com/r/emccorp/pravega-demo/)
 
 ## Run HighCountAlerter ##

From 20ca9cfda635086b0e5686d5c4ba108b47a80c63 Mon Sep 17 00:00:00 2001
From: Lida He
Date: Mon, 4 Jun 2018 22:24:12 -0400
Subject: [PATCH 17/48] fix bug in build script

Signed-off-by: Lida He
---
 flink-examples/build.gradle                               | 2 ++
 flink-examples/doc/flink-high-error-count-alert/README.md | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/flink-examples/build.gradle b/flink-examples/build.gradle
index 1af0db81..30babd0f 100644
--- a/flink-examples/build.gradle
+++ b/flink-examples/build.gradle
@@ -61,6 +61,8 @@ task scriptFlinkAlerter(type: CreateStartScripts) {
   outputDir = file('build/scripts')
   mainClassName = 'io.pravega.examples.flink.alert.HighCountAlerter'
   applicationName = 'highCountAlerter'
+  classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
+}
 
 task scriptExactlyOnceWriter(type: CreateStartScripts) {
   outputDir = file('build/scripts')
diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md
index bcdc2bd8..070295fb 100644
--- a/flink-examples/doc/flink-high-error-count-alert/README.md
+++ b/flink-examples/doc/flink-high-error-count-alert/README.md
@@ -59,8 +59,8 @@ Run script below to start container from prebuilt image.
Adjust parameters to yo #!/bin/sh set -u -PRAVEGA_SCOPE=${PRAVEGA_SCOPE:examples} -PRAVEGA_STREAM=${PRAVEGA_STREAM:apacheaccess} +PRAVEGA_SCOPE=${PRAVEGA_SCOPE:-examples} +PRAVEGA_STREAM=${PRAVEGA_STREAM:-apacheaccess} CONTAINER_NAME=pravega IMAGE_NAME=emccorp/pravega-demo From 6f611b7965aa2570537ab9228a76654d2e8ba660 Mon Sep 17 00:00:00 2001 From: Vijay Srinivasaraghavan Date: Tue, 12 Jun 2018 05:57:35 -0700 Subject: [PATCH 18/48] [issue-95] Remove transaction grace period API call (#96) * Updates pravega and connector snapshot version * Removes transaction writer grace period configuration API call Signed-off-by: Vijay Srinivasaraghavan --- .../examples/flink/primer/process/ExactlyOnceWriter.java | 2 -- gradle.properties | 4 ++-- .../main/java/io/pravega/example/consolerw/ConsoleWriter.java | 4 +--- .../main/java/io/pravega/example/iot/TurbineHeatSensor.java | 2 -- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceWriter.java b/flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceWriter.java index ab8ca74d..653fe813 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceWriter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceWriter.java @@ -39,7 +39,6 @@ public class ExactlyOnceWriter { private static final long checkpointIntervalMillis = 100; private static final Time txnTimeoutMillis = Time.milliseconds(30 * 1000); - private static final Time txnGracePeriodMillis = Time.milliseconds(30 * 1000); private static final int defaultNumEvents = 50; // read data from the time when the program starts @@ -83,7 +82,6 @@ public static void main(String[] args) throws Exception { .forStream(stream) .withEventRouter( new EventRouter()) .withTxnTimeout(txnTimeoutMillis) - .withTxnGracePeriod(txnGracePeriodMillis) .withWriterMode( exactlyOnce ? 
PravegaWriterMode.EXACTLY_ONCE : PravegaWriterMode.ATLEAST_ONCE ) .withSerializationSchema(PravegaSerialization.serializationFor(IntegerEvent.class)) .build(); diff --git a/gradle.properties b/gradle.properties index d3f66a76..11e144b2 100644 --- a/gradle.properties +++ b/gradle.properties @@ -12,8 +12,8 @@ dcosAddress=master.mesos ### dependencies -pravegaVersion=0.3.0-50.b5ecb57-SNAPSHOT -connectorVersion=0.3.0-103.ad282e9-SNAPSHOT +pravegaVersion=0.3.0-50.5f4d75b-SNAPSHOT +connectorVersion=0.3.0-102.0e30f47-SNAPSHOT ### outputs samplesVersion=0.3.0-SNAPSHOT diff --git a/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java b/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java index 2514a96d..d5eda476 100644 --- a/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java +++ b/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java @@ -48,8 +48,7 @@ public class ConsoleWriter implements AutoCloseable { private static final long DEFAULT_TXN_TIMEOUT_MS = 30000L; private static final long DEFAULT_TXN_MAX_EXECUTION_TIME_MS = 30000L; - private static final long DEFAULT_TXN_SCALE_GRACE_PERIOD_MS = 30000L; - + private static final long DEFAULT_PING_LEASE_MS = 30000L; private static final String[] HELP_TEXT = { @@ -423,7 +422,6 @@ public static void main(String[] args) { new JavaSerializer(), EventWriterConfig.builder() .transactionTimeoutTime(DEFAULT_TXN_TIMEOUT_MS) - .transactionTimeoutScaleGracePeriod(DEFAULT_TXN_SCALE_GRACE_PERIOD_MS) .build()); ConsoleWriter cw = new ConsoleWriter(scope, streamName, writer); diff --git a/standalone-examples/src/main/java/io/pravega/example/iot/TurbineHeatSensor.java b/standalone-examples/src/main/java/io/pravega/example/iot/TurbineHeatSensor.java index 8c8c868f..586f4219 100644 --- a/standalone-examples/src/main/java/io/pravega/example/iot/TurbineHeatSensor.java +++ b/standalone-examples/src/main/java/io/pravega/example/iot/TurbineHeatSensor.java @@ -55,7 +55,6 @@ public class TurbineHeatSensor { private static int reportingInterval = 200; private static final long DEFAULT_TXN_TIMEOUT_MS = 30000L; - private static final long DEFAULT_TXN_SCALE_GRACE_PERIOD_MS = 30000L; public static void main(String[] args) throws Exception { @@ -303,7 +302,6 @@ private static class TemperatureSensors implements Runnable { EventWriterConfig eventWriterConfig = EventWriterConfig.builder() .transactionTimeoutTime(DEFAULT_TXN_TIMEOUT_MS) - .transactionTimeoutScaleGracePeriod(DEFAULT_TXN_SCALE_GRACE_PERIOD_MS) .build(); this.producer = clientFactory.createEventWriter(streamName, SERIALIZER, eventWriterConfig); From d7e7e94bbbe8f682a5d74314dac8e52ad89d42eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Gracia?= Date: Wed, 13 Jun 2018 10:04:54 +0200 Subject: [PATCH 19/48] [issue 75] hadoop-connectors examples (#77) (#101) * Adds a number of examples that exercise the Pravega Input Format implementation in the hadoop connectors project Signed-off-by: Bo Yang --- hadoop-examples/README.md | 94 +++ hadoop-examples/build.gradle | 102 +++ hadoop-examples/gradle.properties | 7 + .../examples/hadoop/ExampleDriver.java | 48 ++ .../examples/hadoop/PravegaOutputFormat.java | 118 +++ .../hadoop/PravegaOutputRecordWriter.java | 53 ++ .../examples/hadoop/RandomTextWriter.java | 739 ++++++++++++++++++ .../pravega/examples/hadoop/RandomWriter.java | 299 +++++++ .../examples/hadoop/TextSerializer.java | 58 ++ .../io/pravega/examples/hadoop/WordCount.java | 141 ++++ 
.../io/pravega/examples/hadoop/WordMean.java | 206 +++++ .../pravega/examples/hadoop/WordMedian.java | 218 ++++++ .../io/pravega/examples/spark/WordCount.java | 57 ++ 13 files changed, 2140 insertions(+) create mode 100644 hadoop-examples/README.md create mode 100644 hadoop-examples/build.gradle create mode 100644 hadoop-examples/gradle.properties create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/ExampleDriver.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputFormat.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputRecordWriter.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomTextWriter.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomWriter.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/TextSerializer.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordCount.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMean.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMedian.java create mode 100644 hadoop-examples/src/main/java/io/pravega/examples/spark/WordCount.java diff --git a/hadoop-examples/README.md b/hadoop-examples/README.md new file mode 100644 index 00000000..385f8cbf --- /dev/null +++ b/hadoop-examples/README.md @@ -0,0 +1,94 @@ + + +# hadoop-connectors examples +Examples of Hadoop Connectors for Pravega. + +Description +----------- +These examples give you some basic ideas how to use hadoop-connectors for pravega. + +Build +------- +(Most of these steps will be removed when GAed) +### build hadoop connectors +``` +git clone --recurse-submodules https://github.com/pravega/hadoop-connectors.git +cd hadoop-connectors +gradle install + +# start pravega whose version matches hadoop-connectors +cd pravega +./gradlew startStandalone +``` + +## build hadoop connectors examples +``` +open another terminal and goto ~/pravega-samples +cd hadoop-examples +gradle build +``` + +Run Examples on Local Machine +--- + + +Hadoop (verified with Hadoop 2.8.3 on Ubuntu 16.04) +``` +1. setup and start hdfs + +2. set env variables + export HDFS=hdfs:// # e.g. hdfs://192.168.0.188:9000 + export HADOOP_EXAMPLES_JAR= # e.g. ./build/libs/pravega-hadoop-examples-0.3.0-SNAPSHOT-all.jar + export HADOOP_EXAMPLES_INPUT_DUMMY=${HDFS}/tmp/hadoop_examples_input_dummy + export HADOOP_EXAMPLES_OUTPUT=${HDFS}/tmp/hadoop_examples_output + export PRAVEGA_URI=tcp:// # e.g. tcp://192.168.0.188:9090 + export PRAVEGA_SCOPE= # e.g. myScope + export PRAVEGA_STREAM= # e.g. myStream + export CMD=wordcount # so far, can also try wordmean and wordmedian + +3. make sure below dirs are empty + hadoop fs -rmr ${HADOOP_EXAMPLES_INPUT_DUMMY} + hadoop fs -rmr ${HADOOP_EXAMPLES_OUTPUT} + +4. generate words into pravega + hadoop jar ${HADOOP_EXAMPLES_JAR} randomtextwriter -D mapreduce.randomtextwriter.totalbytes=32000 ${HADOOP_EXAMPLES_INPUT_DUMMY} ${PRAVEGA_URI} ${PRAVEGA_SCOPE} ${PRAVEGA_STREAM} + +5. run hadoop command + hadoop jar ${HADOOP_EXAMPLES_JAR} ${CMD} ${HADOOP_EXAMPLES_INPUT_DUMMY} ${PRAVEGA_URI} ${PRAVEGA_SCOPE} ${PRAVEGA_STREAM} ${HADOOP_EXAMPLES_OUTPUT} +``` + + +Additionally, you can run WordCount program (more will be coming soon) on top of [HiBench](https://github.com/intel-hadoop/HiBench) +``` +0. set same env variables as previous section, and + export HADOOP_HOME= # e.g. 
/services/hadoop-2.8.3 + export HDFS=hdfs:// # e.g. hdfs://192.168.0.188:9000 + export INPUT_HDFS="${HADOOP_EXAMPLES_INPUT_DUMMY} ${PRAVEGA_URI} ${PRAVEGA_SCOPE} ${PRAVEGA_STREAM}" + +1. fetch/build/patch HiBench (make sure mvn is installed) + gradle wcHiBench + +2. prepare testing data + ./HiBench/bin/workloads/micro/wordcount/prepare/prepare.sh + +3. run + ./HiBench/bin/workloads/micro/wordcount/hadoop/run.sh + +4. check report + file:////hadoop-examples/HiBench/report/wordcount/hadoop/monitor.html +``` + + +You can also use hadoop-connectors on Spark +``` +Spark (verified with Spark 2.2.1 on Ubuntu 16.04) + spark-submit --class io.pravega.examples.spark.WordCount ${HADOOP_EXAMPLES_JAR} ${PRAVEGA_URI} ${PRAVEGA_SCOPE} ${PRAVEGA_STREAM} +``` diff --git a/hadoop-examples/build.gradle b/hadoop-examples/build.gradle new file mode 100644 index 00000000..8d446aeb --- /dev/null +++ b/hadoop-examples/build.gradle @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +plugins { + id 'com.github.johnrengelman.shadow' version '1.2.4' +} + +apply plugin: "java" +apply plugin: "distribution" + +sourceCompatibility = 1.8 +archivesBaseName = 'pravega-hadoop-examples' +version = connectorVersion + +repositories { + mavenLocal() + if (findProperty("repositoryUrl")) { + maven { + url findProperty("repositoryUrl") + } + } + else { + jcenter() + mavenCentral() + maven { url "https://repository.apache.org/snapshots" } + maven { url "https://oss.sonatype.org/content/repositories/snapshots" } + } +} + +dependencies { + compile "io.pravega:hadoop-connectors:${connectorVersion}" + compileOnly "org.apache.hadoop:hadoop-common:${hadoopVersion}" + compileOnly "org.apache.hadoop:hadoop-mapreduce-client-core:${hadoopVersion}" + compileOnly "org.apache.spark:spark-core_2.11:${sparkVersion}" +} + +shadowJar { + version = version + dependencies { + include dependency("io.pravega:hadoop-connectors") + } + manifest { + attributes( + 'Main-Class': 'io.pravega.examples.hadoop.ExampleDriver', + ) + } +} + +distributions { + main { + baseName = archivesBaseName + contents { + into('lib') { + from shadowJar + from(project.configurations.shadow) + } + } + } +} + +task cleanHiBench(type: Delete) { + delete 'HiBench' +} + +task fetchHiBench (type: Exec) { + commandLine "git", "clone", "https://github.com/intel-hadoop/HiBench" +} + +task buildHiBench (dependsOn: fetchHiBench, type: Exec) { + workingDir 'HiBench' + commandLine "mvn", "-Dspark=2.1", "-Dscala=2.11", "clean", "package" +} + +task genHiBenchConfig (dependsOn: buildHiBench, type: Copy) { + from 'HiBench/conf' + into 'HiBench/conf' + include 'hadoop.conf.template' + rename('hadoop.conf.template', 'hadoop.conf') + + doLast { + def file = new File('./HiBench/conf/hadoop.conf') + def newConfig = file.text + .replace('/PATH/TO/YOUR/HADOOP/ROOT', System.getenv("HADOOP_HOME")) + .replace('hdfs://localhost:8020', System.getenv("HDFS")) + file.text = newConfig + } +} + +task wcHiBench (dependsOn: genHiBenchConfig) { + doLast { + def file = new File('./HiBench/conf/workloads/micro/wordcount.conf') + def newConfig = file.text.replace('hibench.workload.input', '#hibench.workload.input') + file.text = newConfig + } +} diff --git a/hadoop-examples/gradle.properties b/hadoop-examples/gradle.properties 
new file mode 100644 index 00000000..eec457f3 --- /dev/null +++ b/hadoop-examples/gradle.properties @@ -0,0 +1,7 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. +# +hadoopVersion=2.8.1 +scalaVersion=2.11.8 +sparkVersion=2.2.0 +connectorVersion=0.3.0-SNAPSHOT diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/ExampleDriver.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/ExampleDriver.java new file mode 100644 index 00000000..3f325561 --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/ExampleDriver.java @@ -0,0 +1,48 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import org.apache.hadoop.util.ProgramDriver; + +/** + * This class is copied from apache/hadoop and modified by removing + * unsupported commands + * + * https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples + * /src/main/java/org/apache/hadoop/examples/ExampleDriver.java + * + * A description of an example program based on its class and a + * human-readable description. + */ +public class ExampleDriver { + + public static void main(String argv[]) { + int exitCode = -1; + ProgramDriver pgd = new ProgramDriver(); + try { + pgd.addClass("wordcount", WordCount.class, + "A map/reduce program that counts the words in pravega."); + pgd.addClass("wordmean", WordMean.class, + "A map/reduce program that counts the average length of the words in pravega."); + pgd.addClass("wordmedian", WordMedian.class, + "A map/reduce program that counts the median length of the words in pravega."); + pgd.addClass("randomwriter", RandomWriter.class, + "A map/reduce program that writes random data to pravega."); + pgd.addClass("randomtextwriter", RandomTextWriter.class, + "A map/reduce program that writes random textual data to pravega."); + exitCode = pgd.run(argv); + } catch (Throwable e) { + e.printStackTrace(); + } + + System.exit(exitCode); + } +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputFormat.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputFormat.java new file mode 100644 index 00000000..7518e270 --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputFormat.java @@ -0,0 +1,118 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import com.google.common.annotations.VisibleForTesting; +import io.pravega.client.ClientFactory; +import io.pravega.client.admin.StreamManager; +import io.pravega.client.stream.EventStreamWriter; +import io.pravega.client.stream.EventWriterConfig; +import io.pravega.client.stream.ScalingPolicy; +import io.pravega.client.stream.Serializer; +import io.pravega.client.stream.StreamConfiguration; + +import java.io.IOException; +import java.net.URI; +import java.util.Optional; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An OutputFormat that can be added as a storage to write events to Pravega. + */ +public class PravegaOutputFormat extends OutputFormat { + + private static final Logger log = LoggerFactory.getLogger(PravegaOutputFormat.class); + + // Pravega scope name + public static final String SCOPE_NAME = "pravega.scope"; + // Pravega stream name + public static final String STREAM_NAME = "pravega.stream"; + // Pravega uri string + public static final String URI_STRING = "pravega.uri"; + // Pravega deserializer class name + public static final String DESERIALIZER = "pravega.deserializer"; + + private static final long DEFAULT_TXN_TIMEOUT_MS = 30000L; + private static final long DEFAULT_TXN_MAX_EXECUTION_TIME_MS = 30000L; + private static final long DEFAULT_TXN_SCALE_GRACE_PERIOD_MS = 30000L; + private static final long DEFAULT_PING_LEASE_MS = 30000L; + + // client factory + private ClientFactory externalClientFactory; + + public PravegaOutputFormat() { + } + + @VisibleForTesting + protected PravegaOutputFormat(ClientFactory externalClientFactory) { + this.externalClientFactory = externalClientFactory; + } + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + final String scopeName = Optional.ofNullable(conf.get(PravegaOutputFormat.SCOPE_NAME)).orElseThrow(() -> + new IOException("The input scope name must be configured (" + PravegaOutputFormat.SCOPE_NAME + ")")); + final String streamName = Optional.ofNullable(conf.get(PravegaOutputFormat.STREAM_NAME)).orElseThrow(() -> + new IOException("The input stream name must be configured (" + PravegaOutputFormat.STREAM_NAME + ")")); + final URI controllerURI = Optional.ofNullable(conf.get(PravegaOutputFormat.URI_STRING)).map(URI::create).orElseThrow(() -> + new IOException("The Pravega controller URI must be configured (" + PravegaOutputFormat.URI_STRING + ")")); + final String deserializerClassName = Optional.ofNullable(conf.get(PravegaOutputFormat.DESERIALIZER)).orElseThrow(() -> + new IOException("The event deserializer must be configured (" + PravegaOutputFormat.DESERIALIZER + ")")); + + StreamManager streamManager = StreamManager.create(controllerURI); + streamManager.createScope(scopeName); + + StreamConfiguration streamConfig = StreamConfiguration.builder().scope(scopeName).streamName(streamName) + .scalingPolicy(ScalingPolicy.fixed(3)) + .build(); + + 
streamManager.createStream(scopeName, streamName, streamConfig); + ClientFactory clientFactory = (externalClientFactory != null) ? externalClientFactory : ClientFactory.withScope(scopeName, controllerURI); + + Serializer deserializer; + try { + Class deserializerClass = Class.forName(deserializerClassName); + deserializer = (Serializer) deserializerClass.newInstance(); + } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { + log.error("Exception when creating deserializer: {}", e); + throw new IOException( + "Unable to create the event deserializer (" + deserializerClassName + ")", e); + } + + EventStreamWriter writer = clientFactory.createEventWriter(streamName, deserializer, EventWriterConfig.builder() + .transactionTimeoutTime(DEFAULT_TXN_TIMEOUT_MS) + .transactionTimeoutScaleGracePeriod(DEFAULT_TXN_SCALE_GRACE_PERIOD_MS) + .build()); + + return new PravegaOutputRecordWriter(writer); + } + + @Override + public void checkOutputSpecs(JobContext jobContext) throws IOException, InterruptedException { + } + + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { + // tmp solution, not for production + return new FileOutputCommitter(new Path("/tmp/" + context.getTaskAttemptID().getJobID().toString()), context); + } +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputRecordWriter.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputRecordWriter.java new file mode 100644 index 00000000..968885ae --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputRecordWriter.java @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import io.pravega.client.stream.EventStreamWriter; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.concurrent.NotThreadSafe; +import java.io.IOException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; + +/** + * A RecordWriter that can write events to Pravega. 
+ */ +@NotThreadSafe +public class PravegaOutputRecordWriter extends RecordWriter { + + private static final Logger log = LoggerFactory.getLogger(PravegaOutputRecordWriter.class); + private final EventStreamWriter writer; + + public PravegaOutputRecordWriter(EventStreamWriter writer) { + this.writer = writer; + } + + @Override + public void write(String key, V value) throws IOException, InterruptedException { + final CompletableFuture future = writer.writeEvent(key, value); + future.whenCompleteAsync( + (v, e) -> { + if (e != null) { + log.warn("Detected a write failure: {}", e); + } + } + ); + } + + @Override + public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { + writer.close(); + } +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomTextWriter.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomTextWriter.java new file mode 100644 index 00000000..9dd815b1 --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomTextWriter.java @@ -0,0 +1,739 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.ClusterStatus; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapreduce.*; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import java.io.IOException; +import java.util.Date; +import java.util.Random; + +/** + * This class is copied from apache/hadoop and modified by adding logic to + * support PravegaInputFormat + * + * https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples + * /src/main/java/org/apache/hadoop/examples/RandomTextWriter.java + * + * This program uses map/reduce to just run a distributed job where there is + * no interaction between the tasks and each task writes a large unsorted + * random sequence of words. + * In order for this program to generate data for terasort with a 5-10 words + * per key and 20-100 words per value, have the following config: + *
{@code
+ * <configuration>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.minwordskey</name>
+ *     <value>5</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.maxwordskey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.minwordsvalue</name>
+ *     <value>20</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.maxwordsvalue</name>
+ *     <value>100</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.totalbytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration>
+ * + * Equivalently, {@link RandomTextWriter} also supports all the above options + * and ones supported by {@link Tool} via the command-line. + * + * To run: bin/hadoop jar hadoop-${version}-examples.jar randomtextwriter + * [-outFormat output format class] output + */ +public class RandomTextWriter extends Configured implements Tool { + public static final String TOTAL_BYTES = + "mapreduce.randomtextwriter.totalbytes"; + public static final String BYTES_PER_MAP = + "mapreduce.randomtextwriter.bytespermap"; + public static final String MAPS_PER_HOST = + "mapreduce.randomtextwriter.mapsperhost"; + public static final String MAX_VALUE = "mapreduce.randomtextwriter.maxwordsvalue"; + public static final String MIN_VALUE = "mapreduce.randomtextwriter.minwordsvalue"; + public static final String MIN_KEY = "mapreduce.randomtextwriter.minwordskey"; + public static final String MAX_KEY = "mapreduce.randomtextwriter.maxwordskey"; + + static int printUsage() { + System.out.println("randomtextwriter " + + " "); + ToolRunner.printGenericCommandUsage(System.out); + return 2; + } + + /** + * User counters + */ + enum Counters { + RECORDS_WRITTEN, BYTES_WRITTEN + } + + static class RandomTextMapper extends Mapper { + + private long numBytesToWrite; + private int minWordsInKey; + private int wordsInKeyRange; + private int minWordsInValue; + private int wordsInValueRange; + private Random random = new Random(); + + /** + * Save the configuration value that we need to write the data. + */ + public void setup(Context context) { + Configuration conf = context.getConfiguration(); + numBytesToWrite = conf.getLong(BYTES_PER_MAP, + 1 * 1024 * 1024 * 1024); + minWordsInKey = conf.getInt(MIN_KEY, 5); + wordsInKeyRange = (conf.getInt(MAX_KEY, 10) - minWordsInKey); + minWordsInValue = conf.getInt(MIN_VALUE, 10); + wordsInValueRange = (conf.getInt(MAX_VALUE, 100) - minWordsInValue); + } + + /** + * Given an output filename, write a bunch of random records to it. + */ + public void map(Text key, Text value, + Context context) throws IOException, InterruptedException { + int itemCount = 0; + while (numBytesToWrite > 0) { + // Generate the key/value + int noWordsKey = minWordsInKey + + (wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0); + int noWordsValue = minWordsInValue + + (wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0); + Text keyWords = generateSentence(noWordsKey); + Text valueWords = generateSentence(noWordsValue); + + // Write the sentence, keyWords is just a routing key for Pravega, won't written to Pravega + context.write(keyWords.toString(), new Text(keyWords.toString() + valueWords.toString())); + + numBytesToWrite -= (keyWords.getLength() + valueWords.getLength()); + + // Update counters, progress etc. + context.getCounter(Counters.BYTES_WRITTEN).increment( + keyWords.getLength() + valueWords.getLength()); + context.getCounter(Counters.RECORDS_WRITTEN).increment(1); + if (++itemCount % 200 == 0) { + context.setStatus("wrote record " + itemCount + ". " + + numBytesToWrite + " bytes left."); + } + } + context.setStatus("done with " + itemCount + " records."); + } + + private Text generateSentence(int noWords) { + StringBuffer sentence = new StringBuffer(); + String space = " "; + for (int i = 0; i < noWords; ++i) { + sentence.append(words[random.nextInt(words.length)]); + sentence.append(space); + } + return new Text(sentence.toString()); + } + } + + /** + * This is the main routine for launching a distributed random write job. 
+ * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. + * The reduce doesn't do anything. + * + * @throws IOException + */ + public int run(String[] args) throws Exception { + if (args.length < 4) { + return printUsage(); + } + + Configuration conf = getConf(); + JobClient client = new JobClient(conf); + ClusterStatus cluster = client.getClusterStatus(); + int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); + long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, + 1 * 1024 * 1024 * 1024); + if (numBytesToWritePerMap == 0) { + System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0"); + return -2; + } + long totalBytesToWrite = conf.getLong(TOTAL_BYTES, + numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); + int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); + if (numMaps == 0 && totalBytesToWrite > 0) { + numMaps = 1; + conf.setLong(BYTES_PER_MAP, totalBytesToWrite); + } + conf.setInt(MRJobConfig.NUM_MAPS, numMaps); + + conf.setStrings("pravega.uri", args[1]); + conf.setStrings("pravega.scope", args[2]); + conf.setStrings("pravega.stream", args[3]); + conf.setStrings("pravega.deserializer", TextSerializer.class.getName()); + + Job job = Job.getInstance(conf); + + job.setJarByClass(RandomTextWriter.class); + job.setJobName("random-text-writer"); + + job.setOutputKeyClass(String.class); + job.setOutputValueClass(Text.class); + + job.setInputFormatClass(RandomWriter.RandomInputFormat.class); + job.setMapperClass(RandomTextMapper.class); + job.setOutputFormatClass(PravegaOutputFormat.class); + FileOutputFormat.setOutputPath(job, new Path(args[0])); + + System.out.println("Running " + numMaps + " maps."); + + // reducer NONE + job.setNumReduceTasks(0); + + Date startTime = new Date(); + System.out.println("Job started: " + startTime); + int ret = job.waitForCompletion(true) ? 
0 : 1; + Date endTime = new Date(); + System.out.println("Job ended: " + endTime); + System.out.println("The job took " + + (endTime.getTime() - startTime.getTime()) / 1000 + + " seconds."); + + return ret; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new RandomTextWriter(), args); + System.exit(res); + } + + /** + * A random list of 1000 words from /usr/share/dict/words + */ + private static String[] words = { + "diurnalness", "Homoiousian", + "spiranthic", "tetragynian", + "silverhead", "ungreat", + "lithograph", "exploiter", + "physiologian", "by", + "hellbender", "Filipendula", + "undeterring", "antiscolic", + "pentagamist", "hypoid", + "cacuminal", "sertularian", + "schoolmasterism", "nonuple", + "gallybeggar", "phytonic", + "swearingly", "nebular", + "Confervales", "thermochemically", + "characinoid", "cocksuredom", + "fallacious", "feasibleness", + "debromination", "playfellowship", + "tramplike", "testa", + "participatingly", "unaccessible", + "bromate", "experientialist", + "roughcast", "docimastical", + "choralcelo", "blightbird", + "peptonate", "sombreroed", + "unschematized", "antiabolitionist", + "besagne", "mastication", + "bromic", "sviatonosite", + "cattimandoo", "metaphrastical", + "endotheliomyoma", "hysterolysis", + "unfulminated", "Hester", + "oblongly", "blurredness", + "authorling", "chasmy", + "Scorpaenidae", "toxihaemia", + "Dictograph", "Quakerishly", + "deaf", "timbermonger", + "strammel", "Thraupidae", + "seditious", "plerome", + "Arneb", "eristically", + "serpentinic", "glaumrie", + "socioromantic", "apocalypst", + "tartrous", "Bassaris", + "angiolymphoma", "horsefly", + "kenno", "astronomize", + "euphemious", "arsenide", + "untongued", "parabolicness", + "uvanite", "helpless", + "gemmeous", "stormy", + "templar", "erythrodextrin", + "comism", "interfraternal", + "preparative", "parastas", + "frontoorbital", "Ophiosaurus", + "diopside", "serosanguineous", + "ununiformly", "karyological", + "collegian", "allotropic", + "depravity", "amylogenesis", + "reformatory", "epidymides", + "pleurotropous", "trillium", + "dastardliness", "coadvice", + "embryotic", "benthonic", + "pomiferous", "figureheadship", + "Megaluridae", "Harpa", + "frenal", "commotion", + "abthainry", "cobeliever", + "manilla", "spiciferous", + "nativeness", "obispo", + "monilioid", "biopsic", + "valvula", "enterostomy", + "planosubulate", "pterostigma", + "lifter", "triradiated", + "venialness", "tum", + "archistome", "tautness", + "unswanlike", "antivenin", + "Lentibulariaceae", "Triphora", + "angiopathy", "anta", + "Dawsonia", "becomma", + "Yannigan", "winterproof", + "antalgol", "harr", + "underogating", "ineunt", + "cornberry", "flippantness", + "scyphostoma", "approbation", + "Ghent", "Macraucheniidae", + "scabbiness", "unanatomized", + "photoelasticity", "eurythermal", + "enation", "prepavement", + "flushgate", "subsequentially", + "Edo", "antihero", + "Isokontae", "unforkedness", + "porriginous", "daytime", + "nonexecutive", "trisilicic", + "morphiomania", "paranephros", + "botchedly", "impugnation", + "Dodecatheon", "obolus", + "unburnt", "provedore", + "Aktistetae", "superindifference", + "Alethea", "Joachimite", + "cyanophilous", "chorograph", + "brooky", "figured", + "periclitation", "quintette", + "hondo", "ornithodelphous", + "unefficient", "pondside", + "bogydom", "laurinoxylon", + "Shiah", "unharmed", + "cartful", "noncrystallized", + "abusiveness", "cromlech", + "japanned", "rizzomed", + "underskin", "adscendent", + 
"allectory", "gelatinousness", + "volcano", "uncompromisingly", + "cubit", "idiotize", + "unfurbelowed", "undinted", + "magnetooptics", "Savitar", + "diwata", "ramosopalmate", + "Pishquow", "tomorn", + "apopenptic", "Haversian", + "Hysterocarpus", "ten", + "outhue", "Bertat", + "mechanist", "asparaginic", + "velaric", "tonsure", + "bubble", "Pyrales", + "regardful", "glyphography", + "calabazilla", "shellworker", + "stradametrical", "havoc", + "theologicopolitical", "sawdust", + "diatomaceous", "jajman", + "temporomastoid", "Serrifera", + "Ochnaceae", "aspersor", + "trailmaking", "Bishareen", + "digitule", "octogynous", + "epididymitis", "smokefarthings", + "bacillite", "overcrown", + "mangonism", "sirrah", + "undecorated", "psychofugal", + "bismuthiferous", "rechar", + "Lemuridae", "frameable", + "thiodiazole", "Scanic", + "sportswomanship", "interruptedness", + "admissory", "osteopaedion", + "tingly", "tomorrowness", + "ethnocracy", "trabecular", + "vitally", "fossilism", + "adz", "metopon", + "prefatorial", "expiscate", + "diathermacy", "chronist", + "nigh", "generalizable", + "hysterogen", "aurothiosulphuric", + "whitlowwort", "downthrust", + "Protestantize", "monander", + "Itea", "chronographic", + "silicize", "Dunlop", + "eer", "componental", + "spot", "pamphlet", + "antineuritic", "paradisean", + "interruptor", "debellator", + "overcultured", "Florissant", + "hyocholic", "pneumatotherapy", + "tailoress", "rave", + "unpeople", "Sebastian", + "thermanesthesia", "Coniferae", + "swacking", "posterishness", + "ethmopalatal", "whittle", + "analgize", "scabbardless", + "naught", "symbiogenetically", + "trip", "parodist", + "columniform", "trunnel", + "yawler", "goodwill", + "pseudohalogen", "swangy", + "cervisial", "mediateness", + "genii", "imprescribable", + "pony", "consumptional", + "carposporangial", "poleax", + "bestill", "subfebrile", + "sapphiric", "arrowworm", + "qualminess", "ultraobscure", + "thorite", "Fouquieria", + "Bermudian", "prescriber", + "elemicin", "warlike", + "semiangle", "rotular", + "misthread", "returnability", + "seraphism", "precostal", + "quarried", "Babylonism", + "sangaree", "seelful", + "placatory", "pachydermous", + "bozal", "galbulus", + "spermaphyte", "cumbrousness", + "pope", "signifier", + "Endomycetaceae", "shallowish", + "sequacity", "periarthritis", + "bathysphere", "pentosuria", + "Dadaism", "spookdom", + "Consolamentum", "afterpressure", + "mutter", "louse", + "ovoviviparous", "corbel", + "metastoma", "biventer", + "Hydrangea", "hogmace", + "seizing", "nonsuppressed", + "oratorize", "uncarefully", + "benzothiofuran", "penult", + "balanocele", "macropterous", + "dishpan", "marten", + "absvolt", "jirble", + "parmelioid", "airfreighter", + "acocotl", "archesporial", + "hypoplastral", "preoral", + "quailberry", "cinque", + "terrestrially", "stroking", + "limpet", "moodishness", + "canicule", "archididascalian", + "pompiloid", "overstaid", + "introducer", "Italical", + "Christianopaganism", "prescriptible", + "subofficer", "danseuse", + "cloy", "saguran", + "frictionlessly", "deindividualization", + "Bulanda", "ventricous", + "subfoliar", "basto", + "scapuloradial", "suspend", + "stiffish", "Sphenodontidae", + "eternal", "verbid", + "mammonish", "upcushion", + "barkometer", "concretion", + "preagitate", "incomprehensible", + "tristich", "visceral", + "hemimelus", "patroller", + "stentorophonic", "pinulus", + "kerykeion", "brutism", + "monstership", "merciful", + "overinstruct", "defensibly", + "bettermost", "splenauxe", + "Mormyrus", "unreprimanded", + 
"taver", "ell", + "proacquittal", "infestation", + "overwoven", "Lincolnlike", + "chacona", "Tamil", + "classificational", "lebensraum", + "reeveland", "intuition", + "Whilkut", "focaloid", + "Eleusinian", "micromembrane", + "byroad", "nonrepetition", + "bacterioblast", "brag", + "ribaldrous", "phytoma", + "counteralliance", "pelvimetry", + "pelf", "relaster", + "thermoresistant", "aneurism", + "molossic", "euphonym", + "upswell", "ladhood", + "phallaceous", "inertly", + "gunshop", "stereotypography", + "laryngic", "refasten", + "twinling", "oflete", + "hepatorrhaphy", "electrotechnics", + "cockal", "guitarist", + "topsail", "Cimmerianism", + "larklike", "Llandovery", + "pyrocatechol", "immatchable", + "chooser", "metrocratic", + "craglike", "quadrennial", + "nonpoisonous", "undercolored", + "knob", "ultratense", + "balladmonger", "slait", + "sialadenitis", "bucketer", + "magnificently", "unstipulated", + "unscourged", "unsupercilious", + "packsack", "pansophism", + "soorkee", "percent", + "subirrigate", "champer", + "metapolitics", "spherulitic", + "involatile", "metaphonical", + "stachyuraceous", "speckedness", + "bespin", "proboscidiform", + "gul", "squit", + "yeelaman", "peristeropode", + "opacousness", "shibuichi", + "retinize", "yote", + "misexposition", "devilwise", + "pumpkinification", "vinny", + "bonze", "glossing", + "decardinalize", "transcortical", + "serphoid", "deepmost", + "guanajuatite", "wemless", + "arval", "lammy", + "Effie", "Saponaria", + "tetrahedral", "prolificy", + "excerpt", "dunkadoo", + "Spencerism", "insatiately", + "Gilaki", "oratorship", + "arduousness", "unbashfulness", + "Pithecolobium", "unisexuality", + "veterinarian", "detractive", + "liquidity", "acidophile", + "proauction", "sural", + "totaquina", "Vichyite", + "uninhabitedness", "allegedly", + "Gothish", "manny", + "Inger", "flutist", + "ticktick", "Ludgatian", + "homotransplant", "orthopedical", + "diminutively", "monogoneutic", + "Kenipsim", "sarcologist", + "drome", "stronghearted", + "Fameuse", "Swaziland", + "alen", "chilblain", + "beatable", "agglomeratic", + "constitutor", "tendomucoid", + "porencephalous", "arteriasis", + "boser", "tantivy", + "rede", "lineamental", + "uncontradictableness", "homeotypical", + "masa", "folious", + "dosseret", "neurodegenerative", + "subtransverse", "Chiasmodontidae", + "palaeotheriodont", "unstressedly", + "chalcites", "piquantness", + "lampyrine", "Aplacentalia", + "projecting", "elastivity", + "isopelletierin", "bladderwort", + "strander", "almud", + "iniquitously", "theologal", + "bugre", "chargeably", + "imperceptivity", "meriquinoidal", + "mesophyte", "divinator", + "perfunctory", "counterappellant", + "synovial", "charioteer", + "crystallographical", "comprovincial", + "infrastapedial", "pleasurehood", + "inventurous", "ultrasystematic", + "subangulated", "supraoesophageal", + "Vaishnavism", "transude", + "chrysochrous", "ungrave", + "reconciliable", "uninterpleaded", + "erlking", "wherefrom", + "aprosopia", "antiadiaphorist", + "metoxazine", "incalculable", + "umbellic", "predebit", + "foursquare", "unimmortal", + "nonmanufacture", "slangy", + "predisputant", "familist", + "preaffiliate", "friarhood", + "corelysis", "zoonitic", + "halloo", "paunchy", + "neuromimesis", "aconitine", + "hackneyed", "unfeeble", + "cubby", "autoschediastical", + "naprapath", "lyrebird", + "inexistency", "leucophoenicite", + "ferrogoslarite", "reperuse", + "uncombable", "tambo", + "propodiale", "diplomatize", + "Russifier", "clanned", + "corona", "michigan", + 
"nonutilitarian", "transcorporeal", + "bought", "Cercosporella", + "stapedius", "glandularly", + "pictorially", "weism", + "disilane", "rainproof", + "Caphtor", "scrubbed", + "oinomancy", "pseudoxanthine", + "nonlustrous", "redesertion", + "Oryzorictinae", "gala", + "Mycogone", "reappreciate", + "cyanoguanidine", "seeingness", + "breadwinner", "noreast", + "furacious", "epauliere", + "omniscribent", "Passiflorales", + "uninductive", "inductivity", + "Orbitolina", "Semecarpus", + "migrainoid", "steprelationship", + "phlogisticate", "mesymnion", + "sloped", "edificator", + "beneficent", "culm", + "paleornithology", "unurban", + "throbless", "amplexifoliate", + "sesquiquintile", "sapience", + "astucious", "dithery", + "boor", "ambitus", + "scotching", "uloid", + "uncompromisingness", "hoove", + "waird", "marshiness", + "Jerusalem", "mericarp", + "unevoked", "benzoperoxide", + "outguess", "pyxie", + "hymnic", "euphemize", + "mendacity", "erythremia", + "rosaniline", "unchatteled", + "lienteria", "Bushongo", + "dialoguer", "unrepealably", + "rivethead", "antideflation", + "vinegarish", "manganosiderite", + "doubtingness", "ovopyriform", + "Cephalodiscus", "Muscicapa", + "Animalivora", "angina", + "planispheric", "ipomoein", + "cuproiodargyrite", "sandbox", + "scrat", "Munnopsidae", + "shola", "pentafid", + "overstudiousness", "times", + "nonprofession", "appetible", + "valvulotomy", "goladar", + "uniarticular", "oxyterpene", + "unlapsing", "omega", + "trophonema", "seminonflammable", + "circumzenithal", "starer", + "depthwise", "liberatress", + "unleavened", "unrevolting", + "groundneedle", "topline", + "wandoo", "umangite", + "ordinant", "unachievable", + "oversand", "snare", + "avengeful", "unexplicit", + "mustafina", "sonable", + "rehabilitative", "eulogization", + "papery", "technopsychology", + "impressor", "cresylite", + "entame", "transudatory", + "scotale", "pachydermatoid", + "imaginary", "yeat", + "slipped", "stewardship", + "adatom", "cockstone", + "skyshine", "heavenful", + "comparability", "exprobratory", + "dermorhynchous", "parquet", + "cretaceous", "vesperal", + "raphis", "undangered", + "Glecoma", "engrain", + "counteractively", "Zuludom", + "orchiocatabasis", "Auriculariales", + "warriorwise", "extraorganismal", + "overbuilt", "alveolite", + "tetchy", "terrificness", + "widdle", "unpremonished", + "rebilling", "sequestrum", + "equiconvex", "heliocentricism", + "catabaptist", "okonite", + "propheticism", "helminthagogic", + "calycular", "giantly", + "wingable", "golem", + "unprovided", "commandingness", + "greave", "haply", + "doina", "depressingly", + "subdentate", "impairment", + "decidable", "neurotrophic", + "unpredict", "bicorporeal", + "pendulant", "flatman", + "intrabred", "toplike", + "Prosobranchiata", "farrantly", + "toxoplasmosis", "gorilloid", + "dipsomaniacal", "aquiline", + "atlantite", "ascitic", + "perculsive", "prospectiveness", + "saponaceous", "centrifugalization", + "dinical", "infravaginal", + "beadroll", "affaite", + "Helvidian", "tickleproof", + "abstractionism", "enhedge", + "outwealth", "overcontribute", + "coldfinch", "gymnastic", + "Pincian", "Munychian", + "codisjunct", "quad", + "coracomandibular", "phoenicochroite", + "amender", "selectivity", + "putative", "semantician", + "lophotrichic", "Spatangoidea", + "saccharogenic", "inferent", + "Triconodonta", "arrendation", + "sheepskin", "taurocolla", + "bunghole", "Machiavel", + "triakistetrahedral", "dehairer", + "prezygapophysial", "cylindric", + "pneumonalgia", "sleigher", + "emir", "Socraticism", + 
"licitness", "massedly", + "instructiveness", "sturdied", + "redecrease", "starosta", + "evictor", "orgiastic", + "squdge", "meloplasty", + "Tsonecan", "repealableness", + "swoony", "myesthesia", + "molecule", "autobiographist", + "reciprocation", "refective", + "unobservantness", "tricae", + "ungouged", "floatability", + "Mesua", "fetlocked", + "chordacentrum", "sedentariness", + "various", "laubanite", + "nectopod", "zenick", + "sequentially", "analgic", + "biodynamics", "posttraumatic", + "nummi", "pyroacetic", + "bot", "redescend", + "dispermy", "undiffusive", + "circular", "trillion", + "Uraniidae", "ploration", + "discipular", "potentness", + "sud", "Hu", + "Eryon", "plugger", + "subdrainage", "jharal", + "abscission", "supermarket", + "countergabion", "glacierist", + "lithotresis", "minniebush", + "zanyism", "eucalypteol", + "sterilely", "unrealize", + "unpatched", "hypochondriacism", + "critically", "cheesecutter", + }; +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomWriter.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomWriter.java new file mode 100644 index 00000000..34d0f504 --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomWriter.java @@ -0,0 +1,299 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.ClusterStatus; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapreduce.*; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Random; + +/** + * + * This class is copied from apache/hadoop + * + * https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples + * /src/main/java/org/apache/hadoop/examples/RandomWriter.java + * + * This program uses map/reduce to just run a distributed job where there is + * no interaction between the tasks and each task write a large unsorted + * random binary sequence file of BytesWritable. + * In order for this program to generate data for terasort with 10-byte keys + * and 90-byte values, have the following config: + *
<pre>{@code
+ * <?xml version="1.0"?>
+ * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ * <configuration>
+ *   <property>
+ *     <name>mapreduce.randomwriter.minkey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.maxkey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.minvalue</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.maxvalue</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.totalbytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration>}</pre>
+ * Equivalently, {@link RandomWriter} also supports all the above options + * and ones supported by {@link GenericOptionsParser} via the command-line. + */ +public class RandomWriter extends Configured implements Tool { + public static final String TOTAL_BYTES = "mapreduce.randomwriter.totalbytes"; + public static final String BYTES_PER_MAP = + "mapreduce.randomwriter.bytespermap"; + public static final String MAPS_PER_HOST = + "mapreduce.randomwriter.mapsperhost"; + public static final String MAX_VALUE = "mapreduce.randomwriter.maxvalue"; + public static final String MIN_VALUE = "mapreduce.randomwriter.minvalue"; + public static final String MIN_KEY = "mapreduce.randomwriter.minkey"; + public static final String MAX_KEY = "mapreduce.randomwriter.maxkey"; + + /** + * User counters + */ + enum Counters { + RECORDS_WRITTEN, BYTES_WRITTEN + } + + /** + * A custom input format that creates virtual inputs of a single string + * for each map. + */ + static class RandomInputFormat extends InputFormat { + + /** + * Generate the requested number of file splits, with the filename + * set to the filename of the output file. + */ + public List getSplits(JobContext job) throws IOException { + List result = new ArrayList(); + Path outDir = FileOutputFormat.getOutputPath(job); + int numSplits = + job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1); + for (int i = 0; i < numSplits; ++i) { + result.add(new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, + (String[]) null)); + } + return result; + } + + /** + * Return a single record (filename, "") where the filename is taken from + * the file split. + */ + static class RandomRecordReader extends RecordReader { + Path name; + Text key = null; + Text value = new Text(); + + public RandomRecordReader(Path p) { + name = p; + } + + public void initialize(InputSplit split, + TaskAttemptContext context) + throws IOException, InterruptedException { + + } + + public boolean nextKeyValue() { + if (name != null) { + key = new Text(); + key.set(name.getName()); + name = null; + return true; + } + return false; + } + + public Text getCurrentKey() { + return key; + } + + public Text getCurrentValue() { + return value; + } + + public void close() { + } + + public float getProgress() { + return 0.0f; + } + } + + public RecordReader createRecordReader(InputSplit split, + TaskAttemptContext context) throws IOException, InterruptedException { + return new RandomRecordReader(((FileSplit) split).getPath()); + } + } + + static class RandomMapper extends Mapper { + + private long numBytesToWrite; + private int minKeySize; + private int keySizeRange; + private int minValueSize; + private int valueSizeRange; + private Random random = new Random(); + private BytesWritable randomKey = new BytesWritable(); + private BytesWritable randomValue = new BytesWritable(); + + private void randomizeBytes(byte[] data, int offset, int length) { + for (int i = offset + length - 1; i >= offset; --i) { + data[i] = (byte) random.nextInt(256); + } + } + + /** + * Given an output filename, write a bunch of random records to it. + */ + public void map(WritableComparable key, + Writable value, + Context context) throws IOException, InterruptedException { + int itemCount = 0; + while (numBytesToWrite > 0) { + int keyLength = minKeySize + + (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0); + randomKey.setSize(keyLength); + randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength()); + int valueLength = minValueSize + + (valueSizeRange != 0 ? 
random.nextInt(valueSizeRange) : 0); + randomValue.setSize(valueLength); + randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength()); + context.write(randomKey, randomValue); + numBytesToWrite -= keyLength + valueLength; + context.getCounter(Counters.BYTES_WRITTEN).increment(keyLength + valueLength); + context.getCounter(Counters.RECORDS_WRITTEN).increment(1); + if (++itemCount % 200 == 0) { + context.setStatus("wrote record " + itemCount + ". " + + numBytesToWrite + " bytes left."); + } + } + context.setStatus("done with " + itemCount + " records."); + } + + /** + * Save the values out of the configuaration that we need to write + * the data. + */ + @Override + public void setup(Context context) { + Configuration conf = context.getConfiguration(); + numBytesToWrite = conf.getLong(BYTES_PER_MAP, + 1 * 1024 * 1024 * 1024); + minKeySize = conf.getInt(MIN_KEY, 10); + keySizeRange = + conf.getInt(MAX_KEY, 1000) - minKeySize; + minValueSize = conf.getInt(MIN_VALUE, 0); + valueSizeRange = + conf.getInt(MAX_VALUE, 20000) - minValueSize; + } + } + + /** + * This is the main routine for launching a distributed random write job. + * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. + * The reduce doesn't do anything. + * + * @throws IOException + */ + public int run(String[] args) throws Exception { + if (args.length == 0) { + System.out.println("Usage: writer "); + ToolRunner.printGenericCommandUsage(System.out); + return 2; + } + + Path outDir = new Path(args[0]); + Configuration conf = getConf(); + JobClient client = new JobClient(conf); + ClusterStatus cluster = client.getClusterStatus(); + int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); + long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, + 1 * 1024 * 1024 * 1024); + if (numBytesToWritePerMap == 0) { + System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0"); + return -2; + } + long totalBytesToWrite = conf.getLong(TOTAL_BYTES, + numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); + int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); + if (numMaps == 0 && totalBytesToWrite > 0) { + numMaps = 1; + conf.setLong(BYTES_PER_MAP, totalBytesToWrite); + } + conf.setInt(MRJobConfig.NUM_MAPS, numMaps); + + Job job = Job.getInstance(conf); + + job.setJarByClass(RandomWriter.class); + job.setJobName("random-writer"); + FileOutputFormat.setOutputPath(job, outDir); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(BytesWritable.class); + job.setInputFormatClass(RandomInputFormat.class); + job.setMapperClass(RandomMapper.class); + job.setReducerClass(Reducer.class); + job.setOutputFormatClass(SequenceFileOutputFormat.class); + + System.out.println("Running " + numMaps + " maps."); + + // reducer NONE + job.setNumReduceTasks(0); + + Date startTime = new Date(); + System.out.println("Job started: " + startTime); + int ret = job.waitForCompletion(true) ? 
0 : 1; + Date endTime = new Date(); + System.out.println("Job ended: " + endTime); + System.out.println("The job took " + + (endTime.getTime() - startTime.getTime()) / 1000 + + " seconds."); + + return ret; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new RandomWriter(), args); + System.exit(res); + } + +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/TextSerializer.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/TextSerializer.java new file mode 100644 index 00000000..94d9b191 --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/TextSerializer.java @@ -0,0 +1,58 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import io.pravega.client.stream.Serializer; +import lombok.EqualsAndHashCode; +import org.apache.hadoop.io.Text; + +import java.io.*; +import java.nio.ByteBuffer; + +/** + * An implementation of {@link Serializer} that uses serialization. + */ +@EqualsAndHashCode +public class TextSerializer implements Serializer, Serializable { + + @Override + public ByteBuffer serialize(Text value) { + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + ObjectOutputStream oout; + try { + oout = new ObjectOutputStream(bout); + value.write(oout); + oout.close(); + bout.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + return ByteBuffer.wrap(bout.toByteArray()); + } + + @Override + @SuppressWarnings("unchecked") + public Text deserialize(ByteBuffer serializedValue) { + ByteArrayInputStream bin = new ByteArrayInputStream(serializedValue.array(), + serializedValue.position(), + serializedValue.remaining()); + ObjectInputStream oin; + Text value = new Text(); + try { + oin = new ObjectInputStream(bin); + value.readFields(oin); + return value; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordCount.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordCount.java new file mode 100644 index 00000000..0f1e9a1c --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordCount.java @@ -0,0 +1,141 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import io.pravega.connectors.hadoop.PravegaInputFormat; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.GenericOptionsParser; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.io.IOException; +import java.util.StringTokenizer; + +import com.google.common.base.Charsets; + + +/** + * This class is copied from apache/hadoop and modified by adding logic to + * support PravegaInputFormat + * + * https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples + * /src/main/java/org/apache/hadoop/examples/WordCount.java + * + */ +public class WordCount { + + public static class TokenizerMapper + extends Mapper { + + private final static IntWritable one = new IntWritable(1); + private Text word = new Text(); + + public void map(Object ignored, Text value, Context context + ) throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + word.set(itr.nextToken()); + context.write(word, one); + } + } + } + + public static class IntSumReducer + extends Reducer { + private IntWritable result = new IntWritable(); + + public void reduce(Text key, Iterable values, + Context context + ) throws IOException, InterruptedException { + int sum = 0; + for (IntWritable val : values) { + sum += val.get(); + } + result.set(sum); + context.write(key, result); + } + } + + /** + * Reads the output file + * + * @param path The path to find the output file in. Set in main to the output + * directory. + * @throws IOException If it cannot access the output directory, we throw an exception. 
+ */ + private static void readAndPrint(Path path, Configuration conf) + throws IOException { + FileSystem fs = FileSystem.get(conf); + Path file = new Path(path, "part-r-00000"); + + if (!fs.exists(file)) + throw new IOException("Output not found!"); + + BufferedReader br = null; + + try { + br = new BufferedReader(new InputStreamReader(fs.open(file), Charsets.UTF_8)); + long count = 0; + long length = 0; + + String line; + while ((line = br.readLine()) != null) { + System.out.println(line); + } + } finally { + if (br != null) { + br.close(); + } + } + } + + + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); + if (otherArgs.length < 5) { + System.err.println("Usage: wordcount "); + System.exit(2); + } + + conf.setStrings("pravega.uri", otherArgs[1]); + conf.setStrings("pravega.scope", otherArgs[2]); + conf.setStrings("pravega.stream", otherArgs[3]); + conf.setStrings("pravega.deserializer", TextSerializer.class.getName()); + + Job job = Job.getInstance(conf, "word count"); + job.setJarByClass(WordCount.class); + job.setMapperClass(TokenizerMapper.class); + job.setCombinerClass(IntSumReducer.class); + job.setReducerClass(IntSumReducer.class); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(IntWritable.class); + + job.setInputFormatClass(PravegaInputFormat.class); + + FileInputFormat.addInputPath(job, new Path(otherArgs[0])); + Path outputpath = new Path(otherArgs[4]); + FileOutputFormat.setOutputPath(job, outputpath); + + boolean result = job.waitForCompletion(true); + readAndPrint(outputpath, conf); + System.exit(result ? 0 : 1); + } +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMean.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMean.java new file mode 100644 index 00000000..96952214 --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMean.java @@ -0,0 +1,206 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.StringTokenizer; + +import io.pravega.connectors.hadoop.PravegaInputFormat; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import com.google.common.base.Charsets; + +/** + * This class is copied from apache/hadoop and modified by adding logic to + * support PravegaInputFormat + * + * https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples + * /src/main/java/org/apache/hadoop/examples/WordMean.java + * + */ +public class WordMean extends Configured implements Tool { + + private double mean = 0; + + private final static Text COUNT = new Text("count"); + private final static Text LENGTH = new Text("length"); + private final static LongWritable ONE = new LongWritable(1); + + /** + * Maps words from line of text into 2 key-value pairs; one key-value pair for + * counting the word, another for counting its length. + */ + public static class WordMeanMapper extends + Mapper { + + private LongWritable wordLen = new LongWritable(); + + /** + * Emits 2 key-value pairs for counting the word and its length. Outputs are + * (Text, LongWritable). + * + * @param value + * This will be a line of text coming in from our input file. + */ + public void map(Object key, Text value, Context context) + throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + String string = itr.nextToken(); + this.wordLen.set(string.length()); + context.write(LENGTH, this.wordLen); + context.write(COUNT, ONE); + } + } + } + + /** + * Performs integer summation of all the values for each key. + */ + public static class WordMeanReducer extends + Reducer { + + private LongWritable sum = new LongWritable(); + + /** + * Sums all the individual values within the iterator and writes them to the + * same key. + * + * @param key + * This will be one of 2 constants: LENGTH_STR or COUNT_STR. + * @param values + * This will be an iterator of all the values associated with that + * key. + */ + public void reduce(Text key, Iterable values, Context context) + throws IOException, InterruptedException { + + int theSum = 0; + for (LongWritable val : values) { + theSum += val.get(); + } + sum.set(theSum); + context.write(key, sum); + } + } + + /** + * Reads the output file and parses the summation of lengths, and the word + * count, to perform a quick calculation of the mean. + * + * @param path + * The path to find the output file in. Set in main to the output + * directory. + * @throws IOException + * If it cannot access the output directory, we throw an exception. 
+ */ + private double readAndCalcMean(Path path, Configuration conf) + throws IOException { + FileSystem fs = FileSystem.get(conf); + Path file = new Path(path, "part-r-00000"); + + if (!fs.exists(file)) + throw new IOException("Output not found!"); + + BufferedReader br = null; + + // average = total sum / number of elements; + try { + br = new BufferedReader(new InputStreamReader(fs.open(file), Charsets.UTF_8)); + + long count = 0; + long length = 0; + + String line; + while ((line = br.readLine()) != null) { + StringTokenizer st = new StringTokenizer(line); + + // grab type + String type = st.nextToken(); + + // differentiate + if (type.equals(COUNT.toString())) { + String countLit = st.nextToken(); + count = Long.parseLong(countLit); + } else if (type.equals(LENGTH.toString())) { + String lengthLit = st.nextToken(); + length = Long.parseLong(lengthLit); + } + } + + double theMean = (((double) length) / ((double) count)); + System.out.println("The mean is: " + theMean); + return theMean; + } finally { + if (br != null) { + br.close(); + } + } + } + + public static void main(String[] args) throws Exception { + ToolRunner.run(new Configuration(), new WordMean(), args); + } + + @Override + public int run(String[] args) throws Exception { + if (args.length != 5) { + System.err.println("Usage: wordmean "); + return 0; + } + + Configuration conf = getConf(); + + conf.setStrings("pravega.uri", args[1]); + conf.setStrings("pravega.scope", args[2]); + conf.setStrings("pravega.stream", args[3]); + conf.setStrings("pravega.deserializer", TextSerializer.class.getName()); + + Job job = Job.getInstance(conf, "word mean"); + job.setJarByClass(WordMean.class); + job.setMapperClass(WordMeanMapper.class); + job.setCombinerClass(WordMeanReducer.class); + job.setReducerClass(WordMeanReducer.class); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(LongWritable.class); + job.setInputFormatClass(PravegaInputFormat.class); + FileInputFormat.addInputPath(job, new Path(args[0])); + Path outputpath = new Path(args[4]); + FileOutputFormat.setOutputPath(job, outputpath); + boolean result = job.waitForCompletion(true); + mean = readAndCalcMean(outputpath, conf); + + return (result ? 0 : 1); + } + + /** + * Only valuable after run() called. + * + * @return Returns the mean value. + */ + public double getMean() { + return mean; + } +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMedian.java b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMedian.java new file mode 100644 index 00000000..28f512c6 --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMedian.java @@ -0,0 +1,218 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.hadoop; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.StringTokenizer; + +import io.pravega.connectors.hadoop.PravegaInputFormat; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.TaskCounter; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import com.google.common.base.Charsets; + +/** + * This class is copied from apache/hadoop and modified by adding logic to + * support PravegaInputFormat + * + * https://github.com/apache/hadoop/blob/trunk/hadoop-mapreduce-project/hadoop-mapreduce-examples + * /src/main/java/org/apache/hadoop/examples/WordMedian.java + * + */ +public class WordMedian extends Configured implements Tool { + + private double median = 0; + private final static IntWritable ONE = new IntWritable(1); + + /** + * Maps words from line of text into a key-value pair; the length of the word + * as the key, and 1 as the value. + */ + public static class WordMedianMapper extends + Mapper { + + private IntWritable length = new IntWritable(); + + /** + * Emits a key-value pair for counting the word. Outputs are (IntWritable, + * IntWritable). + * + * @param value + * This will be a line of text coming in from our input file. + */ + public void map(Object key, Text value, Context context) + throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + String string = itr.nextToken(); + length.set(string.length()); + context.write(length, ONE); + } + } + } + + /** + * Performs integer summation of all the values for each key. + */ + public static class WordMedianReducer extends + Reducer { + + private IntWritable val = new IntWritable(); + + /** + * Sums all the individual values within the iterator and writes them to the + * same key. + * + * @param key + * This will be a length of a word that was read. + * @param values + * This will be an iterator of all the values associated with that + * key. + */ + public void reduce(IntWritable key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + val.set(sum); + context.write(key, val); + } + } + + /** + * This is a standard program to read and find a median value based on a file + * of word counts such as: 1 456, 2 132, 3 56... Where the first values are + * the word lengths and the following values are the number of times that + * words of that length appear. + * + * @param path + * The path to read the HDFS file from (part-r-00000...00001...etc). + * @param medianIndex1 + * The first length value to look for. + * @param medianIndex2 + * The second length value to look for (will be the same as the first + * if there are an even number of words total). + * @throws IOException + * If file cannot be found, we throw an exception. 
+ * */ + private double readAndFindMedian(String path, int medianIndex1, + int medianIndex2, Configuration conf) throws IOException { + FileSystem fs = FileSystem.get(conf); + Path file = new Path(path, "part-r-00000"); + + if (!fs.exists(file)) + throw new IOException("Output not found!"); + + BufferedReader br = null; + + try { + br = new BufferedReader(new InputStreamReader(fs.open(file), Charsets.UTF_8)); + int num = 0; + + String line; + while ((line = br.readLine()) != null) { + StringTokenizer st = new StringTokenizer(line); + + // grab length + String currLen = st.nextToken(); + + // grab count + String lengthFreq = st.nextToken(); + + int prevNum = num; + num += Integer.parseInt(lengthFreq); + + if (medianIndex2 >= prevNum && medianIndex1 <= num) { + System.out.println("The median is: " + currLen); + br.close(); + return Double.parseDouble(currLen); + } else if (medianIndex2 >= prevNum && medianIndex1 < num) { + String nextCurrLen = st.nextToken(); + double theMedian = (Integer.parseInt(currLen) + Integer + .parseInt(nextCurrLen)) / 2.0; + System.out.println("The median is: " + theMedian); + br.close(); + return theMedian; + } + } + } finally { + if (br != null) { + br.close(); + } + } + // error, no median found + return -1; + } + + public static void main(String[] args) throws Exception { + ToolRunner.run(new Configuration(), new WordMedian(), args); + } + + @Override + public int run(String[] args) throws Exception { + if (args.length != 5) { + System.err.println("Usage: wordmedian "); + return 0; + } + + setConf(new Configuration()); + Configuration conf = getConf(); + + conf.setStrings("pravega.uri", args[1]); + conf.setStrings("pravega.scope", args[2]); + conf.setStrings("pravega.stream", args[3]); + conf.setStrings("pravega.deserializer", TextSerializer.class.getName()); + + Job job = Job.getInstance(conf, "word median"); + job.setJarByClass(WordMedian.class); + job.setMapperClass(WordMedianMapper.class); + job.setCombinerClass(WordMedianReducer.class); + job.setReducerClass(WordMedianReducer.class); + job.setOutputKeyClass(IntWritable.class); + job.setOutputValueClass(IntWritable.class); + job.setInputFormatClass(PravegaInputFormat.class); + FileInputFormat.addInputPath(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[4])); + boolean result = job.waitForCompletion(true); + + // Wait for JOB 1 -- get middle value to check for Median + + long totalWords = job.getCounters() + .getGroup(TaskCounter.class.getCanonicalName()) + .findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue(); + int medianIndex1 = (int) Math.ceil((totalWords / 2.0)); + int medianIndex2 = (int) Math.floor((totalWords / 2.0)); + + median = readAndFindMedian(args[4], medianIndex1, medianIndex2, conf); + + return (result ? 0 : 1); + } + + public double getMedian() { + return median; + } +} diff --git a/hadoop-examples/src/main/java/io/pravega/examples/spark/WordCount.java b/hadoop-examples/src/main/java/io/pravega/examples/spark/WordCount.java new file mode 100644 index 00000000..f2440f2e --- /dev/null +++ b/hadoop-examples/src/main/java/io/pravega/examples/spark/WordCount.java @@ -0,0 +1,57 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package io.pravega.examples.spark; + +import io.pravega.connectors.hadoop.EventKey; +import io.pravega.connectors.hadoop.PravegaInputFormat; +import io.pravega.examples.hadoop.TextSerializer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import scala.Tuple2; + +import java.util.Arrays; +import java.util.regex.Pattern; + + +public final class WordCount { + private static final Pattern SPACE = Pattern.compile(" "); + + public static void main(String[] args) throws Exception { + + Configuration conf = new Configuration(); + GenericOptionsParser optionParser = new GenericOptionsParser(conf, args); + String[] remainingArgs = optionParser.getRemainingArgs(); + + if (remainingArgs.length != 3) { + System.err.println("Usage: WordCount "); + System.exit(2); + } + + conf.setStrings(PravegaInputFormat.URI_STRING, remainingArgs[0]); + conf.setStrings(PravegaInputFormat.SCOPE_NAME, remainingArgs[1]); + conf.setStrings(PravegaInputFormat.STREAM_NAME, remainingArgs[2]); + conf.setStrings(PravegaInputFormat.DESERIALIZER, TextSerializer.class.getName()); + + JavaSparkContext sc = new JavaSparkContext(new SparkConf()); + + JavaPairRDD lines = sc.newAPIHadoopRDD(conf, PravegaInputFormat.class, EventKey.class, Text.class); + JavaRDD words = lines.map(x -> x._2).flatMap(s -> Arrays.asList(SPACE.split(s.toString())).iterator()); + JavaPairRDD ones = words.mapToPair(s -> new Tuple2<>(s, 1)); + JavaPairRDD counts = ones.reduceByKey((i1, i2) -> i1 + i2); + + System.out.println("RESULT :" + counts.collect()); + } +} From 715af350f1dd1545ab836311d8eddde9ea28dfa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Gracia?= Date: Wed, 13 Jun 2018 11:55:53 +0200 Subject: [PATCH 20/48] Issue 81: Review and organize docs (#86) (#102) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Reorganizes and improves the available documentation for pravega-samples: - The new approach consists of providing in the initial README an overview of the repository structure and a table with the available example applications. Then, we provide the build instructions that are common to all sub-projects. - The new document also proposes a roadmap for first-time users and a help section. - The standalone and flink examples README files have been improved by deleting all the repeated build instructions and focusing more on the examples of each sub-project (divided into features and scenarios). Signed-off-by: Raúl Gracia --- README.md | 192 +++++++++++++-------- anomaly-detection/{README.MD => README.md} | 61 +++---- flink-examples/README.md | 100 +++++++++-- hadoop-examples/README.md | 39 ++--- settings.gradle | 1 + standalone-examples/README.md | 144 +++++++++------- 6 files changed, 319 insertions(+), 218 deletions(-) rename anomaly-detection/{README.MD => README.md} (84%) diff --git a/README.md b/README.md index d4d1b7b9..9389ea18 100644 --- a/README.md +++ b/README.md @@ -1,104 +1,156 @@ -# pravega-samples +# Pravega and Analytics Connectors Examples -Sample applications for Pravega. +This repository contains code samples to demonstrate how developers can work with +[Pravega](http://pravega.io). 
We also provide code samples to connect analytics
+engines such as [Flink](https://flink.apache.org/) and
+[Hadoop](http://hadoop.apache.org/) with Pravega as a storage substrate for data
+streams.
+
+For more information on Pravega, we recommend reading the [documentation and the
+developer guide](http://pravega.io).
+
+# Repository Structure
+
+This repository is divided into sub-projects (`standalone-examples`, `flink-examples`
+and `hadoop-examples`), each demonstrating a specific component. In these sub-projects,
+we provide a battery of simple code examples that demonstrate how a particular
+feature or API works. We also include a `scenarios` sub-project that contains
+more complex applications, which show use cases exploiting one or multiple components.
+
+## Pravega Examples
+| Example Name | Description | Language |
+| ------------- |:-----| :-----|
+| `gettingstarted` | Simple example of how to read/write from/to a Pravega `Stream`. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/gettingstarted)
+| `consolerw` | Application that allows users to work with `Stream`, `Transaction` and `StreamCut` APIs via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/consolerw)
+| `noop` | Example of how to add a simple callback executed upon a read event. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/noop)
+| `statesynchronizer` | Application that allows users to work with `StateSynchronizer` API via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/statesynchronizer)
+| `streamcuts` | Application examples demonstrating the use of `StreamCut`s via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/streamcuts)
+
+> Hint: Have a look at the [terminology and concepts](http://pravega.io/docs/latest/terminology/) in Pravega.
+
+## Flink-connector Examples
+| Example Name | Description | Language |
+| ------------- |:-----| :-----|
+| `wordcount` | Continuously counts the words from a Pravega `Stream` to demonstrate the usage of the Flink connector for Pravega. | [Java](https://github.com/pravega/pravega-samples/tree/master/flink-examples/src/main/java/io/pravega/examples/flink/wordcount)
+
+## Hadoop-connector Examples
+| Example Name | Description | Language |
+| ------------- |:-----| :-----|
+| `wordcount` | Counts the words from a Pravega `Stream` filled with random text to demonstrate the usage of the Hadoop connector for Pravega. | [Java](https://github.com/pravega/pravega-samples/tree/master/hadoop-examples/src/main/java/io/pravega/examples/hadoop)
+
+## Scenarios
+| Example Name | Description | Language |
+| ------------- |:-----| :-----|
+| `turbineheatsensor` | Emulates parallel sensors producing temperature values (writers) and parallel consumers performing real-time statistics (readers) via the Pravega client.
| [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/iot) +| `turbineheatprocessor` | A Flink streaming application for processing temperature data from a Pravega stream produced by the `turbineheatsensor` app. The application computes a daily summary of the temperature range observed on that day by each sensor. | [Java](https://github.com/pravega/pravega-samples/tree/master/flink-examples/src/main/java/io/pravega/examples/flink/iot), [Scala](https://github.com/pravega/pravega-samples/tree/master/flink-examples/src/main/scala/io/pravega/examples/flink/iot) +| `anomaly-detection` | A Flink streaming application for detecting anomalous input patterns using a finite-state machine. | [Java](https://github.com/pravega/pravega-samples/tree/master/anomaly-detection) + + +# Build Instructions + +## Pre-requisites + +* Java 8+ + +## Pravega Build Instructions (Optional) + +For [release builds](https://github.com/pravega/pravega/releases) of Pravega, +the artifacts will already be in Maven Central and you will not need to run this step. +In this sense, you also have the option to download the Pravega `master` snapshots published +in our [JFrog repository](https://oss.jfrog.org/artifactory/jfrog-dependencies/io/pravega/). + +Conversely, if you want to build Pravega from source, you may need to generate the +latest Pravega `jar` files and install them to your local Maven repository. +To this end, please run the following commands: -Install the Pravega client libraries to your local Maven repository: ``` $ git clone https://github.com/pravega/pravega.git -$./gradlew install +$ cd pravega +$ ./gradlew install ``` -### Building the Flink Connector +The above command should generate the required `jar` files into your local Maven repository. +For more information, please visit [Pravega](https://github.com/pravega/pravega). -Optional: This step is required only if you want to use a different version -of Pravega than is published to maven central. +> Hint: If you use a different version of Pravega, please check the `pravegaVersion` property +in `gradle.properties` file. -Install the shaded Flink Connector library to your local Maven repository: -``` -$ git clone https://github.com/pravega/flink-connectors.git -$./gradlew install -``` +## Flink Connector Build Instructions + +To execute Flink connector examples, follow the below steps to build and publish artifacts from +source to local Maven repository: -### Building the Samples -Use the built-in gradle wrapper to build the samples. ``` -$ ./gradlew build -... -BUILD SUCCESSFUL +$ git clone --recursive https://github.com/pravega/flink-connectors.git +$ cd flink-connectors +$ ./gradlew clean install ``` -### Distributing (Flink Samples) -#### Assemble -Use gradle to assemble a distribution folder containing the Flink programs as a ready-to-deploy uber-jar called `pravega-flink-examples-0.1.0-SNAPSHOT-all.jar`. -``` -$ ./gradlew installDist -... -$ ls -R flink-examples/build/install/pravega-flink-examples -bin lib +For more information, please visit [Flink Connectors](https://github.com/pravega/flink-connectors). 
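+
+If another Gradle project needs to consume the connector you just installed, a minimal
+`build.gradle` fragment along the following lines should work. This is only a sketch:
+the `2.11` Scala suffix and the `connectorVersion` property are assumptions that must
+match the connector artifact you actually built (e.g., define `connectorVersion` in your
+`gradle.properties`).
+
+```
+// Resolve the connector from the local Maven repository (~/.m2/repository),
+// which is where `./gradlew clean install` publishes it.
+repositories {
+    mavenLocal()
+    mavenCentral()
+}
+
+dependencies {
+    // Adjust the Scala suffix and connectorVersion to the build you installed.
+    compile "io.pravega:pravega-connectors-flink_2.11:${connectorVersion}"
+}
+```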
-flink-examples/build/install/pravega-flink-examples/bin: -run-example +## Hadoop Connector Build Instructions -flink-examples/build/install/pravega-flink-examples/lib: -pravega-flink-examples-0.1.0-SNAPSHOT-all.jar +To execute Hadoop connector examples, follow the below steps to build and publish artifacts from +source to local Maven repository: + +``` +$ git clone --recurse-submodules https://github.com/pravega/hadoop-connectors.git +$ cd hadoop-connectors +$ gradle install ``` -#### Upload -The `upload` task makes it easy to upload the sample binaries to your cluster. First, configure Gradle -with the address of a node in your cluster. Edit `~/.gradle/gradle.properties` to specify a value for `dcosAddress`. +For more information, please visit [Hadoop Connectors](https://github.com/pravega/hadoop-connectors). + +## Pravega Samples Build Instructions + +Finally, we need to build the code of the examples. Note that the `master` branch points to release +artifacts of Pravega and connectors, whereas the `develop` branch works with snapshot artifacts. +To build `pravega-samples` from source, use the built-in gradle wrapper as follows: ``` -$ cat ~/.gradle/gradle.properties -dcosAddress=10.240.124.164 +$ git clone https://github.com/pravega/pravega-samples.git +$ cd pravega-samples +$ ./gradlew clean installDist ``` +To ease their execution, most examples can be run either using the gradle wrapper (gradlew) or +scripts. The above gradle command automatically creates the execution scripts that can be found +under: -Then, upload the samples to the cluster. They'll be copied to `/home/centos` on the target node. ``` -$ ./gradlew upload +pravega-samples/standalone-examples/build/install/pravega-standalone-examples/bin ``` -## Flink Samples +There is a Linux/Mac script and a Windows (.bat) script for each separate executable. -### Anomaly Detection -A Flink streaming application for detecting anomalous input patterns using a finite-state machine. +# Proposed Roadmap -_See the [anomaly-detection/](https://github.com/pravega/pravega-samples/tree/master/anomaly-detection) directory for more information._ +We propose a roadmap to proceed with the execution of examples based on their complexity: +1. [Pravega examples](https://github.com/pravega/pravega-samples/tree/master/standalone-examples): +First step to understand the basics of Pravega and exercise the concepts presented in the documentation. +2. [Flink-connector examples](https://github.com/pravega/pravega-samples/tree/master/flink-examples): +These examples show the basic functionality of the Flink connector for Pravega. +3. [Hadoop-connector examples](https://github.com/pravega/pravega-samples/tree/master/hadoop-examples): +These examples show the basic functionality of the Hadoop connector for Pravega. +4. Scenarios: Applications that go beyond the basic usage of Pravega APIs, which may include complex interactions +between Pravega and analytics engines (e.g., Flink, Hadoop, Spark) to demonstrate analytics use cases. -### Turbine Heat Processor -A Flink streaming application for processing temperature data from a Pravega stream. Complements the Turbine Heat Sensor app (external). The application computes a daily summary of the temperature range observed on that day by each sensor. +# Where to find help + +Documentation on Pravega and Analytics Connectors: +* [Pravega.io](http://pravega.io/), [Pravega Wiki](https://github.com/pravega/pravega/wiki). +* [Flink Connectors Wiki](https://github.com/pravega/flink-connectors/wiki). 
+ +Did you find a problem or bug? +* First, check our [FAQ](http://pravega.io/docs/latest/faq/). +* If the FAQ does not help you, create a [new GitHub issue](https://github.com/pravega/pravega-samples/issues). + +Do you want to contribute a new example application? +* Follow the [guidelines for contributors](https://github.com/pravega/pravega/wiki/Contributing). + +Have fun!! -Automatically creates a scope (default: `examples`) and stream (default: `turbineHeatTest`) as necessary. -#### Running -Run the sample from the command-line: -``` -$ bin/run-example [--controller <controller URI>] [--input <scope>/<stream>] [--startTime <timestamp>] [--output <path>] -``` -Alternately, run the sample from the Flink UI. -- JAR: `pravega-flink-examples-0.1.0-SNAPSHOT-all.jar` -- Main class: `io.pravega.examples.flink.iot.TurbineHeatProcessor` or `io.pravega.examples.flink.iot.TurbineHeatProcessorScala` -#### Outputs -The application outputs the daily summary as a comma-separated values (CSV) file, one line per sensor per day. The data is -also emitted to stdout (which may be viewed in the Flink UI). For example: -``` -... -SensorAggregate(1065600000,12,Illinois,(60.0,100.0)) -SensorAggregate(1065600000,3,Arkansas,(60.0,100.0)) -SensorAggregate(1065600000,7,Delaware,(60.0,100.0)) -SensorAggregate(1065600000,15,Kansas,(40.0,80.0)) -SensorAggregate(1152000000,3,Arkansas,(60.0,100.0)) -SensorAggregate(1152000000,12,Illinois,(60.0,100.0)) -SensorAggregate(1152000000,15,Kansas,(40.0,80.0)) -SensorAggregate(1152000000,7,Delaware,(60.0,100.0)) -... -``` diff --git a/anomaly-detection/README.MD b/anomaly-detection/README.md similarity index 84% rename from anomaly-detection/README.MD rename to anomaly-detection/README.md index 22cf28a5..d998eceb 100644 --- a/anomaly-detection/README.MD +++ b/anomaly-detection/README.md @@ -1,18 +1,24 @@ - # Event pattern detection with Apache Flink and Pravega +Sample application which simulates network anomaly intrusion and detection using Apache Flink +and Pravega. +This application is based on [streaming-state-machine](https://github.com/StephanEwen/flink-demos/tree/master/streaming-state-machine) +which is slightly extended to demonstrate Pravega/Flink integration capabilities. -Sample application which simulates network anomaly intrusion and detection using Apache Flink and Apache Pravega. - -This application is based on [streaming-state-machine](https://github.com/StephanEwen/flink-demos/tree/master/streaming-state-machine) which is slightly extended to demonstrate Pravega/Flink integration capabilities. +Events in streams (generated by devices and services, such as firewalls, routers, authentication +services, etc.) are expected to occur in certain patterns. Any deviation from these patterns +indicates an anomaly (attempted intrusion) that the streaming system should recognize and that +should trigger an alert. -Events in streams (generated by devices and services, such as firewalls, routers, authentication services etc.,) are expected to occur in certain patterns. Any deviation from these patterns indicates an anomaly (attempted intrusion) that the streaming system should recognize and that should trigger an alert. - -The event patterns are tracked per interacting party (here simplified per source IP address) and are validated by a state machine. The state machine's states define what possible events may occur next, and what new states these events will result in. +The event patterns are tracked per interacting party (here simplified per source IP address) and +are validated by a state machine.
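To make the state-machine idea concrete before the details that follow, here is a minimal, self-contained Java sketch of per-source-IP pattern validation. This is an editorial illustration only (all names here are hypothetical), not the sample's actual code, which implements the machine inside a Flink streaming job:

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

// Hypothetical sketch: a per-key state machine that flags invalid transitions.
public class StateMachineSketch {

    enum State { INITIAL, LOGGED_IN, ANOMALY }

    // Allowed transitions: event type -> (current state -> next state).
    private static final Map<String, Map<State, State>> TRANSITIONS = new HashMap<>();
    static {
        TRANSITIONS.put("login", Collections.singletonMap(State.INITIAL, State.LOGGED_IN));
        TRANSITIONS.put("logout", Collections.singletonMap(State.LOGGED_IN, State.INITIAL));
    }

    // Current state per interacting party (here: per source IP address).
    private final Map<String, State> stateBySourceIp = new HashMap<>();

    // Applies one event; returns true if it deviates from the expected pattern.
    public boolean isAnomalous(String sourceIp, String eventType) {
        State current = stateBySourceIp.getOrDefault(sourceIp, State.INITIAL);
        State next = TRANSITIONS
                .getOrDefault(eventType, Collections.<State, State>emptyMap())
                .get(current);
        if (next == null) {
            stateBySourceIp.put(sourceIp, State.ANOMALY); // unexpected event: raise an alert
            return true;
        }
        stateBySourceIp.put(sourceIp, next);
        return false;
    }
}
```

An event with no allowed transition from the current state immediately marks that source IP as anomalous, which is exactly the condition that should trigger an alert.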
The state machine's states define what possible events may +occur next, and what new states these events will result in. -The final aggregated results are grouped under network id which acts as a network domain abstraction hosting multiple server machines. +The final aggregated results are grouped under a network id, which acts as a network domain +abstraction hosting multiple server machines. -The aggregated results are (optionally) sinked to Elastic Search for visualizing from the Kibana user interface. +The aggregated results are (optionally) written to Elasticsearch for visualization in the +Kibana user interface. The following diagram depicts the state machine used in this example. @@ -26,34 +32,18 @@ The following diagram depicts the state machine used in this example. +-----------------+ +--------+ ``` -## Getting Started - -### Build Pravega & Flink Connectors - -Follow the below steps to build and publish artifacts from source to local Maven repository: - -``` -$ git clone https://github.com/pravega/pravega.git -$ ./gradlew clean install - -$ git clone https://github.com/pravega/flink-connectors.git -$ ./gradlew clean install - -``` -Alternatively, follow the instructions from [here](http://pravega.io/docs/getting-started/) to pull from release repository. - -### Build the Sample Code - -Follow the below steps to build the sample code: -``` -$ git clone https://github.com/pravega/pravega-samples.git -$ cd pravega-samples -$ ./gradlew clean installDist -``` +## Pre-requisites +1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +2. Build [flink-connectors](https://github.com/pravega/flink-connectors) repository +3. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository +4. Apache Flink running +5. ELK running (optional) ## Running the Anomaly Detection Example -_The example program is copied to a distribution folder when built as shown above. Navigate to `anomaly-detection/build/install/pravega-flink-anomaly-detection/` for the below steps._ +The example program is copied to a distribution folder when built as shown above. +Navigate to `anomaly-detection/build/install/pravega-flink-anomaly-detection/` for the steps +below. The example is split into three separate programs: 1. A utility program to create a Pravega stream for use by the example code. All programs share a configuration file (`conf/app.json`) and a startup script (`bin/anomaly-detection`). Usage: - ``` bin/anomaly-detection --configDir <config directory> --mode <mode> [--stream <scope>/<stream>] [--controller <controller URI>] @@ -109,7 +98,7 @@ depending on the configuration: 2. Manually, by pressing ENTER on the console (if `controlledEnv` is `true`) ### Run the Anomaly Detector -The anomaly detector is another Flink program. The program groups events by +The anomaly detector is another Flink program. The program groups events by source IP address, and maintains a state machine for each address as described above. Alerts are aggregated using a tumbling window. Note that the Flink program uses _event time_ to aggregate alerts. diff --git a/flink-examples/README.md b/flink-examples/README.md index c6eef69e..c5ceede1 100644 --- a/flink-examples/README.md +++ b/flink-examples/README.md @@ -1,35 +1,99 @@ -# Pravega Flink Connector Samples -Steps to set up and run Pravega Flink connector samples.
+# Flink Connector Examples for Pravega +Battery of code examples to demonstrate the capabilities of Pravega as a data stream storage +system for Apache Flink. -## Pre requisites -1. Java 8 -2. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +## Pre-requisites +1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +2. Build [flink-connectors](https://github.com/pravega/flink-connectors) repository +3. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository +4. Apache Flink running -## Build Pravega Flink Connectors -Follow the below steps to build and publish artifacts from source to local Maven repository: +### Distributing Flink Samples +#### Assemble +Use gradle to assemble a distribution folder containing the Flink programs as a ready-to-deploy +uber-jar called `pravega-flink-examples-0.1.0-SNAPSHOT-all.jar`: ``` -$ git clone https://github.com/pravega/flink-connectors.git -$ cd flink-connectors -$ ./gradlew clean install +$ ./gradlew installDist +... +$ ls -R flink-examples/build/install/pravega-flink-examples +bin lib + +flink-examples/build/install/pravega-flink-examples/bin: +run-example + +flink-examples/build/install/pravega-flink-examples/lib: +pravega-flink-examples-0.1.0-SNAPSHOT-all.jar ``` -## Build the Sample Code +#### Upload +The `upload` task makes it easy to upload the sample binaries to your cluster. First, configure +Gradle with the address of a node in your cluster. Edit `~/.gradle/gradle.properties` to +specify a value for `dcosAddress`. -Follow the below steps to build the sample code: +``` +$ cat ~/.gradle/gradle.properties +dcosAddress=10.240.124.164 +``` +Then, upload the samples to the cluster. They'll be copied to `/home/centos` on the target node. ``` -$ git clone https://github.com/pravega/pravega-samples.git -$ cd pravega-samples -$ ./gradlew clean installDist +$ ./gradlew upload ``` -## Word Count Sample +--- + +# Examples Catalog + +## Word Count This example demonstrates how to use the Pravega Flink Connectors to write data collected -from an external network stream into a Pravega stream and read the data from the Pravega stream. -See [Flink Word Count Sample](doc/flink-wordcount/README.md) for instructions. +from an external network stream into a Pravega `Stream` and read the data from the Pravega `Stream`. +_See [wordcount](doc/flink-wordcount/README.md) for more information and execution instructions_. + +--- + +# Scenarios Catalog + +## Turbine Heat Processor +A Flink streaming application for processing temperature data from a Pravega `Stream`. +Complements the Turbine Heat Sensor app ([`turbineheatsensor`](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/turbineheatsensor)). +The application computes a daily summary of the temperature range observed on that day by each sensor. + +Automatically creates a scope (default: `examples`) and stream (default: `turbineHeatTest`) as necessary. + +### Execution +Run the sample from the command-line: +``` +$ bin/run-example [--controller <controller URI>] [--input <scope>/<stream>] [--startTime <timestamp>] [--output <path>] +``` + +Alternatively, run the sample from the Flink UI. +- JAR: `pravega-flink-examples-0.1.0-SNAPSHOT-all.jar` +- Main class: `io.pravega.examples.flink.iot.TurbineHeatProcessor` or `io.pravega.examples.flink.iot.TurbineHeatProcessorScala` + +### Outputs +The application outputs the daily summary as a comma-separated values (CSV) file, one line per sensor per day.
The data is +also emitted to stdout (which may be viewed in the Flink UI). For example: + +``` +... +SensorAggregate(1065600000,12,Illinois,(60.0,100.0)) +SensorAggregate(1065600000,3,Arkansas,(60.0,100.0)) +SensorAggregate(1065600000,7,Delaware,(60.0,100.0)) +SensorAggregate(1065600000,15,Kansas,(40.0,80.0)) +SensorAggregate(1152000000,3,Arkansas,(60.0,100.0)) +SensorAggregate(1152000000,12,Illinois,(60.0,100.0)) +SensorAggregate(1152000000,15,Kansas,(40.0,80.0)) +SensorAggregate(1152000000,7,Delaware,(60.0,100.0)) +... +``` + +## Anomaly Detection +A Flink streaming application for detecting anomalous input patterns using a finite-state machine. +_See the [anomaly-detection](https://github.com/pravega/pravega-samples/tree/master/anomaly-detection/README.md) +sample for more information and execution instructions_. ## Exactly Once Sample diff --git a/hadoop-examples/README.md b/hadoop-examples/README.md index 385f8cbf..9186f98e 100644 --- a/hadoop-examples/README.md +++ b/hadoop-examples/README.md @@ -7,40 +7,25 @@ You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 --> +# Hadoop Connector Examples for Pravega +Code examples to give you some basic ideas of how to use the hadoop-connectors for Pravega. -# hadoop-connectors examples -Examples of Hadoop Connectors for Pravega. +## Pre-requisites +1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +2. Build [hadoop-connectors](https://github.com/pravega/hadoop-connectors) repository +3. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository +4. Apache Hadoop running -Description ----------- -These examples give you some basic ideas how to use hadoop-connectors for pravega. +--- -Build ------- -(Most of these steps will be removed when GAed) -### build hadoop connectors -``` -git clone --recurse-submodules https://github.com/pravega/hadoop-connectors.git -cd hadoop-connectors -gradle install +# Examples Catalog -# start pravega whose version matches hadoop-connectors -cd pravega -./gradlew startStandalone -``` +## Word Count -## build hadoop connectors examples -``` -open another terminal and goto ~/pravega-samples -cd hadoop-examples -gradle build -``` - -Run Examples on Local Machine --- +Hadoop (verified with Hadoop 2.8.3 on Ubuntu 16.04) +### Execution -Hadoop (verified with Hadoop 2.8.3 on Ubuntu 16.04) ``` 1. setup and start hdfs diff --git a/settings.gradle b/settings.gradle index be1eb0ad..eff49fdc 100644 --- a/settings.gradle +++ b/settings.gradle @@ -10,4 +10,5 @@ */ include 'standalone-examples' include 'flink-examples' +include 'hadoop-examples' include 'anomaly-detection' diff --git a/standalone-examples/README.md b/standalone-examples/README.md index dbb3c87f..0ed5f983 100644 --- a/standalone-examples/README.md +++ b/standalone-examples/README.md @@ -1,47 +1,22 @@ -# Standalone Examples of Pravega Applications -These applications only need a running Pravega to execute against. +# Pravega Examples +Set of example applications to demonstrate the features and APIs of Pravega as well as potential use-case scenarios. -## Pre requisites -1. Java 8 -2. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +## Pre-requisites +1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +2.
Build `pravega-samples` repository -## Publish Pravega jars to local Maven (optional) -If you have downloaded a nightly build of Pravega, you may need to generate the latest Pravega jar files and publish them to your local Maven repository. -For release builds of Pravega, the artifacts will already be in Maven Central and you won't need to run this step. +--- -Note: maven 2 needs to be installed and running on your machine +# Examples Catalog -In the root of Pravega (where Pravega's build.gradle file can be found), run: +## `gettingstarted` +This example consists of two applications, a `HelloWorldReader` that reads from a `Stream`, and a +`HelloWorldWriter` that writes to a `Stream`. -``` -$ ./gradlew install -``` - -The above command should generate the required jar files into your local maven repo. - -## Generate the scripts to make it easier to run the examples -Most examples can be run either using the gradle wrapper (gradlew) or scripts. -To run the examples using scripts, the scripts need to be generated. In the directory where you downloaded the pravega samples, run the following once, and all the scripts will be generated. - -``` -$ ./gradlew installDist -``` - -The scripts can be found under the pravega-samples directory in: - -``` -standalone-examples/build/install/pravega-standalone-examples/bin -``` - -There is a Linux/Mac script and a Windows (.bat) script for each separate executable. - -## HelloPravega Example -This example consists of two applications, a HelloWorldReader that reads from a stream and a HelloWorldWriter, that writes to a stream. You might want to run HelloWorldWriter in one window and HelloWorldReader in another window. - -### HelloWorldWriter -A simple application that shows how to write to a Pravega stream. +### Execution +First, execute `HelloWorldWriter` in a console: ``` $ bin/helloWorldWriter [-scope myScope] [-name myStream] [-uri tcp://127.0.0.1:9090] [-routingkey myRK] [-message 'hello world'] ``` @@ -54,10 +29,10 @@ All args are optional, if not included, the defaults are: * routingKey - "helloRoutingKey" * message - "hello world" -The program writes the given message with the given routing key to the stream with given scope/stream name. +The program writes the given message with the given routing key to the `Stream` with the given scope/stream +name. -### HelloWorldReader -A simple application that shows how to read from a Pravega stream. +Then, execute `HelloWorldReader` in another console: ``` $ bin/helloWorldReader [-scope myScope] [-name myStream] [-uri tcp://127.0.0.1:9090] @@ -69,14 +44,25 @@ All args are optional, if not included, the defaults are: * name - "helloStream" * uri - "tcp://127.0.0.1" (the URI to one of the controller nodes -The program reads all the events from the stream with given scope/stream name and prints each event to the console. +The program reads all the events from the `Stream` with the given scope/stream name and prints each event to +the console. + +## `consolerw` +This example includes two applications, a `ConsoleReader` and a `ConsoleWriter`. On the one hand, +`ConsoleReader` continuously reads from a `Stream` and emits all of the events onto the console. +Moreover, it allows you to select a `StreamCut` at a particular point, and then re-read existing +events either from the head of the `Stream` until that point, or from that point to the end of the +`Stream`. -## Console Reader and Writer Example -This example includes two applications, a ConsoleReader and a ConsoleWriter.
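Stepping back to the `gettingstarted` applications described above: the core write path they demonstrate is small. The following is a minimal editorial sketch (not the sample's exact source, which lives under `standalone-examples`), assuming the default controller URI and scope/stream names listed above and using only client APIs that appear elsewhere in this patch:

```java
import io.pravega.client.ClientFactory;
import io.pravega.client.admin.StreamManager;
import io.pravega.client.stream.EventStreamWriter;
import io.pravega.client.stream.EventWriterConfig;
import io.pravega.client.stream.ScalingPolicy;
import io.pravega.client.stream.StreamConfiguration;
import io.pravega.client.stream.impl.JavaSerializer;
import java.net.URI;

public class HelloWorldWriterSketch {
    public static void main(String[] args) {
        // Assumed defaults, matching the README above.
        final URI controllerURI = URI.create("tcp://127.0.0.1:9090");
        final String scope = "examples";
        final String streamName = "helloStream";

        // Create the scope and a single-segment stream if they do not exist yet.
        try (StreamManager streamManager = StreamManager.create(controllerURI)) {
            streamManager.createScope(scope);
            streamManager.createStream(scope, streamName,
                    StreamConfiguration.builder()
                            .scope(scope)
                            .streamName(streamName)
                            .scalingPolicy(ScalingPolicy.fixed(1))
                            .build());
        }

        // Write one event with a routing key and block until it is acknowledged.
        try (ClientFactory clientFactory = ClientFactory.withScope(scope, controllerURI);
             EventStreamWriter<String> writer = clientFactory.createEventWriter(
                     streamName, new JavaSerializer<String>(), EventWriterConfig.builder().build())) {
            writer.writeEvent("helloRoutingKey", "hello world").join();
        }
    }
}
```

The reader side is symmetric: create a reader group for the same stream and loop on `readNextEvent`, as the `ConsoleReader` code later in this patch shows.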
You might want to run ConsoleReader in one window and ConsoleWriter in another window. +On the other hand, `ConsoleWriter` can write to `Stream`s or `Transaction`s, and manage `Transaction`s. +This application uses the console to present an interactive DSL environment that presents +operations to write events to a `Stream` or into a `Transaction`. In addition, it presents operations +to begin, commit, abort, ping, check status on, and retrieve the id of a `Transaction`. -### ConsoleReader -Use this application to launch an application that reads from a stream and emits all of the Events onto the console. This application runs until you terminate it. +### Execution +You might want to run `ConsoleReader` in one window and `ConsoleWriter` in another window. +To run `ConsoleReader`, you can execute the following command: ``` $ bin/consoleReader [-scope myScope] [-name myStream] [-uri tcp://127.0.0.1:9090] @@ -87,13 +73,8 @@ All args are optional, if not included, the defaults are: * scope - "examples" * name - "someStream" * uri - "tcp://127.0.0.1" (the URI to one of the controller nodes - -### ConsoleWriter -Use this application to write to streams or transactions, and manage transactions. - -The application uses the console to present an interactive DSL environment that presents operations to write events to -a stream or into a transaction. In addition, it presents operations to begin, commit, abort, ping, check status on and -retrieve the id of a transaction. + +To run `ConsoleWriter`, please execute: ``` $ bin/consoleWriter [-scope myScope] [-name myStream] [-uri tcp://127.0.0.1:9090] @@ -104,37 +85,66 @@ All args are optional, if not included, the defaults are: * scope - "examples" * name - "someStream" * uri - "tcp://127.0.0.1" (the URI to one of the controller nodes + +## `noop` + + An example of a simple reader that continually reads the contents of any `Stream`. A binary serializer is used so it + works against any event types. The sample emits basic information about number of events/bytes read every 30 seconds. + + ``` + $ bin/noopReader [--uri tcp://127.0.0.1:9090] [--stream <scope>/<stream>] + ``` -## State Synchronizer -This example illustrates the use of the Pravega StateSynchronizer. +## `statesynchronizer` +This example illustrates the use of the Pravega `StateSynchronizer` API. +The application implements a `SharedMap` object using `StateSynchronizer`. We implement a +`SharedConfig` object using the `SharedMap`. The `SharedConfig` simulates the idea of a +properties configuration that needs to be kept in sync across multiple processes. +### Execution -To demonstrate manipulating the properties of the SharedConfig object, we provide a CLI. +To demonstrate manipulating the properties of the `SharedConfig` object, we provide a CLI. ``` $ bin/sharedConfigCli [-scope myScope] [-name myStream] [-uri tcp://127.0.0.1:9090] ``` -Use the simple DSL to GET, PUT, REMOVE keys from the SharedConfig object identified by scope and name. +Use the simple DSL to `GET`, `PUT`, `REMOVE` keys from the `SharedConfig` object identified by +scope and name.
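Before running several CLIs side by side as suggested next, the expected refresh behavior can be pictured with a small toy sketch in plain Java. This is only an editorial illustration of the semantics, not the actual `StateSynchronizer` API: each process keeps a local copy of the shared state and sees other processes' updates only when it explicitly refreshes.

```java
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Toy model of the SharedConfig idea: each process holds a local copy that
// goes stale until refresh() pulls the latest shared state.
public class SharedConfigSketch {

    // Stands in for the durable shared state that StateSynchronizer keeps in Pravega.
    private static final Map<String, String> SHARED = new ConcurrentHashMap<>();

    // This process's local view of the configuration.
    private final Map<String, String> localCopy = new HashMap<>();

    public void put(String key, String value) {
        SHARED.put(key, value);    // publish the update for everyone
        localCopy.put(key, value); // our own copy observes it immediately
    }

    public String get(String key) {
        // May be stale: PUTs made by other processes are invisible here...
        return localCopy.get(key);
    }

    public void refresh() {
        // ...until we explicitly synchronize with the shared state.
        localCopy.clear();
        localCopy.putAll(SHARED);
    }
}
```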
It is worthwhile to launch two or more separate CLIs in separate windows using +the same settings and observe how changes in one CLI process are not visible in another CLI +process until that other CLI process invokes `REFRESH`. -It is worthwhile to launch two or more separate CLIs in separate windows using the same settings and observe how changes in one -CLI process are not visible in another CLI process until that other CLI process invokes REFRESH. +## `streamcuts` +This application aims at demonstrating the use of `StreamCut`s for bounded processing +on multiple `Stream`s. At the moment, the application contains two examples accessible via +command line interface: i) Simple example: The user decides which `Stream` slices s/he wants +to read from all the `Stream`s by specifying indexes, and the application prints these slices +using `ReaderGroupConfig` methods for bounded processing. ii) Time series example: `Stream`s are +filled with events that are supposed to belong to a certain day with a given value: "_day1:5_". +There is a variable number of events per day in each `Stream`. The user selects a day number, +and the program makes use of `BatchClient` and `StreamCuts` to sum all the values from events +in all `Stream`s belonging to that day. -## TurbineHeatSensor +### Execution -An example of a lightweight IOT application that writes simulated sensor events to a Pravega stream. +To demonstrate the use of `StreamCut`s, we provide a CLI. To use it, please execute: ``` -$ bin/turbineSensor [--stream ] +$ bin/streamCutsCli [-scope myScope] [-name myStream] [-uri tcp://127.0.0.1:9090] ``` -## NoopReader +--- -An example of a simple reader that continually reads the contents of any stream. A binary serializer is used so it works against any event types. The sample emits basic information about number of events/bytes read every 30 seconds. +# Scenarios Catalog + +## `turbineheatsensor` + +An example of a lightweight IoT application that writes simulated sensor events to a Pravega +`Stream`. ``` -$ bin/noopReader [--uri tcp://127.0.0.1:9090] [--stream <scope>/<stream>] +$ bin/turbineSensor [--stream <stream name>] ``` + + + From 82b115738fdce0da5ae6ac0df9e2aca885306780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Gracia?= Date: Wed, 13 Jun 2018 15:08:05 +0200 Subject: [PATCH 21/48] Issue 73: Streamcuts samples (#80) (#103) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Adds a new sample for StreamCuts. * Basic StreamCuts sample in ConsoleReader: As a first approximation to StreamCuts, we extended ConsoleReader.java with an additional menu that enables users to create a StreamCut. * New StreamCuts app: A new sample app to show some of the capabilities of StreamCuts in a simple manner (StreamCutsCli.java, StreamCutsExample.java and Constants.java in streamcuts package and build.gradle).
Signed-off-by: Raúl Gracia --- standalone-examples/build.gradle | 17 + .../example/consolerw/ConsoleReader.java | 370 +++++++++++++++--- .../pravega/example/streamcuts/Constants.java | 19 + .../example/streamcuts/StreamCutsCli.java | 314 +++++++++++++++ .../example/streamcuts/StreamCutsExample.java | 343 ++++++++++++++++ 5 files changed, 1012 insertions(+), 51 deletions(-) create mode 100644 standalone-examples/src/main/java/io/pravega/example/streamcuts/Constants.java create mode 100644 standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java create mode 100644 standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java diff --git a/standalone-examples/build.gradle b/standalone-examples/build.gradle index 4cdf576c..12e7082f 100644 --- a/standalone-examples/build.gradle +++ b/standalone-examples/build.gradle @@ -131,6 +131,22 @@ task startNoopReader(type: JavaExec) { } } +task scriptStreamCutsCli(type: CreateStartScripts) { + outputDir = file('build/scripts') + mainClassName = 'io.pravega.example.streamcuts.StreamCutsCli' + applicationName = 'streamCutsCli' + defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath +} + +task startStreamCutsCli(type: JavaExec) { + main = "io.pravega.example.streamcuts.StreamCutsCli" + classpath = sourceSets.main.runtimeClasspath + if(System.getProperty("exec.args") != null) { + args System.getProperty("exec.args").split() + } +} + distributions { main { baseName = archivesBaseName @@ -143,6 +159,7 @@ distributions { from project.scriptConsoleReader from project.scriptSharedConfigCli from project.scriptNoopReader + from project.scriptStreamCutsCli } into('lib') { from(jar) diff --git a/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleReader.java b/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleReader.java index 13774a03..bd78bfdd 100644 --- a/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleReader.java +++ b/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleReader.java @@ -1,27 +1,45 @@ /* * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 - * + * */ package io.pravega.example.consolerw; +import io.pravega.client.stream.ReaderGroup; +import io.pravega.client.stream.Stream; +import io.pravega.client.stream.StreamCut; +import io.pravega.common.concurrent.ExecutorServiceHelpers; +import java.io.BufferedReader; +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStreamReader; import java.net.URI; -import java.util.Collections; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Scanner; import java.util.UUID; - -import io.pravega.client.stream.Stream; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import lombok.Cleanup; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; - import io.pravega.client.ClientFactory; import io.pravega.client.admin.ReaderGroupManager; import io.pravega.client.admin.StreamManager; @@ -31,86 +49,258 @@ import io.pravega.client.stream.ReaderGroupConfig; import io.pravega.client.stream.ReinitializationRequiredException; import io.pravega.client.stream.ScalingPolicy; -import io.pravega.client.stream.Sequence; import io.pravega.client.stream.StreamConfiguration; import io.pravega.client.stream.impl.JavaSerializer; /** - * Reads from a Stream until interrupted. + * This class implements a simple console interface with the client for demonstration purposes. Specifically, this class + * has two main objectives: i) Reads from a configured {@link Stream} until interrupted; ii) It allows developers to + * have an easy first-time interaction with the {@link StreamCut} API. */ -public class ConsoleReader { - - private static final int READER_TIMEOUT_MS = 200000; - - public final String scope; - public final String streamName; - public final URI controllerURI; +@Slf4j +public class ConsoleReader implements Closeable { + + private final String scope; + private final String streamName; + private final URI controllerURI; + + private Map<Stream, StreamCut> streamCut; + + private ExecutorService executor; + private BackgroundReader backgroundReader; + + private static final String[] MENU_TEXT = { + "Enter one of the following commands at the command line prompt:", + "", + "Meanwhile, the program will read and display the events being written to the Stream.", + "", + "STREAMCUT_CREATE - create a StreamCut at the current point in which the reader is reading.", + "STREAMCUT_READ_FROM - reads all the events in the Stream from the available StreamCut up to the TAIL", + "STREAMCUT_READ_UP_TO - reads all the events in the Stream from the HEAD up to the available StreamCut", + "HELP - print out a list of commands.", + "QUIT - terminate the program."
+ }; public ConsoleReader(String scope, String streamName, URI controllerURI) { this.scope = scope; this.streamName = streamName; this.controllerURI = controllerURI; + this.backgroundReader = new BackgroundReader(scope, streamName, controllerURI); + executor = Executors.newSingleThreadExecutor(); } - - public void run() { - final String readerGroup = UUID.randomUUID().toString().replace("-", ""); - final ReaderGroupConfig readerGroupConfig = ReaderGroupConfig.builder() - .stream(Stream.of(scope, streamName)) - .build(); - StreamManager streamManager = StreamManager.create(controllerURI); - streamManager.createScope(scope); - StreamConfiguration streamConfig = StreamConfiguration.builder().scope(scope).streamName(streamName) - .scalingPolicy(ScalingPolicy.fixed(1)) - .build(); + /** + * Use the console to accept commands from the command line and execute the commands against the stream. + */ + public void run() throws IOException, InterruptedException { + boolean done = false; - streamManager.createStream(scope, streamName, streamConfig); + outputHelp(); + + // Start backgroundReader thread to display events being written from ConsoleWriter. + executor.submit(backgroundReader); + while(!done){ + String commandLine = readLine("%s >", scope + "/" + streamName).trim(); + if (! commandLine.equals("")) { + done = processCommand(commandLine); + } + } + + // Closing threads and resources. + backgroundReader.close(); + log.info("Waiting for backgroundReader thread to finish..."); + executor.awaitTermination(2, TimeUnit.SECONDS); + } + + @Override + public void close() { + ExecutorServiceHelpers.shutdown(executor); + } + + /** + * Indirection to deal with Eclipse console bug #122429 + */ + private String readLine(String format, Object... args) throws IOException { + if (System.console() != null) { + return System.console().readLine(format, args); + } + System.out.println(String.format(format, args)); + BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); + return reader.readLine(); + } + + /** + * The raw format of the command is COMMAND (not case sensitive). + */ + private boolean processCommand(String rawString) { + boolean ret = false; + final Scanner sc = new Scanner(rawString); + final String command = sc.next(); + List parms; + final String restOfLine; + if (sc.hasNextLine()) { + restOfLine = sc.nextLine(); + final String[] rawParms = restOfLine.split(","); + parms = Arrays.asList(rawParms); + parms.replaceAll(String::trim); + } else { + parms = new ArrayList<>(); + } + + switch(command.toUpperCase()) { + case "STREAMCUT_CREATE": + doCreateStreamCut(); + break; + case "STREAMCUT_READ_FROM": + doReadFromStreamCut(); + break; + case "STREAMCUT_READ_UP_TO": + doReadUpToStreamCut(); + break; + case "HELP" : + doHelp(parms); + break; + case "QUIT" : + ret = true; + output("Exiting...%n"); + break; + default : + output("Wrong option. Please, select a valid one...%n"); + break; + } + sc.close(); + return ret; + } + + /** + * This method gets the current {@link StreamCut} representing the last event read by the main loop for using it in + * further calls that use {@link StreamCut}s for bounded processing. + */ + private void doCreateStreamCut() { + streamCut = backgroundReader.getLastStreamCut(); + output("New StreamCut: %s%n", streamCut.get(Stream.of(scope, streamName)).toString()); + } + + /** + * This method uses {@link ReaderGroupConfig#startingStreamCuts} method to define a start boundary on the events to + * be read by readers. 
This means that a reader will only read events from the point represented by streamCut + * variable until the tail of the {@link Stream}. + */ + private void doReadFromStreamCut() { + if (streamCut == null) { + output("Please, create a StreamCut before trying to read from its position!%n"); + return; + } + ReaderGroupConfig config = ReaderGroupConfig.builder().stream(Stream.of(scope, streamName)) + .startingStreamCuts(streamCut).build(); + readBasedOnStreamCuts(config); + } + + /** + * This method uses {@link ReaderGroupConfig#endingStreamCuts} method to define a terminal boundary on the events to + * be read by readers. This means that a reader will only read events from the head of the {@link Stream} up to the + * point represented by streamCut variable. + */ + private void doReadUpToStreamCut() { + if (streamCut == null) { + output("Please, create a StreamCut before trying to read up to its position!%n"); + return; + } + ReaderGroupConfig config = ReaderGroupConfig.builder().stream(Stream.of(scope, streamName)) + .endingStreamCuts(streamCut).build(); + readBasedOnStreamCuts(config); + } + + /** + * This method shows a possible usage of {@link StreamCut}s related to bounded stream processing. The input + * parameter has defined a {@link ReaderGroupConfig} with a {@link StreamCut} set to be either the initial or + * terminal boundary for reading events. Once we create a {@link ReaderGroup} with this input configuration, then + * all the readers belonging to this group will consume the events only within the defined boundaries. + * + * @param config Configuration for a {@link ReaderGroup} that will contain read boundaries in that {@link Stream}. + */ + private void readBasedOnStreamCuts(ReaderGroupConfig config) { + final String readerGroup = UUID.randomUUID().toString().replace("-", ""); try (ReaderGroupManager readerGroupManager = ReaderGroupManager.withScope(scope, controllerURI)) { - readerGroupManager.createReaderGroup(readerGroup, readerGroupConfig); + // Create a reader group using the configuration with the defined StreamCut boundaries. + readerGroupManager.createReaderGroup(readerGroup, config); } try (ClientFactory clientFactory = ClientFactory.withScope(scope, controllerURI); - EventStreamReader reader = clientFactory.createReader("reader", - readerGroup, - new JavaSerializer(), - ReaderConfig.builder().build())) { - System.out.format("******** Reading events from %s/%s%n", scope, streamName); - EventRead event = null; + EventStreamReader reader = clientFactory.createReader("streamcut-reader", + readerGroup, new JavaSerializer<>(), ReaderConfig.builder().build())) { + + // The reader(s) will only read and display events within the StreamCut boundaries defined. 
+ output("StreamCuts: Bounded processing example in stream %s/%s%n", scope, streamName); + output("Starting boundary for readers: %s (UnboundedStreamCut represents the head of the stream)%n", + config.getStartingStreamCuts().get(Stream.of(scope, streamName))); + output("Terminal boundary for readers: %s (UnboundedStreamCut represents the tail of the stream)%n", + config.getEndingStreamCuts().get(Stream.of(scope, streamName))); + + EventRead event; do { - try { - event = reader.readNextEvent(READER_TIMEOUT_MS); - if(event != null) { - System.out.format("'%s'%n", event.getEvent()); - } - } catch (ReinitializationRequiredException e) { - //There are certain circumstances where the reader needs to be reinitialized - e.printStackTrace(); + event = reader.readNextEvent(1000); + if (event.getEvent() != null) { + // TODO: Problem finding logback.xml in Pravega example applications (Issue #87). + output("[StreamCut read from/up to] Read event: %s%n", event.getEvent()); + log.info("[StreamCut read from/up to] Read event: {}.", event.getEvent()); } - }while(true); + } while (event.getEvent() != null); + } catch (ReinitializationRequiredException e) { + // We do not expect this Exception from the reader in this situation, so we leave. + log.error("Non-expected reader re-initialization."); + } catch (IllegalArgumentException e) { + log.warn("Nothing to read! Maybe your StreamCut is empty or at the head of the Stream and you are trying to" + + "read events up to it."); } } - - public static void main(String[] args) { + private void outputHelp () { + Arrays.stream(MENU_TEXT).forEach(System.out::println); + System.out.println(" "); + } + + private void output(String format, Object... args){ + System.out.format("**** "); + System.out.format(format, args); + } + + private void doHelp(List parms) { + outputHelp(); + if (parms.size() > 0) { + output("Ignoring parameters: '%s'%n", String.join(",", parms)); + } + } + + public static void main(String[] args) throws IOException, InterruptedException { Options options = getOptions(); CommandLine cmd = null; try { cmd = parseCommandLineArgs(options, args); } catch (ParseException e) { - System.out.format("%s.%n", e.getMessage()); + log.info("Exception parsing: {}.", e.getMessage()); final HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("ConsoleReader", options); System.exit(1); } - + final String scope = cmd.getOptionValue("scope") == null ? Constants.DEFAULT_SCOPE : cmd.getOptionValue("scope"); final String streamName = cmd.getOptionValue("name") == null ? Constants.DEFAULT_STREAM_NAME : cmd.getOptionValue("name"); final String uriString = cmd.getOptionValue("uri") == null ? 
Constants.DEFAULT_CONTROLLER_URI : cmd.getOptionValue("uri"); final URI controllerURI = URI.create(uriString); - + + StreamManager streamManager = StreamManager.create(controllerURI); + streamManager.createScope(scope); + StreamConfiguration streamConfig = StreamConfiguration.builder().scope(scope).streamName(streamName) + .scalingPolicy(ScalingPolicy.fixed(1)) + .build(); + streamManager.createStream(scope, streamName, streamConfig); + streamManager.close(); + ConsoleReader reader = new ConsoleReader(scope, streamName, controllerURI); reader.run(); + System.exit(0); } private static Options getOptions() { @@ -123,8 +313,86 @@ private static Options getOptions() { private static CommandLine parseCommandLineArgs(Options options, String[] args) throws ParseException { CommandLineParser parser = new DefaultParser(); - CommandLine cmd = parser.parse(options, args); - return cmd; + return parser.parse(options, args); } - } + +/** + * This class aims at continuously reading from the {@link Stream} and creating {@link StreamCut}s in a separate thread. + */ +@Slf4j +class BackgroundReader implements Closeable, Runnable { + + private static final int READER_TIMEOUT_MS = 1000; + + private final String scope; + private final String streamName; + private final URI controllerURI; + private final String readerGroupName = UUID.randomUUID().toString().replace("-", ""); + private AtomicReference> lastStreamCut = new AtomicReference<>(); + + private final AtomicBoolean end = new AtomicBoolean(false); + private final ScheduledExecutorService executor; + + BackgroundReader(String scope, String streamName, URI controllerURI) { + this.scope = scope; + this.streamName = streamName; + this.controllerURI = controllerURI; + executor = new ScheduledThreadPoolExecutor(1); + } + + /** + * This method continuously performs two tasks: first, it reads events that are being written by console writer + * or by any other process in that stream. Second, it creates a new StreamCut after every read event. The new + * {@link StreamCut} represents the current tail of the {@link Stream} and it may be used to read events to or from + * that position in the {@link Stream}. + */ + public void run() { + final ReaderGroupConfig readerGroupConfig = ReaderGroupConfig.builder().disableAutomaticCheckpoints() + .stream(Stream.of(scope, streamName)).build(); + + try (ReaderGroupManager readerGroupManager = ReaderGroupManager.withScope(scope, controllerURI); + ClientFactory clientFactory = ClientFactory.withScope(scope, controllerURI)) { + + // Create the ReaderGroup to which readers will belong to. + readerGroupManager.createReaderGroup(readerGroupName, readerGroupConfig); + @Cleanup + ReaderGroup readerGroup = readerGroupManager.getReaderGroup(readerGroupName); + + EventStreamReader reader = clientFactory.createReader("backgroundReader", readerGroupName, + new JavaSerializer<>(), ReaderConfig.builder().build()); + EventRead event; + + // Start main loop to continuously read and display events written to the scope/stream. + log.info("Start reading events from {}/{}.", scope, streamName); + do { + event = reader.readNextEvent(READER_TIMEOUT_MS); + if (event.getEvent() != null) { + // TODO: Problem finding logback.xml in Pravega example applications (Issue #87). + System.out.println("[BackgroundReader] Read event: " + event.getEvent()); + log.info("[BackgroundReader] Read event: {}.", event.getEvent()); + } + + // Update the StreamCut after every event read, just in case the user wants to use it. 
+ if (!event.isCheckpoint()) { + readerGroup.initiateCheckpoint("myCheckpoint" + System.nanoTime(), executor) + .thenAccept(checkpoint -> lastStreamCut.set(checkpoint.asImpl().getPositions())); + } + } while (!end.get()); + } catch (ReinitializationRequiredException e) { + // We do not expect this Exception from the reader in this situation, so we leave. + log.error("Non-expected reader re-initialization."); + } + } + + Map getLastStreamCut() { + return lastStreamCut.get(); + } + + @Override + public void close() { + log.info("Closing background thread."); + end.set(true); + ExecutorServiceHelpers.shutdown(executor); + } +} \ No newline at end of file diff --git a/standalone-examples/src/main/java/io/pravega/example/streamcuts/Constants.java b/standalone-examples/src/main/java/io/pravega/example/streamcuts/Constants.java new file mode 100644 index 00000000..284118c9 --- /dev/null +++ b/standalone-examples/src/main/java/io/pravega/example/streamcuts/Constants.java @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.example.streamcuts; + +/** + * Defines a handful of constants shared by classes in this package. + */ +public class Constants { + static final String DEFAULT_SCOPE = "streamCutsExamples"; + static final String DEFAULT_CONTROLLER_URI = "tcp://127.0.0.1:9090"; +} diff --git a/standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java b/standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java new file mode 100644 index 00000000..9f3f0fd0 --- /dev/null +++ b/standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.example.streamcuts; + +import io.pravega.client.stream.ReaderGroupConfig; +import io.pravega.client.stream.Stream; +import io.pravega.client.stream.StreamCut; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URI; +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +@Slf4j +public class StreamCutsCli { + + private static final int exampleMaxStreams = 5; + private static final int exampleMaxEvents = 20; + + private final String scope; + private final URI controllerURI; + + private static final String[] INITIAL_MENU = { + "Enter one of the following commands at the command line prompt:", + "", + "SIMPLE - Simple yet illustrative example to show bounded processing with StreamCuts.", + "TIMESERIES - This example gives a sense on the use of StreamCuts for batch processing.", + "HELP - print out a list of commands.", + "QUIT - terminate the program." + }; + + public StreamCutsCli(String scope, URI controllerURI) { + this.scope = scope; + this.controllerURI = controllerURI; + } + + /** + * Use the console to accept commands from the command line and execute the commands against the stream. + */ + public void run() throws IOException { + boolean done = false; + + outputHelp(); + + while(!done){ + String commandLine = readLine("%s >", scope).trim(); + if (! commandLine.equals("")) { + done = processCommand(commandLine); + } + } + } + + /** + * Indirection to deal with Eclipse console bug #122429 + */ + private String readLine(String format, Object... args) throws IOException { + if (System.console() != null) { + return System.console().readLine(format, args); + } + System.out.print(String.format(format, args)); + BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); + return reader.readLine(); + } + + /** + * The raw format of the command is COMMAND (not case sensitive). + */ + private boolean processCommand(String rawString) throws IOException { + boolean ret = false; + final Scanner sc = new Scanner(rawString); + final String command = sc.next(); + List parms; + final String restOfLine; + + if (sc.hasNextLine()) { + restOfLine = sc.nextLine(); + final String[] rawParms = restOfLine.split(","); + parms = Arrays.asList(rawParms); + parms.replaceAll(String::trim); + } else { + parms = new ArrayList<>(); + } + + switch(command.toUpperCase()) { + case "SIMPLE": + doSimpleExample(); + break; + case "TIMESERIES": + doTimeSeriesExample(); + break; + case "HELP" : + doHelp(parms); + break; + case "QUIT" : + ret = true; + output("Exiting...%n"); + break; + default : + output("Wrong option. Please, select a valid one...%n"); + break; + } + sc.close(); + return ret; + } + + // Examples region + + private void doSimpleExample() throws IOException { + final String prefix = "simple_example > "; + + output("You have selected a simple example to see how Pravega StreamCuts work. 
Great choice!!%n"); + output("Now, we will ask you some questions to set up the example: %n%n"); + + output("How many streams do you want to create? (e.g., from 1 to %s):%n", exampleMaxStreams); + final int numStreams = askForIntInput(prefix, 0, exampleMaxStreams); + + output("How many events per stream do you want to create? (e.g., from 1 to %s):%n", exampleMaxEvents); + final int numEvents = askForIntInput(prefix, 0, exampleMaxEvents); + + // Set up and write data in streams. + StreamCutsExample example = new StreamCutsExample(numStreams, numEvents, scope, controllerURI); + example.createAndPopulateStreamsWithNumbers(); + System.out.println(example.printStreams()); + + output("Your Streams are ready :)%n%n"); + output("Now, to see how StreamCuts work, we are going to build them!%n"); + + // After setting up the streams and populating them, we can exercise StreamCuts multiple times. + do { + doBoundedPrinting(prefix, numStreams, numEvents, example); + output("Do you want to repeat? (Y)%n"); + } while(readLine("%s", prefix).trim().equalsIgnoreCase("Y")); + + log.info("Sealing and deleting streams."); + example.deleteStreams(); + example.close(); + } + + private void doBoundedPrinting(String prefix, int numStreams, int exampleNumEvents, StreamCutsExample example) throws IOException { + Map startStreamCuts = new LinkedHashMap<>(); + Map endStreamCuts = new LinkedHashMap<>(); + + for (String streamName: example.getMyStreamNames()) { + output("[Stream %s] StreamCut start event number.%n", streamName); + int iniEventIndex = askForIntInput(prefix, 0, exampleNumEvents - 1); + output("[Stream %s] StreamCut end event number.%n", streamName); + int endEventIndex = askForIntInput(prefix, iniEventIndex + 1, exampleNumEvents); + final List myStreamCuts = example.createStreamCutsByIndexFor(streamName, iniEventIndex, endEventIndex); + startStreamCuts.put(Stream.of(scope, streamName), myStreamCuts.get(0)); + endStreamCuts.put(Stream.of(scope, streamName), myStreamCuts.get(1)); + } + + // Here we enforce the boundaries for all the streams to be read, which enables bounded processing. + ReaderGroupConfig config = ReaderGroupConfig.builder().startFromStreamCuts(startStreamCuts) + .endingStreamCuts(endStreamCuts) + .build(); + output("Now, look! We can print bounded slices of multiple Streams:%n%n"); + output(example.printBoundedStreams(config)); + } + + private void doTimeSeriesExample() throws IOException { + final String prefix = "timeseries_example > "; + + output("You have selected a timeseries example to see how Pravega StreamCuts work. Perfect!!%n"); + output("Now, we will ask you some questions to set up the example: %n%n"); + + output("How many streams do you want to create? (e.g., from 1 to %s):%n", exampleMaxStreams); + final int numStreams = askForIntInput(prefix, 1, exampleMaxStreams); + + output("How many days do you want to emulate in your data? (e.g., from 1 to %s):%n", exampleMaxEvents); + final int exampleNumDays = askForIntInput(prefix, 1, exampleMaxEvents); + + // Set up and write data in streams. + StreamCutsExample example = new StreamCutsExample(numStreams, exampleNumDays, scope, controllerURI); + + example.createAndPopulateStreamsWithDataSeries(); + System.out.println(example.printStreams()); + + output("Your Streams are ready :)%n%n"); + output("We want to show the use of StreamCuts with BatchClient. 
To this end, we have created StreamCuts %n" + + "for each stream that bound the events belonging to the same day.%n"); + output("The example consists of summing up all the values from events belonging to the same day (e.g., day1).%n%n"); + + do { + doBoundedSummingOfStreamValues(prefix, exampleNumDays, example); + output("Do you want to repeat? (Y)%n"); + } while(readLine("%s", prefix).trim().equalsIgnoreCase("Y")); + + log.info("Sealing and deleting streams."); + example.deleteStreams(); + example.close(); + } + + private void doBoundedSummingOfStreamValues(String prefix,int exampleNumDays, StreamCutsExample example) throws IOException { + output("For which day number do you want to sum up values?.%n"); + int dayNumber = askForIntInput(prefix, 0, exampleNumDays); + + Map> streamDayStreamCuts = new LinkedHashMap<>(); + for (String streamName: example.getMyStreamNames()) { + final SimpleEntry eventIndexesForDay = example.getStreamEventIndexesForDay(streamName, dayNumber); + + // Due to randomization, there could be streams with no events for a given day. + if (eventIndexesForDay == null){ + continue; + } + output("[Stream %s] Indexes to bound day%s events: %s%n", streamName, dayNumber, eventIndexesForDay.toString()); + + // Get the StreamCuts that define the event boundaries for the given day in this stream. + final List myStreamCuts = example.createStreamCutsByIndexFor(streamName, eventIndexesForDay.getKey(), + eventIndexesForDay.getValue()); + streamDayStreamCuts.put(Stream.of(scope, streamName), myStreamCuts); + } + + // Next, we demonstrate the capabilities of StreamCuts by enabling readers to perform bounded reads. + output("Now, look! We can sum up values from bounded slices of multiple Streams:%n%n"); + output("Result from summing all the values belonging to day%s is: %s!%n", dayNumber, + example.sumBoundedStreams(streamDayStreamCuts)); + } + + // End examples region + + // Console utils region + + private int askForIntInput(String prefix, int minVal, int maxVal) throws IOException { + int result = Integer.MAX_VALUE; + boolean firstAttempt = true; + do { + try { + result = Integer.parseInt(readLine("%s", prefix).trim()); + if (firstAttempt) { + firstAttempt = false; + } else { + output("Please, numbers should be between [%s, %s] %n", minVal, maxVal); + } + } catch (NumberFormatException e) { + output("Please, introduce a correct number%n"); + } + } while (result < minVal || result > maxVal); + return result; + } + + private void outputHelp () { + Arrays.stream(INITIAL_MENU).forEach(System.out::println); + System.out.println(" "); + } + + private void output(String format, Object... args){ + System.out.format("**** "); + System.out.format(format, args); + } + + private void doHelp(List parms) { + outputHelp(); + if (parms.size() > 0) { + output("Ignoring parameters: '%s'%n", String.join(",", parms)); + } + } + + // End console utils region + + public static void main(String[] args) throws IOException { + Options options = getOptions(); + CommandLine cmd = null; + try { + cmd = parseCommandLineArgs(options, args); + } catch (ParseException e) { + log.info("Exception parsing: {}.", e.getMessage()); + final HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("StreamCuts", options); + System.exit(1); + } + + final String scope = cmd.getOptionValue("scope") == null ? Constants.DEFAULT_SCOPE : cmd.getOptionValue("scope"); + final String uriString = cmd.getOptionValue("uri") == null ? 
Constants.DEFAULT_CONTROLLER_URI : cmd.getOptionValue("uri"); + final URI controllerURI = URI.create(uriString); + + StreamCutsCli console = new StreamCutsCli(scope, controllerURI); + console.run(); + System.exit(0); + } + + private static CommandLine parseCommandLineArgs(Options options, String[] args) throws ParseException { + CommandLineParser parser = new DefaultParser(); + return parser.parse(options, args); + } + + private static Options getOptions() { + final Options options = new Options(); + options.addOption("s", "scope", true, "The scope name of the stream to read from."); + options.addOption("u", "uri", true, "The URI to the controller in the form tcp://host:port"); + return options; + } +} diff --git a/standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java b/standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java new file mode 100644 index 00000000..d18b183b --- /dev/null +++ b/standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.example.streamcuts; + +import com.google.common.collect.Lists; +import io.pravega.client.ClientFactory; +import io.pravega.client.admin.ReaderGroupManager; +import io.pravega.client.admin.StreamManager; +import io.pravega.client.batch.BatchClient; +import io.pravega.client.batch.SegmentRange; +import io.pravega.client.batch.StreamSegmentsIterator; +import io.pravega.client.stream.Checkpoint; +import io.pravega.client.stream.EventRead; +import io.pravega.client.stream.EventStreamReader; +import io.pravega.client.stream.EventStreamWriter; +import io.pravega.client.stream.EventWriterConfig; +import io.pravega.client.stream.ReaderConfig; +import io.pravega.client.stream.ReaderGroup; +import io.pravega.client.stream.ReaderGroupConfig; +import io.pravega.client.stream.ReinitializationRequiredException; +import io.pravega.client.stream.ScalingPolicy; +import io.pravega.client.stream.Stream; +import io.pravega.client.stream.StreamConfiguration; +import io.pravega.client.stream.StreamCut; +import io.pravega.client.stream.impl.JavaSerializer; +import java.io.Closeable; +import java.net.URI; +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.function.Consumer; +import lombok.Cleanup; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class StreamCutsExample implements Closeable { + + public static final int maxEventsPerDay = 3; + private static final String eventSeparator = ":"; + private static final String streamSeparator = "-"; + + private int numStreams; + private int numEvents; + private URI controllerURI; + private String scope; + private ScheduledExecutorService executor; + private StreamManager streamManager; + private List myStreamNames = new ArrayList<>(); + private Map> perDayEventIndex = new LinkedHashMap<>(); + + public StreamCutsExample(int numStreams, int numEvents, String scope, URI controllerURI) { + this.numStreams = numStreams; + 
+ this.numEvents = numEvents;
+ this.controllerURI = controllerURI;
+ this.scope = scope;
+ streamManager = StreamManager.create(controllerURI);
+ executor = new ScheduledThreadPoolExecutor(1);
+ }
+
+ /**
+ * A {@link StreamCut} is a collection of offsets, one for each open segment of the {@link Stream}, which indicates
+ * an event boundary. With a {@link StreamCut}, users can instruct readers to read from and/or up to a particular
+ * event boundary (e.g., read events from 100 to 200, events created since Tuesday) on multiple {@link Stream}s. To
+ * this end, Pravega allows us to create {@link StreamCut}s while readers are processing a {@link Stream} (e.g., via
+ * a {@link Checkpoint}) that can be used in the future to bound the processing of a set of {@link Stream}s. In this
+ * method, we create two {@link StreamCut}s for a {@link Stream} according to the initial and final event
+ * indexes passed by parameter.
+ *
+ * @param streamName Name of the {@link Stream} from which {@link StreamCut}s will be created.
+ * @param iniEventIndex Index of the initial boundary for the {@link Stream} slice to process.
+ * @param endEventIndex Index of the final boundary for the {@link Stream} slice to process.
+ * @return Initial and final {@link Stream} boundaries represented as {@link StreamCut}s.
+ */
+ public List<StreamCut> createStreamCutsByIndexFor(String streamName, int iniEventIndex, int endEventIndex) {
+ // Create the StreamCuts for the streams.
+ final List<StreamCut> streamCuts = new ArrayList<>();
+ final String randomId = String.valueOf(new Random(System.nanoTime()).nextInt());
+
+ // Free resources after execution.
+ try (ReaderGroupManager manager = ReaderGroupManager.withScope(scope, controllerURI);
+ ClientFactory clientFactory = ClientFactory.withScope(scope, controllerURI)) {
+
+ // Create a reader group and a reader to read from the stream.
+ final String readerGroupName = streamName + randomId;
+ ReaderGroupConfig config = ReaderGroupConfig.builder().stream(Stream.of(scope, streamName)).build();
+ manager.createReaderGroup(readerGroupName, config);
+ @Cleanup
+ ReaderGroup readerGroup = manager.getReaderGroup(readerGroupName);
+ @Cleanup
+ EventStreamReader<String> reader = clientFactory.createReader(randomId, readerGroup.getGroupName(),
+ new JavaSerializer<>(), ReaderConfig.builder().build());
+
+ // Read streams and create the StreamCuts during the read process.
+ Checkpoint checkpoint;
+ int eventIndex = 0;
+ EventRead<String> event;
+ do {
+ // Here is where we create a StreamCut that points to the event indicated by the user.
+ if (eventIndex == iniEventIndex || eventIndex == endEventIndex) {
+ reader.close();
+ checkpoint = readerGroup.initiateCheckpoint(randomId + eventIndex, executor).join();
+ streamCuts.add(checkpoint.asImpl().getPositions().values().iterator().next());
+ reader = clientFactory.createReader(randomId, readerGroup.getGroupName(),
+ new JavaSerializer<>(), ReaderConfig.builder().build());
+ }
+
+ event = reader.readNextEvent(1000);
+ eventIndex++;
+ } while (event.isCheckpoint() || event.getEvent() != null);
+
+ // If there is only the initial StreamCut, this means that the final one is the tail of the stream.
+ if (streamCuts.size() == 1) {
+ streamCuts.add(StreamCut.UNBOUNDED);
+ }
+ } catch (ReinitializationRequiredException e) {
+ // We do not expect this Exception from the reader in this situation, so we leave.
+ log.error("Non-expected reader re-initialization."); + } + return streamCuts; + } + + /** + * This method is an example of bounded processing in Pravega with {@link StreamCut}s. {@link ReaderGroupConfig} + * contains the information related to the {@link Stream}s to be read as well as the (optional) user-defined + * boundaries in the form of {@link StreamCut}s that will limit the events to be read by reader processes. Note that + * event readers (i.e., {@link EventStreamReader}) are agnostic to any notion of boundaries and they do not interact + * with {@link StreamCut}s; they only consume events, which will be bounded within specific {@link Stream} slices as + * configured in {@link ReaderGroupConfig}. The method basically creates a string representation of the events read + * from {@link Stream}s within the bounds defined in the configuration parameter. + * + * @param config Configuration for the {@link ReaderGroup}, possibly containing {@link StreamCut} boundaries for + * limiting the number of events to read. + * @return String representation of the events read by the reader. + */ + public String printBoundedStreams(ReaderGroupConfig config) { + StringBuilder result = new StringBuilder(); + final String randomId = String.valueOf(new Random(System.nanoTime()).nextInt()); + try (ReaderGroupManager manager = ReaderGroupManager.withScope(scope, controllerURI); + ClientFactory clientFactory = ClientFactory.withScope(scope, controllerURI)) { + final String readerGroupName = "RG" + randomId; + manager.createReaderGroup(readerGroupName, config); + @Cleanup + EventStreamReader reader = clientFactory.createReader(randomId, readerGroupName, + new JavaSerializer<>(), ReaderConfig.builder().build()); + + // Write dummy events that identify each Stream. + EventRead event; + do { + event = reader.readNextEvent(1000); + if (event.getEvent() != null) { + result = result.append(event.getEvent()).append('|'); + } + + } while (event.isCheckpoint() || event.getEvent() != null); + + result = result.append('\n'); + } catch (ReinitializationRequiredException e) { + // We do not expect this Exception from the reader in this situation, so we leave. + log.error("Non-expected reader re-initialization."); + } + return result.toString(); + } + + /** + * A good use-case for {@link StreamCut}s is to allow efficient batch processing of data events within specific + * boundaries (e.g., perform a mean on the temperature values in 1986). Instead of ingesting all the data and force + * the reader to discard irrelevant events, {@link StreamCut}s help readers to only read the events that are + * important for a particular task. In this sense, this method enables the Pravega {@link BatchClient} to read from + * various {@link Stream}s within the specific ranges passed as input, and the sum up all the values contained in + * read events. + * + * @param streamCuts Map that defines the slices to read of a set of {@link Stream}s. + * @return Sum of all the values of time series data belonging to {@link Stream}s and bounded by {@link StreamCut}s. + */ + public int sumBoundedStreams(Map> streamCuts) { + int totalSumValuesInDay = 0; + try (ClientFactory clientFactory = ClientFactory.withScope(scope, controllerURI)) { + final BatchClient batchClient = clientFactory.createBatchClient(); + for (Stream myStream: streamCuts.keySet()) { + + // Get the cuts for this stream that will bound the number of events to read. 
+ final StreamCut startStreamCut = streamCuts.get(myStream).get(0);
+ final StreamCut endStreamCut = streamCuts.get(myStream).get(1);
+
+ // Then, we get the segment ranges according to the StreamCuts.
+ StreamSegmentsIterator segments = batchClient.getSegments(myStream, startStreamCut, endStreamCut);
+ List<SegmentRange> ranges = Lists.newArrayList(segments.getIterator());
+
+ // We basically sum up all the values of events within the ranges.
+ for (SegmentRange range: ranges) {
+ List<String> eventData = Lists.newArrayList(batchClient.readSegment(range, new JavaSerializer<>()));
+ totalSumValuesInDay += eventData.stream().map(s -> s.split(eventSeparator)[2]).mapToInt(Integer::valueOf).sum();
+ }
+ }
+ }
+ return totalSumValuesInDay;
+ }
+
+ // Region stream utils
+
+ public void createAndPopulateStreamsWithNumbers() {
+ Consumer<SimpleEntry<EventStreamWriter<String>, String>> consumer = this::numericDataEvents;
+ createAndPopulateStreams(consumer);
+ }
+
+ public void createAndPopulateStreamsWithDataSeries() {
+ Consumer<SimpleEntry<EventStreamWriter<String>, String>> consumer = this::dataSeriesEvents;
+ createAndPopulateStreams(consumer);
+ }
+
+ public SimpleEntry<Integer, Integer> getStreamEventIndexesForDay(String streamName, int day) {
+ return perDayEventIndex.get(getStreamDayKey(streamName, day));
+ }
+
+ /**
+ * This method first creates the scope that will contain the streams to write and read events.
+ */
+ public void createAndPopulateStreams(Consumer<SimpleEntry<EventStreamWriter<String>, String>> createDataEvents) {
+ // Create the scope first, before creating the Streams.
+ streamManager.createScope(scope);
+
+ // Create Streams and write dummy events in them.
+ for (char streamId = 'a'; streamId < 'a' + numStreams; streamId++) {
+ String streamName = String.valueOf(streamId) + streamSeparator + System.nanoTime();
+ myStreamNames.add(streamName);
+ StreamConfiguration streamConfig = StreamConfiguration.builder().scalingPolicy(ScalingPolicy.fixed(1)).build();
+ streamManager.createStream(scope, streamName, streamConfig);
+
+ // Note that we use the try-with-resources statement for those classes that should be closed after usage.
+ try (ClientFactory clientFactory = ClientFactory.withScope(scope, controllerURI);
+ EventStreamWriter<String> writer = clientFactory.createEventWriter(streamName,
+ new JavaSerializer<>(), EventWriterConfig.builder().build())) {
+
+ // Write data to the streams according to our preferences.
+ final SimpleEntry<EventStreamWriter<String>, String> writerAndStreamName = new SimpleEntry<>(writer, streamName);
+ createDataEvents.accept(writerAndStreamName);
+ }
+ }
+ }
+
+ public void numericDataEvents(SimpleEntry<EventStreamWriter<String>, String> writerAndStreamName) {
+ // Write dummy events that identify each Stream.
+ StringBuilder sb = new StringBuilder();
+ char streamBaseId = writerAndStreamName.getValue().charAt(0);
+ for (int j = 0; j < numEvents; j++) {
+ writerAndStreamName.getKey().writeEvent(sb.append(streamBaseId).append(j).toString()).join();
+ sb.setLength(0);
+ }
+ }
+
+ public void dataSeriesEvents(SimpleEntry<EventStreamWriter<String>, String> writerAndStreamName) {
+ StringBuilder sb = new StringBuilder();
+ Random random = new Random();
+ int totalEventsSoFar = 0;
+ char streamBaseId = writerAndStreamName.getValue().charAt(0);
+ for (int i = 0; i < numEvents; i++) {
+ final String daySuffix = eventSeparator + "day" + i;
+ int eventsPerDay = random.nextInt(maxEventsPerDay);
+ int lastDayEventIndex;
+
+ // Write events specifying the day they belong to and the value in their content.
+ for (lastDayEventIndex = 0; lastDayEventIndex < eventsPerDay; lastDayEventIndex++) {
+ writerAndStreamName.getKey().writeEvent(sb.append(streamBaseId)
+ .append(daySuffix)
+ .append(eventSeparator)
+ .append(random.nextInt(20)).toString()).join();
+ sb.setLength(0);
+ }
+
+ // Record the indexes bounding the events written for this day.
+ if (lastDayEventIndex > 0) {
+ perDayEventIndex.put(writerAndStreamName.getValue() + daySuffix,
+ new SimpleEntry<>(totalEventsSoFar, totalEventsSoFar + lastDayEventIndex));
+ totalEventsSoFar += lastDayEventIndex;
+ }
+ }
+ }
+
+ /**
+ * This method provides a print facility on the contents of all the {@link Stream}s.
+ *
+ * @return String containing the content of events for a specific {@link Stream}.
+ */
+ public String printStreams() {
+ StringBuilder result = new StringBuilder();
+ for (String streamName: myStreamNames) {
+ ReaderGroupConfig config = ReaderGroupConfig.builder().stream(Stream.of(scope, streamName)).build();
+ result = result.append(printBoundedStreams(config));
+ }
+
+ return result.toString();
+ }
+
+ /**
+ * We delete all the {@link Stream}s created in every example execution.
+ */
+ public void deleteStreams() {
+ // Delete the streams for next execution.
+ for (String streamName: myStreamNames) {
+ try {
+ streamManager.sealStream(scope, streamName);
+ Thread.sleep(500);
+ streamManager.deleteStream(scope, streamName);
+ Thread.sleep(500);
+ } catch (InterruptedException e) {
+ log.error("Interrupted while sleeping in deleteStreams: {}.", e);
+ }
+ }
+ myStreamNames.clear();
+ perDayEventIndex.clear();
+ }
+
+ // End region stream utils
+
+ /**
+ * Close resources.
+ */
+ public void close() {
+ streamManager.close();
+ executor.shutdown();
+ }
+
+ public List<String> getMyStreamNames() {
+ return myStreamNames;
+ }
+
+ private String getStreamDayKey(String streamName, int day) {
+ return streamName + eventSeparator + "day" + day;
+ }
+}
From acb70f61f5e825ffb59fb4f8740d4963c111c226 Mon Sep 17 00:00:00 2001
From: Raúl Gracia
Date: Fri, 15 Jun 2018 05:56:59 +0200
Subject: [PATCH 22/48] Issue 83: Reorganize repository structure (#105)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Raúl Gracia

---
 README.md | 158 ++++++++++++------
 flink-connector-examples/README.md | 58 +++++++
 .../build.gradle | 10 +-
 .../doc/exactly-once/README.md | 2 +-
 .../doc/flink-wordcount/README.md | 32 ++--
 .../image/flink-wordcount-intellij-01.png | Bin
 .../image/flink-wordcount-intellij-02.png | Bin
 .../image/flink-wordcount-intellij-03.png | Bin
 .../image/flink-wordcount-intellij-04.png | Bin
 .../image/flink-wordcount-submit-writer.png | Bin
 .../java/io/pravega/example}/flink/Utils.java | 102 +++++------
 .../flink/primer/datatype/Constants.java | 2 +-
 .../flink/primer/datatype/IntegerEvent.java | 2 +-
 .../primer/process/ExactlyOnceChecker.java | 8 +-
 .../primer/process/ExactlyOnceWriter.java | 16 +-
 .../source/ThrottledIntegerEventProducer.java | 5 +-
 .../flink/primer/util/FailingMapper.java | 2 +-
 .../example}/flink/wordcount/Constants.java | 2 +-
 .../example}/flink/wordcount/WordCount.java | 2 +-
 .../flink/wordcount/WordCountReader.java | 4 +-
 .../flink/wordcount/WordCountWriter.java | 4 +-
 .../src/main/resources/log4j.properties | 0
 flink-examples/README.md | 102 -----------
 flink-examples/src/main/dist/bin/run-example | 3 -
 gradle.properties | 33 ++--
 .../README.md | 7 +-
 .../build.gradle | 11 +-
 .../hadoop/wordcount}/ExampleDriver.java | 2 +-
.../wordcount}/PravegaOutputFormat.java | 2 +- .../wordcount}/PravegaOutputRecordWriter.java | 3 +- .../hadoop/wordcount}/RandomTextWriter.java | 2 +- .../hadoop/wordcount}/RandomWriter.java | 2 +- .../hadoop/wordcount}/TextSerializer.java | 2 +- .../example/hadoop/wordcount}/WordCount.java | 2 +- .../example/hadoop/wordcount}/WordMean.java | 2 +- .../example/hadoop/wordcount}/WordMedian.java | 2 +- .../example/spark/wordcount}/WordCount.java | 4 +- hadoop-examples/gradle.properties | 7 - .../README.md | 21 +-- .../bin/.gitignore | 0 .../build.gradle | 83 ++++----- .../src/main/dist/Dockerfile | 0 .../example/consolerw/ConsoleReader.java | 0 .../example/consolerw/ConsoleWriter.java | 0 .../pravega/example/consolerw/Constants.java | 0 .../example/gettingstarted/Constants.java | 0 .../gettingstarted/HelloWorldReader.java | 0 .../gettingstarted/HelloWorldWriter.java | 0 .../example/noop/BinarySerializer.java | 0 .../io/pravega/example/noop/NoopReader.java | 0 .../io/pravega/example/noop/SimpleReader.java | 0 .../statesynchronizer/SharedConfig.java | 0 .../statesynchronizer/SharedConfigCli.java | 0 .../example/statesynchronizer/SharedMap.java | 0 .../pravega/example/streamcuts/Constants.java | 0 .../example/streamcuts/StreamCutsCli.java | 0 .../example/streamcuts/StreamCutsExample.java | 0 .../src/main/resources/logback.xml | 0 .../anomaly-detection}/README.md | 13 +- .../anomaly-detection}/build.gradle | 19 ++- .../src/main/dist/conf/app.json | 0 .../src/main/dist/conf/log4j.properties | 0 .../anomalydetection/ApplicationMain.java | 0 .../event/AppConfiguration.java | 0 .../event/pipeline/AbstractPipeline.java | 0 .../pipeline/EventStateMachineMapper.java | 0 .../event/pipeline/PipelineRunner.java | 0 .../PravegaAnomalyDetectionProcessor.java | 0 .../event/pipeline/PravegaEventPublisher.java | 0 .../event/pipeline/StreamCreator.java | 0 .../ControlledSourceContextProducer.java | 0 .../event/producer/SourceContextProducer.java | 0 .../anomalydetection/event/state/Event.java | 0 .../event/state/EventStateMachine.java | 0 .../event/state/EventsGenerator.java | 0 .../anomalydetection/event/state/Result.java | 0 .../src/main/resources/Network-Anomaly.png | Bin scenarios/turbine-heat-processor/README.md | 41 +++++ scenarios/turbine-heat-processor/build.gradle | 65 +++++++ .../src/main/dist/bin/run-example | 12 ++ .../SensorAggregate.java | 14 +- .../turbineheatprocessor}/SensorEvent.java | 12 +- .../TurbineHeatProcessor.java | 3 +- .../pravega/turbineheatprocessor/Utils.java | 51 ++++++ .../src/main/resources/logback.xml | 24 +++ .../TurbineHeatProcessorScala.scala | 3 +- scenarios/turbine-heat-sensor/README.md | 12 ++ scenarios/turbine-heat-sensor/build.gradle | 71 ++++++++ .../src/main/dist/Dockerfile | 7 + .../pravega/turbineheatsensor}/PerfStats.java | 2 +- .../turbineheatsensor}/TurbineHeatSensor.java | 2 +- .../src/main/resources/logback.xml | 24 +++ settings.gradle | 14 +- 93 files changed, 696 insertions(+), 392 deletions(-) create mode 100644 flink-connector-examples/README.md rename {flink-examples => flink-connector-examples}/build.gradle (86%) rename {flink-examples => flink-connector-examples}/doc/exactly-once/README.md (98%) rename {flink-examples => flink-connector-examples}/doc/flink-wordcount/README.md (79%) rename {flink-examples => flink-connector-examples}/doc/flink-wordcount/image/flink-wordcount-intellij-01.png (100%) rename {flink-examples => flink-connector-examples}/doc/flink-wordcount/image/flink-wordcount-intellij-02.png (100%) rename {flink-examples => 
flink-connector-examples}/doc/flink-wordcount/image/flink-wordcount-intellij-03.png (100%) rename {flink-examples => flink-connector-examples}/doc/flink-wordcount/image/flink-wordcount-intellij-04.png (100%) rename {flink-examples => flink-connector-examples}/doc/flink-wordcount/image/flink-wordcount-submit-writer.png (100%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/Utils.java (95%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/primer/datatype/Constants.java (93%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/primer/datatype/IntegerEvent.java (96%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/primer/process/ExactlyOnceChecker.java (95%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/primer/process/ExactlyOnceWriter.java (89%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/primer/source/ThrottledIntegerEventProducer.java (98%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/primer/util/FailingMapper.java (98%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/wordcount/Constants.java (95%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/wordcount/WordCount.java (95%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/wordcount/WordCountReader.java (97%) rename {flink-examples/src/main/java/io/pravega/examples => flink-connector-examples/src/main/java/io/pravega/example}/flink/wordcount/WordCountWriter.java (98%) rename {flink-examples => flink-connector-examples}/src/main/resources/log4j.properties (100%) delete mode 100644 flink-examples/README.md delete mode 100755 flink-examples/src/main/dist/bin/run-example rename {hadoop-examples => hadoop-connector-examples}/README.md (91%) rename {hadoop-examples => hadoop-connector-examples}/build.gradle (84%) rename {hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/ExampleDriver.java (97%) rename {hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/PravegaOutputFormat.java (99%) rename {hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/PravegaOutputRecordWriter.java (94%) rename {hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/RandomTextWriter.java (99%) rename {hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/RandomWriter.java (99%) rename {hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/TextSerializer.java (97%) rename 
{hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/WordCount.java (99%) rename {hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/WordMean.java (99%) rename {hadoop-examples/src/main/java/io/pravega/examples/hadoop => hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount}/WordMedian.java (99%) rename {hadoop-examples/src/main/java/io/pravega/examples/spark => hadoop-connector-examples/src/main/java/io/pravega/example/spark/wordcount}/WordCount.java (95%) delete mode 100644 hadoop-examples/gradle.properties rename {standalone-examples => pravega-client-examples}/README.md (93%) rename {standalone-examples => pravega-client-examples}/bin/.gitignore (100%) rename {standalone-examples => pravega-client-examples}/build.gradle (61%) rename {standalone-examples => pravega-client-examples}/src/main/dist/Dockerfile (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/consolerw/ConsoleReader.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/consolerw/Constants.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/gettingstarted/Constants.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/gettingstarted/HelloWorldReader.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/gettingstarted/HelloWorldWriter.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/noop/BinarySerializer.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/noop/NoopReader.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/noop/SimpleReader.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/statesynchronizer/SharedConfig.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/statesynchronizer/SharedConfigCli.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/statesynchronizer/SharedMap.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/streamcuts/Constants.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java (100%) rename {standalone-examples => pravega-client-examples}/src/main/resources/logback.xml (100%) rename {anomaly-detection => scenarios/anomaly-detection}/README.md (95%) rename {anomaly-detection => scenarios/anomaly-detection}/build.gradle (94%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/dist/conf/app.json (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/dist/conf/log4j.properties (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/ApplicationMain.java (100%) rename {anomaly-detection => 
scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/AppConfiguration.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/pipeline/AbstractPipeline.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/pipeline/EventStateMachineMapper.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/pipeline/PipelineRunner.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaAnomalyDetectionProcessor.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaEventPublisher.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/pipeline/StreamCreator.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/producer/ControlledSourceContextProducer.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/producer/SourceContextProducer.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/state/Event.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/state/EventStateMachine.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/state/EventsGenerator.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/java/io/pravega/anomalydetection/event/state/Result.java (100%) rename {anomaly-detection => scenarios/anomaly-detection}/src/main/resources/Network-Anomaly.png (100%) create mode 100644 scenarios/turbine-heat-processor/README.md create mode 100644 scenarios/turbine-heat-processor/build.gradle create mode 100755 scenarios/turbine-heat-processor/src/main/dist/bin/run-example rename {flink-examples/src/main/java/io/pravega/examples/flink/iot => scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor}/SensorAggregate.java (81%) rename {flink-examples/src/main/java/io/pravega/examples/flink/iot => scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor}/SensorEvent.java (77%) rename {flink-examples/src/main/java/io/pravega/examples/flink/iot => scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor}/TurbineHeatProcessor.java (98%) create mode 100644 scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/Utils.java create mode 100644 scenarios/turbine-heat-processor/src/main/resources/logback.xml rename {flink-examples/src/main/scala/io/pravega/examples/flink/iot => scenarios/turbine-heat-processor/src/main/scala/io/pravega/turbineheatprocessor}/TurbineHeatProcessorScala.scala (98%) create mode 100644 scenarios/turbine-heat-sensor/README.md create mode 100644 scenarios/turbine-heat-sensor/build.gradle create mode 100644 scenarios/turbine-heat-sensor/src/main/dist/Dockerfile rename {standalone-examples/src/main/java/io/pravega/example/iot => scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor}/PerfStats.java (99%) rename {standalone-examples/src/main/java/io/pravega/example/iot => 
scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor}/TurbineHeatSensor.java (99%)
 create mode 100644 scenarios/turbine-heat-sensor/src/main/resources/logback.xml
diff --git a/README.md b/README.md
index 9389ea18..1ba1f53a 100644
--- a/README.md
+++ b/README.md
@@ -11,58 +11,66 @@ developer guide](http://pravega.io).
# Repository Structure
-This repository is divided into sub-projects (`standalone-examples`, `flink-examples`
-and `hadoop-examples`) addressed to demonstrate a specific component. In these sub-projects,
-we provide a battery of simple code examples aimed at demonstrating how a particular
-feature or API works. Moreover, we also include a `scenarios` sub-project that contains
-more complex applications, which show use-cases exploiting one or multiple components.
+This repository is divided into sub-projects (`pravega-client-examples`, `flink-connector-examples`
+and `hadoop-connector-examples`), each one intended to demonstrate a specific component. In these sub-projects,
+we provide a battery of simple code examples aimed at illustrating how a particular
+feature or API works. Moreover, we also include a `scenarios` folder that contains
+more complex applications as sub-projects, which show use-cases exploiting one or multiple components.
-## Pravega Examples
+> Hint: Have a look at the [terminology and concepts](http://pravega.io/docs/latest/terminology/) in Pravega.
+
+## Pravega Client Examples
| Example Name | Description | Language |
| ------------- |:-----| :-----|
-| `gettingstarted` | Simple example of how to read/write from/to a Pravega `Stream`. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/gettingstarted)
-| `consolerw` | Application that allows users to work with `Stream`, `Transaction` and `StreamCut` APIs via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/consolerw)
-| `noop` | Example of how to add a simple callback executed upon a read event. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/noop)
-| `statesynchronizer` | Application that allows users to work with `StateSynchronizer` API via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/statesynchronizer)
-| `streamcuts` | Application examples demonstrating the use of `StreamCut`s via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/streamcuts)
+| `gettingstarted` | Simple example of how to read/write from/to a Pravega `Stream`. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/gettingstarted)
+| `consolerw` | Application that allows users to work with `Stream`, `Transaction` and `StreamCut` APIs via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/consolerw)
+| `noop` | Example of how to add a simple callback executed upon a read event. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/noop)
+| `statesynchronizer` | Application that allows users to work with `StateSynchronizer` API via CLI.
| [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer)
+| `streamcuts` | Application examples demonstrating the use of `StreamCut`s via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/streamcuts)
-> Hint: Have a look to the [terminology and concepts](http://pravega.io/docs/latest/terminology/) in Pravega.
+The related documentation and instructions are [here](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples).
-## Flink-connector Examples
+## Flink Connector Examples
| Example Name | Description | Language |
| ------------- |:-----| :-----|
-| `wordcount` | Counting the words continuously from a Pravega `Stream` to demonstrate the usage of Flink connector for Pravega. | [Java](https://github.com/pravega/pravega-samples/tree/master/flink-examples/src/main/java/io/pravega/examples/flink/wordcount)
+| `wordcount` | Counting the words continuously from a Pravega `Stream` to demonstrate the usage of Flink connector for Pravega. | [Java](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount)
+| `primer` | This sample demonstrates the Pravega "exactly-once" feature jointly with Flink checkpointing and exactly-once mode. | [Java](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples/src/main/java/io/pravega/example/flink/primer)
-## Hadoop-connector Examples
+The related documentation and instructions are [here](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples).
+
+## Hadoop Connector Examples
| Example Name | Description | Language |
| ------------- |:-----| :-----|
-| `wordcount` | Counts the words from a Pravega `Stream` filled with random text to demonstrate the usage of Hadoop connector for Pravega. | [Java](https://github.com/pravega/pravega-samples/tree/master/hadoop-examples/src/main/java/io/pravega/examples/hadoop)
+| `wordcount` | Counts the words from a Pravega `Stream` filled with random text to demonstrate the usage of Hadoop connector for Pravega. | [Java](https://github.com/pravega/pravega-samples/tree/master/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount)
+The related documentation and instructions are [here](https://github.com/pravega/pravega-samples/tree/master/hadoop-connector-examples).
## Scenarios
| Example Name | Description | Language |
| ------------- |:-----| :-----|
-| `turbineheatsensor` | It emulates parallel sensors producing temperature values (writers) and parallel consumers performing real-time statistics (readers) via Pravega client. | [Java](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/iot)
-| `turbineheatprocessor` | A Flink streaming application for processing temperature data from a Pravega stream produced by the `turbineheatsensor` app. The application computes a daily summary of the temperature range observed on that day by each sensor. | [Java](https://github.com/pravega/pravega-samples/tree/master/flink-examples/src/main/java/io/pravega/examples/flink/iot), [Scala](https://github.com/pravega/pravega-samples/tree/master/flink-examples/src/main/scala/io/pravega/examples/flink/iot)
-| `anomaly-detection` | A Flink streaming application for detecting anomalous input patterns using a finite-state machine.
| [Java](https://github.com/pravega/pravega-samples/tree/master/anomaly-detection)
+| [`turbineheatsensor`](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-sensor) | It emulates parallel sensors producing temperature values (writers) and parallel consumers performing real-time statistics (readers) via Pravega client. | [Java](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor)
+| [`turbineheatprocessor`](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-processor) | A Flink streaming application for processing temperature data from a Pravega stream produced by the `turbineheatsensor` app. The application computes a daily summary of the temperature range observed on that day by each sensor. | [Java](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor), [Scala](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-processor/src/main/scala/io/pravega/turbineheatprocessor)
+| [`anomaly-detection`](https://github.com/pravega/pravega-samples/tree/master/scenarios/anomaly-detection) | A Flink streaming application for detecting anomalous input patterns using a finite-state machine. | [Java](https://github.com/pravega/pravega-samples/tree/master/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection)
# Build Instructions
+Next, we provide instructions for building the `pravega-samples` repository. There are two main options:
+- _Out-of-the-box_: If you want a quick start, run the samples by building `pravega-samples` out-of-the-box
+(go straight to section `Pravega Samples Build Instructions`).
+- _Build from source_: If you want to have fun building the different projects from source, please read
+section `Building Pravega Components from Source (Optional)` before building `pravega-samples`.
+
## Pre-requisites
-* Java 8+
+* Java 8
-## Pravega Build Instructions (Optional)
+## Building Pravega Components from Source (Optional)
-For [release builds](https://github.com/pravega/pravega/releases) of Pravega,
-the artifacts will already be in Maven Central and you will not need to run this step.
-In this sense, you also have the option to download the Pravega `master` snapshots published
-in our [JFrog repository](https://oss.jfrog.org/artifactory/jfrog-dependencies/io/pravega/).
+### Pravega Build Instructions
-Conversely, if you want to build Pravega from source, you may need to generate the
-latest Pravega `jar` files and install them to your local Maven repository.
-To this end, please run the following commands:
+If you want to build Pravega from source, you may need to generate the latest Pravega `jar` files and install them to
+your local Maven repository. To build Pravega from source and use it here, please run the following commands:
```
$ git clone https://github.com/pravega/pravega.git
@@ -71,71 +79,118 @@ $ ./gradlew install
```
The above command should generate the required `jar` files into your local Maven repository.
-For more information, please visit [Pravega](https://github.com/pravega/pravega).
-> Hint: If you use a different version of Pravega, please check the `pravegaVersion` property
-in `gradle.properties` file.
+> Hint: To use the Pravega version you just built in the sample applications, update the
+`pravegaVersion=` property in the `gradle.properties` file
+of `pravega-samples`.
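For reference, here is a minimal sketch of how these override properties look in the `gradle.properties` file of `pravega-samples`. The `pravegaVersion` value is the one used as an example later in this patch; the two connector version strings are purely illustrative, so substitute the snapshot versions produced by your own builds (the corresponding connector hints follow below):

```
# Versions of locally built artifacts (illustrative values)
pravegaVersion=0.3.0-1889.2990193-SNAPSHOT
flinkConnectorVersion=0.2.0-SNAPSHOT
hadoopConnectorVersion=0.2.0-SNAPSHOT
```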
-## Flink Connector Build Instructions
+For more information, please visit [Pravega](https://github.com/pravega/pravega).
-To execute Flink connector examples, follow the below steps to build and publish artifacts from
+### Flink Connector Build Instructions
+
+To build the Flink connector from source, follow the steps below to build and publish artifacts from
source to local Maven repository:
```
$ git clone --recursive https://github.com/pravega/flink-connectors.git
$ cd flink-connectors
-$ ./gradlew clean install
+$ ./gradlew install
```
+> Hint: To use the Flink connector version you just built in the sample applications, update the
+`flinkConnectorVersion=` property in the `gradle.properties` file
+of `pravega-samples`.
+
+
For more information, please visit [Flink Connectors](https://github.com/pravega/flink-connectors).
-## Hadoop Connector Build Instructions
+### Hadoop Connector Build Instructions
-To execute Hadoop connector examples, follow the below steps to build and publish artifacts from
+To build the Hadoop connector from source, follow the steps below to build and publish artifacts from
source to local Maven repository:
```
$ git clone --recurse-submodules https://github.com/pravega/hadoop-connectors.git
$ cd hadoop-connectors
-$ gradle install
+$ ./gradlew install
```
+> Hint: To use the Hadoop connector version you just built in the sample applications, update the
+`hadoopConnectorVersion=` property in the `gradle.properties` file
+of `pravega-samples`.
+
+
For more information, please visit [Hadoop Connectors](https://github.com/pravega/hadoop-connectors).
+### Configuring Pravega Samples for Running with Source Builds
+
+In the previous instructions, we noted that you will need to change the `gradle.properties` file in
+`pravega-samples` to use the Pravega components built from source. Here we provide an example of how to do so:
+
+1) Imagine that we want to build Pravega from source. Let us assume that we
+executed `git clone https://github.com/pravega/pravega.git` and the last commit of
+the `master` branch is `2990193xxx`.
+
+2) After executing `./gradlew install`, we will see in our local Maven repository
+(e.g., `~/.m2/repository/io/pravega/*`) artifacts that contain in their names that commit version
+such as `0.3.0-1889.2990193-SNAPSHOT`. These artifacts are the result of building Pravega from source.
+
+3) The only thing you have to do is to set `pravegaVersion=0.3.0-1889.2990193-SNAPSHOT` in the `gradle.properties`
+file of `pravega-samples`.
+
+While this example is for Pravega, the same procedure applies to the Flink and Hadoop connectors.
+
+
## Pravega Samples Build Instructions
-Finally, we need to build the code of the examples. Note that the `master` branch points to release
-artifacts of Pravega and connectors, whereas the `develop` branch works with snapshot artifacts.
-To build `pravega-samples` from source, use the built-in gradle wrapper as follows:
+The `pravega-samples` project is prepared to work out-of-the-box with
+[release artifacts](https://github.com/pravega/pravega/releases) of Pravega components, which are already
+available in Maven Central. To build `pravega-samples` from source, use the built-in gradle wrapper as follows:
```
$ git clone https://github.com/pravega/pravega-samples.git
$ cd pravega-samples
$ ./gradlew clean installDist
```
-To ease their execution, most examples can be run either using the gradle wrapper (gradlew) or
-scripts.
The above gradle command automatically creates the execution scripts that can be found -under: +That's it! You are good to go and execute the examples :) + +To ease their execution, most examples can be run either using the gradle wrapper (gradlew) or scripts. +The above gradle command automatically creates the execution scripts that can be found under: ``` -pravega-samples/standalone-examples/build/install/pravega-standalone-examples/bin +pravega-samples/pravega-client-examples/build/install/pravega-client-examples/bin ``` There is a Linux/Mac script and a Windows (.bat) script for each separate executable. +_Working with `develop` branch_: If you are curious about the most recent sample applications, +you may like to try the `develop` version of `pravega-samples` as well. To do so, just clone the +`develop` branch instead of `master` (default): + +``` +$ git clone -b develop https://github.com/pravega/pravega-samples.git +$ cd pravega-samples +$ ./gradlew clean installDist +``` + +The `develop` branch works with Pravega snapshots artifacts published in +our [JFrog repository](https://oss.jfrog.org/artifactory/jfrog-dependencies/io/pravega/) instead of +using release versions. + + # Proposed Roadmap We propose a roadmap to proceed with the execution of examples based on their complexity: -1. [Pravega examples](https://github.com/pravega/pravega-samples/tree/master/standalone-examples): +1. [Pravega client examples](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples): First step to understand the basics of Pravega and exercise the concepts presented in the documentation. -2. [Flink-connector examples](https://github.com/pravega/pravega-samples/tree/master/flink-examples): +2. [Flink connector examples](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples): These examples show the basic functionality of the Flink connector for Pravega. -3. [Hadoop-connector examples](https://github.com/pravega/pravega-samples/tree/master/hadoop-examples): +3. [Hadoop connector examples](https://github.com/pravega/pravega-samples/tree/master/hadoop-connector-examples): These examples show the basic functionality of the Hadoop connector for Pravega. -4. Scenarios: Applications that go beyond the basic usage of Pravega APIs, which may include complex interactions +4. [Scenarios](https://github.com/pravega/pravega-samples/tree/master/scenarios): Applications that go beyond the basic usage of Pravega APIs, which may include complex interactions between Pravega and analytics engines (e.g., Flink, Hadoop, Spark) to demonstrate analytics use cases. -# Where to find help +# Where to Find Help Documentation on Pravega and Analytics Connectors: * [Pravega.io](http://pravega.io/), [Pravega Wiki](https://github.com/pravega/pravega/wiki). @@ -149,8 +204,3 @@ Do you want to contribute a new example application? * Follow the [guidelines for contributors](https://github.com/pravega/pravega/wiki/Contributing). Have fun!! - - - - - diff --git a/flink-connector-examples/README.md b/flink-connector-examples/README.md new file mode 100644 index 00000000..a061c894 --- /dev/null +++ b/flink-connector-examples/README.md @@ -0,0 +1,58 @@ +# Flink Connector Examples for Pravega +Battery of code examples to demonstrate the capabilities of Pravega as a data stream storage +system for Apache Flink. + +## Pre-requisites +1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +2. 
Build [pravega-samples](https://github.com/pravega/pravega-samples) repository
+3. Apache Flink running
+
+
+### Distributing Flink Samples
+#### Assemble
+Use Gradle to assemble a distribution folder containing the Flink programs as a ready-to-deploy
+uber-jar called `pravega-flink-examples-<version>-all.jar`:
+
+```
+$ ./gradlew installDist
+...
+$ ls -R flink-connector-examples/build/install/pravega-flink-examples
+bin lib
+
+flink-connector-examples/build/install/pravega-flink-examples/bin:
+run-example
+
+flink-connector-examples/build/install/pravega-flink-examples/lib:
+pravega-flink-examples-<version>-all.jar
+```
+
+#### Upload
+The `upload` task makes it easy to upload the sample binaries to your cluster. First, configure
+Gradle with the address of a node in your cluster. Edit `~/.gradle/gradle.properties` to
+specify a value for `dcosAddress`.
+
+```
+$ cat ~/.gradle/gradle.properties
+dcosAddress=10.240.124.164
+```
+
+Then, upload the samples to the cluster. They will be copied to `/home/centos` on the target node.
+```
+$ ./gradlew upload
+```
+
+---
+
+# Examples Catalog
+
+## Word Count
+
+This example demonstrates how to use the Pravega Flink Connectors to write data collected
+from an external network stream into a Pravega `Stream` and read the data from the Pravega `Stream`.
+See [wordcount](doc/flink-wordcount/README.md) for more information and execution instructions.
+
+
+## Exactly Once Sample
+
+This sample demonstrates the Pravega EXACTLY_ONCE feature in conjunction with Flink checkpointing and exactly-once mode.
+See [Exactly Once Sample](doc/exactly-once/README.md) for instructions.
diff --git a/flink-examples/build.gradle b/flink-connector-examples/build.gradle
similarity index 86%
rename from flink-examples/build.gradle
rename to flink-connector-examples/build.gradle
index 04b42594..04164246 100644
--- a/flink-examples/build.gradle
+++ b/flink-connector-examples/build.gradle
@@ -27,7 +27,7 @@ ext {
dependencies {
compile "org.scala-lang.modules:scala-java8-compat_2.11:${scalaJava8CompatVersion}"
- compile "io.pravega:pravega-connectors-flink_2.11:${connectorVersion}"
+ compile "io.pravega:pravega-connectors-flink_2.11:${flinkConnectorVersion}"
compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}"
compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}"
compile "org.slf4j:slf4j-log4j12:1.7.25"
@@ -42,28 +42,28 @@ shadowJar {
task scriptWordCountWriter(type: CreateStartScripts) {
outputDir = file('build/scripts')
- mainClassName = 'io.pravega.examples.flink.wordcount.WordCountWriter'
+ mainClassName = 'io.pravega.example.flink.wordcount.WordCountWriter'
applicationName = 'wordCountWriter'
classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
}
task scriptWordCountReader(type: CreateStartScripts) {
outputDir = file('build/scripts')
- mainClassName = 'io.pravega.examples.flink.wordcount.WordCountReader'
+ mainClassName = 'io.pravega.example.flink.wordcount.WordCountReader'
applicationName = 'wordCountReader'
classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
}
task scriptExactlyOnceWriter(type: CreateStartScripts) {
outputDir = file('build/scripts')
- mainClassName = 'io.pravega.examples.flink.primer.process.ExactlyOnceWriter'
+ mainClassName = 'io.pravega.example.flink.primer.process.ExactlyOnceWriter'
applicationName = 'exactlyOnceWriter'
classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
}
task scriptExactlyOnceChecker(type: CreateStartScripts) {
outputDir = file('build/scripts')
- mainClassName = 'io.pravega.examples.flink.primer.process.ExactlyOnceChecker'
+ mainClassName = 'io.pravega.example.flink.primer.process.ExactlyOnceChecker'
applicationName = 'exactlyOnceChecker'
classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
}
diff --git a/flink-examples/doc/exactly-once/README.md b/flink-connector-examples/doc/exactly-once/README.md
similarity index 98%
rename from flink-examples/doc/exactly-once/README.md
rename to flink-connector-examples/doc/exactly-once/README.md
index c7f1146c..345e3fa5 100644
--- a/flink-examples/doc/exactly-once/README.md
+++ b/flink-connector-examples/doc/exactly-once/README.md
@@ -5,7 +5,7 @@ This example demonstrates how Pravega EXACTLY_ONCE mode works in conjunction with
The example consists of two applications, a writer and a checker.
```
-$ cd flink-examples/build/install/pravega-flink-examples
+$ cd flink-connector-examples/build/install/pravega-flink-examples
$ bin/exactlyOnceChecker [--controller tcp://localhost:9090] [--scope examples] [--stream mystream]
$ bin/exactlyOnceWriter [--controller tcp://localhost:9090] [--scope examples] [--stream mystream] [--num-events 50] [--exactlyonce true]
```
diff --git a/flink-examples/doc/flink-wordcount/README.md b/flink-connector-examples/doc/flink-wordcount/README.md
similarity index 79%
rename from flink-examples/doc/flink-wordcount/README.md
rename to flink-connector-examples/doc/flink-wordcount/README.md
index a8025200..75daca9e 100644
--- a/flink-examples/doc/flink-wordcount/README.md
+++ b/flink-connector-examples/doc/flink-wordcount/README.md
@@ -1,24 +1,24 @@
# Word Count Example Using Pravega Flink Connectors
-This example consists of two applications, a WordCountWriter that reads data from a
+This example consists of two applications, a `WordCountWriter` that reads data from a
network stream, transforms the data, and writes the data to a Pravega stream; and a
-WordCountReader that reads from a Pravega stream and prints the word counts summary.
+`WordCountReader` that reads from a Pravega stream and prints the word counts summary.
The scripts can be found under the flink-examples directory in:
```
-flink-examples/build/install/pravega-flink-examples/bin
+flink-connector-examples/build/install/pravega-flink-examples/bin
```
-You might want to run WordCountWriter in one window and WordCountReader in another.
+You might want to run `WordCountWriter` in one window and `WordCountReader` in another.
## Start WordCountWriter
The application reads text from a socket, once every 5 seconds prints the distinct words and counts
from the previous 5 seconds, and writes the word counts to a Pravega stream, printing them as well.
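To picture the shape of that logic, here is a minimal sketch assuming the Flink 1.x DataStream API used by these samples. It is not the sample's actual `WordCountWriter` source: the host, port, and job name are illustrative, and `print()` stands in for the Pravega connector sink that the real application uses.

```
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class WordCountWriterSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.socketTextStream("localhost", 9999)           // text lines fed by netcat
           .flatMap((String line, Collector<Tuple2<String, Integer>> out) -> {
               for (String word : line.split("\\s+")) {
                   if (!word.isEmpty()) {
                       out.collect(Tuple2.of(word, 1));   // one (word, 1) pair per occurrence
                   }
               }
           })
           .returns(Types.TUPLE(Types.STRING, Types.INT)) // declare the type erased by the lambda
           .keyBy(0)                                      // group by word
           .timeWindow(Time.seconds(5))                   // tumbling 5-second windows
           .sum(1)                                        // per-word count within each window
           .print();                                      // the real sample writes to Pravega here
        env.execute("WordCountWriter sketch");
    }
}
```

The tumbling 5-second window is what produces the once-every-5-seconds output cadence described above.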
-First, use netcat to start local server via
+First, use `netcat` to start a local server via
```
$ nc -lk 9999
```
-Then start the WordCountWriter
+Then start the `WordCountWriter`:
```
$ bin/wordCountWriter [-host localhost] [-port 9999] [-scope examples] [-stream wordcount] [-controller tcp://localhost:9090]
```
@@ -41,13 +41,13 @@ All args are optional, if not included, the defaults are:
* stream - "wordcount"
* controller - "tcp://localhost:9090"
-Now in the windows where netcat is running, enter some text, for example,
+Now in the window where `netcat` is running, enter some text, for example,
```
$ nc -lk 9999
aa bb cc aa
```
-In the windows where wordCountReader is running, it should show output similar to the sample output below
+In the window where `WordCountReader` is running, it should show output similar to the sample output below
```
4> Word: cc: Count: 1
4> Word: aa: Count: 2
@@ -67,25 +67,25 @@ taskmanager.numberOfTaskSlots: 4
By default, Flink job manager runs on port 6123.
-Point your browser to http://<flink-host>:8081 to make sure Flink is running; then click "Running Jobs"
+Point your browser to `http://<flink-host>:8081` to make sure Flink is running; then click "Running Jobs"
### Start WordCountWriter
```
$ cd flink-examples/build/install/pravega-flink-examples
$ flink run -m localhost:6123 -c io.pravega.examples.flink.wordcount.WordCountWriter lib/pravega-flink-examples-0.2.0-SNAPSHOT-all.jar --host localhost --port 9999 --controller tcp://localhost:9090
```
-The WordCountWriter job should show up on the Flink UI as a running job.
+The `WordCountWriter` job should show up on the Flink UI as a running job.
### Start WordCountReader
-In a different window
+In a different window:
```
$ cd flink-examples/build/install/pravega-flink-examples
$ flink run -m localhost:6123 -c io.pravega.examples.flink.wordcount.WordCountReader lib/pravega-flink-examples-0.2.0-SNAPSHOT-all.jar --controller tcp://localhost:9090
```
-The WordCountReader job should show up on the Flink UI as a running job.
+The `WordCountReader` job should show up on the Flink UI as a running job.
### View Output
-Now in the windows where netcat is running, enter some text, for example,
+Now in the window where `netcat` is running, enter some text, for example,
```
$ nc -lk 9999
aa bb cc aa
@@ -102,7 +102,7 @@ Word: bb: Count: 1
You can also submit flink jobs via Flink UI.
-Submit WordCountWriter job +Submit `WordCountWriter` job - Click **Submit new Job** - Click **Add New** button @@ -115,7 +115,7 @@ Submit WordCountWriter job ![](image/flink-wordcount-submit-writer.png) -Submit WordCountReader job +Submit `WordCountReader` job Repeat the steps above except - in the **Entry Class** field, enter: ```io.pravega.examples.flink.wordcount.WordCountReader``` @@ -133,7 +133,7 @@ Repeat the steps above except ![](image/flink-wordcount-intellij-02.png) -- Run Configuration for WordCountWriter +- Run Configuration for `WordCountWriter` ![](image/flink-wordcount-intellij-04.png) diff --git a/flink-examples/doc/flink-wordcount/image/flink-wordcount-intellij-01.png b/flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-intellij-01.png similarity index 100% rename from flink-examples/doc/flink-wordcount/image/flink-wordcount-intellij-01.png rename to flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-intellij-01.png diff --git a/flink-examples/doc/flink-wordcount/image/flink-wordcount-intellij-02.png b/flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-intellij-02.png similarity index 100% rename from flink-examples/doc/flink-wordcount/image/flink-wordcount-intellij-02.png rename to flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-intellij-02.png diff --git a/flink-examples/doc/flink-wordcount/image/flink-wordcount-intellij-03.png b/flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-intellij-03.png similarity index 100% rename from flink-examples/doc/flink-wordcount/image/flink-wordcount-intellij-03.png rename to flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-intellij-03.png diff --git a/flink-examples/doc/flink-wordcount/image/flink-wordcount-intellij-04.png b/flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-intellij-04.png similarity index 100% rename from flink-examples/doc/flink-wordcount/image/flink-wordcount-intellij-04.png rename to flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-intellij-04.png diff --git a/flink-examples/doc/flink-wordcount/image/flink-wordcount-submit-writer.png b/flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-submit-writer.png similarity index 100% rename from flink-examples/doc/flink-wordcount/image/flink-wordcount-submit-writer.png rename to flink-connector-examples/doc/flink-wordcount/image/flink-wordcount-submit-writer.png diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/Utils.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/Utils.java similarity index 95% rename from flink-examples/src/main/java/io/pravega/examples/flink/Utils.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/Utils.java index f1f8f419..9b7d183c 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/Utils.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/Utils.java @@ -1,51 +1,51 @@ -/* - * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - */ -package io.pravega.examples.flink; - -import io.pravega.client.admin.StreamManager; -import io.pravega.client.stream.Stream; -import io.pravega.client.stream.StreamConfiguration; -import io.pravega.connectors.flink.PravegaConfig; - -public class Utils { - - /** - * Creates a Pravega stream with a default configuration. - * - * @param pravegaConfig the Pravega configuration. - * @param streamName the stream name (qualified or unqualified). - */ - public static Stream createStream(PravegaConfig pravegaConfig, String streamName) { - return createStream(pravegaConfig, streamName, StreamConfiguration.builder().build()); - } - - /** - * Creates a Pravega stream with a given configuration. - * - * @param pravegaConfig the Pravega configuration. - * @param streamName the stream name (qualified or unqualified). - * @param streamConfig the stream configuration (scaling policy, retention policy). - */ - public static Stream createStream(PravegaConfig pravegaConfig, String streamName, StreamConfiguration streamConfig) { - // resolve the qualified name of the stream - Stream stream = pravegaConfig.resolve(streamName); - - try(StreamManager streamManager = StreamManager.create(pravegaConfig.getClientConfig())) { - // create the requested scope (if necessary) - streamManager.createScope(stream.getScope()); - - // create the requested stream based on the given stream configuration - streamManager.createStream(stream.getScope(), stream.getStreamName(), streamConfig); - } - - return stream; - } -} +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.example.flink; + +import io.pravega.client.admin.StreamManager; +import io.pravega.client.stream.Stream; +import io.pravega.client.stream.StreamConfiguration; +import io.pravega.connectors.flink.PravegaConfig; + +public class Utils { + + /** + * Creates a Pravega stream with a default configuration. + * + * @param pravegaConfig the Pravega configuration. + * @param streamName the stream name (qualified or unqualified). + */ + public static Stream createStream(PravegaConfig pravegaConfig, String streamName) { + return createStream(pravegaConfig, streamName, StreamConfiguration.builder().build()); + } + + /** + * Creates a Pravega stream with a given configuration. + * + * @param pravegaConfig the Pravega configuration. + * @param streamName the stream name (qualified or unqualified). + * @param streamConfig the stream configuration (scaling policy, retention policy). 
+ */ + public static Stream createStream(PravegaConfig pravegaConfig, String streamName, StreamConfiguration streamConfig) { + // resolve the qualified name of the stream + Stream stream = pravegaConfig.resolve(streamName); + + try(StreamManager streamManager = StreamManager.create(pravegaConfig.getClientConfig())) { + // create the requested scope (if necessary) + streamManager.createScope(stream.getScope()); + + // create the requested stream based on the given stream configuration + streamManager.createStream(stream.getScope(), stream.getStreamName(), streamConfig); + } + + return stream; + } +} diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/primer/datatype/Constants.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/Constants.java similarity index 93% rename from flink-examples/src/main/java/io/pravega/examples/flink/primer/datatype/Constants.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/Constants.java index 06b4a805..cf6e5730 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/primer/datatype/Constants.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/Constants.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.examples.flink.primer.datatype; +package io.pravega.example.flink.primer.datatype; /** * Defines a handful of constants shared by classes in this package. diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/primer/datatype/IntegerEvent.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/IntegerEvent.java similarity index 96% rename from flink-examples/src/main/java/io/pravega/examples/flink/primer/datatype/IntegerEvent.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/IntegerEvent.java index b4c69ed4..832179d5 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/primer/datatype/IntegerEvent.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/IntegerEvent.java @@ -9,7 +9,7 @@ * */ -package io.pravega.examples.flink.primer.datatype; +package io.pravega.example.flink.primer.datatype; import java.io.Serializable; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceChecker.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceChecker.java similarity index 95% rename from flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceChecker.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceChecker.java index b0d05b37..a4f12aea 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceChecker.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceChecker.java @@ -8,15 +8,15 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.examples.flink.primer.process; +package io.pravega.example.flink.primer.process; import io.pravega.client.stream.Stream; import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.PravegaConfig; import io.pravega.connectors.flink.serialization.PravegaSerialization; -import io.pravega.examples.flink.Utils; -import io.pravega.examples.flink.primer.datatype.Constants; -import io.pravega.examples.flink.primer.datatype.IntegerEvent; 
+import io.pravega.example.flink.Utils; +import io.pravega.example.flink.primer.datatype.Constants; +import io.pravega.example.flink.primer.datatype.IntegerEvent; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.streaming.api.datastream.DataStream; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceWriter.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceWriter.java similarity index 89% rename from flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceWriter.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceWriter.java index 653fe813..51e37560 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/primer/process/ExactlyOnceWriter.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceWriter.java @@ -8,18 +8,18 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.examples.flink.primer.process; +package io.pravega.example.flink.primer.process; import io.pravega.client.stream.Stream; import io.pravega.connectors.flink.*; import io.pravega.connectors.flink.serialization.PravegaSerialization; -import io.pravega.examples.flink.Utils; -import io.pravega.examples.flink.primer.datatype.IntegerEvent; -import io.pravega.examples.flink.primer.datatype.Constants; -import io.pravega.examples.flink.primer.source.ThrottledIntegerEventProducer; -import io.pravega.examples.flink.primer.util.FailingMapper; - import org.apache.flink.api.common.restartstrategy.RestartStrategies; - import org.apache.flink.api.common.time.Time; +import io.pravega.example.flink.Utils; +import io.pravega.example.flink.primer.datatype.IntegerEvent; +import io.pravega.example.flink.primer.datatype.Constants; +import io.pravega.example.flink.primer.source.ThrottledIntegerEventProducer; +import io.pravega.example.flink.primer.util.FailingMapper; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.api.common.time.Time; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.streaming.api.CheckpointingMode; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/primer/source/ThrottledIntegerEventProducer.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/source/ThrottledIntegerEventProducer.java similarity index 98% rename from flink-examples/src/main/java/io/pravega/examples/flink/primer/source/ThrottledIntegerEventProducer.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/primer/source/ThrottledIntegerEventProducer.java index 31d82a97..54cb03ea 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/primer/source/ThrottledIntegerEventProducer.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/source/ThrottledIntegerEventProducer.java @@ -1,4 +1,3 @@ - /* * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 
* @@ -10,10 +9,10 @@ * */ -package io.pravega.examples.flink.primer.source; +package io.pravega.example.flink.primer.source; -import io.pravega.examples.flink.primer.datatype.IntegerEvent; +import io.pravega.example.flink.primer.datatype.IntegerEvent; import org.apache.flink.runtime.state.CheckpointListener; import org.apache.flink.streaming.api.checkpoint.ListCheckpointed; import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/primer/util/FailingMapper.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/util/FailingMapper.java similarity index 98% rename from flink-examples/src/main/java/io/pravega/examples/flink/primer/util/FailingMapper.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/primer/util/FailingMapper.java index d081b15d..73a41efb 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/primer/util/FailingMapper.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/util/FailingMapper.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.flink.primer.util; +package io.pravega.example.flink.primer.util; import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.streaming.api.checkpoint.ListCheckpointed; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/wordcount/Constants.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/Constants.java similarity index 95% rename from flink-examples/src/main/java/io/pravega/examples/flink/wordcount/Constants.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/Constants.java index 5a17e8a3..c32ee9fe 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/wordcount/Constants.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/Constants.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.examples.flink.wordcount; +package io.pravega.example.flink.wordcount; /** * Defines a handful of constants shared by classes in this package. 
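The hunks in this stretch of the patch apply one mechanical change: sources move from `flink-examples/` to `flink-connector-examples/`, and packages move from `io.pravega.examples.flink` to `io.pravega.example.flink`. For orientation, below is a minimal sketch of driving the relocated `Utils.createStream` helper (reproduced in full earlier in this patch) under its new package; the controller URI, scope, and stream name are placeholders for illustration, not values taken from the patch.

```java
import io.pravega.client.stream.Stream;
import io.pravega.client.stream.StreamConfiguration;
import io.pravega.connectors.flink.PravegaConfig;
import io.pravega.example.flink.Utils; // new package name after this patch

import java.net.URI;

public class CreateStreamSketch {
    public static void main(String[] args) {
        // Placeholder endpoint and scope; substitute your own deployment's values.
        PravegaConfig pravegaConfig = PravegaConfig.fromDefaults()
                .withControllerURI(URI.create("tcp://localhost:9090"))
                .withDefaultScope("examples");

        // Utils.createStream creates the scope and stream if they do not
        // already exist, then returns the resolved Stream handle.
        Stream stream = Utils.createStream(pravegaConfig, "wordcount",
                StreamConfiguration.builder().build());
        System.out.println("Ready: " + stream.getScopedName());
    }
}
```

Because `createScope` and `createStream` are no-ops when the scope or stream already exists, the helper is safe to call at the start of every job.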
diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCount.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCount.java similarity index 95% rename from flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCount.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCount.java index 4a0db18d..7d44a57d 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCount.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCount.java @@ -9,7 +9,7 @@ * */ -package io.pravega.examples.flink.wordcount; +package io.pravega.example.flink.wordcount; import java.io.Serializable; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCountReader.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCountReader.java similarity index 97% rename from flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCountReader.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCountReader.java index 307de415..f6db33fa 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCountReader.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCountReader.java @@ -8,13 +8,13 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.examples.flink.wordcount; +package io.pravega.example.flink.wordcount; import io.pravega.client.stream.Stream; import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.PravegaConfig; import io.pravega.connectors.flink.serialization.PravegaSerialization; -import io.pravega.examples.flink.Utils; +import io.pravega.example.flink.Utils; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.java.utils.ParameterTool; import org.apache.flink.streaming.api.datastream.DataStream; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCountWriter.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCountWriter.java similarity index 98% rename from flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCountWriter.java rename to flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCountWriter.java index b9542d4d..0c662404 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/wordcount/WordCountWriter.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount/WordCountWriter.java @@ -9,14 +9,14 @@ * */ -package io.pravega.examples.flink.wordcount; +package io.pravega.example.flink.wordcount; import io.pravega.client.stream.Stream; import io.pravega.connectors.flink.FlinkPravegaWriter; import io.pravega.connectors.flink.PravegaConfig; import io.pravega.connectors.flink.PravegaEventRouter; import io.pravega.connectors.flink.serialization.PravegaSerialization; -import io.pravega.examples.flink.Utils; +import io.pravega.example.flink.Utils; import org.apache.flink.api.java.utils.ParameterTool; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/flink-examples/src/main/resources/log4j.properties b/flink-connector-examples/src/main/resources/log4j.properties similarity index 100% rename from flink-examples/src/main/resources/log4j.properties rename to flink-connector-examples/src/main/resources/log4j.properties diff --git 
a/flink-examples/README.md b/flink-examples/README.md deleted file mode 100644 index c5ceede1..00000000 --- a/flink-examples/README.md +++ /dev/null @@ -1,102 +0,0 @@ -# Flink Connector Examples for Pravega -Battery of code examples to demonstrate the capabilities of Pravega as a data stream storage -system for Apache Flink. - -## Pre-requisites -1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) -2. Build [flink-connectors](https://github.com/pravega/flink-connectors) repository -3. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository -4. Apache Flink running - - -### Distributing Flink Samples -#### Assemble -Use gradle to assemble a distribution folder containing the Flink programs as a ready-to-deploy -uber-jar called `pravega-flink-examples-0.1.0-SNAPSHOT-all.jar`: - -``` -$ ./gradlew installDist -... -$ ls -R flink-examples/build/install/pravega-flink-examples -bin lib - -flink-examples/build/install/pravega-flink-examples/bin: -run-example - -flink-examples/build/install/pravega-flink-examples/lib: -pravega-flink-examples-0.1.0-SNAPSHOT-all.jar -``` - -#### Upload -The `upload` task makes it easy to upload the sample binaries to your cluster. First, configure -Gradle with the address of a node in your cluster. Edit `~/.gradle/gradle.properties` to -specify a value for `dcosAddress`. - -``` -$ cat ~/.gradle/gradle.properties -dcosAddress=10.240.124.164 -``` - -Then, upload the samples to the cluster. They'll be copied to `/home/centos` on the target node. -``` -$ ./gradlew upload -``` - ---- - -# Examples Catalog - -## Word Count - -This example demonstrates how to use the Pravega Flink Connectors to write data collected -from an external network stream into a Pravega `Stream` and read the data from the Pravega `Stream`. -_See [wordcount](doc/flink-wordcount/README.md) for more information and execution instructions_. - ---- - -# Scenarios Catalog - -## Turbine Heat Processor -A Flink streaming application for processing temperature data from a Pravega `Stream`. -Complements the Turbine Heat Sensor app ([`turbineheatsensor`](https://github.com/pravega/pravega-samples/tree/master/standalone-examples/src/main/java/io/pravega/example/turbineheatsensor)). -The application computes a daily summary of the temperature range observed on that day by each sensor. - -Automatically creates a scope (default: `examples`) and stream (default: `turbineHeatTest`) as necessary. - -### Execution -Run the sample from the command-line: -``` -$ bin/run-example [--controller ] [--input /] [--startTime ] [--output ] -``` - -Alternately, run the sample from the Flink UI. -- JAR: `pravega-flink-examples-0.1.0-SNAPSHOT-all.jar` -- Main class: `io.pravega.examples.flink.iot.TurbineHeatProcessor` or `io.pravega.examples.flink.iot.TurbineHeatProcessorScala` - -### Outputs -The application outputs the daily summary as a comma-separated values (CSV) file, one line per sensor per day. The data is -also emitted to stdout (which may be viewed in the Flink UI). For example: - -``` -... -SensorAggregate(1065600000,12,Illinois,(60.0,100.0)) -SensorAggregate(1065600000,3,Arkansas,(60.0,100.0)) -SensorAggregate(1065600000,7,Delaware,(60.0,100.0)) -SensorAggregate(1065600000,15,Kansas,(40.0,80.0)) -SensorAggregate(1152000000,3,Arkansas,(60.0,100.0)) -SensorAggregate(1152000000,12,Illinois,(60.0,100.0)) -SensorAggregate(1152000000,15,Kansas,(40.0,80.0)) -SensorAggregate(1152000000,7,Delaware,(60.0,100.0)) -... 
-``` - -## Anomaly Detection -A Flink streaming application for detecting anomalous input patterns using a finite-state machine. -_See the [anomaly-detection](https://github.com/pravega/pravega-samples/tree/master/anomaly-detection/README.md) -for more information and execution instructions_. - - -## Exactly Once Sample - -This sample demonstrates Pravega EXACTLY_ONCE feature in conjuction with Flink checkpointing and exactly-once mode. -See [Exactly Once Sample](doc/exactly-once/README.md) for instructions. diff --git a/flink-examples/src/main/dist/bin/run-example b/flink-examples/src/main/dist/bin/run-example deleted file mode 100755 index 7987c17c..00000000 --- a/flink-examples/src/main/dist/bin/run-example +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -flink run -c io.pravega.examples.flink.iot.TurbineHeatProcessor lib/pravega-flink-examples-0.1.0-SNAPSHOT-all.jar $* \ No newline at end of file diff --git a/gradle.properties b/gradle.properties index 11e144b2..25b8db4a 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,22 +1,27 @@ -/* - * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - */ +# +# Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# ### clusters dcosAddress=master.mesos ### dependencies -pravegaVersion=0.3.0-50.5f4d75b-SNAPSHOT -connectorVersion=0.3.0-102.0e30f47-SNAPSHOT +pravegaVersion=0.3.0-50.68f85f0-SNAPSHOT +flinkConnectorVersion=0.3.0-101.4d4269d-SNAPSHOT +hadoopConnectorVersion=0.3.0-17.a171316-SNAPSHOT -### outputs +### Pravega-samples output library samplesVersion=0.3.0-SNAPSHOT -### Flink +### Flink-connector examples flinkVersion=1.4.0 + +### Hadoop-connector examples +hadoopVersion=2.8.1 +scalaVersion=2.11.8 +sparkVersion=2.2.0 diff --git a/hadoop-examples/README.md b/hadoop-connector-examples/README.md similarity index 91% rename from hadoop-examples/README.md rename to hadoop-connector-examples/README.md index 9186f98e..5adfba20 100644 --- a/hadoop-examples/README.md +++ b/hadoop-connector-examples/README.md @@ -12,9 +12,8 @@ Code examples toe give you some basic ideas how to use hadoop-connectors for Pra ## Pre-requisites 1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) -2. Build [hadoop-connectors](https://github.com/pravega/hadoop-connectors) repository -3. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository -4. Apache Hadoop running +2. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository +3. Apache Hadoop running --- @@ -68,7 +67,7 @@ Additionally, you can run WordCount program (more will be coming soon) on top of ./HiBench/bin/workloads/micro/wordcount/hadoop/run.sh 4. 
check report - file:////hadoop-examples/HiBench/report/wordcount/hadoop/monitor.html + file:////hadoop-connector-examples/HiBench/report/wordcount/hadoop/monitor.html ``` diff --git a/hadoop-examples/build.gradle b/hadoop-connector-examples/build.gradle similarity index 84% rename from hadoop-examples/build.gradle rename to hadoop-connector-examples/build.gradle index 8d446aeb..372bb763 100644 --- a/hadoop-examples/build.gradle +++ b/hadoop-connector-examples/build.gradle @@ -17,7 +17,7 @@ apply plugin: "distribution" sourceCompatibility = 1.8 archivesBaseName = 'pravega-hadoop-examples' -version = connectorVersion +version = hadoopConnectorVersion repositories { mavenLocal() @@ -31,11 +31,12 @@ repositories { mavenCentral() maven { url "https://repository.apache.org/snapshots" } maven { url "https://oss.sonatype.org/content/repositories/snapshots" } + maven { url "https://oss.jfrog.org/artifactory/jfrog-dependencies" } } } dependencies { - compile "io.pravega:hadoop-connectors:${connectorVersion}" + compile "io.pravega:pravega-connectors-hadoop:${hadoopConnectorVersion}" compileOnly "org.apache.hadoop:hadoop-common:${hadoopVersion}" compileOnly "org.apache.hadoop:hadoop-mapreduce-client-core:${hadoopVersion}" compileOnly "org.apache.spark:spark-core_2.11:${sparkVersion}" @@ -48,7 +49,7 @@ shadowJar { } manifest { attributes( - 'Main-Class': 'io.pravega.examples.hadoop.ExampleDriver', + 'Main-Class': 'io.pravega.example.hadoop.wordcount.ExampleDriver', ) } } @@ -87,8 +88,8 @@ task genHiBenchConfig (dependsOn: buildHiBench, type: Copy) { doLast { def file = new File('./HiBench/conf/hadoop.conf') def newConfig = file.text - .replace('/PATH/TO/YOUR/HADOOP/ROOT', System.getenv("HADOOP_HOME")) - .replace('hdfs://localhost:8020', System.getenv("HDFS")) + .replace('/PATH/TO/YOUR/HADOOP/ROOT', System.getenv("HADOOP_HOME")) + .replace('hdfs://localhost:8020', System.getenv("HDFS")) file.text = newConfig } } diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/ExampleDriver.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/ExampleDriver.java similarity index 97% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/ExampleDriver.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/ExampleDriver.java index 3f325561..0ef6ee7e 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/ExampleDriver.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/ExampleDriver.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import org.apache.hadoop.util.ProgramDriver; diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputFormat.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/PravegaOutputFormat.java similarity index 99% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputFormat.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/PravegaOutputFormat.java index 7518e270..72d16dc2 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputFormat.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/PravegaOutputFormat.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import 
com.google.common.annotations.VisibleForTesting; import io.pravega.client.ClientFactory; diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputRecordWriter.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/PravegaOutputRecordWriter.java similarity index 94% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputRecordWriter.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/PravegaOutputRecordWriter.java index 968885ae..875233a5 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/PravegaOutputRecordWriter.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/PravegaOutputRecordWriter.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import io.pravega.client.stream.EventStreamWriter; import org.apache.hadoop.mapreduce.RecordWriter; @@ -19,7 +19,6 @@ import javax.annotation.concurrent.NotThreadSafe; import java.io.IOException; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; /** * A RecordWriter that can write events to Pravega. diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomTextWriter.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/RandomTextWriter.java similarity index 99% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomTextWriter.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/RandomTextWriter.java index 9dd815b1..ddb22094 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomTextWriter.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/RandomTextWriter.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomWriter.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/RandomWriter.java similarity index 99% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomWriter.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/RandomWriter.java index 34d0f504..cb7acebc 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/RandomWriter.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/RandomWriter.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/TextSerializer.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/TextSerializer.java similarity index 97% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/TextSerializer.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/TextSerializer.java index 94d9b191..8d7292b2 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/TextSerializer.java +++ 
b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/TextSerializer.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import io.pravega.client.stream.Serializer; import lombok.EqualsAndHashCode; diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordCount.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordCount.java similarity index 99% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordCount.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordCount.java index 0f1e9a1c..79ccdc15 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordCount.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordCount.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import io.pravega.connectors.hadoop.PravegaInputFormat; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMean.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordMean.java similarity index 99% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMean.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordMean.java index 96952214..18fdfa9c 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMean.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordMean.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import java.io.BufferedReader; import java.io.IOException; diff --git a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMedian.java b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordMedian.java similarity index 99% rename from hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMedian.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordMedian.java index 28f512c6..12420047 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/hadoop/WordMedian.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount/WordMedian.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.hadoop; +package io.pravega.example.hadoop.wordcount; import java.io.BufferedReader; import java.io.IOException; diff --git a/hadoop-examples/src/main/java/io/pravega/examples/spark/WordCount.java b/hadoop-connector-examples/src/main/java/io/pravega/example/spark/wordcount/WordCount.java similarity index 95% rename from hadoop-examples/src/main/java/io/pravega/examples/spark/WordCount.java rename to hadoop-connector-examples/src/main/java/io/pravega/example/spark/wordcount/WordCount.java index f2440f2e..5e196c77 100644 --- a/hadoop-examples/src/main/java/io/pravega/examples/spark/WordCount.java +++ b/hadoop-connector-examples/src/main/java/io/pravega/example/spark/wordcount/WordCount.java @@ -8,11 +8,11 @@ * http://www.apache.org/licenses/LICENSE-2.0 */ -package io.pravega.examples.spark; +package io.pravega.example.spark.wordcount; import io.pravega.connectors.hadoop.EventKey; import 
io.pravega.connectors.hadoop.PravegaInputFormat; -import io.pravega.examples.hadoop.TextSerializer; +import io.pravega.example.hadoop.wordcount.TextSerializer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.util.GenericOptionsParser; diff --git a/hadoop-examples/gradle.properties b/hadoop-examples/gradle.properties deleted file mode 100644 index eec457f3..00000000 --- a/hadoop-examples/gradle.properties +++ /dev/null @@ -1,7 +0,0 @@ -# -# Copyright (c) 2018 Dell Inc., or its subsidiaries. -# -hadoopVersion=2.8.1 -scalaVersion=2.11.8 -sparkVersion=2.2.0 -connectorVersion=0.3.0-SNAPSHOT diff --git a/standalone-examples/README.md b/pravega-client-examples/README.md similarity index 93% rename from standalone-examples/README.md rename to pravega-client-examples/README.md index 0ed5f983..74ac6255 100644 --- a/standalone-examples/README.md +++ b/pravega-client-examples/README.md @@ -91,6 +91,8 @@ All args are optional, if not included, the defaults are: An example of a simple reader that continually reads the contents of any `Stream`. A binary serializer is used so it works against any event types. The sample emits basic information about number of events/bytes read every 30 seconds. +### Execution + ``` $ bin/noopReader [--uri tcp://127.0.0.1:9090] [--stream /] ``` @@ -115,10 +117,10 @@ the same settings and observe how changes in one CLI process are not visible in process until that other CLI process invokes `REFRESH`. ## `streamcuts` -This application aims at demonstrating the use of `StreamCut`s four bounded processing +This application aims at demonstrating the use of `StreamCut`s for bounded processing on multiple `Stream`s. At the moment, the application contains two examples accessible via command line interface: i) Simple example: The user decides which `Stream` slices s/he wants -to read from all the st`Stream`reams by specifying indexes, and the application prints these slices +to read from all the `Stream`s by specifying indexes, and the application prints these slices using `ReaderGroupConfig` methods for bounded processing. ii) Time series example: `Stream`s are filled with events that are supposed to belong to a certain day with a given value: "_day1:5_". There is a variable number of events per day in each `Stream`. The user selects a day number, @@ -133,18 +135,3 @@ To demonstrate the use of `StreamCut`s, we provide a CLI. To use it, please exec $ bin/streamCutsCli [-scope myScope] [-name myStream] [-uri tcp://127.0.0.1:9090] ``` ---- - -# Scenarios Catalog - -## `turbineheatsensor` - -An example of a lightweight IOT application that writes simulated sensor events to a Pravega -`Stream`. 
- -``` -$ bin/turbineSensor [--stream ] -``` - - - diff --git a/standalone-examples/bin/.gitignore b/pravega-client-examples/bin/.gitignore similarity index 100% rename from standalone-examples/bin/.gitignore rename to pravega-client-examples/bin/.gitignore diff --git a/standalone-examples/build.gradle b/pravega-client-examples/build.gradle similarity index 61% rename from standalone-examples/build.gradle rename to pravega-client-examples/build.gradle index 12e7082f..097ebcb1 100644 --- a/standalone-examples/build.gradle +++ b/pravega-client-examples/build.gradle @@ -8,25 +8,25 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ - + apply plugin: 'java' apply plugin: 'distribution' apply plugin: 'idea' apply plugin: 'eclipse' sourceCompatibility = 1.8 -archivesBaseName = 'pravega-standalone-examples' +archivesBaseName = 'pravega-client-examples' resources ext { - junitVersion = '4.11' - commonsCLIVersion = '1.4' + junitVersion = '4.11' + commonsCLIVersion = '1.4' } dependencies { testCompile "junit:junit:${junitVersion}" - + compile "io.pravega:pravega-client:${pravegaVersion}", "io.pravega:pravega-common:${pravegaVersion}", "commons-cli:commons-cli:${commonsCLIVersion}" @@ -35,84 +35,70 @@ dependencies { compile "ch.qos.logback:logback-classic:1.1.7" } +// Build examples + task scriptHelloWorldWriter(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.example.gettingstarted.HelloWorldWriter' - applicationName = 'helloWorldWriter' - defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath + outputDir = file('build/scripts') + mainClassName = 'io.pravega.example.gettingstarted.HelloWorldWriter' + applicationName = 'helloWorldWriter' + defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } task scriptHelloWorldReader(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.example.gettingstarted.HelloWorldReader' - applicationName = 'helloWorldReader' - defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath + outputDir = file('build/scripts') + mainClassName = 'io.pravega.example.gettingstarted.HelloWorldReader' + applicationName = 'helloWorldReader' + defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } task startHelloWorldWriter(type: JavaExec) { main = "io.pravega.example.gettingstarted.HelloWorldWriter" classpath = sourceSets.main.runtimeClasspath if(System.getProperty("exec.args") != null) { - args System.getProperty("exec.args").split() + args System.getProperty("exec.args").split() } } task startHelloWorldReader(type: JavaExec) { main = "io.pravega.example.gettingstarted.HelloWorldReader" classpath = sourceSets.main.runtimeClasspath - if(System.getProperty("exec.args") != null) { - args System.getProperty("exec.args").split() - } -} - -task scriptTurbineSensor(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.example.iot.TurbineHeatSensor' - applicationName = 'turbineSensor' - defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath -} - -task startTurbineSensor(type: JavaExec) { - main = 
"io.pravega.example.iot.TurbineHeatSensor" - classpath = sourceSets.main.runtimeClasspath if(System.getProperty("exec.args") != null) { args System.getProperty("exec.args").split() } } task scriptConsoleWriter(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.example.consolerw.ConsoleWriter' - applicationName = 'consoleWriter' - defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath + outputDir = file('build/scripts') + mainClassName = 'io.pravega.example.consolerw.ConsoleWriter' + applicationName = 'consoleWriter' + defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } task scriptConsoleReader(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.example.consolerw.ConsoleReader' - applicationName = 'consoleReader' - defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath + outputDir = file('build/scripts') + mainClassName = 'io.pravega.example.consolerw.ConsoleReader' + applicationName = 'consoleReader' + defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } task startConsoleReader(type: JavaExec) { main = "io.pravega.example.consolerw.ConsoleReader" classpath = sourceSets.main.runtimeClasspath if(System.getProperty("exec.args") != null) { - args System.getProperty("exec.args").split() + args System.getProperty("exec.args").split() } } task scriptSharedConfigCli(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.example.statesynchronizer.SharedConfigCli' - applicationName = 'sharedConfigCli' - defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath + outputDir = file('build/scripts') + mainClassName = 'io.pravega.example.statesynchronizer.SharedConfigCli' + applicationName = 'sharedConfigCli' + defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } task scriptNoopReader(type: CreateStartScripts) { @@ -154,7 +140,6 @@ distributions { into('bin') { from project.scriptHelloWorldWriter from project.scriptHelloWorldReader - from project.scriptTurbineSensor from project.scriptConsoleWriter from project.scriptConsoleReader from project.scriptSharedConfigCli diff --git a/standalone-examples/src/main/dist/Dockerfile b/pravega-client-examples/src/main/dist/Dockerfile similarity index 100% rename from standalone-examples/src/main/dist/Dockerfile rename to pravega-client-examples/src/main/dist/Dockerfile diff --git a/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleReader.java b/pravega-client-examples/src/main/java/io/pravega/example/consolerw/ConsoleReader.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleReader.java rename to pravega-client-examples/src/main/java/io/pravega/example/consolerw/ConsoleReader.java diff --git a/standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java b/pravega-client-examples/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java similarity index 100% rename from 
standalone-examples/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java rename to pravega-client-examples/src/main/java/io/pravega/example/consolerw/ConsoleWriter.java diff --git a/standalone-examples/src/main/java/io/pravega/example/consolerw/Constants.java b/pravega-client-examples/src/main/java/io/pravega/example/consolerw/Constants.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/consolerw/Constants.java rename to pravega-client-examples/src/main/java/io/pravega/example/consolerw/Constants.java diff --git a/standalone-examples/src/main/java/io/pravega/example/gettingstarted/Constants.java b/pravega-client-examples/src/main/java/io/pravega/example/gettingstarted/Constants.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/gettingstarted/Constants.java rename to pravega-client-examples/src/main/java/io/pravega/example/gettingstarted/Constants.java diff --git a/standalone-examples/src/main/java/io/pravega/example/gettingstarted/HelloWorldReader.java b/pravega-client-examples/src/main/java/io/pravega/example/gettingstarted/HelloWorldReader.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/gettingstarted/HelloWorldReader.java rename to pravega-client-examples/src/main/java/io/pravega/example/gettingstarted/HelloWorldReader.java diff --git a/standalone-examples/src/main/java/io/pravega/example/gettingstarted/HelloWorldWriter.java b/pravega-client-examples/src/main/java/io/pravega/example/gettingstarted/HelloWorldWriter.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/gettingstarted/HelloWorldWriter.java rename to pravega-client-examples/src/main/java/io/pravega/example/gettingstarted/HelloWorldWriter.java diff --git a/standalone-examples/src/main/java/io/pravega/example/noop/BinarySerializer.java b/pravega-client-examples/src/main/java/io/pravega/example/noop/BinarySerializer.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/noop/BinarySerializer.java rename to pravega-client-examples/src/main/java/io/pravega/example/noop/BinarySerializer.java diff --git a/standalone-examples/src/main/java/io/pravega/example/noop/NoopReader.java b/pravega-client-examples/src/main/java/io/pravega/example/noop/NoopReader.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/noop/NoopReader.java rename to pravega-client-examples/src/main/java/io/pravega/example/noop/NoopReader.java diff --git a/standalone-examples/src/main/java/io/pravega/example/noop/SimpleReader.java b/pravega-client-examples/src/main/java/io/pravega/example/noop/SimpleReader.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/noop/SimpleReader.java rename to pravega-client-examples/src/main/java/io/pravega/example/noop/SimpleReader.java diff --git a/standalone-examples/src/main/java/io/pravega/example/statesynchronizer/SharedConfig.java b/pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer/SharedConfig.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/statesynchronizer/SharedConfig.java rename to pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer/SharedConfig.java diff --git a/standalone-examples/src/main/java/io/pravega/example/statesynchronizer/SharedConfigCli.java 
b/pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer/SharedConfigCli.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/statesynchronizer/SharedConfigCli.java rename to pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer/SharedConfigCli.java diff --git a/standalone-examples/src/main/java/io/pravega/example/statesynchronizer/SharedMap.java b/pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer/SharedMap.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/statesynchronizer/SharedMap.java rename to pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer/SharedMap.java diff --git a/standalone-examples/src/main/java/io/pravega/example/streamcuts/Constants.java b/pravega-client-examples/src/main/java/io/pravega/example/streamcuts/Constants.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/streamcuts/Constants.java rename to pravega-client-examples/src/main/java/io/pravega/example/streamcuts/Constants.java diff --git a/standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java b/pravega-client-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java rename to pravega-client-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsCli.java diff --git a/standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java b/pravega-client-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java similarity index 100% rename from standalone-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java rename to pravega-client-examples/src/main/java/io/pravega/example/streamcuts/StreamCutsExample.java diff --git a/standalone-examples/src/main/resources/logback.xml b/pravega-client-examples/src/main/resources/logback.xml similarity index 100% rename from standalone-examples/src/main/resources/logback.xml rename to pravega-client-examples/src/main/resources/logback.xml diff --git a/anomaly-detection/README.md b/scenarios/anomaly-detection/README.md similarity index 95% rename from anomaly-detection/README.md rename to scenarios/anomaly-detection/README.md index d998eceb..9c1e7a4c 100644 --- a/anomaly-detection/README.md +++ b/scenarios/anomaly-detection/README.md @@ -34,15 +34,14 @@ The following diagram depicts the state machine used in this example. ## Pre-requisites 1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) -2. Build [flink-connectors](https://github.com/pravega/flink-connectors) repository -3. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository -4. Apache Flink running -5. ELK running (optional) +2. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository +3. Apache Flink running +4. ELK running (optional) -## Running the Anomaly Detection Example +## Execution The example program is copied to a distribution folder when built as shown above. -Navigate to `anomaly-detection/build/install/pravega-flink-anomaly-detection/` for the below +Navigate to `scenarios/anomaly-detection/build/install/pravega-flink-anomaly-detection/` for the below steps. 
The example is split into three separate programs: @@ -110,7 +109,7 @@ $ bin/anomaly-detection --configDir conf/ --mode 3 - To run on a cluster: ``` -$ flink run -c io.pravega.anomalydetection.ApplicationMain lib/pravega-flink-anomaly-detection-0.1.0-SNAPSHOT-all.jar --configDir conf/ --mode 3 +$ flink run -c io.pravega.anomalydetection.ApplicationMain lib/pravega-flink-scenario-anomaly-detection-0.1.0-SNAPSHOT-all.jar --configDir conf/ --mode 3 ``` _Ensure that `$FLINK_HOME/bin` is on your path to use the `flink` command shown above._ diff --git a/anomaly-detection/build.gradle b/scenarios/anomaly-detection/build.gradle similarity index 94% rename from anomaly-detection/build.gradle rename to scenarios/anomaly-detection/build.gradle index c45972e5..b0bf8a01 100644 --- a/anomaly-detection/build.gradle +++ b/scenarios/anomaly-detection/build.gradle @@ -8,24 +8,29 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ - plugins { id 'com.github.johnrengelman.shadow' version '1.2.4' } +repositories { + mavenLocal() + jcenter() + mavenCentral() +} + apply plugin: "java" apply plugin: "distribution" apply plugin: 'application' sourceCompatibility = "1.8" -mainClassName = 'io.pravega.anomalydetection.ApplicationMain' +mainClassName = "io.pravega.anomalydetection.ApplicationMain" applicationDefaultJvmArgs = ["-Dlog4j.configuration=file:conf/log4j.properties"] -archivesBaseName = 'pravega-flink-anomaly-detection' +archivesBaseName = 'pravega-flink-scenario-anomaly-detection' dependencies { compile "org.slf4j:slf4j-log4j12:1.7.14" - compile "io.pravega:pravega-connectors-flink_2.11:${connectorVersion}" + compile "io.pravega:pravega-connectors-flink_2.11:${flinkConnectorVersion}" compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}" compile "org.apache.flink:flink-connector-elasticsearch5_2.11:${flinkVersion}" } @@ -35,8 +40,8 @@ shadowJar { include dependency("io.pravega:pravega-connectors-flink_2.11") //All below dependencies are from Elastic Search 5 connector - include dependency("org.apache.flink:flink-connector-elasticsearch5_2.11") - include dependency("org.apache.flink:flink-connector-elasticsearch-base_2.11") + include dependency("org.apache.flink:flink-connector-elasticsearch5_2.11:1.4") + include dependency("org.apache.flink:flink-connector-elasticsearch-base_2.11:1.4") include dependency("org.elasticsearch.client:transport:5.1.2") include dependency("org.elasticsearch:elasticsearch:5.1.2") include dependency("org.apache.lucene:lucene-core:6.3.0") @@ -111,4 +116,4 @@ run { args(argsList.split(' ')) } -} \ No newline at end of file +} diff --git a/anomaly-detection/src/main/dist/conf/app.json b/scenarios/anomaly-detection/src/main/dist/conf/app.json similarity index 100% rename from anomaly-detection/src/main/dist/conf/app.json rename to scenarios/anomaly-detection/src/main/dist/conf/app.json diff --git a/anomaly-detection/src/main/dist/conf/log4j.properties b/scenarios/anomaly-detection/src/main/dist/conf/log4j.properties similarity index 100% rename from anomaly-detection/src/main/dist/conf/log4j.properties rename to scenarios/anomaly-detection/src/main/dist/conf/log4j.properties diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/ApplicationMain.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/ApplicationMain.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/ApplicationMain.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/ApplicationMain.java 
diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/AppConfiguration.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/AppConfiguration.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/AppConfiguration.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/AppConfiguration.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/AbstractPipeline.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/AbstractPipeline.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/AbstractPipeline.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/AbstractPipeline.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/EventStateMachineMapper.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/EventStateMachineMapper.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/EventStateMachineMapper.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/EventStateMachineMapper.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PipelineRunner.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PipelineRunner.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PipelineRunner.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PipelineRunner.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaAnomalyDetectionProcessor.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaAnomalyDetectionProcessor.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaAnomalyDetectionProcessor.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaAnomalyDetectionProcessor.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaEventPublisher.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaEventPublisher.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaEventPublisher.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/PravegaEventPublisher.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/StreamCreator.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/StreamCreator.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/StreamCreator.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/pipeline/StreamCreator.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/producer/ControlledSourceContextProducer.java 
b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/producer/ControlledSourceContextProducer.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/producer/ControlledSourceContextProducer.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/producer/ControlledSourceContextProducer.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/producer/SourceContextProducer.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/producer/SourceContextProducer.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/producer/SourceContextProducer.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/producer/SourceContextProducer.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/Event.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/Event.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/Event.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/Event.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/EventStateMachine.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/EventStateMachine.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/EventStateMachine.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/EventStateMachine.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/EventsGenerator.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/EventsGenerator.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/EventsGenerator.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/EventsGenerator.java diff --git a/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/Result.java b/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/Result.java similarity index 100% rename from anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/Result.java rename to scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection/event/state/Result.java diff --git a/anomaly-detection/src/main/resources/Network-Anomaly.png b/scenarios/anomaly-detection/src/main/resources/Network-Anomaly.png similarity index 100% rename from anomaly-detection/src/main/resources/Network-Anomaly.png rename to scenarios/anomaly-detection/src/main/resources/Network-Anomaly.png diff --git a/scenarios/turbine-heat-processor/README.md b/scenarios/turbine-heat-processor/README.md new file mode 100644 index 00000000..f7813454 --- /dev/null +++ b/scenarios/turbine-heat-processor/README.md @@ -0,0 +1,41 @@ +# Turbine Heat Processor +A Flink streaming application for processing temperature data from a Pravega `Stream`. +Complements the Turbine Heat Sensor app +([`turbineheatsensor`](https://github.com/pravega/pravega-samples/scenarios/turbine-heat-sensor)). +The application computes a daily summary of the temperature range observed on that day by each sensor. 
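The daily summary is conceptually a per-key min/max fold: readings are grouped by (day, sensor) and reduced to the temperature range observed in that bucket, which is what the `SensorAggregate(day,sensor,location,(min,max))` lines in the Outputs section below show. As a rough illustration of that reduction only (the field names and shapes here are assumed; the sample's real implementation is the Flink `TurbineHeatProcessor` job), a plain-Java sketch:

```java
import java.util.HashMap;
import java.util.Map;

public class DailySummarySketch {

    /** Assumed aggregate shape: the (min, max) temperature range for one sensor-day. */
    static final class Range {
        final double min;
        final double max;
        Range(double min, double max) { this.min = min; this.max = max; }
    }

    /** Folds a single temperature reading into the per-(day, sensor) summary map. */
    static void addReading(Map<String, Range> summary, long day, int sensorId, double temp) {
        String key = day + ":" + sensorId; // one bucket per sensor per day
        Range old = summary.get(key);
        summary.put(key, old == null
                ? new Range(temp, temp)
                : new Range(Math.min(old.min, temp), Math.max(old.max, temp)));
    }

    public static void main(String[] args) {
        Map<String, Range> summary = new HashMap<>();
        addReading(summary, 1065600000L, 12, 60.0);  // values echo the sample output below
        addReading(summary, 1065600000L, 12, 100.0);
        summary.forEach((k, r) -> System.out.println(k + " -> (" + r.min + "," + r.max + ")"));
    }
}
```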
+ +Automatically creates a scope (default: `examples`) and stream (default: `turbineHeatTest`) as necessary. + +## Pre-requisites +1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +2. Build [pravega-samples](https://github.com/pravega/pravega-samples) repository +3. Apache Flink running + + +## Execution +Run the sample from the command-line: +``` +$ bin/run-example [--controller <controller URI>] [--input <scope>/<stream>] [--startTime <start time>] [--output <output path>] +``` + +Alternatively, run the sample from the Flink UI. +- JAR: `pravega-flink-examples-<VERSION>-all.jar` +- Main class: `io.pravega.turbineheatprocessor.TurbineHeatProcessor` or `io.pravega.turbineheatprocessor.TurbineHeatProcessorScala` + +## Outputs +The application outputs the daily summary as a comma-separated values (CSV) file, one line per sensor per day. The data is +also emitted to stdout (which may be viewed in the Flink UI). For example: + +``` +... +SensorAggregate(1065600000,12,Illinois,(60.0,100.0)) +SensorAggregate(1065600000,3,Arkansas,(60.0,100.0)) +SensorAggregate(1065600000,7,Delaware,(60.0,100.0)) +SensorAggregate(1065600000,15,Kansas,(40.0,80.0)) +SensorAggregate(1152000000,3,Arkansas,(60.0,100.0)) +SensorAggregate(1152000000,12,Illinois,(60.0,100.0)) +SensorAggregate(1152000000,15,Kansas,(40.0,80.0)) +SensorAggregate(1152000000,7,Delaware,(60.0,100.0)) +... +``` + diff --git a/scenarios/turbine-heat-processor/build.gradle b/scenarios/turbine-heat-processor/build.gradle new file mode 100644 index 00000000..b080e52d --- /dev/null +++ b/scenarios/turbine-heat-processor/build.gradle @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +plugins { + id 'com.github.johnrengelman.shadow' version '1.2.4' +} + +apply plugin: "java" +apply plugin: "scala" +apply plugin: "distribution" +apply plugin: 'idea' +apply plugin: 'eclipse' + +sourceCompatibility = "1.8" +archivesBaseName = 'pravega-flink-scenario-turbineheatprocessor' + +ext { + scalaJava8CompatVersion = '0.7.0' +} + +dependencies { + compile "org.scala-lang.modules:scala-java8-compat_2.11:${scalaJava8CompatVersion}" + compile "io.pravega:pravega-connectors-flink_2.11:${flinkConnectorVersion}" + compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}" + compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" + compile "org.slf4j:slf4j-log4j12:1.7.14" +} + +shadowJar { + dependencies { + include dependency("org.scala-lang.modules:scala-java8-compat_2.11") + include dependency("io.pravega:pravega-connectors-flink_2.11") + } +} + +task scriptTurbineHeatProcessor(type: CreateStartScripts) { + outputDir = file('build/scripts') + mainClassName = 'io.pravega.turbineheatprocessor.TurbineHeatProcessor' + applicationName = 'turbineHeatProcessor' + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath +} + +distributions { + main { + baseName = archivesBaseName + contents { + into('lib') { + from shadowJar + from(project.configurations.shadow) + from(jar) + from(project.configurations.runtime) + } + into('bin') { + from project.scriptTurbineHeatProcessor + } + } + } +} \ No newline at end of file diff --git a/scenarios/turbine-heat-processor/src/main/dist/bin/run-example b/scenarios/turbine-heat-processor/src/main/dist/bin/run-example new file mode 100755 index 00000000..a99bb846 --- /dev/null +++ b/scenarios/turbine-heat-processor/src/main/dist/bin/run-example @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# +# Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +flink run -c io.pravega.turbineheatprocessor.TurbineHeatProcessor lib/pravega-flink-scenario-turbineheatprocessor-0.3.0-SNAPSHOT-all.jar $* \ No newline at end of file diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/iot/SensorAggregate.java b/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/SensorAggregate.java similarity index 81% rename from flink-examples/src/main/java/io/pravega/examples/flink/iot/SensorAggregate.java rename to scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/SensorAggregate.java index 69598630..cae110c9 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/iot/SensorAggregate.java +++ b/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/SensorAggregate.java @@ -1,6 +1,14 @@ -package io.pravega.examples.flink.iot; - -import org.apache.commons.lang3.StringUtils; +/* + * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.turbineheatprocessor; public class SensorAggregate { private long startTime; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/iot/SensorEvent.java b/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/SensorEvent.java similarity index 77% rename from flink-examples/src/main/java/io/pravega/examples/flink/iot/SensorEvent.java rename to scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/SensorEvent.java index 83ce00f9..b672c450 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/iot/SensorEvent.java +++ b/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/SensorEvent.java @@ -1,4 +1,14 @@ -package io.pravega.examples.flink.iot; +/* + * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.turbineheatprocessor; public class SensorEvent { private long timestamp; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/iot/TurbineHeatProcessor.java b/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/TurbineHeatProcessor.java similarity index 98% rename from flink-examples/src/main/java/io/pravega/examples/flink/iot/TurbineHeatProcessor.java rename to scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/TurbineHeatProcessor.java index d4a50469..b3e5ab8a 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/iot/TurbineHeatProcessor.java +++ b/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/TurbineHeatProcessor.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.examples.flink.iot; +package io.pravega.turbineheatprocessor; import io.pravega.client.stream.ScalingPolicy; import io.pravega.client.stream.Stream; @@ -16,7 +16,6 @@ import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.PravegaConfig; import io.pravega.connectors.flink.serialization.PravegaSerialization; -import io.pravega.examples.flink.Utils; import org.apache.flink.api.common.functions.FoldFunction; import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.java.utils.ParameterTool; diff --git a/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/Utils.java b/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/Utils.java new file mode 100644 index 00000000..b9bd54b9 --- /dev/null +++ b/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor/Utils.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.turbineheatprocessor; + +import io.pravega.client.admin.StreamManager; +import io.pravega.client.stream.Stream; +import io.pravega.client.stream.StreamConfiguration; +import io.pravega.connectors.flink.PravegaConfig; + +public class Utils { + + /** + * Creates a Pravega stream with a default configuration. + * + * @param pravegaConfig the Pravega configuration. + * @param streamName the stream name (qualified or unqualified). + */ + public static Stream createStream(PravegaConfig pravegaConfig, String streamName) { + return createStream(pravegaConfig, streamName, StreamConfiguration.builder().build()); + } + + /** + * Creates a Pravega stream with a given configuration. + * + * @param pravegaConfig the Pravega configuration. + * @param streamName the stream name (qualified or unqualified). + * @param streamConfig the stream configuration (scaling policy, retention policy). + */ + public static Stream createStream(PravegaConfig pravegaConfig, String streamName, StreamConfiguration streamConfig) { + // resolve the qualified name of the stream + Stream stream = pravegaConfig.resolve(streamName); + + try(StreamManager streamManager = StreamManager.create(pravegaConfig.getClientConfig())) { + // create the requested scope (if necessary) + streamManager.createScope(stream.getScope()); + + // create the requested stream based on the given stream configuration + streamManager.createStream(stream.getScope(), stream.getStreamName(), streamConfig); + } + + return stream; + } +} diff --git a/scenarios/turbine-heat-processor/src/main/resources/logback.xml b/scenarios/turbine-heat-processor/src/main/resources/logback.xml new file mode 100644 index 00000000..ae1a2a69 --- /dev/null +++ b/scenarios/turbine-heat-processor/src/main/resources/logback.xml @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +--> +<configuration> + <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> + <target>System.out</target> + <encoder> + <pattern>%-5level [%d{yyyy-MM-dd HH:mm:ss.SSS}] [%thread] %logger{36}: %msg%n</pattern> + </encoder> + </appender> + + <root level="INFO"> + <appender-ref ref="STDOUT"/> + </root> +</configuration> diff --git a/flink-examples/src/main/scala/io/pravega/examples/flink/iot/TurbineHeatProcessorScala.scala b/scenarios/turbine-heat-processor/src/main/scala/io/pravega/turbineheatprocessor/TurbineHeatProcessorScala.scala similarity index 98% rename from flink-examples/src/main/scala/io/pravega/examples/flink/iot/TurbineHeatProcessorScala.scala rename to scenarios/turbine-heat-processor/src/main/scala/io/pravega/turbineheatprocessor/TurbineHeatProcessorScala.scala index 5a444147..0ca2d797 100644 --- a/flink-examples/src/main/scala/io/pravega/examples/flink/iot/TurbineHeatProcessorScala.scala +++ b/scenarios/turbine-heat-processor/src/main/scala/io/pravega/turbineheatprocessor/TurbineHeatProcessorScala.scala @@ -8,12 +8,11 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.examples.flink.iot +package io.pravega.turbineheatprocessor; import io.pravega.client.stream.{ScalingPolicy, StreamConfiguration} import io.pravega.connectors.flink.serialization.PravegaSerialization import io.pravega.connectors.flink.{FlinkPravegaReader, PravegaConfig} -import io.pravega.examples.flink.Utils import org.apache.flink.api.java.utils.ParameterTool import org.apache.flink.core.fs.FileSystem import org.apache.flink.streaming.api.TimeCharacteristic diff --git a/scenarios/turbine-heat-sensor/README.md b/scenarios/turbine-heat-sensor/README.md new file mode 100644 index 00000000..9f70506e --- /dev/null +++ b/scenarios/turbine-heat-sensor/README.md @@ -0,0 +1,12 @@ +# Turbine Heat Sensor +An example of a lightweight IOT
application that writes simulated sensor events to a Pravega stream. + +## Pre-requisites +1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) +2. Build `pravega-samples` repository + +## Execution + +``` +$ bin/turbineSensor [--stream <stream name>] +``` diff --git a/scenarios/turbine-heat-sensor/build.gradle b/scenarios/turbine-heat-sensor/build.gradle new file mode 100644 index 00000000..0e03e138 --- /dev/null +++ b/scenarios/turbine-heat-sensor/build.gradle @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ + +apply plugin: 'java' +apply plugin: 'distribution' +apply plugin: 'idea' +apply plugin: 'eclipse' + +sourceCompatibility = 1.8 +archivesBaseName = 'pravega-client-scenario-turbineheatsensor' + +resources + +ext { + junitVersion = '4.11' + commonsCLIVersion = '1.4' +} + +dependencies { + testCompile "junit:junit:${junitVersion}" + + compile "io.pravega:pravega-client:${pravegaVersion}", + "io.pravega:pravega-common:${pravegaVersion}", + "commons-cli:commons-cli:${commonsCLIVersion}" + + compile "org.slf4j:slf4j-api:1.7.14" + compile "ch.qos.logback:logback-classic:1.1.7" +} + +task scriptTurbineSensor(type: CreateStartScripts) { + outputDir = file('build/scripts') + mainClassName = 'io.pravega.turbineheatsensor.TurbineHeatSensor' + applicationName = 'turbineSensor' + defaultJvmOpts = ["-Dlogback.configurationFile=file:conf/logback.xml"] + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath +} + +task startTurbineSensor(type: JavaExec) { + main = "io.pravega.turbineheatsensor.TurbineHeatSensor" + classpath = sourceSets.main.runtimeClasspath + if(System.getProperty("exec.args") != null) { + args System.getProperty("exec.args").split() + } +} + + +distributions { + main { + baseName = archivesBaseName + contents { + into('bin') { + from project.scriptTurbineSensor + } + into('lib') { + from(jar) + from(project.configurations.runtime) + } + into('conf') { + from(project.file('src/main/resources/logback.xml')) + } + } + } +} diff --git a/scenarios/turbine-heat-sensor/src/main/dist/Dockerfile b/scenarios/turbine-heat-sensor/src/main/dist/Dockerfile new file mode 100644 index 00000000..d6987d15 --- /dev/null +++ b/scenarios/turbine-heat-sensor/src/main/dist/Dockerfile @@ -0,0 +1,7 @@ +FROM java:8 + +WORKDIR /opt/pravega-samples +COPY bin/ bin/ +COPY conf/ conf/ +COPY lib/ lib/ + diff --git a/standalone-examples/src/main/java/io/pravega/example/iot/PerfStats.java b/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/PerfStats.java similarity index 99% rename from standalone-examples/src/main/java/io/pravega/example/iot/PerfStats.java rename to scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/PerfStats.java index aa26cb5b..24848b11 100644 --- a/standalone-examples/src/main/java/io/pravega/example/iot/PerfStats.java +++ b/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/PerfStats.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.example.iot; +package io.pravega.turbineheatsensor; import java.util.Arrays; import java.util.concurrent.CompletableFuture; diff --git a/standalone-examples/src/main/java/io/pravega/example/iot/TurbineHeatSensor.java
b/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java similarity index 99% rename from standalone-examples/src/main/java/io/pravega/example/iot/TurbineHeatSensor.java rename to scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java index 586f4219..573811db 100644 --- a/standalone-examples/src/main/java/io/pravega/example/iot/TurbineHeatSensor.java +++ b/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java @@ -8,7 +8,7 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -package io.pravega.example.iot; +package io.pravega.turbineheatsensor; import io.pravega.client.ClientFactory; import io.pravega.client.admin.ReaderGroupManager; diff --git a/scenarios/turbine-heat-sensor/src/main/resources/logback.xml b/scenarios/turbine-heat-sensor/src/main/resources/logback.xml new file mode 100644 index 00000000..ae1a2a69 --- /dev/null +++ b/scenarios/turbine-heat-sensor/src/main/resources/logback.xml @@ -0,0 +1,24 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Copyright (c) 2017 Dell Inc., or its subsidiaries. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 +--> +<configuration> + <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> + <target>System.out</target> + <encoder> + <pattern>%-5level [%d{yyyy-MM-dd HH:mm:ss.SSS}] [%thread] %logger{36}: %msg%n</pattern> + </encoder> + </appender> + + <root level="INFO"> + <appender-ref ref="STDOUT"/> + </root> +</configuration> diff --git a/settings.gradle b/settings.gradle index eff49fdc..8b840221 100644 --- a/settings.gradle +++ b/settings.gradle @@ -8,7 +8,13 @@ * http://www.apache.org/licenses/LICENSE-2.0 * */ -include 'standalone-examples' -include 'flink-examples' -include 'hadoop-examples' -include 'anomaly-detection' +// Build examples +include 'pravega-client-examples' +include 'flink-connector-examples' +include 'hadoop-connector-examples' + +// Build scenarios +include 'scenarios/turbine-heat-sensor' +include 'scenarios/turbine-heat-processor' +include 'scenarios/anomaly-detection' + From 43ebd5dcd069edefc2411c2c5da2ed27faa85e31 Mon Sep 17 00:00:00 2001 From: Eron Wright Date: Thu, 14 Jun 2018 21:02:01 -0700 Subject: [PATCH 23/48] Update README.md --- scenarios/turbine-heat-processor/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scenarios/turbine-heat-processor/README.md b/scenarios/turbine-heat-processor/README.md index f7813454..affbf3e9 100644 --- a/scenarios/turbine-heat-processor/README.md +++ b/scenarios/turbine-heat-processor/README.md @@ -1,7 +1,7 @@ # Turbine Heat Processor A Flink streaming application for processing temperature data from a Pravega `Stream`. Complements the Turbine Heat Sensor app -([`turbineheatsensor`](https://github.com/pravega/pravega-samples/scenarios/turbine-heat-sensor)). +([`turbineheatsensor`](../turbine-heat-sensor)). The application computes a daily summary of the temperature range observed on that day by each sensor. Automatically creates a scope (default: `examples`) and stream (default: `turbineHeatTest`) as necessary.
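The daily summary that the turbine-heat-processor README above describes is, at bottom, a per-sensor min/max aggregation over one-day event-time windows. As a rough plain-Java illustration of that aggregation logic, outside of Flink and with hypothetical names not taken from the samples:

```
import java.util.HashMap;
import java.util.Map;

// Sketch of the per-sensor daily min/max aggregation that TurbineHeatProcessor
// performs with Flink windows; class and method names here are illustrative.
public class DailyRangeSketch {

    // key "sensorId@dayStartMillis" -> { min, max } observed so far
    private final Map<String, double[]> ranges = new HashMap<>();

    public void observe(int sensorId, long dayStartMillis, double temperature) {
        String key = sensorId + "@" + dayStartMillis;
        double[] range = ranges.computeIfAbsent(key, k -> new double[]{temperature, temperature});
        range[0] = Math.min(range[0], temperature);
        range[1] = Math.max(range[1], temperature);
    }

    public double[] rangeFor(int sensorId, long dayStartMillis) {
        return ranges.get(sensorId + "@" + dayStartMillis);
    }

    public static void main(String[] args) {
        DailyRangeSketch sketch = new DailyRangeSketch();
        sketch.observe(12, 1065600000L, 72.5);
        sketch.observe(12, 1065600000L, 60.0);
        sketch.observe(12, 1065600000L, 100.0);
        double[] r = sketch.rangeFor(12, 1065600000L);
        // mirrors the shape of the SensorAggregate lines shown in the README
        System.out.printf("SensorAggregate(1065600000,12,Illinois,(%.1f,%.1f))%n", r[0], r[1]);
    }
}
```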
From b9cd292b12876ba7e7fa5d9020b7a96b05590112 Mon Sep 17 00:00:00 2001 From: Vijay Srinivasaraghavan Date: Mon, 18 Jun 2018 07:02:34 -0700 Subject: [PATCH 24/48] [issue-104] Fixed incorrect hadoop connector artifact reference (#106) * Moves configurations from hadoop-examples gradle.properties to root gradle.properties * Updates hadoop connector snapshot version Signed-off-by: Vijay Srinivasaraghavan --- gradle.properties | 8 ++++---- hadoop-connector-examples/build.gradle | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/gradle.properties b/gradle.properties index 25b8db4a..73ba561f 100644 --- a/gradle.properties +++ b/gradle.properties @@ -11,9 +11,8 @@ dcosAddress=master.mesos ### dependencies -pravegaVersion=0.3.0-50.68f85f0-SNAPSHOT -flinkConnectorVersion=0.3.0-101.4d4269d-SNAPSHOT -hadoopConnectorVersion=0.3.0-17.a171316-SNAPSHOT +pravegaVersion=0.3.0-50.5f4d75b-SNAPSHOT +flinkConnectorVersion=0.3.0-102.0e30f47-SNAPSHOT ### Pravega-samples output library samplesVersion=0.3.0-SNAPSHOT @@ -21,7 +20,8 @@ samplesVersion=0.3.0-SNAPSHOT ### Flink-connector examples flinkVersion=1.4.0 -### Hadoop-connector examples +#hadoop connector hadoopVersion=2.8.1 scalaVersion=2.11.8 sparkVersion=2.2.0 +hadoopConnectorVersion=0.3.0-17.a171316-SNAPSHOT diff --git a/hadoop-connector-examples/build.gradle b/hadoop-connector-examples/build.gradle index 372bb763..8a6f4849 100644 --- a/hadoop-connector-examples/build.gradle +++ b/hadoop-connector-examples/build.gradle @@ -17,7 +17,6 @@ apply plugin: "distribution" sourceCompatibility = 1.8 archivesBaseName = 'pravega-hadoop-examples' -version = hadoopConnectorVersion repositories { mavenLocal() @@ -45,7 +44,7 @@ dependencies { shadowJar { version = version dependencies { - include dependency("io.pravega:hadoop-connectors") + include dependency("io.pravega:pravega-connectors-hadoop") } manifest { attributes( From 985f3ffc5c02cc3d6d87b7de4fe20e79f73d9657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Gracia?= Date: Mon, 18 Jun 2018 16:57:44 +0200 Subject: [PATCH 25/48] Issue 85: turbineSensor throws occasional NPE (#111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix in TurbineHeatSensor.java to avoid the application to throw NullPointerException. Note that this PR only targets this specific problem (i.e., other exceptions or problems will be treated separately). 
Signed-off-by: Raúl Gracia --- .../pravega/turbineheatsensor/TurbineHeatSensor.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java b/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java index 573811db..a7bbc20e 100644 --- a/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java +++ b/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java @@ -438,13 +438,16 @@ public void run() { do { try { final EventRead result = reader.readNextEvent(0); - produceStats.runAndRecordTime(() -> { - return CompletableFuture.completedFuture(null); - }, Long.parseLong(result.getEvent().split(",")[0]), 100); + if (result.getEvent() != null) { + consumeStats.runAndRecordTime(() -> { + return CompletableFuture.completedFuture(null); + }, Long.parseLong(result.getEvent().split(",")[0]), 100); + totalEvents--; + } } catch (ReinitializationRequiredException e) { e.printStackTrace(); } - } while ( totalEvents-- > 0 ); + } while (totalEvents > 0); } finally { reader.close(); From 73a14cd5ec6ef60d75146c3c4accd8b8869c061d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Gracia?= Date: Tue, 19 Jun 2018 01:59:30 +0200 Subject: [PATCH 26/48] Issue 108: Relative documentation links. (#109) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raúl Gracia --- README.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 1ba1f53a..e2e03363 100644 --- a/README.md +++ b/README.md @@ -22,35 +22,35 @@ more complex applications as sub-projects, which show use-cases exploiting one o ## Pravega Client Examples | Example Name | Description | Language | | ------------- |:-----| :-----| -| `gettingstarted` | Simple example of how to read/write from/to a Pravega `Stream`. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/gettingstarted) -| `consolerw` | Application that allows users to work with `Stream`, `Transaction` and `StreamCut` APIs via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/consolerw) -| `noop` | Example of how to add a simple callback executed upon a read event. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/noop) -| `statesynchronizer` | Application that allows users to work with `StateSynchronizer` API via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer) -| `streamcuts` | Application examples demonstrating the use of `StreamCut`s via CLI. | [Java](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples/src/main/java/io/pravega/example/streamcuts) +| `gettingstarted` | Simple example of how to read/write from/to a Pravega `Stream`. | [Java](pravega-client-examples/src/main/java/io/pravega/example/gettingstarted) +| `consolerw` | Application that allows users to work with `Stream`, `Transaction` and `StreamCut` APIs via CLI. | [Java](pravega-client-examples/src/main/java/io/pravega/example/consolerw) +| `noop` | Example of how to add a simple callback executed upon a read event. 
| [Java](pravega-client-examples/src/main/java/io/pravega/example/noop) +| `statesynchronizer` | Application that allows users to work with `StateSynchronizer` API via CLI. | [Java](pravega-client-examples/src/main/java/io/pravega/example/statesynchronizer) +| `streamcuts` | Application examples demonstrating the use of `StreamCut`s via CLI. | [Java](pravega-client-examples/src/main/java/io/pravega/example/streamcuts) -The related documentation and instructions are [here](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples). +The related documentation and instructions are [here](pravega-client-examples). ## Flink Connector Examples | Example Name | Description | Language | | ------------- |:-----| :-----| -| `wordcount` | Counting the words continuously from a Pravega `Stream` to demonstrate the usage of Flink connector for Pravega. | [Java](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount) -| `primer` | This sample demonstrates Pravega "exactly-once" feature jointly with Flink checkpointing and exactly-once mode. | [Java](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples/src/main/java/io/pravega/example/flink/primer) +| `wordcount` | Counting the words continuously from a Pravega `Stream` to demonstrate the usage of Flink connector for Pravega. | [Java](flink-connector-examples/src/main/java/io/pravega/example/flink/wordcount) +| `primer` | This sample demonstrates Pravega "exactly-once" feature jointly with Flink checkpointing and exactly-once mode. | [Java](flink-connector-examples/src/main/java/io/pravega/example/flink/primer) -The related documentation and instructions are [here](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples). +The related documentation and instructions are [here](flink-connector-examples). ## Hadoop Connector Examples | Example Name | Description | Language | | ------------- |:-----| :-----| -| `wordcount` | Counts the words from a Pravega `Stream` filled with random text to demonstrate the usage of Hadoop connector for Pravega. | [Java](https://github.com/pravega/pravega-samples/tree/master/hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount) +| `wordcount` | Counts the words from a Pravega `Stream` filled with random text to demonstrate the usage of Hadoop connector for Pravega. | [Java](hadoop-connector-examples/src/main/java/io/pravega/example/hadoop/wordcount) -The related documentation and instructions are [here](https://github.com/pravega/pravega-samples/tree/master/hadoop-connector-examples). +The related documentation and instructions are [here](hadoop-connector-examples). ## Scenarios | Example Name | Description | Language | | ------------- |:-----| :-----| -| [`turbineheatsensor`](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-sensor) | It emulates parallel sensors producing temperature values (writers) and parallel consumers performing real-time statistics (readers) via Pravega client. | [Java](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor) -| [`turbineheatprocessor`](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-processor) | A Flink streaming application for processing temperature data from a Pravega stream produced by the `turbineheatsensor` app. 
The application computes a daily summary of the temperature range observed on that day by each sensor. | [Java](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor), [Scala](https://github.com/pravega/pravega-samples/tree/master/scenarios/turbine-heat-processor/src/main/scala/io/pravega/turbineheatprocessor) -| [`anomaly-detection`](https://github.com/pravega/pravega-samples/tree/master/scenarios/anomaly-detection) | A Flink streaming application for detecting anomalous input patterns using a finite-state machine. | [Java](https://github.com/pravega/pravega-samples/tree/master/scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection) +| [`turbineheatsensor`](scenarios/turbine-heat-sensor) | It emulates parallel sensors producing temperature values (writers) and parallel consumers performing real-time statistics (readers) via Pravega client. | [Java](scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor) +| [`turbineheatprocessor`](scenarios/turbine-heat-processor) | A Flink streaming application for processing temperature data from a Pravega stream produced by the `turbineheatsensor` app. The application computes a daily summary of the temperature range observed on that day by each sensor. | [Java](scenarios/turbine-heat-processor/src/main/java/io/pravega/turbineheatprocessor), [Scala](scenarios/turbine-heat-processor/src/main/scala/io/pravega/turbineheatprocessor) +| [`anomaly-detection`](scenarios/anomaly-detection) | A Flink streaming application for detecting anomalous input patterns using a finite-state machine. | [Java](scenarios/anomaly-detection/src/main/java/io/pravega/anomalydetection) # Build Instructions @@ -181,13 +181,13 @@ using release versions. # Proposed Roadmap We propose a roadmap to proceed with the execution of examples based on their complexity: -1. [Pravega client examples](https://github.com/pravega/pravega-samples/tree/master/pravega-client-examples): +1. [Pravega client examples](pravega-client-examples): First step to understand the basics of Pravega and exercise the concepts presented in the documentation. -2. [Flink connector examples](https://github.com/pravega/pravega-samples/tree/master/flink-connector-examples): +2. [Flink connector examples](flink-connector-examples): These examples show the basic functionality of the Flink connector for Pravega. -3. [Hadoop connector examples](https://github.com/pravega/pravega-samples/tree/master/hadoop-connector-examples): +3. [Hadoop connector examples](hadoop-connector-examples): These examples show the basic functionality of the Hadoop connector for Pravega. -4. [Scenarios](https://github.com/pravega/pravega-samples/tree/master/scenarios): Applications that go beyond the basic usage of Pravega APIs, which may include complex interactions +4. [Scenarios](scenarios): Applications that go beyond the basic usage of Pravega APIs, which may include complex interactions between Pravega and analytics engines (e.g., Flink, Hadoop, Spark) to demonstrate analytics use cases. # Where to Find Help From e3fb288fb0193ef39006b23ffa729ce7db0d0717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Gracia?= Date: Tue, 19 Jun 2018 19:35:23 +0200 Subject: [PATCH 27/48] Issue 116: Deleted upload instructions in Flink connector samples, as well as the build associated tasks/properties. 
(#117) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Raúl Gracia --- build.gradle | 17 ----------------- flink-connector-examples/README.md | 20 ++------------------ gradle.properties | 3 --- pravega-client-examples/README.md | 5 +++++ 4 files changed, 7 insertions(+), 38 deletions(-) diff --git a/build.gradle b/build.gradle index c306ee74..67a64ca7 100644 --- a/build.gradle +++ b/build.gradle @@ -43,21 +43,4 @@ subprojects { group "io.pravega" version samplesVersion } - - remotes { - dcos { - host = dcosAddress - user = 'centos' - agent = true - } - } - ssh.settings { - knownHosts = allowAnyHosts - } - - afterEvaluate { - task upload(type: Exec, dependsOn: installDist) { - commandLine 'rsync', '-az', project.installDist.destinationDir, "${remotes.dcos.user}@${remotes.dcos.host}:~" - } - } } diff --git a/flink-connector-examples/README.md b/flink-connector-examples/README.md index a061c894..760a52ea 100644 --- a/flink-connector-examples/README.md +++ b/flink-connector-examples/README.md @@ -8,8 +8,7 @@ system for Apache Flink. 3. Apache Flink running -### Distributing Flink Samples -#### Assemble +## Distributing Flink Samples Use gradle to assemble a distribution folder containing the Flink programs as a ready-to-deploy uber-jar called `pravega-flink-examples--all.jar`: @@ -23,22 +22,7 @@ flink-connector-examples/build/install/pravega-flink-examples/bin: run-example flink-connector-examples/build/install/pravega-flink-examples/lib: -pravega-flink-examples--all.jar -``` - -#### Upload -The `upload` task makes it easy to upload the sample binaries to your cluster. First, configure -Gradle with the address of a node in your cluster. Edit `~/.gradle/gradle.properties` to -specify a value for `dcosAddress`. - -``` -$ cat ~/.gradle/gradle.properties -dcosAddress=10.240.124.164 -``` - -Then, upload the samples to the cluster. They will be copied to `/home/centos` on the target node. -``` -$ ./gradlew upload +pravega-flink-examples-VERSION-all.jar ``` --- diff --git a/gradle.properties b/gradle.properties index 73ba561f..e0574f93 100644 --- a/gradle.properties +++ b/gradle.properties @@ -7,9 +7,6 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -### clusters -dcosAddress=master.mesos - ### dependencies pravegaVersion=0.3.0-50.5f4d75b-SNAPSHOT flinkConnectorVersion=0.3.0-102.0e30f47-SNAPSHOT diff --git a/pravega-client-examples/README.md b/pravega-client-examples/README.md index 74ac6255..307b6827 100644 --- a/pravega-client-examples/README.md +++ b/pravega-client-examples/README.md @@ -6,6 +6,11 @@ Set of example applications to demonstrate the features and APIs of Pravega as w 1. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) 2. Build `pravega-samples` repository +Please note that after building `pravega-samples`, all the executables used here are located in: +``` +pravega-samples/pravega-client-examples/build/install/pravega-client-examples +``` + --- # Examples Catalog From 9987ad9b030ebe22921c80861d809a6d75a067f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Gracia?= Date: Wed, 20 Jun 2018 14:20:49 +0200 Subject: [PATCH 28/48] Issue 84: Channel not shutdown correctly in TurbineHeatSensor sample (#110) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fixes in TurbineHeatSensor.java the use of client factories while creating writers. 
Note that this PR specifically targets to avoid java.lang.RuntimeException: ManagedChannel allocation site exception to be thrown (e.g., the app may throw other exceptions that will be addressed separately). Signed-off-by: Raúl Gracia --- .../turbineheatsensor/TurbineHeatSensor.java | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java b/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java index a7bbc20e..24df79aa 100644 --- a/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java +++ b/scenarios/turbine-heat-sensor/src/main/java/io/pravega/turbineheatsensor/TurbineHeatSensor.java @@ -38,7 +38,6 @@ public class TurbineHeatSensor { private static String streamName = DEFAULT_STREAM_NAME; private static String scopeName = DEFAULT_SCOPE_NAME; - private static ClientFactory clientFactory; private static StreamManager streamManager; private static ReaderGroupManager readerGroupManager; @@ -84,8 +83,10 @@ public static void main(String[] args) throws Exception { // Initialize executor ExecutorService executor = Executors.newFixedThreadPool(producerCount + 10); + URI controllerUri; + ClientFactory clientFactory; try { - URI controllerUri = new URI(TurbineHeatSensor.controllerUri); + controllerUri = new URI(TurbineHeatSensor.controllerUri); clientFactory = ClientFactory.withScope(scopeName, controllerUri); streamManager = StreamManager.create(controllerUri); readerGroupManager = ReaderGroupManager.withScope(scopeName, controllerUri); @@ -108,28 +109,24 @@ public static void main(String[] args) throws Exception { if ( !onlyWrite ) { consumeStats = new PerfStats(producerCount * eventsPerSec * runtimeSec, reportingInterval, messageSize); - SensorReader reader = new SensorReader(producerCount * eventsPerSec * runtimeSec); + SensorReader reader = new SensorReader(producerCount * eventsPerSec * runtimeSec, clientFactory); executor.execute(reader); } /* Create producerCount number of threads to simulate sensors. 
*/ Instant startEventTime = Instant.EPOCH.plus(8, ChronoUnit.HOURS); // sunrise for (int i = 0; i < producerCount; i++) { - URI controllerUri = new URI(TurbineHeatSensor.controllerUri); - ClientFactory factory = ClientFactory.withScope(scopeName, controllerUri); - double baseTemperature = locations[i % locations.length].length() * 10; TemperatureSensor sensor = new TemperatureSensor(i, locations[i % locations.length], baseTemperature, 20, startEventTime); TemperatureSensors worker; if ( isTransaction ) { worker = new TransactionTemperatureSensors(sensor, eventsPerSec, runtimeSec, - isTransaction, factory); + isTransaction, clientFactory); } else { worker = new TemperatureSensors(sensor, eventsPerSec, runtimeSec, - isTransaction, factory); + isTransaction, clientFactory); } executor.execute(worker); - } executor.shutdown(); @@ -142,6 +139,7 @@ public static void main(String[] args) throws Exception { if ( !onlyWrite ) { consumeStats.printTotal(); } + clientFactory.close(); // ZipKinTracer.getTracer().close(); System.exit(0); } @@ -303,7 +301,7 @@ private static class TemperatureSensors implements Runnable { EventWriterConfig eventWriterConfig = EventWriterConfig.builder() .transactionTimeoutTime(DEFAULT_TXN_TIMEOUT_MS) .build(); - this.producer = clientFactory.createEventWriter(streamName, SERIALIZER, eventWriterConfig); + this.producer = factory.createEventWriter(streamName, SERIALIZER, eventWriterConfig); } @@ -378,8 +376,7 @@ void runLoop(BiFunction fn) { System.exit(1); } } - producer.flush(); - //producer.close(); + producer.close(); try { //Wait for the last packet to get acked retFuture.get(); @@ -424,16 +421,16 @@ BiFunction sendFunction() { private static class SensorReader implements Runnable { private final JavaSerializer SERIALIZER = new JavaSerializer<>(); - + final EventStreamReader reader; private int totalEvents; - public SensorReader(int totalEvents) { - this.totalEvents = totalEvents; + public SensorReader(int totalEvents, ClientFactory clientFactory) { + this.totalEvents = totalEvents; + reader = createReader(clientFactory); } @Override public void run() { - EventStreamReader reader = createReader(); try { do { try { @@ -454,7 +451,7 @@ public void run() { } } - public EventStreamReader createReader() { + public EventStreamReader createReader(ClientFactory clientFactory) { String readerName = "Reader"; //reusing a reader group name doesn't work (probably because the sequence is already consumed) @@ -468,6 +465,4 @@ public EventStreamReader createReader() { return clientFactory.createReader(readerName, readerGroup, SERIALIZER, readerConfig); } } - - } From ff6e1278fdbd210874462201a05e63cd4fe615b0 Mon Sep 17 00:00:00 2001 From: Vijay Srinivasaraghavan Date: Wed, 20 Jun 2018 12:54:54 -0700 Subject: [PATCH 29/48] [issue-121] Fix transaction timeout API change (#122) * updated connector snapshot version * fixed connector writer transaction API change Signed-off-by: Vijay Srinivasaraghavan --- .../example/flink/primer/process/ExactlyOnceWriter.java | 4 ++-- gradle.properties | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceWriter.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceWriter.java index 51e37560..8e16ed59 100644 --- a/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceWriter.java +++ 
b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/ExactlyOnceWriter.java @@ -38,7 +38,7 @@ */ public class ExactlyOnceWriter { private static final long checkpointIntervalMillis = 100; - private static final Time txnTimeoutMillis = Time.milliseconds(30 * 1000); + private static final Time txnLeaseRenewalPeriod = Time.milliseconds(30 * 1000); private static final int defaultNumEvents = 50; // read data from the time when the program starts @@ -81,7 +81,7 @@ public static void main(String[] args) throws Exception { .withPravegaConfig(pravegaConfig) .forStream(stream) .withEventRouter( new EventRouter()) - .withTxnTimeout(txnTimeoutMillis) + .withTxnLeaseRenewalPeriod(txnLeaseRenewalPeriod) .withWriterMode( exactlyOnce ? PravegaWriterMode.EXACTLY_ONCE : PravegaWriterMode.ATLEAST_ONCE ) .withSerializationSchema(PravegaSerialization.serializationFor(IntegerEvent.class)) .build(); diff --git a/gradle.properties b/gradle.properties index e0574f93..c828af05 100644 --- a/gradle.properties +++ b/gradle.properties @@ -9,7 +9,7 @@ # ### dependencies pravegaVersion=0.3.0-50.5f4d75b-SNAPSHOT -flinkConnectorVersion=0.3.0-102.0e30f47-SNAPSHOT +flinkConnectorVersion=0.3.0-101.efc1378-SNAPSHOT ### Pravega-samples output library samplesVersion=0.3.0-SNAPSHOT From eb29d1702149b45616c2104445937d336da4bb90 Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 23 Mar 2018 18:20:51 -0400 Subject: [PATCH 30/48] example to generate alert from apache access log via logstash pravega output plugin Signed-off-by: Lida He add example to process apache access log and generate high 500 response alert logstash config and first cut of readme for alert sample instruction to run high count alerter sample. instruction to run the high count alerter sample print output to stdout Add flink references update flink references Steps to use wordCountWriter instead of logstash Signed-off-by: Lida He Add link to readme file for high error count alerter Signed-off-by: Lida He Update README.md Signed-off-by: Lida He add license to conf files Signed-off-by: Lida He read access log from file instead of stdin Signed-off-by: Lida He Update instruction to read access log from file. Signed-off-by: Lida He Update instruction to read from file. 
Signed-off-by: Lida He Update instruction to read access log from file Signed-off-by: Lida He Update README.md Signed-off-by: Lida He Update README.md Signed-off-by: Lida He Signed-off-by: Lida He --- flink-examples/README.md | 39 ++++ .../flink-high-error-count-alert/README.md | 133 +++++++++++ .../filters/01-file-input.conf | 17 ++ .../filters/10-apache-accesslog-filter.conf | 23 ++ .../filters/90-pravega-output.conf | 18 ++ .../filters/95-stdout-output.conf | 14 ++ .../src/main/dist/bin/create-stream.sh | 22 ++ .../examples/flink/alert/AccessLog.java | 97 +++++++++ .../examples/flink/alert/Constants.java | 25 +++ .../flink/alert/HighCountAlerter.java | 206 ++++++++++++++++++ scenarios/turbine-heat-processor/build.gradle | 33 ++- 11 files changed, 620 insertions(+), 7 deletions(-) create mode 100644 flink-examples/README.md create mode 100644 flink-examples/doc/flink-high-error-count-alert/README.md create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf create mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf create mode 100755 flink-examples/src/main/dist/bin/create-stream.sh create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java create mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java diff --git a/flink-examples/README.md b/flink-examples/README.md new file mode 100644 index 00000000..fd847a18 --- /dev/null +++ b/flink-examples/README.md @@ -0,0 +1,39 @@ +# Pravega Flink Connector Samples +Steps to set up and run Pravega Flink connector samples. + +## Pre-requisites +1. Java 8 +2. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) + +## Build Pravega Flink Connectors + +Follow the steps below to build and publish artifacts from source to the local Maven repository: + +``` +$ git clone https://github.com/pravega/flink-connectors.git +$ cd flink-connectors +$ ./gradlew clean install +``` + +## Build the Sample Code + +Follow the steps below to build the sample code: + +``` +$ git clone https://github.com/pravega/pravega-samples.git +$ cd pravega-samples +$ ./gradlew clean installDist +``` + +## Word Count Sample + +This example demonstrates how to use the Pravega Flink Connectors to write data collected +from an external network stream into a Pravega stream and read the data from the Pravega stream. +See [Flink Word Count Sample](doc/flink-wordcount/README.md) for instructions. + +## High Error Count Alert + +This example demonstrates how to use the Pravega Flink connectors to read and +parse Apache access logs from logstash via the [logstash pravega output plugin](https://github.com/pravega/logstash-output-pravega), +and how to generate an alert when the error count is high within a time frame. +See [High Error Count Alert](doc/flink-high-error-count-alert/README.md) for instructions.
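The alert described above (and implemented in the HighCountAlerter diff that follows with Flink's sliding `timeWindow` plus CEP) reduces to counting 500 responses over a sliding window and firing once a threshold is crossed. A minimal plain-Java sketch of that idea, with illustrative names and none of the Flink machinery:

```
import java.util.ArrayDeque;
import java.util.Deque;

// Sliding-window threshold alert sketch; the real sample uses Flink
// timeWindow(30s, 2s) + CEP. All names here are illustrative only.
public class SlidingErrorCountSketch {

    private static final long ALERT_WINDOW_MS = 30_000L; // look back 30 seconds
    private static final int ALERT_THRESHOLD = 6;        // alert at 6 or more 500s

    private final Deque<Long> errorTimestamps = new ArrayDeque<>();

    // Records one response; returns true when a "High 500 responses" alert fires.
    public boolean onResponse(String status, long timestampMs) {
        if ("500".equals(status)) {
            errorTimestamps.addLast(timestampMs);
        }
        // drop 500s that have fallen out of the 30-second window
        while (!errorTimestamps.isEmpty()
                && errorTimestamps.peekFirst() <= timestampMs - ALERT_WINDOW_MS) {
            errorTimestamps.removeFirst();
        }
        return errorTimestamps.size() >= ALERT_THRESHOLD;
    }

    public static void main(String[] args) {
        SlidingErrorCountSketch alerter = new SlidingErrorCountSketch();
        long now = 0;
        for (int i = 1; i <= 8; i++) {
            now += 2_000; // one 500 response every two seconds
            if (alerter.onResponse("500", now)) {
                System.out.println("High 500 responses at t=" + now + "ms");
            }
        }
    }
}
```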
diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md new file mode 100644 index 00000000..cc63c867 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -0,0 +1,133 @@ +# High Count Alert # + +The application reads Apache access logs from a Pravega stream and once every 2 seconds +counts the number of 500 responses in the last 30 seconds, and generates +an alert when the count of 500 responses exceeds 6. + +## Prerequisites ## + +A Docker image containing Pravega and Logstash has been prepared to simplify the demo. Skip ahead to the **Run in Docker Container** section in this document if you have a Docker environment handy. + +Otherwise, proceed to set up Logstash and Pravega: + +1. Logstash installed, see [Install logstash](https://www.elastic.co/guide/en/logstash/5.6/installing-logstash.html). +2. Pravega running, see [here](http://pravega.io/docs/latest/getting-started/) for instructions. + +## Start Logstash with Pravega Output Plugin ## + +On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.2.0.gem`. + +Install the plugin, assuming Logstash is installed at `/usr/share/logstash/`: +``` +$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.2.0.gem +``` + +Copy the contents under flink-examples/doc/flink-high-error-count-alert/filters/ to the Logstash host, e.g., into directory ~/pravega. +Update **pravega_endpoint** in ~/pravega/90-pravega-output.conf. + +``` +output { + pravega { + pravega_endpoint => "tcp://127.0.0.1:9090" <- update to point to your Pravega controller + stream_name => "apacheaccess" + scope => "myscope" + } +} +``` + +Start Logstash, assuming it is installed at /usr/share/logstash/bin. +Note that sometimes it may take a minute or two for Logstash to start. For troubleshooting, the Logstash log files are +normally at /var/log/logstash. To restart, type Ctrl-C, then re-run the command. + +``` +$ sudo /usr/share/logstash/bin/logstash -f ~/pravega +Sending Logstash's logs to /var/log/logstash which is now configured via log4j2.properties +``` + +Normally Logstash is configured to receive data from remote log shippers, such as filebeat. For simplicity, in this demo +Logstash is configured to read data from /tmp/access.log. + +## Run in Docker Container ## + +Create a file at /tmp/access.log: +``` +$ touch /tmp/access.log +``` + +Run the script below to start the container from the prebuilt image. Adjust the parameters to your needs. +``` +#!/bin/sh +set -u + +PRAVEGA_ENDPOINT=tcp://127.0.0.1:9090 +PRAVEGA_SCOPE=myscope +PRAVEGA_STREAM=apacheaccess +CONTAINER_NAME=pravega +IMAGE_NAME=emccorp/pravega-demo + +docker run -d --name $CONTAINER_NAME \ + -p 9090:9090 \ + -p 9091:9091 \ + -v /tmp/access.log:/opt/data/access.log \ + -v /tmp/logs/:/var/log/pravega/ \ + -e PRAVEGA_ENDPOINT=${PRAVEGA_ENDPOINT} \ + -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \ + -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \ + ${IMAGE_NAME} +``` + +More details can be found on GitHub [pravega docker](https://github.com/hldnova/pravega-docker) and on Docker Hub [pravega docker image](https://hub.docker.com/r/emccorp/pravega-demo/). + +## Run HighCountAlerter ## + +Run the alerter. Adjust the controller and scope/stream if necessary.
+``` +$ cd flink-examples/build/install/pravega-flink-examples +$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apacheaccess] +``` + +## Input Data ## + +Add access logs to /tmp/access.log, e.g., by running the command below every one or two seconds. +``` +echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log +``` + +Logstash will push the data to Pravega as a JSON string, e.g., +``` +{ + "request" => "/mapping/", + "agent" => "\"python-client\"", + "auth" => "peter", + "ident" => "-", + "verb" => "PUT", + "message" => "10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] \"PUT /mapping/ HTTP/1.1\" 500 182 \"http://example.com/myapp\" \"python-client\"", + "referrer" => "\"http://example.com/myapp\"", + "@timestamp" => 2018-03-19T06:24:01.000Z, + "response" => "500", + "bytes" => "182", + "clientip" => "10.1.1.11", + "@version" => "1", + "host" => "lglca061.lss.emc.com", + "httpversion" => "1.1" +} +``` + +## View Alert ## +In the HighCountAlerter window, you should see output like the following. Once the 500 response count reaches 6 or above, it +should print **High 500 responses** alerts. +``` +3> Response count: 500 : 1 +3> Response count: 500 : 2 +3> Response count: 500 : 4 +3> Response count: 500 : 6 +2> High 500 responses: 500 : 6 +3> Response count: 500 : 8 +3> High 500 responses: 500 : 8 +3> Response count: 500 : 8 +2> High 500 responses: 500 : 8 +3> Response count: 500 : 7 +3> High 500 responses: 500 : 7 +3> Response count: 500 : 5 +3> Response count: 500 : 3 +3> Response count: 500 : 1 +``` diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf b/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf new file mode 100644 index 00000000..22ae47fe --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf @@ -0,0 +1,17 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +input { + file { + path => "/tmp/access.log" + start_position => beginning + } +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf b/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf new file mode 100644 index 00000000..d33b4b95 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf @@ -0,0 +1,23 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +filter { + grok { + match => { "message" => "%{COMBINEDAPACHELOG}" } + } + date { + match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ] + } + mutate { + remove_field => [ "timestamp" ] + } + +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf new file mode 100644 index 00000000..9da70a63 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf @@ -0,0 +1,18 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +output { + pravega { + pravega_endpoint => "tcp://127.0.0.1:9090" + stream_name => "apacheaccess" + scope => "myscope" + } +} + diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf new file mode 100644 index 00000000..04986e41 --- /dev/null +++ b/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf @@ -0,0 +1,14 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +output { + stdout { codec => rubydebug } +} + diff --git a/flink-examples/src/main/dist/bin/create-stream.sh b/flink-examples/src/main/dist/bin/create-stream.sh new file mode 100755 index 00000000..9993ee91 --- /dev/null +++ b/flink-examples/src/main/dist/bin/create-stream.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# +# sample script to create scope and stream using Pravega REST API +# +host=localhost +port=9091 +scope=myscope +stream=apacheaccess +curl -v -H "Content-Type: application/json" $host:${port}/v1/scopes \ +-d '{ + "scopeName": "'${scope}'" +}' + +curl -v -H "Content-Type: application/json" $host:${port}/v1/scopes/${scope}/streams \ +-d '{ + "streamName": "'${stream}'", + "scopeName": "'${scope}'", + "scalingPolicy":{ + "type": "FIXED_NUM_SEGMENTS", + "minSegments": 1 + } +}' diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java new file mode 100644 index 00000000..4772c6fc --- /dev/null +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.examples.flink.alert; + +import io.pravega.shaded.com.google.gson.Gson; + +/** + * Object to process Apache access log + */ +public class AccessLog { + private String ClientIP; + private String Status; + private long Timestamp; + private String Verb; + + public AccessLog(){ + Status=Verb=ClientIP=""; + Timestamp=0L; + } + + public String getClientIP() { + return ClientIP; + } + + public void setClientIP(String clientIP) { + ClientIP = clientIP; + } + + public String getStatus() { + return Status; + } + + public void setStatus(String status) { + Status = status; + } + + public long getTimestamp() { + return Timestamp; + } + + public void setTimestamp(long timestamp) { + this.Timestamp = timestamp; + } + + public String getVerb() { + return Verb; + } + + public void setVerb(String verb) { + Verb = verb; + } + + /** + * The events in the DataStream to which you want to apply pattern matching must + * implement proper equals() and hashCode() methods because these are used for + * comparing and matching events. + */ + @Override + public boolean equals(Object obj) { + if(this==obj){ + return true; + } + if(!(obj instanceof AccessLog)){ + return false; + } + AccessLog accessLog =(AccessLog)obj; + return accessLog.Verb.equals(Verb) && + accessLog.Status.equals(Status) && + accessLog.Timestamp==Timestamp && + accessLog.ClientIP.equals(ClientIP); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((Status == null) ? 0 : Status.hashCode()); + result = prime * result + (int) (Timestamp ^ (Timestamp >>> 32)); + result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode()); + result = prime * result + ((Verb == null) ? 0 : Verb.hashCode()); + return result; + } + + @Override + public String toString() { + Gson gson = new Gson(); + return gson.toJson(this); + } +} diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java new file mode 100644 index 00000000..a0acb073 --- /dev/null +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.examples.flink.alert; + +/** + * Defines a handful of constants shared by classes in this package. 
+ *
+ */
+public class Constants {
+    protected static final String STREAM_PARAM = "stream";
+    protected static final String DEFAULT_STREAM = "myscope/apacheaccess";
+    protected static final String CONTROLLER_PARAM = "controller";
+    protected static final String DEFAULT_CONTROLLER = "tcp://127.0.0.1:9090";
+    protected static final Integer ALERT_THRESHOLD = 6;
+    protected static final Integer ALERT_WINDOW = 30;
+    protected static final Integer ALERT_INTERVAL = 2;
+}
diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
new file mode 100644
index 00000000..bf08fd9a
--- /dev/null
+++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ */
+package io.pravega.examples.flink.alert;
+
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParser;
+import io.pravega.connectors.flink.FlinkPravegaReader;
+import io.pravega.connectors.flink.util.FlinkPravegaParams;
+import io.pravega.connectors.flink.util.StreamId;
+import io.pravega.shaded.com.google.gson.Gson;
+import org.apache.flink.api.common.functions.FilterFunction;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.java.utils.ParameterTool;
+import org.apache.flink.cep.CEP;
+import org.apache.flink.cep.PatternSelectFunction;
+import org.apache.flink.cep.PatternStream;
+import org.apache.flink.cep.pattern.Pattern;
+import org.apache.flink.cep.pattern.conditions.SimpleCondition;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.windowing.time.Time;
+import org.apache.flink.util.Collector;
+import org.joda.time.DateTime;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Map;
+
+/*
+ * This application has the following input parameters
+ *     stream - the Pravega stream name to read from
+ *     controller - the Pravega controller URI, e.g., tcp://localhost:9090
+ *     Note that this parameter is processed by the Pravega Flink connector
+ */
+public class HighCountAlerter {
+
+    // Logger initialization
+    private static final Logger LOG = LoggerFactory.getLogger(HighCountAlerter.class);
+
+    // The application reads data from the specified Pravega stream and, once every ALERT_INTERVAL (2 seconds),
+    // counts the number of 500 responses in the last ALERT_WINDOW (30 seconds), generating an
+    // alert when the count reaches ALERT_THRESHOLD (6).
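+    //
+    // Worked example with the defaults (illustrative): a 30-second window slides
+    // every 2 seconds, so six or more "500" responses arriving within any
+    // 30-second span all land in at least one window, whose count then meets
+    // ALERT_THRESHOLD and triggers the CEP pattern defined in main() below.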
+
+    public static void main(String[] args) throws Exception {
+        LOG.info("Starting HighCountAlerter...");
+
+        // initialize the parameter utility tool in order to retrieve input parameters
+        ParameterTool params = ParameterTool.fromArgs(args);
+
+        // create the Pravega helper utility for Flink using the input parameters
+        FlinkPravegaParams helper = new FlinkPravegaParams(params);
+
+        // get the Pravega stream from the input parameters
+        StreamId streamId = helper.getStreamFromParam(Constants.STREAM_PARAM,
+                Constants.DEFAULT_STREAM);
+
+        // create the Pravega stream if it does not exist
+        helper.createStream(streamId);
+
+        // initialize the Flink execution environment
+        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+
+        // create the Pravega stream reader
+        long startTime = 0;
+        FlinkPravegaReader<String> reader = helper.newReader(streamId, startTime, String.class);
+
+        // add the Pravega reader as the data source
+        DataStream<String> inputStream = env.addSource(reader);
+
+        // create an output sink to stdout for verification
+        //inputStream.print();
+
+        // transform logs
+        DataStream<AccessLog> dataStream = inputStream.map(new ParseLogData());
+
+        // create an output sink to stdout for verification
+        //dataStream.print();
+
+        // get responses and their counts
+        DataStream<ResponseCount> countStream =
+                dataStream.flatMap(new FlatMapFunction<AccessLog, ResponseCount>() {
+                    @Override
+                    public void flatMap(AccessLog value, Collector<ResponseCount> out) throws Exception {
+                        out.collect(new ResponseCount(value.getStatus(), 1));
+                    }
+                }).filter((FilterFunction<ResponseCount>) count -> {
+                    return !count.response.isEmpty();
+                }).keyBy("response")
+                .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL))
+                .sum("count");
+
+        // create an output sink to stdout for verification
+        countStream.print();
+
+        // create alert pattern
+        Pattern<ResponseCount, ?> pattern500 = Pattern.<ResponseCount>begin("500pattern")
+                .where(new SimpleCondition<ResponseCount>() {
+                    @Override
+                    public boolean filter(ResponseCount value) throws Exception {
+                        return value.count >= Constants.ALERT_THRESHOLD &&
+                               value.response.equals("500");
+                    }
+                });
+
+        PatternStream<ResponseCount> patternStream = CEP.pattern(countStream, pattern500);
+
+        DataStream<Alert> alertStream = patternStream.select(
+                new PatternSelectFunction<ResponseCount, Alert>() {
+                    @Override
+                    public Alert select(Map<String, List<ResponseCount>> pattern) throws Exception {
+                        ResponseCount count = pattern.get("500pattern").get(0);
+                        return new Alert(count.response, count.count, "High 500 responses");
+                    }
+                });
+
+        // create an output sink to stdout for verification
+        alertStream.print();
+
+        // execute within the Flink environment
+        env.execute("HighCountAlerter");
+
+        LOG.info("Ending HighCountAlerter...");
+    }
+
+    // Parse the incoming stream & convert records into Java POJOs
+    private static class ParseLogData implements MapFunction<String, AccessLog> {
+        public AccessLog map(String record) throws Exception {
+            // TODO: handle exceptions
+            Gson gson = new Gson();
+            AccessLog accessLog = new AccessLog();
+            JsonParser parser = new JsonParser();
+            JsonObject obj = parser.parse(record).getAsJsonObject();
+            if (obj.has("verb")) {
+                String verb = obj.get("verb").getAsString();
+                accessLog.setVerb(verb);
+            }
+            if (obj.has("response")) {
+                String response = obj.get("response").getAsString();
+                accessLog.setStatus(response);
+            }
+            if (obj.has("@timestamp")) {
+                String timestamp = obj.get("@timestamp").getAsString();
+                DateTime dateTime = new DateTime(timestamp);
+                accessLog.setTimestamp(dateTime.getMillis());
+            }
+            if (obj.has("clientip")) {
+                String client = obj.get("clientip").getAsString();
+                accessLog.setClientIP(client);
+            }
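+            // A record produced by logstash typically looks like the following
+            // (illustrative sample; fields other than the four read above are ignored):
+            // {"clientip":"10.1.1.11","verb":"PUT","response":"500",
+            //  "@timestamp":"2018-03-19T06:24:01.000Z"}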
+            return accessLog;
+        }
+    }
+
+    // Data type for access status counts
+    public static class ResponseCount {
+
+        public String response;
+        public long count;
+
+        public ResponseCount() {}
+
+        public ResponseCount(String status, long count) {
+            this.response = status;
+            this.count = count;
+        }
+
+        @Override
+        public String toString() {
+            return "Response count: " + response + " : " + count;
+        }
+    }
+
+    // Data type for alerts raised against access status counts
+    public static class Alert {
+
+        private String response;
+        private long count;
+        private String description;
+
+        public Alert() {}
+
+        public Alert(String response, long count, String description) {
+            this.response = response;
+            this.count = count;
+            this.description = description;
+        }
+
+        @Override
+        public String toString() {
+            return description + ": " + response + " : " + count;
+        }
+    }
+
+}
diff --git a/scenarios/turbine-heat-processor/build.gradle b/scenarios/turbine-heat-processor/build.gradle
index b080e52d..e55acade 100644
--- a/scenarios/turbine-heat-processor/build.gradle
+++ b/scenarios/turbine-heat-processor/build.gradle
@@ -19,7 +19,7 @@ apply plugin: 'idea'
 apply plugin: 'eclipse'
 
 sourceCompatibility = "1.8"
-archivesBaseName = 'pravega-flink-scenario-turbineheatprocessor'
+archivesBaseName = 'pravega-flink-examples'
 
 ext {
   scalaJava8CompatVersion = '0.7.0'
@@ -27,10 +27,13 @@ ext {
 
 dependencies {
   compile "org.scala-lang.modules:scala-java8-compat_2.11:${scalaJava8CompatVersion}"
-  compile "io.pravega:pravega-connectors-flink_2.11:${flinkConnectorVersion}"
+  compile "io.pravega:pravega-connectors-flink_2.11:${connectorVersion}"
   compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}"
   compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}"
+  compile "org.apache.flink:flink-cep_2.11:${flinkVersion}"
   compile "org.slf4j:slf4j-log4j12:1.7.14"
+  compile "com.google.code.gson:gson:2.3.+"
+  compile "joda-time:joda-time:2.9.+"
 }
 
 shadowJar {
@@ -40,10 +43,24 @@ shadowJar {
   }
 }
 
-task scriptTrubineHeatProcessor(type: CreateStartScripts) {
+task scriptWordCountWriter(type: CreateStartScripts) {
   outputDir = file('build/scripts')
-  mainClassName = 'io.pravega.turbineheatprocessor.TurbineHeatProcessor'
-  applicationName = 'turbineHeatProcessor'
+  mainClassName = 'io.pravega.examples.flink.wordcount.WordCountWriter'
+  applicationName = 'wordCountWriter'
+  classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
+}
+
+task scriptWordCountReader(type: CreateStartScripts) {
+  outputDir = file('build/scripts')
+  mainClassName = 'io.pravega.examples.flink.wordcount.WordCountReader'
+  applicationName = 'wordCountReader'
+  classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
+}
+
+task scriptFlinkAlerter(type: CreateStartScripts) {
+  outputDir = file('build/scripts')
+  mainClassName = 'io.pravega.examples.flink.alert.HighCountAlerter'
+  applicationName = 'highCountAlerter'
   classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath
 }
 
@@ -58,8 +75,10 @@ distributions {
   main {
     baseName = archivesBaseName
       from(project.configurations.runtime)
     }
     into('bin') {
-      from project.scriptTrubineHeatProcessor
+      from project.scriptWordCountWriter
+      from project.scriptWordCountReader
+      from project.scriptFlinkAlerter
     }
   }
 }
-}
\ No newline at end of file
+}

From 44d2ab066a489afde179a285ebb09b53641b1f4e Mon Sep 17 00:00:00 2001
From: Lida He
Date: Thu, 17 May 2018 17:11:07 -0400
Subject: [PATCH 31/48] correct typo

Signed-off-by: Lida He
---
 flink-examples/doc/flink-high-error-count-alert/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git
a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index cc63c867..6d9cb105 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -89,7 +89,7 @@ $ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apa Add access logs to /tmp/access.log, e.g., by running command below every one or two seconds. ``` -echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/accesslog +echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log ``` Logstash will push the data to Pravega in json string, e.g., From 6bf0c086e418085c710135a9b54493af7a06d132 Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 17:13:17 -0400 Subject: [PATCH 32/48] apply filter earlier to let in just 500 responses Signed-off-by: Lida He --- .../io/pravega/examples/flink/alert/HighCountAlerter.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index bf08fd9a..59211d13 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -94,7 +94,7 @@ public void flatMap(AccessLog value, Collector out) throws Except out.collect(new ResponseCount(value.getStatus(), 1)); } }).filter((FilterFunction) count -> { - return !count.response.isEmpty(); + return !count.response.equals("500"); }).keyBy("response") .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) .sum("count"); @@ -107,8 +107,7 @@ public void flatMap(AccessLog value, Collector out) throws Except .where(new SimpleCondition() { @Override public boolean filter(ResponseCount value) throws Exception { - return value.count >= Constants.ALERT_THRESHOLD && - value.response.equals("500"); + return value.count >= Constants.ALERT_THRESHOLD; } }); From b3c84518fa2219ac8881cfd0b8b560f0773e733b Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 21:43:44 -0400 Subject: [PATCH 33/48] use jackson to covert object to/from json Signed-off-by: Lida He --- .../examples/flink/alert/AccessLog.java | 39 +++++++++++++------ .../flink/alert/HighCountAlerter.java | 35 +++-------------- scenarios/turbine-heat-processor/build.gradle | 1 - 3 files changed, 32 insertions(+), 43 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java index 4772c6fc..a57c6c3e 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -10,22 +10,30 @@ */ package io.pravega.examples.flink.alert; -import io.pravega.shaded.com.google.gson.Gson; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; +import 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.joda.time.DateTime; /** * Object to process Apache access log */ +@JsonIgnoreProperties(ignoreUnknown = true) public class AccessLog { private String ClientIP; + private String Status; - private long Timestamp; + private String Verb; + private String TimestampStr; + public AccessLog(){ Status=Verb=ClientIP=""; - Timestamp=0L; } + @JsonProperty("clientip") public String getClientIP() { return ClientIP; } @@ -34,6 +42,7 @@ public void setClientIP(String clientIP) { ClientIP = clientIP; } + @JsonProperty("response") public String getStatus() { return Status; } @@ -42,14 +51,16 @@ public void setStatus(String status) { Status = status; } - public long getTimestamp() { - return Timestamp; - } + @JsonProperty("@timestamp") + public String getTimestampStr() { return TimestampStr; } + + public void setTimestampStr(String timestampStr) { TimestampStr = timestampStr; } - public void setTimestamp(long timestamp) { - this.Timestamp = timestamp; + public long getTimestampMillis() { + return new DateTime(getTimestampStr()).getMillis(); } + @JsonProperty("verb") public String getVerb() { return Verb; } @@ -74,7 +85,7 @@ public boolean equals(Object obj) { AccessLog accessLog =(AccessLog)obj; return accessLog.Verb.equals(Verb) && accessLog.Status.equals(Status) && - accessLog.Timestamp==Timestamp && + accessLog.TimestampStr.equals(TimestampStr) && accessLog.ClientIP.equals(ClientIP); } @@ -83,15 +94,19 @@ public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((Status == null) ? 0 : Status.hashCode()); - result = prime * result + (int) (Timestamp ^ (Timestamp >>> 32)); result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode()); + result = prime * result + ((TimestampStr == null) ? 0 : TimestampStr.hashCode()); result = prime * result + ((Verb == null) ? 
0 : Verb.hashCode()); return result; } @Override public String toString() { - Gson gson = new Gson(); - return gson.toJson(this); + ObjectMapper mapper = new ObjectMapper(); + try { + return mapper.writeValueAsString(this); + } catch (JsonProcessingException e) { + return "AccessLog: Timestamp=" + getTimestampStr() +", ClientIP=" + getClientIP() + ", Verb=" + getVerb() + ", Status=" + getStatus(); + } } } diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index 59211d13..b03d7963 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -10,12 +10,9 @@ */ package io.pravega.examples.flink.alert; -import com.google.gson.JsonObject; -import com.google.gson.JsonParser; import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.util.FlinkPravegaParams; import io.pravega.connectors.flink.util.StreamId; -import io.pravega.shaded.com.google.gson.Gson; import org.apache.flink.api.common.functions.FilterFunction; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.functions.MapFunction; @@ -25,11 +22,11 @@ import org.apache.flink.cep.PatternStream; import org.apache.flink.cep.pattern.Pattern; import org.apache.flink.cep.pattern.conditions.SimpleCondition; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.windowing.time.Time; import org.apache.flink.util.Collector; -import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -94,7 +91,7 @@ public void flatMap(AccessLog value, Collector out) throws Except out.collect(new ResponseCount(value.getStatus(), 1)); } }).filter((FilterFunction) count -> { - return !count.response.equals("500"); + return count.response.equals("500"); }).keyBy("response") .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) .sum("count"); @@ -134,31 +131,9 @@ public Alert select(Map> pattern) throws Exception { //Parse the incoming streams & convert into Java PoJos private static class ParseLogData implements MapFunction{ - public AccessLog map(String record) throws Exception { - // TODO: handle exceptions - Gson gson = new Gson(); - AccessLog accessLog = new AccessLog(); - JsonParser parser = new JsonParser(); - JsonObject obj = parser.parse(record).getAsJsonObject(); - if (obj.has("verb")) { - String verb = obj.get("verb").getAsString(); - accessLog.setVerb(verb); - } - if (obj.has("response")) { - String response = obj.get("response").getAsString(); - accessLog.setStatus(response); - } - if (obj.has("@timestamp")) { - String timestamp = obj.get("@timestamp").getAsString(); - - DateTime dateTime = new DateTime(timestamp); - accessLog.setTimestamp(dateTime.getMillis()); - } - if (obj.has("clientip")) { - String client = obj.get("clientip").getAsString(); - accessLog.setClientIP(client); - } - return accessLog; + public AccessLog map(String value) throws Exception { + ObjectMapper mapper = new ObjectMapper(); + return mapper.readValue(value, AccessLog.class); } } diff --git a/scenarios/turbine-heat-processor/build.gradle b/scenarios/turbine-heat-processor/build.gradle 
index e55acade..784d2c60 100644 --- a/scenarios/turbine-heat-processor/build.gradle +++ b/scenarios/turbine-heat-processor/build.gradle @@ -32,7 +32,6 @@ dependencies { compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" compile "org.apache.flink:flink-cep_2.11:${flinkVersion}" compile "org.slf4j:slf4j-log4j12:1.7.14" - compile "com.google.code.gson:gson:2.3.+" compile "joda-time:joda-time:2.9.+" } From 0be606ead973c15c7e658cd33dc0b55d2ceb0ede Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 22:35:40 -0400 Subject: [PATCH 34/48] merge with develop branch Signed-off-by: Lida He --- scenarios/turbine-heat-processor/build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scenarios/turbine-heat-processor/build.gradle b/scenarios/turbine-heat-processor/build.gradle index 784d2c60..549534d0 100644 --- a/scenarios/turbine-heat-processor/build.gradle +++ b/scenarios/turbine-heat-processor/build.gradle @@ -9,7 +9,7 @@ * */ plugins { - id 'com.github.johnrengelman.shadow' version '1.2.4' + id 'com.github.johnrengelman.shadow' version '2.0.3' } apply plugin: "java" @@ -30,8 +30,8 @@ dependencies { compile "io.pravega:pravega-connectors-flink_2.11:${connectorVersion}" compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}" compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" + compile "org.slf4j:slf4j-log4j12:1.7.25" compile "org.apache.flink:flink-cep_2.11:${flinkVersion}" - compile "org.slf4j:slf4j-log4j12:1.7.14" compile "joda-time:joda-time:2.9.+" } From a65195754d510661b96c51b3150abb2794b7fdac Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 23 Mar 2018 18:20:51 -0400 Subject: [PATCH 35/48] example to generate alert from apache access log via logstash pravega output plugin Signed-off-by: Lida He add example to process apache access log and generate high 500 response alert logstash config and first cut of readme for alert sample instruction to run high count alerter sample. instruction to run the high count alerter sample print output to stdout Add flink references update flink references Steps to use wordCountWriter instead of logstash Signed-off-by: Lida He Add link to readme file for high error count alerter Signed-off-by: Lida He Update README.md Signed-off-by: Lida He add license to conf files Signed-off-by: Lida He read access log from file instead of stdin Signed-off-by: Lida He Update instruction to read access log from file. Signed-off-by: Lida He Update instruction to read from file. Signed-off-by: Lida He Update instruction to read access log from file Signed-off-by: Lida He Update README.md Signed-off-by: Lida He Update README.md Signed-off-by: Lida He Signed-off-by: Lida He --- .../flink-high-error-count-alert/README.md | 4 ++ .../examples/flink/alert/AccessLog.java | 39 ++++++------------- .../flink/alert/HighCountAlerter.java | 38 +++++++++++++++--- 3 files changed, 48 insertions(+), 33 deletions(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index 6d9cb105..cab2285e 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -89,7 +89,11 @@ $ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apa Add access logs to /tmp/access.log, e.g., by running command below every one or two seconds. 
``` +<<<<<<< 0be606ead973c15c7e658cd33dc0b55d2ceb0ede echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log +======= +echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/accesslog +>>>>>>> example to generate alert from apache access log via logstash pravega output plugin ``` Logstash will push the data to Pravega in json string, e.g., diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java index a57c6c3e..4772c6fc 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -10,30 +10,22 @@ */ package io.pravega.examples.flink.alert; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; -import org.joda.time.DateTime; +import io.pravega.shaded.com.google.gson.Gson; /** * Object to process Apache access log */ -@JsonIgnoreProperties(ignoreUnknown = true) public class AccessLog { private String ClientIP; - private String Status; - + private long Timestamp; private String Verb; - private String TimestampStr; - public AccessLog(){ Status=Verb=ClientIP=""; + Timestamp=0L; } - @JsonProperty("clientip") public String getClientIP() { return ClientIP; } @@ -42,7 +34,6 @@ public void setClientIP(String clientIP) { ClientIP = clientIP; } - @JsonProperty("response") public String getStatus() { return Status; } @@ -51,16 +42,14 @@ public void setStatus(String status) { Status = status; } - @JsonProperty("@timestamp") - public String getTimestampStr() { return TimestampStr; } - - public void setTimestampStr(String timestampStr) { TimestampStr = timestampStr; } + public long getTimestamp() { + return Timestamp; + } - public long getTimestampMillis() { - return new DateTime(getTimestampStr()).getMillis(); + public void setTimestamp(long timestamp) { + this.Timestamp = timestamp; } - @JsonProperty("verb") public String getVerb() { return Verb; } @@ -85,7 +74,7 @@ public boolean equals(Object obj) { AccessLog accessLog =(AccessLog)obj; return accessLog.Verb.equals(Verb) && accessLog.Status.equals(Status) && - accessLog.TimestampStr.equals(TimestampStr) && + accessLog.Timestamp==Timestamp && accessLog.ClientIP.equals(ClientIP); } @@ -94,19 +83,15 @@ public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((Status == null) ? 0 : Status.hashCode()); + result = prime * result + (int) (Timestamp ^ (Timestamp >>> 32)); result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode()); - result = prime * result + ((TimestampStr == null) ? 0 : TimestampStr.hashCode()); result = prime * result + ((Verb == null) ? 
0 : Verb.hashCode()); return result; } @Override public String toString() { - ObjectMapper mapper = new ObjectMapper(); - try { - return mapper.writeValueAsString(this); - } catch (JsonProcessingException e) { - return "AccessLog: Timestamp=" + getTimestampStr() +", ClientIP=" + getClientIP() + ", Verb=" + getVerb() + ", Status=" + getStatus(); - } + Gson gson = new Gson(); + return gson.toJson(this); } } diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index b03d7963..bf08fd9a 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -10,9 +10,12 @@ */ package io.pravega.examples.flink.alert; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.util.FlinkPravegaParams; import io.pravega.connectors.flink.util.StreamId; +import io.pravega.shaded.com.google.gson.Gson; import org.apache.flink.api.common.functions.FilterFunction; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.functions.MapFunction; @@ -22,11 +25,11 @@ import org.apache.flink.cep.PatternStream; import org.apache.flink.cep.pattern.Pattern; import org.apache.flink.cep.pattern.conditions.SimpleCondition; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.windowing.time.Time; import org.apache.flink.util.Collector; +import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,7 +94,7 @@ public void flatMap(AccessLog value, Collector out) throws Except out.collect(new ResponseCount(value.getStatus(), 1)); } }).filter((FilterFunction) count -> { - return count.response.equals("500"); + return !count.response.isEmpty(); }).keyBy("response") .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) .sum("count"); @@ -104,7 +107,8 @@ public void flatMap(AccessLog value, Collector out) throws Except .where(new SimpleCondition() { @Override public boolean filter(ResponseCount value) throws Exception { - return value.count >= Constants.ALERT_THRESHOLD; + return value.count >= Constants.ALERT_THRESHOLD && + value.response.equals("500"); } }); @@ -131,9 +135,31 @@ public Alert select(Map> pattern) throws Exception { //Parse the incoming streams & convert into Java PoJos private static class ParseLogData implements MapFunction{ - public AccessLog map(String value) throws Exception { - ObjectMapper mapper = new ObjectMapper(); - return mapper.readValue(value, AccessLog.class); + public AccessLog map(String record) throws Exception { + // TODO: handle exceptions + Gson gson = new Gson(); + AccessLog accessLog = new AccessLog(); + JsonParser parser = new JsonParser(); + JsonObject obj = parser.parse(record).getAsJsonObject(); + if (obj.has("verb")) { + String verb = obj.get("verb").getAsString(); + accessLog.setVerb(verb); + } + if (obj.has("response")) { + String response = obj.get("response").getAsString(); + accessLog.setStatus(response); + } + if (obj.has("@timestamp")) { + String timestamp = obj.get("@timestamp").getAsString(); + + DateTime dateTime = new 
DateTime(timestamp); + accessLog.setTimestamp(dateTime.getMillis()); + } + if (obj.has("clientip")) { + String client = obj.get("clientip").getAsString(); + accessLog.setClientIP(client); + } + return accessLog; } } From ec03f2ca02687f9326908a620a2a016839ae6c27 Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 17:11:07 -0400 Subject: [PATCH 36/48] correct typo Signed-off-by: Lida He --- flink-examples/doc/flink-high-error-count-alert/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index cab2285e..6d9cb105 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -89,11 +89,7 @@ $ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apa Add access logs to /tmp/access.log, e.g., by running command below every one or two seconds. ``` -<<<<<<< 0be606ead973c15c7e658cd33dc0b55d2ceb0ede echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log -======= -echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/accesslog ->>>>>>> example to generate alert from apache access log via logstash pravega output plugin ``` Logstash will push the data to Pravega in json string, e.g., From f4c4a612bb24466eeaf369676fdf3671ed35e281 Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 17:13:17 -0400 Subject: [PATCH 37/48] apply filter earlier to let in just 500 responses Signed-off-by: Lida He --- .../io/pravega/examples/flink/alert/HighCountAlerter.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index bf08fd9a..59211d13 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -94,7 +94,7 @@ public void flatMap(AccessLog value, Collector out) throws Except out.collect(new ResponseCount(value.getStatus(), 1)); } }).filter((FilterFunction) count -> { - return !count.response.isEmpty(); + return !count.response.equals("500"); }).keyBy("response") .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) .sum("count"); @@ -107,8 +107,7 @@ public void flatMap(AccessLog value, Collector out) throws Except .where(new SimpleCondition() { @Override public boolean filter(ResponseCount value) throws Exception { - return value.count >= Constants.ALERT_THRESHOLD && - value.response.equals("500"); + return value.count >= Constants.ALERT_THRESHOLD; } }); From 7e8020bafd0db3b4c13bd8701d400c224b0d9263 Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 22:36:50 -0400 Subject: [PATCH 38/48] merge with develop branch Signed-off-by: Lida He --- scenarios/turbine-heat-processor/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/scenarios/turbine-heat-processor/build.gradle b/scenarios/turbine-heat-processor/build.gradle index 549534d0..4f7e3a1d 100644 --- a/scenarios/turbine-heat-processor/build.gradle +++ b/scenarios/turbine-heat-processor/build.gradle @@ -32,6 +32,7 @@ dependencies { compile 
"org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" compile "org.slf4j:slf4j-log4j12:1.7.25" compile "org.apache.flink:flink-cep_2.11:${flinkVersion}" + compile "org.slf4j:slf4j-log4j12:1.7.14" compile "joda-time:joda-time:2.9.+" } From d3e5b4bb0d5bf8e55a03fcfcf85c997d0300b27a Mon Sep 17 00:00:00 2001 From: Lida He Date: Thu, 17 May 2018 21:43:44 -0400 Subject: [PATCH 39/48] use jackson to covert object to/from json Signed-off-by: Lida He --- .../examples/flink/alert/AccessLog.java | 39 +++++++++++++------ .../flink/alert/HighCountAlerter.java | 35 +++-------------- 2 files changed, 32 insertions(+), 42 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java index 4772c6fc..a57c6c3e 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -10,22 +10,30 @@ */ package io.pravega.examples.flink.alert; -import io.pravega.shaded.com.google.gson.Gson; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.joda.time.DateTime; /** * Object to process Apache access log */ +@JsonIgnoreProperties(ignoreUnknown = true) public class AccessLog { private String ClientIP; + private String Status; - private long Timestamp; + private String Verb; + private String TimestampStr; + public AccessLog(){ Status=Verb=ClientIP=""; - Timestamp=0L; } + @JsonProperty("clientip") public String getClientIP() { return ClientIP; } @@ -34,6 +42,7 @@ public void setClientIP(String clientIP) { ClientIP = clientIP; } + @JsonProperty("response") public String getStatus() { return Status; } @@ -42,14 +51,16 @@ public void setStatus(String status) { Status = status; } - public long getTimestamp() { - return Timestamp; - } + @JsonProperty("@timestamp") + public String getTimestampStr() { return TimestampStr; } + + public void setTimestampStr(String timestampStr) { TimestampStr = timestampStr; } - public void setTimestamp(long timestamp) { - this.Timestamp = timestamp; + public long getTimestampMillis() { + return new DateTime(getTimestampStr()).getMillis(); } + @JsonProperty("verb") public String getVerb() { return Verb; } @@ -74,7 +85,7 @@ public boolean equals(Object obj) { AccessLog accessLog =(AccessLog)obj; return accessLog.Verb.equals(Verb) && accessLog.Status.equals(Status) && - accessLog.Timestamp==Timestamp && + accessLog.TimestampStr.equals(TimestampStr) && accessLog.ClientIP.equals(ClientIP); } @@ -83,15 +94,19 @@ public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((Status == null) ? 0 : Status.hashCode()); - result = prime * result + (int) (Timestamp ^ (Timestamp >>> 32)); result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode()); + result = prime * result + ((TimestampStr == null) ? 0 : TimestampStr.hashCode()); result = prime * result + ((Verb == null) ? 
0 : Verb.hashCode()); return result; } @Override public String toString() { - Gson gson = new Gson(); - return gson.toJson(this); + ObjectMapper mapper = new ObjectMapper(); + try { + return mapper.writeValueAsString(this); + } catch (JsonProcessingException e) { + return "AccessLog: Timestamp=" + getTimestampStr() +", ClientIP=" + getClientIP() + ", Verb=" + getVerb() + ", Status=" + getStatus(); + } } } diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index 59211d13..b03d7963 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -10,12 +10,9 @@ */ package io.pravega.examples.flink.alert; -import com.google.gson.JsonObject; -import com.google.gson.JsonParser; import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.util.FlinkPravegaParams; import io.pravega.connectors.flink.util.StreamId; -import io.pravega.shaded.com.google.gson.Gson; import org.apache.flink.api.common.functions.FilterFunction; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.functions.MapFunction; @@ -25,11 +22,11 @@ import org.apache.flink.cep.PatternStream; import org.apache.flink.cep.pattern.Pattern; import org.apache.flink.cep.pattern.conditions.SimpleCondition; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.windowing.time.Time; import org.apache.flink.util.Collector; -import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -94,7 +91,7 @@ public void flatMap(AccessLog value, Collector out) throws Except out.collect(new ResponseCount(value.getStatus(), 1)); } }).filter((FilterFunction) count -> { - return !count.response.equals("500"); + return count.response.equals("500"); }).keyBy("response") .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) .sum("count"); @@ -134,31 +131,9 @@ public Alert select(Map> pattern) throws Exception { //Parse the incoming streams & convert into Java PoJos private static class ParseLogData implements MapFunction{ - public AccessLog map(String record) throws Exception { - // TODO: handle exceptions - Gson gson = new Gson(); - AccessLog accessLog = new AccessLog(); - JsonParser parser = new JsonParser(); - JsonObject obj = parser.parse(record).getAsJsonObject(); - if (obj.has("verb")) { - String verb = obj.get("verb").getAsString(); - accessLog.setVerb(verb); - } - if (obj.has("response")) { - String response = obj.get("response").getAsString(); - accessLog.setStatus(response); - } - if (obj.has("@timestamp")) { - String timestamp = obj.get("@timestamp").getAsString(); - - DateTime dateTime = new DateTime(timestamp); - accessLog.setTimestamp(dateTime.getMillis()); - } - if (obj.has("clientip")) { - String client = obj.get("clientip").getAsString(); - accessLog.setClientIP(client); - } - return accessLog; + public AccessLog map(String value) throws Exception { + ObjectMapper mapper = new ObjectMapper(); + return mapper.readValue(value, AccessLog.class); } } From d0fd8c5e16f49ae0b79983852157309a48f98761 Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 18 May 2018 
01:03:25 -0400 Subject: [PATCH 40/48] use builder api Signed-off-by: Lida He --- .../flink-high-error-count-alert/README.md | 4 +-- .../filters/90-pravega-output.conf | 2 +- .../examples/flink/alert/Constants.java | 5 ++- .../flink/alert/HighCountAlerter.java | 32 +++++++++++-------- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index 6d9cb105..5b3ad4c4 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -30,7 +30,7 @@ output { pravega { pravega_endpoint => "tcp://127.0.0.1:9090" <- update to point to your Pravega controller stream_name => "apacheaccess" - scope => "myscope" + scope => "examples" } } ``` @@ -82,7 +82,7 @@ More details can be found on github [pravega docker](https://github.com/hldnova/ Run the alerter. Adjust the controller and scope/stream if necessary. ``` $ cd flink-examples/build/install/pravega-flink-examples -$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream myscope/apacheaccess] +$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream examples] [--stream apacheaccess] ``` ## Input Data ## diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf index 9da70a63..86119beb 100644 --- a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf +++ b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf @@ -12,7 +12,7 @@ output { pravega { pravega_endpoint => "tcp://127.0.0.1:9090" stream_name => "apacheaccess" - scope => "myscope" + scope => "examples" } } diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java index a0acb073..09951d1d 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java @@ -16,9 +16,8 @@ */ public class Constants { protected static final String STREAM_PARAM = "stream"; - protected static final String DEFAULT_STREAM = "myscope/apacheaccess"; - protected static final String CONTROLLER_PARAM = "controller"; - protected static final String DEFAULT_CONTROLLER = "tcp://127.0.0.1:9090"; + protected static final String DEFAULT_SCOPE = "examples"; + protected static final String DEFAULT_STREAM = "apacheaccess"; protected static final Integer ALERT_THRESHOLD = 6; protected static final Integer ALERT_WINDOW = 30; protected static final Integer ALERT_INTERVAL = 2; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index b03d7963..3a2b3730 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -10,9 +10,11 @@ */ package io.pravega.examples.flink.alert; +import io.pravega.client.stream.Stream; import io.pravega.connectors.flink.FlinkPravegaReader; -import io.pravega.connectors.flink.util.FlinkPravegaParams; -import io.pravega.connectors.flink.util.StreamId; +import io.pravega.connectors.flink.PravegaConfig; +import io.pravega.connectors.flink.serialization.PravegaSerialization; 
+import io.pravega.examples.flink.Utils; import org.apache.flink.api.common.functions.FilterFunction; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.functions.MapFunction; @@ -54,22 +56,24 @@ public static void main(String[] args) throws Exception { // initialize the parameter utility tool in order to retrieve input parameters ParameterTool params = ParameterTool.fromArgs(args); - // create Pravega helper utility for Flink using the input paramaters - FlinkPravegaParams helper = new FlinkPravegaParams(params); + PravegaConfig pravegaConfig = PravegaConfig + .fromParams(params) + .withDefaultScope(Constants.DEFAULT_SCOPE); - // get the Pravega stream from the input parameters - StreamId streamId = helper.getStreamFromParam(Constants.STREAM_PARAM, - Constants.DEFAULT_STREAM); + // create the Pravega input stream (if necessary) + Stream stream = Utils.createStream( + pravegaConfig, + params.get(Constants.STREAM_PARAM, Constants.DEFAULT_STREAM)); - // create the Pravega stream is not exists. - helper.createStream(streamId); - - // initialize Flink execution environment + // initialize the Flink execution environment final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - // create the Pravega stream reader - long startTime = 0; - FlinkPravegaReader reader = helper.newReader(streamId, startTime, String.class); + // create the Pravega source to read a stream of text + FlinkPravegaReader reader = FlinkPravegaReader.builder() + .withPravegaConfig(pravegaConfig) + .forStream(stream) + .withDeserializationSchema(PravegaSerialization.deserializationFor(String.class)) + .build(); // add the Pravega reader as the data source DataStream inputStream = env.addSource(reader); From 16ffc4e336f154237fd36b34e50c1dda3689585d Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 18 May 2018 01:11:47 -0400 Subject: [PATCH 41/48] update REAMDME Signed-off-by: Lida He --- flink-examples/doc/flink-high-error-count-alert/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index 5b3ad4c4..e020d2b7 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -82,7 +82,7 @@ More details can be found on github [pravega docker](https://github.com/hldnova/ Run the alerter. Adjust the controller and scope/stream if necessary. 
``` $ cd flink-examples/build/install/pravega-flink-examples -$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--stream examples] [--stream apacheaccess] +$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--scope examples] [--stream apacheaccess] ``` ## Input Data ## From c70ba9b851864bdea52c15de3643a21bde8a7fa6 Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 18 May 2018 11:15:39 -0400 Subject: [PATCH 42/48] update tostring method Signed-off-by: Lida He --- .../java/io/pravega/examples/flink/alert/AccessLog.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java index a57c6c3e..e2e9b706 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -15,6 +15,7 @@ import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.joda.time.DateTime; +import io.pravega.shaded.com.google.gson.Gson; /** * Object to process Apache access log @@ -102,11 +103,7 @@ public int hashCode() { @Override public String toString() { - ObjectMapper mapper = new ObjectMapper(); - try { - return mapper.writeValueAsString(this); - } catch (JsonProcessingException e) { - return "AccessLog: Timestamp=" + getTimestampStr() +", ClientIP=" + getClientIP() + ", Verb=" + getVerb() + ", Status=" + getStatus(); - } + Gson gson = new Gson(); + return gson.toJson(this); } } From 54a42ca793270ef5938ddd374c29380324451435 Mon Sep 17 00:00:00 2001 From: Lida He Date: Tue, 22 May 2018 01:24:59 -0400 Subject: [PATCH 43/48] change class member to follow java bean naming convention Signed-off-by: Lida He --- .../examples/flink/alert/AccessLog.java | 87 +++++++++++-------- .../examples/flink/alert/Constants.java | 3 +- .../flink/alert/HighCountAlerter.java | 6 +- 3 files changed, 54 insertions(+), 42 deletions(-) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java index e2e9b706..926d8ca1 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java @@ -10,64 +10,76 @@ */ package io.pravega.examples.flink.alert; +import io.pravega.shaded.com.google.type.Date; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.joda.time.DateTime; -import io.pravega.shaded.com.google.gson.Gson; + +import java.io.IOException; /** * Object to process Apache access log */ @JsonIgnoreProperties(ignoreUnknown = true) public class AccessLog { - private String ClientIP; + private static final ObjectMapper mapper = new ObjectMapper(); + + @JsonProperty("clientip") + private String clientIp; - private String Status; + @JsonProperty("response") + private String status; - private String Verb; + @JsonProperty("verb") + private String verb; - private String TimestampStr; + @JsonProperty("@timestamp") + 
private String timestamp; - public AccessLog(){ - Status=Verb=ClientIP=""; + public static AccessLog toAccessLog(String value) throws IOException { + return mapper.readValue(value, AccessLog.class); } - @JsonProperty("clientip") - public String getClientIP() { - return ClientIP; + public String getClientIp() { + return clientIp; } - public void setClientIP(String clientIP) { - ClientIP = clientIP; + public void setClientIp(String clientIp) { + this.clientIp = clientIp; } - @JsonProperty("response") - public String getStatus() { - return Status; + public String getStatus() + { + return status; } - public void setStatus(String status) { - Status = status; + public void setStatus(String status) + { + this.status = status; } - @JsonProperty("@timestamp") - public String getTimestampStr() { return TimestampStr; } + public String getTimestamp() { + return timestamp; + } - public void setTimestampStr(String timestampStr) { TimestampStr = timestampStr; } + public void setTimestamp(String timestampStr) { + this.timestamp = timestampStr; + } - public long getTimestampMillis() { - return new DateTime(getTimestampStr()).getMillis(); + public long getTimestampMillis() + { + return new DateTime(getTimestamp()).getMillis(); } - @JsonProperty("verb") - public String getVerb() { - return Verb; + public String getVerb() + { + return verb; } - public void setVerb(String verb) { - Verb = verb; + public void setVerb(String verb) + { + this.verb = verb; } /** @@ -84,26 +96,25 @@ public boolean equals(Object obj) { return false; } AccessLog accessLog =(AccessLog)obj; - return accessLog.Verb.equals(Verb) && - accessLog.Status.equals(Status) && - accessLog.TimestampStr.equals(TimestampStr) && - accessLog.ClientIP.equals(ClientIP); + return accessLog.verb.equals(verb) && + accessLog.status.equals(status) && + accessLog.timestamp.equals(timestamp) && + accessLog.clientIp.equals(clientIp); } @Override public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + ((Status == null) ? 0 : Status.hashCode()); - result = prime * result + ((ClientIP == null) ? 0 : ClientIP.hashCode()); - result = prime * result + ((TimestampStr == null) ? 0 : TimestampStr.hashCode()); - result = prime * result + ((Verb == null) ? 0 : Verb.hashCode()); + result = prime * result + ((status == null) ? 0 : status.hashCode()); + result = prime * result + ((clientIp == null) ? 0 : clientIp.hashCode()); + result = prime * result + ((timestamp == null) ? 0 : timestamp.hashCode()); + result = prime * result + ((verb == null) ? 
0 : verb.hashCode()); return result; } @Override public String toString() { - Gson gson = new Gson(); - return gson.toJson(this); + return "AccessLog [timestamp = "+timestamp+", verb = "+verb+", status = "+status+", clientIp = "+clientIp+"]"; } } diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java index 09951d1d..c23c4a07 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java @@ -15,8 +15,9 @@ * */ public class Constants { - protected static final String STREAM_PARAM = "stream"; + protected static final String SCOPE_PARAM = "scope"; protected static final String DEFAULT_SCOPE = "examples"; + protected static final String STREAM_PARAM = "stream"; protected static final String DEFAULT_STREAM = "apacheaccess"; protected static final Integer ALERT_THRESHOLD = 6; protected static final Integer ALERT_WINDOW = 30; diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index 3a2b3730..3ca7fb88 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -32,6 +32,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -58,7 +59,7 @@ public static void main(String[] args) throws Exception { PravegaConfig pravegaConfig = PravegaConfig .fromParams(params) - .withDefaultScope(Constants.DEFAULT_SCOPE); + .withDefaultScope(params.get(Constants.SCOPE_PARAM, Constants.DEFAULT_SCOPE)); // create the Pravega input stream (if necessary) Stream stream = Utils.createStream( @@ -136,8 +137,7 @@ public Alert select(Map> pattern) throws Exception { //Parse the incoming streams & convert into Java PoJos private static class ParseLogData implements MapFunction{ public AccessLog map(String value) throws Exception { - ObjectMapper mapper = new ObjectMapper(); - return mapper.readValue(value, AccessLog.class); + return AccessLog.toAccessLog(value); } } From eb0f8e58ac45f69c5f7573c76f5143be215b33a6 Mon Sep 17 00:00:00 2001 From: Lida He Date: Wed, 23 May 2018 22:01:01 -0400 Subject: [PATCH 44/48] support secure connection to pravega Signed-off-by: Lida He --- .../main/java/io/pravega/examples/flink/alert/Constants.java | 2 ++ .../io/pravega/examples/flink/alert/HighCountAlerter.java | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java index c23c4a07..c3acb421 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java @@ -19,6 +19,8 @@ public class Constants { protected static final String DEFAULT_SCOPE = "examples"; protected static final String STREAM_PARAM = "stream"; protected static final String DEFAULT_STREAM = "apacheaccess"; + protected static final String USERNAME_PARAM = "username"; + protected static final String PASSWORD_PARAM = "password"; protected static final Integer ALERT_THRESHOLD = 6; protected static final Integer ALERT_WINDOW = 30; protected static final Integer ALERT_INTERVAL = 2; 
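With the two new parameters wired into Constants, a run against a Pravega cluster that has authentication enabled might look like the sketch below. This is an assumed invocation: the `--username`/`--password` values are placeholders for credentials provisioned on your cluster, which the alerter passes through to Pravega unchanged.

```
$ cd flink-examples/build/install/pravega-flink-examples
$ bin/highCountAlerter --controller tcp://127.0.0.1:9090 \
    --scope examples --stream apacheaccess \
    --username <pravega-user> --password <pravega-password>
```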
diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java index 3ca7fb88..58eb6a8b 100644 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java @@ -11,6 +11,7 @@ package io.pravega.examples.flink.alert; import io.pravega.client.stream.Stream; +import io.pravega.client.stream.impl.DefaultCredentials; import io.pravega.connectors.flink.FlinkPravegaReader; import io.pravega.connectors.flink.PravegaConfig; import io.pravega.connectors.flink.serialization.PravegaSerialization; @@ -57,8 +58,12 @@ public static void main(String[] args) throws Exception { // initialize the parameter utility tool in order to retrieve input parameters ParameterTool params = ParameterTool.fromArgs(args); + String username = params.get(Constants.USERNAME_PARAM, ""); + String password = params.get(Constants.PASSWORD_PARAM, ""); + PravegaConfig pravegaConfig = PravegaConfig .fromParams(params) + .withCredentials(new DefaultCredentials(password, username)) .withDefaultScope(params.get(Constants.SCOPE_PARAM, Constants.DEFAULT_SCOPE)); // create the Pravega input stream (if necessary) From 7f037a67c4d34df7941dbf0f3278483b311234b4 Mon Sep 17 00:00:00 2001 From: Lida He Date: Mon, 4 Jun 2018 20:05:08 -0400 Subject: [PATCH 45/48] Add steps to check state of pravega and logstash Singed-off-by: Lida He Signed-off-by: Lida He --- .../flink-high-error-count-alert/README.md | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index e020d2b7..bcdc2bd8 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -15,11 +15,11 @@ Otherwise proceed to set up Logstash and Pravega ## Start Logstash with Pravega Output Plugin ## -On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.2.0.gem`. +On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.3.0.gem`. Install the plugin, assuming Logstash is installed at `/usr/share/logstash/` ``` -$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.2.0.gem +$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.3.0.gem ``` Copy the contents under flink-examples/doc/flink-high-error-count-alert/filters/ to the Logstash host, e.g., in directory ~/pravega. @@ -59,22 +59,40 @@ Run script below to start container from prebuilt image. 
From 7f037a67c4d34df7941dbf0f3278483b311234b4 Mon Sep 17 00:00:00 2001 From: Lida He Date: Mon, 4 Jun 2018 20:05:08 -0400 Subject: [PATCH 45/48] Add steps to check state of pravega and logstash Signed-off-by: Lida He Signed-off-by: Lida He --- .../flink-high-error-count-alert/README.md | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index e020d2b7..bcdc2bd8 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -15,11 +15,11 @@ Otherwise proceed to set up Logstash and Pravega ## Start Logstash with Pravega Output Plugin ## -On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.2.0.gem`. +On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.3.0.gem`. Install the plugin, assuming Logstash is installed at `/usr/share/logstash/` ``` -$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.2.0.gem +$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.3.0.gem ``` Copy the contents under flink-examples/doc/flink-high-error-count-alert/filters/ to the Logstash host, e.g., in directory ~/pravega. @@ -59,22 +59,40 @@ Run script below to start container from prebuilt image. Adjust parameters to your need. ``` #!/bin/sh set -u -PRAVEGA_SCOPE=myscope -PRAVEGA_STREAM=apacheaccess +PRAVEGA_SCOPE=${PRAVEGA_SCOPE:examples} +PRAVEGA_STREAM=${PRAVEGA_STREAM:apacheaccess} CONTAINER_NAME=pravega IMAGE_NAME=emccorp/pravega-demo docker run -d --name $CONTAINER_NAME \ -p 9090:9090 \ -p 9091:9091 \ + -p 9600:9600 \ -v /tmp/access.log:/opt/data/access.log \ - -v /tmp/logs/:/var/log/pravega/ \ - -e PRAVEGA_ENDPOINT=${PRAVEGA_ENDPOINT} \ + -v $PWD/logs/:/var/log/pravega/ \ -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \ -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \ ${IMAGE_NAME} ``` +You can skip **PRAVEGA_SCOPE** and **PRAVEGA_STREAM** if you want to use the defaults. + +To check Pravega +``` +$ curl localhost:9091/v1/scopes +``` + +To check Logstash and the Pravega output plugin, query the Logstash monitoring API on port 9600. Logstash may take one or two minutes to start as a delay is introduced to wait for Pravega to start. +``` +# The output should contain the name and the version (your version may differ) of the plugin +# "name" : "logstash-output-pravega", +# "version" : "0.3.0.pre.SNAPSHOT" + +$ curl localhost:9600/_node/plugins?pretty +``` + +The log files for Pravega and Logstash should be in the **logs** directory under the current directory if you used the script above to start the container. + More details can be found on github [pravega docker](https://github.com/hldnova/pravega-docker) and on dockerhub [pravega docker image](https://hub.docker.com/r/emccorp/pravega-demo/) ## Run HighCountAlerter ##
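The two curl probes added above can also be scripted. A small stand-alone sketch that polls the same endpoints, assuming the container's default port mappings:

```java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

public class EndpointCheck {
    static String get(String url) throws Exception {
        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        conn.setConnectTimeout(5000);
        conn.setReadTimeout(5000);
        try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream()))) {
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                body.append(line).append('\n');
            }
            return body.toString();
        }
    }

    public static void main(String[] args) throws Exception {
        System.out.println(get("http://localhost:9091/v1/scopes"));      // Pravega REST API
        System.out.println(get("http://localhost:9600/_node/plugins"));  // Logstash monitoring API
    }
}
```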
From 266298b44df6596ca35f4402a44ee6f8a93c1761 Mon Sep 17 00:00:00 2001 From: Lida He Date: Mon, 4 Jun 2018 22:24:12 -0400 Subject: [PATCH 46/48] fix bug in build script Signed-off-by: Lida He --- .../doc/flink-high-error-count-alert/README.md | 4 ++-- scenarios/turbine-heat-processor/build.gradle | 18 +++++++++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md index bcdc2bd8..070295fb 100644 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ b/flink-examples/doc/flink-high-error-count-alert/README.md @@ -59,8 +59,8 @@ Run script below to start container from prebuilt image. Adjust parameters to your need. ``` #!/bin/sh set -u -PRAVEGA_SCOPE=${PRAVEGA_SCOPE:examples} -PRAVEGA_STREAM=${PRAVEGA_STREAM:apacheaccess} +PRAVEGA_SCOPE=${PRAVEGA_SCOPE:-examples} +PRAVEGA_STREAM=${PRAVEGA_STREAM:-apacheaccess} CONTAINER_NAME=pravega IMAGE_NAME=emccorp/pravega-demo diff --git a/scenarios/turbine-heat-processor/build.gradle b/scenarios/turbine-heat-processor/build.gradle index 4f7e3a1d..30babd0f 100644 --- a/scenarios/turbine-heat-processor/build.gradle +++ b/scenarios/turbine-heat-processor/build.gradle @@ -32,7 +32,6 @@ dependencies { compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" compile "org.slf4j:slf4j-log4j12:1.7.25" compile "org.apache.flink:flink-cep_2.11:${flinkVersion}" - compile "org.slf4j:slf4j-log4j12:1.7.14" compile "joda-time:joda-time:2.9.+" } @@ -57,6 +56,7 @@ task scriptWordCountReader(type: CreateStartScripts) { classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } + task scriptFlinkAlerter(type: CreateStartScripts) { outputDir = file('build/scripts') mainClassName = 'io.pravega.examples.flink.alert.HighCountAlerter' applicationName = 'highCountAlerter' @@ -64,6 +64,20 @@ task scriptFlinkAlerter(type: CreateStartScripts) { classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } +task scriptExactlyOnceWriter(type: CreateStartScripts) { + outputDir = file('build/scripts') + mainClassName = 'io.pravega.examples.flink.primer.process.ExactlyOnceWriter' + applicationName = 'exactlyOnceWriter' + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath +} + +task scriptExactlyOnceChecker(type: CreateStartScripts) { + outputDir = file('build/scripts') + mainClassName = 'io.pravega.examples.flink.primer.process.ExactlyOnceChecker' + applicationName = 'exactlyOnceChecker' + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath +} + distributions { main { baseName = archivesBaseName @@ -78,6 +92,8 @@ distributions { from project.scriptWordCountWriter from project.scriptWordCountReader from project.scriptFlinkAlerter + from project.scriptExactlyOnceWriter + from project.scriptExactlyOnceChecker } } } From 5a320987538db6a2bdae649fb92c9424e90fc29c Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 22 Jun 2018 16:14:43 -0400 Subject: [PATCH 47/48] re-organize the sample to new org structure Signed-off-by: Lida He --- flink-connector-examples/README.md | 7 + flink-connector-examples/build.gradle | 9 + .../doc/high-count-alerter/README.md | 149 +++++++++++++ .../filters/01-file-input.conf | 17 ++ .../filters/10-apache-accesslog-filter.conf | 23 ++ .../filters/90-pravega-output.conf | 18 ++ .../filters/95-stdout-output.conf | 14 ++ .../flink/primer/datatype/AccessLog.java | 126 +++++++++++ .../flink/primer/datatype/Constants.java | 5 + .../primer/process/HighCountAlerter.java | 204 ++++++++++++++++++ 10 files changed, 572 insertions(+) create mode 100644 flink-connector-examples/doc/high-count-alerter/README.md create mode 100644 flink-connector-examples/doc/high-count-alerter/filters/01-file-input.conf create mode 100644 flink-connector-examples/doc/high-count-alerter/filters/10-apache-accesslog-filter.conf create mode 100644 flink-connector-examples/doc/high-count-alerter/filters/90-pravega-output.conf create mode 100644 flink-connector-examples/doc/high-count-alerter/filters/95-stdout-output.conf create mode 100644 flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/AccessLog.java create mode 100644 flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/HighCountAlerter.java
diff --git a/flink-connector-examples/README.md b/flink-connector-examples/README.md index 760a52ea..527de8cf 100644 --- a/flink-connector-examples/README.md +++ b/flink-connector-examples/README.md @@ -40,3 +40,10 @@ See [wordcount](doc/flink-wordcount/README.md) for more information and execution instructions. This sample demonstrates Pravega EXACTLY_ONCE feature in conjunction with Flink checkpointing and exactly-once mode. See [Exactly Once Sample](doc/exactly-once/README.md) for instructions. + +## High Error Count Alert + +This example demonstrates how to use the Pravega Flink connectors to read and +parse Apache access logs from logstash via the [logstash pravega output plugin](https://github.com/pravega/logstash-output-pravega), +and how to generate an alert when the error count is high within a time frame. +See [High Count Alerter](doc/high-count-alerter/README.md) for instructions. diff --git a/flink-connector-examples/build.gradle b/flink-connector-examples/build.gradle index 04164246..9c1fc601 100644 --- a/flink-connector-examples/build.gradle +++ b/flink-connector-examples/build.gradle @@ -31,6 +31,7 @@ dependencies { compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}" compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" compile "org.slf4j:slf4j-log4j12:1.7.25" + compile "org.apache.flink:flink-cep_2.11:${flinkVersion}" } shadowJar { @@ -68,6 +69,13 @@ task scriptExactlyOnceChecker(type: CreateStartScripts) { classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } +task scriptHighCountAlerter(type: CreateStartScripts) { + outputDir = file('build/scripts') + mainClassName = 'io.pravega.example.flink.primer.process.HighCountAlerter' + applicationName = 'highCountAlerter' + classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath +} + distributions { main { baseName = archivesBaseName @@ -83,6 +91,7 @@ distributions { from project.scriptWordCountReader from project.scriptExactlyOnceWriter from project.scriptExactlyOnceChecker + from project.scriptHighCountAlerter } } }
diff --git a/flink-connector-examples/doc/high-count-alerter/README.md b/flink-connector-examples/doc/high-count-alerter/README.md new file mode 100644 index 00000000..c97a3445 --- /dev/null +++ b/flink-connector-examples/doc/high-count-alerter/README.md @@ -0,0 +1,149 @@ +# High Count Alert # + +The application reads Apache access logs from a Pravega stream and once every 2 seconds +counts the number of 500 responses in the last 30 seconds, and generates +an alert when the count of 500 responses exceeds 6. + +## Prerequisites ## + +A Docker image containing Pravega and Logstash has been prepared to simplify the demo. Skip ahead to the **Run in Docker Container** section in this document if you have a Docker environment handy. + +Otherwise proceed to set up Logstash and Pravega: + +1. Logstash installed, see [Install logstash](https://www.elastic.co/guide/en/logstash/5.6/installing-logstash.html). +2. Pravega running, see [here](http://pravega.io/docs/latest/getting-started/) for instructions. + +## Start Logstash with Pravega Output Plugin ## + +On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.3.0.gem`. + +Install the plugin, assuming Logstash is installed at `/usr/share/logstash/` +``` +$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.3.0.gem +``` + +Copy the contents under flink-connector-examples/doc/high-count-alerter/filters/ to the Logstash host, e.g., in directory ~/pravega. +Update **pravega_endpoint** in ~/pravega/90-pravega-output.conf + +``` +output { + pravega { + pravega_endpoint => "tcp://127.0.0.1:9090" <- update to point to your Pravega controller + stream_name => "apacheaccess" + scope => "examples" + } +} +``` + +Start Logstash, assuming it is installed under /usr/share/logstash. +Note that sometimes it may take a minute or two for Logstash to start. For troubleshooting, the Logstash log files are +normally at /var/log/logstash. To restart, type Ctrl-C, then re-run the command. + +``` +$ sudo /usr/share/logstash/bin/logstash -f ~/pravega +Sending Logstash's logs to /var/log/logstash which is now configured via log4j2.properties +``` + +Normally Logstash is configured to receive data from remote log shippers, such as filebeat. For simplicity in this demo +Logstash is configured to read data from /tmp/access.log. + +## Run in Docker Container ## + +Create a file at /tmp/access.log +``` +$ touch /tmp/access.log +``` + +Run the script below to start a container from the prebuilt image. Adjust parameters to your need. +``` +#!/bin/sh +set -u + +PRAVEGA_SCOPE=${PRAVEGA_SCOPE:-examples} +PRAVEGA_STREAM=${PRAVEGA_STREAM:-apacheaccess} +CONTAINER_NAME=pravega +IMAGE_NAME=emccorp/pravega-demo + +docker run -d --name $CONTAINER_NAME \ + -p 9090:9090 \ + -p 9091:9091 \ + -p 9600:9600 \ + -v /tmp/access.log:/opt/data/access.log \ + -v $PWD/logs/:/var/log/pravega/ \ + -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \ + -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \ + ${IMAGE_NAME} +``` + +To check Pravega +``` +$ curl localhost:9091/v1/scopes +``` + +To check Logstash and the Pravega output plugin, query the Logstash monitoring API on port 9600. Logstash may take one or two minutes to start as a delay is introduced to wait for Pravega to start. +``` +# The output should contain the name and the version (your version may differ) of the plugin +# "name" : "logstash-output-pravega", +# "version" : "0.3.0" + +$ curl localhost:9600/_node/plugins?pretty +``` + +The log files for Pravega and Logstash should be in the **logs** directory under the current directory if you used the script above to start the container. + +More details can be found on github [pravega docker](https://github.com/pravega/logstash-output-pravega) and on dockerhub [pravega docker image](https://hub.docker.com/r/emccorp/pravega-demo/) + +## Run HighCountAlerter ## + +Run the alerter. Adjust the controller and scope/stream if necessary. +``` +$ cd flink-connector-examples/build/install/pravega-flink-examples +$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--scope examples] [--stream apacheaccess] +``` + +## Input Data ## + +Add access logs to /tmp/access.log, e.g., by running the command below every one or two seconds (a small feeder sketch follows the block). +``` +echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log +```
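Instead of re-running the echo command by hand, a tiny stand-alone feeder (not part of the sample) can append one such 500-response line per second; the client IP, user, and URL are the same made-up values used above:

```java
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Locale;

public class AccessLogFeeder {
    public static void main(String[] args) throws Exception {
        // Apache access-log timestamp format, e.g., 19/Mar/2018:02:24:01 -0400
        DateTimeFormatter fmt = DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss Z", Locale.ENGLISH);
        Path log = Paths.get("/tmp/access.log");
        while (true) {
            String line = "10.1.1.11 - peter [" + ZonedDateTime.now().format(fmt)
                    + "] \"PUT /mapping/ HTTP/1.1\" 500 182 \"http://example.com/myapp\" \"python-client\"\n";
            Files.write(log, line.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
            Thread.sleep(1000);  // one 500 response per second keeps the 30s window populated
        }
    }
}
```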
+ +Logstash will push the data to Pravega as JSON; the stdout output plugin prints each event in rubydebug format, e.g., +``` +{ + "request" => "/mapping/", + "agent" => "\"python-client\"", + "auth" => "peter", + "ident" => "-", + "verb" => "PUT", + "message" => "10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] \"PUT /mapping/ HTTP/1.1\" 500 182 \"http://example.com/myapp\" \"python-client\"", + "referrer" => "\"http://example.com/myapp\"", + "@timestamp" => 2018-03-19T06:24:01.000Z, + "response" => "500", + "bytes" => "182", + "clientip" => "10.1.1.11", + "@version" => "1", + "host" => "lglca061.lss.emc.com", + "httpversion" => "1.1" +} +```
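The alerter only needs four of these fields. A stand-alone sketch of the mapping (plain Jackson is used here for illustration; the sample's `AccessLog.toAccessLog` does the same with Flink's shaded Jackson):

```java
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;

public class ParseSketch {
    @JsonIgnoreProperties(ignoreUnknown = true)  // drop request, agent, bytes, ...
    public static class AccessLog {
        @JsonProperty("clientip")   public String clientIp;
        @JsonProperty("response")   public String status;
        @JsonProperty("verb")       public String verb;
        @JsonProperty("@timestamp") public String timestamp;
    }

    public static void main(String[] args) throws Exception {
        String event = "{\"clientip\":\"10.1.1.11\",\"verb\":\"PUT\","
                + "\"response\":\"500\",\"@timestamp\":\"2018-03-19T06:24:01.000Z\"}";
        AccessLog log = new ObjectMapper().readValue(event, AccessLog.class);
        System.out.println(log.verb + " -> " + log.status);  // prints: PUT -> 500
    }
}
```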
+ +## View Alert ## +In the HighCountAlerter window, you should see output like the following. Once the 500-response count reaches 6 or above, it +should print **High 500 responses** alerts. +``` +3> Response count: 500 : 1 +3> Response count: 500 : 2 +3> Response count: 500 : 4 +3> Response count: 500 : 6 +2> High 500 responses: 500 : 6 +3> Response count: 500 : 8 +3> High 500 responses: 500 : 8 +3> Response count: 500 : 8 +2> High 500 responses: 500 : 8 +3> Response count: 500 : 7 +3> High 500 responses: 500 : 7 +3> Response count: 500 : 5 +3> Response count: 500 : 3 +3> Response count: 500 : 1 +``` diff --git a/flink-connector-examples/doc/high-count-alerter/filters/01-file-input.conf b/flink-connector-examples/doc/high-count-alerter/filters/01-file-input.conf new file mode 100644 index 00000000..22ae47fe --- /dev/null +++ b/flink-connector-examples/doc/high-count-alerter/filters/01-file-input.conf @@ -0,0 +1,17 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +input { + file { + path => "/tmp/access.log" + start_position => beginning + } +} + diff --git a/flink-connector-examples/doc/high-count-alerter/filters/10-apache-accesslog-filter.conf b/flink-connector-examples/doc/high-count-alerter/filters/10-apache-accesslog-filter.conf new file mode 100644 index 00000000..d33b4b95 --- /dev/null +++ b/flink-connector-examples/doc/high-count-alerter/filters/10-apache-accesslog-filter.conf @@ -0,0 +1,23 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +filter { + grok { + match => { "message" => "%{COMBINEDAPACHELOG}" } + } + date { + match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ] + } + mutate { + remove_field => [ "timestamp" ] + } + +} + diff --git a/flink-connector-examples/doc/high-count-alerter/filters/90-pravega-output.conf b/flink-connector-examples/doc/high-count-alerter/filters/90-pravega-output.conf new file mode 100644 index 00000000..86119beb --- /dev/null +++ b/flink-connector-examples/doc/high-count-alerter/filters/90-pravega-output.conf @@ -0,0 +1,18 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +output { + pravega { + pravega_endpoint => "tcp://127.0.0.1:9090" + stream_name => "apacheaccess" + scope => "examples" + } +} + diff --git a/flink-connector-examples/doc/high-count-alerter/filters/95-stdout-output.conf b/flink-connector-examples/doc/high-count-alerter/filters/95-stdout-output.conf new file mode 100644 index 00000000..04986e41 --- /dev/null +++ b/flink-connector-examples/doc/high-count-alerter/filters/95-stdout-output.conf @@ -0,0 +1,14 @@ +# +# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +output { + stdout { codec => rubydebug } +} + diff --git a/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/AccessLog.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/AccessLog.java new file mode 100644 index 00000000..b8d760eb --- /dev/null +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/AccessLog.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.example.flink.primer.datatype; + +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.time.Instant; + +/** + * Object to process Apache access log + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class AccessLog { + private static final ObjectMapper mapper = new ObjectMapper(); + + @JsonProperty("clientip") + private String clientIp; + + @JsonProperty("response") + private String status; + + @JsonProperty("verb") + private String verb; + + @JsonProperty("@timestamp") + private String timestamp; + + public AccessLog() { + clientIp=status=verb=timestamp=""; + } + + public static AccessLog toAccessLog(String value) throws IOException { + return mapper.readValue(value, AccessLog.class); + } + + public String getClientIp() { + return clientIp; + } + + public void setClientIp(String clientIp) { + this.clientIp = clientIp; + } + + public String getStatus() + { + return status; + } + + public void setStatus(String status) + { + this.status = status; + } + + public String getTimestamp() { + return timestamp; + } + + public void setTimestamp(String timestampStr) { + this.timestamp = timestampStr; + } + + public long getTimestampMillis() + { + Instant parsedTimestamp = Instant.parse(getTimestamp()); + java.util.Date date = java.util.Date.from( parsedTimestamp ); + return date.getTime(); + } + + public String getVerb() + { + return verb; + } + + public void setVerb(String verb) + { + this.verb = verb; + } + + /** + * The events in the DataStream to which you want to apply pattern matching must + * implement proper equals() and hashCode() methods because these are used for + 
* comparing and matching events. + */ + @Override + public boolean equals(Object obj) { + if(this==obj){ + return true; + } + if(!(obj instanceof AccessLog)){ + return false; + } + AccessLog accessLog =(AccessLog)obj; + return accessLog.verb.equals(verb) && + accessLog.status.equals(status) && + accessLog.timestamp.equals(timestamp) && + accessLog.clientIp.equals(clientIp); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((status == null) ? 0 : status.hashCode()); + result = prime * result + ((clientIp == null) ? 0 : clientIp.hashCode()); + result = prime * result + ((timestamp == null) ? 0 : timestamp.hashCode()); + result = prime * result + ((verb == null) ? 0 : verb.hashCode()); + return result; + } + + @Override + public String toString() { + return "AccessLog [timestamp = "+timestamp+", timestampMillis = "+getTimestampMillis()+", verb = "+verb+", status = "+status+", clientIp = "+clientIp+"]"; + } +} diff --git a/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/Constants.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/Constants.java index cf6e5730..054ee5d4 100644 --- a/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/Constants.java +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/datatype/Constants.java @@ -21,4 +21,9 @@ public class Constants { public static final String DEFAULT_STREAM = "mystream"; public static final String Default_URI_PARAM = "controller"; public static final String Default_URI = "tcp://localhost:9090"; + public static final String USERNAME_PARAM = "username"; + public static final String PASSWORD_PARAM = "password"; + public static final Integer ALERT_THRESHOLD = 6; + public static final Integer ALERT_WINDOW = 30; + public static final Integer ALERT_INTERVAL = 2; } diff --git a/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/HighCountAlerter.java b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/HighCountAlerter.java new file mode 100644 index 00000000..c79dd024 --- /dev/null +++ b/flink-connector-examples/src/main/java/io/pravega/example/flink/primer/process/HighCountAlerter.java @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + */ +package io.pravega.example.flink.primer.process; + +import io.pravega.client.stream.Stream; +import io.pravega.client.stream.impl.DefaultCredentials; +import io.pravega.connectors.flink.FlinkPravegaReader; +import io.pravega.connectors.flink.PravegaConfig; +import io.pravega.connectors.flink.serialization.PravegaSerialization; +import io.pravega.example.flink.Utils; +import io.pravega.example.flink.primer.datatype.AccessLog; +import io.pravega.example.flink.primer.datatype.Constants; +import org.apache.flink.api.common.functions.FlatMapFunction; +import org.apache.flink.api.common.functions.MapFunction; +import org.apache.flink.api.java.utils.ParameterTool; +import org.apache.flink.cep.CEP; +import org.apache.flink.cep.PatternSelectFunction; +import org.apache.flink.cep.PatternStream; +import org.apache.flink.cep.pattern.Pattern; +import org.apache.flink.cep.pattern.conditions.SimpleCondition; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; +import org.apache.flink.streaming.api.windowing.time.Time; +import org.apache.flink.util.Collector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; + +/* + * This application has the following input parameters + * stream - Pravega stream name to read from + * controller - the Pravega controller URI, e.g., tcp://localhost:9090 + * Note that this parameter is processed by the Pravega Flink connector + */ +public class HighCountAlerter { + + // Logger initialization + private static final Logger LOG = LoggerFactory.getLogger(HighCountAlerter.class); + + // The application reads data from the specified Pravega stream and once every ALERT_INTERVAL (2 seconds) + // counts the number of 500 responses in the last ALERT_WINDOW (30 seconds), and generates + // an alert when the count exceeds ALERT_THRESHOLD (6). 
+ + public static void main(String[] args) throws Exception { + LOG.info("Starting HighCountAlerter..."); + + // initialize the parameter utility tool in order to retrieve input parameters + ParameterTool params = ParameterTool.fromArgs(args); + + String username = params.get(Constants.USERNAME_PARAM, ""); + String password = params.get(Constants.PASSWORD_PARAM, ""); + + PravegaConfig pravegaConfig = PravegaConfig + .fromParams(params) + .withCredentials(new DefaultCredentials(password, username)) + .withDefaultScope(params.get(Constants.SCOPE_PARAM, Constants.DEFAULT_SCOPE)); + + // create the Pravega input stream (if necessary) + Stream stream = Utils.createStream( + pravegaConfig, + params.get(Constants.STREAM_PARAM, Constants.DEFAULT_STREAM)); + + // initialize the Flink execution environment + final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + // create the Pravega source to read a stream of text + FlinkPravegaReader<String> reader = FlinkPravegaReader.<String>builder() + .withPravegaConfig(pravegaConfig) + .forStream(stream) + .withDeserializationSchema(PravegaSerialization.deserializationFor(String.class)) + .build(); + + // add the Pravega reader as the data source + DataStream<String> inputStream = env.addSource(reader); + + // create an output sink to stdout for verification + //inputStream.print(); + + // transform logs + DataStream<AccessLog> dataStream = inputStream.map(new ParseLogData()) + .filter(a -> a.getStatus().equals("500")).name("filter"); + + // create an output sink to stdout for verification + //dataStream.print().name("print-parsed-data"); + + // get responses and their counts + DataStream<ResponseCount> countStream = + dataStream.flatMap(new FlatMapFunction<AccessLog, ResponseCount>() { + @Override + public void flatMap(AccessLog value, Collector<ResponseCount> out) throws Exception { + out.collect(new ResponseCount(value.getStatus(), 1)); + } + }).keyBy("response") + .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) + .sum("count"); + + // create an output sink to stdout for verification + countStream.print().name("print-count"); + + // create alert pattern + Pattern<ResponseCount, ?> pattern500 = Pattern.<ResponseCount>begin("500pattern") + .where(new SimpleCondition<ResponseCount>() { + @Override + public boolean filter(ResponseCount value) throws Exception { + return value.count >= Constants.ALERT_THRESHOLD; + } + }); + + PatternStream<ResponseCount> patternStream = CEP.pattern(countStream, pattern500); + + DataStream<Alert> alertStream = patternStream.select( + new PatternSelectFunction<ResponseCount, Alert>() { + @Override + public Alert select(Map<String, List<ResponseCount>> pattern) throws Exception { + ResponseCount count = pattern.get("500pattern").get(0); + return new Alert(count.response, count.count, "High 500 responses"); + } + }).name("alert"); + + // create an output sink to stdout for verification + alertStream.print().name("print-alert"); + + + // execute within the Flink environment + env.execute("HighCountAlerter"); + + LOG.info("Ending HighCountAlerter..."); + } + + //Parse the incoming streams & convert into Java POJOs + private static class ParseLogData implements MapFunction<String, AccessLog> { + public AccessLog map(String value) throws Exception { + return AccessLog.toAccessLog(value); + } + } + + // Data type for access status count + public static class ResponseCount { + + public String response; + public long count; + + public ResponseCount() {} + + public ResponseCount(String status, long count) { + this.response = status; + this.count = count; + } + + @Override + public String toString() { + return "Response count: " + response + " : " + count; + } + }
+ + // Data type for alert + public static class Alert { + + private String response; + private long count; + private String description; + + public Alert() {} + + public Alert(String response, long count, String description) { + this.response = response; + this.count = count; + this.description = description; + } + + @Override + public String toString() { + return description + ": " + response + " : " + count; + } + } + + public static class EventTimeExtractor extends BoundedOutOfOrdernessTimestampExtractor<AccessLog> { + + public EventTimeExtractor(Time time) { super(time); } + + @Override + public long extractTimestamp(AccessLog accessLog) { + return accessLog.getTimestampMillis(); + } + } +}
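One observation on the file above: `EventTimeExtractor` is defined but never wired into `main()`, so the sliding windows run on processing time and `getTimestampMillis()` goes unused. If event-time windows over the log's `@timestamp` were intended, the wiring would look roughly like this self-contained sketch (the 5-second out-of-orderness bound is an assumption, not taken from the patch):

```java
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

public class EventTimeSketch {
    // stand-in for AccessLog; only the timestamp matters here
    public static class Event {
        public long timestampMillis;
        public Event() {}
        public Event(long ts) { timestampMillis = ts; }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);  // the missing switch

        DataStream<Event> events = env.fromElements(new Event(1000L), new Event(2000L))
                // equivalent of .assignTimestampsAndWatermarks(new EventTimeExtractor(Time.seconds(5)))
                .assignTimestampsAndWatermarks(
                        new BoundedOutOfOrdernessTimestampExtractor<Event>(Time.seconds(5)) {
                            @Override
                            public long extractTimestamp(Event e) {
                                return e.timestampMillis;
                            }
                        });
        events.print();
        env.execute("event-time-sketch");
    }
}
```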
From 2b70db89c5fc7191cf628be5398f5ef2fd80d213 Mon Sep 17 00:00:00 2001 From: Lida He Date: Fri, 22 Jun 2018 16:17:20 -0400 Subject: [PATCH 48/48] re-organize the sample to new org structure Signed-off-by: Lida He --- flink-examples/README.md | 39 ---- .../flink-high-error-count-alert/README.md | 151 -------------- .../filters/01-file-input.conf | 17 -- .../filters/10-apache-accesslog-filter.conf | 23 --- .../filters/90-pravega-output.conf | 18 -- .../filters/95-stdout-output.conf | 14 -- .../src/main/dist/bin/create-stream.sh | 22 -- .../examples/flink/alert/AccessLog.java | 120 ----------- .../examples/flink/alert/Constants.java | 27 --- .../flink/alert/HighCountAlerter.java | 189 ------------------ scenarios/turbine-heat-processor/build.gradle | 53 +---- 11 files changed, 9 insertions(+), 664 deletions(-) delete mode 100644 flink-examples/README.md delete mode 100644 flink-examples/doc/flink-high-error-count-alert/README.md delete mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf delete mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf delete mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf delete mode 100644 flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf delete mode 100755 flink-examples/src/main/dist/bin/create-stream.sh delete mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java delete mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java delete mode 100644 flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java diff --git a/flink-examples/README.md b/flink-examples/README.md deleted file mode 100644 index fd847a18..00000000 --- a/flink-examples/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Pravega Flink Connector Samples -Steps to set up and run Pravega Flink connector samples. - -## Pre requisites -1. Java 8 -2. Pravega running (see [here](http://pravega.io/docs/latest/getting-started/) for instructions) - -## Build Pravega Flink Connectors - -Follow the below steps to build and publish artifacts from source to local Maven repository: - -``` -$ git clone https://github.com/pravega/flink-connectors.git -$ cd flink-connectors -$ ./gradlew clean install -``` - -## Build the Sample Code - -Follow the below steps to build the sample code: - -``` -$ git clone https://github.com/pravega/pravega-samples.git -$ cd pravega-samples -$ ./gradlew clean installDist -``` - -## Word Count Sample - -This example demonstrates how to use the Pravega Flink Connectors to write data collected -from an external network stream into a Pravega stream and read the data from the Pravega stream. -See [Flink Word Count Sample](doc/flink-wordcount/README.md) for instructions. - -## High Error Count Alert - -This example demonstrates how to use the Pravega Flink connectors to read and -parse Apache access logs from logstash via the [logstash pravega output plugin](https://github.com/pravega/logstash-output-pravega), -and how to generate alert when error count is high within a time frame. -See [High Error Count Alert](doc/flink-high-error-count-alert/README.md) for instructions. diff --git a/flink-examples/doc/flink-high-error-count-alert/README.md b/flink-examples/doc/flink-high-error-count-alert/README.md deleted file mode 100644 index 070295fb..00000000 --- a/flink-examples/doc/flink-high-error-count-alert/README.md +++ /dev/null @@ -1,151 +0,0 @@ -# High Count Alert # - -The application reads apache access logs from a Pravega stream and once every 2 seconds -counts the number of 500 responses in the last 30 seconds, and generates -alert when the counts of 500 responses exceed 6. - -## Prerequistes ## - -A Docker image containing Pravega and Logstash had been prepared to simplify the demo. Skip ahead to the **Run in Docker Container** section in this document if you have docker environment handy. - -Otherwise proceed to set up Logstash and Pravega - -1. Logstash installed, see [Install logstash](https://www.elastic.co/guide/en/logstash/5.6/installing-logstash.html). -2. Pravega running, see [here](http://pravega.io/docs/latest/getting-started/) for instructions. - -## Start Logstash with Pravega Output Plugin ## - -On the Logstash host, download the plugin gem file from [Logstash Pravega output plugin](https://github.com/pravega/logstash-output-pravega/releases), for example, `logstash-output-pravega-0.3.0.gem`. - -Install the plugin, assuming Logstash is installed at `/usr/share/logstash/` -``` -$ /usr/share/logstash/bin/logstash-plugin install logstash-output-pravega-0.3.0.gem -``` - -Copy the contents under flink-examples/doc/flink-high-error-count-alert/filters/ to the Logstash host, e.g., in directory ~/pravega. -update **pravega_endpoint** in ~/pravega/90-pravega-output.conf - -``` -output { - pravega { - pravega_endpoint => "tcp://127.0.0.1:9090" <- update to point to your Pravega controller - stream_name => "apacheaccess" - scope => "examples" - } -} -``` - -Start logstash, assuming it is installed at /usr/share/logstash/bin. -Note that sometimes it may take a minute or two for logstash to start. For troubleshooting, the logstash log files are -normally at /var/log/logstash. To restart, type Ctrl-C, then re-run the command. - -``` -$ sudo /usr/share/logstash/bin -f ~/pravega -Sending Logstash's logs to /var/log/logstash which is now configured via log4j2.properties -``` - -Normally Logstash is configured to receive data from remote log shippers, such as filebeat. For simplicity in this demo -Logstash is configured read data from /tmp/access.log. - -## Run in Docker Container ## - -Create a file at /tmp/access.log -``` -$ touch /tmp/access.log -``` - -Run script below to start container from prebuilt image. Adjust parameters to your need.
-``` -#!/bin/sh -set -u - -PRAVEGA_SCOPE=${PRAVEGA_SCOPE:-examples} -PRAVEGA_STREAM=${PRAVEGA_STREAM:-apacheaccess} -CONTAINER_NAME=pravega -IMAGE_NAME=emccorp/pravega-demo - -docker run -d --name $CONTAINER_NAME \ - -p 9090:9090 \ - -p 9091:9091 \ - -p 9600:9600 \ - -v /tmp/access.log:/opt/data/access.log \ - -v $PWD/logs/:/var/log/pravega/ \ - -e PRAVEGA_SCOPE=${PRAVEGA_SCOPE} \ - -e PRAVEGA_STREAM=${PRAVEGA_STREAM} \ - ${IMAGE_NAME} -``` - -You can skip **PRAVEGA_SCOPE** and **PRAVEGA_STREAM** if you want to use the defaults. - -To check Pravega -``` -$ curl localhost:9091/v1/scopes -``` - -To check Logstash and output plugin for Pravega via Logstash monitoring API running on port 9600. Logstash may take one or two minutes to start as a delay is introduced to wait for Pravega to start. -``` -# The output should contain the name and the version (your version may differ) of the plugin -# "name" : "logstash-output-pravega", -# "version" : "0.3.0.pre.SNAPSHOT" - -$ curl localhost:9600/_node/plugins?pretty -``` - -The log files for Pravega and Logstash should be in the **logs** directory under current directory if you the script above to start the container. - -More details can be found on github [pravega docker](https://github.com/hldnova/pravega-docker) and on dockerhub [pravega docker image](https://hub.docker.com/r/emccorp/pravega-demo/) - -## Run HighCountAlerter ## - -Run the alerter. Adjust the controller and scope/stream if necessary. -``` -$ cd flink-examples/build/install/pravega-flink-examples -$ bin/highCountAlerter [--controller tcp://127.0.0.1:9090] [--scope examples] [--stream apacheaccess] -``` - -## Input Data ## - -Add access logs to /tmp/access.log, e.g., by running command below every one or two seconds. -``` -echo '10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] "PUT /mapping/ HTTP/1.1" 500 182 "http://example.com/myapp" "python-client"' >> /tmp/access.log -``` - -Logstash will push the data to Pravega in json string, e.g., -``` -{ - "request" => "/mapping/", - "agent" => "\"python-client\"", - "auth" => "peter", - "ident" => "-", - "verb" => "PUT", - "message" => "10.1.1.11 - peter [19/Mar/2018:02:24:01 -0400] \"PUT /mapping/ HTTP/1.1\" 500 182 \"http://example.com/myapp\" \"python-client\"", - "referrer" => "\"http://example.com/myapp\"", - "@timestamp" => 2018-03-19T06:24:01.000Z, - "response" => "500", - "bytes" => "182", - "clientip" => "10.1.1.11", - "@version" => "1", - "host" => "lglca061.lss.emc.com", - "httpversion" => "1.1" -} -``` - -## View Alert ## -In the HighCountAlerter window, you should see output like the following. Once the 500 response counts reach 6 or above, it -should print **High 500 responses** alerts. -``` -3> Response count: 500 : 1 -3> Response count: 500 : 2 -3> Response count: 500 : 4 -3> Response count: 500 : 6 -2> High 500 responses: 500 : 6 -3> Response count: 500 : 8 -3> High 500 responses: 500 : 8 -3> Response count: 500 : 8 -2> High 500 responses: 500 : 8 -3> Response count: 500 : 7 -3> High 500 responses: 500 : 7 -3> Response count: 500 : 5 -3> Response count: 500 : 3 -3> Response count: 500 : 1 -``` diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf b/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf deleted file mode 100644 index 22ae47fe..00000000 --- a/flink-examples/doc/flink-high-error-count-alert/filters/01-file-input.conf +++ /dev/null @@ -1,17 +0,0 @@ -# -# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# - -input { - file { - path => "/tmp/access.log" - start_position => beginning - } -} - diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf b/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf deleted file mode 100644 index d33b4b95..00000000 --- a/flink-examples/doc/flink-high-error-count-alert/filters/10-apache-accesslog-filter.conf +++ /dev/null @@ -1,23 +0,0 @@ -# -# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# - -filter { - grok { - match => { "message" => "%{COMBINEDAPACHELOG}" } - } - date { - match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ] - } - mutate { - remove_field => [ "timestamp" ] - } - -} - diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf deleted file mode 100644 index 86119beb..00000000 --- a/flink-examples/doc/flink-high-error-count-alert/filters/90-pravega-output.conf +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# - -output { - pravega { - pravega_endpoint => "tcp://127.0.0.1:9090" - stream_name => "apacheaccess" - scope => "examples" - } -} - diff --git a/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf b/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf deleted file mode 100644 index 04986e41..00000000 --- a/flink-examples/doc/flink-high-error-count-alert/filters/95-stdout-output.conf +++ /dev/null @@ -1,14 +0,0 @@ -# -# Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# - -output { - stdout { codec => rubydebug } -} - diff --git a/flink-examples/src/main/dist/bin/create-stream.sh b/flink-examples/src/main/dist/bin/create-stream.sh deleted file mode 100755 index 9993ee91..00000000 --- a/flink-examples/src/main/dist/bin/create-stream.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# -# sample script to create scope and stream using Pravega REST API -# -host=localhost -port=9091 -scope=myscope -stream=apacheaccess -curl -v -H "Content-Type: application/json" $host:${port}/v1/scopes --d '{ - "scopeName": "'${scope}'" -}' - -curl -v -H "Content-Type: application/json" $host:${port}/v1/scopes/${scope}/streams \ --d '{ - "streamName": "'${stream}'", - "scopeName": "'${scope}'", - "scalingPolicy":{ - "type": "FIXED_NUM_SEGMENTS", - "minSegments": 1 - } -}' diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java deleted file mode 100644 index 926d8ca1..00000000 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/AccessLog.java +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - */ -package io.pravega.examples.flink.alert; - -import io.pravega.shaded.com.google.type.Date; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.annotation.JsonProperty; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; -import org.joda.time.DateTime; - -import java.io.IOException; - -/** - * Object to process Apache access log - */ -@JsonIgnoreProperties(ignoreUnknown = true) -public class AccessLog { - private static final ObjectMapper mapper = new ObjectMapper(); - - @JsonProperty("clientip") - private String clientIp; - - @JsonProperty("response") - private String status; - - @JsonProperty("verb") - private String verb; - - @JsonProperty("@timestamp") - private String timestamp; - - public static AccessLog toAccessLog(String value) throws IOException { - return mapper.readValue(value, AccessLog.class); - } - - public String getClientIp() { - return clientIp; - } - - public void setClientIp(String clientIp) { - this.clientIp = clientIp; - } - - public String getStatus() - { - return status; - } - - public void setStatus(String status) - { - this.status = status; - } - - public String getTimestamp() { - return timestamp; - } - - public void setTimestamp(String timestampStr) { - this.timestamp = timestampStr; - } - - public long getTimestampMillis() - { - return new DateTime(getTimestamp()).getMillis(); - } - - public String getVerb() - { - return verb; - } - - public void setVerb(String verb) - { - this.verb = verb; - } - - /** - * The events in the DataStream to which you want to apply pattern matching must - * implement proper equals() and hashCode() methods because these are used for - * comparing and matching events. 
- */ - @Override - public boolean equals(Object obj) { - if(this==obj){ - return true; - } - if(!(obj instanceof AccessLog)){ - return false; - } - AccessLog accessLog =(AccessLog)obj; - return accessLog.verb.equals(verb) && - accessLog.status.equals(status) && - accessLog.timestamp.equals(timestamp) && - accessLog.clientIp.equals(clientIp); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((status == null) ? 0 : status.hashCode()); - result = prime * result + ((clientIp == null) ? 0 : clientIp.hashCode()); - result = prime * result + ((timestamp == null) ? 0 : timestamp.hashCode()); - result = prime * result + ((verb == null) ? 0 : verb.hashCode()); - return result; - } - - @Override - public String toString() { - return "AccessLog [timestamp = "+timestamp+", verb = "+verb+", status = "+status+", clientIp = "+clientIp+"]"; - } -} diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java deleted file mode 100644 index c3acb421..00000000 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/Constants.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - */ -package io.pravega.examples.flink.alert; - -/** - * Defines a handful of constants shared by classes in this package. - * - */ -public class Constants { - protected static final String SCOPE_PARAM = "scope"; - protected static final String DEFAULT_SCOPE = "examples"; - protected static final String STREAM_PARAM = "stream"; - protected static final String DEFAULT_STREAM = "apacheaccess"; - protected static final String USERNAME_PARAM = "username"; - protected static final String PASSWORD_PARAM = "password"; - protected static final Integer ALERT_THRESHOLD = 6; - protected static final Integer ALERT_WINDOW = 30; - protected static final Integer ALERT_INTERVAL = 2; -} diff --git a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java b/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java deleted file mode 100644 index 58eb6a8b..00000000 --- a/flink-examples/src/main/java/io/pravega/examples/flink/alert/HighCountAlerter.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - */ -package io.pravega.examples.flink.alert; - -import io.pravega.client.stream.Stream; -import io.pravega.client.stream.impl.DefaultCredentials; -import io.pravega.connectors.flink.FlinkPravegaReader; -import io.pravega.connectors.flink.PravegaConfig; -import io.pravega.connectors.flink.serialization.PravegaSerialization; -import io.pravega.examples.flink.Utils; -import org.apache.flink.api.common.functions.FilterFunction; -import org.apache.flink.api.common.functions.FlatMapFunction; -import org.apache.flink.api.common.functions.MapFunction; -import org.apache.flink.api.java.utils.ParameterTool; -import org.apache.flink.cep.CEP; -import org.apache.flink.cep.PatternSelectFunction; -import org.apache.flink.cep.PatternStream; -import org.apache.flink.cep.pattern.Pattern; -import org.apache.flink.cep.pattern.conditions.SimpleCondition; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.api.windowing.time.Time; -import org.apache.flink.util.Collector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/* - * This application has the following input parameters - * stream - Pravega stream name to write to - * controller - the Pravega controller URI, e.g., tcp://localhost:9090 - * Note that this parameter is processed in pravega flink connector - */ -public class HighCountAlerter { - - // Logger initialization - private static final Logger LOG = LoggerFactory.getLogger(HighCountAlerter.class); - - // The application reads data from specified Pravega stream and once every ALERT_INTERVAL (2 seconds) - // counts the number of 500 responses in the last ALERT_WINDOW (30 seconds), and generates - // alert when the counts exceed ALERT_THRESHOLD (6). 
- - public static void main(String[] args) throws Exception { - LOG.info("Starting HighErrorAlerter..."); - - // initialize the parameter utility tool in order to retrieve input parameters - ParameterTool params = ParameterTool.fromArgs(args); - - String username = params.get(Constants.USERNAME_PARAM, ""); - String password = params.get(Constants.PASSWORD_PARAM, ""); - - PravegaConfig pravegaConfig = PravegaConfig - .fromParams(params) - .withCredentials(new DefaultCredentials(password, username)) - .withDefaultScope(params.get(Constants.SCOPE_PARAM, Constants.DEFAULT_SCOPE)); - - // create the Pravega input stream (if necessary) - Stream stream = Utils.createStream( - pravegaConfig, - params.get(Constants.STREAM_PARAM, Constants.DEFAULT_STREAM)); - - // initialize the Flink execution environment - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - - // create the Pravega source to read a stream of text - FlinkPravegaReader<String> reader = FlinkPravegaReader.<String>builder() - .withPravegaConfig(pravegaConfig) - .forStream(stream) - .withDeserializationSchema(PravegaSerialization.deserializationFor(String.class)) - .build(); - - // add the Pravega reader as the data source - DataStream<String> inputStream = env.addSource(reader); - - // create an output sink to stdout for verification - //inputStream.print(); - - // transform logs - DataStream<AccessLog> dataStream = inputStream.map(new ParseLogData()); - - // create an output sink to stdout for verification - //dataStream.print(); - - // get responses and their counts - DataStream<ResponseCount> countStream = - dataStream.flatMap(new FlatMapFunction<AccessLog, ResponseCount>() { - @Override - public void flatMap(AccessLog value, Collector<ResponseCount> out) throws Exception { - out.collect(new ResponseCount(value.getStatus(), 1)); - } - }).filter((FilterFunction<ResponseCount>) count -> { - return count.response.equals("500"); - }).keyBy("response") - .timeWindow(Time.seconds(Constants.ALERT_WINDOW), Time.seconds(Constants.ALERT_INTERVAL)) - .sum("count"); - - // create an output sink to stdout for verification - countStream.print(); - - // create alert pattern - Pattern<ResponseCount, ?> pattern500 = Pattern.<ResponseCount>begin("500pattern") - .where(new SimpleCondition<ResponseCount>() { - @Override - public boolean filter(ResponseCount value) throws Exception { - return value.count >= Constants.ALERT_THRESHOLD; - } - }); - - PatternStream<ResponseCount> patternStream = CEP.pattern(countStream, pattern500); - - DataStream<Alert> alertStream = patternStream.select( - new PatternSelectFunction<ResponseCount, Alert>() { - @Override - public Alert select(Map<String, List<ResponseCount>> pattern) throws Exception { - ResponseCount count = pattern.get("500pattern").get(0); - return new Alert(count.response, count.count, "High 500 responses"); - } - }); - - // create an output sink to stdout for verification - alertStream.print(); - - - // execute within the Flink environment - env.execute("HighCountAlerter"); - - LOG.info("Ending HighCountAlerter..."); - } - - //Parse the incoming streams & convert into Java PoJos - private static class ParseLogData implements MapFunction<String, AccessLog> { - public AccessLog map(String value) throws Exception { - return AccessLog.toAccessLog(value); - } - } - - // Data type access status count - public static class ResponseCount { - - public String response; - public long count; - - public ResponseCount() {} - - public ResponseCount(String status, long count) { - this.response = status; - this.count = count; - } - - @Override - public String toString() { - return "Response count: " + response + " : " + count; - } - } - - // Data type access status count - public static class Alert { - - private
String response; - private long count; - private String description; - - public Alert() {} - - public Alert(String response, long count, String description) { - this.response = response; - this.count = count; - this.description = description; - } - - @Override - public String toString() { - return description + ": " + response + " : " + count; - } - } - -} diff --git a/scenarios/turbine-heat-processor/build.gradle b/scenarios/turbine-heat-processor/build.gradle index 30babd0f..b080e52d 100644 --- a/scenarios/turbine-heat-processor/build.gradle +++ b/scenarios/turbine-heat-processor/build.gradle @@ -9,7 +9,7 @@ * */ plugins { - id 'com.github.johnrengelman.shadow' version '2.0.3' + id 'com.github.johnrengelman.shadow' version '1.2.4' } apply plugin: "java" @@ -19,7 +19,7 @@ apply plugin: 'idea' apply plugin: 'eclipse' sourceCompatibility = "1.8" -archivesBaseName = 'pravega-flink-examples' +archivesBaseName = 'pravega-flink-scenario-turbineheatprocessor' ext { scalaJava8CompatVersion = '0.7.0' @@ -27,12 +27,10 @@ dependencies { compile "org.scala-lang.modules:scala-java8-compat_2.11:${scalaJava8CompatVersion}" - compile "io.pravega:pravega-connectors-flink_2.11:${connectorVersion}" + compile "io.pravega:pravega-connectors-flink_2.11:${flinkConnectorVersion}" compile "org.apache.flink:flink-streaming-java_2.11:${flinkVersion}" compile "org.apache.flink:flink-streaming-scala_2.11:${flinkVersion}" - compile "org.slf4j:slf4j-log4j12:1.7.25" - compile "org.apache.flink:flink-cep_2.11:${flinkVersion}" - compile "joda-time:joda-time:2.9.+" + compile "org.slf4j:slf4j-log4j12:1.7.14" } shadowJar { @@ -42,39 +40,10 @@ shadowJar { } } -task scriptWordCountWriter(type: CreateStartScripts) { +task scriptTurbineHeatProcessor(type: CreateStartScripts) { outputDir = file('build/scripts') - mainClassName = 'io.pravega.examples.flink.wordcount.WordCountWriter' - applicationName = 'wordCountWriter' - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath -} - -task scriptWordCountReader(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.examples.flink.wordcount.WordCountReader' - applicationName = 'wordCountReader' - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath -} - - -task scriptFlinkAlerter(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.examples.flink.alert.HighCountAlerter' - applicationName = 'highCountAlerter' - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath -} - -task scriptExactlyOnceWriter(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.examples.flink.primer.process.ExactlyOnceWriter' - applicationName = 'exactlyOnceWriter' - classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath -} - -task scriptExactlyOnceChecker(type: CreateStartScripts) { - outputDir = file('build/scripts') - mainClassName = 'io.pravega.examples.flink.primer.process.ExactlyOnceChecker' - applicationName = 'exactlyOnceChecker' + mainClassName = 'io.pravega.turbineheatprocessor.TurbineHeatProcessor' + applicationName = 'turbineHeatProcessor' classpath = files(jar.archivePath) + sourceSets.main.runtimeClasspath } @@ -89,12 +58,8 @@ distributions { main { baseName = archivesBaseName } } into('bin') { - from project.scriptWordCountWriter - from project.scriptWordCountReader - from project.scriptFlinkAlerter - from project.scriptExactlyOnceWriter - from project.scriptExactlyOnceChecker + from
project.scriptTurbineHeatProcessor } } } -} +} \ No newline at end of file