From 020bcc3dca9e79ad6509879796cede3ef6b6dc0b Mon Sep 17 00:00:00 2001
From: Gezim Sejdiu
Date: Tue, 23 Nov 2021 22:25:24 +0100
Subject: [PATCH] Refactor java/scala templates to maven/sbt instead

---
 .github/workflows/build.yml          |  2 +-
 README.md                            |  8 ++++----
 build.sh                             |  4 ++--
 template/{java => maven}/Dockerfile  |  1 +
 template/{java => maven}/README.md   | 18 +++++++++---------
 template/{java => maven}/template.sh |  0
 template/{scala => sbt}/Dockerfile   |  0
 template/{scala => sbt}/README.md    | 14 +++++++-------
 template/{scala => sbt}/build.sbt    |  0
 template/{scala => sbt}/plugins.sbt  |  0
 template/{scala => sbt}/template.sh  |  0
 11 files changed, 24 insertions(+), 23 deletions(-)
 rename template/{java => maven}/Dockerfile (92%)
 rename template/{java => maven}/README.md (60%)
 rename template/{java => maven}/template.sh (100%)
 rename template/{scala => sbt}/Dockerfile (100%)
 rename template/{scala => sbt}/README.md (82%)
 rename template/{scala => sbt}/build.sbt (100%)
 rename template/{scala => sbt}/plugins.sbt (100%)
 rename template/{scala => sbt}/template.sh (100%)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 13b51a0..256ad8a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -130,7 +130,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        template: [java, scala, python]
+        template: [maven, sbt, python]
 
     needs: 'submit'
     steps:
diff --git a/README.md b/README.md
index 2079dd9..aefc3de 100644
--- a/README.md
+++ b/README.md
@@ -90,18 +90,18 @@ Make sure to fill in the `INIT_DAEMON_STEP` as configured in your pipeline.
 ### Spark Master
 To start a Spark master:
 
-    docker run --name spark-master -h spark-master -e ENABLE_INIT_DAEMON=false -d bde2020/spark-master:3.1.1-hadoop3.2
+    docker run --name spark-master -h spark-master -d bde2020/spark-master:3.1.1-hadoop3.2
 
 ### Spark Worker
 To start a Spark worker:
 
-    docker run --name spark-worker-1 --link spark-master:spark-master -e ENABLE_INIT_DAEMON=false -d bde2020/spark-worker:3.1.1-hadoop3.2
+    docker run --name spark-worker-1 --link spark-master:spark-master -d bde2020/spark-worker:3.1.1-hadoop3.2
 
 ## Launch a Spark application
 Building and running your Spark application on top of the Spark cluster is as simple as extending a template Docker image. Check the template's README for further documentation.
-* [Java template](template/java)
+* [Maven template](template/maven)
 * [Python template](template/python)
-* [Scala template](template/scala)
+* [SBT template](template/sbt)
 
 ## Kubernetes deployment
 The BDE Spark images can also be used in a Kubernetes environment.
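The README hunk above starts the master and worker with plain `docker run` commands. For orientation, the same pair can be expressed as a Compose file. The sketch below is illustrative only and not part of this patch: the published port assumes the default `SPARK_MASTER_PORT` of 7077 documented in the template READMEs, and Compose's service-name DNS stands in for the `--link` flag used in the README examples.

```yaml
# Illustrative sketch (not part of this patch): the master/worker pair from
# the README as a docker-compose file. Service names resolve as hostnames on
# the Compose network, replacing --link from the docker run examples.
version: "3"
services:
  spark-master:
    image: bde2020/spark-master:3.1.1-hadoop3.2
    container_name: spark-master
    hostname: spark-master
    ports:
      - "7077:7077"   # default SPARK_MASTER_PORT, per the template READMEs
  spark-worker-1:
    image: bde2020/spark-worker:3.1.1-hadoop3.2
    container_name: spark-worker-1
    depends_on:
      - spark-master
```

Bring the pair up with `docker-compose up -d`; applications then reach the master at `spark://spark-master:7077`.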
diff --git a/build.sh b/build.sh
index c9bd967..712a366 100755
--- a/build.sh
+++ b/build.sh
@@ -20,8 +20,8 @@ if [ $# -eq 0 ]
     build worker
     build history-server
     build submit
-    build java-template template/java
-    build scala-template template/scala
+    build maven-template template/maven
+    build sbt-template template/sbt
     build python-template template/python
 
     build python-example examples/python
diff --git a/template/java/Dockerfile b/template/maven/Dockerfile
similarity index 92%
rename from template/java/Dockerfile
rename to template/maven/Dockerfile
index f073890..76e89e0 100644
--- a/template/java/Dockerfile
+++ b/template/maven/Dockerfile
@@ -3,6 +3,7 @@ FROM bde2020/spark-submit:3.1.1-hadoop3.2
 LABEL maintainer="Gezim Sejdiu , Giannis Mouchakis "
 
 ENV SPARK_APPLICATION_JAR_NAME application-1.0
+ENV SPARK_APPLICATION_JAR_LOCATION /app/application.jar
 
 COPY template.sh /
 
diff --git a/template/java/README.md b/template/maven/README.md
similarity index 60%
rename from template/java/README.md
rename to template/maven/README.md
index 93bb4b0..8305799 100644
--- a/template/java/README.md
+++ b/template/maven/README.md
@@ -1,17 +1,17 @@
-# Spark Java template
+# Spark Maven template
 
-The Spark Java template image serves as a base image to build your own Java application to run on a Spark cluster. See [big-data-europe/docker-spark README](https://github.com/big-data-europe/docker-spark) for a description of how to set up a Spark cluster.
+The Spark Maven template image serves as a base image to build your own Maven application to run on a Spark cluster. See [big-data-europe/docker-spark README](https://github.com/big-data-europe/docker-spark) for a description of how to set up a Spark cluster.
 
 ### Package your application using Maven
-You can build and launch your Java application on a Spark cluster by extending this image with your sources. The template uses [Maven](https://maven.apache.org/) as its build tool, so make sure you have a `pom.xml` file for your application specifying all the dependencies.
+You can build and launch your Maven application on a Spark cluster by extending this image with your sources. The template uses [Maven](https://maven.apache.org/) as its build tool, so make sure you have a `pom.xml` file for your application specifying all the dependencies.
 
 The Maven `package` command must create an assembly JAR (or 'uber' JAR) containing your code and its dependencies. Spark and Hadoop dependencies should be listed as `provided`. The [Maven shade plugin](http://maven.apache.org/plugins/maven-shade-plugin/) can be used to build such assembly JARs.
 
-### Extending the Spark Java template with your application
+### Extending the Spark Maven template with your application
 
-#### Steps to extend the Spark Java template
+#### Steps to extend the Spark Maven template
 1. Create a Dockerfile in the root folder of your project (which also contains a `pom.xml`)
-2. Extend the Spark Java template Docker image
+2. Extend the Spark Maven template Docker image
 3. Configure the following environment variables (unless the default value suffices):
    * `SPARK_MASTER_NAME` (default: spark-master)
    * `SPARK_MASTER_PORT` (default: 7077)
@@ -21,10 +21,10 @@ The Maven `package` command must create an assembly JAR (or 'uber' JAR) containi
 4. Build and run the image
 ```
 docker build --rm=true -t bde/spark-app .
-docker run --name my-spark-app -e ENABLE_INIT_DAEMON=false --link spark-master:spark-master -d bde/spark-app
+docker run --name my-spark-app --link spark-master:spark-master -d bde/spark-app
 ```
 
-The sources in the project folder will be automatically added to `/usr/src/app` if you directly extend the Spark Java template image. Otherwise, you will have to add and package the sources yourself in your Dockerfile with the following commands:
+The sources in the project folder will be automatically added to `/usr/src/app` if you directly extend the Spark Maven template image. Otherwise, you will have to add and package the sources yourself in your Dockerfile with the following commands:
 
     COPY . /usr/src/app
     RUN cd /usr/src/app \
@@ -34,7 +34,7 @@ If you overwrite the template's `CMD` in your Dockerfile, make sure to execute t
 
 #### Example Dockerfile
 ```
-FROM bde2020/spark-java-template:3.1.1-hadoop3.2
+FROM bde2020/spark-maven-template:3.1.1-hadoop3.2
 
 MAINTAINER Erika Pauwels
 MAINTAINER Gezim Sejdiu
diff --git a/template/java/template.sh b/template/maven/template.sh
similarity index 100%
rename from template/java/template.sh
rename to template/maven/template.sh
diff --git a/template/scala/Dockerfile b/template/sbt/Dockerfile
similarity index 100%
rename from template/scala/Dockerfile
rename to template/sbt/Dockerfile
diff --git a/template/scala/README.md b/template/sbt/README.md
similarity index 82%
rename from template/scala/README.md
rename to template/sbt/README.md
index d868faa..787001c 100644
--- a/template/scala/README.md
+++ b/template/sbt/README.md
@@ -1,6 +1,6 @@
-# Spark Scala template
+# Spark SBT template
 
-The Spark Scala template image serves as a base image to build your own Scala
+The Spark SBT template image serves as a base image to build your own Scala
 application to run on a Spark cluster. See
 [big-data-europe/docker-spark README](https://github.com/big-data-europe/docker-spark)
 for a description of how to set up a Spark cluster.
@@ -11,7 +11,7 @@ for a description of how to set up a Spark cluster.
 spark-shell:
 
 ```
-docker run -it --rm bde2020/spark-scala-template sbt console
+docker run -it --rm bde2020/spark-sbt-template sbt console
 ```
 
 You can also use your Docker image directly and test your own code that way.
@@ -29,9 +29,9 @@ When the Docker image is built using this template, you should get a Docker
 image that includes a fat JAR containing your application and all its
 dependencies.
 
-### Extending the Spark Scala template with your application
+### Extending the Spark SBT template with your application
 
-#### Steps to extend the Spark Scala template
+#### Steps to extend the Spark SBT template
 
 1. Create a Dockerfile in the root folder of your project (which also contains
    a `build.sbt`)
@@ -45,7 +45,7 @@ dependencies.
 4. Build and run the image:
 ```
 docker build --rm=true -t bde/spark-app .
-docker run --name my-spark-app -e ENABLE_INIT_DAEMON=false --link spark-master:spark-master -d bde/spark-app
+docker run --name my-spark-app --link spark-master:spark-master -d bde/spark-app
 ```
 
 The sources in the project folder will be automatically added to `/usr/src/app`
@@ -62,7 +62,7 @@ the `/template.sh` script at the end.
 #### Example Dockerfile
 ```
-FROM bde2020/spark-scala-template:3.1.1-hadoop3.2
+FROM bde2020/spark-sbt-template:3.1.1-hadoop3.2
 
 MAINTAINER Cecile Tonglet
 
diff --git a/template/scala/build.sbt b/template/sbt/build.sbt
similarity index 100%
rename from template/scala/build.sbt
rename to template/sbt/build.sbt
diff --git a/template/scala/plugins.sbt b/template/sbt/plugins.sbt
similarity index 100%
rename from template/scala/plugins.sbt
rename to template/sbt/plugins.sbt
diff --git a/template/scala/template.sh b/template/sbt/template.sh
similarity index 100%
rename from template/scala/template.sh
rename to template/sbt/template.sh
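The Maven template README in this patch requires `mvn package` to produce an assembly ('uber') JAR with Spark and Hadoop dependencies scoped as `provided`, but shows no build configuration. A minimal `pom.xml` sketch follows for orientation; it is illustrative only and not part of this patch, and the shade-plugin version and the Scala 2.12 artifact suffix are assumptions to adapt to your project.

```xml
<!-- Illustrative sketch (not part of this patch): bind the shade plugin to
     the package phase so `mvn package` emits the uber JAR the template
     expects. The plugin version is an assumption; use a current release. -->
<build>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-shade-plugin</artifactId>
      <version>3.2.4</version>
      <executions>
        <execution>
          <phase>package</phase>
          <goals>
            <goal>shade</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>

<!-- Spark stays out of the uber JAR via provided scope, as the README asks: -->
<dependency>
  <groupId>org.apache.spark</groupId>
  <artifactId>spark-core_2.12</artifactId>
  <version>3.1.1</version>
  <scope>provided</scope>
</dependency>
```

With this in place, the project's artifactId and version should line up with `SPARK_APPLICATION_JAR_NAME` (default `application-1.0`, per the template Dockerfile above) so the JAR is found by the `/template.sh` submit step.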