diff --git a/README.md b/README.md index 20e1c33..2b45f3a 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ Docker images to:
Currently supported versions: +* Spark 3.2.1 for Hadoop 3.2 with OpenJDK 8 and Scala 2.12 * Spark 3.2.0 for Hadoop 3.2 with OpenJDK 8 and Scala 2.12 * Spark 3.1.2 for Hadoop 3.2 with OpenJDK 8 and Scala 2.12 * Spark 3.1.1 for Hadoop 3.2 with OpenJDK 8 and Scala 2.12 @@ -51,7 +52,7 @@ Add the following services to your `docker-compose.yml` to integrate a Spark mas version: '3' services: spark-master: - image: bde2020/spark-master:3.2.0-hadoop3.2 + image: bde2020/spark-master:3.2.1-hadoop3.2 container_name: spark-master ports: - "8080:8080" @@ -59,7 +60,7 @@ services: environment: - INIT_DAEMON_STEP=setup_spark spark-worker-1: - image: bde2020/spark-worker:3.2.0-hadoop3.2 + image: bde2020/spark-worker:3.2.1-hadoop3.2 container_name: spark-worker-1 depends_on: - spark-master @@ -68,7 +69,7 @@ services: environment: - "SPARK_MASTER=spark://spark-master:7077" spark-worker-2: - image: bde2020/spark-worker:3.2.0-hadoop3.2 + image: bde2020/spark-worker:3.2.1-hadoop3.2 container_name: spark-worker-2 depends_on: - spark-master @@ -77,7 +78,7 @@ services: environment: - "SPARK_MASTER=spark://spark-master:7077" spark-history-server: - image: bde2020/spark-history-server:3.2.0-hadoop3.2 + image: bde2020/spark-history-server:3.2.1-hadoop3.2 container_name: spark-history-server depends_on: - spark-master @@ -92,12 +93,12 @@ Make sure to fill in the `INIT_DAEMON_STEP` as configured in your pipeline. ### Spark Master To start a Spark master: - docker run --name spark-master -h spark-master -d bde2020/spark-master:3.2.0-hadoop3.2 + docker run --name spark-master -h spark-master -d bde2020/spark-master:3.2.1-hadoop3.2 ### Spark Worker To start a Spark worker: - docker run --name spark-worker-1 --link spark-master:spark-master -d bde2020/spark-worker:3.2.0-hadoop3.2 + docker run --name spark-worker-1 --link spark-master:spark-master -d bde2020/spark-worker:3.2.1-hadoop3.2 ## Launch a Spark application Building and running your Spark application on top of the Spark cluster is as simple as extending a template Docker image. Check the template's README for further documentation. @@ -117,11 +118,11 @@ It will also setup a headless service so spark clients can be reachable from the Then to use `spark-shell` issue -`kubectl run spark-base --rm -it --labels="app=spark-client" --image bde2020/spark-base:3.2.0-hadoop3.2 -- bash ./spark/bin/spark-shell --master spark://spark-master:7077 --conf spark.driver.host=spark-client` +`kubectl run spark-base --rm -it --labels="app=spark-client" --image bde2020/spark-base:3.2.1-hadoop3.2 -- bash ./spark/bin/spark-shell --master spark://spark-master:7077 --conf spark.driver.host=spark-client` To use `spark-submit` issue for example -`kubectl run spark-base --rm -it --labels="app=spark-client" --image bde2020/spark-base:3.2.0-hadoop3.2 -- bash ./spark/bin/spark-submit --class CLASS_TO_RUN --master spark://spark-master:7077 --deploy-mode client --conf spark.driver.host=spark-client URL_TO_YOUR_APP` +`kubectl run spark-base --rm -it --labels="app=spark-client" --image bde2020/spark-base:3.2.1-hadoop3.2 -- bash ./spark/bin/spark-submit --class CLASS_TO_RUN --master spark://spark-master:7077 --deploy-mode client --conf spark.driver.host=spark-client URL_TO_YOUR_APP` You can use your own image packed with Spark and your application but when deployed it must be reachable from the workers. One way to achieve this is by creating a headless service for your pod and then use `--conf spark.driver.host=YOUR_HEADLESS_SERVICE` whenever you submit your application. diff --git a/base/Dockerfile b/base/Dockerfile index 85d4c3f..9188b3e 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -7,7 +7,7 @@ ENV INIT_DAEMON_BASE_URI http://identifier/init-daemon ENV INIT_DAEMON_STEP spark_master_init ENV BASE_URL=https://archive.apache.org/dist/spark/ -ENV SPARK_VERSION=3.2.0 +ENV SPARK_VERSION=3.2.1 ENV HADOOP_VERSION=3.2 COPY wait-for-step.sh / diff --git a/build.sh b/build.sh index 0526c9d..0a4f0f9 100755 --- a/build.sh +++ b/build.sh @@ -2,7 +2,7 @@ set -e -TAG=3.2.0-hadoop3.2 +TAG=3.2.1-hadoop3.2 build() { NAME=$1 diff --git a/docker-compose.yml b/docker-compose.yml index 02ce7e4..deee71e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: '3' services: spark-master: - image: bde2020/spark-master:3.2.0-hadoop3.2 + image: bde2020/spark-master:3.2.1-hadoop3.2 container_name: spark-master ports: - "8080:8080" @@ -9,7 +9,7 @@ services: environment: - INIT_DAEMON_STEP=setup_spark spark-worker-1: - image: bde2020/spark-worker:3.2.0-hadoop3.2 + image: bde2020/spark-worker:3.2.1-hadoop3.2 container_name: spark-worker-1 depends_on: - spark-master diff --git a/examples/maven/Dockerfile b/examples/maven/Dockerfile index 8ef6fa7..a1f4b91 100644 --- a/examples/maven/Dockerfile +++ b/examples/maven/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-maven-template:3.2.0-hadoop3.2 +FROM bde2020/spark-maven-template:3.2.1-hadoop3.2 LABEL MAINTAINER="Gezim Sejdiu " diff --git a/examples/maven/README.md b/examples/maven/README.md index 862a3f6..9c898ba 100644 --- a/examples/maven/README.md +++ b/examples/maven/README.md @@ -17,5 +17,5 @@ To run the application, execute the following steps: ``` 3. Run the Docker container: ```bash - docker run --rm --network dockerspark_default --name spark-maven-example bde2020/spark-maven-example:3.2.0-hadoop3.2 + docker run --rm --network dockerspark_default --name spark-maven-example bde2020/spark-maven-example:3.2.1-hadoop3.2 ``` \ No newline at end of file diff --git a/examples/maven/pom.xml b/examples/maven/pom.xml index 881a97b..08fb089 100644 --- a/examples/maven/pom.xml +++ b/examples/maven/pom.xml @@ -14,7 +14,7 @@ UTF-8 2.12.13 2.12 - 3.1.1 + 3.2.1 diff --git a/examples/python/Dockerfile b/examples/python/Dockerfile index 732388f..9888d54 100644 --- a/examples/python/Dockerfile +++ b/examples/python/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-python-template:3.2.0-hadoop3.2 +FROM bde2020/spark-python-template:3.2.1-hadoop3.2 COPY wordcount.py /app/ diff --git a/examples/python/README.md b/examples/python/README.md index e0a17f7..d5a7b80 100644 --- a/examples/python/README.md +++ b/examples/python/README.md @@ -17,5 +17,5 @@ To run the application, execute the following steps: ``` 3. Run the Docker container: ```bash - docker run --rm --network dockerspark_default --name pyspark-example bde2020/spark-python-example:3.2.0-hadoop3.2 + docker run --rm --network dockerspark_default --name pyspark-example bde2020/spark-python-example:3.2.1-hadoop3.2 ``` \ No newline at end of file diff --git a/history-server/Dockerfile b/history-server/Dockerfile index cec6d5f..92fb065 100644 --- a/history-server/Dockerfile +++ b/history-server/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-base:3.2.0-hadoop3.2 +FROM bde2020/spark-base:3.2.1-hadoop3.2 LABEL maintainer="Gezim Sejdiu , Giannis Mouchakis " diff --git a/k8s-spark-cluster.yaml b/k8s-spark-cluster.yaml index 978f4e5..878806d 100644 --- a/k8s-spark-cluster.yaml +++ b/k8s-spark-cluster.yaml @@ -46,7 +46,7 @@ spec: spec: containers: - name: spark-master - image: bde2020/spark-master:3.2.0-hadoop3.2 + image: bde2020/spark-master:3.2.1-hadoop3.2 imagePullPolicy: Always ports: - containerPort: 8080 @@ -70,7 +70,7 @@ spec: spec: containers: - name: spark-worker - image: bde2020/spark-worker:3.2.0-hadoop3.2 + image: bde2020/spark-worker:3.2.1-hadoop3.2 imagePullPolicy: Always ports: - containerPort: 8081 diff --git a/master/Dockerfile b/master/Dockerfile index b519818..f11bcf2 100644 --- a/master/Dockerfile +++ b/master/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-base:3.2.0-hadoop3.2 +FROM bde2020/spark-base:3.2.1-hadoop3.2 LABEL maintainer="Gezim Sejdiu , Giannis Mouchakis " diff --git a/submit/Dockerfile b/submit/Dockerfile index eb57b40..66e11fd 100644 --- a/submit/Dockerfile +++ b/submit/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-base:3.2.0-hadoop3.2 +FROM bde2020/spark-base:3.2.1-hadoop3.2 LABEL maintainer="Gezim Sejdiu , Giannis Mouchakis " diff --git a/template/maven/Dockerfile b/template/maven/Dockerfile index 7c2ec50..d8c00a0 100644 --- a/template/maven/Dockerfile +++ b/template/maven/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-submit:3.2.0-hadoop3.2 +FROM bde2020/spark-submit:3.2.1-hadoop3.2 LABEL maintainer="Gezim Sejdiu , Giannis Mouchakis " diff --git a/template/maven/README.md b/template/maven/README.md index 84e0970..a33fe5b 100644 --- a/template/maven/README.md +++ b/template/maven/README.md @@ -34,7 +34,7 @@ If you overwrite the template's `CMD` in your Dockerfile, make sure to execute t #### Example Dockerfile ``` -FROM bde2020/spark-maven-template:3.2.0-hadoop3.2 +FROM bde2020/spark-maven-template:3.2.1-hadoop3.2 MAINTAINER Erika Pauwels MAINTAINER Gezim Sejdiu diff --git a/template/python/Dockerfile b/template/python/Dockerfile index fbf6ffc..fe784b2 100644 --- a/template/python/Dockerfile +++ b/template/python/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-submit:3.2.0-hadoop3.2 +FROM bde2020/spark-submit:3.2.1-hadoop3.2 LABEL maintainer="Gezim Sejdiu , Giannis Mouchakis " diff --git a/template/python/README.md b/template/python/README.md index b520486..c131f27 100644 --- a/template/python/README.md +++ b/template/python/README.md @@ -30,7 +30,7 @@ If you overwrite the template's `CMD` in your Dockerfile, make sure to execute t #### Example Dockerfile ``` -FROM bde2020/spark-python-template:3.2.0-hadoop3.2 +FROM bde2020/spark-python-template:3.2.1-hadoop3.2 MAINTAINER You diff --git a/template/sbt/Dockerfile b/template/sbt/Dockerfile index 66ba3fc..2cb9f79 100644 --- a/template/sbt/Dockerfile +++ b/template/sbt/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-submit:3.2.0-hadoop3.2 +FROM bde2020/spark-submit:3.2.1-hadoop3.2 LABEL maintainer="Gezim Sejdiu , Giannis Mouchakis " diff --git a/template/sbt/README.md b/template/sbt/README.md index a97f670..2d05194 100644 --- a/template/sbt/README.md +++ b/template/sbt/README.md @@ -62,11 +62,7 @@ the `/template.sh` script at the end. #### Example Dockerfile ``` -<<<<<<< HEAD:template/sbt/README.md -FROM bde2020/spark-sbt-template:3.2.0-hadoop3.2 -======= -FROM bde2020/spark-scala-template:3.2.0-hadoop3.2 ->>>>>>> cd4cab298d8e63ecaf488ffaf80ed5f6df5d5384:template/scala/README.md +FROM bde2020/spark-sbt-template:3.2.1-hadoop3.2 MAINTAINER Cecile Tonglet diff --git a/template/sbt/build.sbt b/template/sbt/build.sbt index 5037c33..1f177b2 100644 --- a/template/sbt/build.sbt +++ b/template/sbt/build.sbt @@ -1,4 +1,4 @@ scalaVersion := "2.12.14" libraryDependencies ++= Seq( - "org.apache.spark" %% "spark-sql" % "3.2.0" % "provided" + "org.apache.spark" %% "spark-sql" % "3.2.1" % "provided" ) diff --git a/worker/Dockerfile b/worker/Dockerfile index 948ebf4..a7f84cc 100644 --- a/worker/Dockerfile +++ b/worker/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-base:3.2.0-hadoop3.2 +FROM bde2020/spark-base:3.2.1-hadoop3.2 LABEL maintainer="Gezim Sejdiu , Giannis Mouchakis "