From 22ce638fd724cad47a1f0c065d7a84e5d0069d09 Mon Sep 17 00:00:00 2001 From: nobyArdor Date: Sat, 8 Oct 2022 10:12:40 +0300 Subject: [PATCH 1/4] update to zeppelin 0.10.1 --- Makefile | 5 +- data/namenode/in_use.lock | 1 + docker-compose.yml | 41 +++++++++----- notebook/2CF34ERK6/note.json | 57 ++++++++----------- notebook/2HHJNC9Z8/note.json | 107 +++++++++++++++++++++++++++++++++++ zeppelin/Dockerfile | 12 ++-- 6 files changed, 166 insertions(+), 57 deletions(-) create mode 100644 data/namenode/in_use.lock create mode 100644 notebook/2HHJNC9Z8/note.json diff --git a/Makefile b/Makefile index 36f762c..4b7281f 100644 --- a/Makefile +++ b/Makefile @@ -12,8 +12,9 @@ bash: run: docker build -t zeppelin ./zeppelin/. - docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook -v $(shell pwd)/zeppelin-0.7.2-bin-all:/opt/zeppelin zeppelin /bin/bash + #docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook -v $(shell pwd)/zeppelin-0.10.1-bin-all:/opt/zeppelin zeppelin /bin/bash #docker run -it --rm --net spark-net -p 80:8080 -v $(shell pwd)/notebook:/opt/zeppelin/notebook zeppelin /opt/zeppelin/bin/zeppelin.sh + docker run -it --rm --net spark-net -p 8091:8080 -e ZEPPELIN_ADDR=0.0.0.0 -v $(shell pwd)/notebook:/opt/zeppelin/notebook zeppelin /bin/bash build: - docker build -t earthquakesan/zeppelin:0.7.2 ./zeppelin/. + docker build -t bde2020/zeppelin:0.10.1 ./zeppelin/. 
diff --git a/data/namenode/in_use.lock b/data/namenode/in_use.lock new file mode 100644 index 0000000..9fbfd21 --- /dev/null +++ b/data/namenode/in_use.lock @@ -0,0 +1 @@ +84@edd0807d5479e \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 447d604..9e94bc1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,26 +1,26 @@ -version: "2.1" +version: "3" services: namenode: - image: bde2020/hadoop-namenode:1.1.0-hadoop2.8-java8 + image: bde2020/hadoop-namenode:2.0.0-hadoop3.3.3-java8 container_name: namenode volumes: - ./data/namenode:/hadoop/dfs/name environment: - CLUSTER_NAME=test - - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 + - CORE_CONF_fs_defaultFS=hdfs://namenode:9000 healthcheck: interval: 5s retries: 100 networks: - spark-net datanode: - image: bde2020/hadoop-datanode:1.1.0-hadoop2.8-java8 + image: bde2020/hadoop-datanode:2.0.0-hadoop3.3.3-java8 container_name: datanode volumes: - ./data/datanode:/hadoop/dfs/data environment: - - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 + - CORE_CONF_fs_defaultFS=hdfs://namenode:9000 depends_on: namenode: condition: service_healthy @@ -30,13 +30,15 @@ services: networks: - spark-net spark-master: - image: bde2020/spark-master:2.1.0-hadoop2.8-hive-java8 + image: bde2020/spark-master:3.3.0-hadoop3.3 container_name: spark-master ports: - "8080:8080" - "7077:7077" environment: - - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 + - CORE_CONF_fs_defaultFS=hdfs://namenode:9000 + environment: + SERVICE_PRECONDITION: "namenode:9000 namenode:9870 datanode:9864" depends_on: namenode: condition: service_healthy @@ -48,14 +50,14 @@ services: networks: - spark-net spark-worker: - image: bde2020/spark-worker:2.1.0-hadoop2.8-hive-java8 + image: bde2020/spark-worker:3.3.0-hadoop3.3 environment: - "SPARK_MASTER=spark://spark-master:7077" environment: - - CORE_CONF_fs_defaultFS=hdfs://namenode:8020 + - CORE_CONF_fs_defaultFS=hdfs://namenode:9000 depends_on: - spark-master: - condition: 
service_healthy + - spark-master + # condition: service_healthy healthcheck: interval: 5s retries: 100 @@ -64,19 +66,28 @@ services: zeppelin: build: ./zeppelin ports: - - 80:8080 + - 8090:8080 volumes: - ./notebook:/opt/zeppelin/notebook environment: - CORE_CONF_fs_defaultFS: "hdfs://namenode:8020" + CORE_CONF_fs_defaultFS: "hdfs://namenode:9000" SPARK_MASTER: "spark://spark-master:7077" + SPARK_HOME: "/spark" MASTER: "spark://spark-master:7077" + ZEPPELIN_ADDR: "0.0.0.0" + #SPARK_SUBMIT_OPTIONS: "--jars /opt/sansa-examples/jars/sansa-examples-spark-2016-12.jar" depends_on: - spark-master: - condition: service_healthy + + # condition: service_healthy + + # condition: service_healthy namenode: condition: service_healthy + depends_on: + - spark-master + - spark-worker + networks: - spark-net diff --git a/notebook/2CF34ERK6/note.json b/notebook/2CF34ERK6/note.json index 942e31c..701d93b 100644 --- a/notebook/2CF34ERK6/note.json +++ b/notebook/2CF34ERK6/note.json @@ -1,47 +1,50 @@ { "paragraphs": [ { - "text": "import scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nval slices \u003d 2\nval n \u003d math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow\nval count \u003d spark.sparkContext.parallelize(1 until n, slices).map { i \u003d\u003e\n val x \u003d random * 2 - 1\n val y \u003d random * 2 - 1\n if (x*x + y*y \u003c\u003d 1) 1 else 0\n}.reduce(_ + _)\nprintln(\"Pi is roughly \" + 4.0 * count / (n - 1))", + "text": "%pyspark\nimport scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nval slices \u003d 2\nval n \u003d math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow\nval count \u003d spark.sparkContext.parallelize(1 until n, slices).map { i \u003d\u003e\n val x \u003d random * 2 - 1\n val y \u003d random * 2 - 1\n if (x*x + y*y \u003c\u003d 1) 1 else 0\n}.reduce(_ + _)\nprintln(\"Pi is roughly \" + 4.0 * count / (n - 1))", "user": "anonymous", - "dateUpdated": "May 11, 2017 8:47:28 AM", + "dateUpdated": 
"2022-10-08 03:16:05.764", "config": { "colWidth": 12.0, "enabled": true, "results": {}, "editorSetting": { - "language": "text", - "editOnDblClick": false + "language": "python", + "editOnDblClick": false, + "completionKey": "TAB", + "completionSupport": true }, - "editorMode": "ace/mode/text", + "editorMode": "ace/mode/python", "editorHide": false, - "tableHide": true + "tableHide": true, + "fontSize": 9.0 }, "settings": { "params": {}, "forms": {} }, "results": { - "code": "SUCCESS", + "code": "ERROR", "msg": [ { "type": "TEXT", - "data": "\nimport scala.math.random\n\nimport org.apache.spark.sql.SparkSession\n\nslices: Int \u003d 2\n\nn: Int \u003d 200000\n\ncount: Int \u003d 156835\nPi is roughly 3.136715683578418\n" + "data": "java.lang.NoSuchMethodError: scala.tools.nsc.Settings.usejavacp()Lscala/tools/nsc/settings/AbsSettings$AbsSetting;\n\tat org.apache.zeppelin.spark.SparkScala211Interpreter.open(SparkScala211Interpreter.scala:70)\n\tat org.apache.zeppelin.spark.NewSparkInterpreter.open(NewSparkInterpreter.java:102)\n\tat org.apache.zeppelin.spark.SparkInterpreter.open(SparkInterpreter.java:62)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)\n\tat org.apache.zeppelin.spark.PySparkInterpreter.getSparkInterpreter(PySparkInterpreter.java:664)\n\tat org.apache.zeppelin.spark.PySparkInterpreter.createGatewayServerAndStartScript(PySparkInterpreter.java:260)\n\tat org.apache.zeppelin.spark.PySparkInterpreter.open(PySparkInterpreter.java:194)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)\n\tat org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:616)\n\tat org.apache.zeppelin.scheduler.Job.run(Job.java:188)\n\tat org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:140)\n\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat 
java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\n" } ] }, "apps": [], "jobName": "paragraph_1494446151737_1359394047", "id": "20170510-195551_400594820", - "dateCreated": "May 10, 2017 7:55:51 PM", - "dateStarted": "May 10, 2017 8:40:36 PM", - "dateFinished": "May 10, 2017 8:41:04 PM", - "status": "FINISHED", + "dateCreated": "2017-05-10 19:55:51.000", + "dateStarted": "2022-10-08 03:16:05.784", + "dateFinished": "2022-10-08 03:16:06.245", + "status": "ERROR", "progressUpdateIntervalMs": 500 }, { "text": "", "user": "anonymous", - "dateUpdated": "May 11, 2017 8:49:13 AM", + "dateUpdated": "2017-05-11 08:49:13.000", "config": { "colWidth": 12.0, "enabled": true, @@ -60,34 +63,20 @@ "apps": [], "jobName": "paragraph_1494446793336_527866307", "id": "20170510-200633_2002902352", - "dateCreated": "May 10, 2017 8:06:33 PM", + "dateCreated": "2017-05-10 20:06:33.000", "status": "FINISHED", "progressUpdateIntervalMs": 500 } ], "name": "SparkPi", "id": "2CF34ERK6", + "noteParams": {}, + "noteForms": {}, "angularObjects": { - "2CHD267MK:shared_process": [], - "2CFCYW8ZZ:shared_process": [], - "2CGGU1AUC:shared_process": [], - "2CHDUK5RT:shared_process": [], - "2CFPWN8ZX:shared_process": [], - "2CJT3A9WM:shared_process": [], - "2CFTT4BX1:shared_process": [], - "2CHPV1WNR:shared_process": [], - "2CH8NUNKD:shared_process": [], - "2CG24PUFX:shared_process": [], - "2CJHA5F79:shared_process": [], - "2CFD8HYGS:shared_process": [], - "2CH5TSP4J:shared_process": [], - "2CF3WY7WY:shared_process": [], - 
"2CFYWKGJK:shared_process": [], - "2CJFHW9TZ:shared_process": [], - "2CGEFHREK:shared_process": [], - "2CG4M1FG9:shared_process": [], - "2CH3SASQ1:shared_process": [] + "spark:shared_process": [] + }, + "config": { + "isZeppelinNotebookCronEnable": false }, - "config": {}, "info": {} } \ No newline at end of file diff --git a/notebook/2HHJNC9Z8/note.json b/notebook/2HHJNC9Z8/note.json new file mode 100644 index 0000000..21c8b02 --- /dev/null +++ b/notebook/2HHJNC9Z8/note.json @@ -0,0 +1,107 @@ +{ + "paragraphs": [ + { + "text": "val x \u003d sc.textFile(\"/lenta_articles/*.txt\", 4)", + "user": "anonymous", + "dateUpdated": "2022-10-08 03:24:24.338", + "config": { + "colWidth": 12.0, + "fontSize": 9.0, + "enabled": true, + "results": {}, + "editorSetting": { + "language": "scala", + "editOnDblClick": false, + "completionKey": "TAB", + "completionSupport": true + }, + "editorMode": "ace/mode/scala" + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "ERROR", + "msg": [ + { + "type": "TEXT", + "data": "java.lang.NoSuchMethodError: scala.tools.nsc.Settings.usejavacp()Lscala/tools/nsc/settings/AbsSettings$AbsSetting;\n\tat org.apache.zeppelin.spark.SparkScala211Interpreter.open(SparkScala211Interpreter.scala:70)\n\tat org.apache.zeppelin.spark.NewSparkInterpreter.open(NewSparkInterpreter.java:102)\n\tat org.apache.zeppelin.spark.SparkInterpreter.open(SparkInterpreter.java:62)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)\n\tat org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:616)\n\tat org.apache.zeppelin.scheduler.Job.run(Job.java:188)\n\tat org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:140)\n\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\n" + } + ] + }, + "apps": [], + "jobName": "paragraph_1665196689001_1295208312", + "id": "20221008-023809_2067929533", + "dateCreated": "2022-10-08 02:38:09.001", + "dateStarted": "2022-10-08 03:24:24.353", + "dateFinished": "2022-10-08 03:24:24.373", + "status": "ERROR", + "progressUpdateIntervalMs": 500 + }, + { + "text": "x.take(3).foreach(println)", + "user": "anonymous", + "dateUpdated": "2022-10-08 03:17:30.596", + "config": { + "colWidth": 12.0, + "fontSize": 9.0, + "enabled": true, + "results": {}, + "editorSetting": { + "language": "scala", + "editOnDblClick": false, + "completionKey": "TAB", + "completionSupport": true + }, + "editorMode": "ace/mode/scala" + }, + "settings": { + "params": {}, + "forms": {} + }, + "results": { + "code": "ERROR", + "msg": [ + { + "type": "TEXT", + "data": "java.lang.NoSuchMethodError: scala.tools.nsc.Settings.usejavacp()Lscala/tools/nsc/settings/AbsSettings$AbsSetting;\n\tat org.apache.zeppelin.spark.SparkScala211Interpreter.open(SparkScala211Interpreter.scala:70)\n\tat org.apache.zeppelin.spark.NewSparkInterpreter.open(NewSparkInterpreter.java:102)\n\tat org.apache.zeppelin.spark.SparkInterpreter.open(SparkInterpreter.java:62)\n\tat org.apache.zeppelin.interpreter.LazyOpenInterpreter.open(LazyOpenInterpreter.java:69)\n\tat org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:616)\n\tat org.apache.zeppelin.scheduler.Job.run(Job.java:188)\n\tat org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:140)\n\tat 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)\n\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\n" + } + ] + }, + "apps": [], + "jobName": "paragraph_1665197304739_-948041871", + "id": "20221008-024824_721918440", + "dateCreated": "2022-10-08 02:48:24.739", + "dateStarted": "2022-10-08 03:17:30.614", + "dateFinished": "2022-10-08 03:17:30.633", + "status": "ERROR", + "progressUpdateIntervalMs": 500 + }, + { + "user": "anonymous", + "config": {}, + "settings": { + "params": {}, + "forms": {} + }, + "apps": [], + "jobName": "paragraph_1665197328481_-57736739", + "id": "20221008-024848_945498816", + "dateCreated": "2022-10-08 02:48:48.481", + "status": "READY", + "progressUpdateIntervalMs": 500 + } + ], + "name": "Untitled Note 1", + "id": "2HHJNC9Z8", + "noteParams": {}, + "noteForms": {}, + "angularObjects": { + "spark:shared_process": [] + }, + "config": { + "isZeppelinNotebookCronEnable": false + }, + "info": {} +} \ No newline at end of file diff --git a/zeppelin/Dockerfile b/zeppelin/Dockerfile index 8d61b58..c932f8a 100644 --- a/zeppelin/Dockerfile +++ b/zeppelin/Dockerfile @@ -1,13 +1,13 @@ -FROM bde2020/spark-base:2.1.0-hadoop2.8-hive-java8 +FROM bde2020/spark-base:3.3.0-hadoop3.3 MAINTAINER Ivan Ermilov -ENV APACHE_SPARK_VERSION 2.1.0 -ENV APACHE_HADOOP_VERSION 2.8.0 -ENV ZEPPELIN_VERSION 0.7.2 +ENV APACHE_SPARK_VERSION 3.3.0 +ENV APACHE_HADOOP_VERSION 3.3.3 +ENV ZEPPELIN_VERSION 0.10.1 -RUN apt-get update && apt-get install wget +#RUN apt-get update && apt-get install 
wget RUN set -x \ - && curl -fSL "http://www-eu.apache.org/dist/zeppelin/zeppelin-0.7.2/zeppelin-0.7.2-bin-all.tgz" -o /tmp/zeppelin.tgz \ + && curl -fSL "https://dlcdn.apache.org/zeppelin/zeppelin-0.10.1/zeppelin-0.10.1-bin-all.tgz" -o /tmp/zeppelin.tgz \ && tar -xzvf /tmp/zeppelin.tgz -C /opt/ \ && mv /opt/zeppelin-* /opt/zeppelin \ && rm /tmp/zeppelin.tgz From 066c2dd154b4c1ab0709f523dcbcc9461d1c2cd4 Mon Sep 17 00:00:00 2001 From: nobyArdor Date: Sun, 30 Oct 2022 17:48:49 +0300 Subject: [PATCH 2/4] move to 3.2.2 spark to fix python3 dependencies context --- Makefile | 2 +- zeppelin/Dockerfile | 41 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 4b7281f..f768439 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ down: docker network rm spark-net bash: - docker exec -it dockerzeppelin_zeppelin_1 bash + docker run --rm -it bde2020/zeppelin:0.10.1 bash run: docker build -t zeppelin ./zeppelin/. diff --git a/zeppelin/Dockerfile b/zeppelin/Dockerfile index c932f8a..c6b9d0a 100644 --- a/zeppelin/Dockerfile +++ b/zeppelin/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/spark-base:3.3.0-hadoop3.3 +FROM bde2020/spark-base:3.2.2-hadoop3.2 MAINTAINER Ivan Ermilov ENV APACHE_SPARK_VERSION 3.3.0 @@ -12,7 +12,44 @@ RUN set -x \ && mv /opt/zeppelin-* /opt/zeppelin \ && rm /tmp/zeppelin.tgz -ENV SPARK_SUBMIT_OPTIONS "--jars /opt/zeppelin/sansa-examples-spark-2016-12.jar" + +#RUN apk add --update --no-cache \ +# wget \ +# gcc \ +# make \ +# cmake \ +# gcc \ +# g++ \ +# gfortran \ +# zlib-dev \ +# libffi-dev \ +# openssl-dev \ +# musl-dev \ +# linux-headers \ +# g++ \ +# python3 \ +# python3-dev \ +# py3-pip \ +# build-base +RUN apk add --update --no-cache python3 && ln -sf python3 /usr/bin/python +#RUN apk add --no-cache --update freetype freetype-dev +#ENV SPARK_SUBMIT_OPTIONS "--jars /opt/zeppelin/sansa-examples-spark-2016-12.jar" +#ENV PYTHONHASHSEED 1 +#ENV PYTHONUNBUFFERED=1 +#RUN apk add --update 
--no-cache python3 && ln -sf python3 /usr/bin/python +#RUN python -m ensurepip +#RUN pip3 install --no-cache --upgrade pip setuptools +#RUN pip3 install --no-cache --upgrade wheel pyproject.toml +#RUN pip3 install --no-cache --upgrade cython +#RUN pip3 install --no-cache --upgrade numpy +#RUN apk add --update --no-cache py3-packaging py3-wheel +#RUN apt update && apt install libfreetype6-dev + +#RUN pip3 install --no-cache --upgrade matplotlib +#RUN pip3 install --no-cache --upgrade jupyter +#RUN pip3 install --no-cache --upgrade grpcio +#RUN pip3 install --no-cache --upgrade protobuf +#RUN pip3 install --no-cache --upgrade ipykernel WORKDIR /opt/zeppelin From 389d1c1017e17215c7910c8468987047159e34a8 Mon Sep 17 00:00:00 2001 From: gorbunov Date: Tue, 22 Nov 2022 14:19:52 +0300 Subject: [PATCH 3/4] lf as default ending --- .gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..07764a7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text eol=lf \ No newline at end of file From cf5d1cefe8eba5c841589d62072debccc45603b6 Mon Sep 17 00:00:00 2001 From: gorbunov Date: Tue, 22 Nov 2022 14:45:50 +0300 Subject: [PATCH 4/4] Update .gitattributes --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 07764a7..94f480d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -* text eol=lf \ No newline at end of file +* text=auto eol=lf \ No newline at end of file