-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
42 lines (37 loc) · 1.06 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# Based on dylanmei/zeppelin and gettyimages/spark.
# Base images that were tried before (kept for reference):
# FROM debian:stretch
# FROM debian:jessie
FROM apache/zeppelin:0.11.0

# MAINTAINER is deprecated, so the author is recorded as an OCI label instead.
# NOTE(review): the e-mail address looks redacted in this copy - restore the real one.
LABEL org.opencontainers.image.authors="Anderson Santos [email protected]"
# Useful Python libraries and their dependencies.
# RUN apt-get update && \
#     apt-get install -y python3-pip && \
#     pip3 install --upgrade pip
#
# --no-cache-dir keeps pip's download cache out of the image layer.
# Packages are listed one per line, alphabetically, so diffs stay small.
# TODO(review): versions are unpinned, so rebuilds are not reproducible - pin them.
# NOTE(review): `datetime` on PyPI appears to be Zope's DateTime distribution, not
# the standard-library module - confirm it is really needed.
RUN pip3 install --no-cache-dir \
        datetime \
        folium \
        gensim \
        igraph \
        ijson \
        kpod \
        matplotlib \
        networkx \
        nltk \
        numpy \
        palettable \
        pandas \
        pandasql \
        scikit-learn \
        scipy \
        seaborn \
        tweepy \
        wordcloud
# Stage the helper script in /tmp and execute it at build time.
# The script itself is not visible here; per the original note it downloads
# the corpus and model data that the libraries above depend on.
COPY python-deps.py /tmp/python-deps.py
RUN python3 /tmp/python-deps.py
# Python interpreter that Spark workers should use.
ENV PYSPARK_PYTHON="/usr/bin/python3"

# Switch the Zeppelin interpreters to python3 by shipping a prepared
# interpreter.json. The sed commands below show the equivalent in-place edits
# and are kept for reference only.
# RUN sed -i "s/^\([ \t]*\"zeppelin\.python\":\).*/\1 \"python3\"\,/" /zeppelin/conf/interpreter.json
# RUN sed -i "s/^\([ \t]*\"zeppelin\.pyspark\.python\":\).*/\1 \"python3\"\,/" /zeppelin/conf/interpreter.json
# NOTE(review): the target path assumes Zeppelin is installed under /zeppelin in
# the base image - confirm against apache/zeppelin:0.11.0 (check ZEPPELIN_HOME).
COPY interpreter.json /zeppelin/conf/interpreter.json