Skip to content

Commit

Permalink
Changes in Dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
shortthirdman committed Apr 21, 2024
1 parent 84fadab commit c72820e
Showing 1 changed file with 30 additions and 43 deletions.
73 changes: 30 additions & 43 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,58 +1,45 @@
# syntax=docker/dockerfile:1

# Official Python 3.12 base image (Debian bookworm).
# NOTE(review): consider pinning a patch tag or digest (e.g. python:3.12.4)
# for fully reproducible builds.
FROM python:3.12

# Don't write .pyc files; flush stdout/stderr immediately so container
# logs appear in real time. (key=value form — the legacy space-separated
# ENV form is deprecated.)
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Set the working directory in the container (created automatically).
WORKDIR /app

# Java runtime is required by Apache Spark.
# openjdk-11 is not packaged in Debian bookworm (python:3.12's base), so
# install OpenJDK 17, which Spark >= 3.3 officially supports.
# update + install in one layer, and clean the apt lists in the same layer
# so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      openjdk-17-jdk-headless \
    && rm -rf /var/lib/apt/lists/*

# Install Apache Spark.
# The original pairing (Spark 3.2.0 + "hadoop3.3") does not exist on the
# Apache archive (3.2.0 shipped hadoop3.2/hadoop2.7 builds); Spark 3.5.1
# ships a "hadoop3" build and runs on Java 17.
ENV SPARK_VERSION=3.5.1 \
    HADOOP_VERSION=3
RUN wget -q "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \
    && tar -xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" -C /opt \
    && rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" \
    && mv "/opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" /opt/spark

# Make Spark's launcher scripts available on PATH.
ENV SPARK_HOME=/opt/spark \
    PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin

# Copy only the requirements manifest first so the dependency layer is
# cached until requirements.txt itself changes.
COPY requirements.txt /app/

# Upgrade pip and install Python dependencies (incl. pyspark if listed).
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code last — it changes most often, so
# this keeps all prior layers cacheable.
COPY . /app/

# Run as an unprivileged user with a stable numeric UID so runtimes can
# verify non-root execution.
RUN useradd --system --create-home --uid 10001 appuser \
    && chown -R appuser:appuser /app
USER appuser

# Documentation only: the app is expected to listen on this port.
# NOTE(review): 8888 was the Jupyter port of the previous base image —
# confirm it is still the port this application serves on.
EXPOSE 8888

# Default command (exec form so the process is PID 1 and receives SIGTERM).
# CMD ["python", "app.py"]
Expand Down

0 comments on commit c72820e

Please sign in to comment.