Skip to content

Commit

Permalink
Adding docker build for DeepSpeed
Browse files Browse the repository at this point in the history
  • Loading branch information
Paladinium committed Dec 19, 2024
1 parent 9a99126 commit 7522fa8
Show file tree
Hide file tree
Showing 5 changed files with 249 additions and 17 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -170,4 +170,7 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.

.idea/*
.idea/*

# Deepspeed build
deepspeed/build
61 changes: 45 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@ FROM continuumio/miniconda3:24.7.1-0
# Build-time parameters. Each ARG has a default that can be overridden with
# `--build-arg`, and is mirrored into an ENV of the same name so the value is
# also available to later RUN steps and in the running container.
ARG TTS_MODEL="xtts"
ENV TTS_MODEL=$TTS_MODEL

# Full CUDA version (major.minor.patch).
ARG CUDA_VERSION="12.1.1"
ENV CUDA_VERSION=$CUDA_VERSION

ARG PYTHON_VERSION=3.11.9
ENV PYTHON_VERSION=$PYTHON_VERSION

ARG PYTORCH_VERSION=2.2.1
ENV PYTORCH_VERSION=$PYTORCH_VERSION

# Run every following RUN instruction through a bash login shell (`-l`).
# NOTE(review): presumably needed so that `conda activate` is available - confirm.
SHELL ["/bin/bash", "-l", "-c"]
ENV SHELL=/bin/bash
ENV HOST=0.0.0.0
# NOTE(review): set as ENV, so this also persists into the runtime environment.
ENV DEBIAN_FRONTEND=noninteractive
ENV CUDA_DOCKER_ARCH=all
ENV GRADIO_SERVER_NAME="0.0.0.0"
ENV NVIDIA_VISIBLE_DEVICES=all

RUN <<EOR
apt-get update
Expand All @@ -30,23 +40,30 @@ WORKDIR /alltalk
# Creates the "alltalk" conda environment and installs the compiler toolchain,
# PyTorch, the CUDA toolkit and the audio libraries into it.
# NOTE(review): this hunk appears to be rendered without +/- diff markers, so
# superseded lines (e.g. the hard-coded python=3.11.9 / pytorch=2.2.1 /
# pytorch-cuda=12.1 pins and the PackagesNotFoundError grep) seem to sit next to
# their parameterised replacements - confirm against the real file before editing.
ARG INSTALL_ENV_DIR=/alltalk/alltalk_environment/env
ENV CONDA_AUTO_UPDATE_CONDA="false"
RUN <<EOR
conda create -y -n "alltalk" -c conda-forge python=3.11.9
# Drop the last dot-separated component of CUDA_VERSION (e.g. 12.1.1 -> 12.1).
CUDA_SHORT_VERSION=${CUDA_VERSION%.*}

conda create -y -n "alltalk" -c conda-forge python=${PYTHON_VERSION}
conda activate alltalk
conda install -y \
RESULT=$( { conda install -y \
gcc_linux-64 \
gxx_linux-64 \
pytorch=2.2.1 \
pytorch=${PYTORCH_VERSION} \
pytorch-cuda=${CUDA_SHORT_VERSION} \
torchvision \
torchaudio \
pytorch-cuda=12.1 \
libaio \
nvidia/label/cuda-12.1.0::cuda-toolkit=12.1 \
nvidia/label/cuda-${CUDA_SHORT_VERSION}.0::cuda-toolkit \
faiss-gpu=1.9.0 \
conda-forge::ffmpeg=7.1.0 \
conda-forge::portaudio=19.7.0 \
-c pytorch \
-c anaconda \
-c nvidia | grep -zq PackagesNotFoundError && exit 1
-c nvidia ; } 2>&1 )

# Fail the build when the captured conda output (stdout and stderr) contains
# the word "error" in any letter case.
if echo $RESULT | grep -izq error ; then
echo "Failed to install conda dependencies 2: $RESULT"
exit 1
fi
conda clean -a && pip cache purge
EOR

Expand All @@ -61,22 +78,34 @@ RUN <<EOR
mkdir /alltalk/pip_cache
pip install --no-cache-dir --cache-dir=/alltalk/pip_cache -r system/requirements/requirements_standalone.txt
pip install --no-cache-dir --cache-dir=/alltalk/pip_cache --upgrade gradio==4.32.2
# Parler:
pip install --no-cache-dir --cache-dir=/alltalk/pip_cache -r system/requirements/requirements_parler.txt

# Deepspeed:
curl -LO https://github.com/erew123/alltalk_tts/releases/download/DeepSpeed-14.0/deepspeed-0.14.2+cu121torch2.2-cp311-cp311-manylinux_2_24_x86_64.whl
CFLAGS="-I$CONDA_PREFIX/include/" LDFLAGS="-L$CONDA_PREFIX/lib/" \
pip install --no-cache-dir --cache-dir=/alltalk/pip_cache deepspeed-0.14.2+cu121torch2.2-cp311-cp311-manylinux_2_24_x86_64.whl
rm -f deepspeed-0.14.2+cu121torch2.2-cp311-cp311-manylinux_2_24_x86_64.whl
conda clean --all --force-pkgs-dirs -y && pip cache purge
EOR

# Parler:
pip install --no-cache-dir --no-deps --cache-dir=/alltalk/pip_cache -r system/requirements/requirements_parler.txt
# Deepspeed:
# The wheel is produced beforehand by deepspeed/build-deepspeed.sh and placed in
# deepspeed/build/. It is bind-mounted for the duration of this RUN only, so it
# never becomes part of an image layer. (A COPY followed by `rm` in a later RUN
# keeps the wheel's bytes in the COPY layer, and the previous
# `mkdir -p /tmp/deepseped` targeted a misspelled directory.)
RUN --mount=type=bind,source=deepspeed/build,target=/tmp/deepspeed <<EOR
DEEPSPEED_WHEEL=$(realpath /tmp/deepspeed/*.whl)

# Exactly one wheel is expected: with none the glob stays literal, with several
# the variable holds multiple lines - neither is an existing file.
if [ ! -f "${DEEPSPEED_WHEEL}" ] ; then
  echo "Expected exactly one DeepSpeed wheel in deepspeed/build, got: ${DEEPSPEED_WHEEL}"
  exit 1
fi

conda activate alltalk

# Capture stdout and stderr so the complete output can be shown on failure.
RESULT=$( { CFLAGS="-I$CONDA_PREFIX/include/" LDFLAGS="-L$CONDA_PREFIX/lib/" \
  pip install --no-cache-dir "${DEEPSPEED_WHEEL}" ; } 2>&1 )
PIP_STATUS=$?

# Fail on a non-zero pip exit status, and (as before) on any "error" in the output.
if [ "${PIP_STATUS}" -ne 0 ] || echo "${RESULT}" | grep -izq error ; then
  echo "Failed to install pip dependencies: $RESULT"
  exit 1
fi

conda clean --all --force-pkgs-dirs -y && pip cache purge
EOR

# Deepspeed requires cutlass:
RUN git clone --depth 1 --branch "v3.5.1" https://github.com/NVIDIA/cutlass /alltalk/cutlass
ENV CUTLASS_PATH=/alltalk/cutlass
### Deepspeed requires cutlass:
###RUN git clone --depth 1 --branch "v3.5.1" https://github.com/NVIDIA/cutlass /alltalk/cutlass
###ENV CUTLASS_PATH=/alltalk/cutlass

# Writing scripts to start alltalk:
RUN <<EOR
Expand Down
103 changes: 103 additions & 0 deletions deepspeed/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Builder image for a DeepSpeed wheel.
# CUDA_VERSION is declared before FROM because it selects the base image tag;
# an ARG declared before FROM is only usable in FROM lines.
ARG CUDA_VERSION=12.1.1
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04

# Python version as major.minor; used below to pick the python<version>-dev
# apt package and the /usr/bin/python<version> interpreter.
ARG PYTHON_VERSION=3.11
ENV PYTHON_VERSION=$PYTHON_VERSION

# PyTorch version installed with pip before DeepSpeed is built.
ARG PYTORCH_VERSION=2.2.1
ENV PYTORCH_VERSION=$PYTORCH_VERSION

# DeepSpeed release to build; checked out as git tag "v<version>".
ARG DEEPSPEED_VERSION=0.16.1
ENV DEEPSPEED_VERSION=$DEEPSPEED_VERSION

# NOTE(review): set as ENV, so this also persists into the container environment.
ENV DEBIAN_FRONTEND=noninteractive

##############################################################################
# Directories:
##############################################################################
# Directory the DeepSpeed sources are cloned into.
ENV STAGE_DIR=/tmp
RUN mkdir -p ${STAGE_DIR}

##############################################################################
# Installation/Basic Utilities
##############################################################################
# Run all following RUN instructions through a bash login shell (`-l`).
SHELL ["/bin/bash", "-l", "-c"]
ENV SHELL=/bin/bash

# Installs the build toolchain, git and the Python development headers.
RUN <<EOR
# Commands in a heredoc are not implicitly chained: without `set -e` only the
# exit status of the last command decides whether this layer fails.
set -e

# Prerequisites for add-apt-repository:
apt-get update
apt-get install --no-install-recommends -y \
  ca-certificates \
  software-properties-common

add-apt-repository ppa:git-core/ppa -y # for latest git
add-apt-repository ppa:deadsnakes/ppa -y # for python

apt-get update
apt-get upgrade -y
apt-get install --no-install-recommends -y \
  autotools-dev \
  build-essential \
  cmake \
  curl \
  g++ \
  gcc \
  git \
  libaio-dev \
  libcupti-dev \
  llvm-dev \
  pdsh \
  python${PYTHON_VERSION}-dev \
  unzip \
  vim \
  wget

# Remove the package index in the same layer that created it.
rm -rf /var/lib/apt/lists/*
EOR

##############################################################################
# Python & pip
##############################################################################
# Makes `python` / `python<major>` resolve to the requested interpreter and
# bootstraps pip for it.
RUN <<EOR
# Stop at the first failing command instead of continuing with a broken setup.
set -e

# Correct symlinks to use the proper python version:
PYTHON_MAJOR_VERSION=${PYTHON_VERSION%%.*}
# -f replaces an existing link/file, -n avoids descending into a linked directory.
ln -sfn /usr/bin/python${PYTHON_VERSION} /usr/bin/python${PYTHON_MAJOR_VERSION}
ln -sfn /usr/bin/python${PYTHON_MAJOR_VERSION} /usr/bin/python

# Use the current bootstrap script rather than the one frozen for Python 3.7.
# -f makes curl fail on HTTP errors instead of saving an error page as get-pip.py.
curl -fsSL -o get-pip.py https://bootstrap.pypa.io/get-pip.py
python get-pip.py --no-cache-dir
rm get-pip.py
pip install --no-cache-dir --upgrade pip
EOR

# Minimal dependencies needed to build deepspeed.
# --no-cache-dir keeps pip's download cache (notably the large torch wheel) out
# of the image layer.
RUN pip install --no-cache-dir \
    deepspeed-kernels \
    scikit-learn \
    torch==${PYTORCH_VERSION}

##############################################################################
# DeepSpeed
##############################################################################
# Fetches the DeepSpeed sources for the requested release into ${STAGE_DIR}/DeepSpeed.
RUN <<EOR
# Abort the layer if the clone or the checkout fails.
set -e

# Only the tagged release is needed, so fetch just that commit instead of the
# whole project history.
git clone --depth 1 --branch "v${DEEPSPEED_VERSION}" \
  https://github.com/microsoft/DeepSpeed.git ${STAGE_DIR}/DeepSpeed
cd ${STAGE_DIR}/DeepSpeed

# Cloning a tag leaves HEAD detached; create the local branch "v<version>" at
# that commit, as the previous full clone + checkout did.
git checkout -b "v${DEEPSPEED_VERSION}"
EOR

##############################################################################
# DeepSpeed build file
##############################################################################
# Generates the container entrypoint. When the container runs, the script builds
# the DeepSpeed wheel and moves it to /deepspeed.
# The script is written to an absolute path so it does not depend on the current
# working directory, and is made executable in the same layer.
# The inner heredoc delimiter is unquoted on purpose: ${STAGE_DIR} is expanded
# while the image is built, so the generated script contains the literal path.
RUN <<EOR
set -e
cat << EOF > /build_deepspeed.sh
#!/usr/bin/env bash
set -e
mkdir -p /deepspeed
cd ${STAGE_DIR}/DeepSpeed
DS_BUILD_OPS=1 python setup.py build_ext -j8 bdist_wheel
mv ${STAGE_DIR}/DeepSpeed/dist/*.whl /deepspeed/
EOF
chmod +x /build_deepspeed.sh
EOR

ENTRYPOINT ["/build_deepspeed.sh"]
65 changes: 65 additions & 0 deletions deepspeed/build-deepspeed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env bash
#
# Builds a DeepSpeed wheel inside a docker container.
#
# Options (each takes one value, defaults below):
#   --cuda-version       CUDA version of the nvidia/cuda base image
#   --python-version     Python version as major.minor
#   --pytorch-version    PyTorch version to build against
#   --deepspeed-version  DeepSpeed release to build

CUDA_VERSION=12.1.1
PYTHON_VERSION=3.11
PYTORCH_VERSION=2.2.1
DEEPSPEED_VERSION=0.16.1

# Work relative to the directory containing this script, independent of the
# caller's working directory. Quoted so paths containing spaces work.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd "$SCRIPT_DIR" || exit 1

# Aborts when option $1 was given without a value.
# $2 is the number of remaining arguments, including the option itself.
require_value() {
  if [ "$2" -lt 2 ]; then
    echo "Missing value for argument '$1'"
    exit 1
  fi
}

# Parse arguments
while [ "$#" -gt 0 ]; do
  case "$1" in
    --cuda-version)
      require_value "$1" "$#"
      CUDA_VERSION="$2"
      shift
      ;;
    --python-version)
      require_value "$1" "$#"
      PYTHON_VERSION="$2"
      shift
      ;;
    --pytorch-version)
      require_value "$1" "$#"
      PYTORCH_VERSION="$2"
      shift
      ;;
    --deepspeed-version)
      require_value "$1" "$#"
      DEEPSPEED_VERSION="$2"
      shift
      ;;
    *)
      # Unknown arguments are rejected; they are not forwarded to docker.
      echo "Unknown argument '$1'"
      exit 1
      ;;
  esac
  shift
done

# Skip the build when a wheel for this DeepSpeed and Python version already exists.
# NOTE(review): the file name pattern does not cover the CUDA or PyTorch version,
# so changing only those does not trigger a rebuild - confirm this is intended.
PYTHON_VERSION_NO_DOT=${PYTHON_VERSION//./}
WHEEL_PATTERN="deepspeed-${DEEPSPEED_VERSION}*-cp${PYTHON_VERSION_NO_DOT}-cp${PYTHON_VERSION_NO_DOT}-*.whl"
# The directory test avoids a `find` error message on the very first run.
if [[ -d build && -n $(find build -name "$WHEEL_PATTERN") ]]
then
  echo "DeepSpeed was already built - skipping..."
  exit 0
fi

echo "Building DeepSpeed $DEEPSPEED_VERSION for CUDA $CUDA_VERSION using python ${PYTHON_VERSION} with PyTorch ${PYTORCH_VERSION}"

rm -rf build # make sure to properly clean up - we only want 1 wheel at the time
mkdir -p build

IMAGE_TAG="deepspeed:cu-$CUDA_VERSION-ds-$DEEPSPEED_VERSION"

# Stop here if the builder image cannot be built; otherwise `docker run` below
# would fail on a missing image or silently use a stale one with the same tag.
docker buildx \
  build \
  --build-arg CUDA_VERSION="$CUDA_VERSION" \
  --build-arg PYTHON_VERSION="$PYTHON_VERSION" \
  --build-arg PYTORCH_VERSION="$PYTORCH_VERSION" \
  --build-arg DEEPSPEED_VERSION="$DEEPSPEED_VERSION" \
  -t "$IMAGE_TAG" \
  . || exit 1

# `-it` makes docker fail when no terminal is attached (e.g. in CI), so only
# request an interactive TTY when stdin and stdout are terminals.
TTY_FLAGS=()
if [ -t 0 ] && [ -t 1 ]; then
  TTY_FLAGS=(-it)
fi

# The container's entrypoint builds the wheel and moves it to /deepspeed, which
# is the host's build directory. The exit status of `docker run` becomes the
# exit status of this script.
docker run \
  --rm \
  "${TTY_FLAGS[@]}" \
  --gpus=all \
  --name deepspeed \
  -v "$SCRIPT_DIR/build:/deepspeed" \
  "$IMAGE_TAG"
32 changes: 32 additions & 0 deletions docker-build.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
#!/usr/bin/env bash

TTS_MODEL=xtts
CUDA_VERSION=12.1.1
PYTHON_VERSION=3.11.9
PYTORCH_VERSION=2.2.1
DOCKER_TAG=latest

# Parse arguments
while [ "$#" -gt 0 ]; do
case "$1" in
--cuda-version)
CUDA_VERSION="$2"
shift
;;
--python-version)
PYTHON_VERSION="$2"
shift
;;
--pytorch-version)
PYTORCH_VERSION="$2"
shift
;;
--tts_model)
TTS_MODEL="$2"
shift
Expand All @@ -22,11 +37,28 @@ while [ "$#" -gt 0 ]; do
shift
done

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
cd "$SCRIPT_DIR" || exit 1

# DeepSpeed is built per major.minor Python version. Keep only the first two
# dot-separated fields so that both "3.11.9" and "3.11" yield "3.11"
# (`${PYTHON_VERSION%.*}` would turn "3.11" into "3").
PYTHON_MAJOR_MINOR=$(echo "$PYTHON_VERSION" | cut -d. -f1,2)
echo "$PYTHON_VERSION -> $PYTHON_MAJOR_MINOR"

# The image build needs the wheel in deepspeed/build, so stop if it could not
# be produced.
if ! "$SCRIPT_DIR/deepspeed/build-deepspeed.sh" \
  --cuda-version "${CUDA_VERSION}" \
  --python-version "${PYTHON_MAJOR_MINOR}" \
  --pytorch-version "${PYTORCH_VERSION}"
then
  echo "Building DeepSpeed failed - aborting"
  exit 1
fi

echo "Starting docker build process using TTS model '${TTS_MODEL}' and docker tag '${DOCKER_TAG}'"
echo "Building for CUDA $CUDA_VERSION using python ${PYTHON_VERSION} with PyTorch ${PYTORCH_VERSION}"


docker buildx \
  build \
  --progress=plain \
  --build-arg TTS_MODEL="$TTS_MODEL" \
  --build-arg CUDA_VERSION="$CUDA_VERSION" \
  --build-arg PYTHON_VERSION="$PYTHON_VERSION" \
  --build-arg PYTORCH_VERSION="$PYTORCH_VERSION" \
  -t "alltalk_beta:${DOCKER_TAG}" \
  .

Expand Down

0 comments on commit 7522fa8

Please sign in to comment.