From 230ae61667555c9d635eb0179b3914e23cf3f0d7 Mon Sep 17 00:00:00 2001 From: Setepenre Date: Mon, 5 Aug 2024 19:59:02 +0000 Subject: [PATCH] Add XPU & HPU dockerfiles --- .github/workflows/docker.yml | 19 +++++---- docker/Dockerfile-cuda | 14 ++++--- docker/Dockerfile-hpu | 41 ++++++++++++------- docker/Dockerfile-rocm | 7 ++-- docker/Dockerfile-xpu | 78 ++++++++++++++++++++++++++++++++++++ milabench/_version.py | 6 +-- 6 files changed, 128 insertions(+), 37 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index eeced2845..2aa0e3441 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -3,7 +3,7 @@ name: Publish Docker image on: # Allow manual runs workflow_dispatch: - + # Only run for push on the main branch or for tagged version push: branches: @@ -15,13 +15,10 @@ env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} - permissions: packages: write - # define build arguments - jobs: build-image: strategy: @@ -30,8 +27,10 @@ jobs: include: - arch: cuda - arch: rocm + - arch: xpu + - arch: hpu - runs-on: ubuntu-latest + runs-on: ubuntu-latest permissions: contents: read @@ -45,7 +44,7 @@ jobs: remove-haskell: 'true' remove-android: 'true' build-mount-path: /home/runner/work/milabench/ - + - name: Show all images run: | docker image ls @@ -56,7 +55,7 @@ jobs: # The images are still on github registry docker image prune -f -a --filter "until=336h" docker system prune -f - sudo apt install jq -y + sudo apt install jq -y jq '. 
+ { "data-root": "/home/runner/work/milabench/docker" }' /etc/docker/daemon.json > newconfig.json sudo mv -f newconfig.json /etc/docker/daemon.json cat /etc/docker/daemon.json @@ -68,7 +67,7 @@ jobs: - name: Check out the repo uses: actions/checkout@v3 - + - name: Get Image Tag Name env: GITHUB_REF_NAME_ENV: ${{ github.ref_name }} @@ -79,14 +78,14 @@ jobs: IMAGE_TAG="${GITHUB_REF_NAME##*/}" fi echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV - + - name: Log in to the registry uses: docker/login-action@v2 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - + - name: Extract metadata (tags, labels) for the image id: meta uses: docker/metadata-action@v4 diff --git a/docker/Dockerfile-cuda b/docker/Dockerfile-cuda index 2c9cde10e..da9c2c96b 100644 --- a/docker/Dockerfile-cuda +++ b/docker/Dockerfile-cuda @@ -15,6 +15,10 @@ ENV MILABENCH_GPU_ARCH=$ARCH ENV MILABENCH_CONFIG_NAME=$CONFIG ENV MILABENCH_DOCKER=1 +ENV CUDA_VER=12.1 +ENV MELLANOX_KEY="https://content.mellanox.com/ofed/RPM-GPG-KEY-Mellanox" +ENV MELLANOX_LIST="https://linux.mellanox.com/public/repo/mlnx_ofed/${MOFED_VERSION}/ubuntu22.04/mellanox_mlnx_ofed.list" + # Paths # ----- @@ -42,17 +46,17 @@ COPY . 
/milabench/milabench/ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y &&\ apt-get install -y --no-install-recommends git build-essential curl python3.10 python-is-python3 python3-pip &&\ - curl -o /etc/apt/trusted.gpg.d/mellanox.asc https://content.mellanox.com/ofed/RPM-GPG-KEY-Mellanox &&\ - curl -o /etc/apt/sources.list.d/mellanox.list https://linux.mellanox.com/public/repo/mlnx_ofed/${MOFED_VERSION}/ubuntu22.04/mellanox_mlnx_ofed.list &&\ + curl -o /etc/apt/trusted.gpg.d/mellanox.asc $MELLANOX_KEY &&\ + curl -o /etc/apt/sources.list.d/mellanox.list $MELLANOX_LIST &&\ apt-get update -y &&\ apt-get install -y --no-install-recommends libibverbs1 &&\ apt-get clean &&\ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* # Install Rust RUN curl https://sh.rustup.rs -sSf | sh -s -- -y ENV PATH="/root/.cargo/bin:${PATH}" -ENV CUDA_HOME=/usr/local/cuda-12.1 +ENV CUDA_HOME="/usr/local/cuda-${CUDA_VER}" # Install Milabench # ----------------- @@ -71,6 +75,6 @@ RUN python -m pip install -U pip &&\ ENV PIP_DEFAULT_TIMEOUT=800 RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\ - python -m pip cache purge + python -m pip cache purge CMD milabench run diff --git a/docker/Dockerfile-hpu b/docker/Dockerfile-hpu index c1a3f7ba1..90fac3849 100644 --- a/docker/Dockerfile-hpu +++ b/docker/Dockerfile-hpu @@ -4,24 +4,25 @@ FROM ubuntu:22.04 # Arguments # --------- -ARG ARCH=rocm +ARG ARCH=hpu ENV MILABENCH_GPU_ARCH=$ARCH ARG CONFIG=standard.yaml ENV MILABENCH_CONFIG_NAME=$CONFIG ENV MILABENCH_DOCKER=1 -ARG PYTHON=3.10 +ARG PYTHON="3.10" +ENV HABANA_INSTALLER=https://vault.habana.ai/artifactory/gaudi-installer/1.16.1/habanalabs-installer.sh # Paths # ----- ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME ENV MILABENCH_BASE=/milabench/envs -ENV MILABENCH_OUTPUT=/milabench/results/ ENV MILABENCH_ARGS="" -ENV CONDA_PATH=/opt/anaconda +ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs" +ENV 
BENCHMARK_VENV="$MILABENCH_BASE/venv" # Copy milabench @@ -40,9 +41,11 @@ COPY . /milabench/milabench/ # build-essential: for rust RUN apt-get update &&\ - apt-get install -y git build-essential curl python3.10 &&\ + apt-get install -y git build-essential curl python3.10 python-is-python3 python3-pip &&\ apt-get clean &&\ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* &&\ + curl -L -o habana_installer.sh -s ${HABANA_INSTALLER} &&\ + chmod +x habana_installer.sh RUN curl https://sh.rustup.rs -sSf | sh -s -- -y ENV PATH="/root/.cargo/bin:${PATH}" @@ -50,20 +53,28 @@ ENV PATH="/root/.cargo/bin:${PATH}" # Install Milabench # ----------------- -RUN python3 -m pip install -U pip &&\ - python3 -m pip install -U setuptools &&\ - python3 -m pip install -U poetry &&\ - python3 -m pip install -e /milabench/milabench/ &&\ - python3 -m pip cache purge - +# Habana must also be installed in the system environment, +# so that the HPU can be monitored. +RUN python -m pip install -U pip &&\ + python -m pip install -U setuptools &&\ + python -m pip install -U poetry &&\ + python -m pip install -e /milabench/milabench/ &&\ + ./habana_installer.sh install -t dependencies &&\ + ./habana_installer.sh install -t pytorch &&\ + python -m pip cache purge # Prepare bench # ------------- # pip times out often when downloading pytorch ENV PIP_DEFAULT_TIMEOUT=800 - -RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\ - python3 -m pip cache purge +ENV HABANALABS_VIRTUAL_DIR=$BENCHMARK_VENV/torch + +# Install habana in the benchmark environment +RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\ + ./habana_installer.sh install -t dependencies --venv -y &&\ + ./habana_installer.sh install -t pytorch --venv -y &&\ + python -m pip cache purge &&\ + rm -rf habana_installer.sh CMD ["milabench", "run"] diff --git a/docker/Dockerfile-rocm b/docker/Dockerfile-rocm index 357af0208..2290c40ef 100644 --- a/docker/Dockerfile-rocm 
+++ b/docker/Dockerfile-rocm @@ -11,7 +11,7 @@ ARG CONFIG=standard.yaml ENV MILABENCH_CONFIG_NAME=$CONFIG ENV MILABENCH_DOCKER=1 -ARG PYTHON=3.10 +ARG PYTHON="3.10" # Paths @@ -19,10 +19,9 @@ ARG PYTHON=3.10 ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME ENV MILABENCH_BASE=/milabench/envs -ENV MILABENCH_OUTPUT=/milabench/results/ ENV MILABENCH_ARGS="" -ENV CONDA_PATH=/opt/anaconda - +ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs" +ENV BENCHMARK_VENV="$MILABENCH_BASE/venv" # Copy milabench # -------------- diff --git a/docker/Dockerfile-xpu b/docker/Dockerfile-xpu index 8b1378917..d8e524925 100644 --- a/docker/Dockerfile-xpu +++ b/docker/Dockerfile-xpu @@ -1 +1,79 @@ + +FROM ubuntu:22.04 + +# Arguments +# --------- + +ARG ARCH=xpu +ENV MILABENCH_GPU_ARCH=$ARCH + +ARG CONFIG=standard.yaml +ENV MILABENCH_CONFIG_NAME=$CONFIG +ENV MILABENCH_DOCKER=1 + +ARG PYTHON="3.10" + +ENV XPU_MANAGER="V1.2.36/xpumanager_1.2.36_20240428.081009.377f9162.u22.04_amd64.deb" + +# Paths +# ----- + +ENV MILABENCH_CONFIG=/milabench/milabench/config/$MILABENCH_CONFIG_NAME +ENV MILABENCH_BASE=/milabench/base +ENV MILABENCH_ARGS="" + +ENV MILABENCH_OUTPUT="$MILABENCH_BASE/runs" +ENV BENCHMARK_VENV="$MILABENCH_BASE/venv" + +# Copy milabench +# -------------- + +WORKDIR /milabench +COPY . 
/milabench/milabench/ + + +# Install Dependencies +# -------------------- + +# curl: used to download the XPU manager package and the rustup installer +# git: used by milabench +# rustc: used by BERT models inside https://pypi.org/project/tokenizers/ +# build-essential: for rust + +RUN apt-get update &&\ + apt-get install -y git build-essential curl python3.10 python-is-python3 python3-pip &&\ + apt-get clean &&\ + rm -rf /var/lib/apt/lists/* &&\ + curl -L -o xpu_manager.deb -s https://github.com/intel/xpumanager/releases/download/${XPU_MANAGER} &&\ + dpkg -i xpu_manager.deb &&\ + rm -rf xpu_manager.deb + + +RUN curl https://sh.rustup.rs -sSf | sh -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +# Install Milabench +# ----------------- + +RUN python -m pip install -U pip &&\ + python -m pip install -U setuptools &&\ + python -m pip install -U poetry &&\ + python -m pip install -e /milabench/milabench/ &&\ + python -m pip cache purge + + +# Prepare bench +# ------------- + +# pip times out often when downloading pytorch +ENV PIP_DEFAULT_TIMEOUT=800 + +# Uninstall the default pytorch, then +# reinstall pytorch together with the Intel extension packages. 
+RUN milabench install --config $MILABENCH_CONFIG --base $MILABENCH_BASE $MILABENCH_ARGS &&\ + /bin/bash -c "source $BENCHMARK_VENV/torch/bin/activate && pip uninstall -y torch torchvision torchaudio" &&\ + /bin/bash -c "source $BENCHMARK_VENV/torch/bin/activate && pip install torch torchvision torchaudio intel-extension-for-pytorch oneccl_bind_pt intel-extension-for-pytorch-deepspeed --index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" &&\ + python -m pip cache purge + +CMD ["milabench", "run"] diff --git a/milabench/_version.py b/milabench/_version.py index 23cf810bc..dcf8c9247 100644 --- a/milabench/_version.py +++ b/milabench/_version.py @@ -1,5 +1,5 @@ """This file is generated, do not modify""" -__tag__ = "v0.1.0-38-gfb01d691" -__commit__ = "fb01d691aa0d88717dcb3fea8852f61e111cc75f" -__date__ = "2024-08-01 18:59:13 -0400" +__tag__ = "508240b" +__commit__ = "508240b96a3dc0f10aa2c1a06b34e0b5373b9a67" +__date__ = "2024-08-05 15:16:58 -0400"