# Patch summary (text ahead of the first "diff --git" header is skipped by git apply / patch):
#   * .buildkite/run-gh200-test.sh: install Python 3.12 with apt, make it the python3 alternative,
#     bootstrap pip through get-pip.py, then run use_existing_torch.py before the docker build.
#     `apt-get update -y` is issued once at the head of the chain, so this hunk adds 12 lines (+4,18).
#   * Dockerfile: stop copying requirements-cuda-arm64.txt; on linux/arm64 install the pinned nightly wheels
#     (torch 2.6.0.dev20241210+cu124, torchvision 0.22.0.dev20241215) from the cu124 nightly index inline,
#     and remove the two later arm64-only re-install steps.
#   * docs/source/serving/deploying_with_docker.rst: document the manual wheel install + use_existing_torch.py step.
#   * requirements-build.txt, requirements-cuda.txt: drop the "platform_machine != 'aarch64'" markers on torch/torchvision.
#   * requirements-cuda-arm64.txt: deleted.
# NOTE(review): the .rst context line reads `-platform "linux/arm64"`; docker build spells the flag `--platform`.
#   Presumably a typo already present in the docs file - confirm and fix in a follow-up change.
diff --git a/.buildkite/run-gh200-test.sh b/.buildkite/run-gh200-test.sh index d06604f96f2b8..6cf071b02b23a 100644 --- a/.buildkite/run-gh200-test.sh +++ b/.buildkite/run-gh200-test.sh @@ -4,6 +4,18 @@ # It serves a sanity check for compilation and basic model usage. set -ex +# Download the python +PYTHON_VERSION=3.12 +apt-get update -y \ + && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \ + && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \ + && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \ + && curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \ + && python3 --version && python3 -m pip --version + +python3 use_existing_torch.py + # Try building the docker image DOCKER_BUILDKIT=1 docker build . \ --target vllm-openai \ diff --git a/Dockerfile b/Dockerfile index 123703848749c..2a8f714f41515 100644 --- a/Dockerfile +++ b/Dockerfile @@ -47,13 +47,13 @@ WORKDIR /workspace # install build and runtime dependencies COPY requirements-common.txt requirements-common.txt COPY requirements-cuda.txt requirements-cuda.txt -COPY requirements-cuda-arm64.txt requirements-cuda-arm64.txt RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install -r requirements-cuda.txt RUN --mount=type=cache,target=/root/.cache/pip \ if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - python3 -m pip install -r requirements-cuda-arm64.txt; \ + python3 -m pip install --index-url https://download.pytorch.org/whl/nightly/cu124 "torch==2.6.0.dev20241210+cu124" \ + && python3 -m pip install --index-url https://download.pytorch.org/whl/nightly/cu124 "torchvision==0.22.0.dev20241215"; \ fi # cuda arch list used by torch @@ -77,11 +77,6 @@ COPY requirements-build.txt requirements-build.txt RUN --mount=type=cache,target=/root/.cache/pip \ python3 -m pip
install -r requirements-build.txt -RUN --mount=type=cache,target=/root/.cache/pip \ - if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - python3 -m pip install -r requirements-cuda-arm64.txt; \ - fi - COPY . . ARG GIT_REPO_CHECK=0 RUN --mount=type=bind,source=.git,target=.git \ @@ -188,12 +183,6 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist --mount=type=cache,target=/root/.cache/pip \ python3 -m pip install dist/*.whl --verbose -RUN --mount=type=cache,target=/root/.cache/pip \ - if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ - pip uninstall -y torch && \ - python3 -m pip install -r requirements-cuda-arm64.txt; \ - fi - RUN --mount=type=cache,target=/root/.cache/pip \ . /etc/environment && \ if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \ diff --git a/docs/source/serving/deploying_with_docker.rst b/docs/source/serving/deploying_with_docker.rst index 11a9f12fd17cd..ccc3b9c92ca78 100644 --- a/docs/source/serving/deploying_with_docker.rst +++ b/docs/source/serving/deploying_with_docker.rst @@ -52,6 +52,10 @@ of PyTorch Nightly and should be considered **experimental**. Using the flag `-- .. code-block:: console # Example of building on Nvidia GH200 server. (Memory usage: ~12GB, Build time: ~1475s / ~25 min, Image size: 7.26GB) + # Note: You should download the torch and torchvision wheels from the PyTorch nightly site, and run the use_existing_torch.py script to skip the download of other torch wheels during the build. + $ python3 -m pip install --index-url https://download.pytorch.org/whl/nightly/cu124 "torch==2.6.0.dev20241210+cu124" + $ python3 -m pip install --index-url https://download.pytorch.org/whl/nightly/cu124 "torchvision==0.22.0.dev20241215" + $ python3 use_existing_torch.py $ DOCKER_BUILDKIT=1 sudo docker build .
\ --target vllm-openai \ -platform "linux/arm64" \ diff --git a/requirements-build.txt b/requirements-build.txt index 388b193403e88..fec01caaf25ef 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -4,6 +4,6 @@ ninja packaging setuptools>=61 setuptools-scm>=8 -torch==2.5.1; platform_machine != 'aarch64' +torch==2.5.1 wheel jinja2 diff --git a/requirements-cuda-arm64.txt b/requirements-cuda-arm64.txt deleted file mode 100644 index bbcb5cb7012ce..0000000000000 --- a/requirements-cuda-arm64.txt +++ /dev/null @@ -1,3 +0,0 @@ ---index-url https://download.pytorch.org/whl/nightly/cu124 -torchvision==0.22.0.dev20241215; platform_machine == 'aarch64' -torch==2.6.0.dev20241210+cu124; platform_machine == 'aarch64' diff --git a/requirements-cuda.txt b/requirements-cuda.txt index 5d4dee8c7129a..058ab7c1ee9df 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -4,7 +4,7 @@ # Dependencies for NVIDIA GPUs ray >= 2.9 nvidia-ml-py >= 12.560.30 # for pynvml package -torch == 2.5.1; platform_machine != 'aarch64' +torch == 2.5.1 # These must be updated alongside torch -torchvision == 0.20.1; platform_machine != 'aarch64' # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version +torchvision == 0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version xformers == 0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1