diff --git a/docs/source/getting_started/amd-installation.rst b/docs/source/getting_started/amd-installation.rst
index 61efad2013b2a..71d7527a3e706 100644
--- a/docs/source/getting_started/amd-installation.rst
+++ b/docs/source/getting_started/amd-installation.rst
@@ -107,6 +107,35 @@ Alternatively, wheels intended for vLLM use can be accessed under the releases.
 
     $ python setup.py develop # This may take 5-10 minutes. Currently, ``pip install .`` does not work for ROCm installation
 
+.. tip::
+
+    For example, vLLM v0.5.3 on ROCm 6.1 can be built with the following steps:
+
+    .. code-block:: console
+
+        $ pip install --upgrade pip
+
+        $ # Install PyTorch
+        $ pip uninstall torch -y
+        $ pip install --no-cache-dir --pre torch==2.5.0.dev20240710 --index-url https://download.pytorch.org/whl/nightly/rocm6.1
+
+        $ # Build & install AMD SMI
+        $ pip install /opt/rocm/share/amd_smi
+
+        $ # Install dependencies
+        $ pip install --upgrade numba scipy huggingface-hub[cli]
+        $ pip install "numpy<2"
+        $ pip install -r requirements-rocm.txt
+
+        $ # Apply the patch to ROCm 6.1 (requires root permission)
+        $ wget -N https://github.com/ROCm/vllm/raw/fa78403/rocm_patch/libamdhip64.so.6 -P /opt/rocm/lib
+        $ rm -f "$(python3 -c 'import torch; print(torch.__path__[0])')"/lib/libamdhip64.so*
+
+        $ # Build vLLM for MI210/MI250/MI300.
+        $ export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
+        $ python3 setup.py develop
+
 .. tip::
 
     - Triton flash attention is used by default. For benchmarking purposes, it is recommended to run a warm-up step before collecting perf numbers.
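
Once ``python3 setup.py develop`` finishes, it may be worth confirming that the build actually picked up the ROCm stack before benchmarking. The following sanity check is a suggestion of this edit, not part of the patch above; it assumes the ROCm nightly PyTorch wheel and the in-place vLLM build installed cleanly into the active environment:

.. code-block:: console

    $ # torch.version.hip is non-None on ROCm builds of PyTorch (None on CUDA builds)
    $ python3 -c 'import torch; print(torch.version.hip)'

    $ # Confirm the freshly built vLLM is importable and reports its version
    $ python3 -c 'import vllm; print(vllm.__version__)'

If the first command prints ``None``, the CUDA build of PyTorch is still installed and the ``pip uninstall torch -y`` / nightly-install step above should be repeated before rebuilding vLLM.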