From 628ec6c17b8121517e8f303b64567573036cdb38 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Sun, 29 Dec 2024 21:46:14 -0800 Subject: [PATCH] [Docker] bump up neuron sdk v2.21 (#11593) Signed-off-by: Liangfu Chen --- Dockerfile.neuron | 6 +++--- requirements-neuron.txt | 4 ++-- vllm/_custom_ops.py | 3 +-- vllm/triton_utils/importing.py | 1 - 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Dockerfile.neuron b/Dockerfile.neuron index 77162bc82de62..269139fe90f0b 100644 --- a/Dockerfile.neuron +++ b/Dockerfile.neuron @@ -1,6 +1,6 @@ # default base image # https://gallery.ecr.aws/neuron/pytorch-inference-neuronx -ARG BASE_IMAGE="public.ecr.aws/neuron/pytorch-inference-neuronx:2.1.2-neuronx-py310-sdk2.20.2-ubuntu20.04" +ARG BASE_IMAGE="public.ecr.aws/neuron/pytorch-inference-neuronx:2.5.1-neuronx-py310-sdk2.21.0-ubuntu22.04" FROM $BASE_IMAGE @@ -22,9 +22,9 @@ WORKDIR ${APP_MOUNT}/vllm RUN python3 -m pip install --upgrade pip RUN python3 -m pip install --no-cache-dir fastapi ninja tokenizers pandas -RUN python3 -m pip install sentencepiece transformers==4.36.2 -U +RUN python3 -m pip install sentencepiece transformers==4.45.2 -U RUN python3 -m pip install transformers-neuronx --extra-index-url=https://pip.repos.neuron.amazonaws.com -U -RUN python3 -m pip install --pre neuronx-cc==2.15.* --extra-index-url=https://pip.repos.neuron.amazonaws.com -U +RUN python3 -m pip install neuronx-cc==2.16.345.0 --extra-index-url=https://pip.repos.neuron.amazonaws.com -U COPY . . ARG GIT_REPO_CHECK=0 diff --git a/requirements-neuron.txt b/requirements-neuron.txt index 148fdbe0d6310..5e08d101fcd61 100644 --- a/requirements-neuron.txt +++ b/requirements-neuron.txt @@ -2,6 +2,6 @@ -r requirements-common.txt # Dependencies for Neuron devices -transformers-neuronx >= 0.12.0 -torch-neuronx >= 2.1.2 +transformers-neuronx >= 0.13.0 +torch-neuronx >= 2.5.0 neuronx-cc diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index aeacf5dda5761..eb2f69df42624 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -23,8 +23,7 @@ import vllm._moe_C # noqa: F401 supports_moe_ops = True -# neuron has torch version that doesn't even have impl_abstract -if TYPE_CHECKING or current_platform.is_neuron(): +if TYPE_CHECKING: def register_fake(fn): return lambda name: fn diff --git a/vllm/triton_utils/importing.py b/vllm/triton_utils/importing.py index 36315abcdfcda..0c96e0632f646 100644 --- a/vllm/triton_utils/importing.py +++ b/vllm/triton_utils/importing.py @@ -8,7 +8,6 @@ HAS_TRITON = ( find_spec("triton") is not None and not current_platform.is_xpu() # Not compatible - and not current_platform.is_neuron() # neuron has too old torch ) if not HAS_TRITON: