Skip to content

Commit

Permalink
[Docker] bump up neuron sdk v2.21 (vllm-project#11593)
Browse files (browse the repository at this point in the history)
Signed-off-by: Liangfu Chen <[email protected]>
  • Loading branch information
liangfu authored Dec 30, 2024
1 parent 3682e33 commit 628ec6c
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 8 deletions.
6 changes: 3 additions & 3 deletions Dockerfile.neuron
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# default base image
# https://gallery.ecr.aws/neuron/pytorch-inference-neuronx
-ARG BASE_IMAGE="public.ecr.aws/neuron/pytorch-inference-neuronx:2.1.2-neuronx-py310-sdk2.20.2-ubuntu20.04"
+ARG BASE_IMAGE="public.ecr.aws/neuron/pytorch-inference-neuronx:2.5.1-neuronx-py310-sdk2.21.0-ubuntu22.04"

FROM $BASE_IMAGE

Expand All @@ -22,9 +22,9 @@ WORKDIR ${APP_MOUNT}/vllm

RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install --no-cache-dir fastapi ninja tokenizers pandas
-RUN python3 -m pip install sentencepiece transformers==4.36.2 -U
+RUN python3 -m pip install sentencepiece transformers==4.45.2 -U
RUN python3 -m pip install transformers-neuronx --extra-index-url=https://pip.repos.neuron.amazonaws.com -U
-RUN python3 -m pip install --pre neuronx-cc==2.15.* --extra-index-url=https://pip.repos.neuron.amazonaws.com -U
+RUN python3 -m pip install neuronx-cc==2.16.345.0 --extra-index-url=https://pip.repos.neuron.amazonaws.com -U

COPY . .
ARG GIT_REPO_CHECK=0
Expand Down
4 changes: 2 additions & 2 deletions requirements-neuron.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
-r requirements-common.txt

# Dependencies for Neuron devices
-transformers-neuronx >= 0.12.0
-torch-neuronx >= 2.1.2
+transformers-neuronx >= 0.13.0
+torch-neuronx >= 2.5.0
neuronx-cc
3 changes: 1 addition & 2 deletions vllm/_custom_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
import vllm._moe_C # noqa: F401
supports_moe_ops = True

-# neuron has torch version that doesn't even have impl_abstract
-if TYPE_CHECKING or current_platform.is_neuron():
+if TYPE_CHECKING:

def register_fake(fn):
return lambda name: fn
Expand Down
1 change: 0 additions & 1 deletion vllm/triton_utils/importing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
HAS_TRITON = (
find_spec("triton") is not None
and not current_platform.is_xpu() # Not compatible
-and not current_platform.is_neuron() # neuron has too old torch
)

if not HAS_TRITON:
Expand Down

0 comments on commit 628ec6c

Please sign in to comment.