From 00595e5ff856500d776f4a36f5a4ec87bdbfd821 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 21 Nov 2024 14:09:15 -0800 Subject: [PATCH] finish Signed-off-by: youkaichao --- vllm/executor/gpu_executor.py | 2 +- vllm/executor/ray_hpu_executor.py | 3 +-- vllm/executor/xpu_executor.py | 2 +- vllm/platforms/cuda.py | 4 ++-- vllm/platforms/hpu.py | 4 +++- vllm/platforms/neuron.py | 7 +++++-- vllm/platforms/openvino.py | 6 ++++-- vllm/platforms/rocm.py | 3 ++- vllm/platforms/tpu.py | 6 ++++-- 9 files changed, 23 insertions(+), 14 deletions(-) diff --git a/vllm/executor/gpu_executor.py b/vllm/executor/gpu_executor.py index 0949b0cba35a7..7fa34456028dd 100644 --- a/vllm/executor/gpu_executor.py +++ b/vllm/executor/gpu_executor.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase from vllm.logger import init_logger diff --git a/vllm/executor/ray_hpu_executor.py b/vllm/executor/ray_hpu_executor.py index 0c34f403f51fb..3db1b5a13c44b 100644 --- a/vllm/executor/ray_hpu_executor.py +++ b/vllm/executor/ray_hpu_executor.py @@ -2,8 +2,7 @@ import os from collections import defaultdict from itertools import islice, repeat -from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, - Type) +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple import msgspec diff --git a/vllm/executor/xpu_executor.py b/vllm/executor/xpu_executor.py index 03f327dc1f364..722b86a95ff8a 100644 --- a/vllm/executor/xpu_executor.py +++ b/vllm/executor/xpu_executor.py @@ -1,4 +1,4 @@ -from typing import Callable, List, Optional, Tuple, Type, Union +from typing import List, Optional, Union from vllm.executor.executor_base import ExecutorAsyncBase from vllm.executor.gpu_executor import GPUExecutor diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index d72b809be74a4..cf0d41081a5aa 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -4,7 +4,7 @@ import os from functools import lru_cache, wraps -from typing import Callable, List, Tuple, TypeVar +from typing import TYPE_CHECKING, Callable, List, Tuple, TypeVar import pynvml import torch @@ -15,7 +15,7 @@ from vllm.logger import init_logger from .interface import DeviceCapability, Platform, PlatformEnum -from typing import TYPE_CHECKING + if TYPE_CHECKING: from vllm.config import VllmConfig else: diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py index 210bd4653ea27..a8f568d31d5a7 100644 --- a/vllm/platforms/hpu.py +++ b/vllm/platforms/hpu.py @@ -1,7 +1,9 @@ +from typing import TYPE_CHECKING + import torch from .interface import Platform, PlatformEnum, _Backend -from typing import TYPE_CHECKING + if TYPE_CHECKING: from vllm.config import VllmConfig else: diff --git a/vllm/platforms/neuron.py b/vllm/platforms/neuron.py index d9087e0161d7b..4c4d778ed3dd4 100644 --- a/vllm/platforms/neuron.py +++ b/vllm/platforms/neuron.py @@ -1,5 +1,7 @@ -from .interface import Platform, PlatformEnum from typing import TYPE_CHECKING + +from .interface import Platform, PlatformEnum + if TYPE_CHECKING: from vllm.config import VllmConfig else: @@ -18,4 +20,5 @@ def get_device_name(cls, device_id: int = 0) -> str: def check_and_update_config(cls, vllm_config: VllmConfig) -> None: parallel_config = vllm_config.parallel_config if parallel_config.worker_cls == "auto": - parallel_config.worker_cls = "vllm.worker.neuron_worker.NeuronWorker" + parallel_config.worker_cls = \ + "vllm.worker.neuron_worker.NeuronWorker" diff --git a/vllm/platforms/openvino.py b/vllm/platforms/openvino.py index a21020f9ffa0b..33a41933e9fff 100644 --- a/vllm/platforms/openvino.py +++ b/vllm/platforms/openvino.py @@ -1,3 +1,5 @@ +from typing import TYPE_CHECKING + import torch import vllm.envs as envs @@ -5,7 +7,6 @@ from .interface import Platform, PlatformEnum, _Backend -from typing import TYPE_CHECKING if TYPE_CHECKING: from vllm.config import VllmConfig else: @@ -53,4 +54,5 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: ), "OpenVINOExecutor only supports single CPU socket currently." if parallel_config.worker_cls == "auto": - parallel_config.worker_cls = "vllm.worker.openvino_worker.OpenVINOWorker" + parallel_config.worker_cls = \ + "vllm.worker.openvino_worker.OpenVINOWorker" diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 933125d6c09f5..3fe8c01c15787 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -1,12 +1,13 @@ import os from functools import lru_cache +from typing import TYPE_CHECKING import torch from vllm.logger import init_logger from .interface import DeviceCapability, Platform, PlatformEnum, _Backend -from typing import TYPE_CHECKING + if TYPE_CHECKING: from vllm.config import VllmConfig else: diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 48a87c88747c9..513cfa54687dc 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -49,12 +49,14 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: if compilation_config.backend == "": compilation_config.backend = "openxla" - assert vllm_config.speculative_config is None, "TPU does not support speculative decoding" + assert vllm_config.speculative_config is None, \ + "TPU does not support speculative decoding" parallel_config = vllm_config.parallel_config scheduler_config = vllm_config.scheduler_config if parallel_config.worker_cls == "auto": if scheduler_config.is_multi_step: - parallel_config.worker_cls = "vllm.worker.multi_step_tpu_worker.MultiStepTPUWorker" + parallel_config.worker_cls = \ + "vllm.worker.multi_step_tpu_worker.MultiStepTPUWorker" else: parallel_config.worker_cls = "vllm.worker.tpu_worker.TPUWorker"