diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index f50e20cf70323..66a5089074ff5 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -1256,3 +1256,10 @@ async def stop_profile(self) -> None: self.engine.model_executor.stop_profile() else: self.engine.model_executor._run_workers("stop_profile") + + +# TODO(v1): Remove this class proxy when V1 goes default. +if envs.VLLM_USE_V1: + from vllm.v1.engine.async_llm import AsyncLLM + + AsyncLLMEngine = AsyncLLM # type: ignore diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 00e2d1a56f160..2e5b769a825ce 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -27,6 +27,7 @@ import vllm.envs as envs from vllm.config import ModelConfig from vllm.engine.arg_utils import AsyncEngineArgs +from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore from vllm.engine.multiprocessing.client import MQLLMEngineClient from vllm.engine.multiprocessing.engine import run_mp_engine from vllm.engine.protocol import EngineClient @@ -66,11 +67,6 @@ is_valid_ipv6_address) from vllm.version import __version__ as VLLM_VERSION -if envs.VLLM_USE_V1: - from vllm.v1.engine.async_llm import AsyncLLMEngine # type: ignore -else: - from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore - TIMEOUT_KEEP_ALIVE = 5 # seconds prometheus_multiproc_dir: tempfile.TemporaryDirectory diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 41fb4b25d45bb..cfdbea8004c35 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -98,7 +98,7 @@ def from_engine_args( start_engine_loop: bool = True, usage_context: UsageContext = UsageContext.ENGINE_CONTEXT, stat_loggers: Optional[Dict[str, StatLoggerBase]] = None, - ) -> "AsyncLLMEngine": + ) -> "AsyncLLM": """Create an AsyncLLM from the EngineArgs.""" # Create the engine configs. @@ -386,7 +386,3 @@ def errored(self) -> bool: @property def dead_error(self) -> BaseException: return Exception() # TODO: implement - - -# Retain V0 name for backwards compatibility. -AsyncLLMEngine = AsyncLLM