Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[V1] Make AsyncLLMEngine v1-v0 opaque #11383

Merged
merged 1 commit on
Dec 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions vllm/engine/async_llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,3 +1256,10 @@ async def stop_profile(self) -> None:
self.engine.model_executor.stop_profile()
else:
self.engine.model_executor._run_workers("stop_profile")


# TODO(v1): Remove this class proxy when V1 goes default.
if envs.VLLM_USE_V1:
from vllm.v1.engine.async_llm import AsyncLLM

AsyncLLMEngine = AsyncLLM # type: ignore
6 changes: 1 addition & 5 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import vllm.envs as envs
from vllm.config import ModelConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore
from vllm.engine.multiprocessing.client import MQLLMEngineClient
from vllm.engine.multiprocessing.engine import run_mp_engine
from vllm.engine.protocol import EngineClient
Expand Down Expand Up @@ -66,11 +67,6 @@
is_valid_ipv6_address)
from vllm.version import __version__ as VLLM_VERSION

if envs.VLLM_USE_V1:
from vllm.v1.engine.async_llm import AsyncLLMEngine # type: ignore
else:
from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore

TIMEOUT_KEEP_ALIVE = 5 # seconds

prometheus_multiproc_dir: tempfile.TemporaryDirectory
Expand Down
6 changes: 1 addition & 5 deletions vllm/v1/engine/async_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def from_engine_args(
start_engine_loop: bool = True,
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
) -> "AsyncLLMEngine":
) -> "AsyncLLM":
"""Create an AsyncLLM from the EngineArgs."""

# Create the engine configs.
Expand Down Expand Up @@ -386,7 +386,3 @@ def errored(self) -> bool:
@property
def dead_error(self) -> BaseException:
return Exception() # TODO: implement


# Retain V0 name for backwards compatibility.
AsyncLLMEngine = AsyncLLM
Loading