[Frontend] Expose revision arg in OpenAI server (vllm-project#8501)
lewtun authored Sep 16, 2024
1 parent: a091e2d · commit: 837c196
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions vllm/entrypoints/openai/api_server.py
@@ -69,8 +69,10 @@


 def model_is_embedding(model_name: str, trust_remote_code: bool,
-                       quantization: Optional[str]) -> bool:
+                       quantization: Optional[str],
+                       revision: Optional[str]) -> bool:
     return ModelConfig(model=model_name,
+                       revision=revision,
                        tokenizer=model_name,
                        tokenizer_mode="auto",
                        trust_remote_code=trust_remote_code,
@@ -130,7 +132,7 @@ async def build_async_engine_client_from_engine_args(
     # If manually triggered or embedding model, use AsyncLLMEngine in process.
     # TODO: support embedding model via RPC.
     if (model_is_embedding(engine_args.model, engine_args.trust_remote_code,
-                           engine_args.quantization)
+                           engine_args.quantization, engine_args.revision)
         or disable_frontend_multiprocessing):
         engine_client = AsyncLLMEngine.from_engine_args(
             engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
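For context, `revision` pins the model to a specific branch, tag, or commit on the Hugging Face Hub, so the embedding check now inspects the config of the requested revision instead of the repository default. The first hunk above is truncated by the diff view; a sketch of the full helper after this change, where the trailing ModelConfig kwargs and the `.embedding_mode` return are reconstructed as assumptions rather than taken from the visible lines:

    from typing import Optional

    from vllm.config import ModelConfig


    def model_is_embedding(model_name: str, trust_remote_code: bool,
                           quantization: Optional[str],
                           revision: Optional[str]) -> bool:
        # Build a throwaway ModelConfig for the requested revision and report
        # whether it resolves to an embedding model. The seed/dtype values are
        # assumptions for a self-contained sketch, not part of the diff.
        return ModelConfig(model=model_name,
                           revision=revision,
                           tokenizer=model_name,
                           tokenizer_mode="auto",
                           trust_remote_code=trust_remote_code,
                           quantization=quantization,
                           seed=0,
                           dtype="auto").embedding_mode

With this in place, a `--revision` flag passed to the OpenAI-compatible server reaches `model_is_embedding`, so a model pinned to a non-default revision is no longer classified using the default revision's config when deciding whether to run the engine in-process.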
