diff --git a/examples/openai_pooling_client.py b/examples/openai_pooling_client.py
index 61765890dd778..37ec8f2fb6be3 100644
--- a/examples/openai_pooling_client.py
+++ b/examples/openai_pooling_client.py
@@ -20,7 +20,8 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response:
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="localhost")
     parser.add_argument("--port", type=int, default=8000)
-    parser.add_argument("--model", type=str,
+    parser.add_argument("--model",
+                        type=str,
                         default="jason9693/Qwen2.5-1.5B-apeach")
 
     args = parser.parse_args()
@@ -35,13 +36,15 @@ def post_http_request(prompt: dict, api_url: str) -> requests.Response:
 
     # Input like Chat API
     prompt = {
-        "model": model_name,
-        "messages": [
-            {
-                "role": "user",
-                "content": [{"type": "text", "text": "vLLM is great!"}],
-            }
-        ]
+        "model":
+        model_name,
+        "messages": [{
+            "role": "user",
+            "content": [{
+                "type": "text",
+                "text": "vLLM is great!"
+            }],
+        }]
     }
     pooling_response = post_http_request(prompt=prompt, api_url=api_url)
     print("Pooling Response:")
diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py
index 0ef2ccd1871bb..a1814290a1fe5 100644
--- a/vllm/entrypoints/openai/serving_pooling.py
+++ b/vllm/entrypoints/openai/serving_pooling.py
@@ -11,11 +11,10 @@
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
 from vllm.entrypoints.logger import RequestLogger
-from vllm.entrypoints.openai.protocol import (PoolingChatRequest,
-                                              PoolingRequest,
-                                              PoolingResponse,
-                                              PoolingResponseData,
-                                              ErrorResponse, UsageInfo)
+from vllm.entrypoints.openai.protocol import (ErrorResponse,
+                                              PoolingChatRequest,
+                                              PoolingRequest, PoolingResponse,
+                                              PoolingResponseData, UsageInfo)
 from vllm.entrypoints.openai.serving_engine import BaseModelPath, OpenAIServing
 from vllm.logger import init_logger
 from vllm.outputs import PoolingOutput, PoolingRequestOutput
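
For context only, not part of the change above: a minimal self-contained sketch of how a client along the lines of examples/openai_pooling_client.py could call the pooling endpoint end to end. The hunks only show the argument-parser and prompt formatting; the post_http_request body, the /pooling URL, the User-Agent header, and the use of pprint below are assumptions.

    import argparse
    import pprint

    import requests


    def post_http_request(prompt: dict, api_url: str) -> requests.Response:
        # Send the pooling request as JSON to the OpenAI-compatible server.
        headers = {"User-Agent": "Test Client"}
        return requests.post(api_url, headers=headers, json=prompt)


    if __name__ == "__main__":
        parser = argparse.ArgumentParser()
        parser.add_argument("--host", type=str, default="localhost")
        parser.add_argument("--port", type=int, default=8000)
        parser.add_argument("--model",
                            type=str,
                            default="jason9693/Qwen2.5-1.5B-apeach")
        args = parser.parse_args()

        # Assumed endpoint path; the diff does not show how api_url is built.
        api_url = f"http://{args.host}:{args.port}/pooling"

        # Chat-style input, matching the prompt structure shown in the diff.
        prompt = {
            "model": args.model,
            "messages": [{
                "role": "user",
                "content": [{"type": "text", "text": "vLLM is great!"}],
            }],
        }
        pooling_response = post_http_request(prompt=prompt, api_url=api_url)
        print("Pooling Response:")
        pprint.pprint(pooling_response.json())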