diff --git a/vllm/entrypoints/sync_openai/api_server.py b/vllm/entrypoints/sync_openai/api_server.py index 4e7d2b6382d3a..413ba964464c6 100644 --- a/vllm/entrypoints/sync_openai/api_server.py +++ b/vllm/entrypoints/sync_openai/api_server.py @@ -388,6 +388,7 @@ async def chat_completions(request: ChatCompletionRequest, while True: _, token, stats = await result_queue.get() + assert res.choices[0].message.content is not None res.choices[0].message.content += str(token) if stats is not None: res.usage.completion_tokens += stats["tokens"] # type: ignore