Skip to content

Commit

Permalink
Fix OpenAI server completion_tokens referenced before assignment (vll…
Browse files Browse the repository at this point in the history
  • Loading branch information
js8544 authored Dec 10, 2023
1 parent fe470ae commit 1aa1361
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,7 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
# Send token-by-token response for each request.
delta_text = output.text[len(previous_texts[i]):]
previous_texts[i] = output.text
completion_tokens = len(output.token_ids)
previous_num_tokens[i] = completion_tokens
previous_num_tokens[i] = len(output.token_ids)
choice_data = ChatCompletionResponseStreamChoice(
index=i,
delta=DeltaMessage(content=delta_text),
Expand All @@ -351,8 +350,8 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
prompt_tokens = len(res.prompt_token_ids)
final_usage = UsageInfo(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
completion_tokens=previous_num_tokens[i],
total_tokens=prompt_tokens + previous_num_tokens[i],
)
choice_data = ChatCompletionResponseStreamChoice(
index=i, delta=[], finish_reason=output.finish_reason)
Expand Down

0 comments on commit 1aa1361

Please sign in to comment.