From 478e62b0ba98405a7d361d9e137ce6580b0c5baa Mon Sep 17 00:00:00 2001
From: sino
Date: Fri, 13 Sep 2024 14:19:24 +0800
Subject: [PATCH] fix: ark token usage is none (#8351)

---
 .../model_providers/volcengine_maas/client.py      |  6 ++----
 .../model_providers/volcengine_maas/llm/llm.py     | 12 +++++-------
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/api/core/model_runtime/model_providers/volcengine_maas/client.py b/api/core/model_runtime/model_providers/volcengine_maas/client.py
index d6f1356651e7b3..cfe21e4b9f4617 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/client.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/client.py
@@ -208,11 +208,9 @@ def stream_chat(
             presence_penalty=presence_penalty,
             top_p=top_p,
             temperature=temperature,
+            stream_options={"include_usage": True},
         )
-        for chunk in chunks:
-            if not chunk.choices:
-                continue
-            yield chunk
+        yield from chunks
 
     def embeddings(self, texts: list[str]) -> CreateEmbeddingResponse:
         return self.ark.embeddings.create(model=self.endpoint_id, input=texts)
diff --git a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
index f8bf8fb821978c..dec6c9d789c135 100644
--- a/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
+++ b/api/core/model_runtime/model_providers/volcengine_maas/llm/llm.py
@@ -239,16 +239,14 @@ def _generate_v3(
 
         def _handle_stream_chat_response(chunks: Generator[ChatCompletionChunk]) -> Generator:
             for chunk in chunks:
-                if not chunk.choices:
-                    continue
-                choice = chunk.choices[0]
-
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
                     delta=LLMResultChunkDelta(
-                        index=choice.index,
-                        message=AssistantPromptMessage(content=choice.delta.content, tool_calls=[]),
+                        index=0,
+                        message=AssistantPromptMessage(
+                            content=chunk.choices[0].delta.content if chunk.choices else "", tool_calls=[]
+                        ),
                         usage=self._calc_response_usage(
                             model=model,
                             credentials=credentials,
@@ -257,7 +255,7 @@ def _handle_stream_chat_response(chunks: Generator[ChatCompletionChunk]) -> Gene
                         )
                         if chunk.usage
                         else None,
-                        finish_reason=choice.finish_reason,
+                        finish_reason=chunk.choices[0].finish_reason if chunk.choices else None,
                     ),
                 )
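
Note (not part of the patch): the rationale behind the change above is that OpenAI-style
streaming APIs, when called with stream_options={"include_usage": True}, append one final
chunk whose choices list is empty and whose usage field is populated; the old handler
skipped choice-less chunks and therefore never saw the usage, which is why token usage
came back as None. The patch suggests the Ark endpoint follows the same convention. Below
is a minimal, self-contained Python sketch using stand-in dataclasses (hypothetical types,
not the real SDK or Dify classes) to illustrate the chunk shape the patched handler must
tolerate:

# Stand-in objects (not the real SDK types) illustrating the stream shape the
# patch relies on: content chunks carry choices but no usage, while the final
# chunk carries usage but no choices.
from dataclasses import dataclass, field


@dataclass
class Usage:
    prompt_tokens: int
    completion_tokens: int


@dataclass
class Delta:
    content: str


@dataclass
class Choice:
    delta: Delta
    finish_reason: str | None = None


@dataclass
class Chunk:
    choices: list[Choice] = field(default_factory=list)
    usage: Usage | None = None


def handle_stream(chunks):
    """Mirrors the patched handler: never skip usage-only chunks."""
    for chunk in chunks:
        content = chunk.choices[0].delta.content if chunk.choices else ""
        finish = chunk.choices[0].finish_reason if chunk.choices else None
        usage = chunk.usage  # populated only on the final chunk
        yield content, finish, usage


stream = [
    Chunk(choices=[Choice(delta=Delta("Hel"))]),
    Chunk(choices=[Choice(delta=Delta("lo"), finish_reason="stop")]),
    Chunk(usage=Usage(prompt_tokens=12, completion_tokens=2)),  # usage-only chunk
]

for content, finish, usage in handle_stream(stream):
    print(repr(content), finish, usage)

Running the sketch prints the two content deltas and then a final tuple whose usage is
populated despite choices being empty, which is exactly why the patched
_handle_stream_chat_response reads chunk.usage on every chunk instead of filtering out
chunks without choices.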