
Commit
feat:use xinference tts stream mode (langgenius#8616)
leslie2046 authored and JunXu01 committed Nov 9, 2024
1 parent 2d95cb2 commit fd75683
Showing 5 changed files with 13 additions and 17 deletions.
3 changes: 1 addition & 2 deletions api/core/model_runtime/model_providers/xinference/llm/llm.py
@@ -19,7 +19,6 @@
 from openai.types.completion import Completion
 from xinference_client.client.restful.restful_client import (
     Client,
-    RESTfulChatglmCppChatModelHandle,
     RESTfulChatModelHandle,
     RESTfulGenerateModelHandle,
 )
@@ -491,7 +490,7 @@ def _generate(
         if tools and len(tools) > 0:
             generate_config["tools"] = [{"type": "function", "function": helper.dump_model(tool)} for tool in tools]
         vision = credentials.get("support_vision", False)
-        if isinstance(xinference_model, RESTfulChatModelHandle | RESTfulChatglmCppChatModelHandle):
+        if isinstance(xinference_model, RESTfulChatModelHandle):
             resp = client.chat.completions.create(
                 model=credentials["model_uid"],
                 messages=[self._convert_prompt_message_to_dict(message) for message in prompt_messages],
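
With RESTfulChatglmCppChatModelHandle gone, the chat path is gated on RESTfulChatModelHandle alone. A minimal sketch of that dispatch, assuming a locally running Xinference server and a hypothetical model UID (neither is part of this commit):

from xinference_client.client.restful.restful_client import (
    Client,
    RESTfulChatModelHandle,
    RESTfulGenerateModelHandle,
)

client = Client("http://127.0.0.1:9997")   # assumed local Xinference endpoint
handle = client.get_model("my-model-uid")  # hypothetical model UID

if isinstance(handle, RESTfulChatModelHandle):
    pass  # chat-capable model: routed through the OpenAI-compatible chat completions API
elif isinstance(handle, RESTfulGenerateModelHandle):
    pass  # plain completion model: routed through the completion endpoint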
12 changes: 6 additions & 6 deletions api/core/model_runtime/model_providers/xinference/tts/tts.py
@@ -208,21 +208,21 @@ def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str
                 executor = concurrent.futures.ThreadPoolExecutor(max_workers=min(3, len(sentences)))
                 futures = [
                     executor.submit(
-                        handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=False
+                        handle.speech, input=sentences[i], voice=voice, response_format="mp3", speed=1.0, stream=True
                     )
                     for i in range(len(sentences))
                 ]

                 for future in futures:
                     response = future.result()
-                    for i in range(0, len(response), 1024):
-                        yield response[i : i + 1024]
+                    for chunk in response:
+                        yield chunk
             else:
                 response = handle.speech(
-                    input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=False
+                    input=content_text.strip(), voice=voice, response_format="mp3", speed=1.0, stream=True
                 )

-                for i in range(0, len(response), 1024):
-                    yield response[i : i + 1024]
+                for chunk in response:
+                    yield chunk
         except Exception as ex:
             raise InvokeBadRequestError(str(ex))
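
The substance of the tts.py change is how the audio payload is consumed. A side-by-side sketch of the two shapes, assuming (as the new code does) that handle.speech(..., stream=True) returns an iterable of byte chunks while stream=False returned a single bytes payload:

from collections.abc import Iterable, Iterator

def old_style(response: bytes) -> Iterator[bytes]:
    # stream=False: the whole mp3 arrives at once and is re-sliced into 1 KiB pieces
    for i in range(0, len(response), 1024):
        yield response[i : i + 1024]

def new_style(response: Iterable[bytes]) -> Iterator[bytes]:
    # stream=True: each chunk is forwarded as soon as the server produces it
    yield from response

The streaming variant lets the caller start consuming audio before synthesis of the whole text has finished, which is what the stream mode in the commit title refers to.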
8 changes: 4 additions & 4 deletions api/poetry.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion api/pyproject.toml
@@ -203,7 +203,7 @@ transformers = "~4.35.0"
 unstructured = { version = "~0.10.27", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] }
 websocket-client = "~1.7.0"
 werkzeug = "~3.0.1"
-xinference-client = "0.13.3"
+xinference-client = "0.15.2"
 yarl = "~1.9.4"
 zhipuai = "1.0.7"
 # Before adding new dependency, consider place it in alphabet order (a-z) and suitable group.
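
The dependency bump ties the rest of the commit together: every import of RESTfulChatglmCppChatModelHandle is dropped in the same change set, which suggests the class is no longer exported by xinference-client 0.15.2. A defensive probe along these lines (an illustration, not code from the commit) shows the failure an un-updated import would hit:

try:
    # expected to fail once xinference-client no longer ships the ChatGLM cpp handle
    from xinference_client.client.restful.restful_client import RESTfulChatglmCppChatModelHandle
except ImportError:
    RESTfulChatglmCppChatModelHandle = None  # handle type not available in this client version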
5 changes: 1 addition & 4 deletions (Xinference client test mock)
@@ -9,7 +9,6 @@
 from requests.sessions import Session
 from xinference_client.client.restful.restful_client import (
     Client,
-    RESTfulChatglmCppChatModelHandle,
     RESTfulChatModelHandle,
     RESTfulEmbeddingModelHandle,
     RESTfulGenerateModelHandle,
@@ -19,9 +18,7 @@


 class MockXinferenceClass:
-    def get_chat_model(
-        self: Client, model_uid: str
-    ) -> Union[RESTfulChatglmCppChatModelHandle, RESTfulGenerateModelHandle, RESTfulChatModelHandle]:
+    def get_chat_model(self: Client, model_uid: str) -> Union[RESTfulGenerateModelHandle, RESTfulChatModelHandle]:
         if not re.match(r"https?:\/\/[^\s\/$.?#].[^\s]*$", self.base_url):
             raise RuntimeError("404 Not Found")

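
The mock keeps the base_url sanity check unchanged; only the return annotation is narrowed. A tiny illustration (not from the commit) of what that regex accepts and rejects:

import re

_URL_RE = r"https?:\/\/[^\s\/$.?#].[^\s]*$"

assert re.match(_URL_RE, "http://127.0.0.1:9997")        # plausible local server: accepted
assert re.match(_URL_RE, "https://xinference.example")   # hypothetical host: accepted
assert re.match(_URL_RE, "not-a-url") is None            # malformed: the mock raises "404 Not Found"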

0 comments on commit fd75683
