diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/openai_api_compatible.yaml b/api/core/model_runtime/model_providers/openai_api_compatible/openai_api_compatible.yaml index 69bed9603902a6..88c76fe16ef733 100644 --- a/api/core/model_runtime/model_providers/openai_api_compatible/openai_api_compatible.yaml +++ b/api/core/model_runtime/model_providers/openai_api_compatible/openai_api_compatible.yaml @@ -7,6 +7,7 @@ description: supported_model_types: - llm - text-embedding + - speech2text configurate_methods: - customizable-model model_credential_schema: @@ -61,6 +62,22 @@ model_credential_schema: zh_Hans: 模型上下文长度 en_US: Model context size required: true + show_on: + - variable: __model_type + value: llm + type: text-input + default: '4096' + placeholder: + zh_Hans: 在此输入您的模型上下文长度 + en_US: Enter your Model context size + - variable: context_size + label: + zh_Hans: 模型上下文长度 + en_US: Model context size + required: true + show_on: + - variable: __model_type + value: text-embedding type: text-input default: '4096' placeholder: diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/speech2text/__init__.py b/api/core/model_runtime/model_providers/openai_api_compatible/speech2text/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/speech2text/speech2text.py b/api/core/model_runtime/model_providers/openai_api_compatible/speech2text/speech2text.py new file mode 100644 index 00000000000000..00702ba9367cf4 --- /dev/null +++ b/api/core/model_runtime/model_providers/openai_api_compatible/speech2text/speech2text.py @@ -0,0 +1,63 @@ +from typing import IO, Optional +from urllib.parse import urljoin + +import requests + +from core.model_runtime.errors.invoke import InvokeBadRequestError +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel 
+from core.model_runtime.model_providers.openai_api_compatible._common import _CommonOAI_API_Compat
+
+
+class OAICompatSpeech2TextModel(_CommonOAI_API_Compat, Speech2TextModel):
+    """
+    Model class for OpenAI Compatible Speech to text model.
+    """
+
+    def _invoke(
+        self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None
+    ) -> str:
+        """
+        Transcribe an audio file via ``<endpoint_url>/audio/transcriptions``.
+
+        :param model: model name
+        :param credentials: model credentials (``endpoint_url`` required, ``api_key`` optional)
+        :param file: audio file opened in binary mode
+        :param user: unique user id (accepted for interface compatibility, not sent)
+        :return: text for given audio file
+        :raises InvokeBadRequestError: if ``endpoint_url`` is missing or the reply is not HTTP 200
+        """
+        endpoint_url = credentials.get("endpoint_url")
+        if not endpoint_url:
+            raise InvokeBadRequestError("endpoint_url is required")
+        # urljoin() drops the last path segment unless the base ends with "/".
+        if not endpoint_url.endswith("/"):
+            endpoint_url += "/"
+        endpoint_url = urljoin(endpoint_url, "audio/transcriptions")
+
+        headers = {}
+        api_key = credentials.get("api_key")
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+
+        # Bounded (connect, read) timeouts so a stalled endpoint cannot hang the worker.
+        response = requests.post(
+            endpoint_url, headers=headers, data={"model": model}, files=[("file", file)], timeout=(10, 300)
+        )
+        if response.status_code != 200:
+            raise InvokeBadRequestError(response.text)
+        return response.json()["text"]
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials by transcribing the demo audio file.
+
+        :param model: model name
+        :param credentials: model credentials
+        :raises CredentialsValidateFailedError: if the demo transcription fails for any reason
+        """
+        try:
+            with open(self._get_demo_file_path(), "rb") as audio_file:
+                self._invoke(model, credentials, audio_file)
+        except Exception as ex:
+            raise CredentialsValidateFailedError(str(ex)) from ex
diff --git a/api/core/model_runtime/model_providers/siliconflow/siliconflow.py b/api/core/model_runtime/model_providers/siliconflow/siliconflow.py index a53f16c929728e..dd0eea362a5f83 100644 --- a/api/core/model_runtime/model_providers/siliconflow/siliconflow.py +++ b/api/core/model_runtime/model_providers/siliconflow/siliconflow.py @@ -6,6 +6,7 @@ logger = logging.getLogger(__name__) + class SiliconflowProvider(ModelProvider): 
def validate_provider_credentials(self, credentials: dict) -> None: diff --git a/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml b/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml index 3084d3edcd644f..1ebb1e6d8b149c 100644 --- a/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml +++ b/api/core/model_runtime/model_providers/siliconflow/siliconflow.yaml @@ -16,6 +16,7 @@ help: supported_model_types: - llm - text-embedding + - speech2text configurate_methods: - predefined-model provider_credential_schema: diff --git a/api/core/model_runtime/model_providers/siliconflow/speech2text/__init__.py b/api/core/model_runtime/model_providers/siliconflow/speech2text/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml b/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml new file mode 100644 index 00000000000000..deceaf60f4f017 --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/speech2text/sense-voice-small.yaml @@ -0,0 +1,5 @@ +model: iic/SenseVoiceSmall +model_type: speech2text +model_properties: + file_upload_limit: 1 + supported_file_extensions: mp3,wav diff --git a/api/core/model_runtime/model_providers/siliconflow/speech2text/speech2text.py b/api/core/model_runtime/model_providers/siliconflow/speech2text/speech2text.py new file mode 100644 index 00000000000000..6ad3cab5873c69 --- /dev/null +++ b/api/core/model_runtime/model_providers/siliconflow/speech2text/speech2text.py @@ -0,0 +1,32 @@ +from typing import IO, Optional + +from core.model_runtime.model_providers.openai_api_compatible.speech2text.speech2text import OAICompatSpeech2TextModel + + +class SiliconflowSpeech2TextModel(OAICompatSpeech2TextModel): + """ + Model class for Siliconflow Speech to text model. 
+ """ + + def _invoke( + self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None + ) -> str: + """ + Invoke speech2text model + + :param model: model name + :param credentials: model credentials + :param file: audio file + :param user: unique user id + :return: text for given audio file + """ + self._add_custom_parameters(credentials) + return super()._invoke(model, credentials, file) + + def validate_credentials(self, model: str, credentials: dict) -> None: + self._add_custom_parameters(credentials) + return super().validate_credentials(model, credentials) + + @classmethod + def _add_custom_parameters(cls, credentials: dict) -> None: + credentials["endpoint_url"] = "https://api.siliconflow.cn/v1" diff --git a/api/tests/integration_tests/model_runtime/openai_api_compatible/test_speech2text.py b/api/tests/integration_tests/model_runtime/openai_api_compatible/test_speech2text.py new file mode 100644 index 00000000000000..61079104dcad73 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/openai_api_compatible/test_speech2text.py @@ -0,0 +1,59 @@ +import os + +import pytest + +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.openai_api_compatible.speech2text.speech2text import ( + OAICompatSpeech2TextModel, +) + + +def test_validate_credentials(): + model = OAICompatSpeech2TextModel() + + with pytest.raises(CredentialsValidateFailedError): + model.validate_credentials( + model="whisper-1", + credentials={ + "api_key": "invalid_key", + "endpoint_url": "https://api.openai.com/v1/" + }, + ) + + model.validate_credentials( + model="whisper-1", + credentials={ + "api_key": os.environ.get("OPENAI_API_KEY"), + "endpoint_url": "https://api.openai.com/v1/" + }, + ) + + +def test_invoke_model(): + model = OAICompatSpeech2TextModel() + + # Get the directory of the current file + current_dir = os.path.dirname(os.path.abspath(__file__)) + + # Get assets directory + assets_dir = 
os.path.join(os.path.dirname(current_dir), "assets") + + # Construct the path to the audio file + audio_file_path = os.path.join(assets_dir, "audio.mp3") + + # Open the file and get the file object + with open(audio_file_path, "rb") as audio_file: + file = audio_file + + result = model.invoke( + model="whisper-1", + credentials={ + "api_key": os.environ.get("OPENAI_API_KEY"), + "endpoint_url": "https://api.openai.com/v1/" + }, + file=file, + user="abc-123", + ) + + assert isinstance(result, str) + assert result == '1, 2, 3, 4, 5, 6, 7, 8, 9, 10' diff --git a/api/tests/integration_tests/model_runtime/siliconflow/test_speech2text.py b/api/tests/integration_tests/model_runtime/siliconflow/test_speech2text.py new file mode 100644 index 00000000000000..82b7921c8506f0 --- /dev/null +++ b/api/tests/integration_tests/model_runtime/siliconflow/test_speech2text.py @@ -0,0 +1,53 @@ +import os + +import pytest + +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.siliconflow.speech2text.speech2text import SiliconflowSpeech2TextModel + + +def test_validate_credentials(): + model = SiliconflowSpeech2TextModel() + + with pytest.raises(CredentialsValidateFailedError): + model.validate_credentials( + model="iic/SenseVoiceSmall", + credentials={ + "api_key": "invalid_key" + }, + ) + + model.validate_credentials( + model="iic/SenseVoiceSmall", + credentials={ + "api_key": os.environ.get("API_KEY") + }, + ) + + +def test_invoke_model(): + model = SiliconflowSpeech2TextModel() + + # Get the directory of the current file + current_dir = os.path.dirname(os.path.abspath(__file__)) + + # Get assets directory + assets_dir = os.path.join(os.path.dirname(current_dir), "assets") + + # Construct the path to the audio file + audio_file_path = os.path.join(assets_dir, "audio.mp3") + + # Open the file and get the file object + with open(audio_file_path, "rb") as audio_file: + file = audio_file + + result = model.invoke( + 
model="iic/SenseVoiceSmall", + credentials={ + "api_key": os.environ.get("API_KEY") + }, + file=file + ) + + assert isinstance(result, str) + assert result == '1,2,3,4,5,6,7,8,9,10.'