From 12c0d64cfd759c41c005722fb18927e3cf5613ef Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 10:03:51 +0800 Subject: [PATCH 1/8] refactor baichuan model --- .../model_providers/baichuan/baichuan.yaml | 8 - .../baichuan/llm/baichuan2-53b.yaml | 45 --- .../baichuan/llm/baichuan2-turbo-192k.yaml | 45 --- .../baichuan/llm/baichuan4.yaml | 30 +- .../baichuan/llm/baichuan_turbo.py | 245 ++++++++-------- .../model_providers/baichuan/llm/llm.py | 264 +++++++++++------- 6 files changed, 307 insertions(+), 330 deletions(-) delete mode 100644 api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml delete mode 100644 api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml diff --git a/api/core/model_runtime/model_providers/baichuan/baichuan.yaml b/api/core/model_runtime/model_providers/baichuan/baichuan.yaml index 792126af7fd58f..81e6e36215aa84 100644 --- a/api/core/model_runtime/model_providers/baichuan/baichuan.yaml +++ b/api/core/model_runtime/model_providers/baichuan/baichuan.yaml @@ -27,11 +27,3 @@ provider_credential_schema: placeholder: zh_Hans: 在此输入您的 API Key en_US: Enter your API Key - - variable: secret_key - label: - en_US: Secret Key - type: secret-input - required: false - placeholder: - zh_Hans: 在此输入您的 Secret Key - en_US: Enter your Secret Key diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml deleted file mode 100644 index 04849500dcb7f1..00000000000000 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml +++ /dev/null @@ -1,45 +0,0 @@ -model: baichuan2-53b -label: - en_US: Baichuan2-53B -model_type: llm -features: - - agent-thought -model_properties: - mode: chat - context_size: 32000 -parameter_rules: - - name: temperature - use_template: temperature - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top k - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. - required: false - - name: max_tokens - use_template: max_tokens - required: true - default: 1000 - min: 1 - max: 4000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 - - name: with_search_enhance - label: - zh_Hans: 搜索增强 - en_US: Search Enhance - type: boolean - help: - zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 - en_US: Allow the model to perform external search to enhance the generation results. - required: false diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml deleted file mode 100644 index c8156c152b15bd..00000000000000 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml +++ /dev/null @@ -1,45 +0,0 @@ -model: baichuan2-turbo-192k -label: - en_US: Baichuan2-Turbo-192K -model_type: llm -features: - - agent-thought -model_properties: - mode: chat - context_size: 192000 -parameter_rules: - - name: temperature - use_template: temperature - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top k - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. 
- required: false - - name: max_tokens - use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 192000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 - - name: with_search_enhance - label: - zh_Hans: 搜索增强 - en_US: Search Enhance - type: boolean - help: - zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 - en_US: Allow the model to perform external search to enhance the generation results. - required: false diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan4.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan4.yaml index f8c65660818818..e5e6aeb49158e8 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan4.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan4.yaml @@ -4,36 +4,44 @@ label: model_type: llm features: - agent-thought + - multi-tool-call model_properties: mode: chat context_size: 32000 parameter_rules: - name: temperature use_template: temperature + default: 0.3 - name: top_p use_template: top_p + default: 0.85 - name: top_k label: zh_Hans: 取样数量 en_US: Top k type: int + min: 0 + max: 20 + default: 5 help: zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 en_US: Only sample from the top K options for each subsequent token. required: false - name: max_tokens use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 32000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 + default: 2048 + - name: res_format + label: + zh_Hans: 回复格式 + en_US: response format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object - name: with_search_enhance label: zh_Hans: 搜索增强 diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py index d7d8b7c91b6e2d..eb2d8de152f142 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py @@ -1,11 +1,14 @@ from collections.abc import Generator from enum import Enum -from hashlib import md5 from json import dumps, loads -from typing import Any, Union +from typing import Any, Optional, Union from requests import post +from core.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + PromptMessageTool, +) from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import ( BadRequestError, InsufficientAccountBalance, @@ -18,201 +21,191 @@ class BaichuanMessage: class Role(Enum): - USER = 'user' - ASSISTANT = 'assistant' - # Baichuan does not have system message - _SYSTEM = 'system' + USER = "user" + ASSISTANT = "assistant" + SYSTEM = "system" + TOOL = "tool" role: str = Role.USER.value content: str usage: dict[str, int] = None - stop_reason: str = '' + tool_calls : list[AssistantPromptMessage.ToolCall] = [] + tool_call_id: str = "" + stop_reason: str = "" def to_dict(self) -> dict[str, Any]: return { - 'role': self.role, - 'content': self.content, + "role": self.role, + "content": self.content, } - - def __init__(self, content: str, role: str = 'user') -> None: + + def __init__(self, content: str, role: str = "user", tool_call_id: str = "") -> None: self.content = content self.role = role + if tool_call_id: + self.tool_call_id = tool_call_id + class 
BaichuanModel: api_key: str - secret_key: str - def __init__(self, api_key: str, secret_key: str = '') -> None: + def __init__(self, api_key: str) -> None: self.api_key = api_key - self.secret_key = secret_key - def _model_mapping(self, model: str) -> str: + @property + def _model_mapping(self) -> dict: return { - 'baichuan2-turbo': 'Baichuan2-Turbo', - 'baichuan2-turbo-192k': 'Baichuan2-Turbo-192k', - 'baichuan2-53b': 'Baichuan2-53B', - 'baichuan3-turbo': 'Baichuan3-Turbo', - 'baichuan3-turbo-128k': 'Baichuan3-Turbo-128k', - 'baichuan4': 'Baichuan4', - }[model] - - def _handle_chat_generate_response(self, response) -> BaichuanMessage: - resp = response.json() - choices = resp.get('choices', []) - message = BaichuanMessage(content='', role='assistant') - for choice in choices: - message.content += choice['message']['content'] - message.role = choice['message']['role'] - if choice['finish_reason']: - message.stop_reason = choice['finish_reason'] - - if 'usage' in resp: - message.usage = { - 'prompt_tokens': resp['usage']['prompt_tokens'], - 'completion_tokens': resp['usage']['completion_tokens'], - 'total_tokens': resp['usage']['total_tokens'], - } + "baichuan2-turbo": "Baichuan2-Turbo", + "baichuan3-turbo": "Baichuan3-Turbo", + "baichuan3-turbo-128k": "Baichuan3-Turbo-128k", + "baichuan4": "Baichuan4", + } + + @property + def request_headers(self) -> dict[str, Any]: + return { + "Content-Type": "application/json", + "Authorization": "Bearer " + self.api_key, + } - return message - def _handle_chat_stream_generate_response(self, response) -> Generator: for line in response.iter_lines(): if not line: continue - line = line.decode('utf-8') + line = line.decode("utf-8") # remove the first `data: ` prefix - if line.startswith('data:'): + if line.startswith("data:"): line = line[5:].strip() try: data = loads(line) except Exception as e: - if line.strip() == '[DONE]': + if line.strip() == "[DONE]": return - choices = data.get('choices', []) + choices = data.get("choices", []) # save stop reason temporarily - stop_reason = '' + stop_reason = "" for choice in choices: - if choice.get('finish_reason'): - stop_reason = choice['finish_reason'] + if choice.get("finish_reason"): + stop_reason = choice["finish_reason"] - if len(choice['delta']['content']) == 0: + if len(choice["delta"]["content"]) == 0: continue - yield BaichuanMessage(**choice['delta']) + yield BaichuanMessage(**choice["delta"]) # if there is usage, the response is the last one, yield it and return - if 'usage' in data: - message = BaichuanMessage(content='', role='assistant') + if "usage" in data: + message = BaichuanMessage(content="", role="assistant") message.usage = { - 'prompt_tokens': data['usage']['prompt_tokens'], - 'completion_tokens': data['usage']['completion_tokens'], - 'total_tokens': data['usage']['total_tokens'], + "prompt_tokens": data["usage"]["prompt_tokens"], + "completion_tokens": data["usage"]["completion_tokens"], + "total_tokens": data["usage"]["total_tokens"], } message.stop_reason = stop_reason yield message - def _build_parameters(self, model: str, stream: bool, messages: list[BaichuanMessage], - parameters: dict[str, Any]) \ - -> dict[str, Any]: - if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b' - or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'): - prompt_messages = [] - for message in messages: - if message.role == BaichuanMessage.Role.USER.value or message.role == BaichuanMessage.Role._SYSTEM.value: - # check if the latest 
message is a user message - if len(prompt_messages) > 0 and prompt_messages[-1]['role'] == BaichuanMessage.Role.USER.value: - prompt_messages[-1]['content'] += message.content - else: - prompt_messages.append({ - 'content': message.content, - 'role': BaichuanMessage.Role.USER.value, - }) - elif message.role == BaichuanMessage.Role.ASSISTANT.value: - prompt_messages.append({ - 'content': message.content, - 'role': message.role, - }) - # [baichuan] frequency_penalty must be between 1 and 2 - if 'frequency_penalty' in parameters: - if parameters['frequency_penalty'] < 1 or parameters['frequency_penalty'] > 2: - parameters['frequency_penalty'] = 1 + def _build_parameters( + self, + model: str, + stream: bool, + messages: list[dict], + parameters: dict[str, Any], + tools: Optional[list[PromptMessageTool]] = None, + ) -> dict[str, Any]: + if model in self._model_mapping.keys(): + # the LargeLanguageModel._code_block_mode_wrapper() method will remove the response_format of parameters. we need to rename it to res_format to get its value + if parameters.get("res_format") == "json_object": + parameters["response_format"] = {"type": "json_object"} + + if tools or parameters.get("with_search_enhance") is True: + parameters["tools"] = [] + # with_search_enhance is deprecated, use web_search instead + if parameters.get("with_search_enhance") is True: + parameters["tools"].append( + { + "type": "web_search", + "web_search": {"enable": True}, + } + ) + if tools: + for tool in tools: + parameters["tools"].append( + { + "type": "function", + "function": { + "name": tool.name, + "description": tool.description, + "parameters": tool.parameters, + }, + } + ) # turbo api accepts flat parameters return { - 'model': self._model_mapping(model), - 'stream': stream, - 'messages': prompt_messages, + "model": self._model_mapping.get(model), + "stream": stream, + "messages": messages, **parameters, } else: raise BadRequestError(f"Unknown model: {model}") - - def _build_headers(self, model: str, data: dict[str, Any]) -> dict[str, Any]: - if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b' - or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'): - # there is no secret key for turbo api - return { - 'Content-Type': 'application/json', - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ', - 'Authorization': 'Bearer ' + self.api_key, - } - else: - raise BadRequestError(f"Unknown model: {model}") - - def _calculate_md5(self, input_string): - return md5(input_string.encode('utf-8')).hexdigest() - - def generate(self, model: str, stream: bool, messages: list[BaichuanMessage], - parameters: dict[str, Any], timeout: int) \ - -> Union[Generator, BaichuanMessage]: - - if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b' - or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'): - api_base = 'https://api.baichuan-ai.com/v1/chat/completions' + + def generate( + self, + model: str, + stream: bool, + messages: list[dict], + parameters: dict[str, Any], + timeout: int, + tools: Optional[list[PromptMessageTool]] = None, + ) -> Union[Generator, dict]: + + if model in self._model_mapping.keys(): + api_base = "https://api.baichuan-ai.com/v1/chat/completions" else: raise BadRequestError(f"Unknown model: {model}") - - try: - data = self._build_parameters(model, stream, messages, parameters) - headers = self._build_headers(model, data) - except KeyError: - raise 
InternalServerError(f"Failed to build parameters for model: {model}") + + data = self._build_parameters(model, stream, messages, parameters, tools) try: response = post( url=api_base, - headers=headers, + headers=self.request_headers, data=dumps(data), timeout=timeout, - stream=stream + stream=stream, ) except Exception as e: raise InternalServerError(f"Failed to invoke model: {e}") - + if response.status_code != 200: try: resp = response.json() # try to parse error message - err = resp['error']['code'] - msg = resp['error']['message'] + err = resp["error"]["type"] + msg = resp["error"]["message"] except Exception as e: - raise InternalServerError(f"Failed to convert response to json: {e} with text: {response.text}") + raise InternalServerError( + f"Failed to convert response to json: {e} with text: {response.text}" + ) - if err == 'invalid_api_key': + if err == "invalid_api_key": raise InvalidAPIKeyError(msg) - elif err == 'insufficient_quota': + elif err == "insufficient_quota": raise InsufficientAccountBalance(msg) - elif err == 'invalid_authentication': + elif err == "invalid_authentication": raise InvalidAuthenticationError(msg) - elif 'rate' in err: + elif err == "invalid_request_error": + raise BadRequestError(msg) + elif "rate" in err: raise RateLimitReachedError(msg) - elif 'internal' in err: + elif "internal" in err: raise InternalServerError(msg) - elif err == 'api_key_empty': + elif err == "api_key_empty": raise InvalidAPIKeyError(msg) else: raise InternalServerError(f"Unknown error: {err} with message: {msg}") - + if stream: return self._handle_chat_stream_generate_response(response) else: - return self._handle_chat_generate_response(response) \ No newline at end of file + return response.json() diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index edcd3af4203cfb..d05be120890f51 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -1,7 +1,11 @@ from collections.abc import Generator from typing import cast -from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta +from core.model_runtime.entities.llm_entities import ( + LLMResult, + LLMResultChunk, + LLMResultChunkDelta, +) from core.model_runtime.entities.message_entities import ( AssistantPromptMessage, PromptMessage, @@ -19,9 +23,16 @@ InvokeServerUnavailableError, ) from core.model_runtime.errors.validate import CredentialsValidateFailedError -from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel -from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import BaichuanTokenizer -from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import BaichuanMessage, BaichuanModel +from core.model_runtime.model_providers.__base.large_language_model import ( + LargeLanguageModel, +) +from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import ( + BaichuanTokenizer, +) +from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import ( + BaichuanMessage, + BaichuanModel, +) from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import ( BadRequestError, InsufficientAccountBalance, @@ -33,19 +44,42 @@ class BaichuanLarguageModel(LargeLanguageModel): - def _invoke(self, model: str, credentials: dict, - prompt_messages: list[PromptMessage], model_parameters: dict, - tools: list[PromptMessageTool] | None = None, 
stop: list[str] | None = None, - stream: bool = True, user: str | None = None) \ - -> LLMResult | Generator: - return self._generate(model=model, credentials=credentials, prompt_messages=prompt_messages, - model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user) - - def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - tools: list[PromptMessageTool] | None = None) -> int: + + def _invoke( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: list[PromptMessageTool] | None = None, + stop: list[str] | None = None, + stream: bool = True, + user: str | None = None, + ) -> LLMResult | Generator: + return self._generate( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def get_num_tokens( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + tools: list[PromptMessageTool] | None = None, + ) -> int: return self._num_tokens_from_messages(prompt_messages) - def _num_tokens_from_messages(self, messages: list[PromptMessage], ) -> int: + def _num_tokens_from_messages( + self, + messages: list[PromptMessage], + ) -> int: """Calculate num tokens for baichuan model""" def tokens(text: str): @@ -59,10 +93,10 @@ def tokens(text: str): num_tokens += tokens_per_message for key, value in message.items(): if isinstance(value, list): - text = '' + text = "" for item in value: - if isinstance(item, dict) and item['type'] == 'text': - text += item['text'] + if isinstance(item, dict) and item["type"] == "text": + text += item["text"] value = text @@ -84,19 +118,18 @@ def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: elif isinstance(message, AssistantPromptMessage): message = cast(AssistantPromptMessage, message) message_dict = {"role": "assistant", "content": message.content} + if message.tool_calls: + message_dict["tool_calls"] = [tool_call.dict() for tool_call in + message.tool_calls] elif isinstance(message, SystemPromptMessage): message = cast(SystemPromptMessage, message) - message_dict = {"role": "user", "content": message.content} + message_dict = {"role": "system", "content": message.content} elif isinstance(message, ToolPromptMessage): - # copy from core/model_runtime/model_providers/anthropic/llm/llm.py message = cast(ToolPromptMessage, message) message_dict = { - "role": "user", - "content": [{ - "type": "tool_result", - "tool_use_id": message.tool_call_id, - "content": message.content - }] + "role": "tool", + "content": message.content, + "tool_call_id": message.tool_call_id } else: raise ValueError(f"Unknown message type {type(message)}") @@ -105,89 +138,137 @@ def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: def validate_credentials(self, model: str, credentials: dict) -> None: # ping - instance = BaichuanModel( - api_key=credentials['api_key'], - secret_key=credentials.get('secret_key', '') - ) + instance = BaichuanModel(api_key=credentials["api_key"]) try: - instance.generate(model=model, stream=False, messages=[ - BaichuanMessage(content='ping', role='user') - ], parameters={ - 'max_tokens': 1, - }, timeout=60) + instance.generate( + model=model, + stream=False, + messages=[{"content": "ping", "role": "user"}], + parameters={ + "max_tokens": 1, + }, + timeout=60, + ) except Exception as e: raise CredentialsValidateFailedError(f"Invalid API key: {e}") - def 
_generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - model_parameters: dict, tools: list[PromptMessageTool] | None = None, - stop: list[str] | None = None, stream: bool = True, user: str | None = None) \ - -> LLMResult | Generator: - if tools is not None and len(tools) > 0: - raise InvokeBadRequestError("Baichuan model doesn't support tools") - - instance = BaichuanModel( - api_key=credentials['api_key'], - secret_key=credentials.get('secret_key', '') - ) + def _generate( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: list[PromptMessageTool] | None = None, + stop: list[str] | None = None, + stream: bool = True, + user: str | None = None, + ) -> LLMResult | Generator: - # convert prompt messages to baichuan messages - messages = [ - BaichuanMessage( - content=message.content if isinstance(message.content, str) else ''.join([ - content.data for content in message.content - ]), - role=message.role.value - ) for message in prompt_messages - ] + instance = BaichuanModel(api_key=credentials["api_key"]) + messages = [self._convert_prompt_message_to_dict(m) for m in prompt_messages] # invoke model - response = instance.generate(model=model, stream=stream, messages=messages, parameters=model_parameters, - timeout=60) + response = instance.generate( + model=model, + stream=stream, + messages=messages, + parameters=model_parameters, + timeout=60, + tools=tools, + ) if stream: - return self._handle_chat_generate_stream_response(model, prompt_messages, credentials, response) + return self._handle_chat_generate_stream_response( + model, prompt_messages, credentials, response + ) + + return self._handle_chat_generate_response( + model, prompt_messages, credentials, response + ) + + def _handle_chat_generate_response( + self, + model: str, + prompt_messages: list[PromptMessage], + credentials: dict, + response: dict, + ) -> LLMResult: + choices = response.get("choices", []) + assistant_message = AssistantPromptMessage(content='', tool_calls=[]) + stop_reason = None + if choices and choices[0]["finish_reason"] == "tool_calls": + stop_reason = "tool_calls" + for choice in choices: + for tool_call in choice["message"]["tool_calls"]: + tool = AssistantPromptMessage.ToolCall( + id=tool_call.get("id", ""), + type=tool_call.get("type", ""), + function=AssistantPromptMessage.ToolCall.ToolCallFunction( + name=tool_call.get("function", {}).get("name", ""), + arguments=tool_call.get("function", {}).get("arguments", "") + ), + ) + assistant_message.tool_calls.append(tool) + else: + for choice in choices: + assistant_message.content += choice["message"]["content"] + assistant_message.role = choice["message"]["role"] + if choice["finish_reason"]: + stop_reason = choice["finish_reason"] - return self._handle_chat_generate_response(model, prompt_messages, credentials, response) + usage = response.get("usage") + if usage: + # transform usage + prompt_tokens = usage["prompt_tokens"] + completion_tokens = usage["completion_tokens"] + else: + # calculate num tokens + prompt_tokens = self._num_tokens_from_messages(prompt_messages) + completion_tokens = self._num_tokens_from_messages([assistant_message]) - def _handle_chat_generate_response(self, model: str, - prompt_messages: list[PromptMessage], - credentials: dict, - response: BaichuanMessage) -> LLMResult: # convert baichuan message to llm result - usage = self._calc_response_usage(model=model, credentials=credentials, - prompt_tokens=response.usage['prompt_tokens'], - 
completion_tokens=response.usage['completion_tokens']) + usage = self._calc_response_usage( + model=model, + credentials=credentials, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) + return LLMResult( model=model, prompt_messages=prompt_messages, - message=AssistantPromptMessage( - content=response.content, - tool_calls=[] - ), + message=assistant_message, usage=usage, ) - def _handle_chat_generate_stream_response(self, model: str, - prompt_messages: list[PromptMessage], - credentials: dict, - response: Generator[BaichuanMessage, None, None]) -> Generator: + def _handle_chat_generate_stream_response( + self, + model: str, + prompt_messages: list[PromptMessage], + credentials: dict, + response: Generator[BaichuanMessage, None, None], + ) -> Generator: for message in response: if message.usage: - usage = self._calc_response_usage(model=model, credentials=credentials, - prompt_tokens=message.usage['prompt_tokens'], - completion_tokens=message.usage['completion_tokens']) + usage = self._calc_response_usage( + model=model, + credentials=credentials, + prompt_tokens=message.usage["prompt_tokens"], + completion_tokens=message.usage["completion_tokens"], + ) yield LLMResultChunk( model=model, prompt_messages=prompt_messages, delta=LLMResultChunkDelta( index=0, message=AssistantPromptMessage( - content=message.content, - tool_calls=[] + content=message.content, tool_calls=[] ), usage=usage, - finish_reason=message.stop_reason if message.stop_reason else None, + finish_reason=( + message.stop_reason if message.stop_reason else None + ), ), ) else: @@ -197,10 +278,11 @@ def _handle_chat_generate_stream_response(self, model: str, delta=LLMResultChunkDelta( index=0, message=AssistantPromptMessage( - content=message.content, - tool_calls=[] + content=message.content, tool_calls=[] + ), + finish_reason=( + message.stop_reason if message.stop_reason else None ), - finish_reason=message.stop_reason if message.stop_reason else None, ), ) @@ -215,21 +297,13 @@ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]] :return: Invoke error mapping """ return { - InvokeConnectionError: [ - ], - InvokeServerUnavailableError: [ - InternalServerError - ], - InvokeRateLimitError: [ - RateLimitReachedError - ], + InvokeConnectionError: [], + InvokeServerUnavailableError: [InternalServerError], + InvokeRateLimitError: [RateLimitReachedError], InvokeAuthorizationError: [ InvalidAuthenticationError, InsufficientAccountBalance, InvalidAPIKeyError, ], - InvokeBadRequestError: [ - BadRequestError, - KeyError - ] + InvokeBadRequestError: [BadRequestError, KeyError], } From ce70cd6d6affce1ce1ddd3d8bf33139e06e099b1 Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 11:35:27 +0800 Subject: [PATCH 2/8] refactor baichuan model --- .../baichuan/llm/baichuan2-turbo.yaml | 18 ++-- .../baichuan/llm/baichuan3-turbo-128k.yaml | 31 ++++--- .../baichuan/llm/baichuan3-turbo.yaml | 30 ++++--- .../baichuan/llm/baichuan_turbo.py | 83 ++----------------- .../model_providers/baichuan/llm/llm.py | 83 +++++++++---------- 5 files changed, 96 insertions(+), 149 deletions(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo.yaml index f91329c77aa9ec..ccb4ee8b92bc16 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo.yaml @@ -4,36 +4,32 @@ label: model_type: llm features: - 
agent-thought + - multi-tool-call model_properties: mode: chat context_size: 32000 parameter_rules: - name: temperature use_template: temperature + default: 0.3 - name: top_p use_template: top_p + default: 0.85 - name: top_k label: zh_Hans: 取样数量 en_US: Top k type: int + min: 0 + max: 20 + default: 5 help: zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 en_US: Only sample from the top K options for each subsequent token. required: false - name: max_tokens use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 192000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 + default: 2048 - name: with_search_enhance label: zh_Hans: 搜索增强 diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml index bf72e8229671f6..59c5f10a4a55cd 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml @@ -4,36 +4,44 @@ label: model_type: llm features: - agent-thought + - multi-tool-call model_properties: mode: chat context_size: 128000 parameter_rules: - name: temperature use_template: temperature + default: 0.3 - name: top_p use_template: top_p + default: 0.85 - name: top_k label: zh_Hans: 取样数量 en_US: Top k type: int + min: 0 + max: 20 + default: 5 help: zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 en_US: Only sample from the top K options for each subsequent token. required: false - name: max_tokens use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 128000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 + default: 2048 + - name: res_format + label: + zh_Hans: 回复格式 + en_US: response format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object - name: with_search_enhance label: zh_Hans: 搜索增强 @@ -43,3 +51,4 @@ parameter_rules: zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 en_US: Allow the model to perform external search to enhance the generation results. required: false + diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo.yaml index 85882519b86741..ee8a9ff0d5408a 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo.yaml @@ -4,36 +4,44 @@ label: model_type: llm features: - agent-thought + - multi-tool-call model_properties: mode: chat context_size: 32000 parameter_rules: - name: temperature use_template: temperature + default: 0.3 - name: top_p use_template: top_p + default: 0.85 - name: top_k label: zh_Hans: 取样数量 en_US: Top k type: int + min: 0 + max: 20 + default: 5 help: zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 en_US: Only sample from the top K options for each subsequent token. 
required: false - name: max_tokens use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 32000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 + default: 2048 + - name: res_format + label: + zh_Hans: 回复格式 + en_US: response format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object - name: with_search_enhance label: zh_Hans: 搜索增强 diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py index eb2d8de152f142..67a18d9a538b6a 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py @@ -1,14 +1,9 @@ -from collections.abc import Generator -from enum import Enum -from json import dumps, loads -from typing import Any, Optional, Union +import json +from typing import Any, Optional, Union, Iterator from requests import post -from core.model_runtime.entities.message_entities import ( - AssistantPromptMessage, - PromptMessageTool, -) +from core.model_runtime.entities.message_entities import PromptMessageTool from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import ( BadRequestError, InsufficientAccountBalance, @@ -19,33 +14,6 @@ ) -class BaichuanMessage: - class Role(Enum): - USER = "user" - ASSISTANT = "assistant" - SYSTEM = "system" - TOOL = "tool" - - role: str = Role.USER.value - content: str - usage: dict[str, int] = None - tool_calls : list[AssistantPromptMessage.ToolCall] = [] - tool_call_id: str = "" - stop_reason: str = "" - - def to_dict(self) -> dict[str, Any]: - return { - "role": self.role, - "content": self.content, - } - - def __init__(self, content: str, role: str = "user", tool_call_id: str = "") -> None: - self.content = content - self.role = role - if tool_call_id: - self.tool_call_id = tool_call_id - - class BaichuanModel: api_key: str @@ -68,41 +36,6 @@ def request_headers(self) -> dict[str, Any]: "Authorization": "Bearer " + self.api_key, } - def _handle_chat_stream_generate_response(self, response) -> Generator: - for line in response.iter_lines(): - if not line: - continue - line = line.decode("utf-8") - # remove the first `data: ` prefix - if line.startswith("data:"): - line = line[5:].strip() - try: - data = loads(line) - except Exception as e: - if line.strip() == "[DONE]": - return - choices = data.get("choices", []) - # save stop reason temporarily - stop_reason = "" - for choice in choices: - if choice.get("finish_reason"): - stop_reason = choice["finish_reason"] - - if len(choice["delta"]["content"]) == 0: - continue - yield BaichuanMessage(**choice["delta"]) - - # if there is usage, the response is the last one, yield it and return - if "usage" in data: - message = BaichuanMessage(content="", role="assistant") - message.usage = { - "prompt_tokens": data["usage"]["prompt_tokens"], - "completion_tokens": data["usage"]["completion_tokens"], - "total_tokens": data["usage"]["total_tokens"], - } - message.stop_reason = stop_reason - yield message - def _build_parameters( self, model: str, @@ -112,12 +45,14 @@ def _build_parameters( tools: Optional[list[PromptMessageTool]] = None, ) -> dict[str, Any]: if model in self._model_mapping.keys(): - # the LargeLanguageModel._code_block_mode_wrapper() method will remove the 
response_format of parameters. we need to rename it to res_format to get its value + # the LargeLanguageModel._code_block_mode_wrapper() method will remove the response_format of parameters. + # we need to rename it to res_format to get its value if parameters.get("res_format") == "json_object": parameters["response_format"] = {"type": "json_object"} if tools or parameters.get("with_search_enhance") is True: parameters["tools"] = [] + # with_search_enhance is deprecated, use web_search instead if parameters.get("with_search_enhance") is True: parameters["tools"].append( @@ -157,7 +92,7 @@ def generate( parameters: dict[str, Any], timeout: int, tools: Optional[list[PromptMessageTool]] = None, - ) -> Union[Generator, dict]: + ) -> Union[Iterator, dict]: if model in self._model_mapping.keys(): api_base = "https://api.baichuan-ai.com/v1/chat/completions" @@ -170,7 +105,7 @@ def generate( response = post( url=api_base, headers=self.request_headers, - data=dumps(data), + data=json.dumps(data), timeout=timeout, stream=stream, ) @@ -206,6 +141,6 @@ def generate( raise InternalServerError(f"Unknown error: {err} with message: {msg}") if stream: - return self._handle_chat_stream_generate_response(response) + return response.iter_lines() else: return response.json() diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index d05be120890f51..d75feb2f9bca51 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -1,5 +1,5 @@ -from collections.abc import Generator -from typing import cast +import json +from typing import Generator, Iterator, cast from core.model_runtime.entities.llm_entities import ( LLMResult, @@ -23,16 +23,9 @@ InvokeServerUnavailableError, ) from core.model_runtime.errors.validate import CredentialsValidateFailedError -from core.model_runtime.model_providers.__base.large_language_model import ( - LargeLanguageModel, -) -from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import ( - BaichuanTokenizer, -) -from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import ( - BaichuanMessage, - BaichuanModel, -) +from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel +from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import BaichuanTokenizer +from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import BaichuanModel from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import ( BadRequestError, InsufficientAccountBalance, @@ -62,9 +55,7 @@ def _invoke( prompt_messages=prompt_messages, model_parameters=model_parameters, tools=tools, - stop=stop, stream=stream, - user=user, ) def get_num_tokens( @@ -160,9 +151,7 @@ def _generate( prompt_messages: list[PromptMessage], model_parameters: dict, tools: list[PromptMessageTool] | None = None, - stop: list[str] | None = None, stream: bool = True, - user: str | None = None, ) -> LLMResult | Generator: instance = BaichuanModel(api_key=credentials["api_key"]) @@ -196,9 +185,7 @@ def _handle_chat_generate_response( ) -> LLMResult: choices = response.get("choices", []) assistant_message = AssistantPromptMessage(content='', tool_calls=[]) - stop_reason = None if choices and choices[0]["finish_reason"] == "tool_calls": - stop_reason = "tool_calls" for choice in choices: for tool_call in choice["message"]["tool_calls"]: tool = AssistantPromptMessage.ToolCall( @@ -214,8 
+201,6 @@ def _handle_chat_generate_response( for choice in choices: assistant_message.content += choice["message"]["content"] assistant_message.role = choice["message"]["role"] - if choice["finish_reason"]: - stop_reason = choice["finish_reason"] usage = response.get("usage") if usage: @@ -227,7 +212,6 @@ def _handle_chat_generate_response( prompt_tokens = self._num_tokens_from_messages(prompt_messages) completion_tokens = self._num_tokens_from_messages([assistant_message]) - # convert baichuan message to llm result usage = self._calc_response_usage( model=model, credentials=credentials, @@ -247,42 +231,57 @@ def _handle_chat_generate_stream_response( model: str, prompt_messages: list[PromptMessage], credentials: dict, - response: Generator[BaichuanMessage, None, None], + response: Iterator, ) -> Generator: - for message in response: - if message.usage: - usage = self._calc_response_usage( - model=model, - credentials=credentials, - prompt_tokens=message.usage["prompt_tokens"], - completion_tokens=message.usage["completion_tokens"], - ) + for line in response: + if not line: + continue + line = line.decode("utf-8") + # remove the first `data: ` prefix + if line.startswith("data:"): + line = line[5:].strip() + try: + data = json.loads(line) + except Exception as e: + if line.strip() == "[DONE]": + return + choices = data.get("choices", []) + + stop_reason = "" + for choice in choices: + if choice.get("finish_reason"): + stop_reason = choice["finish_reason"] + + if len(choice["delta"]["content"]) == 0: + continue yield LLMResultChunk( model=model, prompt_messages=prompt_messages, delta=LLMResultChunkDelta( index=0, message=AssistantPromptMessage( - content=message.content, tool_calls=[] - ), - usage=usage, - finish_reason=( - message.stop_reason if message.stop_reason else None + content=choice["delta"]["content"], tool_calls=[] ), + finish_reason=stop_reason, ), ) - else: + + # if there is usage, the response is the last one, yield it and return + if "usage" in data: + usage = self._calc_response_usage( + model=model, + credentials=credentials, + prompt_tokens=data["usage"]["prompt_tokens"], + completion_tokens=data["usage"]["completion_tokens"], + ) yield LLMResultChunk( model=model, prompt_messages=prompt_messages, delta=LLMResultChunkDelta( index=0, - message=AssistantPromptMessage( - content=message.content, tool_calls=[] - ), - finish_reason=( - message.stop_reason if message.stop_reason else None - ), + message=AssistantPromptMessage(content="", tool_calls=[]), + usage=usage, + finish_reason=stop_reason, ), ) From 108e77a68382b190e8eb8c0d9f7cd3af34d43cb9 Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 11:35:40 +0800 Subject: [PATCH 3/8] refactor baichuan model --- .../model_providers/baichuan/llm/baichuan_turbo.py | 3 ++- api/core/model_runtime/model_providers/baichuan/llm/llm.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py index 67a18d9a538b6a..a8fd9dce91abbf 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py @@ -1,5 +1,6 @@ import json -from typing import Any, Optional, Union, Iterator +from collections.abc import Iterator +from typing import Any, Optional, Union from requests import post diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py 
b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index d75feb2f9bca51..4f44682e9f440e 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -1,5 +1,6 @@ import json -from typing import Generator, Iterator, cast +from collections.abc import Generator, Iterator +from typing import cast from core.model_runtime.entities.llm_entities import ( LLMResult, From 221c03ea727ef2750375e5d3418f335637441e1d Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 11:57:46 +0800 Subject: [PATCH 4/8] fix CI --- .../model_providers/baichuan/llm/baichuan3-turbo-128k.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml index 59c5f10a4a55cd..c6c6c7e9e91947 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml @@ -51,4 +51,3 @@ parameter_rules: zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 en_US: Allow the model to perform external search to enhance the generation results. required: false - From bb0a350f720a92856f17c55aff835b6142be5a5d Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 13:18:52 +0800 Subject: [PATCH 5/8] nothing changed, just for re-run CI --- api/core/model_runtime/model_providers/baichuan/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index 4f44682e9f440e..049c078a21cd4e 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -85,7 +85,7 @@ def tokens(text: str): num_tokens += tokens_per_message for key, value in message.items(): if isinstance(value, list): - text = "" + text = '' for item in value: if isinstance(item, dict) and item["type"] == "text": text += item["text"] From cef3856713bba189f4800b4b79e568f1dc211bad Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 13:19:04 +0800 Subject: [PATCH 6/8] nothing changed, just for re-run CI --- api/core/model_runtime/model_providers/baichuan/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index 049c078a21cd4e..4f44682e9f440e 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -85,7 +85,7 @@ def tokens(text: str): num_tokens += tokens_per_message for key, value in message.items(): if isinstance(value, list): - text = '' + text = "" for item in value: if isinstance(item, dict) and item["type"] == "text": text += item["text"] From c54a7c51f20e72adbdb7fdcf5116a375f8c87f32 Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 13:30:42 +0800 Subject: [PATCH 7/8] not remove the deprecated model, just add deprecated=true --- .../baichuan/llm/baichuan2-53b.yaml | 46 +++++++++++++++++++ .../baichuan/llm/baichuan2-turbo-192k.yaml | 46 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml create mode 100644 api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml diff --git 
a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml new file mode 100644 index 00000000000000..8360dd5faffb00 --- /dev/null +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml @@ -0,0 +1,46 @@ +model: baichuan2-53b +label: + en_US: Baichuan2-53B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens + use_template: max_tokens + required: true + default: 1000 + min: 1 + max: 4000 + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + default: 1 + min: 1 + max: 2 + - name: with_search_enhance + label: + zh_Hans: 搜索增强 + en_US: Search Enhance + type: boolean + help: + zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 + en_US: Allow the model to perform external search to enhance the generation results. + required: false +deprecated: true diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml new file mode 100644 index 00000000000000..17873a476ea534 --- /dev/null +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml @@ -0,0 +1,46 @@ +model: baichuan2-turbo-192k +label: + en_US: Baichuan2-Turbo-192K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 192000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens + use_template: max_tokens + required: true + default: 8000 + min: 1 + max: 192000 + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + default: 1 + min: 1 + max: 2 + - name: with_search_enhance + label: + zh_Hans: 搜索增强 + en_US: Search Enhance + type: boolean + help: + zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 + en_US: Allow the model to perform external search to enhance the generation results. + required: false +deprecated: true \ No newline at end of file From f41efc554d7de9f27b0c89fdd5f243ed46813039 Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 13:34:47 +0800 Subject: [PATCH 8/8] fix CI --- .../model_providers/baichuan/llm/baichuan2-turbo-192k.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml index 17873a476ea534..0ce0265cfe5c6c 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml @@ -43,4 +43,4 @@ parameter_rules: zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 en_US: Allow the model to perform external search to enhance the generation results. required: false -deprecated: true \ No newline at end of file +deprecated: true
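
Usage sketch (not part of the patch series): the snippet below illustrates how the refactored provider is expected to be called after these patches, where BaichuanModel takes only an api_key, messages are plain dicts with OpenAI-compatible roles, and tools are passed separately and converted to the API's "function" schema. The model name, tool definition, parameter values, and API key here are placeholders chosen for illustration, not values taken from the diffs.

# Hypothetical usage sketch for the refactored BaichuanModel.
# Assumptions: module paths as in the diffs above, a placeholder API key,
# and network access to api.baichuan-ai.com.
from core.model_runtime.entities.message_entities import PromptMessageTool
from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import BaichuanModel

instance = BaichuanModel(api_key="YOUR_API_KEY")  # placeholder credential

# Messages are now plain dicts (user/assistant/system/tool roles),
# no longer BaichuanMessage objects.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the weather in Beijing?"},
]

# Tools are passed separately; _build_parameters() wraps each one in the
# {"type": "function", "function": {...}} payload expected by the API.
weather_tool = PromptMessageTool(
    name="get_weather",  # hypothetical tool for illustration
    description="Query current weather for a city",
    parameters={
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
)

# With stream=False the raw JSON dict from /v1/chat/completions is returned;
# with stream=True, generate() returns response.iter_lines() and llm.py
# parses the SSE chunks in _handle_chat_generate_stream_response().
response = instance.generate(
    model="baichuan4",
    stream=False,
    messages=messages,
    parameters={"temperature": 0.3, "max_tokens": 2048},
    timeout=60,
    tools=[weather_tool],
)
print(response["choices"][0]["message"])

Under this shape, prompt-message conversion (including tool_calls on assistant messages and tool_call_id on tool messages) lives entirely in llm.py's _convert_prompt_message_to_dict(), which is why BaichuanMessage could be removed from baichuan_turbo.py in patch 2.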