From 12c0d64cfd759c41c005722fb18927e3cf5613ef Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 10:03:51 +0800 Subject: [PATCH 1/8] refactor baichuan model --- .../model_providers/baichuan/baichuan.yaml | 8 - .../baichuan/llm/baichuan2-53b.yaml | 45 --- .../baichuan/llm/baichuan2-turbo-192k.yaml | 45 --- .../baichuan/llm/baichuan4.yaml | 30 +- .../baichuan/llm/baichuan_turbo.py | 245 ++++++++-------- .../model_providers/baichuan/llm/llm.py | 264 +++++++++++------- 6 files changed, 307 insertions(+), 330 deletions(-) delete mode 100644 api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml delete mode 100644 api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml diff --git a/api/core/model_runtime/model_providers/baichuan/baichuan.yaml b/api/core/model_runtime/model_providers/baichuan/baichuan.yaml index 792126af7fd58f..81e6e36215aa84 100644 --- a/api/core/model_runtime/model_providers/baichuan/baichuan.yaml +++ b/api/core/model_runtime/model_providers/baichuan/baichuan.yaml @@ -27,11 +27,3 @@ provider_credential_schema: placeholder: zh_Hans: 在此输入您的 API Key en_US: Enter your API Key - - variable: secret_key - label: - en_US: Secret Key - type: secret-input - required: false - placeholder: - zh_Hans: 在此输入您的 Secret Key - en_US: Enter your Secret Key diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml deleted file mode 100644 index 04849500dcb7f1..00000000000000 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml +++ /dev/null @@ -1,45 +0,0 @@ -model: baichuan2-53b -label: - en_US: Baichuan2-53B -model_type: llm -features: - - agent-thought -model_properties: - mode: chat - context_size: 32000 -parameter_rules: - - name: temperature - use_template: temperature - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top k - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. - required: false - - name: max_tokens - use_template: max_tokens - required: true - default: 1000 - min: 1 - max: 4000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 - - name: with_search_enhance - label: - zh_Hans: 搜索增强 - en_US: Search Enhance - type: boolean - help: - zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 - en_US: Allow the model to perform external search to enhance the generation results. - required: false diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml deleted file mode 100644 index c8156c152b15bd..00000000000000 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml +++ /dev/null @@ -1,45 +0,0 @@ -model: baichuan2-turbo-192k -label: - en_US: Baichuan2-Turbo-192K -model_type: llm -features: - - agent-thought -model_properties: - mode: chat - context_size: 192000 -parameter_rules: - - name: temperature - use_template: temperature - - name: top_p - use_template: top_p - - name: top_k - label: - zh_Hans: 取样数量 - en_US: Top k - type: int - help: - zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 - en_US: Only sample from the top K options for each subsequent token. 
- required: false - - name: max_tokens - use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 192000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 - - name: with_search_enhance - label: - zh_Hans: 搜索增强 - en_US: Search Enhance - type: boolean - help: - zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 - en_US: Allow the model to perform external search to enhance the generation results. - required: false diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan4.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan4.yaml index f8c65660818818..e5e6aeb49158e8 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan4.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan4.yaml @@ -4,36 +4,44 @@ label: model_type: llm features: - agent-thought + - multi-tool-call model_properties: mode: chat context_size: 32000 parameter_rules: - name: temperature use_template: temperature + default: 0.3 - name: top_p use_template: top_p + default: 0.85 - name: top_k label: zh_Hans: 取样数量 en_US: Top k type: int + min: 0 + max: 20 + default: 5 help: zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 en_US: Only sample from the top K options for each subsequent token. required: false - name: max_tokens use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 32000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 + default: 2048 + - name: res_format + label: + zh_Hans: 回复格式 + en_US: response format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object - name: with_search_enhance label: zh_Hans: 搜索增强 diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py index d7d8b7c91b6e2d..eb2d8de152f142 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py @@ -1,11 +1,14 @@ from collections.abc import Generator from enum import Enum -from hashlib import md5 from json import dumps, loads -from typing import Any, Union +from typing import Any, Optional, Union from requests import post +from core.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + PromptMessageTool, +) from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import ( BadRequestError, InsufficientAccountBalance, @@ -18,201 +21,191 @@ class BaichuanMessage: class Role(Enum): - USER = 'user' - ASSISTANT = 'assistant' - # Baichuan does not have system message - _SYSTEM = 'system' + USER = "user" + ASSISTANT = "assistant" + SYSTEM = "system" + TOOL = "tool" role: str = Role.USER.value content: str usage: dict[str, int] = None - stop_reason: str = '' + tool_calls : list[AssistantPromptMessage.ToolCall] = [] + tool_call_id: str = "" + stop_reason: str = "" def to_dict(self) -> dict[str, Any]: return { - 'role': self.role, - 'content': self.content, + "role": self.role, + "content": self.content, } - - def __init__(self, content: str, role: str = 'user') -> None: + + def __init__(self, content: str, role: str = "user", tool_call_id: str = "") -> None: self.content = content self.role = role + if tool_call_id: + self.tool_call_id = tool_call_id + class 
BaichuanModel: api_key: str - secret_key: str - def __init__(self, api_key: str, secret_key: str = '') -> None: + def __init__(self, api_key: str) -> None: self.api_key = api_key - self.secret_key = secret_key - def _model_mapping(self, model: str) -> str: + @property + def _model_mapping(self) -> dict: return { - 'baichuan2-turbo': 'Baichuan2-Turbo', - 'baichuan2-turbo-192k': 'Baichuan2-Turbo-192k', - 'baichuan2-53b': 'Baichuan2-53B', - 'baichuan3-turbo': 'Baichuan3-Turbo', - 'baichuan3-turbo-128k': 'Baichuan3-Turbo-128k', - 'baichuan4': 'Baichuan4', - }[model] - - def _handle_chat_generate_response(self, response) -> BaichuanMessage: - resp = response.json() - choices = resp.get('choices', []) - message = BaichuanMessage(content='', role='assistant') - for choice in choices: - message.content += choice['message']['content'] - message.role = choice['message']['role'] - if choice['finish_reason']: - message.stop_reason = choice['finish_reason'] - - if 'usage' in resp: - message.usage = { - 'prompt_tokens': resp['usage']['prompt_tokens'], - 'completion_tokens': resp['usage']['completion_tokens'], - 'total_tokens': resp['usage']['total_tokens'], - } + "baichuan2-turbo": "Baichuan2-Turbo", + "baichuan3-turbo": "Baichuan3-Turbo", + "baichuan3-turbo-128k": "Baichuan3-Turbo-128k", + "baichuan4": "Baichuan4", + } + + @property + def request_headers(self) -> dict[str, Any]: + return { + "Content-Type": "application/json", + "Authorization": "Bearer " + self.api_key, + } - return message - def _handle_chat_stream_generate_response(self, response) -> Generator: for line in response.iter_lines(): if not line: continue - line = line.decode('utf-8') + line = line.decode("utf-8") # remove the first `data: ` prefix - if line.startswith('data:'): + if line.startswith("data:"): line = line[5:].strip() try: data = loads(line) except Exception as e: - if line.strip() == '[DONE]': + if line.strip() == "[DONE]": return - choices = data.get('choices', []) + choices = data.get("choices", []) # save stop reason temporarily - stop_reason = '' + stop_reason = "" for choice in choices: - if choice.get('finish_reason'): - stop_reason = choice['finish_reason'] + if choice.get("finish_reason"): + stop_reason = choice["finish_reason"] - if len(choice['delta']['content']) == 0: + if len(choice["delta"]["content"]) == 0: continue - yield BaichuanMessage(**choice['delta']) + yield BaichuanMessage(**choice["delta"]) # if there is usage, the response is the last one, yield it and return - if 'usage' in data: - message = BaichuanMessage(content='', role='assistant') + if "usage" in data: + message = BaichuanMessage(content="", role="assistant") message.usage = { - 'prompt_tokens': data['usage']['prompt_tokens'], - 'completion_tokens': data['usage']['completion_tokens'], - 'total_tokens': data['usage']['total_tokens'], + "prompt_tokens": data["usage"]["prompt_tokens"], + "completion_tokens": data["usage"]["completion_tokens"], + "total_tokens": data["usage"]["total_tokens"], } message.stop_reason = stop_reason yield message - def _build_parameters(self, model: str, stream: bool, messages: list[BaichuanMessage], - parameters: dict[str, Any]) \ - -> dict[str, Any]: - if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b' - or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'): - prompt_messages = [] - for message in messages: - if message.role == BaichuanMessage.Role.USER.value or message.role == BaichuanMessage.Role._SYSTEM.value: - # check if the latest 
message is a user message - if len(prompt_messages) > 0 and prompt_messages[-1]['role'] == BaichuanMessage.Role.USER.value: - prompt_messages[-1]['content'] += message.content - else: - prompt_messages.append({ - 'content': message.content, - 'role': BaichuanMessage.Role.USER.value, - }) - elif message.role == BaichuanMessage.Role.ASSISTANT.value: - prompt_messages.append({ - 'content': message.content, - 'role': message.role, - }) - # [baichuan] frequency_penalty must be between 1 and 2 - if 'frequency_penalty' in parameters: - if parameters['frequency_penalty'] < 1 or parameters['frequency_penalty'] > 2: - parameters['frequency_penalty'] = 1 + def _build_parameters( + self, + model: str, + stream: bool, + messages: list[dict], + parameters: dict[str, Any], + tools: Optional[list[PromptMessageTool]] = None, + ) -> dict[str, Any]: + if model in self._model_mapping.keys(): + # the LargeLanguageModel._code_block_mode_wrapper() method will remove the response_format of parameters. we need to rename it to res_format to get its value + if parameters.get("res_format") == "json_object": + parameters["response_format"] = {"type": "json_object"} + + if tools or parameters.get("with_search_enhance") is True: + parameters["tools"] = [] + # with_search_enhance is deprecated, use web_search instead + if parameters.get("with_search_enhance") is True: + parameters["tools"].append( + { + "type": "web_search", + "web_search": {"enable": True}, + } + ) + if tools: + for tool in tools: + parameters["tools"].append( + { + "type": "function", + "function": { + "name": tool.name, + "description": tool.description, + "parameters": tool.parameters, + }, + } + ) # turbo api accepts flat parameters return { - 'model': self._model_mapping(model), - 'stream': stream, - 'messages': prompt_messages, + "model": self._model_mapping.get(model), + "stream": stream, + "messages": messages, **parameters, } else: raise BadRequestError(f"Unknown model: {model}") - - def _build_headers(self, model: str, data: dict[str, Any]) -> dict[str, Any]: - if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b' - or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'): - # there is no secret key for turbo api - return { - 'Content-Type': 'application/json', - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ', - 'Authorization': 'Bearer ' + self.api_key, - } - else: - raise BadRequestError(f"Unknown model: {model}") - - def _calculate_md5(self, input_string): - return md5(input_string.encode('utf-8')).hexdigest() - - def generate(self, model: str, stream: bool, messages: list[BaichuanMessage], - parameters: dict[str, Any], timeout: int) \ - -> Union[Generator, BaichuanMessage]: - - if (model == 'baichuan2-turbo' or model == 'baichuan2-turbo-192k' or model == 'baichuan2-53b' - or model == 'baichuan3-turbo' or model == 'baichuan3-turbo-128k' or model == 'baichuan4'): - api_base = 'https://api.baichuan-ai.com/v1/chat/completions' + + def generate( + self, + model: str, + stream: bool, + messages: list[dict], + parameters: dict[str, Any], + timeout: int, + tools: Optional[list[PromptMessageTool]] = None, + ) -> Union[Generator, dict]: + + if model in self._model_mapping.keys(): + api_base = "https://api.baichuan-ai.com/v1/chat/completions" else: raise BadRequestError(f"Unknown model: {model}") - - try: - data = self._build_parameters(model, stream, messages, parameters) - headers = self._build_headers(model, data) - except KeyError: - raise 
InternalServerError(f"Failed to build parameters for model: {model}") + + data = self._build_parameters(model, stream, messages, parameters, tools) try: response = post( url=api_base, - headers=headers, + headers=self.request_headers, data=dumps(data), timeout=timeout, - stream=stream + stream=stream, ) except Exception as e: raise InternalServerError(f"Failed to invoke model: {e}") - + if response.status_code != 200: try: resp = response.json() # try to parse error message - err = resp['error']['code'] - msg = resp['error']['message'] + err = resp["error"]["type"] + msg = resp["error"]["message"] except Exception as e: - raise InternalServerError(f"Failed to convert response to json: {e} with text: {response.text}") + raise InternalServerError( + f"Failed to convert response to json: {e} with text: {response.text}" + ) - if err == 'invalid_api_key': + if err == "invalid_api_key": raise InvalidAPIKeyError(msg) - elif err == 'insufficient_quota': + elif err == "insufficient_quota": raise InsufficientAccountBalance(msg) - elif err == 'invalid_authentication': + elif err == "invalid_authentication": raise InvalidAuthenticationError(msg) - elif 'rate' in err: + elif err == "invalid_request_error": + raise BadRequestError(msg) + elif "rate" in err: raise RateLimitReachedError(msg) - elif 'internal' in err: + elif "internal" in err: raise InternalServerError(msg) - elif err == 'api_key_empty': + elif err == "api_key_empty": raise InvalidAPIKeyError(msg) else: raise InternalServerError(f"Unknown error: {err} with message: {msg}") - + if stream: return self._handle_chat_stream_generate_response(response) else: - return self._handle_chat_generate_response(response) \ No newline at end of file + return response.json() diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index edcd3af4203cfb..d05be120890f51 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -1,7 +1,11 @@ from collections.abc import Generator from typing import cast -from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta +from core.model_runtime.entities.llm_entities import ( + LLMResult, + LLMResultChunk, + LLMResultChunkDelta, +) from core.model_runtime.entities.message_entities import ( AssistantPromptMessage, PromptMessage, @@ -19,9 +23,16 @@ InvokeServerUnavailableError, ) from core.model_runtime.errors.validate import CredentialsValidateFailedError -from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel -from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import BaichuanTokenizer -from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import BaichuanMessage, BaichuanModel +from core.model_runtime.model_providers.__base.large_language_model import ( + LargeLanguageModel, +) +from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import ( + BaichuanTokenizer, +) +from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import ( + BaichuanMessage, + BaichuanModel, +) from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import ( BadRequestError, InsufficientAccountBalance, @@ -33,19 +44,42 @@ class BaichuanLarguageModel(LargeLanguageModel): - def _invoke(self, model: str, credentials: dict, - prompt_messages: list[PromptMessage], model_parameters: dict, - tools: list[PromptMessageTool] | None = None, 
stop: list[str] | None = None, - stream: bool = True, user: str | None = None) \ - -> LLMResult | Generator: - return self._generate(model=model, credentials=credentials, prompt_messages=prompt_messages, - model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user) - - def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - tools: list[PromptMessageTool] | None = None) -> int: + + def _invoke( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: list[PromptMessageTool] | None = None, + stop: list[str] | None = None, + stream: bool = True, + user: str | None = None, + ) -> LLMResult | Generator: + return self._generate( + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def get_num_tokens( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + tools: list[PromptMessageTool] | None = None, + ) -> int: return self._num_tokens_from_messages(prompt_messages) - def _num_tokens_from_messages(self, messages: list[PromptMessage], ) -> int: + def _num_tokens_from_messages( + self, + messages: list[PromptMessage], + ) -> int: """Calculate num tokens for baichuan model""" def tokens(text: str): @@ -59,10 +93,10 @@ def tokens(text: str): num_tokens += tokens_per_message for key, value in message.items(): if isinstance(value, list): - text = '' + text = "" for item in value: - if isinstance(item, dict) and item['type'] == 'text': - text += item['text'] + if isinstance(item, dict) and item["type"] == "text": + text += item["text"] value = text @@ -84,19 +118,18 @@ def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: elif isinstance(message, AssistantPromptMessage): message = cast(AssistantPromptMessage, message) message_dict = {"role": "assistant", "content": message.content} + if message.tool_calls: + message_dict["tool_calls"] = [tool_call.dict() for tool_call in + message.tool_calls] elif isinstance(message, SystemPromptMessage): message = cast(SystemPromptMessage, message) - message_dict = {"role": "user", "content": message.content} + message_dict = {"role": "system", "content": message.content} elif isinstance(message, ToolPromptMessage): - # copy from core/model_runtime/model_providers/anthropic/llm/llm.py message = cast(ToolPromptMessage, message) message_dict = { - "role": "user", - "content": [{ - "type": "tool_result", - "tool_use_id": message.tool_call_id, - "content": message.content - }] + "role": "tool", + "content": message.content, + "tool_call_id": message.tool_call_id } else: raise ValueError(f"Unknown message type {type(message)}") @@ -105,89 +138,137 @@ def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict: def validate_credentials(self, model: str, credentials: dict) -> None: # ping - instance = BaichuanModel( - api_key=credentials['api_key'], - secret_key=credentials.get('secret_key', '') - ) + instance = BaichuanModel(api_key=credentials["api_key"]) try: - instance.generate(model=model, stream=False, messages=[ - BaichuanMessage(content='ping', role='user') - ], parameters={ - 'max_tokens': 1, - }, timeout=60) + instance.generate( + model=model, + stream=False, + messages=[{"content": "ping", "role": "user"}], + parameters={ + "max_tokens": 1, + }, + timeout=60, + ) except Exception as e: raise CredentialsValidateFailedError(f"Invalid API key: {e}") - def 
_generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], - model_parameters: dict, tools: list[PromptMessageTool] | None = None, - stop: list[str] | None = None, stream: bool = True, user: str | None = None) \ - -> LLMResult | Generator: - if tools is not None and len(tools) > 0: - raise InvokeBadRequestError("Baichuan model doesn't support tools") - - instance = BaichuanModel( - api_key=credentials['api_key'], - secret_key=credentials.get('secret_key', '') - ) + def _generate( + self, + model: str, + credentials: dict, + prompt_messages: list[PromptMessage], + model_parameters: dict, + tools: list[PromptMessageTool] | None = None, + stop: list[str] | None = None, + stream: bool = True, + user: str | None = None, + ) -> LLMResult | Generator: - # convert prompt messages to baichuan messages - messages = [ - BaichuanMessage( - content=message.content if isinstance(message.content, str) else ''.join([ - content.data for content in message.content - ]), - role=message.role.value - ) for message in prompt_messages - ] + instance = BaichuanModel(api_key=credentials["api_key"]) + messages = [self._convert_prompt_message_to_dict(m) for m in prompt_messages] # invoke model - response = instance.generate(model=model, stream=stream, messages=messages, parameters=model_parameters, - timeout=60) + response = instance.generate( + model=model, + stream=stream, + messages=messages, + parameters=model_parameters, + timeout=60, + tools=tools, + ) if stream: - return self._handle_chat_generate_stream_response(model, prompt_messages, credentials, response) + return self._handle_chat_generate_stream_response( + model, prompt_messages, credentials, response + ) + + return self._handle_chat_generate_response( + model, prompt_messages, credentials, response + ) + + def _handle_chat_generate_response( + self, + model: str, + prompt_messages: list[PromptMessage], + credentials: dict, + response: dict, + ) -> LLMResult: + choices = response.get("choices", []) + assistant_message = AssistantPromptMessage(content='', tool_calls=[]) + stop_reason = None + if choices and choices[0]["finish_reason"] == "tool_calls": + stop_reason = "tool_calls" + for choice in choices: + for tool_call in choice["message"]["tool_calls"]: + tool = AssistantPromptMessage.ToolCall( + id=tool_call.get("id", ""), + type=tool_call.get("type", ""), + function=AssistantPromptMessage.ToolCall.ToolCallFunction( + name=tool_call.get("function", {}).get("name", ""), + arguments=tool_call.get("function", {}).get("arguments", "") + ), + ) + assistant_message.tool_calls.append(tool) + else: + for choice in choices: + assistant_message.content += choice["message"]["content"] + assistant_message.role = choice["message"]["role"] + if choice["finish_reason"]: + stop_reason = choice["finish_reason"] - return self._handle_chat_generate_response(model, prompt_messages, credentials, response) + usage = response.get("usage") + if usage: + # transform usage + prompt_tokens = usage["prompt_tokens"] + completion_tokens = usage["completion_tokens"] + else: + # calculate num tokens + prompt_tokens = self._num_tokens_from_messages(prompt_messages) + completion_tokens = self._num_tokens_from_messages([assistant_message]) - def _handle_chat_generate_response(self, model: str, - prompt_messages: list[PromptMessage], - credentials: dict, - response: BaichuanMessage) -> LLMResult: # convert baichuan message to llm result - usage = self._calc_response_usage(model=model, credentials=credentials, - prompt_tokens=response.usage['prompt_tokens'], - 
completion_tokens=response.usage['completion_tokens']) + usage = self._calc_response_usage( + model=model, + credentials=credentials, + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) + return LLMResult( model=model, prompt_messages=prompt_messages, - message=AssistantPromptMessage( - content=response.content, - tool_calls=[] - ), + message=assistant_message, usage=usage, ) - def _handle_chat_generate_stream_response(self, model: str, - prompt_messages: list[PromptMessage], - credentials: dict, - response: Generator[BaichuanMessage, None, None]) -> Generator: + def _handle_chat_generate_stream_response( + self, + model: str, + prompt_messages: list[PromptMessage], + credentials: dict, + response: Generator[BaichuanMessage, None, None], + ) -> Generator: for message in response: if message.usage: - usage = self._calc_response_usage(model=model, credentials=credentials, - prompt_tokens=message.usage['prompt_tokens'], - completion_tokens=message.usage['completion_tokens']) + usage = self._calc_response_usage( + model=model, + credentials=credentials, + prompt_tokens=message.usage["prompt_tokens"], + completion_tokens=message.usage["completion_tokens"], + ) yield LLMResultChunk( model=model, prompt_messages=prompt_messages, delta=LLMResultChunkDelta( index=0, message=AssistantPromptMessage( - content=message.content, - tool_calls=[] + content=message.content, tool_calls=[] ), usage=usage, - finish_reason=message.stop_reason if message.stop_reason else None, + finish_reason=( + message.stop_reason if message.stop_reason else None + ), ), ) else: @@ -197,10 +278,11 @@ def _handle_chat_generate_stream_response(self, model: str, delta=LLMResultChunkDelta( index=0, message=AssistantPromptMessage( - content=message.content, - tool_calls=[] + content=message.content, tool_calls=[] + ), + finish_reason=( + message.stop_reason if message.stop_reason else None ), - finish_reason=message.stop_reason if message.stop_reason else None, ), ) @@ -215,21 +297,13 @@ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]] :return: Invoke error mapping """ return { - InvokeConnectionError: [ - ], - InvokeServerUnavailableError: [ - InternalServerError - ], - InvokeRateLimitError: [ - RateLimitReachedError - ], + InvokeConnectionError: [], + InvokeServerUnavailableError: [InternalServerError], + InvokeRateLimitError: [RateLimitReachedError], InvokeAuthorizationError: [ InvalidAuthenticationError, InsufficientAccountBalance, InvalidAPIKeyError, ], - InvokeBadRequestError: [ - BadRequestError, - KeyError - ] + InvokeBadRequestError: [BadRequestError, KeyError], } From ce70cd6d6affce1ce1ddd3d8bf33139e06e099b1 Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 11:35:27 +0800 Subject: [PATCH 2/8] refactor baichuan model --- .../baichuan/llm/baichuan2-turbo.yaml | 18 ++-- .../baichuan/llm/baichuan3-turbo-128k.yaml | 31 ++++--- .../baichuan/llm/baichuan3-turbo.yaml | 30 ++++--- .../baichuan/llm/baichuan_turbo.py | 83 ++----------------- .../model_providers/baichuan/llm/llm.py | 83 +++++++++---------- 5 files changed, 96 insertions(+), 149 deletions(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo.yaml index f91329c77aa9ec..ccb4ee8b92bc16 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo.yaml @@ -4,36 +4,32 @@ label: model_type: llm features: - 
agent-thought + - multi-tool-call model_properties: mode: chat context_size: 32000 parameter_rules: - name: temperature use_template: temperature + default: 0.3 - name: top_p use_template: top_p + default: 0.85 - name: top_k label: zh_Hans: 取样数量 en_US: Top k type: int + min: 0 + max: 20 + default: 5 help: zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 en_US: Only sample from the top K options for each subsequent token. required: false - name: max_tokens use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 192000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 + default: 2048 - name: with_search_enhance label: zh_Hans: 搜索增强 diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml index bf72e8229671f6..59c5f10a4a55cd 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml @@ -4,36 +4,44 @@ label: model_type: llm features: - agent-thought + - multi-tool-call model_properties: mode: chat context_size: 128000 parameter_rules: - name: temperature use_template: temperature + default: 0.3 - name: top_p use_template: top_p + default: 0.85 - name: top_k label: zh_Hans: 取样数量 en_US: Top k type: int + min: 0 + max: 20 + default: 5 help: zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 en_US: Only sample from the top K options for each subsequent token. required: false - name: max_tokens use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 128000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 + default: 2048 + - name: res_format + label: + zh_Hans: 回复格式 + en_US: response format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object - name: with_search_enhance label: zh_Hans: 搜索增强 @@ -43,3 +51,4 @@ parameter_rules: zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 en_US: Allow the model to perform external search to enhance the generation results. required: false + diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo.yaml index 85882519b86741..ee8a9ff0d5408a 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo.yaml @@ -4,36 +4,44 @@ label: model_type: llm features: - agent-thought + - multi-tool-call model_properties: mode: chat context_size: 32000 parameter_rules: - name: temperature use_template: temperature + default: 0.3 - name: top_p use_template: top_p + default: 0.85 - name: top_k label: zh_Hans: 取样数量 en_US: Top k type: int + min: 0 + max: 20 + default: 5 help: zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 en_US: Only sample from the top K options for each subsequent token. 
required: false - name: max_tokens use_template: max_tokens - required: true - default: 8000 - min: 1 - max: 32000 - - name: presence_penalty - use_template: presence_penalty - - name: frequency_penalty - use_template: frequency_penalty - default: 1 - min: 1 - max: 2 + default: 2048 + - name: res_format + label: + zh_Hans: 回复格式 + en_US: response format + type: string + help: + zh_Hans: 指定模型必须输出的格式 + en_US: specifying the format that the model must output + required: false + options: + - text + - json_object - name: with_search_enhance label: zh_Hans: 搜索增强 diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py index eb2d8de152f142..67a18d9a538b6a 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py @@ -1,14 +1,9 @@ -from collections.abc import Generator -from enum import Enum -from json import dumps, loads -from typing import Any, Optional, Union +import json +from typing import Any, Optional, Union, Iterator from requests import post -from core.model_runtime.entities.message_entities import ( - AssistantPromptMessage, - PromptMessageTool, -) +from core.model_runtime.entities.message_entities import PromptMessageTool from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import ( BadRequestError, InsufficientAccountBalance, @@ -19,33 +14,6 @@ ) -class BaichuanMessage: - class Role(Enum): - USER = "user" - ASSISTANT = "assistant" - SYSTEM = "system" - TOOL = "tool" - - role: str = Role.USER.value - content: str - usage: dict[str, int] = None - tool_calls : list[AssistantPromptMessage.ToolCall] = [] - tool_call_id: str = "" - stop_reason: str = "" - - def to_dict(self) -> dict[str, Any]: - return { - "role": self.role, - "content": self.content, - } - - def __init__(self, content: str, role: str = "user", tool_call_id: str = "") -> None: - self.content = content - self.role = role - if tool_call_id: - self.tool_call_id = tool_call_id - - class BaichuanModel: api_key: str @@ -68,41 +36,6 @@ def request_headers(self) -> dict[str, Any]: "Authorization": "Bearer " + self.api_key, } - def _handle_chat_stream_generate_response(self, response) -> Generator: - for line in response.iter_lines(): - if not line: - continue - line = line.decode("utf-8") - # remove the first `data: ` prefix - if line.startswith("data:"): - line = line[5:].strip() - try: - data = loads(line) - except Exception as e: - if line.strip() == "[DONE]": - return - choices = data.get("choices", []) - # save stop reason temporarily - stop_reason = "" - for choice in choices: - if choice.get("finish_reason"): - stop_reason = choice["finish_reason"] - - if len(choice["delta"]["content"]) == 0: - continue - yield BaichuanMessage(**choice["delta"]) - - # if there is usage, the response is the last one, yield it and return - if "usage" in data: - message = BaichuanMessage(content="", role="assistant") - message.usage = { - "prompt_tokens": data["usage"]["prompt_tokens"], - "completion_tokens": data["usage"]["completion_tokens"], - "total_tokens": data["usage"]["total_tokens"], - } - message.stop_reason = stop_reason - yield message - def _build_parameters( self, model: str, @@ -112,12 +45,14 @@ def _build_parameters( tools: Optional[list[PromptMessageTool]] = None, ) -> dict[str, Any]: if model in self._model_mapping.keys(): - # the LargeLanguageModel._code_block_mode_wrapper() method will remove the 
response_format of parameters. we need to rename it to res_format to get its value + # the LargeLanguageModel._code_block_mode_wrapper() method will remove the response_format of parameters. + # we need to rename it to res_format to get its value if parameters.get("res_format") == "json_object": parameters["response_format"] = {"type": "json_object"} if tools or parameters.get("with_search_enhance") is True: parameters["tools"] = [] + # with_search_enhance is deprecated, use web_search instead if parameters.get("with_search_enhance") is True: parameters["tools"].append( @@ -157,7 +92,7 @@ def generate( parameters: dict[str, Any], timeout: int, tools: Optional[list[PromptMessageTool]] = None, - ) -> Union[Generator, dict]: + ) -> Union[Iterator, dict]: if model in self._model_mapping.keys(): api_base = "https://api.baichuan-ai.com/v1/chat/completions" @@ -170,7 +105,7 @@ def generate( response = post( url=api_base, headers=self.request_headers, - data=dumps(data), + data=json.dumps(data), timeout=timeout, stream=stream, ) @@ -206,6 +141,6 @@ def generate( raise InternalServerError(f"Unknown error: {err} with message: {msg}") if stream: - return self._handle_chat_stream_generate_response(response) + return response.iter_lines() else: return response.json() diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index d05be120890f51..d75feb2f9bca51 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -1,5 +1,5 @@ -from collections.abc import Generator -from typing import cast +import json +from typing import Generator, Iterator, cast from core.model_runtime.entities.llm_entities import ( LLMResult, @@ -23,16 +23,9 @@ InvokeServerUnavailableError, ) from core.model_runtime.errors.validate import CredentialsValidateFailedError -from core.model_runtime.model_providers.__base.large_language_model import ( - LargeLanguageModel, -) -from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import ( - BaichuanTokenizer, -) -from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import ( - BaichuanMessage, - BaichuanModel, -) +from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel +from core.model_runtime.model_providers.baichuan.llm.baichuan_tokenizer import BaichuanTokenizer +from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import BaichuanModel from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors import ( BadRequestError, InsufficientAccountBalance, @@ -62,9 +55,7 @@ def _invoke( prompt_messages=prompt_messages, model_parameters=model_parameters, tools=tools, - stop=stop, stream=stream, - user=user, ) def get_num_tokens( @@ -160,9 +151,7 @@ def _generate( prompt_messages: list[PromptMessage], model_parameters: dict, tools: list[PromptMessageTool] | None = None, - stop: list[str] | None = None, stream: bool = True, - user: str | None = None, ) -> LLMResult | Generator: instance = BaichuanModel(api_key=credentials["api_key"]) @@ -196,9 +185,7 @@ def _handle_chat_generate_response( ) -> LLMResult: choices = response.get("choices", []) assistant_message = AssistantPromptMessage(content='', tool_calls=[]) - stop_reason = None if choices and choices[0]["finish_reason"] == "tool_calls": - stop_reason = "tool_calls" for choice in choices: for tool_call in choice["message"]["tool_calls"]: tool = AssistantPromptMessage.ToolCall( @@ -214,8 
+201,6 @@ def _handle_chat_generate_response( for choice in choices: assistant_message.content += choice["message"]["content"] assistant_message.role = choice["message"]["role"] - if choice["finish_reason"]: - stop_reason = choice["finish_reason"] usage = response.get("usage") if usage: @@ -227,7 +212,6 @@ def _handle_chat_generate_response( prompt_tokens = self._num_tokens_from_messages(prompt_messages) completion_tokens = self._num_tokens_from_messages([assistant_message]) - # convert baichuan message to llm result usage = self._calc_response_usage( model=model, credentials=credentials, @@ -247,42 +231,57 @@ def _handle_chat_generate_stream_response( model: str, prompt_messages: list[PromptMessage], credentials: dict, - response: Generator[BaichuanMessage, None, None], + response: Iterator, ) -> Generator: - for message in response: - if message.usage: - usage = self._calc_response_usage( - model=model, - credentials=credentials, - prompt_tokens=message.usage["prompt_tokens"], - completion_tokens=message.usage["completion_tokens"], - ) + for line in response: + if not line: + continue + line = line.decode("utf-8") + # remove the first `data: ` prefix + if line.startswith("data:"): + line = line[5:].strip() + try: + data = json.loads(line) + except Exception as e: + if line.strip() == "[DONE]": + return + choices = data.get("choices", []) + + stop_reason = "" + for choice in choices: + if choice.get("finish_reason"): + stop_reason = choice["finish_reason"] + + if len(choice["delta"]["content"]) == 0: + continue yield LLMResultChunk( model=model, prompt_messages=prompt_messages, delta=LLMResultChunkDelta( index=0, message=AssistantPromptMessage( - content=message.content, tool_calls=[] - ), - usage=usage, - finish_reason=( - message.stop_reason if message.stop_reason else None + content=choice["delta"]["content"], tool_calls=[] ), + finish_reason=stop_reason, ), ) - else: + + # if there is usage, the response is the last one, yield it and return + if "usage" in data: + usage = self._calc_response_usage( + model=model, + credentials=credentials, + prompt_tokens=data["usage"]["prompt_tokens"], + completion_tokens=data["usage"]["completion_tokens"], + ) yield LLMResultChunk( model=model, prompt_messages=prompt_messages, delta=LLMResultChunkDelta( index=0, - message=AssistantPromptMessage( - content=message.content, tool_calls=[] - ), - finish_reason=( - message.stop_reason if message.stop_reason else None - ), + message=AssistantPromptMessage(content="", tool_calls=[]), + usage=usage, + finish_reason=stop_reason, ), ) From 108e77a68382b190e8eb8c0d9f7cd3af34d43cb9 Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 11:35:40 +0800 Subject: [PATCH 3/8] refactor baichuan model --- .../model_providers/baichuan/llm/baichuan_turbo.py | 3 ++- api/core/model_runtime/model_providers/baichuan/llm/llm.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py index 67a18d9a538b6a..a8fd9dce91abbf 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan_turbo.py @@ -1,5 +1,6 @@ import json -from typing import Any, Optional, Union, Iterator +from collections.abc import Iterator +from typing import Any, Optional, Union from requests import post diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py 
b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index d75feb2f9bca51..4f44682e9f440e 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -1,5 +1,6 @@ import json -from typing import Generator, Iterator, cast +from collections.abc import Generator, Iterator +from typing import cast from core.model_runtime.entities.llm_entities import ( LLMResult, From 221c03ea727ef2750375e5d3418f335637441e1d Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 11:57:46 +0800 Subject: [PATCH 4/8] fix CI --- .../model_providers/baichuan/llm/baichuan3-turbo-128k.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml index 59c5f10a4a55cd..c6c6c7e9e91947 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan3-turbo-128k.yaml @@ -51,4 +51,3 @@ parameter_rules: zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 en_US: Allow the model to perform external search to enhance the generation results. required: false - From bb0a350f720a92856f17c55aff835b6142be5a5d Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 13:18:52 +0800 Subject: [PATCH 5/8] nothing changed, just for re-run CI --- api/core/model_runtime/model_providers/baichuan/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index 4f44682e9f440e..049c078a21cd4e 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -85,7 +85,7 @@ def tokens(text: str): num_tokens += tokens_per_message for key, value in message.items(): if isinstance(value, list): - text = "" + text = '' for item in value: if isinstance(item, dict) and item["type"] == "text": text += item["text"] From cef3856713bba189f4800b4b79e568f1dc211bad Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 13:19:04 +0800 Subject: [PATCH 6/8] nothing changed, just for re-run CI --- api/core/model_runtime/model_providers/baichuan/llm/llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py index 049c078a21cd4e..4f44682e9f440e 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py @@ -85,7 +85,7 @@ def tokens(text: str): num_tokens += tokens_per_message for key, value in message.items(): if isinstance(value, list): - text = '' + text = "" for item in value: if isinstance(item, dict) and item["type"] == "text": text += item["text"] From c54a7c51f20e72adbdb7fdcf5116a375f8c87f32 Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 13:30:42 +0800 Subject: [PATCH 7/8] not remove the deprecated model, just add deprecated=true --- .../baichuan/llm/baichuan2-53b.yaml | 46 +++++++++++++++++++ .../baichuan/llm/baichuan2-turbo-192k.yaml | 46 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml create mode 100644 api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml diff --git 
a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml new file mode 100644 index 00000000000000..8360dd5faffb00 --- /dev/null +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-53b.yaml @@ -0,0 +1,46 @@ +model: baichuan2-53b +label: + en_US: Baichuan2-53B +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 32000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens + use_template: max_tokens + required: true + default: 1000 + min: 1 + max: 4000 + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + default: 1 + min: 1 + max: 2 + - name: with_search_enhance + label: + zh_Hans: 搜索增强 + en_US: Search Enhance + type: boolean + help: + zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 + en_US: Allow the model to perform external search to enhance the generation results. + required: false +deprecated: true diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml new file mode 100644 index 00000000000000..17873a476ea534 --- /dev/null +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml @@ -0,0 +1,46 @@ +model: baichuan2-turbo-192k +label: + en_US: Baichuan2-Turbo-192K +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 192000 +parameter_rules: + - name: temperature + use_template: temperature + - name: top_p + use_template: top_p + - name: top_k + label: + zh_Hans: 取样数量 + en_US: Top k + type: int + help: + zh_Hans: 仅从每个后续标记的前 K 个选项中采样。 + en_US: Only sample from the top K options for each subsequent token. + required: false + - name: max_tokens + use_template: max_tokens + required: true + default: 8000 + min: 1 + max: 192000 + - name: presence_penalty + use_template: presence_penalty + - name: frequency_penalty + use_template: frequency_penalty + default: 1 + min: 1 + max: 2 + - name: with_search_enhance + label: + zh_Hans: 搜索增强 + en_US: Search Enhance + type: boolean + help: + zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 + en_US: Allow the model to perform external search to enhance the generation results. + required: false +deprecated: true \ No newline at end of file From f41efc554d7de9f27b0c89fdd5f243ed46813039 Mon Sep 17 00:00:00 2001 From: hejl Date: Wed, 4 Sep 2024 13:34:47 +0800 Subject: [PATCH 8/8] fix CI --- .../model_providers/baichuan/llm/baichuan2-turbo-192k.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml index 17873a476ea534..0ce0265cfe5c6c 100644 --- a/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml +++ b/api/core/model_runtime/model_providers/baichuan/llm/baichuan2-turbo-192k.yaml @@ -43,4 +43,4 @@ parameter_rules: zh_Hans: 允许模型自行进行外部搜索,以增强生成结果。 en_US: Allow the model to perform external search to enhance the generation results. required: false -deprecated: true \ No newline at end of file +deprecated: true
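
Usage sketch (not part of the patch series): the snippet below illustrates how the refactored provider is expected to be called after these patches, where BaichuanModel takes only an api_key, messages are plain dicts with OpenAI-compatible roles, and tools are passed separately and converted to the API's "function" schema. The model name, tool definition, parameter values, and API key here are placeholders chosen for illustration, not values taken from the diffs.

# Hypothetical usage sketch for the refactored BaichuanModel.
# Assumptions: module paths as in the diffs above, a placeholder API key,
# and network access to api.baichuan-ai.com.
from core.model_runtime.entities.message_entities import PromptMessageTool
from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo import BaichuanModel

instance = BaichuanModel(api_key="YOUR_API_KEY")  # placeholder credential

# Messages are now plain dicts (user/assistant/system/tool roles),
# no longer BaichuanMessage objects.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the weather in Beijing?"},
]

# Tools are passed separately; _build_parameters() wraps each one in the
# {"type": "function", "function": {...}} payload expected by the API.
weather_tool = PromptMessageTool(
    name="get_weather",  # hypothetical tool for illustration
    description="Query current weather for a city",
    parameters={
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    },
)

# With stream=False the raw JSON dict from /v1/chat/completions is returned;
# with stream=True, generate() returns response.iter_lines() and llm.py
# parses the SSE chunks in _handle_chat_generate_stream_response().
response = instance.generate(
    model="baichuan4",
    stream=False,
    messages=messages,
    parameters={"temperature": 0.3, "max_tokens": 2048},
    timeout=60,
    tools=[weather_tool],
)
print(response["choices"][0]["message"])

Under this shape, prompt-message conversion (including tool_calls on assistant messages and tool_call_id on tool messages) lives entirely in llm.py's _convert_prompt_message_to_dict(), which is why BaichuanMessage could be removed from baichuan_turbo.py in patch 2.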