diff --git a/api/.env.example b/api/.env.example index d4f8d9905adeb4..a38e6baff994d0 100644 --- a/api/.env.example +++ b/api/.env.example @@ -60,7 +60,8 @@ ALIYUN_OSS_SECRET_KEY=your-secret-key ALIYUN_OSS_ENDPOINT=your-endpoint ALIYUN_OSS_AUTH_VERSION=v1 ALIYUN_OSS_REGION=your-region - +# Don't start with '/'. OSS doesn't support leading slash in object names. +ALIYUN_OSS_PATH=your-path # Google Storage configuration GOOGLE_STORAGE_BUCKET_NAME=yout-bucket-name GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string diff --git a/api/commands.py b/api/commands.py index 41f1a6444c4581..3bf8bc0ecc45f2 100644 --- a/api/commands.py +++ b/api/commands.py @@ -559,8 +559,9 @@ def add_qdrant_doc_id_index(field: str): @click.command("create-tenant", help="Create account and tenant.") @click.option("--email", prompt=True, help="The email address of the tenant account.") +@click.option("--name", prompt=True, help="The workspace name of the tenant account.") @click.option("--language", prompt=True, help="Account language, default: en-US.") -def create_tenant(email: str, language: Optional[str] = None): +def create_tenant(email: str, language: Optional[str] = None, name: Optional[str] = None): """ Create tenant account """ @@ -580,13 +581,15 @@ def create_tenant(email: str, language: Optional[str] = None): if language not in languages: language = "en-US" + name = name.strip() + # generate random password new_password = secrets.token_urlsafe(16) # register account account = RegisterService.register(email=email, name=account_name, password=new_password, language=language) - TenantService.create_owner_tenant_if_not_exist(account) + TenantService.create_owner_tenant_if_not_exist(account, name) click.echo( click.style( diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index 9e39477b895d2e..91fbee65ae2445 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Annotated, Optional from pydantic import ( AliasChoices, @@ -226,20 +226,17 @@ def CONSOLE_CORS_ALLOW_ORIGINS(self) -> list[str]: def WEB_API_CORS_ALLOW_ORIGINS(self) -> list[str]: return self.inner_WEB_API_CORS_ALLOW_ORIGINS.split(",") - HTTP_REQUEST_MAX_CONNECT_TIMEOUT: NonNegativeInt = Field( - description="", - default=300, - ) + HTTP_REQUEST_MAX_CONNECT_TIMEOUT: Annotated[ + PositiveInt, Field(ge=10, description="connect timeout in seconds for HTTP request") + ] = 10 - HTTP_REQUEST_MAX_READ_TIMEOUT: NonNegativeInt = Field( - description="", - default=600, - ) + HTTP_REQUEST_MAX_READ_TIMEOUT: Annotated[ + PositiveInt, Field(ge=60, description="read timeout in seconds for HTTP request") + ] = 60 - HTTP_REQUEST_MAX_WRITE_TIMEOUT: NonNegativeInt = Field( - description="", - default=600, - ) + HTTP_REQUEST_MAX_WRITE_TIMEOUT: Annotated[ + PositiveInt, Field(ge=10, description="read timeout in seconds for HTTP request") + ] = 20 HTTP_REQUEST_NODE_MAX_BINARY_SIZE: PositiveInt = Field( description="", diff --git a/api/configs/middleware/storage/aliyun_oss_storage_config.py b/api/configs/middleware/storage/aliyun_oss_storage_config.py index 78f70b7ad39fd8..c1843dc26cee00 100644 --- a/api/configs/middleware/storage/aliyun_oss_storage_config.py +++ b/api/configs/middleware/storage/aliyun_oss_storage_config.py @@ -38,3 +38,8 @@ class AliyunOSSStorageConfig(BaseSettings): description="Aliyun OSS authentication version", default=None, ) + + ALIYUN_OSS_PATH: Optional[str] = Field( + description="Aliyun OSS path", + default=None, + ) diff --git a/api/controllers/console/app/conversation.py b/api/controllers/console/app/conversation.py index 753a6be20cd1a6..c3aac6690e4c3d 100644 --- a/api/controllers/console/app/conversation.py +++ b/api/controllers/console/app/conversation.py @@ -173,21 +173,18 @@ def get(self, app_model): if args["keyword"]: keyword_filter = "%{}%".format(args["keyword"]) - query = ( - query.join( - Message, - Message.conversation_id == Conversation.id, - ) - .join(subquery, subquery.c.conversation_id == Conversation.id) - .filter( - or_( - Message.query.ilike(keyword_filter), - Message.answer.ilike(keyword_filter), - Conversation.name.ilike(keyword_filter), - Conversation.introduction.ilike(keyword_filter), - subquery.c.from_end_user_session_id.ilike(keyword_filter), - ), - ) + message_subquery = ( + db.session.query(Message.conversation_id) + .filter(or_(Message.query.ilike(keyword_filter), Message.answer.ilike(keyword_filter))) + .subquery() + ) + query = query.join(subquery, subquery.c.conversation_id == Conversation.id).filter( + or_( + Conversation.id.in_(message_subquery), + Conversation.name.ilike(keyword_filter), + Conversation.introduction.ilike(keyword_filter), + subquery.c.from_end_user_session_id.ilike(keyword_filter), + ), ) account = current_user diff --git a/api/controllers/console/explore/installed_app.py b/api/controllers/console/explore/installed_app.py index b71078760c6255..3f1e64a2478b4b 100644 --- a/api/controllers/console/explore/installed_app.py +++ b/api/controllers/console/explore/installed_app.py @@ -35,6 +35,7 @@ def get(self): "uninstallable": current_tenant_id == installed_app.app_owner_tenant_id, } for installed_app in installed_apps + if installed_app.app is not None ] installed_apps.sort( key=lambda app: ( diff --git a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py index a8eb1f9f76c799..1a621d2090f759 100644 --- a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py +++ b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py @@ -93,7 +93,7 @@ def convert(cls, config: dict) -> Optional[DatasetEntity]: reranking_model=dataset_configs.get('reranking_model'), weights=dataset_configs.get('weights'), reranking_enabled=dataset_configs.get('reranking_enabled', True), - rerank_mode=dataset_configs.get('rerank_mode', 'reranking_model'), + rerank_mode=dataset_configs.get('reranking_mode', 'reranking_model'), ) ) diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index 6d0ceac253e975..638cc07461d714 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -4,7 +4,7 @@ import threading import uuid from collections.abc import Generator -from typing import Any, Optional, Union +from typing import Any, Literal, Optional, Union, overload from flask import Flask, current_app from pydantic import ValidationError @@ -32,6 +32,26 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): + @overload + def generate( + self, app_model: App, + workflow: Workflow, + user: Union[Account, EndUser], + args: dict, + invoke_from: InvokeFrom, + stream: Literal[True] = True, + ) -> Generator[str, None, None]: ... + + @overload + def generate( + self, app_model: App, + workflow: Workflow, + user: Union[Account, EndUser], + args: dict, + invoke_from: InvokeFrom, + stream: Literal[False] = False, + ) -> dict: ... + def generate( self, app_model: App, diff --git a/api/core/app/apps/agent_chat/app_generator.py b/api/core/app/apps/agent_chat/app_generator.py index 53780bdfb003b2..daf37f4a50dd31 100644 --- a/api/core/app/apps/agent_chat/app_generator.py +++ b/api/core/app/apps/agent_chat/app_generator.py @@ -3,7 +3,7 @@ import threading import uuid from collections.abc import Generator -from typing import Any, Union +from typing import Any, Literal, Union, overload from flask import Flask, current_app from pydantic import ValidationError @@ -28,6 +28,24 @@ class AgentChatAppGenerator(MessageBasedAppGenerator): + @overload + def generate( + self, app_model: App, + user: Union[Account, EndUser], + args: dict, + invoke_from: InvokeFrom, + stream: Literal[True] = True, + ) -> Generator[dict, None, None]: ... + + @overload + def generate( + self, app_model: App, + user: Union[Account, EndUser], + args: dict, + invoke_from: InvokeFrom, + stream: Literal[False] = False, + ) -> dict: ... + def generate(self, app_model: App, user: Union[Account, EndUser], args: Any, diff --git a/api/core/app/apps/chat/app_generator.py b/api/core/app/apps/chat/app_generator.py index 5b896e28455340..ab15928b744b61 100644 --- a/api/core/app/apps/chat/app_generator.py +++ b/api/core/app/apps/chat/app_generator.py @@ -3,7 +3,7 @@ import threading import uuid from collections.abc import Generator -from typing import Any, Union +from typing import Any, Literal, Union, overload from flask import Flask, current_app from pydantic import ValidationError @@ -28,13 +28,31 @@ class ChatAppGenerator(MessageBasedAppGenerator): + @overload + def generate( + self, app_model: App, + user: Union[Account, EndUser], + args: Any, + invoke_from: InvokeFrom, + stream: Literal[True] = True, + ) -> Generator[str, None, None]: ... + + @overload + def generate( + self, app_model: App, + user: Union[Account, EndUser], + args: Any, + invoke_from: InvokeFrom, + stream: Literal[False] = False, + ) -> dict: ... + def generate( self, app_model: App, user: Union[Account, EndUser], args: Any, invoke_from: InvokeFrom, stream: bool = True, - ) -> Union[dict, Generator[dict, None, None]]: + ) -> Union[dict, Generator[str, None, None]]: """ Generate App response. diff --git a/api/core/app/apps/completion/app_generator.py b/api/core/app/apps/completion/app_generator.py index c4e1caf65a9679..c0b13b40fdb0d6 100644 --- a/api/core/app/apps/completion/app_generator.py +++ b/api/core/app/apps/completion/app_generator.py @@ -3,7 +3,7 @@ import threading import uuid from collections.abc import Generator -from typing import Any, Union +from typing import Any, Literal, Union, overload from flask import Flask, current_app from pydantic import ValidationError @@ -30,12 +30,30 @@ class CompletionAppGenerator(MessageBasedAppGenerator): + @overload + def generate( + self, app_model: App, + user: Union[Account, EndUser], + args: dict, + invoke_from: InvokeFrom, + stream: Literal[True] = True, + ) -> Generator[str, None, None]: ... + + @overload + def generate( + self, app_model: App, + user: Union[Account, EndUser], + args: dict, + invoke_from: InvokeFrom, + stream: Literal[False] = False, + ) -> dict: ... + def generate(self, app_model: App, user: Union[Account, EndUser], args: Any, invoke_from: InvokeFrom, stream: bool = True) \ - -> Union[dict, Generator[dict, None, None]]: + -> Union[dict, Generator[str, None, None]]: """ Generate App response. @@ -203,7 +221,7 @@ def generate_more_like_this(self, app_model: App, user: Union[Account, EndUser], invoke_from: InvokeFrom, stream: bool = True) \ - -> Union[dict, Generator[dict, None, None]]: + -> Union[dict, Generator[str, None, None]]: """ Generate App response. diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index e293fc7b6f95e9..5b7635e0e8d871 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -4,7 +4,7 @@ import threading import uuid from collections.abc import Generator -from typing import Any, Union +from typing import Any, Literal, Union, overload from flask import Flask, current_app from pydantic import ValidationError @@ -32,8 +32,28 @@ class WorkflowAppGenerator(BaseAppGenerator): + @overload def generate( - self, + self, app_model: App, + workflow: Workflow, + user: Union[Account, EndUser], + args: dict, + invoke_from: InvokeFrom, + stream: Literal[True] = True, + ) -> Generator[str, None, None]: ... + + @overload + def generate( + self, app_model: App, + workflow: Workflow, + user: Union[Account, EndUser], + args: dict, + invoke_from: InvokeFrom, + stream: Literal[False] = False, + ) -> dict: ... + + def generate( + self, app_model: App, workflow: Workflow, user: Union[Account, EndUser], @@ -109,7 +129,7 @@ def _generate( application_generate_entity: WorkflowAppGenerateEntity, invoke_from: InvokeFrom, stream: bool = True, - ) -> dict[str, Any] | Generator[str, Any, None]: + ) -> dict[str, Any] | Generator[str, None, None]: """ Generate App response. @@ -228,7 +248,7 @@ def _generate_worker(self, flask_app: Flask, application_generate_entity=application_generate_entity, queue_manager=queue_manager ) - + runner.run() except GenerateTaskStoppedException: pass diff --git a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py index 2b729d42932bdb..6279125f46ffa0 100644 --- a/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py +++ b/api/core/model_runtime/model_providers/openai_api_compatible/llm/llm.py @@ -150,9 +150,9 @@ def validate_credentials(self, model: str, credentials: dict) -> None: except json.JSONDecodeError as e: raise CredentialsValidateFailedError('Credentials validation failed: JSON decode error') - if (completion_type is LLMMode.CHAT and json_result['object'] == ''): + if (completion_type is LLMMode.CHAT and json_result.get('object','') == ''): json_result['object'] = 'chat.completion' - elif (completion_type is LLMMode.COMPLETION and json_result['object'] == ''): + elif (completion_type is LLMMode.COMPLETION and json_result.get('object','') == ''): json_result['object'] = 'text_completion' if (completion_type is LLMMode.CHAT diff --git a/api/core/model_runtime/model_providers/volcengine_maas/client.py b/api/core/model_runtime/model_providers/volcengine_maas/client.py index 61f3521a43634f..a4d89dabcbc076 100644 --- a/api/core/model_runtime/model_providers/volcengine_maas/client.py +++ b/api/core/model_runtime/model_providers/volcengine_maas/client.py @@ -71,11 +71,24 @@ def from_credentials(cls, credentials): args = { "base_url": credentials['api_endpoint_host'], "region": credentials['volc_region'], - "ak": credentials['volc_access_key_id'], - "sk": credentials['volc_secret_access_key'], } + if credentials.get("auth_method") == "api_key": + args = { + **args, + "api_key": credentials['volc_api_key'], + } + else: + args = { + **args, + "ak": credentials['volc_access_key_id'], + "sk": credentials['volc_secret_access_key'], + } + if cls.is_compatible_with_legacy(credentials): - args["base_url"] = DEFAULT_V3_ENDPOINT + args = { + **args, + "base_url": DEFAULT_V3_ENDPOINT + } client = ArkClientV3( **args diff --git a/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml b/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml index 735ba2b31431fb..13e00da76fb149 100644 --- a/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml +++ b/api/core/model_runtime/model_providers/volcengine_maas/volcengine_maas.yaml @@ -30,8 +30,28 @@ model_credential_schema: en_US: Enter your Model Name zh_Hans: 输入模型名称 credential_form_schemas: + - variable: auth_method + required: true + label: + en_US: Authentication Method + zh_Hans: 鉴权方式 + type: select + default: aksk + options: + - label: + en_US: API Key + value: api_key + - label: + en_US: Access Key / Secret Access Key + value: aksk + placeholder: + en_US: Enter your Authentication Method + zh_Hans: 选择鉴权方式 - variable: volc_access_key_id required: true + show_on: + - variable: auth_method + value: aksk label: en_US: Access Key zh_Hans: Access Key @@ -41,6 +61,9 @@ model_credential_schema: zh_Hans: 输入您的 Access Key - variable: volc_secret_access_key required: true + show_on: + - variable: auth_method + value: aksk label: en_US: Secret Access Key zh_Hans: Secret Access Key @@ -48,6 +71,17 @@ model_credential_schema: placeholder: en_US: Enter your Secret Access Key zh_Hans: 输入您的 Secret Access Key + - variable: volc_api_key + required: true + show_on: + - variable: auth_method + value: api_key + label: + en_US: API Key + type: secret-input + placeholder: + en_US: Enter your API Key + zh_Hans: 输入您的 API Key - variable: volc_region required: true label: diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml index 1b1d499ba7383c..0e3c001f06b0f4 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml @@ -38,7 +38,7 @@ parameter_rules: min: 1 max: 8192 pricing: - input: '0.0001' - output: '0.0001' + input: '0' + output: '0' unit: '0.001' currency: RMB diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml index 5bdb4428403908..b0f95c0a68e555 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml @@ -37,3 +37,8 @@ parameter_rules: default: 1024 min: 1 max: 8192 +pricing: + input: '0.001' + output: '0.001' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml index 6b5bcc5bcf4468..271eecf199c476 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml @@ -37,3 +37,8 @@ parameter_rules: default: 1024 min: 1 max: 8192 +pricing: + input: '0.1' + output: '0.1' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml index 9d92e58f6cdff1..150e07b60af979 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml @@ -30,4 +30,9 @@ parameter_rules: use_template: max_tokens default: 1024 min: 1 - max: 4096 + max: 8192 +pricing: + input: '0.001' + output: '0.001' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml new file mode 100644 index 00000000000000..237a951cd5a14e --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml @@ -0,0 +1,44 @@ +model: glm-4-plus +label: + en_US: glm-4-plus +model_type: llm +features: + - multi-tool-call + - agent-thought + - stream-tool-call +model_properties: + mode: chat +parameter_rules: + - name: temperature + use_template: temperature + default: 0.95 + min: 0.0 + max: 1.0 + help: + zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 + en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. + - name: top_p + use_template: top_p + default: 0.7 + help: + zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 + en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. + - name: incremental + label: + zh_Hans: 增量返回 + en_US: Incremental + type: boolean + help: + zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 + en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. + required: false + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 8192 +pricing: + input: '0.05' + output: '0.05' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml index ddea331c8e46f3..c7a4093d7aa7a2 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml @@ -34,4 +34,9 @@ parameter_rules: use_template: max_tokens default: 1024 min: 1 - max: 8192 + max: 1024 +pricing: + input: '0.05' + output: '0.05' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml new file mode 100644 index 00000000000000..a7aee5b4ca2363 --- /dev/null +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml @@ -0,0 +1,42 @@ +model: glm-4v-plus +label: + en_US: glm-4v-plus +model_type: llm +model_properties: + mode: chat +features: + - vision +parameter_rules: + - name: temperature + use_template: temperature + default: 0.95 + min: 0.0 + max: 1.0 + help: + zh_Hans: 采样温度,控制输出的随机性,必须为正数取值范围是:(0.0,1.0],不能等于 0,默认值为 0.95 值越大,会使输出更随机,更具创造性;值越小,输出会更加稳定或确定建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 + en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. + - name: top_p + use_template: top_p + default: 0.7 + help: + zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 + en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. + - name: incremental + label: + zh_Hans: 增量返回 + en_US: Incremental + type: boolean + help: + zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 + en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. + required: false + - name: max_tokens + use_template: max_tokens + default: 1024 + min: 1 + max: 1024 +pricing: + input: '0.01' + output: '0.01' + unit: '0.001' + currency: RMB diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/llm.py b/api/core/model_runtime/model_providers/zhipuai/llm/llm.py index 13d8f5e5c32102..b2cdc7ad7a4644 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/llm.py +++ b/api/core/model_runtime/model_providers/zhipuai/llm/llm.py @@ -153,7 +153,8 @@ def _generate(self, model: str, credentials_kwargs: dict, :return: full response or stream response chunk generator result """ extra_model_kwargs = {} - if stop: + # request to glm-4v-plus with stop words will always response "finish_reason":"network_error" + if stop and model!= 'glm-4v-plus': extra_model_kwargs['stop'] = stop client = ZhipuAI( @@ -174,7 +175,7 @@ def _generate(self, model: str, credentials_kwargs: dict, if copy_prompt_message.role in [PromptMessageRole.USER, PromptMessageRole.SYSTEM, PromptMessageRole.TOOL]: if isinstance(copy_prompt_message.content, list): # check if model is 'glm-4v' - if model != 'glm-4v': + if model not in ('glm-4v', 'glm-4v-plus'): # not support list message continue # get image and @@ -207,7 +208,7 @@ def _generate(self, model: str, credentials_kwargs: dict, else: new_prompt_messages.append(copy_prompt_message) - if model == 'glm-4v': + if model == 'glm-4v' or model == 'glm-4v-plus': params = self._construct_glm_4v_parameter(model, new_prompt_messages, model_parameters) else: params = { @@ -304,7 +305,7 @@ def _construct_glm_4v_parameter(self, model: str, prompt_messages: list[PromptMe return params - def _construct_glm_4v_messages(self, prompt_message: Union[str | list[PromptMessageContent]]) -> list[dict]: + def _construct_glm_4v_messages(self, prompt_message: Union[str, list[PromptMessageContent]]) -> list[dict]: if isinstance(prompt_message, str): return [{'type': 'text', 'text': prompt_message}] diff --git a/api/core/tools/provider/_position.yaml b/api/core/tools/provider/_position.yaml index 9b90dda3b2389e..40c3356116770b 100644 --- a/api/core/tools/provider/_position.yaml +++ b/api/core/tools/provider/_position.yaml @@ -1,5 +1,6 @@ - google - bing +- perplexity - duckduckgo - searchapi - serper diff --git a/api/core/tools/provider/builtin/perplexity/_assets/icon.svg b/api/core/tools/provider/builtin/perplexity/_assets/icon.svg new file mode 100644 index 00000000000000..c2974c142fc622 --- /dev/null +++ b/api/core/tools/provider/builtin/perplexity/_assets/icon.svg @@ -0,0 +1,3 @@ + diff --git a/api/core/tools/provider/builtin/perplexity/perplexity.py b/api/core/tools/provider/builtin/perplexity/perplexity.py new file mode 100644 index 00000000000000..ff91edf18df4c0 --- /dev/null +++ b/api/core/tools/provider/builtin/perplexity/perplexity.py @@ -0,0 +1,46 @@ +from typing import Any + +import requests + +from core.tools.errors import ToolProviderCredentialValidationError +from core.tools.provider.builtin.perplexity.tools.perplexity_search import PERPLEXITY_API_URL +from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController + + +class PerplexityProvider(BuiltinToolProviderController): + def _validate_credentials(self, credentials: dict[str, Any]) -> None: + headers = { + "Authorization": f"Bearer {credentials.get('perplexity_api_key')}", + "Content-Type": "application/json" + } + + payload = { + "model": "llama-3.1-sonar-small-128k-online", + "messages": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Hello" + } + ], + "max_tokens": 5, + "temperature": 0.1, + "top_p": 0.9, + "stream": False + } + + try: + response = requests.post(PERPLEXITY_API_URL, json=payload, headers=headers) + response.raise_for_status() + except requests.RequestException as e: + raise ToolProviderCredentialValidationError( + f"Failed to validate Perplexity API key: {str(e)}" + ) + + if response.status_code != 200: + raise ToolProviderCredentialValidationError( + f"Perplexity API key is invalid. Status code: {response.status_code}" + ) diff --git a/api/core/tools/provider/builtin/perplexity/perplexity.yaml b/api/core/tools/provider/builtin/perplexity/perplexity.yaml new file mode 100644 index 00000000000000..c0b504f300c45a --- /dev/null +++ b/api/core/tools/provider/builtin/perplexity/perplexity.yaml @@ -0,0 +1,26 @@ +identity: + author: Dify + name: perplexity + label: + en_US: Perplexity + zh_Hans: Perplexity + description: + en_US: Perplexity.AI + zh_Hans: Perplexity.AI + icon: icon.svg + tags: + - search +credentials_for_provider: + perplexity_api_key: + type: secret-input + required: true + label: + en_US: Perplexity API key + zh_Hans: Perplexity API key + placeholder: + en_US: Please input your Perplexity API key + zh_Hans: 请输入你的 Perplexity API key + help: + en_US: Get your Perplexity API key from Perplexity + zh_Hans: 从 Perplexity 获取您的 Perplexity API key + url: https://www.perplexity.ai/settings/api diff --git a/api/core/tools/provider/builtin/perplexity/tools/perplexity_search.py b/api/core/tools/provider/builtin/perplexity/tools/perplexity_search.py new file mode 100644 index 00000000000000..5b1a263f9b2218 --- /dev/null +++ b/api/core/tools/provider/builtin/perplexity/tools/perplexity_search.py @@ -0,0 +1,72 @@ +import json +from typing import Any, Union + +import requests + +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.tool.builtin_tool import BuiltinTool + +PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions" + +class PerplexityAITool(BuiltinTool): + def _parse_response(self, response: dict) -> dict: + """Parse the response from Perplexity AI API""" + if 'choices' in response and len(response['choices']) > 0: + message = response['choices'][0]['message'] + return { + 'content': message.get('content', ''), + 'role': message.get('role', ''), + 'citations': response.get('citations', []) + } + else: + return {'content': 'Unable to get a valid response', 'role': 'assistant', 'citations': []} + + def _invoke(self, + user_id: str, + tool_parameters: dict[str, Any], + ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]: + headers = { + "Authorization": f"Bearer {self.runtime.credentials['perplexity_api_key']}", + "Content-Type": "application/json" + } + + payload = { + "model": tool_parameters.get('model', 'llama-3.1-sonar-small-128k-online'), + "messages": [ + { + "role": "system", + "content": "Be precise and concise." + }, + { + "role": "user", + "content": tool_parameters['query'] + } + ], + "max_tokens": tool_parameters.get('max_tokens', 4096), + "temperature": tool_parameters.get('temperature', 0.7), + "top_p": tool_parameters.get('top_p', 1), + "top_k": tool_parameters.get('top_k', 5), + "presence_penalty": tool_parameters.get('presence_penalty', 0), + "frequency_penalty": tool_parameters.get('frequency_penalty', 1), + "stream": False + } + + if 'search_recency_filter' in tool_parameters: + payload['search_recency_filter'] = tool_parameters['search_recency_filter'] + if 'return_citations' in tool_parameters: + payload['return_citations'] = tool_parameters['return_citations'] + if 'search_domain_filter' in tool_parameters: + if isinstance(tool_parameters['search_domain_filter'], str): + payload['search_domain_filter'] = [tool_parameters['search_domain_filter']] + elif isinstance(tool_parameters['search_domain_filter'], list): + payload['search_domain_filter'] = tool_parameters['search_domain_filter'] + + + response = requests.post(url=PERPLEXITY_API_URL, json=payload, headers=headers) + response.raise_for_status() + valuable_res = self._parse_response(response.json()) + + return [ + self.create_json_message(valuable_res), + self.create_text_message(json.dumps(valuable_res, ensure_ascii=False, indent=2)) + ] diff --git a/api/core/tools/provider/builtin/perplexity/tools/perplexity_search.yaml b/api/core/tools/provider/builtin/perplexity/tools/perplexity_search.yaml new file mode 100644 index 00000000000000..02a645df335aaf --- /dev/null +++ b/api/core/tools/provider/builtin/perplexity/tools/perplexity_search.yaml @@ -0,0 +1,178 @@ +identity: + name: perplexity + author: Dify + label: + en_US: Perplexity Search +description: + human: + en_US: Search information using Perplexity AI's language models. + llm: This tool is used to search information using Perplexity AI's language models. +parameters: + - name: query + type: string + required: true + label: + en_US: Query + zh_Hans: 查询 + human_description: + en_US: The text query to be processed by the AI model. + zh_Hans: 要由 AI 模型处理的文本查询。 + form: llm + - name: model + type: select + required: false + label: + en_US: Model Name + zh_Hans: 模型名称 + human_description: + en_US: The Perplexity AI model to use for generating the response. + zh_Hans: 用于生成响应的 Perplexity AI 模型。 + form: form + default: "llama-3.1-sonar-small-128k-online" + options: + - value: llama-3.1-sonar-small-128k-online + label: + en_US: llama-3.1-sonar-small-128k-online + zh_Hans: llama-3.1-sonar-small-128k-online + - value: llama-3.1-sonar-large-128k-online + label: + en_US: llama-3.1-sonar-large-128k-online + zh_Hans: llama-3.1-sonar-large-128k-online + - value: llama-3.1-sonar-huge-128k-online + label: + en_US: llama-3.1-sonar-huge-128k-online + zh_Hans: llama-3.1-sonar-huge-128k-online + - name: max_tokens + type: number + required: false + label: + en_US: Max Tokens + zh_Hans: 最大令牌数 + pt_BR: Máximo de Tokens + human_description: + en_US: The maximum number of tokens to generate in the response. + zh_Hans: 在响应中生成的最大令牌数。 + pt_BR: O número máximo de tokens a serem gerados na resposta. + form: form + default: 4096 + min: 1 + max: 4096 + - name: temperature + type: number + required: false + label: + en_US: Temperature + zh_Hans: 温度 + pt_BR: Temperatura + human_description: + en_US: Controls randomness in the output. Lower values make the output more focused and deterministic. + zh_Hans: 控制输出的随机性。较低的值使输出更加集中和确定。 + form: form + default: 0.7 + min: 0 + max: 1 + - name: top_k + type: number + required: false + label: + en_US: Top K + zh_Hans: 取样数量 + human_description: + en_US: The number of top results to consider for response generation. + zh_Hans: 用于生成响应的顶部结果数量。 + form: form + default: 5 + min: 1 + max: 100 + - name: top_p + type: number + required: false + label: + en_US: Top P + zh_Hans: Top P + human_description: + en_US: Controls diversity via nucleus sampling. + zh_Hans: 通过核心采样控制多样性。 + form: form + default: 1 + min: 0.1 + max: 1 + step: 0.1 + - name: presence_penalty + type: number + required: false + label: + en_US: Presence Penalty + zh_Hans: 存在惩罚 + human_description: + en_US: Positive values penalize new tokens based on whether they appear in the text so far. + zh_Hans: 正值会根据新词元是否已经出现在文本中来对其进行惩罚。 + form: form + default: 0 + min: -1.0 + max: 1.0 + step: 0.1 + - name: frequency_penalty + type: number + required: false + label: + en_US: Frequency Penalty + zh_Hans: 频率惩罚 + human_description: + en_US: Positive values penalize new tokens based on their existing frequency in the text so far. + zh_Hans: 正值会根据新词元在文本中已经出现的频率来对其进行惩罚。 + form: form + default: 1 + min: 0.1 + max: 1.0 + step: 0.1 + - name: return_citations + type: boolean + required: false + label: + en_US: Return Citations + zh_Hans: 返回引用 + human_description: + en_US: Whether to return citations in the response. + zh_Hans: 是否在响应中返回引用。 + form: form + default: true + - name: search_domain_filter + type: string + required: false + label: + en_US: Search Domain Filter + zh_Hans: 搜索域过滤器 + human_description: + en_US: Domain to filter the search results. + zh_Hans: 用于过滤搜索结果的域名。 + form: form + default: "" + - name: search_recency_filter + type: select + required: false + label: + en_US: Search Recency Filter + zh_Hans: 搜索时间过滤器 + human_description: + en_US: Filter for search results based on recency. + zh_Hans: 基于时间筛选搜索结果。 + form: form + default: "month" + options: + - value: day + label: + en_US: Day + zh_Hans: 天 + - value: week + label: + en_US: Week + zh_Hans: 周 + - value: month + label: + en_US: Month + zh_Hans: 月 + - value: year + label: + en_US: Year + zh_Hans: 年 diff --git a/api/core/workflow/nodes/http_request/http_request_node.py b/api/core/workflow/nodes/http_request/http_request_node.py index 533763515e8828..3f68c8b1d03ce5 100644 --- a/api/core/workflow/nodes/http_request/http_request_node.py +++ b/api/core/workflow/nodes/http_request/http_request_node.py @@ -18,9 +18,9 @@ from models.workflow import WorkflowNodeExecutionStatus HTTP_REQUEST_DEFAULT_TIMEOUT = HttpRequestNodeTimeout( - connect=min(10, dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT), - read=min(60, dify_config.HTTP_REQUEST_MAX_READ_TIMEOUT), - write=min(20, dify_config.HTTP_REQUEST_MAX_WRITE_TIMEOUT), + connect=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, + read=dify_config.HTTP_REQUEST_MAX_READ_TIMEOUT, + write=dify_config.HTTP_REQUEST_MAX_WRITE_TIMEOUT, ) @@ -100,12 +100,9 @@ def _get_request_timeout(node_data: HttpRequestNodeData) -> HttpRequestNodeTimeo if timeout is None: return HTTP_REQUEST_DEFAULT_TIMEOUT - timeout.connect = min(timeout.connect or HTTP_REQUEST_DEFAULT_TIMEOUT.connect, - dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT) - timeout.read = min(timeout.read or HTTP_REQUEST_DEFAULT_TIMEOUT.read, - dify_config.HTTP_REQUEST_MAX_READ_TIMEOUT) - timeout.write = min(timeout.write or HTTP_REQUEST_DEFAULT_TIMEOUT.write, - dify_config.HTTP_REQUEST_MAX_WRITE_TIMEOUT) + timeout.connect = timeout.connect or HTTP_REQUEST_DEFAULT_TIMEOUT.connect + timeout.read = timeout.read or HTTP_REQUEST_DEFAULT_TIMEOUT.read + timeout.write = timeout.write or HTTP_REQUEST_DEFAULT_TIMEOUT.write return timeout @classmethod diff --git a/api/extensions/storage/aliyun_storage.py b/api/extensions/storage/aliyun_storage.py index b962cedc55178d..bee237fc17fe86 100644 --- a/api/extensions/storage/aliyun_storage.py +++ b/api/extensions/storage/aliyun_storage.py @@ -15,6 +15,7 @@ def __init__(self, app: Flask): app_config = self.app.config self.bucket_name = app_config.get("ALIYUN_OSS_BUCKET_NAME") + self.folder = app.config.get("ALIYUN_OSS_PATH") oss_auth_method = aliyun_s3.Auth region = None if app_config.get("ALIYUN_OSS_AUTH_VERSION") == "v4": @@ -30,15 +31,29 @@ def __init__(self, app: Flask): ) def save(self, filename, data): + if not self.folder or self.folder.endswith("/"): + filename = self.folder + filename + else: + filename = self.folder + "/" + filename self.client.put_object(filename, data) def load_once(self, filename: str) -> bytes: + if not self.folder or self.folder.endswith("/"): + filename = self.folder + filename + else: + filename = self.folder + "/" + filename + with closing(self.client.get_object(filename)) as obj: data = obj.read() return data def load_stream(self, filename: str) -> Generator: def generate(filename: str = filename) -> Generator: + if not self.folder or self.folder.endswith("/"): + filename = self.folder + filename + else: + filename = self.folder + "/" + filename + with closing(self.client.get_object(filename)) as obj: while chunk := obj.read(4096): yield chunk @@ -46,10 +61,24 @@ def generate(filename: str = filename) -> Generator: return generate() def download(self, filename, target_filepath): + if not self.folder or self.folder.endswith("/"): + filename = self.folder + filename + else: + filename = self.folder + "/" + filename + self.client.get_object_to_file(filename, target_filepath) def exists(self, filename): + if not self.folder or self.folder.endswith("/"): + filename = self.folder + filename + else: + filename = self.folder + "/" + filename + return self.client.object_exists(filename) def delete(self, filename): + if not self.folder or self.folder.endswith("/"): + filename = self.folder + filename + else: + filename = self.folder + "/" + filename self.client.delete_object(filename) diff --git a/api/services/account_service.py b/api/services/account_service.py index 2abc2d33a56dcb..b1adff4c988138 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -327,7 +327,7 @@ def create_tenant(name: str) -> Tenant: return tenant @staticmethod - def create_owner_tenant_if_not_exist(account: Account): + def create_owner_tenant_if_not_exist(account: Account, name: Optional[str] = None): """Create owner tenant if not exist""" available_ta = ( TenantAccountJoin.query.filter_by(account_id=account.id).order_by(TenantAccountJoin.id.asc()).first() @@ -336,7 +336,10 @@ def create_owner_tenant_if_not_exist(account: Account): if available_ta: return - tenant = TenantService.create_tenant(f"{account.name}'s Workspace") + if name: + tenant = TenantService.create_tenant(name) + else: + tenant = TenantService.create_tenant(f"{account.name}'s Workspace") TenantService.create_tenant_member(tenant, account, role="owner") account.current_tenant = tenant db.session.commit() diff --git a/api/tasks/mail_invite_member_task.py b/api/tasks/mail_invite_member_task.py index 4ef6e29994a3d0..c7dfb9bf6063ff 100644 --- a/api/tasks/mail_invite_member_task.py +++ b/api/tasks/mail_invite_member_task.py @@ -19,7 +19,7 @@ def send_invite_member_mail_task(language: str, to: str, token: str, inviter_nam :param inviter_name :param workspace_name - Usage: send_invite_member_mail_task.delay(langauge, to, token, inviter_name, workspace_name) + Usage: send_invite_member_mail_task.delay(language, to, token, inviter_name, workspace_name) """ if not mail.is_inited(): return diff --git a/api/tests/unit_tests/configs/test_dify_config.py b/api/tests/unit_tests/configs/test_dify_config.py index fb415483dd39ad..3f639ccacc48f5 100644 --- a/api/tests/unit_tests/configs/test_dify_config.py +++ b/api/tests/unit_tests/configs/test_dify_config.py @@ -19,6 +19,7 @@ def example_env_file(tmp_path, monkeypatch) -> str: """ CONSOLE_API_URL=https://example.com CONSOLE_WEB_URL=https://example.com + HTTP_REQUEST_MAX_WRITE_TIMEOUT=30 """ ) ) @@ -48,6 +49,12 @@ def test_dify_config(example_env_file): assert config.API_COMPRESSION_ENABLED is False assert config.SENTRY_TRACES_SAMPLE_RATE == 1.0 + # annotated field with default value + assert config.HTTP_REQUEST_MAX_READ_TIMEOUT == 60 + + # annotated field with configured value + assert config.HTTP_REQUEST_MAX_WRITE_TIMEOUT == 30 + # NOTE: If there is a `.env` file in your Workspace, this test might not succeed as expected. # This is due to `pymilvus` loading all the variables from the `.env` file into `os.environ`. diff --git a/docker/.env.example b/docker/.env.example index d090a2fe703aa3..617f02562d0b7e 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -285,6 +285,8 @@ ALIYUN_OSS_SECRET_KEY=your-secret-key ALIYUN_OSS_ENDPOINT=https://oss-ap-southeast-1-internal.aliyuncs.com ALIYUN_OSS_REGION=ap-southeast-1 ALIYUN_OSS_AUTH_VERSION=v4 +# Don't start with '/'. OSS doesn't support leading slash in object names. +ALIYUN_OSS_PATH=your-path # Tencent COS Configuration # The name of the Tencent COS bucket to use for storing files. diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index a319d6b45a101f..7a68bb38750067 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -66,6 +66,7 @@ x-shared-env: &shared-api-worker-env ALIYUN_OSS_ENDPOINT: ${ALIYUN_OSS_ENDPOINT:-} ALIYUN_OSS_REGION: ${ALIYUN_OSS_REGION:-} ALIYUN_OSS_AUTH_VERSION: ${ALIYUN_OSS_AUTH_VERSION:-v4} + ALIYUN_OSS_PATHS: ${ALIYUN_OSS_PATH:-} TENCENT_COS_BUCKET_NAME: ${TENCENT_COS_BUCKET_NAME:-} TENCENT_COS_SECRET_KEY: ${TENCENT_COS_SECRET_KEY:-} TENCENT_COS_SECRET_ID: ${TENCENT_COS_SECRET_ID:-} diff --git a/web/app/(commonLayout)/apps/AppCard.tsx b/web/app/(commonLayout)/apps/AppCard.tsx index bc7308a711a06b..6dcc39046c4003 100644 --- a/web/app/(commonLayout)/apps/AppCard.tsx +++ b/web/app/(commonLayout)/apps/AppCard.tsx @@ -255,7 +255,7 @@ const AppCard = ({ app, onRefresh }: AppCardProps) => { e.preventDefault() getRedirection(isCurrentWorkspaceEditor, app, push) }} - className='group flex col-span-1 bg-white border-2 border-solid border-transparent rounded-xl shadow-sm min-h-[160px] flex flex-col transition-all duration-200 ease-in-out cursor-pointer hover:shadow-lg' + className='relative group col-span-1 bg-white border-2 border-solid border-transparent rounded-xl shadow-sm flex flex-col transition-all duration-200 ease-in-out cursor-pointer hover:shadow-lg' >