From 7618190849b0831ab7d7b80271d5175c72b51446 Mon Sep 17 00:00:00 2001 From: "Charlie.Wei" Date: Fri, 6 Sep 2024 17:44:09 +0800 Subject: [PATCH 1/9] ifEsle node add regex match (#8007) --- .../llm/{spark-1.5.yaml => Spark Lite.yaml} | 6 ++--- .../llm/{spark-3.5.yaml => Spark Max.yaml} | 0 .../llm/{spark-2.yaml => Spark Pro-128K.yaml} | 7 +++--- .../llm/{spark-3.yaml => Spark Pro.yaml} | 6 ++--- .../llm/{spark-4.yaml => Spark4.0 Ultra.yaml} | 6 ++--- .../model_providers/spark/llm/_client.py | 25 +++++++++++-------- .../model_providers/spark/llm/_position.yaml | 10 ++++---- api/core/workflow/nodes/if_else/entities.py | 2 +- .../workflow/nodes/if_else/if_else_node.py | 18 +++++++++++++ .../app/configuration/config-var/index.tsx | 1 - .../workflow/nodes/if-else/types.ts | 1 + .../workflow/nodes/if-else/utils.ts | 1 + web/i18n/en-US/workflow.ts | 1 + web/i18n/zh-Hans/workflow.ts | 1 + 14 files changed, 54 insertions(+), 31 deletions(-) rename api/core/model_runtime/model_providers/spark/llm/{spark-1.5.yaml => Spark Lite.yaml} (90%) rename api/core/model_runtime/model_providers/spark/llm/{spark-3.5.yaml => Spark Max.yaml} (100%) rename api/core/model_runtime/model_providers/spark/llm/{spark-2.yaml => Spark Pro-128K.yaml} (90%) rename api/core/model_runtime/model_providers/spark/llm/{spark-3.yaml => Spark Pro.yaml} (90%) rename api/core/model_runtime/model_providers/spark/llm/{spark-4.yaml => Spark4.0 Ultra.yaml} (90%) diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-1.5.yaml b/api/core/model_runtime/model_providers/spark/llm/Spark Lite.yaml similarity index 90% rename from api/core/model_runtime/model_providers/spark/llm/spark-1.5.yaml rename to api/core/model_runtime/model_providers/spark/llm/Spark Lite.yaml index 41b8765fe6c4f1..2afbb5c494f2c1 100644 --- a/api/core/model_runtime/model_providers/spark/llm/spark-1.5.yaml +++ b/api/core/model_runtime/model_providers/spark/llm/Spark Lite.yaml @@ -1,6 +1,6 @@ -model: spark-1.5 +model: Spark Lite label: - en_US: Spark V1.5 + en_US: Spark Lite model_type: llm model_properties: mode: chat @@ -18,7 +18,7 @@ parameter_rules: max: 4096 help: zh_Hans: 模型回答的tokens的最大长度。 - en_US: 模型回答的tokens的最大长度。 + en_US: Maximum length of tokens for the model response. - name: top_k label: zh_Hans: 取样数量 diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-3.5.yaml b/api/core/model_runtime/model_providers/spark/llm/Spark Max.yaml similarity index 100% rename from api/core/model_runtime/model_providers/spark/llm/spark-3.5.yaml rename to api/core/model_runtime/model_providers/spark/llm/Spark Max.yaml diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-2.yaml b/api/core/model_runtime/model_providers/spark/llm/Spark Pro-128K.yaml similarity index 90% rename from api/core/model_runtime/model_providers/spark/llm/spark-2.yaml rename to api/core/model_runtime/model_providers/spark/llm/Spark Pro-128K.yaml index 2db6805a2e2af0..08ce6887cd712f 100644 --- a/api/core/model_runtime/model_providers/spark/llm/spark-2.yaml +++ b/api/core/model_runtime/model_providers/spark/llm/Spark Pro-128K.yaml @@ -1,7 +1,6 @@ -model: spark-2 -deprecated: true +model: Spark Pro-128K label: - en_US: Spark V2.0 + en_US: Spark Pro-128K model_type: llm model_properties: mode: chat @@ -19,7 +18,7 @@ parameter_rules: max: 8192 help: zh_Hans: 模型回答的tokens的最大长度。 - en_US: 模型回答的tokens的最大长度。 + en_US: Maximum length of tokens for the model response. 
- name: top_k label: zh_Hans: 取样数量 diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-3.yaml b/api/core/model_runtime/model_providers/spark/llm/Spark Pro.yaml similarity index 90% rename from api/core/model_runtime/model_providers/spark/llm/spark-3.yaml rename to api/core/model_runtime/model_providers/spark/llm/Spark Pro.yaml index 2ef9e10f453f6b..420e73ee043330 100644 --- a/api/core/model_runtime/model_providers/spark/llm/spark-3.yaml +++ b/api/core/model_runtime/model_providers/spark/llm/Spark Pro.yaml @@ -1,6 +1,6 @@ -model: spark-3 +model: Spark Pro label: - en_US: Spark V3.0 + en_US: Spark Pro model_type: llm model_properties: mode: chat @@ -18,7 +18,7 @@ parameter_rules: max: 8192 help: zh_Hans: 模型回答的tokens的最大长度。 - en_US: 模型回答的tokens的最大长度。 + en_US: Maximum length of tokens for the model response. - name: top_k label: zh_Hans: 取样数量 diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-4.yaml b/api/core/model_runtime/model_providers/spark/llm/Spark4.0 Ultra.yaml similarity index 90% rename from api/core/model_runtime/model_providers/spark/llm/spark-4.yaml rename to api/core/model_runtime/model_providers/spark/llm/Spark4.0 Ultra.yaml index 4b0bf27029ff76..ffb0669a76bab2 100644 --- a/api/core/model_runtime/model_providers/spark/llm/spark-4.yaml +++ b/api/core/model_runtime/model_providers/spark/llm/Spark4.0 Ultra.yaml @@ -1,6 +1,6 @@ -model: spark-4 +model: Spark4.0 Ultra label: - en_US: Spark V4.0 + en_US: Spark4.0 Ultra model_type: llm model_properties: mode: chat @@ -18,7 +18,7 @@ parameter_rules: max: 8192 help: zh_Hans: 模型回答的tokens的最大长度。 - en_US: 模型回答的tokens的最大长度。 + en_US: Maximum length of tokens for the model response. - name: top_k label: zh_Hans: 取样数量 diff --git a/api/core/model_runtime/model_providers/spark/llm/_client.py b/api/core/model_runtime/model_providers/spark/llm/_client.py index 10da265701a423..b9f7a2d32be268 100644 --- a/api/core/model_runtime/model_providers/spark/llm/_client.py +++ b/api/core/model_runtime/model_providers/spark/llm/_client.py @@ -19,27 +19,25 @@ def __init__(self, model: str, app_id: str, api_key: str, api_secret: str, api_d endpoint = 'chat' if api_domain: domain = api_domain - if model == 'spark-v3': - endpoint = 'multimodal' model_api_configs = { - 'spark-1.5': { + 'Spark Lite': { 'version': 'v1.1', 'chat_domain': 'general' }, - 'spark-2': { - 'version': 'v2.1', - 'chat_domain': 'generalv2' - }, - 'spark-3': { + 'Spark Pro': { 'version': 'v3.1', 'chat_domain': 'generalv3' }, - 'spark-3.5': { + 'Spark Pro-128K': { + 'version': 'pro-128k', + 'chat_domain': 'pro-128k' + }, + 'Spark Max': { 'version': 'v3.5', 'chat_domain': 'generalv3.5' }, - 'spark-4': { + 'Spark4.0 Ultra': { 'version': 'v4.0', 'chat_domain': '4.0Ultra' } @@ -48,7 +46,12 @@ def __init__(self, model: str, app_id: str, api_key: str, api_secret: str, api_d api_version = model_api_configs[model]['version'] self.chat_domain = model_api_configs[model]['chat_domain'] - self.api_base = f"wss://{domain}/{api_version}/{endpoint}" + + if model == 'Spark Pro-128K': + self.api_base = f"wss://{domain}/{endpoint}/{api_version}" + else: + self.api_base = f"wss://{domain}/{api_version}/{endpoint}" + self.app_id = app_id self.ws_url = self.create_url( urlparse(self.api_base).netloc, diff --git a/api/core/model_runtime/model_providers/spark/llm/_position.yaml b/api/core/model_runtime/model_providers/spark/llm/_position.yaml index e49ee97db7cf56..9ded28047d12a7 100644 --- a/api/core/model_runtime/model_providers/spark/llm/_position.yaml +++ 
b/api/core/model_runtime/model_providers/spark/llm/_position.yaml @@ -1,5 +1,5 @@ -- spark-4 -- spark-3.5 -- spark-3 -- spark-1.5 -- spark-2 +Spark4.0 Ultra +Spark Max +Spark Pro-128K +Spark Pro +Spark Lite \ No newline at end of file diff --git a/api/core/workflow/nodes/if_else/entities.py b/api/core/workflow/nodes/if_else/entities.py index bc6dce0d3bd37a..7eb69b80dfe117 100644 --- a/api/core/workflow/nodes/if_else/entities.py +++ b/api/core/workflow/nodes/if_else/entities.py @@ -12,7 +12,7 @@ class Condition(BaseModel): variable_selector: list[str] comparison_operator: Literal[ # for string or array - "contains", "not contains", "start with", "end with", "is", "is not", "empty", "not empty", + "contains", "not contains", "start with", "end with", "is", "is not", "empty", "not empty", "regex match", # for number "=", "≠", ">", "<", "≥", "≤", "null", "not null" ] diff --git a/api/core/workflow/nodes/if_else/if_else_node.py b/api/core/workflow/nodes/if_else/if_else_node.py index c6d235627f04b0..5f240df6b07e29 100644 --- a/api/core/workflow/nodes/if_else/if_else_node.py +++ b/api/core/workflow/nodes/if_else/if_else_node.py @@ -1,3 +1,4 @@ +import re from collections.abc import Sequence from typing import Optional, cast @@ -136,6 +137,8 @@ def evaluate_condition( return self._assert_null(actual_value) elif comparison_operator == "not null": return self._assert_not_null(actual_value) + elif comparison_operator == "regex match": + return self._assert_regex_match(actual_value, expected_value) else: raise ValueError(f"Invalid comparison operator: {comparison_operator}") @@ -285,6 +288,21 @@ def _assert_empty(self, actual_value: Optional[str]) -> bool: return True return False + def _assert_regex_match(self, actual_value: Optional[str], expected_value: str) -> bool: + """ + Assert empty + :param actual_value: actual value + :return: + """ + if actual_value is None: + return False + + pattern = re.compile(expected_value) + regex_result = pattern.findall(actual_value) + if len(regex_result) > 0: + return True + return False + def _assert_not_empty(self, actual_value: Optional[str]) -> bool: """ Assert not empty diff --git a/web/app/components/app/configuration/config-var/index.tsx b/web/app/components/app/configuration/config-var/index.tsx index 802528e0af7a4b..fc165571c4a52c 100644 --- a/web/app/components/app/configuration/config-var/index.tsx +++ b/web/app/components/app/configuration/config-var/index.tsx @@ -88,7 +88,6 @@ const ConfigVar: FC = ({ promptVariables, readonly, onPromptVar } as InputVar })() const updatePromptVariableItem = (payload: InputVar) => { - console.log(payload) const newPromptVariables = produce(promptVariables, (draft) => { const { variable, label, type, ...rest } = payload draft[currIndex] = { diff --git a/web/app/components/workflow/nodes/if-else/types.ts b/web/app/components/workflow/nodes/if-else/types.ts index 693dce1784e87c..e67bee2fdccc09 100644 --- a/web/app/components/workflow/nodes/if-else/types.ts +++ b/web/app/components/workflow/nodes/if-else/types.ts @@ -28,6 +28,7 @@ export enum ComparisonOperator { lessThanOrEqual = '≤', isNull = 'is null', isNotNull = 'is not null', + regexMatch = 'regex match', } export type Condition = { diff --git a/web/app/components/workflow/nodes/if-else/utils.ts b/web/app/components/workflow/nodes/if-else/utils.ts index ffb6758bba5391..b71a3a57ca7f0f 100644 --- a/web/app/components/workflow/nodes/if-else/utils.ts +++ b/web/app/components/workflow/nodes/if-else/utils.ts @@ -30,6 +30,7 @@ export const getOperators = (type?: VarType) 
=> { ComparisonOperator.isNot, ComparisonOperator.empty, ComparisonOperator.notEmpty, + ComparisonOperator.regexMatch, ] case VarType.number: return [ diff --git a/web/i18n/en-US/workflow.ts b/web/i18n/en-US/workflow.ts index e0613a110fafeb..033049f0f45b6d 100644 --- a/web/i18n/en-US/workflow.ts +++ b/web/i18n/en-US/workflow.ts @@ -412,6 +412,7 @@ const translation = { 'not empty': 'is not empty', 'null': 'is null', 'not null': 'is not null', + 'regex match': 'regex match', }, enterValue: 'Enter value', addCondition: 'Add Condition', diff --git a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts index 56d1de6ceb664f..34cfb6380ec4a4 100644 --- a/web/i18n/zh-Hans/workflow.ts +++ b/web/i18n/zh-Hans/workflow.ts @@ -412,6 +412,7 @@ const translation = { 'not empty': '不为空', 'null': '空', 'not null': '不为空', + 'regex match': '正则匹配', }, enterValue: '输入值', addCondition: '添加条件', From ab84afc3cdb7c5b1b4c76ce964d9ffa2b3a98e87 Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Sat, 7 Sep 2024 13:53:10 +0800 Subject: [PATCH 2/9] Update Spark-4.0Ultra.yaml --- .../model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml b/api/core/model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml index 8f6fbd9a5227aa..c544cf4fdbdc6a 100644 --- a/api/core/model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml +++ b/api/core/model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml @@ -39,4 +39,4 @@ parameter_rules: default: false help: zh_Hans: 该参数仅4.0 Ultra版本支持,当设置为true时,如果输入内容触发联网检索插件,会先返回检索信源列表,然后再返回星火回复结果,否则仅返回星火回复结果 - en_US: The parameter is only supported in the 4.0 Ultra version. When set to true, if the input triggers the online search plugin, it will first return a list of search sources and then return the Spark response. Otherwise, it will only return the Spark response. \ No newline at end of file + en_US: The parameter is only supported in the 4.0 Ultra version. When set to true, if the input triggers the online search plugin, it will first return a list of search sources and then return the Spark response. Otherwise, it will only return the Spark response. 
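For reference, the "regex match" operator introduced in PATCH 1/9 boils down to a findall check of the expected pattern against the actual value. A minimal standalone sketch of that evaluation (an approximation of _assert_regex_match, not the IfElseNode itself; the sample pattern and inputs are illustrative only):

import re
from typing import Optional

def regex_match(actual_value: Optional[str], expected_value: str) -> bool:
    # Mirrors _assert_regex_match from if_else_node.py: a None value never
    # matches; otherwise the condition holds if the pattern occurs anywhere
    # in the string.
    if actual_value is None:
        return False
    pattern = re.compile(expected_value)
    return len(pattern.findall(actual_value)) > 0

# Illustrative inputs, not taken from the patch:
print(regex_match("order-2024-0907", r"\d{4}-\d{4}"))  # True
print(regex_match(None, r"\d+"))                       # False
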
From 764c4174a48b6e92ce5da9dfa1d907fb88ce1913 Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Sat, 7 Sep 2024 13:53:10 +0800 Subject: [PATCH 3/9] Update Spark-4.0Ultra.yaml --- .../spark/llm/Spark-4.0Ultra.yaml | 2 +- .../zhipuai/llm/chatglm_turbo.yaml | 21 ++++++++++++----- .../zhipuai/llm/glm-4-0520.yaml | 23 +++++++++++++------ .../zhipuai/llm/glm-4-air.yaml | 23 +++++++++++++------ .../zhipuai/llm/glm-4-airx.yaml | 23 +++++++++++++------ .../zhipuai/llm/glm-4-flash.yaml | 23 +++++++++++++------ .../zhipuai/llm/glm_3_turbo.yaml | 21 ++++++++++++----- .../model_providers/zhipuai/llm/glm_4.yaml | 23 +++++++++++++------ .../zhipuai/llm/glm_4_long.yaml | 20 +++++++++++++++- .../zhipuai/llm/glm_4_plus.yaml | 23 +++++++++++++------ .../model_providers/zhipuai/llm/glm_4v.yaml | 23 +++++++++++++------ .../zhipuai/llm/glm_4v_plus.yaml | 23 +++++++++++++------ 12 files changed, 178 insertions(+), 70 deletions(-) diff --git a/api/core/model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml b/api/core/model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml index 8f6fbd9a5227aa..c544cf4fdbdc6a 100644 --- a/api/core/model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml +++ b/api/core/model_runtime/model_providers/spark/llm/Spark-4.0Ultra.yaml @@ -39,4 +39,4 @@ parameter_rules: default: false help: zh_Hans: 该参数仅4.0 Ultra版本支持,当设置为true时,如果输入内容触发联网检索插件,会先返回检索信源列表,然后再返回星火回复结果,否则仅返回星火回复结果 - en_US: The parameter is only supported in the 4.0 Ultra version. When set to true, if the input triggers the online search plugin, it will first return a list of search sources and then return the Spark response. Otherwise, it will only return the Spark response. \ No newline at end of file + en_US: The parameter is only supported in the 4.0 Ultra version. When set to true, if the input triggers the online search plugin, it will first return a list of search sources and then return the Spark response. Otherwise, it will only return the Spark response. diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/chatglm_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/chatglm_turbo.yaml index 8f51f80967748f..1abefa27e913de 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/chatglm_turbo.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/chatglm_turbo.yaml @@ -19,15 +19,24 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. 
true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. + default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: return_type label: zh_Hans: 回复类型 diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml index 8391278e4f1ea3..88110736aad7a7 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml @@ -23,20 +23,29 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. 
When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. + default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 min: 1 - max: 8192 + max: 4095 pricing: input: '0.1' output: '0.1' diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml index 7caebd3e4b6aa8..6b057475d08b51 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml @@ -23,20 +23,29 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. 
+ default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 min: 1 - max: 8192 + max: 4095 pricing: input: '0.001' output: '0.001' diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml index dc123913deb8b5..4ec17c5fc977d8 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml @@ -23,20 +23,29 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. 
+ default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 min: 1 - max: 8192 + max: 4095 pricing: input: '0.01' output: '0.01' diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml index 0e3c001f06b0f4..952088d49d6399 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml @@ -23,20 +23,29 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. 
+ default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 min: 1 - max: 8192 + max: 4095 pricing: input: '0' output: '0' diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml index b0f95c0a68e555..cc9d8e43641278 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml @@ -23,15 +23,24 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. 
+ default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml index 271eecf199c476..a672ad1d14bab2 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml @@ -23,20 +23,29 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. 
+ default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 min: 1 - max: 8192 + max: 4095 pricing: input: '0.1' output: '0.1' diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml index 150e07b60af979..ca9e7a0b0624e9 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml @@ -26,11 +26,29 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. + - name: do_sample + label: + zh_Hans: 采样策略 + en_US: Sampling strategy + type: boolean + help: + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. + default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. 
If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 min: 1 - max: 8192 + max: 4095 pricing: input: '0.001' output: '0.001' diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml index 237a951cd5a14e..0f41be9dc6eff8 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml @@ -23,20 +23,29 @@ parameter_rules: help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. + default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. 
A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 min: 1 - max: 8192 + max: 4095 pricing: input: '0.05' output: '0.05' diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml index c7a4093d7aa7a2..d32c7e6cac3a7e 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml @@ -17,19 +17,28 @@ parameter_rules: en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - name: top_p use_template: top_p - default: 0.7 + default: 0.6 help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. - required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. 
+ default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml index a7aee5b4ca2363..d734a17296c628 100644 --- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml +++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml @@ -17,19 +17,28 @@ parameter_rules: en_US: Sampling temperature, controls the randomness of the output, must be a positive number. The value range is (0.0,1.0], which cannot be equal to 0. The default value is 0.95. The larger the value, the more random and creative the output will be; the smaller the value, The output will be more stable or certain. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - name: top_p use_template: top_p - default: 0.7 + default: 0.6 help: zh_Hans: 用温度取样的另一种方法,称为核取样取值范围是:(0.0, 1.0) 开区间,不能等于 0 或 1,默认值为 0.7 模型考虑具有 top_p 概率质量tokens的结果例如:0.1 意味着模型解码器只考虑从前 10% 的概率的候选集中取 tokens 建议您根据应用场景调整 top_p 或 temperature 参数,但不要同时调整两个参数。 en_US: Another method of temperature sampling is called kernel sampling. The value range is (0.0, 1.0) open interval, which cannot be equal to 0 or 1. The default value is 0.7. The model considers the results with top_p probability mass tokens. For example 0.1 means The model decoder only considers tokens from the candidate set with the top 10% probability. It is recommended that you adjust the top_p or temperature parameters according to the application scenario, but do not adjust both parameters at the same time. - - name: incremental + - name: do_sample label: - zh_Hans: 增量返回 - en_US: Incremental + zh_Hans: 采样策略 + en_US: Sampling strategy type: boolean help: - zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。 - en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return. 
- required: false + zh_Hans: do_sample 为 true 时启用采样策略,do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。 + en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true. + default: true + - name: stream + label: + zh_Hans: 流处理 + en_US: Event Stream + type: boolean + help: + zh_Hans: 使用同步调用时,此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true,模型将通过标准 Event Stream ,逐块返回模型生成内容。Event Stream 结束时会返回一条data: [DONE]消息。注意:在模型流式输出生成内容的过程中,我们会分批对模型生成内容进行检测,当检测到违法及不良信息时,API会返回错误码(1301)。开发者识别到错误码(1301),应及时采取(清屏、重启对话)等措施删除生成内容,并确保不将含有违法及不良信息的内容传递给模型继续生成,避免其造成负面影响。 + en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data: [DONE] message will be sent at the end of the Event Stream.Note: During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts. + default: false - name: max_tokens use_template: max_tokens default: 1024 From c63ce9aae4087e97db989a3ba2961fab2c84485f Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:32:37 +0800 Subject: [PATCH 4/9] Rename Spark-Lite.yaml to spark-lite.yaml --- .../spark/llm/{Spark-Lite.yaml => spark-lite.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename api/core/model_runtime/model_providers/spark/llm/{Spark-Lite.yaml => spark-lite.yaml} (100%) diff --git a/api/core/model_runtime/model_providers/spark/llm/Spark-Lite.yaml b/api/core/model_runtime/model_providers/spark/llm/spark-lite.yaml similarity index 100% rename from api/core/model_runtime/model_providers/spark/llm/Spark-Lite.yaml rename to api/core/model_runtime/model_providers/spark/llm/spark-lite.yaml From 531e6bdbdd4fcf7e25ace8dbfb12f83701a3942d Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:33:03 +0800 Subject: [PATCH 5/9] Rename Spark-Max.yaml to spark-max.yaml --- .../model_providers/spark/llm/{Spark-Max.yaml => spark-max.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename api/core/model_runtime/model_providers/spark/llm/{Spark-Max.yaml => spark-max.yaml} (100%) diff --git a/api/core/model_runtime/model_providers/spark/llm/Spark-Max.yaml b/api/core/model_runtime/model_providers/spark/llm/spark-max.yaml similarity index 100% rename from api/core/model_runtime/model_providers/spark/llm/Spark-Max.yaml rename to api/core/model_runtime/model_providers/spark/llm/spark-max.yaml From d5e41c3bd181ea86a7e6ac2897c9ef512d4d9a50 Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:33:20 +0800 Subject: [PATCH 6/9] Rename Spark-Pro.yaml to spark-pro.yaml --- .../model_providers/spark/llm/{Spark-Pro.yaml => spark-pro.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 
api/core/model_runtime/model_providers/spark/llm/{Spark-Pro.yaml => spark-pro.yaml} (100%) diff --git a/api/core/model_runtime/model_providers/spark/llm/Spark-Pro.yaml b/api/core/model_runtime/model_providers/spark/llm/spark-pro.yaml similarity index 100% rename from api/core/model_runtime/model_providers/spark/llm/Spark-Pro.yaml rename to api/core/model_runtime/model_providers/spark/llm/spark-pro.yaml From 4e2762270faed19366f9a5fbf667a91593646233 Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:33:42 +0800 Subject: [PATCH 7/9] Rename Spark-Pro-128K.yaml to spark-pro-128k.yaml --- .../spark/llm/{Spark-Pro-128K.yaml => spark-pro-128k.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename api/core/model_runtime/model_providers/spark/llm/{Spark-Pro-128K.yaml => spark-pro-128k.yaml} (100%) diff --git a/api/core/model_runtime/model_providers/spark/llm/Spark-Pro-128K.yaml b/api/core/model_runtime/model_providers/spark/llm/spark-pro-128k.yaml similarity index 100% rename from api/core/model_runtime/model_providers/spark/llm/Spark-Pro-128K.yaml rename to api/core/model_runtime/model_providers/spark/llm/spark-pro-128k.yaml From dfc5156e4bc893e0d649b58b3dcd59c7037515d7 Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:36:58 +0800 Subject: [PATCH 8/9] Update spark-4.0-ultra.yaml --- .../model_providers/spark/llm/spark-4.0-ultra.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/spark/llm/spark-4.0-ultra.yaml b/api/core/model_runtime/model_providers/spark/llm/spark-4.0-ultra.yaml index 3204c39184d65a..bbf85764f1c8c1 100644 --- a/api/core/model_runtime/model_providers/spark/llm/spark-4.0-ultra.yaml +++ b/api/core/model_runtime/model_providers/spark/llm/spark-4.0-ultra.yaml @@ -1,6 +1,6 @@ model: spark-4.0-ultra label: - en_US: Spark 4.0Ultra + en_US: Spark 4.0 Ultra model_type: llm model_properties: mode: chat From 125a782e4476816de8ee7dab65460deeed145490 Mon Sep 17 00:00:00 2001 From: AAEE86 <33052466+AAEE86@users.noreply.github.com> Date: Mon, 9 Sep 2024 10:37:49 +0800 Subject: [PATCH 9/9] Update _position.yaml --- .../model_providers/spark/llm/_position.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/api/core/model_runtime/model_providers/spark/llm/_position.yaml b/api/core/model_runtime/model_providers/spark/llm/_position.yaml index 33082c4557c751..458397f2aaf1c6 100644 --- a/api/core/model_runtime/model_providers/spark/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/spark/llm/_position.yaml @@ -1,8 +1,8 @@ -- Spark-4.0Ultra -- Spark-Max -- Spark-Pro-128K -- Spark-Pro -- Spark-Lite +- spark-4.0-ultra +- spark-max +- spark-pro-128k +- spark-pro +- spark-lite - spark-4 - spark-3.5 - spark-3
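
As a closing sanity check on the WebSocket URL change from PATCH 1/9: Spark Pro-128K reverses the order of the endpoint and version path segments relative to the other Spark models. A standalone sketch of that rule, using the model keys and version strings as they appear in the PATCH 1/9 version of _client.py (the domain value here is a placeholder, not the provider's real default):

def build_spark_ws_url(model: str, domain: str = "example-spark-domain") -> str:
    # Version strings copied from the model_api_configs table in _client.py.
    versions = {
        "Spark Lite": "v1.1",
        "Spark Pro": "v3.1",
        "Spark Pro-128K": "pro-128k",
        "Spark Max": "v3.5",
        "Spark4.0 Ultra": "v4.0",
    }
    endpoint = "chat"
    api_version = versions[model]
    # Spark Pro-128K swaps the path segments; all other models keep version first.
    if model == "Spark Pro-128K":
        return f"wss://{domain}/{endpoint}/{api_version}"
    return f"wss://{domain}/{api_version}/{endpoint}"

print(build_spark_ws_url("Spark Max"))       # wss://example-spark-domain/v3.5/chat
print(build_spark_ws_url("Spark Pro-128K"))  # wss://example-spark-domain/chat/pro-128k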