add-some-new-models-hosted-on-nvidia (#4303)

langgenius · May 11, 2024 · a80fe20 · a80fe20
1 parent f798680
commit a80fe20
Show file tree

Hide file tree

Showing 6 changed files with 154 additions and 1 deletion.
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
@@ -1,7 +1,11 @@
 - google/gemma-7b
 - google/codegemma-7b
+- google/recurrentgemma-2b
 - meta/llama2-70b
 - meta/llama3-8b-instruct
 - meta/llama3-70b-instruct
+- mistralai/mistral-large
 - mistralai/mixtral-8x7b-instruct-v0.1
+- mistralai/mixtral-8x22b-instruct-v0.1
 - fuyu-8b
+- snowflake/arctic
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml b/api/core/model_runtime/model_providers/nvidia/llm/arctic.yaml
@@ -0,0 +1,36 @@
+model: snowflake/arctic
+label:
+  zh_Hans: snowflake/arctic
+  en_US: snowflake/arctic
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
@@ -22,12 +22,16 @@
 class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
     MODEL_SUFFIX_MAP = {
         'fuyu-8b': 'vlm/adept/fuyu-8b',
+        'mistralai/mistral-large': '',
         'mistralai/mixtral-8x7b-instruct-v0.1': '',
+        'mistralai/mixtral-8x22b-instruct-v0.1': '',
         'google/gemma-7b': '',
         'google/codegemma-7b': '',
+        'snowflake/arctic':'',
         'meta/llama2-70b': '',
         'meta/llama3-8b-instruct': '',
-        'meta/llama3-70b-instruct': ''
+        'meta/llama3-70b-instruct': '',
+        'google/recurrentgemma-2b': ''
 
     }
 

diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mistral-large.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mistral-large
+label:
+  zh_Hans: mistralai/mistral-large
+  en_US: mistralai/mistral-large
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mixtral-8x22b-instruct-v0.1.yaml
@@ -0,0 +1,36 @@
+model: mistralai/mixtral-8x22b-instruct-v0.1
+label:
+  zh_Hans: mistralai/mixtral-8x22b-instruct-v0.1
+  en_US: mistralai/mixtral-8x22b-instruct-v0.1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/recurrentgemma-2b.yaml
@@ -0,0 +1,37 @@
+model: google/recurrentgemma-2b
+label:
+  zh_Hans: google/recurrentgemma-2b
+  en_US: google/recurrentgemma-2b
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 2048
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.2
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 0.7
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 1024
+    default: 1024
+  - name: random_seed
+    type: int
+    help:
+      en_US: The seed to use for random sampling. If set, repeated calls with the same seed and parameters will generate deterministic results.
+      zh_Hans: 当开启随机数种子以后，你可以通过指定一个固定的种子来使得回答结果更加稳定
+    label:
+      en_US: Seed
+      zh_Hans: 种子
+    default: 0
+    min: 0
+    max: 2147483647