From 031cf6e1ff086e9f04b037723700260954317493 Mon Sep 17 00:00:00 2001 From: sgurunat Date: Wed, 4 Dec 2024 20:10:03 +0530 Subject: [PATCH] ChatQnA: Update kubernetes xeon chatqna remote inference and svelte UI (#1215) Signed-off-by: sgurunat Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../manifest/chatqna-remote-inference.yaml | 22 ++++++++++++------- .../ui/svelte/src/lib/network/chat/Network.ts | 8 ++++++- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-remote-inference.yaml b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-remote-inference.yaml index 5778132686..453ff404af 100644 --- a/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-remote-inference.yaml +++ b/ChatQnA/kubernetes/intel/cpu/xeon/manifest/chatqna-remote-inference.yaml @@ -70,9 +70,8 @@ data: no_proxy: "" LOGFLAG: "" vLLM_ENDPOINT: "insert-your-remote-inference-endpoint" - LLM_MODEL: "meta-llama/Meta-Llama-3.1-8B-Instruct" - LLM_MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct" - MODEL_ID: "meta-llama/Meta-Llama-3.1-8B-Instruct" + LLM_MODEL: "meta-llama/Meta-Llama-3.1-70B-Instruct" + MODEL_ID: "meta-llama/Meta-Llama-3.1-70B-Instruct" CLIENTID: "" CLIENT_SECRET: "" TOKEN_URL: "" @@ -216,6 +215,10 @@ data: proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; + proxy_buffering off; + proxy_cache off; + proxy_request_buffering off; + gzip off; } location /v1/dataprep { @@ -552,6 +555,9 @@ spec: {} containers: - name: chatqna-ui + env: + - name: MODEL_ID + value: "meta-llama/Meta-Llama-3.1-70B-Instruct" securityContext: {} image: "opea/chatqna-ui:latest" @@ -691,7 +697,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/embedding-tei:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: embedding-usvc containerPort: 6000 @@ -769,7 +775,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/llm-vllm:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: llm-uservice containerPort: 9000 @@ -919,7 +925,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/reranking-tei:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always ports: - name: reranking-usvc containerPort: 8000 @@ -1257,7 +1263,7 @@ spec: - name: EMBEDDING_SERVICE_HOST_IP value: chatqna-embedding-usvc - name: MODEL_ID - value: "meta-llama/Meta-Llama-3.1-8B-Instruct" + value: "meta-llama/Meta-Llama-3.1-70B-Instruct" securityContext: allowPrivilegeEscalation: false capabilities: @@ -1269,7 +1275,7 @@ spec: seccompProfile: type: RuntimeDefault image: "opea/chatqna-wrapper:latest" - imagePullPolicy: IfNotPresent + imagePullPolicy: Always volumeMounts: - mountPath: /tmp name: tmp diff --git a/ChatQnA/ui/svelte/src/lib/network/chat/Network.ts b/ChatQnA/ui/svelte/src/lib/network/chat/Network.ts index 64ce8e8f79..cafc346756 100644 --- a/ChatQnA/ui/svelte/src/lib/network/chat/Network.ts +++ b/ChatQnA/ui/svelte/src/lib/network/chat/Network.ts @@ -16,13 +16,19 @@ import { env } from "$env/dynamic/public"; import { SSE } from "sse.js"; const CHAT_BASE_URL = env.CHAT_BASE_URL; +const MODEL_ID = env.MODEL_ID; export async function fetchTextStream(query: string) { let payload = {}; let url = ""; + let modelId = "Intel/neural-chat-7b-v3-3"; + + if (MODEL_ID) { + modelId = MODEL_ID; + } payload = { - model: "Intel/neural-chat-7b-v3-3", + model: `${modelId}`, messages: query, }; url = `${CHAT_BASE_URL}`;