From 715d953512713f2ad02a135dd072ab0d77e55637 Mon Sep 17 00:00:00 2001
From: Scott Davidson
Date: Thu, 26 Oct 2023 11:46:01 +0100
Subject: [PATCH] Update deployment spec to use vllm

---
 templates/api/deployment.yml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/templates/api/deployment.yml b/templates/api/deployment.yml
index 69784a1..4d3ed98 100644
--- a/templates/api/deployment.yml
+++ b/templates/api/deployment.yml
@@ -24,9 +24,9 @@ spec:
               containerPort: 80
           volumeMounts:
             - name: data
-              mountPath: /data
+              mountPath: /root/.cache/huggingface
           args:
-            - --model-id
+            - --model
             - {{ .Values.huggingface.model }}
           {{- if .Values.huggingface.secretName }}
           envFrom:
@@ -44,7 +44,6 @@ spec:
               port: 80
             initialDelaySeconds: 15
             periodSeconds: 10
-          # TODO: Make this configurable
           resources:
             limits:
               nvidia.com/gpu: {{ .Values.api.gpus | int }}
@@ -53,7 +52,7 @@ spec:
         - name: data
           # emptyDir:
           hostPath:
-            path: /tmp/tgi/data
+            path: /tmp/llm/data
         # Suggested in text-generation-inference docs
         - name: shm
          emptyDir:
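
Note (not part of the patch itself): vLLM's server takes the model as --model, whereas text-generation-inference uses --model-id, and vLLM downloads weights into the default Hugging Face cache (~/.cache/huggingface, i.e. /root/.cache/huggingface when the container runs as root) rather than TGI's /data, hence the new mount path. For context, a minimal sketch of the values this template references, with illustrative placeholder defaults; the chart's real values.yaml is not shown in this patch:

huggingface:
  # Hugging Face model ID passed to vLLM via --model.
  # The ID below is purely illustrative.
  model: mistralai/Mistral-7B-Instruct-v0.1
  # Optional: name of a Secret injected via envFrom, e.g. to supply a
  # Hugging Face access token for gated models (assumed usage; the
  # secret's contents are not shown in this patch).
  secretName: ""
api:
  # Number of GPUs requested via the nvidia.com/gpu resource limit.
  gpus: 1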