From 715d953512713f2ad02a135dd072ab0d77e55637 Mon Sep 17 00:00:00 2001
From: Scott Davidson
Date: Thu, 26 Oct 2023 11:46:01 +0100
Subject: [PATCH] Update deployment spec to use vllm

---
 templates/api/deployment.yml | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/templates/api/deployment.yml b/templates/api/deployment.yml
index 69784a1..4d3ed98 100644
--- a/templates/api/deployment.yml
+++ b/templates/api/deployment.yml
@@ -24,9 +24,9 @@ spec:
               containerPort: 80
           volumeMounts:
             - name: data
-              mountPath: /data
+              mountPath: /root/.cache/huggingface
           args:
-            - --model-id
+            - --model
             - {{ .Values.huggingface.model }}
           {{- if .Values.huggingface.secretName }}
           envFrom:
@@ -44,7 +44,6 @@ spec:
               port: 80
             initialDelaySeconds: 15
             periodSeconds: 10
-          # TODO: Make this configurable
           resources:
             limits:
               nvidia.com/gpu: {{ .Values.api.gpus | int }}
@@ -53,7 +52,7 @@ spec:
         - name: data
           # emptyDir:
           hostPath:
-            path: /tmp/tgi/data
+            path: /tmp/llm/data
         # Suggested in text-generation-inference docs
         - name: shm
          emptyDir:
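
Note (not part of the patch itself): vLLM's server takes the model as --model, whereas text-generation-inference uses --model-id, and vLLM downloads weights into the default Hugging Face cache (~/.cache/huggingface, i.e. /root/.cache/huggingface when the container runs as root) rather than TGI's /data, hence the new mount path. For context, a minimal sketch of the values this template references, with illustrative placeholder defaults; the chart's real values.yaml is not shown in this patch:

huggingface:
  # Hugging Face model ID passed to vLLM via --model.
  # The ID below is purely illustrative.
  model: mistralai/Mistral-7B-Instruct-v0.1
  # Optional: name of a Secret injected via envFrom, e.g. to supply a
  # Hugging Face access token for gated models (assumed usage; the
  # secret's contents are not shown in this patch).
  secretName: ""
api:
  # Number of GPUs requested via the nvidia.com/gpu resource limit.
  gpus: 1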