chore: add PVC to whisper package to store model weights

defenseunicorns · Jul 26, 2024 · 321996d · 321996d
1 parent d7150f3
commit 321996d
Show file tree

Hide file tree

Showing 5 changed files with 66 additions and 8 deletions.
diff --git a/packages/whisper/Dockerfile b/packages/whisper/Dockerfile
@@ -17,12 +17,6 @@ COPY packages/whisper packages/whisper
 RUN rm -f packages/whisper/build/*.whl
 RUN python -m pip wheel "packages/whisper[dev]" -w packages/whisper/build --find-links=${SDK_DEST}
 
-# download and covnert OpenAI's whisper base
-ARG MODEL_NAME=openai/whisper-base
-RUN pip install ctranslate2 transformers[torch] --no-index --find-links=packages/whisper/build/
-RUN ct2-transformers-converter --model ${MODEL_NAME} --output_dir .model --copy_files tokenizer.json --quantization float32
-RUN pip uninstall -y ctranslate2 transformers[torch]
-
 RUN pip install packages/whisper/build/lfai_whisper*.whl --no-index --find-links=packages/whisper/build/
 
 # Use hardened ffmpeg image to get compiled binaries
@@ -40,7 +34,6 @@ COPY --from=ffmpeg /usr/bin/ffprobe /usr/bin
 COPY --from=ffmpeg /usr/lib/lib* /usr/lib
 
 COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/
-COPY --from=builder /leapfrogai/.model/ /leapfrogai/.model/
 
 # set the path to the cuda 11.8 dependencies
 ENV LD_LIBRARY_PATH \

diff --git a/packages/whisper/chart/templates/deployment.yaml b/packages/whisper/chart/templates/deployment.yaml
@@ -23,8 +23,38 @@ spec:
         {{- toYaml . | nindent 8 }}
       {{- end }}
       labels:
+        app: lfai-whisper
         {{- include "chart.selectorLabels" . | nindent 8 }}
     spec:
+      # It's necessary to include the ###ZARF_DATA_INJECTION_MARKER### somewhere in the podspec, otherwise data injections will not occur.
+      initContainers:
+        - name: data-loader
+          image: cgr.dev/chainguard/bash:latest
+          securityContext:
+            runAsUser: 65532
+            runAsGroup: 65532
+            fsGroup: 65532
+          # This command looks for the Zarf "data injection marker" which is a timestamped file that is injected after everything else and marks the injection as complete.
+          command:
+            [
+              "sh",
+              "-c",
+              'while [ ! -f /data/.model/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"',
+            ]
+          resources:
+            requests:
+              memory: "64Mi"
+              cpu: "200m"
+            limits:
+              memory: "128Mi"
+              cpu: "500m"
+          volumeMounts:
+            - name: leapfrogai-pv-storage
+              mountPath: /data
+      volumes:
+        - name: leapfrogai-pv-storage
+          persistentVolumeClaim:
+            claimName: lfai-whisper-pv-claim
       securityContext:
         {{- toYaml .Values.podSecurityContext | nindent 8 }}
       containers:
@@ -38,10 +68,15 @@ spec:
               containerPort: {{ .Values.service.port }}
               protocol: TCP
           env:
+            - name: LFAI_MODEL_PATH
+              value: "/data/.model"
             - name: GPU_REQUEST
               value: "{{ (index .Values.resources.requests "nvidia.com/gpu") | default "0" }}"
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            - name: leapfrogai-pv-storage
+              mountPath: "/data"
       {{- with .Values.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}

diff --git a/packages/whisper/chart/templates/pvc.yaml b/packages/whisper/chart/templates/pvc.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: lfai-whisper-pv-claim  # must match claimName in the whisper deployment
+  namespace: leapfrogai  # same namespace the Zarf data injection targets
+spec:
+  storageClassName: "local-path"
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 15Gi
diff --git a/packages/whisper/main.py b/packages/whisper/main.py
@@ -9,12 +9,13 @@
 
 logger = logging.getLogger(__name__)
 
-model_path = ".model"
+model_path = os.environ.get("LFAI_MODEL_PATH", ".model")
 
 GPU_ENABLED = True if int(os.environ.get("GPU_REQUEST", 0)) > 0 else False
 
 
 def make_transcribe_request(filename, task, language, temperature, prompt):
+    print(f"model_path: {model_path}")
     device = "cuda" if GPU_ENABLED else "cpu"
     model = WhisperModel(model_path, device=device, compute_type="float32")
 

diff --git a/packages/whisper/zarf.yaml b/packages/whisper/zarf.yaml
@@ -32,3 +32,20 @@ components:
           - "whisper-values.yaml"
     images:
       - ghcr.io/defenseunicorns/leapfrogai/whisper:###ZARF_PKG_TMPL_IMAGE_VERSION###
+      - cgr.dev/chainguard/bash:latest
+    dataInjections:
+      - source: .model/
+        target:
+          namespace: leapfrogai
+          selector: app=lfai-whisper
+          container: data-loader
+          path: /data/.model
+        compress: true
+    actions:
+      onCreate:
+        before:
+          # NOTE: This assumes python is installed and in $PATH
+          # TODO: add ctranslate2 and transformers[torch] to the pyproject dev deps
+          - cmd: ct2-transformers-converter --model ${MODEL_NAME} --output_dir .model --copy_files tokenizer.json --quantization float32 --force
+            env:
+              - MODEL_NAME=openai/whisper-base