Skip to content

Commit

Permalink
chore: add PVC to whisper package to store model weights
Browse files Browse the repository at this point in the history
  • Loading branch information
YrrepNoj committed Jul 26, 2024
1 parent d7150f3 commit 321996d
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 8 deletions.
7 changes: 0 additions & 7 deletions packages/whisper/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,6 @@ COPY packages/whisper packages/whisper
RUN rm -f packages/whisper/build/*.whl
RUN python -m pip wheel "packages/whisper[dev]" -w packages/whisper/build --find-links=${SDK_DEST}

# download and convert OpenAI's whisper base
ARG MODEL_NAME=openai/whisper-base
RUN pip install ctranslate2 transformers[torch] --no-index --find-links=packages/whisper/build/
RUN ct2-transformers-converter --model ${MODEL_NAME} --output_dir .model --copy_files tokenizer.json --quantization float32
RUN pip uninstall -y ctranslate2 transformers[torch]

RUN pip install packages/whisper/build/lfai_whisper*.whl --no-index --find-links=packages/whisper/build/

# Use hardened ffmpeg image to get compiled binaries
Expand All @@ -40,7 +34,6 @@ COPY --from=ffmpeg /usr/bin/ffprobe /usr/bin
COPY --from=ffmpeg /usr/lib/lib* /usr/lib

COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/
COPY --from=builder /leapfrogai/.model/ /leapfrogai/.model/

# set the path to the cuda 11.8 dependencies
ENV LD_LIBRARY_PATH \
Expand Down
35 changes: 35 additions & 0 deletions packages/whisper/chart/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,38 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
app: lfai-whisper
{{- include "chart.selectorLabels" . | nindent 8 }}
spec:
# It's necessary to include the ###ZARF_DATA_INJECTION_MARKER### somewhere in the podspec, otherwise data injections will not occur.
initContainers:
  # Holds the pod in the init phase until the Zarf data injection into the
  # shared volume has completed; this is the container targeted by the
  # package's dataInjections entry (container: data-loader).
  - name: data-loader
    image: cgr.dev/chainguard/bash:latest
    # Only runAsUser/runAsGroup are set here. fsGroup is a pod-level
    # (PodSecurityContext) field and is not part of the container-level
    # SecurityContext, so it has been dropped from this mapping.
    # NOTE(review): if the volume needs group ownership, fsGroup should be
    # set via .Values.podSecurityContext - confirm it is configured there.
    securityContext:
      runAsUser: 65532
      runAsGroup: 65532
    # This command looks for the Zarf "data injection marker" which is a
    # timestamped file that is injected after everything else and marks the
    # injection as complete.
    command:
      - "sh"
      - "-c"
      - 'while [ ! -f /data/.model/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"'
    resources:
      requests:
        memory: "64Mi"
        cpu: "200m"
      limits:
        memory: "128Mi"
        cpu: "500m"
    # Same volume that the main container mounts at /data.
    volumeMounts:
      - name: leapfrogai-pv-storage
        mountPath: /data
volumes:
- name: leapfrogai-pv-storage
persistentVolumeClaim:
claimName: lfai-whisper-pv-claim
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
Expand All @@ -38,10 +68,15 @@ spec:
containerPort: {{ .Values.service.port }}
protocol: TCP
env:
- name: LFAI_MODEL_PATH
value: "/data/.model"
- name: GPU_REQUEST
value: "{{ (index .Values.resources.requests "nvidia.com/gpu") | default "0" }}"
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumeMounts:
- name: leapfrogai-pv-storage
mountPath: "/data"
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
12 changes: 12 additions & 0 deletions packages/whisper/chart/templates/pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# PersistentVolumeClaim backing the Whisper model weights.
# The whisper deployment references this claim by name
# (claimName: lfai-whisper-pv-claim) and mounts it at /data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: lfai-whisper-pv-claim
  # Same namespace that the Zarf data injection targets.
  namespace: leapfrogai
spec:
  # NOTE(review): the storage class is hard-coded; presumably clusters
  # without a "local-path" class need this changed - confirm.
  storageClassName: "local-path"
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 15Gi
3 changes: 2 additions & 1 deletion packages/whisper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@

logger = logging.getLogger(__name__)

model_path = ".model"
model_path = os.environ.get("LFAI_MODEL_PATH", ".model")

GPU_ENABLED = True if int(os.environ.get("GPU_REQUEST", 0)) > 0 else False


def make_transcribe_request(filename, task, language, temperature, prompt):
print(f"model_path: {model_path}")
device = "cuda" if GPU_ENABLED else "cpu"
model = WhisperModel(model_path, device=device, compute_type="float32")

Expand Down
17 changes: 17 additions & 0 deletions packages/whisper/zarf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,20 @@ components:
- "whisper-values.yaml"
images:
- ghcr.io/defenseunicorns/leapfrogai/whisper:###ZARF_PKG_TMPL_IMAGE_VERSION###
- cgr.dev/chainguard/bash:latest
dataInjections:
- source: .model/
target:
namespace: leapfrogai
selector: app=lfai-whisper
container: data-loader
path: /data/.model
compress: true
actions:
onCreate:
before:
# NOTE: This assumes python is installed and in $PATH
# TODO: add ctranslate2 and transformers[torch] to the pyproject dev deps
- cmd: ct2-transformers-converter --model ${MODEL_NAME} --output_dir .model --copy_files tokenizer.json --quantization float32 --force
env:
- MODEL_NAME=openai/whisper-base

0 comments on commit 321996d

Please sign in to comment.