diff --git a/packages/text-embeddings/Dockerfile b/packages/text-embeddings/Dockerfile
index 9c1e76f5b0..9af7c55376 100644
--- a/packages/text-embeddings/Dockerfile
+++ b/packages/text-embeddings/Dockerfile
@@ -14,7 +14,6 @@ ENV PATH="/leapfrogai/.venv/bin:$PATH"
 # copy and install all python dependencies
 # NOTE: We are copying the leapfrog whl to this filename because installing 'optional extras' from
 # a wheel requires the absolute path to the wheel file (instead of a wildcard whl)
-
 COPY --from=sdk /leapfrogai/${SDK_DEST} ${SDK_DEST}
 COPY packages/text-embeddings packages/text-embeddings
 
@@ -22,14 +21,6 @@ RUN rm -f packages/text-embeddings/build/*.whl
 RUN python -m pip wheel packages/text-embeddings -w packages/text-embeddings/build --find-links=${SDK_DEST}
 RUN pip install packages/text-embeddings/build/lfai_text_embeddings*.whl --no-index --find-links=packages/text-embeddings/build/
 
-
-# download model
-RUN python -m pip install -U huggingface_hub[cli,hf_transfer]
-ARG REPO_ID="hkunlp/instructor-xl"
-ARG REVISION="ce48b213095e647a6c3536364b9fa00daf57f436"
-COPY packages/text-embeddings/scripts/model_download.py scripts/model_download.py
-RUN REPO_ID=${REPO_ID} REVISION=${REVISION} python scripts/model_download.py
-
 # hardened and slim python image
 FROM ghcr.io/defenseunicorns/leapfrogai/python:3.11
 
@@ -38,7 +29,6 @@ ENV PATH="/leapfrogai/.venv/bin:$PATH"
 WORKDIR /leapfrogai
 
 COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/
-COPY --from=builder /leapfrogai/.model/ /leapfrogai/.model/
 
 COPY packages/text-embeddings/main.py .
 
diff --git a/packages/text-embeddings/chart/templates/deployment.yaml b/packages/text-embeddings/chart/templates/deployment.yaml
index 8d014f1c72..13869b0a5f 100644
--- a/packages/text-embeddings/chart/templates/deployment.yaml
+++ b/packages/text-embeddings/chart/templates/deployment.yaml
@@ -23,8 +23,38 @@ spec:
         {{- toYaml . | nindent 8 }}
       {{- end }}
       labels:
+        app: lfai-text-embeddings
         {{- include "chart.selectorLabels" . | nindent 8 }}
     spec:
+      # It's necessary to include the ###ZARF_DATA_INJECTION_MARKER### somewhere in the podspec, otherwise data injections will not occur.
+      initContainers:
+        - name: data-loader
+          image: cgr.dev/chainguard/bash:latest
+          securityContext:
+            runAsUser: 65532
+            runAsGroup: 65532
+            fsGroup: 65532
+          # This command looks for the Zarf "data injection marker" which is a timestamped file that is injected after everything else and marks the injection as complete.
+          command:
+            [
+              "sh",
+              "-c",
+              'while [ ! -f /data/.model/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"',
+            ]
+          resources:
+            requests:
+              memory: "64Mi"
+              cpu: "200m"
+            limits:
+              memory: "128Mi"
+              cpu: "500m"
+          volumeMounts:
+            - name: leapfrogai-pv-storage
+              mountPath: /data
+      volumes:
+        - name: leapfrogai-pv-storage
+          persistentVolumeClaim:
+            claimName: lfai-text-embeddings-pv-claim
       securityContext:
         {{- toYaml .Values.podSecurityContext | nindent 8 }}
       containers:
@@ -39,6 +69,13 @@ spec:
               protocol: TCP
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
+          volumeMounts:
+            - name: leapfrogai-pv-storage
+              mountPath: "/data"
+          env:
+            # Must match the variable main.py reads; otherwise it falls back to ".model", which is no longer baked into the image.
+            - name: LFAI_MODEL_PATH
+              value: '/data/.model'
       {{- with .Values.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
diff --git a/packages/text-embeddings/chart/templates/pvc.yaml b/packages/text-embeddings/chart/templates/pvc.yaml
new file mode 100644
index 0000000000..fa7e02b675
--- /dev/null
+++ b/packages/text-embeddings/chart/templates/pvc.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: lfai-text-embeddings-pv-claim
+  namespace: leapfrogai
+spec:
+  storageClassName: "local-path"
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 15Gi
diff --git a/packages/text-embeddings/main.py b/packages/text-embeddings/main.py
index 6af2d6a720..0ad8ce8242 100644
--- a/packages/text-embeddings/main.py
+++ b/packages/text-embeddings/main.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import os
 
 from InstructorEmbedding import INSTRUCTOR
 from leapfrogai_sdk import (
@@ -10,7 +11,8 @@
     serve,
 )
 
-model = INSTRUCTOR("./.model")
+model_dir = os.environ.get("LFAI_MODEL_PATH", ".model")
+model = INSTRUCTOR(model_dir)
 
 
 class InstructorEmbedding:
diff --git a/packages/text-embeddings/zarf.yaml b/packages/text-embeddings/zarf.yaml
index d3bec755cf..b47672c31e 100644
--- a/packages/text-embeddings/zarf.yaml
+++ b/packages/text-embeddings/zarf.yaml
@@ -32,3 +32,20 @@ components:
           - "embedding-values.yaml"
     images:
       - ghcr.io/defenseunicorns/leapfrogai/text-embeddings:###ZARF_PKG_TMPL_IMAGE_VERSION###
+      - cgr.dev/chainguard/bash:latest
+    dataInjections:
+      - source: .model/
+        target:
+          namespace: leapfrogai
+          selector: app=lfai-text-embeddings
+          container: data-loader
+          path: /data/.model
+        compress: true
+    actions:
+      onCreate:
+        before:
+          # NOTE: This assumes python is installed and in $PATH
+          - cmd: python scripts/model_download.py
+            env:
+              - REPO_ID=hkunlp/instructor-xl
+              - REVISION=ce48b213095e647a6c3536364b9fa00daf57f436