Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OLS-1093: Enable hermetic build for the rag-content component #124

Merged
merged 2 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions .tekton/lightspeed-rag-content-pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ spec:
value: Containerfile
- name: path-context
value: .
- name: prefetch-input
value: '[{"type": "generic", "path": "."}, {"type": "rpm", "path": "."}, {"type": "pip", "path": ".", "allow_binary": "true"}]'
- name: hermetic
value: "true"
- name: build-args
value: [FLAVOR=gpu]
value: [FLAVOR=gpu, HERMETIC=true]
timeouts:
pipeline: "1h0m0s"
tasks: "1h0m0s"
pipeline: "4h0m0s"
tasks: "4h0m0s"
pipelineSpec:
description: |
This pipeline is ideal for building container images from a Containerfile while reducing network traffic.
Expand Down Expand Up @@ -192,14 +196,16 @@ spec:
params:
- name: input
value: $(params.prefetch-input)
- name: dev-package-managers
value: "true"
runAfter:
- clone-repository
taskRef:
params:
- name: name
value: prefetch-dependencies
- name: bundle
value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies:0.1@sha256:fe7234e3824d1e65d6a7aac352e7a6bbce623d90d8d7da9aceeee108ad2c61be
value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies:0.1@sha256:47d8d3320b4e29360108f18235598dd247bc316a4792063d970bffb00e61b71a
- name: kind
value: task
resolver: bundles
Expand All @@ -216,6 +222,9 @@ spec:
- name: netrc
workspace: netrc
- name: build-container
env:
- name: TMPDIR
value: /workspace/buildah-tmp
params:
- name: PLATFORM
value: linux-g6xlarge/amd64
Expand Down Expand Up @@ -302,7 +311,7 @@ spec:
- name: name
value: source-build
- name: bundle
value: quay.io/konflux-ci/tekton-catalog/task-source-build:0.1@sha256:21cb5ebaff7a9216903cf78933dc4ec4dd6283a52636b16590a5f52ceb278269
value: quay.io/konflux-ci/tekton-catalog/task-source-build:0.1@sha256:ddfa1fb418c1d9d55d7d70d39fe8f35ce05e96073bcd057bb6aaacd1f839cc51
- name: kind
value: task
resolver: bundles
Expand Down Expand Up @@ -507,7 +516,7 @@ spec:
- ReadWriteOnce
resources:
requests:
storage: 8Gi
storage: 24Gi
status: {}
- name: git-auth
secret:
Expand Down
21 changes: 15 additions & 6 deletions .tekton/lightspeed-rag-content-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@ spec:
value: Containerfile
- name: path-context
value: .
- name: prefetch-input
value: '[{"type": "generic", "path": "."}, {"type": "rpm", "path": "."}, {"type": "pip", "path": ".", "allow_binary": "true"}]'
- name: hermetic
value: "true"
- name: build-args
value: [FLAVOR=gpu]
value: [FLAVOR=gpu, HERMETIC=true]
timeouts:
pipeline: "1h0m0s"
tasks: "1h0m0s"
pipeline: "4h0m0s"
tasks: "4h0m0s"
pipelineSpec:
description: |
This pipeline is ideal for building container images from a Containerfile while reducing network traffic.
Expand Down Expand Up @@ -190,14 +194,16 @@ spec:
params:
- name: input
value: $(params.prefetch-input)
- name: dev-package-managers
value: "true"
runAfter:
- clone-repository
taskRef:
params:
- name: name
value: prefetch-dependencies
- name: bundle
value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies:0.1@sha256:fe7234e3824d1e65d6a7aac352e7a6bbce623d90d8d7da9aceeee108ad2c61be
value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies:0.1@sha256:47d8d3320b4e29360108f18235598dd247bc316a4792063d970bffb00e61b71a
- name: kind
value: task
resolver: bundles
Expand All @@ -214,6 +220,9 @@ spec:
- name: netrc
workspace: netrc
- name: build-container
env:
- name: TMPDIR
value: /workspace/buildah-tmp
params:
- name: PLATFORM
value: linux-g6xlarge/amd64
Expand Down Expand Up @@ -300,7 +309,7 @@ spec:
- name: name
value: source-build
- name: bundle
value: quay.io/konflux-ci/tekton-catalog/task-source-build:0.1@sha256:21cb5ebaff7a9216903cf78933dc4ec4dd6283a52636b16590a5f52ceb278269
value: quay.io/konflux-ci/tekton-catalog/task-source-build:0.1@sha256:ddfa1fb418c1d9d55d7d70d39fe8f35ce05e96073bcd057bb6aaacd1f839cc51
- name: kind
value: task
resolver: bundles
Expand Down Expand Up @@ -505,7 +514,7 @@ spec:
- ReadWriteOnce
resources:
requests:
storage: 8Gi
storage: 24Gi
status: {}
- name: git-auth
secret:
Expand Down
27 changes: 18 additions & 9 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ARG EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
ARG FLAVOR=cpu
ARG HERMETIC=false

FROM registry.access.redhat.com/ubi9/python-311 as cpu-base
ARG EMBEDDING_MODEL
Expand All @@ -8,33 +9,41 @@ ARG FLAVOR
FROM nvcr.io/nvidia/cuda:12.6.2-devel-ubi9 as gpu-base
ARG EMBEDDING_MODEL
ARG FLAVOR
RUN dnf install -y python3.11 python3.11-pip libcudnn8 libnccl
RUN dnf install -y python3.11 python3.11-pip libcudnn9 libnccl

FROM ${FLAVOR}-base as lightspeed-rag-builder
ARG EMBEDDING_MODEL
ARG FLAVOR
ARG HERMETIC

USER 0
WORKDIR /workdir

COPY pyproject.toml pdm.lock* Makefile .
RUN make install-tools && pdm config python.use_venv false && make pdm-lock-check install-deps
COPY requirements.txt .
RUN pip3.11 install --no-cache-dir -r requirements.txt

COPY ocp-product-docs-plaintext ./ocp-product-docs-plaintext
COPY runbooks ./runbooks

COPY scripts/download_embeddings_model.py .
RUN pdm run python download_embeddings_model.py -l ./embeddings_model -r ${EMBEDDING_MODEL}
COPY embeddings_model ./embeddings_model
#RUN cat embeddings_model/model.safetensors.part* > embeddings_model/model.safetensors && rm embeddings_model/model.safetensors.part*
RUN cd embeddings_model; if [ "$HERMETIC" == "true" ]; then \
ln -s /cachi2/output/deps/generic/model.safetensors model.safetensors; \
else \
wget -q https://huggingface.co/sentence-transformers/all-mpnet-base-v2/resolve/9a3225965996d404b775526de6dbfe85d3368642/model.safetensors; \
fi

RUN export LD_LIBRARY_PATH=/usr/local/cuda-12.6/compat:$LD_LIBRARY_PATH; \
pdm run python -c "import torch; print(torch.version.cuda); print(torch.cuda.is_available());"
RUN if [ "$FLAVOR" == "gpu" ]; then \
export LD_LIBRARY_PATH=/usr/local/cuda-12.6/compat:$LD_LIBRARY_PATH; \
python3.11 -c "import torch; print(torch.version.cuda); print(torch.cuda.is_available());"; \
fi

COPY scripts/generate_embeddings.py .
RUN export LD_LIBRARY_PATH=/usr/local/cuda-12.6/compat:$LD_LIBRARY_PATH; \
set -e && for OCP_VERSION in $(ls -1 ocp-product-docs-plaintext); do \
pdm run python generate_embeddings.py -f ocp-product-docs-plaintext/${OCP_VERSION} -r runbooks/alerts -md embeddings_model \
python3.11 generate_embeddings.py -f ocp-product-docs-plaintext/${OCP_VERSION} -r runbooks/alerts -md embeddings_model \
-mn ${EMBEDDING_MODEL} -o vector_db/ocp_product_docs/${OCP_VERSION} \
-i ocp-product-docs-$(echo $OCP_VERSION | sed 's/\./_/g') -v ${OCP_VERSION}; \
-i ocp-product-docs-$(echo $OCP_VERSION | sed 's/\./_/g') -v ${OCP_VERSION} -hb $HERMETIC; \
done

FROM registry.access.redhat.com/ubi9/ubi-minimal@sha256:d85040b6e3ed3628a89683f51a38c709185efc3fb552db2ad1b9180f2a6c38be
Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ $(error Unsupported FLAVOR $(FLAVOR), must be 'cpu' or 'gpu')
endif

install-tools: ## Install required utilities/tools
@command -v pdm > /dev/null || { echo >&2 "pdm is not installed. Installing..."; pip3.11 install --upgrade pip pdm; }
@command -v pdm > /dev/null || { echo >&2 "pdm is not installed. Installing..."; pip3.11 install --no-cache-dir --upgrade pip pdm; }

pdm-lock-check: ## Check that the pdm.lock file is in a good shape
pdm lock --check --group $(TORCH_GROUP) --lockfile pdm.lock.$(TORCH_GROUP)
Expand Down Expand Up @@ -43,6 +43,10 @@ update-docs: ## Update the plaintext OCP docs in ocp-product-docs-plaintext/
done
scripts/get_runbooks.sh

update-model: ## Update the local copy of the embedding model
@rm -rf ./embeddings_model
@python scripts/download_embeddings_model.py -l ./embeddings_model -r sentence-transformers/all-mpnet-base-v2

build-image: ## Build a rag-content container image.
podman build -t rag-content .

Expand Down
7 changes: 7 additions & 0 deletions artifacts.lock.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
metadata:
version: "1.0"
artifacts:
- download_url: "https://huggingface.co/sentence-transformers/all-mpnet-base-v2/resolve/9a3225965996d404b775526de6dbfe85d3368642/model.safetensors"
checksum: "sha256:78c0197b6159d92658e319bc1d72e4c73a9a03dd03815e70e555c5ef05615658"
filename: "model.safetensors"
6 changes: 6 additions & 0 deletions cuda.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[cuda]
name=cuda
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
28 changes: 28 additions & 0 deletions embeddings_model/.gitattributes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question/Clarification:
I understand that we won't be able to download the embedding model during build..
I am not aware of all the possible options for a hermetic build. But keeping all the files related to the embedding model in our repo doesn't seem right to me (considering we are using it as is, without any re-training).

Is this a temporary solution ?
Can we create some kind of base image with embedding model outside normal build procedure ?
cc: @xrajesh @tisnik

Copy link
Contributor Author

@syedriko syedriko Nov 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I see your concern. The point of the hermetic build is to build from known input artifacts. Copying into our GH repo, with its access control, achieves that end.
Wrt the embedding model, we can

  • copy the whole thing into our GH repo, which I tried. This is not a good idea because GH doesn't handle large binary blobs well.
  • the present situation where the small text files of the embedding model are copied to our GH repo and the model blob gets downloaded before the build with SHA verification
  • we can treat every file we need from the model like we treat the model's blob, download them with the generic fetcher and check their SHAs.
    We can place the embedding model into its base image, but I'm not sure what that would change. I don't think we need/want to circumvent hermetic build.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My primary concern was duplicating files (the embedding model) in our repo, and that too only partially..

  • the safetensors file is missing anyway due to its large size..
  • what if (it may never happen) we keep an option of selecting one model from multiple embedding models? In that case we'd have a lot of unrelated files in our repo.

So the options below could avoid keeping these files in our repo, but they seem like too much effort.

we can treat every file we need from the model like we treat the model's blob and download it with the generic fetcher and check their SHAs.
We can place the embedding model into its base image

Based on your comment, I'm okay with the current approach..

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/lgtm

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bin.* filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
model.safetensors filter=lfs diff=lfs merge=lfs -text
7 changes: 7 additions & 0 deletions embeddings_model/1_Pooling/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"word_embedding_dimension": 768,
"pooling_mode_cls_token": false,
"pooling_mode_mean_tokens": true,
"pooling_mode_max_tokens": false,
"pooling_mode_mean_sqrt_len_tokens": false
}
Loading