Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OLS-1093: Enable hermetic build for the rag-content component #124

Merged
merged 2 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions .tekton/lightspeed-rag-content-pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,15 @@ spec:
value: Containerfile
- name: path-context
value: .
- name: prefetch-input
value: '[{"type": "generic", "path": "."}, {"type": "rpm", "path": "."}, {"type": "pip", "path": ".", "allow_binary": "true"}]'
- name: hermetic
value: "true"
- name: build-args
value: [FLAVOR=gpu]
value: [FLAVOR=gpu, HERMETIC=true]
timeouts:
pipeline: "1h0m0s"
tasks: "1h0m0s"
pipeline: "4h0m0s"
tasks: "4h0m0s"
pipelineSpec:
description: |
This pipeline is ideal for building container images from a Containerfile while reducing network traffic.
Expand Down Expand Up @@ -192,14 +196,16 @@ spec:
params:
- name: input
value: $(params.prefetch-input)
- name: dev-package-managers
value: "true"
runAfter:
- clone-repository
taskRef:
params:
- name: name
value: prefetch-dependencies
- name: bundle
value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies:0.1@sha256:fe7234e3824d1e65d6a7aac352e7a6bbce623d90d8d7da9aceeee108ad2c61be
value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies:0.1@sha256:47d8d3320b4e29360108f18235598dd247bc316a4792063d970bffb00e61b71a
- name: kind
value: task
resolver: bundles
Expand All @@ -216,6 +222,9 @@ spec:
- name: netrc
workspace: netrc
- name: build-container
env:
- name: TMPDIR
value: /workspace/buildah-tmp
params:
- name: PLATFORM
value: linux-g6xlarge/amd64
Expand Down Expand Up @@ -302,7 +311,7 @@ spec:
- name: name
value: source-build
- name: bundle
value: quay.io/konflux-ci/tekton-catalog/task-source-build:0.1@sha256:21cb5ebaff7a9216903cf78933dc4ec4dd6283a52636b16590a5f52ceb278269
value: quay.io/konflux-ci/tekton-catalog/task-source-build:0.1@sha256:ddfa1fb418c1d9d55d7d70d39fe8f35ce05e96073bcd057bb6aaacd1f839cc51
- name: kind
value: task
resolver: bundles
Expand Down Expand Up @@ -507,7 +516,7 @@ spec:
- ReadWriteOnce
resources:
requests:
storage: 8Gi
storage: 24Gi
status: {}
- name: git-auth
secret:
Expand Down
21 changes: 15 additions & 6 deletions .tekton/lightspeed-rag-content-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@ spec:
value: Containerfile
- name: path-context
value: .
- name: prefetch-input
value: '[{"type": "generic", "path": "."}, {"type": "rpm", "path": "."}, {"type": "pip", "path": ".", "allow_binary": "true"}]'
- name: hermetic
value: "true"
- name: build-args
value: [FLAVOR=gpu]
value: [FLAVOR=gpu, HERMETIC=true]
timeouts:
pipeline: "1h0m0s"
tasks: "1h0m0s"
pipeline: "4h0m0s"
tasks: "4h0m0s"
pipelineSpec:
description: |
This pipeline is ideal for building container images from a Containerfile while reducing network traffic.
Expand Down Expand Up @@ -190,14 +194,16 @@ spec:
params:
- name: input
value: $(params.prefetch-input)
- name: dev-package-managers
value: "true"
runAfter:
- clone-repository
taskRef:
params:
- name: name
value: prefetch-dependencies
- name: bundle
value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies:0.1@sha256:fe7234e3824d1e65d6a7aac352e7a6bbce623d90d8d7da9aceeee108ad2c61be
value: quay.io/konflux-ci/tekton-catalog/task-prefetch-dependencies:0.1@sha256:47d8d3320b4e29360108f18235598dd247bc316a4792063d970bffb00e61b71a
- name: kind
value: task
resolver: bundles
Expand All @@ -214,6 +220,9 @@ spec:
- name: netrc
workspace: netrc
- name: build-container
env:
- name: TMPDIR
value: /workspace/buildah-tmp
params:
- name: PLATFORM
value: linux-g6xlarge/amd64
Expand Down Expand Up @@ -300,7 +309,7 @@ spec:
- name: name
value: source-build
- name: bundle
value: quay.io/konflux-ci/tekton-catalog/task-source-build:0.1@sha256:21cb5ebaff7a9216903cf78933dc4ec4dd6283a52636b16590a5f52ceb278269
value: quay.io/konflux-ci/tekton-catalog/task-source-build:0.1@sha256:ddfa1fb418c1d9d55d7d70d39fe8f35ce05e96073bcd057bb6aaacd1f839cc51
- name: kind
value: task
resolver: bundles
Expand Down Expand Up @@ -505,7 +514,7 @@ spec:
- ReadWriteOnce
resources:
requests:
storage: 8Gi
storage: 24Gi
status: {}
- name: git-auth
secret:
Expand Down
27 changes: 18 additions & 9 deletions Containerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ARG EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
ARG FLAVOR=cpu
ARG HERMETIC=false

FROM registry.access.redhat.com/ubi9/python-311 as cpu-base
ARG EMBEDDING_MODEL
Expand All @@ -8,33 +9,41 @@ ARG FLAVOR
FROM nvcr.io/nvidia/cuda:12.6.2-devel-ubi9 as gpu-base
ARG EMBEDDING_MODEL
ARG FLAVOR
RUN dnf install -y python3.11 python3.11-pip libcudnn8 libnccl
RUN dnf install -y python3.11 python3.11-pip libcudnn9 libnccl

FROM ${FLAVOR}-base as lightspeed-rag-builder
ARG EMBEDDING_MODEL
ARG FLAVOR
ARG HERMETIC

USER 0
WORKDIR /workdir

COPY pyproject.toml pdm.lock* Makefile .
RUN make install-tools && pdm config python.use_venv false && make pdm-lock-check install-deps
COPY requirements.txt .
RUN pip3.11 install --no-cache-dir -r requirements.txt

COPY ocp-product-docs-plaintext ./ocp-product-docs-plaintext
COPY runbooks ./runbooks

COPY scripts/download_embeddings_model.py .
RUN pdm run python download_embeddings_model.py -l ./embeddings_model -r ${EMBEDDING_MODEL}
COPY embeddings_model ./embeddings_model
#RUN cat embeddings_model/model.safetensors.part* > embeddings_model/model.safetensors && rm embeddings_model/model.safetensors.part*
RUN cd embeddings_model; if [ "$HERMETIC" == "true" ]; then \
ln -s /cachi2/output/deps/generic/model.safetensors model.safetensors; \
else \
wget -q https://huggingface.co/sentence-transformers/all-mpnet-base-v2/resolve/9a3225965996d404b775526de6dbfe85d3368642/model.safetensors; \
fi

RUN export LD_LIBRARY_PATH=/usr/local/cuda-12.6/compat:$LD_LIBRARY_PATH; \
pdm run python -c "import torch; print(torch.version.cuda); print(torch.cuda.is_available());"
RUN if [ "$FLAVOR" == "gpu" ]; then \
export LD_LIBRARY_PATH=/usr/local/cuda-12.6/compat:$LD_LIBRARY_PATH; \
python3.11 -c "import torch; print(torch.version.cuda); print(torch.cuda.is_available());"; \
fi

COPY scripts/generate_embeddings.py .
RUN export LD_LIBRARY_PATH=/usr/local/cuda-12.6/compat:$LD_LIBRARY_PATH; \
set -e && for OCP_VERSION in $(ls -1 ocp-product-docs-plaintext); do \
pdm run python generate_embeddings.py -f ocp-product-docs-plaintext/${OCP_VERSION} -r runbooks/alerts -md embeddings_model \
python3.11 generate_embeddings.py -f ocp-product-docs-plaintext/${OCP_VERSION} -r runbooks/alerts -md embeddings_model \
-mn ${EMBEDDING_MODEL} -o vector_db/ocp_product_docs/${OCP_VERSION} \
-i ocp-product-docs-$(echo $OCP_VERSION | sed 's/\./_/g') -v ${OCP_VERSION}; \
-i ocp-product-docs-$(echo $OCP_VERSION | sed 's/\./_/g') -v ${OCP_VERSION} -hb $HERMETIC; \
done

FROM registry.access.redhat.com/ubi9/ubi-minimal@sha256:d85040b6e3ed3628a89683f51a38c709185efc3fb552db2ad1b9180f2a6c38be
Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ $(error Unsupported FLAVOR $(FLAVOR), must be 'cpu' or 'gpu')
endif

install-tools: ## Install required utilities/tools
@command -v pdm > /dev/null || { echo >&2 "pdm is not installed. Installing..."; pip3.11 install --upgrade pip pdm; }
@command -v pdm > /dev/null || { echo >&2 "pdm is not installed. Installing..."; pip3.11 install --no-cache-dir --upgrade pip pdm; }

pdm-lock-check: ## Check that the pdm.lock file is in a good shape
pdm lock --check --group $(TORCH_GROUP) --lockfile pdm.lock.$(TORCH_GROUP)
Expand Down Expand Up @@ -43,6 +43,10 @@ update-docs: ## Update the plaintext OCP docs in ocp-product-docs-plaintext/
done
scripts/get_runbooks.sh

update-model: ## Update the local copy of the embedding model
@rm -rf ./embeddings_model
@python scripts/download_embeddings_model.py -l ./embeddings_model -r sentence-transformers/all-mpnet-base-v2

build-image: ## Build a rag-content container image.
podman build -t rag-content .

Expand Down
7 changes: 7 additions & 0 deletions artifacts.lock.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
metadata:
version: "1.0"
artifacts:
- download_url: "https://huggingface.co/sentence-transformers/all-mpnet-base-v2/resolve/9a3225965996d404b775526de6dbfe85d3368642/model.safetensors"
checksum: "sha256:78c0197b6159d92658e319bc1d72e4c73a9a03dd03815e70e555c5ef05615658"
filename: "model.safetensors"
6 changes: 6 additions & 0 deletions cuda.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[cuda]
name=cuda
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
28 changes: 28 additions & 0 deletions embeddings_model/.gitattributes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question/Clarification:
I understand that we won't be able to download the embedding model during build..
I am not aware of all the possible options for a hermetic build. But keeping all the files related to the embedding model in our repo doesn't seem right to me (considering we are using it as is, without any re-training).

Is this a temporary solution ?
Can we create some kind of base image with embedding model outside normal build procedure ?
cc: @xrajesh @tisnik

Copy link
Contributor Author

@syedriko syedriko Nov 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I see your concern. The point of the hermetic build is to build from known input artifacts. Copying into our GH repo, with its access control, achieves that end.
Wrt the embedding model, we can

  • copy the whole thing into our GH repo, which I tried. This is not a good idea because GH doesn't handle large binary blobs well.
  • the present situation where the small text files of the embedding model are copied to our GH repo and the model blob gets downloaded before the build with SHA verification
  • we can treat every file we need from the model like we treat the model's blob, download them with the generic fetcher and check their SHAs.
    We can place the embedding model into its base image, but I'm not sure what that would change. I don't think we need/want to circumvent hermetic build.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My primary concern was duplicating files (the embedding model) in our repo, and that too only partially..

  • the safetensors file is missing anyway due to its large size..
  • what if (it may never happen) we keep an option of selecting one model from multiple embedding models? In that case we'd have a lot of unrelated files in our repo.

So the options below could avoid keeping these files in our repo, but they seem like too much effort.

we can treat every file we need from the model like we treat the model's blob and download it with the generic fetcher and check their SHAs.
We can place the embedding model into its base image

Based on your comment, I'm okay with the current approach..

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/lgtm

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bin.* filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
model.safetensors filter=lfs diff=lfs merge=lfs -text
7 changes: 7 additions & 0 deletions embeddings_model/1_Pooling/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"word_embedding_dimension": 768,
"pooling_mode_cls_token": false,
"pooling_mode_mean_tokens": true,
"pooling_mode_max_tokens": false,
"pooling_mode_mean_sqrt_len_tokens": false
}
Loading