add sparse llama 8b

redhat-ai-services · Dec 17, 2024 · 06e45e9 · 06e45e9
1 parent 5979ff0
commit 06e45e9
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 0 deletions.
diff --git a/modelcar-images/sparse-llama-3.1-8b-2of4/Containerfile b/modelcar-images/sparse-llama-3.1-8b-2of4/Containerfile
@@ -0,0 +1,17 @@
+# Builder stage: downloads the Sparse-Llama-3.1-8B-2of4 model files.
+# NOTE(review): the builder image is referenced by the floating `latest` tag,
+# so rebuilds are not reproducible; consider pinning a version tag or digest.
+FROM quay.io/redhat-ai-services/huggingface-modelcar-builder:latest AS base
+
+# The Hugging Face model repo to download
+ENV MODEL_REPO="neuralmagic/Sparse-Llama-3.1-8B-2of4"
+
+# Download the model files. download_model.py is not copied in by this
+# Containerfile; presumably it ships with the builder image and writes to
+# /models (the path copied by the final stage) -- confirm against that image.
+RUN python3 download_model.py --model-repo "${MODEL_REPO}"
+
+# Final image containing only the essential model files
+FROM registry.access.redhat.com/ubi9/ubi-micro:9.4
+
+# Copy only the downloaded model files from the builder stage
+COPY --from=base /models /models
+
+# Run as the non-root UID 1001
+USER 1001
diff --git a/modelcar-images/sparse-llama-3.1-8b-2of4/README.md b/modelcar-images/sparse-llama-3.1-8b-2of4/README.md
@@ -0,0 +1,15 @@
+# Sparse-Llama-3.1-8B-2of4
+
+Model source: https://huggingface.co/neuralmagic/Sparse-Llama-3.1-8B-2of4/tree/main
+
+Image: `quay.io/redhat-ai-services/modelcar-catalog:sparse-llama-3.1-8b-2of4`
+
+Background: https://neuralmagic.com/blog/24-sparse-llama-smaller-models-for-efficient-gpu-inference/
+
+## Building Image
+
+```
+podman build modelcar-images/sparse-llama-3.1-8b-2of4 \
+    -t quay.io/redhat-ai-services/modelcar-catalog:sparse-llama-3.1-8b-2of4  \
+    --platform linux/amd64
+```