cuda : fix defrag with quantized KV (#9319) #22
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This workflow uses actions that are not certified by GitHub. | |
# They are provided by a third-party and are governed by | |
# separate terms of service, privacy policy, and support | |
# documentation. | |
# GitHub recommends pinning actions to a commit SHA. | |
# To get a newer version, you will need to update the SHA. | |
# You can also reference a tag or branch, but the action may change without warning. | |
name: Publish Docker image | |
on: | |
#pull_request: | |
push: | |
branches: | |
- master | |
paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal'] | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} | |
cancel-in-progress: true | |
jobs: | |
push_to_registry: | |
name: Push Docker image to Docker Hub | |
#if: github.event.pull_request.draft == false | |
runs-on: ubuntu-latest | |
env: | |
COMMIT_SHA: ${{ github.sha }} | |
strategy: | |
matrix: | |
config: | |
- { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" } | |
- { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" } | |
- { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" } | |
- { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" } | |
- { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" } | |
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" } | |
- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } | |
- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } | |
# Note: the full-rocm image is failing due to a "no space left on device" error. It is disabled for now to allow the workflow to complete. | |
#- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" } | |
- { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" } | |
- { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" } | |
steps: | |
- name: Check out the repo | |
uses: actions/checkout@v4 | |
- name: Set up QEMU | |
uses: docker/setup-qemu-action@v2 | |
- name: Set up Docker Buildx | |
uses: docker/setup-buildx-action@v2 | |
- name: Log in to Docker Hub | |
uses: docker/login-action@v2 | |
with: | |
registry: ghcr.io | |
username: ${{ github.repository_owner }} | |
password: ${{ secrets.GITHUB_TOKEN }} | |
# https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example | |
- name: Free Disk Space (Ubuntu) | |
uses: jlumbroso/free-disk-space@main | |
with: | |
# this might remove tools that are actually needed, | |
# if set to "true" but frees about 6 GB | |
tool-cache: false | |
# all of these default to true, but feel free to set to | |
# "false" if necessary for your workflow | |
android: true | |
dotnet: true | |
haskell: true | |
large-packages: true | |
docker-images: true | |
swap-storage: true | |
- name: Determine tag name | |
id: tag | |
shell: bash | |
run: | | |
BUILD_NUMBER="$(git rev-list --count HEAD)" | |
SHORT_HASH="$(git rev-parse --short=7 HEAD)" | |
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then | |
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT | |
else | |
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') | |
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT | |
fi | |
- name: Downcase github.repository_owner | |
run: | | |
echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV | |
env: | |
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' | |
- name: Build and push Docker image (tagged + versioned) | |
if: github.event_name == 'push' | |
uses: docker/build-push-action@v6 | |
with: | |
context: . | |
push: true | |
platforms: ${{ matrix.config.platforms }} | |
tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}" | |
file: ${{ matrix.config.dockerfile }} |