Skip to content

Commit

Permalink
Merge pull request #6
Browse files Browse the repository at this point in the history
Update llama.cpp to latest
  • Loading branch information
wsxiaoys authored Mar 1, 2024
2 parents 4af2548 + c504a54 commit 807ee66
Show file tree
Hide file tree
Showing 220 changed files with 105,247 additions and 8,433 deletions.
16 changes: 9 additions & 7 deletions .devops/main-intel.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
ARG UBUNTU_VERSION=22.04

FROM intel/hpckit:$ONEAPI_VERSION as build
FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF
RUN apt-get update && \
apt-get install -y git

WORKDIR /app

COPY . .

# for some reasons, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" give worse performance
RUN mkdir build && \
cd build && \
cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \
cmake --build . --config Release --target main server
if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
echo "LLAMA_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
fi && \
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
cmake --build . --config Release --target main

FROM ubuntu:$UBUNTU_VERSION as runtime
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

COPY --from=build /app/build/bin/main /main
COPY --from=build /app/build/bin/server /server

ENV LC_ALL=C.utf8

Expand Down
29 changes: 29 additions & 0 deletions .devops/main-vulkan.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt update -y && \
apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN mkdir build && \
cd build && \
cmake .. -DLLAMA_VULKAN=1 && \
cmake --build . --config Release --target main

# Clean up
WORKDIR /
RUN cp /app/build/bin/main /main && \
rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/main" ]
37 changes: 37 additions & 0 deletions .devops/nix/docker.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
lib,
dockerTools,
buildEnv,
llama-cpp,
interactive ? true,
coreutils,
}:

# A tar that can be fed into `docker load`:
#
# $ nix build .#llamaPackages.docker
# $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
# .#llamaPackages.docker: 125M;
# .#llamaPackagesCuda.docker: 537M;
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
name = llama-cpp.pname;
tag = "latest";

contents =
[ llama-cpp ]
++ lib.optionals interactive [
coreutils
dockerTools.binSh
dockerTools.caCertificates
];
}
17 changes: 15 additions & 2 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,22 @@
cudaPackages,
darwin,
rocmPackages,
vulkan-headers,
vulkan-loader,
clblast,
useBlas ? builtins.all (x: !x) [
useCuda
useMetalKit
useOpenCL
useRocm
useVulkan
],
useCuda ? config.cudaSupport,
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
useMpi ? false, # Increases the runtime closure size by ~700M
useOpenCL ? false,
useRocm ? config.rocmSupport,
useVulkan ? false,
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
}@inputs:

Expand All @@ -48,7 +52,8 @@ let
++ lib.optionals useMetalKit [ "MetalKit" ]
++ lib.optionals useMpi [ "MPI" ]
++ lib.optionals useOpenCL [ "OpenCL" ]
++ lib.optionals useRocm [ "ROCm" ];
++ lib.optionals useRocm [ "ROCm" ]
++ lib.optionals useVulkan [ "Vulkan" ];

pnameSuffix =
strings.optionalString (suffices != [ ])
Expand Down Expand Up @@ -108,6 +113,11 @@ let
hipblas
rocblas
];

vulkanBuildInputs = [
vulkan-headers
vulkan-loader
];
in

effectiveStdenv.mkDerivation (
Expand Down Expand Up @@ -164,7 +174,8 @@ effectiveStdenv.mkDerivation (
++ optionals useCuda cudaBuildInputs
++ optionals useMpi [ mpi ]
++ optionals useOpenCL [ clblast ]
++ optionals useRocm rocmBuildInputs;
++ optionals useRocm rocmBuildInputs
++ optionals useVulkan vulkanBuildInputs;

cmakeFlags =
[
Expand All @@ -178,6 +189,7 @@ effectiveStdenv.mkDerivation (
(cmakeBool "LLAMA_HIPBLAS" useRocm)
(cmakeBool "LLAMA_METAL" useMetalKit)
(cmakeBool "LLAMA_MPI" useMpi)
(cmakeBool "LLAMA_VULKAN" useVulkan)
]
++ optionals useCuda [
(
Expand Down Expand Up @@ -218,6 +230,7 @@ effectiveStdenv.mkDerivation (
useMpi
useOpenCL
useRocm
useVulkan
;

shell = mkShell {
Expand Down
3 changes: 3 additions & 0 deletions .devops/nix/scope.nix
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,8 @@ lib.makeScope newScope (
self: {
inherit llamaVersion;
llama-cpp = self.callPackage ./package.nix { };
docker = self.callPackage ./docker.nix { };
docker-min = self.callPackage ./docker.nix { interactive = false; };
sif = self.callPackage ./sif.nix { };
}
)
27 changes: 27 additions & 0 deletions .devops/nix/sif.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
lib,
singularity-tools,
llama-cpp,
bashInteractive,
interactive ? false,
}:

let
optionalInt = cond: x: if cond then x else 0;
in
singularity-tools.buildImage rec {
inherit (llama-cpp) name;
contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];

# These are excessive (but safe) for most variants. Building singularity
# images requires superuser privileges, so we build them inside a VM in a
# writable image of pre-determined size.
#
# ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
#
# Expected image sizes:
# - cpu/blas: 150M,
# - cuda, all gencodes: 560M,
diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
memSize = diskSize;
}
32 changes: 32 additions & 0 deletions .devops/server-cuda.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV LLAMA_CUBLAS=1

RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

COPY --from=build /app/server /server

ENTRYPOINT [ "/server" ]
28 changes: 28 additions & 0 deletions .devops/server-intel.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG LLAMA_SYCL_F16=OFF
RUN apt-get update && \
apt-get install -y git

WORKDIR /app

COPY . .

RUN mkdir build && \
cd build && \
if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
echo "LLAMA_SYCL_F16 is set" && \
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
fi && \
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
cmake --build . --config Release --target server

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

COPY --from=build /app/build/bin/server /server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
45 changes: 45 additions & 0 deletions .devops/server-rocm.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
gfx803 \
gfx900 \
gfx906 \
gfx908 \
gfx90a \
gfx1010 \
gfx1030 \
gfx1100 \
gfx1101 \
gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
&& pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make

ENTRYPOINT [ "/app/server" ]
29 changes: 29 additions & 0 deletions .devops/server-vulkan.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
apt update -y && \
apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN mkdir build && \
cd build && \
cmake .. -DLLAMA_VULKAN=1 && \
cmake --build . --config Release --target server

# Clean up
WORKDIR /
RUN cp /app/build/bin/server /server && \
rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
20 changes: 20 additions & 0 deletions .devops/server.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make

FROM ubuntu:$UBUNTU_VERSION as runtime

COPY --from=build /app/server /server

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/server" ]
1 change: 1 addition & 0 deletions .ecrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"Exclude": ["^\\.gitmodules$"],
"Disable": {
"IndentSize": true
}
Expand Down
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
[flake8]
max-line-length = 125
ignore = W503
2 changes: 2 additions & 0 deletions .github/ISSUE_TEMPLATE/bug.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ assignees: ''
---

Please include information about your system, the steps to reproduce the bug, and the version of llama.cpp that you are using. If possible, please provide a minimal code example that reproduces the bug.

If the bug concerns the server, please try to reproduce it first using the [server test scenario framework](https://github.com/ggerganov/llama.cpp/tree/master/examples/server/tests).
Loading

0 comments on commit 807ee66

Please sign in to comment.