-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 98fce50
Showing
500 changed files
with
275,960 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
--- | ||
Checks: > | ||
bugprone-*, | ||
-bugprone-easily-swappable-parameters, | ||
-bugprone-implicit-widening-of-multiplication-result, | ||
-bugprone-misplaced-widening-cast, | ||
-bugprone-narrowing-conversions, | ||
readability-*, | ||
-readability-avoid-unconditional-preprocessor-if, | ||
-readability-function-cognitive-complexity, | ||
-readability-identifier-length, | ||
-readability-implicit-bool-conversion, | ||
-readability-magic-numbers, | ||
-readability-uppercase-literal-suffix, | ||
clang-analyzer-*, | ||
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, | ||
performance-*, | ||
portability-*, | ||
misc-*, | ||
-misc-const-correctness, | ||
-misc-non-private-member-variables-in-classes, | ||
-misc-no-recursion, | ||
FormatStyle: none |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries | ||
stage('Cleanup'){ | ||
cleanWs() // Cleaning previous CI build in workspace | ||
} | ||
stage('checkout repo'){ | ||
retry(5){ // Retry if the cloning fails due to some reason | ||
checkout scm // Clone the repo on Runner | ||
} | ||
} | ||
stage('Compiling llama.cpp'){ | ||
sh'''#!/bin/bash | ||
make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V | ||
''' | ||
} | ||
stage('Running llama.cpp'){ | ||
sh'''#!/bin/bash | ||
module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc | ||
qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./main -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64 | ||
cat llama_log.txt # Printing results | ||
''' | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
ARG UBUNTU_VERSION=22.04 | ||
|
||
# This needs to generally match the container host's environment. | ||
ARG CUDA_VERSION=11.7.1 | ||
|
||
# Target the CUDA build image | ||
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} | ||
|
||
FROM ${BASE_CUDA_DEV_CONTAINER} as build | ||
|
||
# Unless otherwise specified, we make a fat build. | ||
ARG CUDA_DOCKER_ARCH=all | ||
|
||
RUN apt-get update && \ | ||
apt-get install -y build-essential python3 python3-pip git | ||
|
||
COPY requirements.txt requirements.txt | ||
COPY requirements requirements | ||
|
||
RUN pip install --upgrade pip setuptools wheel \ | ||
&& pip install -r requirements.txt | ||
|
||
WORKDIR /app | ||
|
||
COPY . . | ||
|
||
# Set nvcc architecture | ||
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} | ||
# Enable cuBLAS | ||
ENV LLAMA_CUBLAS=1 | ||
|
||
RUN make | ||
|
||
ENTRYPOINT ["/app/.devops/tools.sh"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
ARG UBUNTU_VERSION=22.04 | ||
|
||
# This needs to generally match the container host's environment. | ||
ARG ROCM_VERSION=5.6 | ||
|
||
# Target the CUDA build image | ||
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete | ||
|
||
FROM ${BASE_ROCM_DEV_CONTAINER} as build | ||
|
||
# Unless otherwise specified, we make a fat build. | ||
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 | ||
# This is mostly tied to rocBLAS supported archs. | ||
ARG ROCM_DOCKER_ARCH=\ | ||
gfx803 \ | ||
gfx900 \ | ||
gfx906 \ | ||
gfx908 \ | ||
gfx90a \ | ||
gfx1010 \ | ||
gfx1030 \ | ||
gfx1100 \ | ||
gfx1101 \ | ||
gfx1102 | ||
|
||
COPY requirements.txt requirements.txt | ||
COPY requirements requirements | ||
|
||
RUN pip install --upgrade pip setuptools wheel \ | ||
&& pip install -r requirements.txt | ||
|
||
WORKDIR /app | ||
|
||
COPY . . | ||
|
||
# Set nvcc architecture | ||
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH} | ||
# Enable ROCm | ||
ENV LLAMA_HIPBLAS=1 | ||
ENV CC=/opt/rocm/llvm/bin/clang | ||
ENV CXX=/opt/rocm/llvm/bin/clang++ | ||
|
||
RUN make | ||
|
||
ENTRYPOINT ["/app/.devops/tools.sh"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
ARG UBUNTU_VERSION=22.04 | ||
|
||
FROM ubuntu:$UBUNTU_VERSION as build | ||
|
||
RUN apt-get update && \ | ||
apt-get install -y build-essential python3 python3-pip git | ||
|
||
COPY requirements.txt requirements.txt | ||
COPY requirements requirements | ||
|
||
RUN pip install --upgrade pip setuptools wheel \ | ||
&& pip install -r requirements.txt | ||
|
||
WORKDIR /app | ||
|
||
COPY . . | ||
|
||
RUN make | ||
|
||
ENV LC_ALL=C.utf8 | ||
|
||
ENTRYPOINT ["/app/.devops/tools.sh"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# SRPM for building from source and packaging an RPM for RPM-based distros. | ||
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package | ||
# Built and maintained by John Boero - [email protected] | ||
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal | ||
|
||
# Notes for llama.cpp: | ||
# 1. Tags are currently based on hash - which will not sort asciibetically. | ||
# We need to declare standard versioning if people want to sort latest releases. | ||
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. | ||
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. | ||
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo | ||
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. | ||
# It is up to the user to install the correct vendor-specific support. | ||
|
||
Name: llama.cpp-clblast | ||
Version: %( date "+%%Y%%m%%d" ) | ||
Release: 1%{?dist} | ||
Summary: OpenCL Inference of LLaMA model in C/C++ | ||
License: MIT | ||
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz | ||
BuildRequires: coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel | ||
Requires: clblast | ||
URL: https://github.com/ggerganov/llama.cpp | ||
|
||
%define debug_package %{nil} | ||
%define source_date_epoch_from_changelog 0 | ||
|
||
%description | ||
CPU inference for Meta's Lllama2 models using default options. | ||
|
||
%prep | ||
%setup -n llama.cpp-master | ||
|
||
%build | ||
make -j LLAMA_CLBLAST=1 | ||
|
||
%install | ||
mkdir -p %{buildroot}%{_bindir}/ | ||
cp -p main %{buildroot}%{_bindir}/llamaclblast | ||
cp -p server %{buildroot}%{_bindir}/llamaclblastserver | ||
cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple | ||
|
||
mkdir -p %{buildroot}/usr/lib/systemd/system | ||
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service | ||
[Unit] | ||
Description=Llama.cpp server, CPU only (no GPU support in this build). | ||
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target | ||
|
||
[Service] | ||
Type=simple | ||
EnvironmentFile=/etc/sysconfig/llama | ||
ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS | ||
ExecReload=/bin/kill -s HUP $MAINPID | ||
Restart=never | ||
|
||
[Install] | ||
WantedBy=default.target | ||
EOF | ||
|
||
mkdir -p %{buildroot}/etc/sysconfig | ||
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama | ||
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" | ||
EOF | ||
|
||
%clean | ||
rm -rf %{buildroot} | ||
rm -rf %{_builddir}/* | ||
|
||
%files | ||
%{_bindir}/llamaclblast | ||
%{_bindir}/llamaclblastserver | ||
%{_bindir}/llamaclblastsimple | ||
/usr/lib/systemd/system/llamaclblast.service | ||
%config /etc/sysconfig/llama | ||
|
||
|
||
%pre | ||
|
||
%post | ||
|
||
%preun | ||
%postun | ||
|
||
%changelog |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# SRPM for building from source and packaging an RPM for RPM-based distros. | ||
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package | ||
# Built and maintained by John Boero - [email protected] | ||
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal | ||
|
||
# Notes for llama.cpp: | ||
# 1. Tags are currently based on hash - which will not sort asciibetically. | ||
# We need to declare standard versioning if people want to sort latest releases. | ||
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. | ||
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. | ||
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo | ||
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. | ||
# It is up to the user to install the correct vendor-specific support. | ||
|
||
Name: llama.cpp-cublas | ||
Version: %( date "+%%Y%%m%%d" ) | ||
Release: 1%{?dist} | ||
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) | ||
License: MIT | ||
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz | ||
BuildRequires: coreutils make gcc-c++ git cuda-toolkit | ||
Requires: cuda-toolkit | ||
URL: https://github.com/ggerganov/llama.cpp | ||
|
||
%define debug_package %{nil} | ||
%define source_date_epoch_from_changelog 0 | ||
|
||
%description | ||
CPU inference for Meta's Lllama2 models using default options. | ||
|
||
%prep | ||
%setup -n llama.cpp-master | ||
|
||
%build | ||
make -j LLAMA_CUBLAS=1 | ||
|
||
%install | ||
mkdir -p %{buildroot}%{_bindir}/ | ||
cp -p main %{buildroot}%{_bindir}/llamacppcublas | ||
cp -p server %{buildroot}%{_bindir}/llamacppcublasserver | ||
cp -p simple %{buildroot}%{_bindir}/llamacppcublassimple | ||
|
||
mkdir -p %{buildroot}/usr/lib/systemd/system | ||
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacublas.service | ||
[Unit] | ||
Description=Llama.cpp server, CPU only (no GPU support in this build). | ||
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target | ||
|
||
[Service] | ||
Type=simple | ||
EnvironmentFile=/etc/sysconfig/llama | ||
ExecStart=/usr/bin/llamacppcublasserver $LLAMA_ARGS | ||
ExecReload=/bin/kill -s HUP $MAINPID | ||
Restart=never | ||
|
||
[Install] | ||
WantedBy=default.target | ||
EOF | ||
|
||
mkdir -p %{buildroot}/etc/sysconfig | ||
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama | ||
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" | ||
EOF | ||
|
||
%clean | ||
rm -rf %{buildroot} | ||
rm -rf %{_builddir}/* | ||
|
||
%files | ||
%{_bindir}/llamacppcublas | ||
%{_bindir}/llamacppcublasserver | ||
%{_bindir}/llamacppcublassimple | ||
/usr/lib/systemd/system/llamacublas.service | ||
%config /etc/sysconfig/llama | ||
|
||
%pre | ||
|
||
%post | ||
|
||
%preun | ||
%postun | ||
|
||
%changelog |
Oops, something went wrong.