Commit

wip
zhiyuan8 committed Aug 22, 2024
2 parents 4575115 + a09893f commit 6955699
Showing 30 changed files with 992 additions and 504 deletions.
21 changes: 12 additions & 9 deletions .github/workflows/build-wheels-cpu.yaml
@@ -27,15 +27,16 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-python -m pip install -e .
+# python -m pip install -e .
+python -m pip install build wheel
- name: Build wheels
uses: pypa/cibuildwheel@…
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_SKIP: "*musllinux*"
-CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
package-dir: .
@@ -51,8 +52,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
-# here 'Windows' is a large runner
-os: [Windows]
+# here 'nexa-sdk-windows-8-core' is a large runner
+os: [nexa-sdk-windows-8-core]

steps:
- uses: actions/checkout@v4
@@ -68,14 +69,15 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-python -m pip install -e .
+# python -m pip install -e .
+python -m pip install build wheel
- name: Build wheels
uses: pypa/cibuildwheel@…
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
-CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
CMAKE_BUILD_PARALLEL_LEVEL: 16
with:
package-dir: .
@@ -107,15 +109,16 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-python -m pip install -e .
+# python -m pip install -e .
+python -m pip install build wheel
- name: Build wheels
uses: pypa/cibuildwheel@…
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
-CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
@@ -146,7 +149,7 @@ jobs:
CIBW_SKIP: "*musllinux* pp*"
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ARCHS: "aarch64"
-CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
CMAKE_BUILD_PARALLEL_LEVEL: $(nproc)
with:
output-dir: wheelhouse
9 changes: 3 additions & 6 deletions .github/workflows/build-wheels-cuda-linux.yaml
@@ -21,8 +21,8 @@ jobs:
run: |
$matrix = @{
'os' = @('ubuntu-20.04')
-'pyver' = @("3.9", "3.10", "3.11", "3.12")
-'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1")
+'pyver' = @("3.10", "3.11", "3.12")
+'cuda' = @("12.4.1")
'releasetag' = @("basic")
}
@@ -113,12 +113,9 @@ jobs:
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
}
$env:VERBOSE = '1'
-$env:CMAKE_ARGS = '-DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all -DSD_CUBLAS=ON'
-$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
-$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
$env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
+cp tomls/pyproject_cuda.toml pyproject.toml
python -m build --wheel
# write the build tag to the output
13 changes: 5 additions & 8 deletions .github/workflows/build-wheels-cuda-win.yaml
@@ -18,12 +18,12 @@ jobs:
steps:
- name: Define Job Output
id: set-matrix
-# here 'Windows' is a large runner
+# here 'nexa-sdk-windows-8-core' is a large runner
run: |
$matrix = @{
-'os' = @('Windows')
-'pyver' = @("3.9", "3.10", "3.11", "3.12")
-'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1")
+'os' = @('nexa-sdk-windows-8-core')
+'pyver' = @("3.10", "3.11", "3.12")
+'cuda' = @("12.4.1")
'releasetag' = @("basic")
}
@@ -114,11 +114,8 @@ jobs:
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
}
$env:VERBOSE = '1'
-$env:CMAKE_ARGS = '-DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all -DSD_CUBLAS=ON'
-$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
-$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
+cp tomls/pyproject_cuda.toml pyproject.toml
$env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
python -m build --wheel
10 changes: 7 additions & 3 deletions .github/workflows/build-wheels-metal.yaml
@@ -27,16 +27,20 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-python -m pip install -e .
+# python -m pip install -e .
+python -m pip install build wheel
+- name: Copy pyproject.toml
+run: |
+cp tomls/pyproject_metal.toml pyproject.toml
- name: Build wheels
uses: pypa/cibuildwheel@…
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ARCHS: "arm64"
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=ON -DSD_METAL=ON"
-CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
37 changes: 37 additions & 0 deletions .github/workflows/ci.yaml
@@ -0,0 +1,37 @@
name: Python CI

on:
push:
branches:
- main
pull_request:
branches:
- main

jobs:
build:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v3
with:
submodules: recursive # This will clone the repository with all its submodules
fetch-depth: 0 # This fetches all history so you can access any version of the submodules


- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10' # Specify the Python version you want

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build pytest
- name: Build DLL
run: |
python -m pip install -e .
- name: Run tests
run: |
python -m pytest tests
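
The `python -m pytest tests` step collects any `test_*.py` files under `tests/`. As a sketch only — this file is not part of the commit — a minimal test that step could pick up, checking the version string this commit sets in `nexa/__init__.py`:

```
# tests/test_version.py — hypothetical example, not in this commit
from nexa import __version__

def test_version_is_set():
    # This commit bumps nexa/__init__.py to "0.0.1"
    assert __version__ == "0.0.1"
```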
1 change: 0 additions & 1 deletion MANIFEST.in
@@ -1,2 +1 @@
include requirements.txt
-include requirements-onnx.txt
63 changes: 54 additions & 9 deletions README.md
@@ -24,15 +24,60 @@ Detailed API documentation is available [here](docs/index.html).

## Installation

-For CPU version
-```
-pip install nexaai --no-cache-dir
-```
-
-For GPU version
-```
-pip install nexaai-gpu --no-cache-dir
-```
**GPU version (optional)**

First, check whether you have GPU acceleration (requires `torch`):
<details>
<summary>CUDA:</summary>

```
import torch
print(torch.cuda.is_available())
```

If it prints `True`, install with CUDA enabled:

```
CMAKE_ARGS="-DGGML_CUDA=on -DSD_CUBLAS=ON" pip install nexaai-gpu
```
</details>
<details>
<summary>Apple M Chip:</summary>
Apple menu -> About This Mac -> Graphics

If it lists an Apple M-series chip, install with Metal enabled:

```
CMAKE_ARGS="-DGGML_METAL=on -DSD_METAL=ON" pip install nexaai-gpu
```
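
You can also check from a terminal; `system_profiler` is a standard macOS tool, so this check is not specific to this project:

```
system_profiler SPDisplaysDataType
```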
</details>

<details>
<summary>AMD graphics card:</summary>

Install with ROCm (hipBLAS) enabled:
```
CMAKE_ARGS="-DGGML_HIPBLAS=on" pip install nexaai-gpu
```
</details>

**CPU version**

<details>
<summary>Mac with Intel chips:</summary>

```
CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai
```
</details>

<details>
<summary>Mac with M-series chips, or other operating systems:</summary>

```
pip install nexaai
```
</details>

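Whichever variant you installed, a quick sanity check (assuming the installed package exposes the `nexa` module, as this repository's package directory suggests):

```
python -c "import nexa; print(nexa.__version__)"
```
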
## Nexa CLI commands

2 changes: 1 addition & 1 deletion nexa/__init__.py
@@ -1 +1 @@
__version__ = "0.0.0.dev"
__version__ = "0.0.1"
6 changes: 3 additions & 3 deletions nexa/cli/entry.py
@@ -51,9 +51,9 @@ def run_ggml_inference(args):
if hasattr(args, 'streamlit') and args.streamlit:
inference.run_streamlit(model_path)
elif args.img2img:
-inference.run_img2img()
+inference.loop_img2img()
else:
-inference.run_txt2img()
+inference.loop_txt2img()
return
elif args.command == "vlm":
from nexa.gguf.nexa_inference_vlm import NexaVLMInference
@@ -218,4 +218,4 @@ def main():


if __name__ == "__main__":
-main()
\ No newline at end of file
+main()
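
The `run_*` → `loop_*` rename suggests these are interactive loops invoked from the CLI dispatch above. A hypothetical sketch of the equivalent direct calls — the module and class names are assumed by analogy with the `NexaVLMInference` import shown in this diff, not confirmed by it:

```
# Hypothetical direct usage; names assumed, not confirmed by this diff.
from nexa.gguf.nexa_inference_image import NexaImageInference

inference = NexaImageInference(model_path="sd1-5")
inference.loop_txt2img()  # interactive text-to-image loop (renamed from run_txt2img)
```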
37 changes: 32 additions & 5 deletions nexa/constants.py
@@ -42,9 +42,10 @@
"codeqwen": "CodeQwen1.5-7B-Instruct:q4_0",
"deepseek-coder": "deepseek-coder-1.3b-instruct:q4_0",
"dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0",
"nomic-embed-text": "nomic-embed-text-v1.5:fp16",
"phi2": "Phi-2:q4_0",
"phi3": "Phi-3-mini-128k-instruct:q4_0",
"llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0",
"llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M",
}

NEXA_RUN_MODEL_MAP_ONNX = {
@@ -60,37 +61,58 @@
"phi3v": "Phi-3-vision-128k-instruct:onnx-cpu-int4",
"sd1-5": "stable-diffusion-v1-5:onnx-cpu-fp32",
"lcm-dreamshaper": "lcm-dreamshaper-v7:onnx-cpu-fp32",
"whisper": "whisper-tiny.en:onnx-cpu-fp32",
"whisper": "whisper-tiny:onnx-cpu-fp32",
"ljspeech": "ljspeech-jets:onnx-cpu-fp32",
}

NEXA_RUN_MODEL_MAP_VLM = {
"nanollava": "nanoLLaVA:model-fp16",
"nanoLLaVA:fp16": "nanoLLaVA:model-fp16",
"llava-phi3": "llava-phi-3-mini:model-q4_0",
# "llava1.5": "/usr/share/ollama/.ollama/models/blobs/sha256-170370233dd5c5415250a2ecd5c71586352850729062ccef1496385647293868",
"llava-phi-3-mini:q4_0": "llava-phi-3-mini:model-q4_0",
"llava-phi-3-mini:fp16": "llava-phi-3-mini:model-fp16",
"llava-llama3": "llava-llama-3-8b-v1.1:model-q4_0",
"llava-llama-3-8b-v1.1:q4_0": "llava-llama-3-8b-v1.1:model-q4_0",
"llava-llama-3-8b-v1.1:fp16": "llava-llama-3-8b-v1.1:model-fp16",
"llava1.6-mistral": "llava-v1.6-mistral-7b:model-q4_0",
"llava-v1.6-mistral-7b:q4_0": "llava-v1.6-mistral-7b:model-q4_0",
"llava-v1.6-mistral-7b:fp16": "llava-v1.6-mistral-7b:model-fp16",
"llava1.6-vicuna": "llava-v1.6-vicuna-7b:model-q4_0",
"llava-v1.6-vicuna-7b:q4_0": "llava-v1.6-vicuna-7b:model-q4_0",
"llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:model-fp16",
}

NEXA_RUN_MODEL_MAP_VOICE = {
"whisper-large": "whisper-large:bin-large-v3",
"whisper-tiny": "whisper-tiny:bin-tiny",
"faster-whisper-tiny": "faster-whisper-tiny:bin-cpu-fp16",
"faster-whisper-small": "faster-whisper-small:bin-cpu-fp16",
"faster-whisper-medium": "faster-whisper-medium:bin-cpu-fp16",
"faster-whisper-base": "faster-whisper-base:bin-cpu-fp16",
"faster-whisper-large": "faster-whisper-large:bin-cpu-fp16",
}

NEXA_RUN_MODEL_MAP_FUNCTION_CALLING = {
"llama2-function-calling": "Llama2-7b-function-calling:q3_K_M",
"llama2-function-calling": "Llama2-7b-function-calling:q4_K_M",
}



NEXA_RUN_PROJECTOR_MAP = {
"nanollava": "nanoLLaVA:projector-fp16",
"nanoLLaVA:fp16": "nanoLLaVA:project-fp16",
"llava-phi3": "llava-phi-3-mini:projector-q4_0",
# "llava1.5": "/usr/share/ollama/.ollama/models/blobs/sha256-72d6f08a42f656d36b356dbe0920675899a99ce21192fd66266fb7d82ed07539",
"llava-phi-3-mini:q4_0": "llava-phi-3-mini:projector-q4_0",
"llava-phi-3-mini:fp16": "llava-phi-3-mini:projector-fp16",
"llava-llama3": "llava-llama-3-8b-v1.1:projector-q4_0",
"llava-llama-3-8b-v1.1:q4_0": "llava-llama-3-8b-v1.1:projector-q4_0",
"llava-llama-3-8b-v1.1:fp16": "llava-llama-3-8b-v1.1:projector-fp16",
"llava1.6-mistral": "llava-v1.6-mistral-7b:projector-q4_0",
"llava-v1.6-mistral-7b:q4_0": "llava-v1.6-mistral-7b:projector-q4_0",
"llava-v1.6-mistral-7b:fp16": "llava-v1.6-mistral-7b:projector-fp16",
"llava1.6-vicuna": "llava-v1.6-vicuna-7b:projector-q4_0",
"llava-v1.6-vicuna-7b:q4_0": "llava-v1.6-vicuna-7b:projector-q4_0",
"llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16",
}

NEXA_RUN_MODEL_MAP_IMAGE = {
@@ -99,6 +121,8 @@
"sd2-1": "stable-diffusion-v2-1:fp16",
"sdxl-turbo": "sdxl-turbo:q8_0",
"lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
"anything-lcm": "anything-v30-LCM:fp16",
"hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
}

NEXA_RUN_MODEL_MAP = {
@@ -127,6 +151,9 @@
"sd1-5": "q4_0",
"sd2-1": "q4_0",
"lcm-dreamshaper": "f16",
"sdxl-turbo": "q8_0",
"anything-lcm": "f16",
"hassaku-lcm": "f16",
}

EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
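
Note that `NEXA_RUN_MODEL_MAP_VLM` and `NEXA_RUN_PROJECTOR_MAP` are keyed identically, so one alias resolves to a matching model/projector pair. A minimal sketch of that lookup — the actual resolution logic lives elsewhere in the codebase; this is illustration only:

```
from nexa.constants import NEXA_RUN_MODEL_MAP_VLM, NEXA_RUN_PROJECTOR_MAP

def resolve_vlm(alias):
    # Identical keys in both maps guarantee a consistent pair.
    return NEXA_RUN_MODEL_MAP_VLM[alias], NEXA_RUN_PROJECTOR_MAP[alias]

model, projector = resolve_vlm("llava1.6-mistral")
# -> ("llava-v1.6-mistral-7b:model-q4_0", "llava-v1.6-mistral-7b:projector-q4_0")
```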
(diffs for the remaining 20 changed files not shown)
