Commit

wip
zhiyuan8 committed Aug 22, 2024
2 parents 4575115 + a09893f commit 6955699
Showing 30 changed files with 992 additions and 504 deletions.
21 changes: 12 additions & 9 deletions .github/workflows/build-wheels-cpu.yaml
@@ -27,15 +27,16 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-python -m pip install -e .
+# python -m pip install -e .
+python -m pip install build wheel
- name: Build wheels
uses: pypa/cibuildwheel@…
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_SKIP: "*musllinux*"
-CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
package-dir: .
@@ -51,8 +52,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
-# here 'Windows' is a large runner
-os: [Windows]
+# here 'nexa-sdk-windows-8-core' is a large runner
+os: [nexa-sdk-windows-8-core]

steps:
- uses: actions/checkout@v4
@@ -68,14 +69,15 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-python -m pip install -e .
+# python -m pip install -e .
+python -m pip install build wheel
- name: Build wheels
uses: pypa/cibuildwheel@…
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
-CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
CMAKE_BUILD_PARALLEL_LEVEL: 16
with:
package-dir: .
@@ -107,15 +109,16 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-python -m pip install -e .
+# python -m pip install -e .
+python -m pip install build wheel
- name: Build wheels
uses: pypa/cibuildwheel@…
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
-CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
@@ -146,7 +149,7 @@ jobs:
CIBW_SKIP: "*musllinux* pp*"
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ARCHS: "aarch64"
-CIBW_BUILD: "cp38-* cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
CMAKE_BUILD_PARALLEL_LEVEL: $(nproc)
with:
output-dir: wheelhouse
9 changes: 3 additions & 6 deletions .github/workflows/build-wheels-cuda-linux.yaml
@@ -21,8 +21,8 @@ jobs:
run: |
$matrix = @{
'os' = @('ubuntu-20.04')
-'pyver' = @("3.9", "3.10", "3.11", "3.12")
-'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1")
+'pyver' = @("3.10", "3.11", "3.12")
+'cuda' = @("12.4.1")
'releasetag' = @("basic")
}
@@ -113,12 +113,9 @@ jobs:
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
}
$env:VERBOSE = '1'
-$env:CMAKE_ARGS = '-DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all -DSD_CUBLAS=ON'
-$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
-$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
$env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
+cp tomls/pyproject_cuda.toml pyproject.toml
python -m build --wheel
# write the build tag to the output
13 changes: 5 additions & 8 deletions .github/workflows/build-wheels-cuda-win.yaml
@@ -18,12 +18,12 @@ jobs:
steps:
- name: Define Job Output
id: set-matrix
-# here 'Windows' is a large runner
+# here 'nexa-sdk-windows-8-core' is a large runner
run: |
$matrix = @{
-'os' = @('Windows')
-'pyver' = @("3.9", "3.10", "3.11", "3.12")
-'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1")
+'os' = @('nexa-sdk-windows-8-core')
+'pyver' = @("3.10", "3.11", "3.12")
+'cuda' = @("12.4.1")
'releasetag' = @("basic")
}
@@ -114,11 +114,8 @@ jobs:
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
}
$env:VERBOSE = '1'
-$env:CMAKE_ARGS = '-DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all -DSD_CUBLAS=ON'
-$env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
-$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
+cp tomls/pyproject_cuda.toml pyproject.toml
$env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
python -m build --wheel
10 changes: 7 additions & 3 deletions .github/workflows/build-wheels-metal.yaml
@@ -27,16 +27,20 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-python -m pip install -e .
+# python -m pip install -e .
+python -m pip install build wheel
+- name: Copy pyproject.toml
+run: |
+cp tomls/pyproject_metal.toml pyproject.toml
- name: Build wheels
uses: pypa/cibuildwheel@…
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ARCHS: "arm64"
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=ON -DSD_METAL=ON"
-CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
+CIBW_BUILD: "cp310-* cp311-* cp312-*"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
37 changes: 37 additions & 0 deletions .github/workflows/ci.yaml
@@ -0,0 +1,37 @@
name: Python CI

on:
push:
branches:
- main
pull_request:
branches:
- main

jobs:
build:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v3
with:
submodules: recursive # This will clone the repository with all its submodules
fetch-depth: 0 # This fetches all history so you can access any version of the submodules


- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10' # Specify the Python version you want

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build pytest
- name: Build DLL
run: |
python -m pip install -e .
- name: Run tests
run: |
python -m pytest tests
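
The `python -m pytest tests` step collects any `test_*.py` files under `tests/`. As a sketch only — this file is not part of the commit — a minimal test that step could pick up, checking the version string this commit sets in `nexa/__init__.py`:

```
# tests/test_version.py — hypothetical example, not in this commit
from nexa import __version__

def test_version_is_set():
    # This commit bumps nexa/__init__.py to "0.0.1"
    assert __version__ == "0.0.1"
```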
1 change: 0 additions & 1 deletion MANIFEST.in
@@ -1,2 +1 @@
include requirements.txt
-include requirements-onnx.txt
63 changes: 54 additions & 9 deletions README.md
@@ -24,15 +24,60 @@ Detailed API documentation is available [here](docs/index.html).

## Installation

-For CPU version
-```
-pip install nexaai --no-cache-dir
-```
-
-For GPU version
-```
-pip install nexaai-gpu --no-cache-dir
-```
**GPU version (optional)**

First, check whether you have GPU acceleration (requires `torch`):
<details>
<summary>CUDA:</summary>

```
import torch
print(torch.cuda.is_available())
```

If it prints `True`, install with CUDA enabled:

```
CMAKE_ARGS="-DGGML_CUDA=on -DSD_CUBLAS=ON" pip install nexaai-gpu
```
</details>
<details>
<summary>Apple M Chip:</summary>
Apple menu -> About This Mac -> Graphics

If it lists an Apple M-series chip, install with Metal enabled:

```
CMAKE_ARGS="-DGGML_METAL=on -DSD_METAL=ON" pip install nexaai-gpu
```
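
You can also check from a terminal; `system_profiler` is a standard macOS tool, so this check is not specific to this project:

```
system_profiler SPDisplaysDataType
```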
</details>

<details>
<summary>AMD graphics card:</summary>

Install with ROCm (hipBLAS) enabled:
```
CMAKE_ARGS="-DGGML_HIPBLAS=on" pip install nexaai-gpu
```
</details>

**CPU version**

<details>
<summary>Mac with Intel chips:</summary>

```
CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" pip install nexaai
```
</details>

<details>
<summary>Mac with M-series chips, or other operating systems:</summary>

```
pip install nexaai
```
</details>

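Whichever variant you installed, a quick sanity check (assuming the installed package exposes the `nexa` module, as this repository's package directory suggests):

```
python -c "import nexa; print(nexa.__version__)"
```
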
## Nexa CLI commands

2 changes: 1 addition & 1 deletion nexa/__init__.py
@@ -1 +1 @@
__version__ = "0.0.0.dev"
__version__ = "0.0.1"
6 changes: 3 additions & 3 deletions nexa/cli/entry.py
@@ -51,9 +51,9 @@ def run_ggml_inference(args):
if hasattr(args, 'streamlit') and args.streamlit:
inference.run_streamlit(model_path)
elif args.img2img:
-inference.run_img2img()
+inference.loop_img2img()
else:
-inference.run_txt2img()
+inference.loop_txt2img()
return
elif args.command == "vlm":
from nexa.gguf.nexa_inference_vlm import NexaVLMInference
@@ -218,4 +218,4 @@ def main():


if __name__ == "__main__":
-main()
\ No newline at end of file
+main()
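
The `run_*` → `loop_*` rename suggests these are interactive loops invoked from the CLI dispatch above. A hypothetical sketch of the equivalent direct calls — the module and class names are assumed by analogy with the `NexaVLMInference` import shown in this diff, not confirmed by it:

```
# Hypothetical direct usage; names assumed, not confirmed by this diff.
from nexa.gguf.nexa_inference_image import NexaImageInference

inference = NexaImageInference(model_path="sd1-5")
inference.loop_txt2img()  # interactive text-to-image loop (renamed from run_txt2img)
```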
37 changes: 32 additions & 5 deletions nexa/constants.py
@@ -42,9 +42,10 @@
"codeqwen": "CodeQwen1.5-7B-Instruct:q4_0",
"deepseek-coder": "deepseek-coder-1.3b-instruct:q4_0",
"dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0",
"nomic-embed-text": "nomic-embed-text-v1.5:fp16",
"phi2": "Phi-2:q4_0",
"phi3": "Phi-3-mini-128k-instruct:q4_0",
"llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0",
"llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M",
}

NEXA_RUN_MODEL_MAP_ONNX = {
@@ -60,37 +61,58 @@
"phi3v": "Phi-3-vision-128k-instruct:onnx-cpu-int4",
"sd1-5": "stable-diffusion-v1-5:onnx-cpu-fp32",
"lcm-dreamshaper": "lcm-dreamshaper-v7:onnx-cpu-fp32",
"whisper": "whisper-tiny.en:onnx-cpu-fp32",
"whisper": "whisper-tiny:onnx-cpu-fp32",
"ljspeech": "ljspeech-jets:onnx-cpu-fp32",
}

NEXA_RUN_MODEL_MAP_VLM = {
"nanollava": "nanoLLaVA:model-fp16",
"nanoLLaVA:fp16": "nanoLLaVA:model-fp16",
"llava-phi3": "llava-phi-3-mini:model-q4_0",
# "llava1.5": "/usr/share/ollama/.ollama/models/blobs/sha256-170370233dd5c5415250a2ecd5c71586352850729062ccef1496385647293868",
"llava-phi-3-mini:q4_0": "llava-phi-3-mini:model-q4_0",
"llava-phi-3-mini:fp16": "llava-phi-3-mini:model-fp16",
"llava-llama3": "llava-llama-3-8b-v1.1:model-q4_0",
"llava-llama-3-8b-v1.1:q4_0": "llava-llama-3-8b-v1.1:model-q4_0",
"llava-llama-3-8b-v1.1:fp16": "llava-llama-3-8b-v1.1:model-fp16",
"llava1.6-mistral": "llava-v1.6-mistral-7b:model-q4_0",
"llava-v1.6-mistral-7b:q4_0": "llava-v1.6-mistral-7b:model-q4_0",
"llava-v1.6-mistral-7b:fp16": "llava-v1.6-mistral-7b:model-fp16",
"llava1.6-vicuna": "llava-v1.6-vicuna-7b:model-q4_0",
"llava-v1.6-vicuna-7b:q4_0": "llava-v1.6-vicuna-7b:model-q4_0",
"llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:model-fp16",
}

NEXA_RUN_MODEL_MAP_VOICE = {
"whisper-large": "whisper-large:bin-large-v3",
"whisper-tiny": "whisper-tiny:bin-tiny",
"faster-whisper-tiny": "faster-whisper-tiny:bin-cpu-fp16",
"faster-whisper-small": "faster-whisper-small:bin-cpu-fp16",
"faster-whisper-medium": "faster-whisper-medium:bin-cpu-fp16",
"faster-whisper-base": "faster-whisper-base:bin-cpu-fp16",
"faster-whisper-large": "faster-whisper-large:bin-cpu-fp16",
}

NEXA_RUN_MODEL_MAP_FUNCTION_CALLING = {
"llama2-function-calling": "Llama2-7b-function-calling:q3_K_M",
"llama2-function-calling": "Llama2-7b-function-calling:q4_K_M",
}



NEXA_RUN_PROJECTOR_MAP = {
"nanollava": "nanoLLaVA:projector-fp16",
"nanoLLaVA:fp16": "nanoLLaVA:project-fp16",
"llava-phi3": "llava-phi-3-mini:projector-q4_0",
# "llava1.5": "/usr/share/ollama/.ollama/models/blobs/sha256-72d6f08a42f656d36b356dbe0920675899a99ce21192fd66266fb7d82ed07539",
"llava-phi-3-mini:q4_0": "llava-phi-3-mini:projector-q4_0",
"llava-phi-3-mini:fp16": "llava-phi-3-mini:projector-fp16",
"llava-llama3": "llava-llama-3-8b-v1.1:projector-q4_0",
"llava-llama-3-8b-v1.1:q4_0": "llava-llama-3-8b-v1.1:projector-q4_0",
"llava-llama-3-8b-v1.1:fp16": "llava-llama-3-8b-v1.1:projector-fp16",
"llava1.6-mistral": "llava-v1.6-mistral-7b:projector-q4_0",
"llava-v1.6-mistral-7b:q4_0": "llava-v1.6-mistral-7b:projector-q4_0",
"llava-v1.6-mistral-7b:fp16": "llava-v1.6-mistral-7b:projector-fp16",
"llava1.6-vicuna": "llava-v1.6-vicuna-7b:projector-q4_0",
"llava-v1.6-vicuna-7b:q4_0": "llava-v1.6-vicuna-7b:projector-q4_0",
"llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16",
}

NEXA_RUN_MODEL_MAP_IMAGE = {
@@ -99,6 +121,8 @@
"sd2-1": "stable-diffusion-v2-1:fp16",
"sdxl-turbo": "sdxl-turbo:q8_0",
"lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
"anything-lcm": "anything-v30-LCM:fp16",
"hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
}

NEXA_RUN_MODEL_MAP = {
@@ -127,6 +151,9 @@
"sd1-5": "q4_0",
"sd2-1": "q4_0",
"lcm-dreamshaper": "f16",
"sdxl-turbo": "q8_0",
"anything-lcm": "f16",
"hassaku-lcm": "f16",
}

EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
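
Note that `NEXA_RUN_MODEL_MAP_VLM` and `NEXA_RUN_PROJECTOR_MAP` are keyed identically, so one alias resolves to a matching model/projector pair. A minimal sketch of that lookup — the actual resolution logic lives elsewhere in the codebase; this is illustration only:

```
from nexa.constants import NEXA_RUN_MODEL_MAP_VLM, NEXA_RUN_PROJECTOR_MAP

def resolve_vlm(alias):
    # Identical keys in both maps guarantee a consistent pair.
    return NEXA_RUN_MODEL_MAP_VLM[alias], NEXA_RUN_PROJECTOR_MAP[alias]

model, projector = resolve_vlm("llava1.6-mistral")
# -> ("llava-v1.6-mistral-7b:model-q4_0", "llava-v1.6-mistral-7b:projector-q4_0")
```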
(diffs for the remaining 20 changed files not shown)
