Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/NexaAI/nexaai-sdk-cpp into david/bugfix

Browse files Browse the repository at this point in the history
  • Loading branch information
Davidqian123 committed Aug 22, 2024
2 parents ac59357 + 40a911f commit 78f55b4
Show file tree
Hide file tree
Showing 19 changed files with 131 additions and 79 deletions.
68 changes: 68 additions & 0 deletions .github/workflows/build-wheels-cpu-macos.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
name: Build Wheels (CPU) (MacOS)

# Manually triggered only; no push/PR trigger.
on: workflow_dispatch

# `contents: write` is required by softprops/action-gh-release to create/update the release.
permissions:
  contents: write

jobs:
  build_wheels_macos:
    name: Build wheels on macos-${{ matrix.os }}
    runs-on: macos-${{ matrix.os }}
    strategy:
      matrix:
        # macOS runner major versions: 12/13 are x86_64 hosts, 14 is arm64.
        os: [12, 13, 14]

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      # Used to host cibuildwheel
      - uses: actions/setup-python@v5
        with:
          python-version: "3.8"
          cache: "pip"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # python -m pip install -e .
          python -m pip install build wheel

      - name: Build wheels
        uses: pypa/[email protected]
        env:
          # disable repair
          CIBW_REPAIR_WHEEL_COMMAND: ""
          # Build universal2-capable binaries (arm64 + x86_64) via CMake.
          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
          CIBW_BUILD: "cp310-* cp311-* cp312-*"
          # Quoted so the runner-major number (e.g. 12) is passed as the string "12",
          # not retyped by YAML.
          MACOSX_DEPLOYMENT_TARGET: "${{ matrix.os }}"
          CMAKE_BUILD_PARALLEL_LEVEL: 4
        with:
          package-dir: .
          output-dir: wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-macos-${{ matrix.os }}
          path: ./wheelhouse/*.whl

  release:
    name: Release
    needs: [build_wheels_macos]
    runs-on: ubuntu-latest

    steps:
      # Collect the per-macOS-version wheel artifacts into a single dist/ folder.
      - uses: actions/download-artifact@v4
        with:
          merge-multiple: true
          path: dist

      # Attach all wheels to the release tagged with the ref this run was dispatched from.
      - uses: softprops/action-gh-release@v2
        with:
          files: dist/*
          tag_name: ${{ github.ref_name }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44 changes: 1 addition & 43 deletions .github/workflows/build-wheels-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,48 +88,6 @@ jobs:
name: wheels-${{ matrix.os }}
path: ./wheelhouse/*.whl

build_wheels_macos:
name: Build wheels on macos-${{ matrix.os }}
runs-on: macos-${{ matrix.os }}
strategy:
matrix:
os: [12, 13, 14]

steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"

# Used to host cibuildwheel
- uses: actions/setup-python@v5
with:
python-version: "3.8"
cache: "pip"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
# python -m pip install -e .
python -m pip install build wheel
- name: Build wheels
uses: pypa/[email protected]
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
CIBW_BUILD: "cp310-* cp311-* cp312-*"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
package-dir: .
output-dir: wheelhouse

- uses: actions/upload-artifact@v4
with:
name: wheels-macos-${{ matrix.os }}
path: ./wheelhouse/*.whl

build_wheels_arm64:
name: Build arm64 wheels
runs-on: ubuntu-20.04
Expand Down Expand Up @@ -185,7 +143,7 @@ jobs:

release:
name: Release
needs: [build_wheels, build_wheels_macos, build_wheels_arm64, build_sdist]
needs: [build_wheels_linux, build_wheels_win, build_wheels_arm64, build_sdist]
runs-on: ubuntu-latest

steps:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/build-wheels-cuda-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ jobs:
$env:VERBOSE = '1'
$env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
cp tomls/pyproject_cuda.toml pyproject.toml
$env:CMAKE_ARGS = '-DSD_CUBLAS=ON -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all'
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_CUDA_FORCE_MMQ=ON -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
python -m build --wheel
# write the build tag to the output
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/build-wheels-cuda-win.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ jobs:
}
$env:VERBOSE = '1'
cp tomls/pyproject_cuda.toml pyproject.toml
$env:CMAKE_ARGS = '-DSD_CUBLAS=ON -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all'
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_CUDA_FORCE_MMQ=ON -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
$env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
python -m build --wheel
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/build-wheels-metal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,14 @@ jobs:
# python -m pip install -e .
python -m pip install build wheel
- name: Copy pyproject.toml
run: |
cp tomls/pyproject_metal.toml pyproject.toml
- name: Build wheels
uses: pypa/[email protected]
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ARCHS: "arm64"
CIBW_BUILD: "cp310-* cp311-* cp312-*"
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=ON -DSD_METAL=ON"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
Expand Down
19 changes: 14 additions & 5 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,37 @@ on:

jobs:
build:
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}

strategy:
matrix:
os: [macos-latest, windows-latest]
python-version: [3.10]

steps:
- name: Checkout code
uses: actions/checkout@v3
with:
submodules: recursive # This will clone the repository with all its submodules
fetch-depth: 0 # This fetches all history so you can access any version of the submodules

fetch-depth: 0 # This fetches all history so you can access any version of the submodules

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10' # Specify the Python version you want
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build pytest
shell: bash

- name: Build DLL
run: |
python -m pip install -e .
shell: bash

- name: Run tests
run: |
python -m pytest tests
python -m pytest tests
shell: bash
11 changes: 8 additions & 3 deletions .github/workflows/generate-index-from-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,19 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Setup Pages
uses: actions/configure-pages@v5

- name: Set execute permissions for script
run: chmod +x ./scripts/releases-to-pep-503.sh

- name: Build
run: |
./scripts/releases-to-pep-503.sh index/whl/cpu '^[v]?[0-9]+\.[0-9]+\.[0-9]+$'
./scripts/releases-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
# ./scripts/releases-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
# ./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
# ./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
- name: Upload artifact
Expand Down
16 changes: 11 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
cmake_minimum_required(VERSION 3.16)

if (GGML_CUDA OR GGML_METAL)
set(EMPTY_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/empty_file.txt")
set(SOURCE_EMPTY_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/empty_file.txt")
add_custom_command(
OUTPUT ${EMPTY_FILE_PATH}
COMMAND ${CMAKE_COMMAND} -E touch ${EMPTY_FILE_PATH}
COMMENT "Creating an empty file because MY_FEATURE is ON"
OUTPUT ${SOURCE_EMPTY_FILE_PATH}
COMMAND ${CMAKE_COMMAND} -E touch ${SOURCE_EMPTY_FILE_PATH}
COMMENT "Creating an empty file to source folder because gpu option is ON"
)
set(WHEEL_EMPTY_FILE_PATH "${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/empty_file.txt")
add_custom_command(
OUTPUT ${WHEEL_EMPTY_FILE_PATH}
COMMAND ${CMAKE_COMMAND} -E touch ${WHEEL_EMPTY_FILE_PATH}
COMMENT "Creating an empty file to lib folder because gpu option is ON"
)
add_custom_target(create_empty_file ALL DEPENDS ${EMPTY_FILE_PATH})
add_custom_target(create_empty_file ALL DEPENDS ${SOURCE_EMPTY_FILE_PATH} ${WHEEL_EMPTY_FILE_PATH})
endif()

# Project: stable_diffusion_cpp
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ Example:
`docker run -v /home/ubuntu/.cache/nexa/hub/official:/model -it nexa4ai/sdk:latest nexa gen-text /model/Phi-3-mini-128k-instruct/q4_0.gguf`

will create an interactive session with text generation
```


## Nexa CLI commands

Expand Down
4 changes: 3 additions & 1 deletion nexa/gguf/nexa_inference_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
DEFAULT_IMG_GEN_PARAMS_LCM,
DEFAULT_IMG_GEN_PARAMS_TURBO,
)
from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
from nexa.utils import SpinningCursorAnimation, nexa_prompt
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr

from streamlit.web import cli as stcli
from nexa.general import pull_model

Expand Down
4 changes: 3 additions & 1 deletion nexa/gguf/nexa_inference_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
NEXA_STOP_WORDS_MAP,
)
from nexa.gguf.lib_utils import is_gpu_available
from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
from nexa.general import pull_model
from nexa.utils import SpinningCursorAnimation, nexa_prompt
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr


logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
Expand Down
3 changes: 2 additions & 1 deletion nexa/gguf/nexa_inference_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
Llava16ChatHandler,
NanoLlavaChatHandler,
)
from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
from nexa.utils import SpinningCursorAnimation, nexa_prompt
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr

logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
Expand Down
7 changes: 5 additions & 2 deletions nexa/gguf/nexa_inference_voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
EXIT_REMINDER,
NEXA_RUN_MODEL_MAP_VOICE,
)
from nexa.utils import nexa_prompt
from nexa.utils import nexa_prompt, SpinningCursorAnimation, suppress_stdout_stderr
from nexa.general import pull_model
from nexa.utils import nexa_prompt, SpinningCursorAnimation
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr


logging.basicConfig(level=logging.INFO)


Expand Down Expand Up @@ -69,6 +71,7 @@ def _load_model(self):

logging.debug(f"Loading model from: {self.downloaded_path}")
with suppress_stdout_stderr():
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
self.model = WhisperModel(
self.downloaded_path,
device="cpu",
Expand Down
5 changes: 3 additions & 2 deletions nexa/onnx/nexa_inference_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)
from nexa.general import pull_model
from nexa.constants import EXIT_REMINDER, NEXA_RUN_MODEL_MAP_ONNX
from nexa.utils import nexa_prompt
from nexa.utils import nexa_prompt, SpinningCursorAnimation

logging.basicConfig(level=logging.INFO)

Expand Down Expand Up @@ -75,6 +75,7 @@ def run(self):
self._load_model(self.download_onnx_folder)
self._dialogue_mode()

@SpinningCursorAnimation()
def _load_model(self, model_path):
"""
Load the model from the given model path using the appropriate pipeline.
Expand Down Expand Up @@ -149,7 +150,7 @@ def generate_images(self, prompt, negative_prompt):
images = self.pipeline(**pipeline_kwargs).images
return images



def _save_images(self, images):
"""
Expand Down
3 changes: 2 additions & 1 deletion nexa/onnx/nexa_inference_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from transformers import AutoTokenizer, TextStreamer
from nexa.general import pull_model
from nexa.constants import NEXA_RUN_MODEL_MAP_ONNX
from nexa.utils import nexa_prompt
from nexa.utils import nexa_prompt, SpinningCursorAnimation

logging.basicConfig(level=logging.INFO)

Expand Down Expand Up @@ -51,6 +51,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
self.timings = kwargs.get("timings", False)
self.device = "cpu"

@SpinningCursorAnimation()
def _load_model_and_tokenizer(self) -> Tuple[Any, Any, Any, bool]:
logging.debug(f"Loading model from {self.downloaded_onnx_folder}")
start_time = time.time()
Expand Down
3 changes: 0 additions & 3 deletions nexa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
from prompt_toolkit.styles import Style

from nexa.constants import EXIT_COMMANDS, EXIT_REMINDER
from nexa.gguf.llama._utils_transformers import (
suppress_stdout_stderr,
) # re-import, don't comment out


def is_package_installed(package_name: str) -> bool:
Expand Down
Loading

0 comments on commit 78f55b4

Please sign in to comment.