NexaAI · zhiyuan8 · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024
diff --git a/.github/workflows/build-wheels-cpu-macos.yaml b/.github/workflows/build-wheels-cpu-macos.yaml
@@ -8,7 +8,7 @@ permissions:
 jobs:
 
   build_wheels_macos:
-    name: Build wheels on  macos-${{ matrix.os }}
+    name: Build wheels on macos-${{ matrix.os }}
     runs-on: macos-${{ matrix.os }}
     strategy:
       matrix:
@@ -28,7 +28,6 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          # python -m pip install -e .
           python -m pip install build wheel
 
       - name: Build wheels
@@ -37,8 +36,8 @@ jobs:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_BUILD_FRONTEND: "build"
-          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
-          CIBW_BUILD: "cp310-* cp311-* cp312-*"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64"
+          CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
           MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
           CMAKE_BUILD_PARALLEL_LEVEL: 4
         with:

diff --git a/.github/workflows/build-wheels-cpu.yaml b/.github/workflows/build-wheels-cpu.yaml
@@ -37,7 +37,7 @@ jobs:
   #         CIBW_REPAIR_WHEEL_COMMAND: ""
   #         CIBW_BUILD_FRONTEND: "build"
   #         CIBW_SKIP: "*musllinux*"
-  #         CIBW_BUILD: "cp310-* cp311-* cp312-*"
+  #         CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
   #         CMAKE_BUILD_PARALLEL_LEVEL: 4
   #       with:
   #         package-dir: .
@@ -79,7 +79,7 @@ jobs:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_BUILD_FRONTEND: "build"
-          CIBW_BUILD: "cp310-* cp311-* cp312-*"
+          CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
           CMAKE_BUILD_PARALLEL_LEVEL: 16
         with:
           package-dir: .
@@ -110,7 +110,7 @@ jobs:
           CIBW_BUILD_FRONTEND: "build"
           CIBW_SKIP: "*musllinux* pp*"
           CIBW_ARCHS: "aarch64"
-          CIBW_BUILD: "cp310-* cp311-* cp312-*"
+          CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
           CMAKE_BUILD_PARALLEL_LEVEL: $(nproc)
         with:
           output-dir: wheelhouse

diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml
@@ -37,7 +37,7 @@ jobs:
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_BUILD_FRONTEND: "build"
           CIBW_ARCHS: "arm64"
-          CIBW_BUILD: "cp310-* cp311-* cp312-*"
+          CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
           CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=ON -DSD_METAL=ON"
           MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
           CMAKE_BUILD_PARALLEL_LEVEL: 4

diff --git a/.gitignore b/.gitignore
@@ -15,6 +15,7 @@ build*.sh
 *.dll
 *.dylib
 *.a
+generated_images/
 
 # Python
 __pycache__/

diff --git a/README.md b/README.md
@@ -65,60 +65,39 @@ We have released pre-built wheels for various Python versions, platforms, and ba
 #### CPU
 
 ```bash
-pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple
+pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple --no-cache-dir
 ```
 
 #### GPU (Metal)
 
 For the GPU version supporting **Metal (macOS)**:
 
 ```bash
-CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple
+CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple --no-cache-dir
 ```
 
-#### GPU (CUDA)
-
-For the GPU version supporting **CUDA (Linux/Windows)**:
-
-```bash
-CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple
+<details>
+<summary><strong>FAQ: cannot use Metal/GPU on M1</strong></summary>
+Try the following commands:
 ```
-
-> [!NOTE]
-> The CUDA wheels are built with CUDA 12.4, but should be compatible with all CUDA 12.X
-
-
-#### GPU (Metal)
-
-For the GPU version supporting Metal (macOS):
-
-```bash
-CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai
+wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
+bash Miniforge3-MacOSX-arm64.sh
+conda create -n llama python=3.10
+conda activate llama
+CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple --no-cache-dir
 ```
+</details>
 
 #### GPU (CUDA)
 
-For the GPU version supporting CUDA (Linux/Windows), run the following command:
+For the GPU version supporting **CUDA (Linux/Windows)**:
 
 ```bash
-CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai
+CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir
 ```
 
-> [!TIP]
-> You can accelerate the building process via parallel cmake by appending the following to the commands above:
->
-> ```bash
-> CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
-> ```
->
-> For example:
->
-> ```bash
-> CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL
-> ```
-
-> [!TIP]
-> For Windows users, we recommend running the installation command in Git Bash to avoid unexpected behavior.
+> [!NOTE]
+> The CUDA wheels are built with CUDA 12.4, but should be compatible with all CUDA 12.X
 
 
 <details>

diff --git a/docs/.media/error.jpeg b/docs/.media/error.jpeg
diff --git a/docs/cmd_macos.sh b/docs/cmd_macos.sh
diff --git a/nexa/gguf/nexa_inference_text.py b/nexa/gguf/nexa_inference_text.py
@@ -96,7 +96,7 @@ def create_embedding(
 
     @SpinningCursorAnimation()
     def _load_model(self):
-        logging.debug(f"Loading model from {self.downloaded_path}")
+        logging.debug(f"Loading model from {self.downloaded_path}, use_cuda_or_metal : {is_gpu_available()}")
         start_time = time.time()
         with suppress_stdout_stderr():
             try:

diff --git a/pyproject.toml b/pyproject.toml
@@ -32,6 +32,8 @@ dependencies = [
 ]
 classifiers = [
     "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,6 +15,7 @@ build*.sh @@
     *.dll
     *.dylib
     *.a
+    generated_images/
     # Python
     __pycache__/
@@ Expand Down @@