diff --git a/.github/workflows/build-wheels-cpu-macos.yaml b/.github/workflows/build-wheels-cpu-macos.yaml
index f1e1fc18..30e508e5 100644
--- a/.github/workflows/build-wheels-cpu-macos.yaml
+++ b/.github/workflows/build-wheels-cpu-macos.yaml
@@ -8,7 +8,7 @@ permissions:
jobs:
build_wheels_macos:
- name: Build wheels on macos-${{ matrix.os }}
+ name: Build wheels on macos-${{ matrix.os }}
runs-on: macos-${{ matrix.os }}
strategy:
matrix:
@@ -28,7 +28,6 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- # python -m pip install -e .
python -m pip install build wheel
- name: Build wheels
@@ -37,8 +36,8 @@ jobs:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_BUILD_FRONTEND: "build"
- CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
- CIBW_BUILD: "cp310-* cp311-* cp312-*"
+ CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64"
+ CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
diff --git a/.github/workflows/build-wheels-cpu.yaml b/.github/workflows/build-wheels-cpu.yaml
index f86ec878..4dc36265 100644
--- a/.github/workflows/build-wheels-cpu.yaml
+++ b/.github/workflows/build-wheels-cpu.yaml
@@ -37,7 +37,7 @@ jobs:
# CIBW_REPAIR_WHEEL_COMMAND: ""
# CIBW_BUILD_FRONTEND: "build"
# CIBW_SKIP: "*musllinux*"
- # CIBW_BUILD: "cp310-* cp311-* cp312-*"
+ # CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
# CMAKE_BUILD_PARALLEL_LEVEL: 4
# with:
# package-dir: .
@@ -79,7 +79,7 @@ jobs:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_BUILD_FRONTEND: "build"
- CIBW_BUILD: "cp310-* cp311-* cp312-*"
+ CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
CMAKE_BUILD_PARALLEL_LEVEL: 16
with:
package-dir: .
@@ -110,7 +110,7 @@ jobs:
CIBW_BUILD_FRONTEND: "build"
CIBW_SKIP: "*musllinux* pp*"
CIBW_ARCHS: "aarch64"
- CIBW_BUILD: "cp310-* cp311-* cp312-*"
+ CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
CMAKE_BUILD_PARALLEL_LEVEL: $(nproc)
with:
output-dir: wheelhouse
diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml
index 930aad72..bea2fd49 100644
--- a/.github/workflows/build-wheels-metal.yaml
+++ b/.github/workflows/build-wheels-metal.yaml
@@ -37,7 +37,7 @@ jobs:
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_BUILD_FRONTEND: "build"
CIBW_ARCHS: "arm64"
- CIBW_BUILD: "cp310-* cp311-* cp312-*"
+ CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*"
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=ON -DSD_METAL=ON"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
diff --git a/.gitignore b/.gitignore
index 047b7b5e..00d554d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ build*.sh
*.dll
*.dylib
*.a
+generated_images/
# Python
__pycache__/
diff --git a/README.md b/README.md
index 6491a99b..48b4a3b1 100644
--- a/README.md
+++ b/README.md
@@ -65,7 +65,7 @@ We have released pre-built wheels for various Python versions, platforms, and ba
#### CPU
```bash
-pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple
+pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple --no-cache-dir
```
#### GPU (Metal)
@@ -73,52 +73,31 @@ pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra
For the GPU version supporting **Metal (macOS)**:
```bash
-CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple
+CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple --no-cache-dir
```
-#### GPU (CUDA)
-
-For the GPU version supporting **CUDA (Linux/Windows)**:
-
-```bash
-CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple
+
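+**FAQ:** Metal/GPU acceleration is not being used on an M1 Mac?
+Try the following commands, which install the package inside a native arm64 Miniforge (conda) environment: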
```
-
-> [!NOTE]
-> The CUDA wheels are built with CUDA 12.4, but should be compatible with all CUDA 12.X
-
-
-#### GPU (Metal)
-
-For the GPU version supporting Metal (macOS):
-
-```bash
-CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai
+wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
+bash Miniforge3-MacOSX-arm64.sh
+conda create -n llama python=3.10
+conda activate llama
+CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple --no-cache-dir
```
+
#### GPU (CUDA)
-For the GPU version supporting CUDA (Linux/Windows), run the following command:
+For the GPU version supporting **CUDA (Linux/Windows)**:
```bash
-CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai
+CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir
```
-> [!TIP]
-> You can accelerate the building process via parallel cmake by appending the following to the commands above:
->
-> ```bash
-> CMAKE_BUILD_PARALLEL_LEVEL=$(nproc)
-> ```
->
-> For example:
->
-> ```bash
-> CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL
-> ```
-
-> [!TIP]
-> For Windows users, we recommend running the installation command in Git Bash to avoid unexpected behavior.
+> [!NOTE]
+> The CUDA wheels are built with CUDA 12.4, but should be compatible with all CUDA 12.x versions.
diff --git a/docs/.media/error.jpeg b/docs/.media/error.jpeg
new file mode 100644
index 00000000..1e374e19
Binary files /dev/null and b/docs/.media/error.jpeg differ
diff --git a/docs/cmd_macos.sh b/docs/cmd_macos.sh
deleted file mode 100644
index 0fbdae18..00000000
--- a/docs/cmd_macos.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-cd "$(dirname "${BASH_SOURCE[0]}")"
-
-if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
-
-# deactivate existing conda envs as needed to avoid conflicts
-{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
-
-# config
-CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
-INSTALL_ENV_DIR="$(pwd)/installer_files/env"
-
-# environment isolation
-export PYTHONNOUSERSITE=1
-unset PYTHONPATH
-unset PYTHONHOME
-export CUDA_PATH="$INSTALL_ENV_DIR"
-export CUDA_HOME="$CUDA_PATH"
-
-# activate env
-source $CONDA_ROOT_PREFIX/etc/profile.d/conda.sh
-conda activate $INSTALL_ENV_DIR
-exec bash --norc
\ No newline at end of file
diff --git a/nexa/gguf/nexa_inference_text.py b/nexa/gguf/nexa_inference_text.py
index f12054f1..a2352607 100644
--- a/nexa/gguf/nexa_inference_text.py
+++ b/nexa/gguf/nexa_inference_text.py
@@ -96,7 +96,7 @@ def create_embedding(
@SpinningCursorAnimation()
def _load_model(self):
- logging.debug(f"Loading model from {self.downloaded_path}")
+ logging.debug(f"Loading model from {self.downloaded_path}, use_cuda_or_metal : {is_gpu_available()}")
start_time = time.time()
with suppress_stdout_stderr():
try:
diff --git a/pyproject.toml b/pyproject.toml
index c4f8f9df..f54a8e27 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,8 @@ dependencies = [
]
classifiers = [
"Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",