diff --git a/.github/workflows/build-wheels-cpu-macos.yaml b/.github/workflows/build-wheels-cpu-macos.yaml index f1e1fc18..30e508e5 100644 --- a/.github/workflows/build-wheels-cpu-macos.yaml +++ b/.github/workflows/build-wheels-cpu-macos.yaml @@ -8,7 +8,7 @@ permissions: jobs: build_wheels_macos: - name: Build wheels on macos-${{ matrix.os }} + name: Build wheels on macos-${{ matrix.os }} runs-on: macos-${{ matrix.os }} strategy: matrix: @@ -28,7 +28,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - # python -m pip install -e . python -m pip install build wheel - name: Build wheels @@ -37,8 +36,8 @@ jobs: # disable repair CIBW_REPAIR_WHEEL_COMMAND: "" CIBW_BUILD_FRONTEND: "build" - CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 " - CIBW_BUILD: "cp310-* cp311-* cp312-*" + CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64" + CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*" MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }} CMAKE_BUILD_PARALLEL_LEVEL: 4 with: diff --git a/.github/workflows/build-wheels-cpu.yaml b/.github/workflows/build-wheels-cpu.yaml index f86ec878..4dc36265 100644 --- a/.github/workflows/build-wheels-cpu.yaml +++ b/.github/workflows/build-wheels-cpu.yaml @@ -37,7 +37,7 @@ jobs: # CIBW_REPAIR_WHEEL_COMMAND: "" # CIBW_BUILD_FRONTEND: "build" # CIBW_SKIP: "*musllinux*" - # CIBW_BUILD: "cp310-* cp311-* cp312-*" + # CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*" # CMAKE_BUILD_PARALLEL_LEVEL: 4 # with: # package-dir: . @@ -79,7 +79,7 @@ jobs: # disable repair CIBW_REPAIR_WHEEL_COMMAND: "" CIBW_BUILD_FRONTEND: "build" - CIBW_BUILD: "cp310-* cp311-* cp312-*" + CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*" CMAKE_BUILD_PARALLEL_LEVEL: 16 with: package-dir: . 
@@ -110,7 +110,7 @@ jobs: CIBW_BUILD_FRONTEND: "build" CIBW_SKIP: "*musllinux* pp*" CIBW_ARCHS: "aarch64" - CIBW_BUILD: "cp310-* cp311-* cp312-*" + CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*" CMAKE_BUILD_PARALLEL_LEVEL: $(nproc) with: output-dir: wheelhouse diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml index 930aad72..bea2fd49 100644 --- a/.github/workflows/build-wheels-metal.yaml +++ b/.github/workflows/build-wheels-metal.yaml @@ -37,7 +37,7 @@ jobs: CIBW_REPAIR_WHEEL_COMMAND: "" CIBW_BUILD_FRONTEND: "build" CIBW_ARCHS: "arm64" - CIBW_BUILD: "cp310-* cp311-* cp312-*" + CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-* cp311-* cp312-*" CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=ON -DSD_METAL=ON" MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }} CMAKE_BUILD_PARALLEL_LEVEL: 4 diff --git a/.gitignore b/.gitignore index 047b7b5e..00d554d9 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ build*.sh *.dll *.dylib *.a +generated_images/ # Python __pycache__/ diff --git a/README.md b/README.md index 6491a99b..48b4a3b1 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ We have released pre-built wheels for various Python versions, platforms, and ba #### CPU ```bash -pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple +pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra-index-url https://pypi.org/simple --no-cache-dir ``` #### GPU (Metal) @@ -73,52 +73,31 @@ pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cpu --extra For the GPU version supporting **Metal (macOS)**: ```bash -CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple +CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --prefer-binary 
--index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple --no-cache-dir ``` -#### GPU (CUDA) - -For the GPU version supporting **CUDA (Linux/Windows)**: - -```bash -CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple +
+FAQ: cannot use Metal/GPU on M1? +Try the following commands: ``` - -> [!NOTE] -> The CUDA wheels are built with CUDA 12.4, but should be compatible with all CUDA 12.X - - -#### GPU (Metal) - -For the GPU version supporting Metal (macOS): - -```bash -CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai +wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh +bash Miniforge3-MacOSX-arm64.sh +conda create -n llama python=3.10 +conda activate llama +CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/metal --extra-index-url https://pypi.org/simple --no-cache-dir ``` +
#### GPU (CUDA) -For the GPU version supporting CUDA (Linux/Windows), run the following command: +For the GPU version supporting **CUDA (Linux/Windows)**: ```bash -CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai +CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir ``` -> [!TIP] -> You can accelerate the building process via parallel cmake by appending the following to the commands above: -> -> ```bash -> CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -> ``` -> -> For example: -> -> ```bash -> CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) CMAKE_ARGS="-DGGML_METAL=ON -DSD_METAL -> ``` - -> [!TIP] -> For Windows users, we recommend running the installation command in Git Bash to avoid unexpected behavior. +> [!NOTE] +> The CUDA wheels are built with CUDA 12.4, but should be compatible with all CUDA 12.X
diff --git a/docs/.media/error.jpeg b/docs/.media/error.jpeg new file mode 100644 index 00000000..1e374e19 Binary files /dev/null and b/docs/.media/error.jpeg differ diff --git a/docs/cmd_macos.sh b/docs/cmd_macos.sh deleted file mode 100644 index 0fbdae18..00000000 --- a/docs/cmd_macos.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -cd "$(dirname "${BASH_SOURCE[0]}")" - -if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi - -# deactivate existing conda envs as needed to avoid conflicts -{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null - -# config -CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" -INSTALL_ENV_DIR="$(pwd)/installer_files/env" - -# environment isolation -export PYTHONNOUSERSITE=1 -unset PYTHONPATH -unset PYTHONHOME -export CUDA_PATH="$INSTALL_ENV_DIR" -export CUDA_HOME="$CUDA_PATH" - -# activate env -source $CONDA_ROOT_PREFIX/etc/profile.d/conda.sh -conda activate $INSTALL_ENV_DIR -exec bash --norc \ No newline at end of file diff --git a/nexa/gguf/nexa_inference_text.py b/nexa/gguf/nexa_inference_text.py index f12054f1..a2352607 100644 --- a/nexa/gguf/nexa_inference_text.py +++ b/nexa/gguf/nexa_inference_text.py @@ -96,7 +96,7 @@ def create_embedding( @SpinningCursorAnimation() def _load_model(self): - logging.debug(f"Loading model from {self.downloaded_path}") + logging.debug(f"Loading model from {self.downloaded_path}, use_cuda_or_metal : {is_gpu_available()}") start_time = time.time() with suppress_stdout_stderr(): try: diff --git a/pyproject.toml b/pyproject.toml index c4f8f9df..f54a8e27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,8 @@ dependencies = [ ] classifiers = [ "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 
3.11",