Review notes (text before the first "diff --git" header is ignored by git apply):
* Line structure was restored from a whitespace-collapsed copy of this patch.
  Indentation and the position of blank context lines were inferred from each
  file's syntax and the hunk line counts; whitespace-only differences (for
  example the fetch-depth pair in ci.yaml) could not be recovered.
  TODO confirm against the target tree before applying.
* ci.yaml: the python-version matrix entry is quoted ("3.10"). Unquoted 3.10
  is a YAML float and loads as 3.1, which is not the intended interpreter.

diff --git a/.github/workflows/build-wheels-cpu-macos.yaml b/.github/workflows/build-wheels-cpu-macos.yaml
new file mode 100644
index 00000000..a97c69f6
--- /dev/null
+++ b/.github/workflows/build-wheels-cpu-macos.yaml
@@ -0,0 +1,68 @@
+name: Build Wheels (CPU) (MacOS)
+
+on: workflow_dispatch
+
+permissions:
+  contents: write
+
+jobs:
+
+  build_wheels_macos:
+    name: Build wheels on macos-${{ matrix.os }}
+    runs-on: macos-${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [12, 13, 14]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: "recursive"
+
+      # Used to host cibuildwheel
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.8"
+          cache: "pip"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # python -m pip install -e .
+          python -m pip install build wheel
+
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.20.0
+        env:
+          # disable repair
+          CIBW_REPAIR_WHEEL_COMMAND: ""
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
+          CIBW_BUILD: "cp310-* cp311-* cp312-*"
+          MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
+          CMAKE_BUILD_PARALLEL_LEVEL: 4
+        with:
+          package-dir: .
+          output-dir: wheelhouse
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: wheels-macos-${{ matrix.os }}
+          path: ./wheelhouse/*.whl
+
+  release:
+    name: Release
+    needs: [build_wheels_macos]
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          merge-multiple: true
+          path: dist
+
+      - uses: softprops/action-gh-release@v2
+        with:
+          files: dist/*
+          tag_name: ${{ github.ref_name }}
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/build-wheels-cpu.yaml b/.github/workflows/build-wheels-cpu.yaml
index 7a13cef5..e18e66b5 100644
--- a/.github/workflows/build-wheels-cpu.yaml
+++ b/.github/workflows/build-wheels-cpu.yaml
@@ -88,48 +88,6 @@ jobs:
           name: wheels-${{ matrix.os }}
           path: ./wheelhouse/*.whl
 
-  build_wheels_macos:
-    name: Build wheels on macos-${{ matrix.os }}
-    runs-on: macos-${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [12, 13, 14]
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: "recursive"
-
-      # Used to host cibuildwheel
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.8"
-          cache: "pip"
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          # python -m pip install -e .
-          python -m pip install build wheel
-
-      - name: Build wheels
-        uses: pypa/cibuildwheel@v2.20.0
-        env:
-          # disable repair
-          CIBW_REPAIR_WHEEL_COMMAND: ""
-          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
-          CIBW_BUILD: "cp310-* cp311-* cp312-*"
-          MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
-          CMAKE_BUILD_PARALLEL_LEVEL: 4
-        with:
-          package-dir: .
-          output-dir: wheelhouse
-
-      - uses: actions/upload-artifact@v4
-        with:
-          name: wheels-macos-${{ matrix.os }}
-          path: ./wheelhouse/*.whl
-
   build_wheels_arm64:
     name: Build arm64 wheels
     runs-on: ubuntu-20.04
@@ -185,7 +143,7 @@ jobs:
 
   release:
     name: Release
-    needs: [build_wheels, build_wheels_macos, build_wheels_arm64, build_sdist]
+    needs: [build_wheels_linux, build_wheels_win, build_wheels_arm64, build_sdist]
     runs-on: ubuntu-latest
 
     steps:
diff --git a/.github/workflows/build-wheels-cuda-linux.yaml b/.github/workflows/build-wheels-cuda-linux.yaml
index a4cd4b89..b2a001c5 100644
--- a/.github/workflows/build-wheels-cuda-linux.yaml
+++ b/.github/workflows/build-wheels-cuda-linux.yaml
@@ -115,7 +115,8 @@ jobs:
           $env:VERBOSE = '1'
           $env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
 
-          cp tomls/pyproject_cuda.toml pyproject.toml
+          $env:CMAKE_ARGS = '-DSD_CUBLAS=ON -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all'
+          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_CUDA_FORCE_MMQ=ON -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
           python -m build --wheel
 
           # write the build tag to the output
diff --git a/.github/workflows/build-wheels-cuda-win.yaml b/.github/workflows/build-wheels-cuda-win.yaml
index b1e33402..1eeb1381 100644
--- a/.github/workflows/build-wheels-cuda-win.yaml
+++ b/.github/workflows/build-wheels-cuda-win.yaml
@@ -115,7 +115,8 @@ jobs:
           }
           $env:VERBOSE = '1'
 
-          cp tomls/pyproject_cuda.toml pyproject.toml
+          $env:CMAKE_ARGS = '-DSD_CUBLAS=ON -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all'
+          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_CUDA_FORCE_MMQ=ON -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
           $env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
           python -m build --wheel
 
diff --git a/.github/workflows/build-wheels-metal.yaml b/.github/workflows/build-wheels-metal.yaml
index dc5b1cca..b423c92c 100644
--- a/.github/workflows/build-wheels-metal.yaml
+++ b/.github/workflows/build-wheels-metal.yaml
@@ -30,10 +30,6 @@ jobs:
           # python -m pip install -e .
           python -m pip install build wheel
 
-      - name: Copy pyproject.toml
-        run: |
-          cp tomls/pyproject_metal.toml pyproject.toml
-
       - name: Build wheels
         uses: pypa/cibuildwheel@v2.20.0
         env:
@@ -41,6 +37,7 @@ jobs:
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "arm64"
           CIBW_BUILD: "cp310-* cp311-* cp312-*"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=ON -DSD_METAL=ON"
           MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
           CMAKE_BUILD_PARALLEL_LEVEL: 4
         with:
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 3bdcd138..85c19f7f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -10,28 +10,37 @@ on:
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      matrix:
+        os: [macos-latest, windows-latest]
+        python-version: ["3.10"]
 
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
         with:
           submodules: recursive # This will clone the repository with all its submodules
-          fetch-depth: 0 # This fetches all history so you can access any version of the submodules
-
+          fetch-depth: 0 # This fetches all history so you can access any version of the submodules
 
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.10' # Specify the Python version you want
+          python-version: ${{ matrix.python-version }}
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           python -m pip install build pytest
+        shell: bash
+
       - name: Build DLL
         run: |
           python -m pip install -e .
+        shell: bash
+
       - name: Run tests
         run: |
-          python -m pytest tests
\ No newline at end of file
+          python -m pytest tests
+        shell: bash
diff --git a/.github/workflows/generate-index-from-release.yaml b/.github/workflows/generate-index-from-release.yaml
index 662cb6f6..bb096098 100644
--- a/.github/workflows/generate-index-from-release.yaml
+++ b/.github/workflows/generate-index-from-release.yaml
@@ -32,14 +32,19 @@ jobs:
     steps:
       - name: Checkout
         uses: actions/checkout@v4
+
       - name: Setup Pages
         uses: actions/configure-pages@v5
+
+      - name: Set execute permissions for script
+        run: chmod +x ./scripts/releases-to-pep-503.sh
+
       - name: Build
         run: |
           ./scripts/releases-to-pep-503.sh index/whl/cpu '^[v]?[0-9]+\.[0-9]+\.[0-9]+$'
-          ./scripts/releases-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
-          ./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
-          ./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
+          # ./scripts/releases-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
+          # ./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
+          # ./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
           ./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
           ./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
       - name: Upload artifact
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b4264fa..0b30e6e4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,13 +1,19 @@
 cmake_minimum_required(VERSION 3.16)
 
 if (GGML_CUDA OR GGML_METAL)
-    set(EMPTY_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/empty_file.txt")
+    set(SOURCE_EMPTY_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/empty_file.txt")
     add_custom_command(
-        OUTPUT ${EMPTY_FILE_PATH}
-        COMMAND ${CMAKE_COMMAND} -E touch ${EMPTY_FILE_PATH}
-        COMMENT "Creating an empty file because MY_FEATURE is ON"
+        OUTPUT ${SOURCE_EMPTY_FILE_PATH}
+        COMMAND ${CMAKE_COMMAND} -E touch ${SOURCE_EMPTY_FILE_PATH}
+        COMMENT "Creating an empty file to source folder because gpu option is ON"
+    )
+    set(WHEEL_EMPTY_FILE_PATH "${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/empty_file.txt")
+    add_custom_command(
+        OUTPUT ${WHEEL_EMPTY_FILE_PATH}
+        COMMAND ${CMAKE_COMMAND} -E touch ${WHEEL_EMPTY_FILE_PATH}
+        COMMENT "Creating an empty file to lib folder because gpu option is ON"
     )
-    add_custom_target(create_empty_file ALL DEPENDS ${EMPTY_FILE_PATH})
+    add_custom_target(create_empty_file ALL DEPENDS ${SOURCE_EMPTY_FILE_PATH} ${WHEEL_EMPTY_FILE_PATH})
 endif()
 
 # Project: stable_diffusion_cpp
diff --git a/README.md b/README.md
index c0e2b1e2..a10d1d64 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ Example:
 `docker run -v /home/ubuntu/.cache/nexa/hub/official:/model -it nexa4ai/sdk:latest nexa gen-text /model/Phi-3-mini-128k-instruct/q4_0.gguf`
 
 will create an interactive session with text generation
-```
+
 
 ## Nexa CLI commands
 
diff --git a/nexa/gguf/nexa_inference_image.py b/nexa/gguf/nexa_inference_image.py
index 819e3dbf..b0086d61 100644
--- a/nexa/gguf/nexa_inference_image.py
+++ b/nexa/gguf/nexa_inference_image.py
@@ -13,7 +13,9 @@
     DEFAULT_IMG_GEN_PARAMS_LCM,
     DEFAULT_IMG_GEN_PARAMS_TURBO,
 )
-from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
+from nexa.utils import SpinningCursorAnimation, nexa_prompt
+from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr
+
 from streamlit.web import cli as stcli
 from nexa.general import pull_model
 
diff --git a/nexa/gguf/nexa_inference_text.py b/nexa/gguf/nexa_inference_text.py
index 9027e50a..9f0afd60 100644
--- a/nexa/gguf/nexa_inference_text.py
+++ b/nexa/gguf/nexa_inference_text.py
@@ -13,8 +13,10 @@
     NEXA_STOP_WORDS_MAP,
 )
 from nexa.gguf.lib_utils import is_gpu_available
-from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
 from nexa.general import pull_model
+from nexa.utils import SpinningCursorAnimation, nexa_prompt
+from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr
+
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
diff --git a/nexa/gguf/nexa_inference_vlm.py b/nexa/gguf/nexa_inference_vlm.py
index ac886ccf..da8103f5 100644
--- a/nexa/gguf/nexa_inference_vlm.py
+++ b/nexa/gguf/nexa_inference_vlm.py
@@ -24,7 +24,8 @@
     Llava16ChatHandler,
     NanoLlavaChatHandler,
 )
-from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
+from nexa.utils import SpinningCursorAnimation, nexa_prompt
+from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr
 
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
diff --git a/nexa/gguf/nexa_inference_voice.py b/nexa/gguf/nexa_inference_voice.py
index 0725f704..ef0a653b 100644
--- a/nexa/gguf/nexa_inference_voice.py
+++ b/nexa/gguf/nexa_inference_voice.py
@@ -10,9 +10,11 @@
     EXIT_REMINDER,
     NEXA_RUN_MODEL_MAP_VOICE,
 )
-from nexa.utils import nexa_prompt
-from nexa.utils import nexa_prompt, SpinningCursorAnimation, suppress_stdout_stderr
 from nexa.general import pull_model
+from nexa.utils import nexa_prompt, SpinningCursorAnimation
+from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr
+
+
 
 logging.basicConfig(level=logging.INFO)
 
@@ -69,6 +71,7 @@ def _load_model(self):
 
         logging.debug(f"Loading model from: {self.downloaded_path}")
         with suppress_stdout_stderr():
+            os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
             self.model = WhisperModel(
                 self.downloaded_path,
                 device="cpu",
diff --git a/nexa/onnx/nexa_inference_image.py b/nexa/onnx/nexa_inference_image.py
index 9e10f7bb..6566cbb2 100644
--- a/nexa/onnx/nexa_inference_image.py
+++ b/nexa/onnx/nexa_inference_image.py
@@ -14,7 +14,7 @@
 )
 from nexa.general import pull_model
 from nexa.constants import EXIT_REMINDER, NEXA_RUN_MODEL_MAP_ONNX
-from nexa.utils import nexa_prompt
+from nexa.utils import nexa_prompt, SpinningCursorAnimation
 
 logging.basicConfig(level=logging.INFO)
 
@@ -75,6 +75,7 @@ def run(self):
         self._load_model(self.download_onnx_folder)
         self._dialogue_mode()
 
+    @SpinningCursorAnimation()
     def _load_model(self, model_path):
         """
         Load the model from the given model path using the appropriate pipeline.
@@ -149,7 +150,7 @@ def generate_images(self, prompt, negative_prompt):
 
         images = self.pipeline(**pipeline_kwargs).images
         return images
-    
+
 
     def _save_images(self, images):
         """
diff --git a/nexa/onnx/nexa_inference_text.py b/nexa/onnx/nexa_inference_text.py
index 794efca9..765d26b7 100644
--- a/nexa/onnx/nexa_inference_text.py
+++ b/nexa/onnx/nexa_inference_text.py
@@ -9,7 +9,7 @@
 from transformers import AutoTokenizer, TextStreamer
 from nexa.general import pull_model
 from nexa.constants import NEXA_RUN_MODEL_MAP_ONNX
-from nexa.utils import nexa_prompt
+from nexa.utils import nexa_prompt, SpinningCursorAnimation
 
 logging.basicConfig(level=logging.INFO)
 
@@ -51,6 +51,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
         self.timings = kwargs.get("timings", False)
         self.device = "cpu"
 
+    @SpinningCursorAnimation()
     def _load_model_and_tokenizer(self) -> Tuple[Any, Any, Any, bool]:
         logging.debug(f"Loading model from {self.downloaded_onnx_folder}")
         start_time = time.time()
diff --git a/nexa/utils.py b/nexa/utils.py
index 2483582f..71e9ca7e 100644
--- a/nexa/utils.py
+++ b/nexa/utils.py
@@ -10,9 +10,6 @@
 from prompt_toolkit.styles import Style
 
 from nexa.constants import EXIT_COMMANDS, EXIT_REMINDER
-from nexa.gguf.llama._utils_transformers import (
-    suppress_stdout_stderr,
-)  # re-import, don't comment out
 
 
 def is_package_installed(package_name: str) -> bool:
diff --git a/pyproject.toml b/pyproject.toml
index 74007e06..a51549a7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "nexaai"
-version = "0.0.2.dev"
+version = "0.0.5"
 description = "Nexa AI SDK"
 readme = "README.md"
 license = { text = "MIT" }
@@ -36,7 +36,7 @@ classifiers = [
 [project.optional-dependencies]
 onnx = [
     "librosa",
-    "optimum[onnxruntime]>=1.7.3", # for CPU version
+    "optimum[onnxruntime]", # for CPU version
     "diffusers", # required for image generation
     "optuna",
     "pydantic",
@@ -78,7 +78,7 @@ sdist.exclude = [".github", "build", "dist", "nexa.egg-info", "dependency/llama.
 build.verbose = true
 cmake.build-type = "Release"
 cmake.version = ">=3.16"
-cmake.args = ["-DCMAKE_CXX_FLAGS=-fopenmp"]
+# cmake.args = ["-DCMAKE_CXX_FLAGS=-fopenmp"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index a05f5892..5dfbadee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,7 +16,6 @@ fastapi
 uvicorn
 pydantic
 pillow
-prompt_toolkit
 
 # For onnx
 optimum[onnxruntime] # for CPU version
@@ -31,6 +30,7 @@ transformers
 ttstokenizer
 
 # Shared dependencies
+prompt_toolkit
 tqdm
 tabulate
 streamlit
diff --git a/scripts/releases-to-pep-503.sh b/scripts/releases-to-pep-503.sh
index 05195868..9d2dc312 100644
--- a/scripts/releases-to-pep-503.sh
+++ b/scripts/releases-to-pep-503.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 package_name="nexaai"
-repo_name="nexaai-sdk-cpp"
+repo_name="nexa-sdk"
 
 # Get output directory or default to index/whl/cpu
 output_dir=${1:-"index/whl/cpu"}