Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/NexaAI/nexaai-sdk-cpp into david/bugfix

Browse files Browse the repository at this point in the history
  • Loading branch information
Davidqian123 committed Aug 22, 2024
2 parents ac59357 + 40a911f commit 78f55b4
Show file tree
Hide file tree
Showing 19 changed files with 131 additions and 79 deletions.
68 changes: 68 additions & 0 deletions .github/workflows/build-wheels-cpu-macos.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
name: Build Wheels (CPU) (MacOS)

# Manually triggered only; no push/PR trigger.
on: workflow_dispatch

# `contents: write` is required by softprops/action-gh-release to create/update the release.
permissions:
  contents: write

jobs:
  build_wheels_macos:
    name: Build wheels on macos-${{ matrix.os }}
    runs-on: macos-${{ matrix.os }}
    strategy:
      matrix:
        # macOS runner major versions: 12/13 are x86_64 hosts, 14 is arm64.
        os: [12, 13, 14]

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      # Used to host cibuildwheel
      - uses: actions/setup-python@v5
        with:
          python-version: "3.8"
          cache: "pip"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # python -m pip install -e .
          python -m pip install build wheel

      - name: Build wheels
        uses: pypa/[email protected]
        env:
          # disable repair
          CIBW_REPAIR_WHEEL_COMMAND: ""
          # Build universal2-capable binaries (arm64 + x86_64) via CMake.
          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
          CIBW_BUILD: "cp310-* cp311-* cp312-*"
          # Quoted so the runner-major number (e.g. 12) is passed as the string "12",
          # not retyped by YAML.
          MACOSX_DEPLOYMENT_TARGET: "${{ matrix.os }}"
          CMAKE_BUILD_PARALLEL_LEVEL: 4
        with:
          package-dir: .
          output-dir: wheelhouse

      - uses: actions/upload-artifact@v4
        with:
          name: wheels-macos-${{ matrix.os }}
          path: ./wheelhouse/*.whl

  release:
    name: Release
    needs: [build_wheels_macos]
    runs-on: ubuntu-latest

    steps:
      # Collect the per-macOS-version wheel artifacts into a single dist/ folder.
      - uses: actions/download-artifact@v4
        with:
          merge-multiple: true
          path: dist

      # Attach all wheels to the release tagged with the ref this run was dispatched from.
      - uses: softprops/action-gh-release@v2
        with:
          files: dist/*
          tag_name: ${{ github.ref_name }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44 changes: 1 addition & 43 deletions .github/workflows/build-wheels-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,48 +88,6 @@ jobs:
name: wheels-${{ matrix.os }}
path: ./wheelhouse/*.whl

build_wheels_macos:
name: Build wheels on macos-${{ matrix.os }}
runs-on: macos-${{ matrix.os }}
strategy:
matrix:
os: [12, 13, 14]

steps:
- uses: actions/checkout@v4
with:
submodules: "recursive"

# Used to host cibuildwheel
- uses: actions/setup-python@v5
with:
python-version: "3.8"
cache: "pip"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
# python -m pip install -e .
python -m pip install build wheel
- name: Build wheels
uses: pypa/[email protected]
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64 "
CIBW_BUILD: "cp310-* cp311-* cp312-*"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
package-dir: .
output-dir: wheelhouse

- uses: actions/upload-artifact@v4
with:
name: wheels-macos-${{ matrix.os }}
path: ./wheelhouse/*.whl

build_wheels_arm64:
name: Build arm64 wheels
runs-on: ubuntu-20.04
Expand Down Expand Up @@ -185,7 +143,7 @@ jobs:

release:
name: Release
needs: [build_wheels, build_wheels_macos, build_wheels_arm64, build_sdist]
needs: [build_wheels_linux, build_wheels_win, build_wheels_arm64, build_sdist]
runs-on: ubuntu-latest

steps:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/build-wheels-cuda-linux.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ jobs:
$env:VERBOSE = '1'
$env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
cp tomls/pyproject_cuda.toml pyproject.toml
$env:CMAKE_ARGS = '-DSD_CUBLAS=ON -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all'
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_CUDA_FORCE_MMQ=ON -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
python -m build --wheel
# write the build tag to the output
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/build-wheels-cuda-win.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ jobs:
}
$env:VERBOSE = '1'
cp tomls/pyproject_cuda.toml pyproject.toml
$env:CMAKE_ARGS = '-DSD_CUBLAS=ON -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=all'
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_CUDA_FORCE_MMQ=ON -DGGML_AVX2=OFF -DGGML_FMA=OFF -DGGML_F16C=OFF'
$env:CMAKE_BUILD_PARALLEL_LEVEL = $(nproc)
python -m build --wheel
Expand Down
5 changes: 1 addition & 4 deletions .github/workflows/build-wheels-metal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,14 @@ jobs:
# python -m pip install -e .
python -m pip install build wheel
- name: Copy pyproject.toml
run: |
cp tomls/pyproject_metal.toml pyproject.toml
- name: Build wheels
uses: pypa/[email protected]
env:
# disable repair
CIBW_REPAIR_WHEEL_COMMAND: ""
CIBW_ARCHS: "arm64"
CIBW_BUILD: "cp310-* cp311-* cp312-*"
CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=ON -DSD_METAL=ON"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os }}
CMAKE_BUILD_PARALLEL_LEVEL: 4
with:
Expand Down
19 changes: 14 additions & 5 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,37 @@ on:

jobs:
build:
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}

strategy:
matrix:
os: [macos-latest, windows-latest]
python-version: [3.10]

steps:
- name: Checkout code
uses: actions/checkout@v3
with:
submodules: recursive # This will clone the repository with all its submodules
fetch-depth: 0 # This fetches all history so you can access any version of the submodules

fetch-depth: 0 # This fetches all history so you can access any version of the submodules

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10' # Specify the Python version you want
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install build pytest
shell: bash

- name: Build DLL
run: |
python -m pip install -e .
shell: bash

- name: Run tests
run: |
python -m pytest tests
python -m pytest tests
shell: bash
11 changes: 8 additions & 3 deletions .github/workflows/generate-index-from-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,19 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Setup Pages
uses: actions/configure-pages@v5

- name: Set execute permissions for script
run: chmod +x ./scripts/releases-to-pep-503.sh

- name: Build
run: |
./scripts/releases-to-pep-503.sh index/whl/cpu '^[v]?[0-9]+\.[0-9]+\.[0-9]+$'
./scripts/releases-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
# ./scripts/releases-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
# ./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
# ./scripts/releases-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
./scripts/releases-to-pep-503.sh index/whl/cu124 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu124$'
./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
- name: Upload artifact
Expand Down
16 changes: 11 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
cmake_minimum_required(VERSION 3.16)

if (GGML_CUDA OR GGML_METAL)
set(EMPTY_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/empty_file.txt")
set(SOURCE_EMPTY_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib/empty_file.txt")
add_custom_command(
OUTPUT ${EMPTY_FILE_PATH}
COMMAND ${CMAKE_COMMAND} -E touch ${EMPTY_FILE_PATH}
COMMENT "Creating an empty file because MY_FEATURE is ON"
OUTPUT ${SOURCE_EMPTY_FILE_PATH}
COMMAND ${CMAKE_COMMAND} -E touch ${SOURCE_EMPTY_FILE_PATH}
COMMENT "Creating an empty file to source folder because gpu option is ON"
)
set(WHEEL_EMPTY_FILE_PATH "${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib/empty_file.txt")
add_custom_command(
OUTPUT ${WHEEL_EMPTY_FILE_PATH}
COMMAND ${CMAKE_COMMAND} -E touch ${WHEEL_EMPTY_FILE_PATH}
COMMENT "Creating an empty file to lib folder because gpu option is ON"
)
add_custom_target(create_empty_file ALL DEPENDS ${EMPTY_FILE_PATH})
add_custom_target(create_empty_file ALL DEPENDS ${SOURCE_EMPTY_FILE_PATH} ${WHEEL_EMPTY_FILE_PATH})
endif()

# Project: stable_diffusion_cpp
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ Example:
`docker run -v /home/ubuntu/.cache/nexa/hub/official:/model -it nexa4ai/sdk:latest nexa gen-text /model/Phi-3-mini-128k-instruct/q4_0.gguf`

will create an interactive session with text generation
```


## Nexa CLI commands

Expand Down
4 changes: 3 additions & 1 deletion nexa/gguf/nexa_inference_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
DEFAULT_IMG_GEN_PARAMS_LCM,
DEFAULT_IMG_GEN_PARAMS_TURBO,
)
from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
from nexa.utils import SpinningCursorAnimation, nexa_prompt
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr

from streamlit.web import cli as stcli
from nexa.general import pull_model

Expand Down
4 changes: 3 additions & 1 deletion nexa/gguf/nexa_inference_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
NEXA_STOP_WORDS_MAP,
)
from nexa.gguf.lib_utils import is_gpu_available
from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
from nexa.general import pull_model
from nexa.utils import SpinningCursorAnimation, nexa_prompt
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr


logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
Expand Down
3 changes: 2 additions & 1 deletion nexa/gguf/nexa_inference_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
Llava16ChatHandler,
NanoLlavaChatHandler,
)
from nexa.utils import SpinningCursorAnimation, nexa_prompt, suppress_stdout_stderr
from nexa.utils import SpinningCursorAnimation, nexa_prompt
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr

logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
Expand Down
7 changes: 5 additions & 2 deletions nexa/gguf/nexa_inference_voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
EXIT_REMINDER,
NEXA_RUN_MODEL_MAP_VOICE,
)
from nexa.utils import nexa_prompt
from nexa.utils import nexa_prompt, SpinningCursorAnimation, suppress_stdout_stderr
from nexa.general import pull_model
from nexa.utils import nexa_prompt, SpinningCursorAnimation
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr


logging.basicConfig(level=logging.INFO)


Expand Down Expand Up @@ -69,6 +71,7 @@ def _load_model(self):

logging.debug(f"Loading model from: {self.downloaded_path}")
with suppress_stdout_stderr():
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
self.model = WhisperModel(
self.downloaded_path,
device="cpu",
Expand Down
5 changes: 3 additions & 2 deletions nexa/onnx/nexa_inference_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)
from nexa.general import pull_model
from nexa.constants import EXIT_REMINDER, NEXA_RUN_MODEL_MAP_ONNX
from nexa.utils import nexa_prompt
from nexa.utils import nexa_prompt, SpinningCursorAnimation

logging.basicConfig(level=logging.INFO)

Expand Down Expand Up @@ -75,6 +75,7 @@ def run(self):
self._load_model(self.download_onnx_folder)
self._dialogue_mode()

@SpinningCursorAnimation()
def _load_model(self, model_path):
"""
Load the model from the given model path using the appropriate pipeline.
Expand Down Expand Up @@ -149,7 +150,7 @@ def generate_images(self, prompt, negative_prompt):
images = self.pipeline(**pipeline_kwargs).images
return images



def _save_images(self, images):
"""
Expand Down
3 changes: 2 additions & 1 deletion nexa/onnx/nexa_inference_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from transformers import AutoTokenizer, TextStreamer
from nexa.general import pull_model
from nexa.constants import NEXA_RUN_MODEL_MAP_ONNX
from nexa.utils import nexa_prompt
from nexa.utils import nexa_prompt, SpinningCursorAnimation

logging.basicConfig(level=logging.INFO)

Expand Down Expand Up @@ -51,6 +51,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
self.timings = kwargs.get("timings", False)
self.device = "cpu"

@SpinningCursorAnimation()
def _load_model_and_tokenizer(self) -> Tuple[Any, Any, Any, bool]:
logging.debug(f"Loading model from {self.downloaded_onnx_folder}")
start_time = time.time()
Expand Down
3 changes: 0 additions & 3 deletions nexa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
from prompt_toolkit.styles import Style

from nexa.constants import EXIT_COMMANDS, EXIT_REMINDER
from nexa.gguf.llama._utils_transformers import (
suppress_stdout_stderr,
) # re-import, don't comment out


def is_package_installed(package_name: str) -> bool:
Expand Down
Loading

0 comments on commit 78f55b4

Please sign in to comment.