From e56be508d90b925bc919a1aca085095a903684fd Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Thu, 31 Oct 2024 04:53:48 +0000 Subject: [PATCH 001/160] add nexa's llama.cpp submodule --- .gitmodules | 6 +++++- dependency/README.md | 38 +++++++++++++++----------------------- dependency/nexa_llama.cpp | 1 + 3 files changed, 21 insertions(+), 24 deletions(-) create mode 160000 dependency/nexa_llama.cpp diff --git a/.gitmodules b/.gitmodules index a09b7c00..aaa70648 100644 --- a/.gitmodules +++ b/.gitmodules @@ -6,7 +6,11 @@ path = dependency/llama.cpp url = https://github.com/NexaAI/llama.cpp.git branch = master-release +[submodule "dependency/nexa_llama.cpp"] + path = dependency/nexa_llama.cpp + url = https://github.com/NexaAI/llama.cpp + branch = nexa-audio-lm [submodule "nexa/eval/benchmark_tasks"] path = nexa/eval/benchmark_tasks url = https://github.com/NexaAI/benchmark-tasks.git - branch = main + branch = main \ No newline at end of file diff --git a/dependency/README.md b/dependency/README.md index 02fc5a7f..ddaf865a 100644 --- a/dependency/README.md +++ b/dependency/README.md @@ -1,36 +1,28 @@ # Add as submodule -``` -# Step 1: Clone the main repository recursively to include existing submodules +Step 1: Clone the main repository recursively to include existing submodules +```shell git clone --recursive https://github.com/NexaAI/nexa-sdk-ggml +``` -# Step 2: Navigate to the cloned repository -cd nexa-sdk-ggml - -# Step 3: Add the first submodule -git submodule add https://github.com/ggerganov/llama.cpp dependency/llama.cpp - -# Step 4: Add the second submodule -git submodule add https://github.com/leejet/stable-diffusion.cpp dependency/stable-diffusion.cpp +Step 2: Add submodule in a specific branch, for example `nexa-audio-lm` +```shell +git submodule add -b nexa-audio-lm https://github.com/NexaAI/llama.cpp dependency/nexa_llama.cpp +``` -# Step 5: Initialize and update the submodules +Step 3: Initialize and update the submodules +```shell git submodule update --init --recursive +``` -# Step 6: Commit the changes to .gitmodules and the added submodules +Step 4: Commit the changes to .gitmodules and the added submodules +```shell git add .gitmodules dependency/ git commit -m "Added llama.cpp and stable-diffusion.cpp as submodules" ``` # Update submodules pull the latest change -``` -git pull origin main -``` -Update submodules -``` -git submodule update --remote --merge -``` -Then add and commit the changes -``` -git add . 
-git commit -m "Updated submodules" +```shell +git submodule sync +git submodule update --init --recursive --remote ``` \ No newline at end of file diff --git a/dependency/nexa_llama.cpp b/dependency/nexa_llama.cpp new file mode 160000 index 00000000..be24dc9d --- /dev/null +++ b/dependency/nexa_llama.cpp @@ -0,0 +1 @@ +Subproject commit be24dc9d1ea55e04175589178ab2277d135ed3a9 From 271c50ee8ae27f8401144bd8941996ee3a2e75d4 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Thu, 31 Oct 2024 16:44:53 +0000 Subject: [PATCH 002/160] update CMakeList --- CMakeLists.txt | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4670bff2..4d0388b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,6 +136,34 @@ if(LLAMA_BUILD) ) endif() +# nexa_llama_cpp project +# option(NEXA_LLAMA_BUILD "Build nexa-llama.cpp" ON) +# if(NEXA_LLAMA_BUILD) +# set(LLAMA_CUDA ${GGML_CUDA}) +# set(LLAMA_METAL ${GGML_METAL}) + +# ExternalProject_Add(nexa_llama_project +# SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/nexa_llama.cpp +# BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build +# CMAKE_ARGS +# ${USER_DEFINED_OPTIONS} +# ${COMMON_CMAKE_OPTIONS} +# -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install +# -DCMAKE_POSITION_INDEPENDENT_CODE=ON +# -DCMAKE_CXX_STANDARD=17 +# -DLLAMA_CUDA=${LLAMA_CUDA} +# -DLLAMA_METAL=${LLAMA_METAL} +# -DGGML_AVX=$,$>>,OFF,ON> +# -DGGML_AVX2=$,$>>,OFF,ON> +# -DGGML_FMA=$,$>>,OFF,ON> +# -DGGML_F16C=$,$>>,OFF,ON> +# -DGGML_METAL_EMBED_LIBRARY=$,ON,OFF> +# BUILD_ALWAYS 1 +# BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} +# INSTALL_COMMAND ${CMAKE_COMMAND} --build . --config Release --target install +# ) +# endif() + # bark_cpp project option(BARK_BUILD "Build bark.cpp" ON) if(BARK_BUILD) @@ -164,6 +192,7 @@ if(WIN32) DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ + # $(CMAKE_CURRENT_BINARY_DIR)/nexa_llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/ DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib @@ -176,6 +205,7 @@ if(WIN32) DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ + # $(CMAKE_CURRENT_BINARY_DIR)/nexa_llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/ DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib @@ -188,6 +218,7 @@ else() DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/ ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ + # $(CMAKE_CURRENT_BINARY_DIR)/nexa_llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/ DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib USE_SOURCE_PERMISSIONS @@ -200,6 +231,7 @@ else() DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/ ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ + # $(CMAKE_CURRENT_BINARY_DIR)/nexa_llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/ DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib USE_SOURCE_PERMISSIONS From c3c1cbc7f3b403512d82071b8b16b64e1f609b6f Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Thu, 31 Oct 2024 18:03:27 +0000 Subject: [PATCH 003/160] nexa_llama.cpp added to project --- CMakeLists.txt | 106 ++++++++++++++++++++++++++++++++----------------- pyproject.toml | 3 ++ 2 files changed, 73 insertions(+), 36 deletions(-) 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d0388b7..cb5594ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.16) project(nexa_gguf) include(ExternalProject) +find_package(OpenMP REQUIRED) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_CXX_STANDARD 17) @@ -55,6 +56,7 @@ endfunction() # Collect all user-defined options get_all_options(USER_DEFINED_OPTIONS) +message(STATUS "USER_DEFINED_OPTIONS: ${USER_DEFINED_OPTIONS}") if(APPLE) set(CMAKE_INSTALL_RPATH "@loader_path") @@ -88,6 +90,7 @@ endif() # stable_diffusion_cpp project option(STABLE_DIFFUSION_BUILD "Build stable-diffusion.cpp" ON) if(STABLE_DIFFUSION_BUILD) + message(STATUS "Configuring stable-diffusion.cpp build...") ExternalProject_Add(stable_diffusion_project SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/stable-diffusion.cpp BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build @@ -104,6 +107,8 @@ if(STABLE_DIFFUSION_BUILD) BUILD_ALWAYS 1 BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} INSTALL_COMMAND ${CMAKE_COMMAND} --build . --config Release --target install + LOG_CONFIGURE 1 + LOG_BUILD 1 LOG_INSTALL 1 ) endif() @@ -111,6 +116,7 @@ endif() # llama_cpp project option(LLAMA_BUILD "Build llama.cpp" ON) if(LLAMA_BUILD) + message(STATUS "Configuring llama.cpp build...") set(LLAMA_CUDA ${GGML_CUDA}) set(LLAMA_METAL ${GGML_METAL}) @@ -125,6 +131,9 @@ if(LLAMA_BUILD) -DCMAKE_CXX_STANDARD=17 -DLLAMA_CUDA=${LLAMA_CUDA} -DLLAMA_METAL=${LLAMA_METAL} + -DCMAKE_C_FLAGS=${OpenMP_C_FLAGS} + -DCMAKE_CXX_FLAGS=${OpenMP_CXX_FLAGS} + -DCMAKE_EXE_LINKER_FLAGS=${OpenMP_EXE_LINKER_FLAGS} -DGGML_AVX=$,$>>,OFF,ON> -DGGML_AVX2=$,$>>,OFF,ON> -DGGML_FMA=$,$>>,OFF,ON> @@ -133,40 +142,62 @@ if(LLAMA_BUILD) BUILD_ALWAYS 1 BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} INSTALL_COMMAND ${CMAKE_COMMAND} --build . --config Release --target install + LOG_CONFIGURE 1 + LOG_BUILD 1 + LOG_INSTALL 1 ) endif() # nexa_llama_cpp project -# option(NEXA_LLAMA_BUILD "Build nexa-llama.cpp" ON) -# if(NEXA_LLAMA_BUILD) -# set(LLAMA_CUDA ${GGML_CUDA}) -# set(LLAMA_METAL ${GGML_METAL}) - -# ExternalProject_Add(nexa_llama_project -# SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/nexa_llama.cpp -# BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build -# CMAKE_ARGS -# ${USER_DEFINED_OPTIONS} -# ${COMMON_CMAKE_OPTIONS} -# -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install -# -DCMAKE_POSITION_INDEPENDENT_CODE=ON -# -DCMAKE_CXX_STANDARD=17 -# -DLLAMA_CUDA=${LLAMA_CUDA} -# -DLLAMA_METAL=${LLAMA_METAL} -# -DGGML_AVX=$,$>>,OFF,ON> -# -DGGML_AVX2=$,$>>,OFF,ON> -# -DGGML_FMA=$,$>>,OFF,ON> -# -DGGML_F16C=$,$>>,OFF,ON> -# -DGGML_METAL_EMBED_LIBRARY=$,ON,OFF> -# BUILD_ALWAYS 1 -# BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} -# INSTALL_COMMAND ${CMAKE_COMMAND} --build . 
--config Release --target install -# ) -# endif() +option(NEXA_LLAMA_BUILD "Build nexa-llama.cpp" ON) +if(NEXA_LLAMA_BUILD) + set(LLAMA_CUDA ${GGML_CUDA}) + set(LLAMA_METAL ${GGML_METAL}) + + ExternalProject_Add(nexa_llama_project + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/nexa_llama.cpp + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build + CMAKE_ARGS + ${USER_DEFINED_OPTIONS} + ${COMMON_CMAKE_OPTIONS} + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DCMAKE_CXX_STANDARD=17 + -DLLAMA_CUDA=${LLAMA_CUDA} + -DLLAMA_METAL=${LLAMA_METAL} + -DCMAKE_C_FLAGS=${OpenMP_C_FLAGS} + -DCMAKE_CXX_FLAGS=${OpenMP_CXX_FLAGS} + -DCMAKE_EXE_LINKER_FLAGS=${OpenMP_EXE_LINKER_FLAGS} + -DGGML_AVX=$,$>>,OFF,ON> + -DGGML_AVX2=$,$>>,OFF,ON> + -DGGML_FMA=$,$>>,OFF,ON> + -DGGML_F16C=$,$>>,OFF,ON> + -DGGML_METAL_EMBED_LIBRARY=$,ON,OFF> + -DLLAMA_BUILD_INFO=OFF + BUILD_ALWAYS 1 + BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} + INSTALL_COMMAND ${CMAKE_COMMAND} --build . --config Release --target install + LOG_CONFIGURE 1 + LOG_BUILD 1 + LOG_INSTALL 1 + LOG_OUTPUT_ON_FAILURE 1 + ) + + # Add a custom target to copy build logs to a permanent location + add_custom_command(TARGET nexa_llama_project POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/build_logs + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_project-prefix/src/nexa_llama_project-stamp/nexa_llama_project-*.log + ${CMAKE_CURRENT_SOURCE_DIR}/build_logs/ + COMMENT "Copying nexa_llama build logs to permanent location" + ) + +endif() # bark_cpp project option(BARK_BUILD "Build bark.cpp" ON) if(BARK_BUILD) + message(STATUS "Configuring bark.cpp build...") ExternalProject_Add(bark_project SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/bark.cpp BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bark_build @@ -183,16 +214,21 @@ if(BARK_BUILD) BUILD_ALWAYS 1 BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} INSTALL_COMMAND ${CMAKE_COMMAND} --build . 
--config Release --target install + LOG_CONFIGURE 1 + LOG_BUILD 1 + LOG_INSTALL 1 + LOG_OUTPUT_ON_FAILURE 1 ) endif() # Install the built libraries to the final destination +message(STATUS "Installing built libraries to final destination...") if(WIN32) install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/ - ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ - # $(CMAKE_CURRENT_BINARY_DIR)/nexa_llama_build/bin/Release/ + # ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ + ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/ DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib @@ -200,12 +236,11 @@ if(WIN32) FILES_MATCHING PATTERN "*.dll" ) - install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/ - ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ - # $(CMAKE_CURRENT_BINARY_DIR)/nexa_llama_build/bin/Release/ + # ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ + ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/ DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib @@ -217,8 +252,8 @@ else() install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/ - ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ - # $(CMAKE_CURRENT_BINARY_DIR)/nexa_llama_install/lib/ + # ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ + ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/ DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib USE_SOURCE_PERMISSIONS @@ -226,12 +261,11 @@ else() PATTERN "*.so" PATTERN "*.dylib" ) - install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/ - ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ - # $(CMAKE_CURRENT_BINARY_DIR)/nexa_llama_install/lib/ + # ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ + ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/ DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib USE_SOURCE_PERMISSIONS diff --git a/pyproject.toml b/pyproject.toml index 071faca5..24bd26e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,7 @@ wheel.packages = [ sdist.include = [ "CMakeLists.txt", "dependency/llama.cpp/*", + "dependency/nexa_llama.cpp/*", "dependency/stable-diffusion.cpp/*", "dependency/bark.cpp/*", ] @@ -118,6 +119,7 @@ sdist.exclude = [ "dist", "nexa.egg-info", "dependency/llama.cpp/build", + "dependency/nexa_llama.cpp/build", "dependency/stable-diffusion.cpp/build", "dependency/bark.cpp/build", ] @@ -128,6 +130,7 @@ cmake.args = [ "-DCMAKE_BUILD_PARALLEL_LEVEL=16", "-DSTABLE_DIFFUSION_BUILD=ON", "-DLLAMA_BUILD=ON", + "-DNEXA_LLAMA_BUIL=ON", "-DBARK_BUILD=ON" ] From dd7c4911ca88c603681592b83011b513a2ce61b1 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Fri, 1 Nov 2024 07:26:27 +0000 Subject: [PATCH 004/160] add audio LM mapping --- CMakeLists.txt | 8 +- docs/README.md | 1 + nexa/constants.py | 18 ++++ nexa/gguf/__init__.py | 1 + nexa/gguf/llama/nexa_audio_lm_cpp.py | 112 ++++++++++++++++++++ nexa/gguf/nexa_inference_audio_lm.py | 146 +++++++++++++++++++++++++++ 6 files changed, 282 insertions(+), 4 deletions(-) create mode 100644 nexa/gguf/llama/nexa_audio_lm_cpp.py create mode 100644 nexa/gguf/nexa_inference_audio_lm.py diff --git a/CMakeLists.txt b/CMakeLists.txt index cb5594ea..96d197be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -227,7 
+227,7 @@ if(WIN32) install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/ - # ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ + ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/ @@ -239,7 +239,7 @@ if(WIN32) install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/ - # ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ + ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/ @@ -252,7 +252,7 @@ else() install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/ - # ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ + ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/ DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib @@ -264,7 +264,7 @@ else() install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/ - # ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ + ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/ DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib diff --git a/docs/README.md b/docs/README.md index c2b5903a..f093c40e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -193,6 +193,7 @@ Test individual modules with downloaded GGUF files: ```shell python tests/verify_text_generation.py python tests/verify_vlm.py +python tests/verify_audio_lm.py python tests/verify_image_generation.py python tests/verify_tts_generation.py ``` diff --git a/nexa/constants.py b/nexa/constants.py index 8d423f73..d3e16acd 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -108,6 +108,15 @@ class ModelType(Enum): "llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:model-fp16", } +NEXA_RUN_MODEL_MAP_AUDIO_LM = { + "qwen2audio": "Qwen2-Audio-7.8B-Instruct:q4_0", + "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:fp16", + "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:q4_0", + "octoaudio": "Octo-omni-audio:fp16", + "octoaudio:fp16": "Octo-omni-audio:fp16", + "octoaudio:q4_0": "Octo-omni-audio:q4_0", +} + NEXA_RUN_MODEL_MAP_VOICE = { "whisper-large": "whisper-large:bin-large-v3", "whisper-tiny": "whisper-tiny:bin-tiny", @@ -160,6 +169,15 @@ class ModelType(Enum): "llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16", } +NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { + "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", + "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", + "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", + "octoaudio": "Octo-omni-audio:projector-fp16", + "octoaudio:fp16": "Octo-omni-audio:projector-fp16", + "octoaudio:q4_0": "Octo-omni-audio:projector-q4_0", +} + NEXA_RUN_T5XXL_MAP = { "flux": "FLUX.1-schnell:t5xxl-q4_0", "FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0", diff --git a/nexa/gguf/__init__.py b/nexa/gguf/__init__.py index 81e20228..9ba70de9 100644 --- a/nexa/gguf/__init__.py +++ b/nexa/gguf/__init__.py @@ -10,4 +10,5 @@ "NexaVLMInference", "NexaVoiceInference", "NexaTTSInference", + "NexaAudioLMInference" ] \ No newline at end of file diff --git a/nexa/gguf/llama/nexa_audio_lm_cpp.py b/nexa/gguf/llama/nexa_audio_lm_cpp.py new file mode 100644 index 00000000..38bb5c72 
--- /dev/null +++ b/nexa/gguf/llama/nexa_audio_lm_cpp.py @@ -0,0 +1,112 @@ +import ctypes +import os +import sys +import pathlib + +# Load the library +def _load_shared_library(lib_base_name: str): + # Determine the file extension based on the platform + if sys.platform.startswith("linux"): + lib_ext = ".so" + elif sys.platform == "darwin": + lib_ext = ".dylib" + elif sys.platform == "win32": + lib_ext = ".dll" + else: + raise RuntimeError("Unsupported platform") + + # Construct the paths to the possible shared library names + _base_path = pathlib.Path(__file__).parent.parent.resolve() + _lib_paths = [ + _base_path / f"lib/lib{lib_base_name}{lib_ext}", + _base_path / f"lib/{lib_base_name}{lib_ext}", + ] + + if "NEXA_LLAMA_CPP_LIB" in os.environ: + lib_base_name = os.environ["NEXA_LLAMA_CPP_LIB"] + _lib = pathlib.Path(lib_base_name) + _base_path = _lib.parent.resolve() + _lib_paths = [_lib.resolve()] + + # Add the library directory to the DLL search path on Windows (if needed) + if sys.platform == "win32" and sys.version_info >= (3, 8): + os.add_dll_directory(str(_base_path)) + + # Try to load the shared library, handling potential errors + for _lib_path in _lib_paths: + if _lib_path.exists(): + try: + return ctypes.CDLL(str(_lib_path)) + except Exception as e: + raise RuntimeError(f"Failed to load shared library '{_lib_path}': {e}") + + raise FileNotFoundError( + f"Shared library with base name '{lib_base_name}' not found" + ) + +# Load both libraries +_lib_base_name = "hf-omni-audio-cli_shared" +_lib_omni = _load_shared_library(_lib_base_name) +_lib_base_name = "hf-qwen2-audio_shared" +_lib_qwen2 = _load_shared_library(_lib_base_name) + + +# struct omni_context_params +# { +# char *model; +# char *mmproj; +# char *file; +# char *prompt; +# int32_t n_gpu_layers; +# }; +class omni_context_params(ctypes.Structure): + _fields_ = [ + ("model", ctypes.c_char_p), + ("mmproj", ctypes.c_char_p), + ("file", ctypes.c_char_p), + ("prompt", ctypes.c_char_p), + ("n_gpu_layers", ctypes.c_int32), + ] + + +omni_context_params_p = ctypes.POINTER(omni_context_params) +omni_context_p = ctypes.c_void_p + + +def get_lib(is_qwen: bool): + return _lib_qwen2 if is_qwen else _lib_omni + + +def context_default_params(is_qwen: bool = False) -> omni_context_params: + lib = get_lib(is_qwen) + return lib.omni_context_default_params() + + +def init_context(params: omni_context_params_p, is_qwen: bool = False) -> omni_context_p: # type: ignore + lib = get_lib(is_qwen) + return lib.omni_init_context(params) + + +def process_full(ctx: omni_context_p, params: omni_context_params_p, is_qwen: bool = False): # type: ignore + lib = get_lib(is_qwen) + return lib.omni_process_full(ctx, params) + + +def free_context(ctx: omni_context_p, is_qwen: bool = False): + lib = get_lib(is_qwen) + return lib.omni_free(ctx) + + +# Set up function signatures for both libraries +for lib in [_lib_omni, _lib_qwen2]: + lib.omni_context_default_params.argtypes = [] + lib.omni_context_default_params.restype = omni_context_params + + lib.omni_init_context.argtypes = [omni_context_params_p] + lib.omni_init_context.restype = omni_context_p + + lib.omni_process_full.argtypes = [omni_context_p, omni_context_params_p] + lib.omni_process_full.restype = None + + lib.omni_free.argtypes = [omni_context_p] + lib.omni_free.restype = None \ No newline at end of file diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py new file mode 100644 index 00000000..42e8c6e1 --- /dev/null +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -0,0 
+1,146 @@ +import ctypes +import logging +import os + +from nexa.constants import ( + DEFAULT_TEXT_GEN_PARAMS, + NEXA_RUN_MODEL_MAP_VLM, + NEXA_RUN_PROJECTOR_MAP, +) +from nexa.gguf.lib_utils import is_gpu_available +from nexa.gguf.llama import nexa_audio_lm_cpp +from nexa.general import pull_model + +def is_qwen(model_name): + if "qwen2" in model_name: # TEMPORARY SOLUTION : this hardcode can be risky + return True + return False + +class NexaAudioLMInference: + """ + A class used for loading Bark text-to-speech models and running text-to-speech generation. + + Methods: + run: Run the audio LM generation loop. + + Args: + model_path (str): Path to the model file. + mmproj_path (str): Path to the audio projector file. + n_gpu_layers(int): Number of gpu layers to use for processing. Defaults to -1. + output_dir (str): Output directory for tts. Defaults to "tts". + verbosity (int): Verbosity level for the Bark model. Defaults to 0. + """ + + def __init__(self, model_path: str, mmproj_path: str, verbosity=0, device="auto", **kwargs): + if model_path is None and local_path is None: + raise ValueError("Either model_path or local_path must be provided.") + self.params = DEFAULT_TEXT_GEN_PARAMS.copy() + self.params.update(kwargs) + self.model = None + self.device = device + + self.model_path = model_path + self.mmproj_path = mmproj_path + if self.device == "auto" or self.device == "gpu": + self.n_gpu_layers = -1 if is_gpu_available() else 0 + else: + self.n_gpu_layers = 0 + self.is_qwen = is_qwen(model_path) + self.ctx_params = nexa_audio_lm_cpp.context_default_params(self.is_qwen) + self.context = None + self.verbosity = verbosity + self.params = { + "output_path": os.path.join(os.getcwd(), "audio-lm"), + } + self.params.update(kwargs) + self.downloaded_path, _ = pull_model(self.model_path, **kwargs) + if self.downloaded_path is None: + logging.error( + f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.", + exc_info=True, + ) + exit(1) + self._load_model() + + def _load_model(self): + logging.debug(f"Loading model from {self.model_path} and {self.mmproj_path}") + try: + self.ctx_params.model = ctypes.c_char_p(self.model_path.encode("utf-8")) + self.ctx_params.mmproj = ctypes.c_char_p(self.mmproj_path.encode("utf-8")) + self.ctx_params.n_gpu_layers = ( + 0x7FFFFFFF if self.n_gpu_layers == -1 else self.n_gpu_layers + ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers + + self.context = nexa_audio_lm_cpp.init_context( + ctypes.byref(self.ctx_params), + self.is_qwen + ) + if not self.context: + raise RuntimeError("Failed to load Bark model") + logging.debug("Model loaded successfully") + except Exception as e: + logging.error(f"Error loading model: {e}") + raise + + def run(self): + while True: + try: + audio_path = input("Audio Path (leave empty if no audio): ") + if audio_path and not os.path.exists(audio_path): + print(f"'{audio_path}' is not a path to audio. Will ignore.") + + user_input = input("Enter text: ") + + self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) + self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) + + nexa_audio_lm_cpp.process_full( + self.context, ctypes.byref(self.ctx_params), + self.is_qwen + ) + + except KeyboardInterrupt: + print("\nExiting...") + break + + except Exception as e: + logging.error(f"\nError during audio generation: {e}", exc_info=True) + + def __del__(self): + """ + Destructor to free the Bark context when the instance is deleted. 
+ """ + if self.context: + nexa_audio_lm_cpp.free_context(self.context) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Run audio-in text-out generation with nexa-omni-audio model" + ) + parser.add_argument( + "model_path", + type=str, + help="Path or identifier for the model in Nexa Model Hub", + ) + parser.add_argument( + "-d", + "--device", + type=str, + choices=["auto", "cpu", "gpu"], + default="auto", + help="Device to use for inference (auto, cpu, or gpu)", + ) + args = parser.parse_args() + kwargs = {k: v for k, v in vars(args).items() if v is not None} + model_path = kwargs.pop("model_path") + device = kwargs.pop("device", "auto") + + inference = NexaAudioLMInference( + model_path, + device=device, + **kwargs + ) + inference.run() \ No newline at end of file From 0c7917b50d40039c14df0bf41dce4d83d8d43a9f Mon Sep 17 00:00:00 2001 From: JoyboyBrian Date: Fri, 1 Nov 2024 10:50:31 -0700 Subject: [PATCH 005/160] swift project init. Collaborate with @Davidqian123 and @liute110 --- .gitignore | 11 +- Package.swift | 32 + README.md | 4 + examples/swift-test/Shared/ContentView.swift | 39 + examples/swift-test/Shared/ViewModel.swift | 50 ++ .../swift-test/TestApp-Commandline/main.swift | 62 ++ .../AccentColor.colorset/Contents.json | 11 + .../AppIcon.appiconset/Contents.json | 58 ++ .../Assets.xcassets/Contents.json | 6 + .../Preview Assets.xcassets/Contents.json | 6 + .../TestApp-Macos/TestApp_Macos.entitlements | 10 + .../TestApp-Macos/TestApp_MacosApp.swift | 10 + .../AccentColor.colorset/Contents.json | 11 + .../AppIcon.appiconset/Contents.json | 13 + .../TestApp-iOS/Assets.xcassets/Contents.json | 6 + .../Preview Assets.xcassets/Contents.json | 6 + .../TestApp-iOS/TestApp_iOSApp.swift | 10 + .../TestApp.xcodeproj/project.pbxproj | 665 ++++++++++++++++++ .../contents.xcworkspacedata | 7 + .../xcshareddata/swiftpm/Package.resolved | 15 + .../xcschemes/TestApp-Commandline.xcscheme | 84 +++ .../xcschemes/TestApp-iOS.xcscheme | 77 ++ swift/README.md | 84 +++ swift/Sources/NexaSwift/General.swift | 123 ++++ swift/Sources/NexaSwift/LlamaModel.swift | 207 ++++++ swift/Sources/NexaSwift/Models/Batch.swift | 23 + .../Models/ChatCompletionMessage.swift | 497 +++++++++++++ .../Models/ChatCompletionResponse.swift | 91 +++ swift/Sources/NexaSwift/Models/Common.swift | 28 + .../NexaSwift/Models/CompletionResponse.swift | 52 ++ .../NexaSwift/Models/Configuration.swift | 53 ++ .../NexaSwift/Models/SwiftLlamaError.swift | 6 + .../Sources/NexaSwift/Models/TypeAlias.swift | 10 + swift/Sources/NexaSwift/NexaSwiftActor.swift | 6 + .../Sources/NexaSwift/NexaTextInference.swift | 356 ++++++++++ .../Tests/NexaSwiftTests/NexaSwiftTests.swift | 6 + 36 files changed, 2734 insertions(+), 1 deletion(-) create mode 100644 Package.swift create mode 100644 examples/swift-test/Shared/ContentView.swift create mode 100644 examples/swift-test/Shared/ViewModel.swift create mode 100644 examples/swift-test/TestApp-Commandline/main.swift create mode 100644 examples/swift-test/TestApp-Macos/Assets.xcassets/AccentColor.colorset/Contents.json create mode 100644 examples/swift-test/TestApp-Macos/Assets.xcassets/AppIcon.appiconset/Contents.json create mode 100644 examples/swift-test/TestApp-Macos/Assets.xcassets/Contents.json create mode 100644 examples/swift-test/TestApp-Macos/Preview Content/Preview Assets.xcassets/Contents.json create mode 100644 examples/swift-test/TestApp-Macos/TestApp_Macos.entitlements create mode 100644 
examples/swift-test/TestApp-Macos/TestApp_MacosApp.swift create mode 100644 examples/swift-test/TestApp-iOS/Assets.xcassets/AccentColor.colorset/Contents.json create mode 100644 examples/swift-test/TestApp-iOS/Assets.xcassets/AppIcon.appiconset/Contents.json create mode 100644 examples/swift-test/TestApp-iOS/Assets.xcassets/Contents.json create mode 100644 examples/swift-test/TestApp-iOS/Preview Content/Preview Assets.xcassets/Contents.json create mode 100644 examples/swift-test/TestApp-iOS/TestApp_iOSApp.swift create mode 100644 examples/swift-test/TestApp.xcodeproj/project.pbxproj create mode 100644 examples/swift-test/TestApp.xcodeproj/project.xcworkspace/contents.xcworkspacedata create mode 100644 examples/swift-test/TestApp.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved create mode 100644 examples/swift-test/TestApp.xcodeproj/xcshareddata/xcschemes/TestApp-Commandline.xcscheme create mode 100644 examples/swift-test/TestApp.xcodeproj/xcshareddata/xcschemes/TestApp-iOS.xcscheme create mode 100644 swift/README.md create mode 100644 swift/Sources/NexaSwift/General.swift create mode 100644 swift/Sources/NexaSwift/LlamaModel.swift create mode 100644 swift/Sources/NexaSwift/Models/Batch.swift create mode 100644 swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift create mode 100644 swift/Sources/NexaSwift/Models/ChatCompletionResponse.swift create mode 100644 swift/Sources/NexaSwift/Models/Common.swift create mode 100644 swift/Sources/NexaSwift/Models/CompletionResponse.swift create mode 100644 swift/Sources/NexaSwift/Models/Configuration.swift create mode 100644 swift/Sources/NexaSwift/Models/SwiftLlamaError.swift create mode 100644 swift/Sources/NexaSwift/Models/TypeAlias.swift create mode 100644 swift/Sources/NexaSwift/NexaSwiftActor.swift create mode 100644 swift/Sources/NexaSwift/NexaTextInference.swift create mode 100644 swift/Tests/NexaSwiftTests/NexaSwiftTests.swift diff --git a/.gitignore b/.gitignore index 9063bffa..a22e3c43 100644 --- a/.gitignore +++ b/.gitignore @@ -90,4 +90,13 @@ build_*/ .cache/ # tests -quantization_test.py \ No newline at end of file +quantization_test.py + +# Swift +.swiftpm/ +UserInterfaceState.xcuserstate +xcuserdata/ +*.xcworkspace/xcuserdata/ +*.playground/playground.xcworkspace/xcuserdata/ +*.generated.plist +.build/ \ No newline at end of file diff --git a/Package.swift b/Package.swift new file mode 100644 index 00000000..a5ffa87f --- /dev/null +++ b/Package.swift @@ -0,0 +1,32 @@ +// swift-tools-version: 6.0 + +import PackageDescription + +let package = Package( + name: "NexaSwift", + platforms: [ + .macOS(.v15), + .iOS(.v18), + .watchOS(.v11), + .tvOS(.v18), + .visionOS(.v2) + ], + products: [ + .library(name: "NexaSwift", targets: ["NexaSwift"]), + ], + dependencies: [ + .package(url: "https://github.com/ggerganov/llama.cpp.git", branch: "master") + ], + targets: [ + .target( + name: "NexaSwift", + dependencies: [ + .product(name: "llama", package: "llama.cpp") + ], + path: "swift/Sources/NexaSwift"), + .testTarget( + name: "NexaSwiftTests", + dependencies: ["NexaSwift"], + path: "swift/Tests/NexaSwiftTests"), + ] +) diff --git a/README.md b/README.md index d685f756..60e21792 100644 --- a/README.md +++ b/README.md @@ -330,6 +330,10 @@ For detailed information on CLI commands and usage, please refer to the [CLI Ref To start a local server using models on your local computer, you can use the `nexa server` command. 
For detailed information on server setup, API endpoints, and usage examples, please refer to the [Server Reference](SERVER.md) document. +## Swift Package + +**[Swift SDK](https://github.com/NexaAI/nexa-sdk/tree/main/swift):** Provides a Swifty API, allowing Swift developers to easily integrate and use llama.cpp models in their projects. + ## Acknowledgements We would like to thank the following projects: diff --git a/examples/swift-test/Shared/ContentView.swift b/examples/swift-test/Shared/ContentView.swift new file mode 100644 index 00000000..140ad45a --- /dev/null +++ b/examples/swift-test/Shared/ContentView.swift @@ -0,0 +1,39 @@ +import SwiftUI + +struct ContentView: View { + @State private var viewModel = ViewModel() + @State private var prompt = "" + + var body: some View { + VStack { + Text("Swift Llama Demo").font(.title) + + Toggle(isOn: $viewModel.usingStream) { + Text("Use Stream") + } + .padding(.bottom) + + TextField("Enter your message", text: $prompt, axis: .vertical) + .textFieldStyle(.roundedBorder) + .lineLimit(3...5) + .padding(.bottom) + .onSubmit { + guard !prompt.isEmpty else { return } + viewModel.run(for: prompt) + } + + ScrollView { + Text(viewModel.result) + .frame(maxWidth: .infinity, alignment: .leading) + .textSelection(.enabled) + } + + Spacer() + } + .padding() + } +} + +#Preview { + ContentView() +} diff --git a/examples/swift-test/Shared/ViewModel.swift b/examples/swift-test/Shared/ViewModel.swift new file mode 100644 index 00000000..4d141691 --- /dev/null +++ b/examples/swift-test/Shared/ViewModel.swift @@ -0,0 +1,50 @@ +import Foundation +import NexaSwift +import SwiftUI +import Combine + +@Observable +class ViewModel { + let nexaSwift: NexaTextInference + var result = "" + var usingStream = true + private var cancallable: Set = [] + + init() { + let configuration = Configuration(maxNewToken: 128, stopTokens: [""]) + let path = Bundle.main.path(forResource: "octopusv2_q4_0", ofType: "gguf") ?? "" + nexaSwift = (try? NexaTextInference(modelPath: path, modelConfiguration: configuration))! + } + + func formatUserMessage(_ message: String) -> String { + let formatted = """ + Below is the query from the users, please call the correct function and generate the parameters to call the function. + + Query: \(message) + + Response: + """ + return formatted + } + + func run(for userMessage: String) { + result = "" + + let formattedUserMessage = formatUserMessage(userMessage) + + Task { + switch usingStream { + case true: + for try await value in await nexaSwift.createCompletionStream(for: formattedUserMessage) { + print("Received content: \(value.choices[0].text)") // DEBUG + result += value.choices[0].text + } + case false: + if let completionResponse = try? await nexaSwift.createCompletion(for: formattedUserMessage) { + print("Received completion response: \(completionResponse.choices[0].text)") // DEBUG + result += completionResponse.choices[0].text + } + } + } + } +} diff --git a/examples/swift-test/TestApp-Commandline/main.swift b/examples/swift-test/TestApp-Commandline/main.swift new file mode 100644 index 00000000..8408eac2 --- /dev/null +++ b/examples/swift-test/TestApp-Commandline/main.swift @@ -0,0 +1,62 @@ +import Foundation +import NexaSwift + +let configuration = NexaSwift.Configuration( + maxNewToken: 128, + stopTokens: [] +) + +let model_path = Bundle.main.path(forResource: "octopusv2_q4_0", ofType: "gguf") ?? 
"" +let nexaSwift = try NexaSwift.NexaTextInference(modelPath: model_path, modelConfiguration: configuration) + +var streamMode = false +print("Do you want to enable stream mode? (yes/y or no/n):", terminator: " ") +var userInput = readLine()?.lowercased() ?? "" +if userInput == "yes" || userInput == "y" { + streamMode = true +} +print("") + +var messages:[ChatCompletionRequestMessage] = [] +let maxHistory = 2 + +while true { + print("You:", terminator: " ") + userInput = readLine() ?? "" + print("Bot:", terminator: " ") + + let userMessageText = ChatCompletionRequestMessage.user( + ChatCompletionRequestUserMessage(content: .text(userInput)) + ) + + messages.append(userMessageText) + if messages.count > maxHistory * 2 { + messages.removeFirst(2) + } + + var currentMessage = "" + if streamMode{ + for try await value in await nexaSwift + .createChatCompletionStream(for: messages) { + print(value.choices[0].delta.content ?? "", terminator: "") + currentMessage += value.choices[0].delta.content ?? "" + } + }else{ + let response = try await nexaSwift.createChatCompletion(for: messages) + print(response.choices[0].message.content ?? "", terminator: "") + currentMessage += response.choices[0].message.content ?? "" + } + + + let assistantMessage = ChatCompletionRequestMessage.assistant( + ChatCompletionRequestAssistantMessage( + content: currentMessage, + toolCalls: nil, + functionCall: nil + ) + ) + + messages.append(assistantMessage) + + print("") +} diff --git a/examples/swift-test/TestApp-Macos/Assets.xcassets/AccentColor.colorset/Contents.json b/examples/swift-test/TestApp-Macos/Assets.xcassets/AccentColor.colorset/Contents.json new file mode 100644 index 00000000..eb878970 --- /dev/null +++ b/examples/swift-test/TestApp-Macos/Assets.xcassets/AccentColor.colorset/Contents.json @@ -0,0 +1,11 @@ +{ + "colors" : [ + { + "idiom" : "universal" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/swift-test/TestApp-Macos/Assets.xcassets/AppIcon.appiconset/Contents.json b/examples/swift-test/TestApp-Macos/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 00000000..3f00db43 --- /dev/null +++ b/examples/swift-test/TestApp-Macos/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,58 @@ +{ + "images" : [ + { + "idiom" : "mac", + "scale" : "1x", + "size" : "16x16" + }, + { + "idiom" : "mac", + "scale" : "2x", + "size" : "16x16" + }, + { + "idiom" : "mac", + "scale" : "1x", + "size" : "32x32" + }, + { + "idiom" : "mac", + "scale" : "2x", + "size" : "32x32" + }, + { + "idiom" : "mac", + "scale" : "1x", + "size" : "128x128" + }, + { + "idiom" : "mac", + "scale" : "2x", + "size" : "128x128" + }, + { + "idiom" : "mac", + "scale" : "1x", + "size" : "256x256" + }, + { + "idiom" : "mac", + "scale" : "2x", + "size" : "256x256" + }, + { + "idiom" : "mac", + "scale" : "1x", + "size" : "512x512" + }, + { + "idiom" : "mac", + "scale" : "2x", + "size" : "512x512" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/swift-test/TestApp-Macos/Assets.xcassets/Contents.json b/examples/swift-test/TestApp-Macos/Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ b/examples/swift-test/TestApp-Macos/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/swift-test/TestApp-Macos/Preview Content/Preview Assets.xcassets/Contents.json b/examples/swift-test/TestApp-Macos/Preview Content/Preview 
Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ b/examples/swift-test/TestApp-Macos/Preview Content/Preview Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/swift-test/TestApp-Macos/TestApp_Macos.entitlements b/examples/swift-test/TestApp-Macos/TestApp_Macos.entitlements new file mode 100644 index 00000000..18aff0ce --- /dev/null +++ b/examples/swift-test/TestApp-Macos/TestApp_Macos.entitlements @@ -0,0 +1,10 @@ + + + + + com.apple.security.app-sandbox + + com.apple.security.files.user-selected.read-only + + + diff --git a/examples/swift-test/TestApp-Macos/TestApp_MacosApp.swift b/examples/swift-test/TestApp-Macos/TestApp_MacosApp.swift new file mode 100644 index 00000000..2c39f8ae --- /dev/null +++ b/examples/swift-test/TestApp-Macos/TestApp_MacosApp.swift @@ -0,0 +1,10 @@ +import SwiftUI + +@main +struct TestApp_MacosApp: App { + var body: some Scene { + WindowGroup { + ContentView() + } + } +} diff --git a/examples/swift-test/TestApp-iOS/Assets.xcassets/AccentColor.colorset/Contents.json b/examples/swift-test/TestApp-iOS/Assets.xcassets/AccentColor.colorset/Contents.json new file mode 100644 index 00000000..eb878970 --- /dev/null +++ b/examples/swift-test/TestApp-iOS/Assets.xcassets/AccentColor.colorset/Contents.json @@ -0,0 +1,11 @@ +{ + "colors" : [ + { + "idiom" : "universal" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/swift-test/TestApp-iOS/Assets.xcassets/AppIcon.appiconset/Contents.json b/examples/swift-test/TestApp-iOS/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 00000000..13613e3e --- /dev/null +++ b/examples/swift-test/TestApp-iOS/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,13 @@ +{ + "images" : [ + { + "idiom" : "universal", + "platform" : "ios", + "size" : "1024x1024" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/swift-test/TestApp-iOS/Assets.xcassets/Contents.json b/examples/swift-test/TestApp-iOS/Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ b/examples/swift-test/TestApp-iOS/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/swift-test/TestApp-iOS/Preview Content/Preview Assets.xcassets/Contents.json b/examples/swift-test/TestApp-iOS/Preview Content/Preview Assets.xcassets/Contents.json new file mode 100644 index 00000000..73c00596 --- /dev/null +++ b/examples/swift-test/TestApp-iOS/Preview Content/Preview Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/examples/swift-test/TestApp-iOS/TestApp_iOSApp.swift b/examples/swift-test/TestApp-iOS/TestApp_iOSApp.swift new file mode 100644 index 00000000..da5c8f69 --- /dev/null +++ b/examples/swift-test/TestApp-iOS/TestApp_iOSApp.swift @@ -0,0 +1,10 @@ +import SwiftUI + +@main +struct TestAppApp: App { + var body: some Scene { + WindowGroup { + ContentView() + } + } +} diff --git a/examples/swift-test/TestApp.xcodeproj/project.pbxproj b/examples/swift-test/TestApp.xcodeproj/project.pbxproj new file mode 100644 index 00000000..29e827ba --- /dev/null +++ b/examples/swift-test/TestApp.xcodeproj/project.pbxproj @@ -0,0 +1,665 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 60; + objects = { + +/* Begin PBXBuildFile section */ + 4B0B057A2BE5C451002BC7AF /* TestApp_MacosApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B0B05792BE5C451002BC7AF /* TestApp_MacosApp.swift */; }; + 4B0B057E2BE5C452002BC7AF /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 4B0B057D2BE5C452002BC7AF /* Assets.xcassets */; }; + 4B0B05812BE5C452002BC7AF /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 4B0B05802BE5C452002BC7AF /* Preview Assets.xcassets */; }; + 4B1334FA2BE5C4AC0020AB8E /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 4B1334F42BE5C4AC0020AB8E /* Assets.xcassets */; }; + 4B1334FB2BE5C4AC0020AB8E /* ViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B1334F52BE5C4AC0020AB8E /* ViewModel.swift */; }; + 4B1334FC2BE5C4AC0020AB8E /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 4B1334F72BE5C4AC0020AB8E /* Preview Assets.xcassets */; }; + 4B1334FD2BE5C4AC0020AB8E /* TestApp_iOSApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B1334F82BE5C4AC0020AB8E /* TestApp_iOSApp.swift */; }; + 4B1334FE2BE5C4AC0020AB8E /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B1334F92BE5C4AC0020AB8E /* ContentView.swift */; }; + 4BB1E3E12BE6464F00F1D21A /* NexaSwift in Frameworks */ = {isa = PBXBuildFile; productRef = 4BB1E3E02BE6464F00F1D21A /* NexaSwift */; }; + 4BB1E3E32BE6466A00F1D21A /* NexaSwift in Frameworks */ = {isa = PBXBuildFile; productRef = 4BB1E3E22BE6466A00F1D21A /* NexaSwift */; }; + 4BB1E3E52BE646CF00F1D21A /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B1334F92BE5C4AC0020AB8E /* ContentView.swift */; }; + 4BB1E3E62BE646CF00F1D21A /* ViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B1334F52BE5C4AC0020AB8E /* ViewModel.swift */; }; + 4BEE1DB62BE70024001CE949 /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4BEE1DB52BE70024001CE949 /* main.swift */; }; + 4BEE1DBB2BE7003E001CE949 /* NexaSwift in Frameworks */ = {isa = PBXBuildFile; productRef = 4BEE1DBA2BE7003E001CE949 /* NexaSwift */; }; + 932931422CD4CA5500B5A8D7 /* octopusv2_q4_0.gguf in CopyFiles */ = {isa = PBXBuildFile; fileRef = D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */; }; + D3EA0DDF2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */; }; + D3EA0DE22CCC97B900EA9CA7 /* octopusv2_q4_0.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 4BEE1DB12BE70024001CE949 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 12; + dstPath = ""; + dstSubfolderSpec = 16; + files = ( + 932931422CD4CA5500B5A8D7 /* octopusv2_q4_0.gguf in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 4B0B055A2BE5B44E002BC7AF /* TestApp-iOS.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "TestApp-iOS.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 4B0B05772BE5C451002BC7AF /* TestApp-Macos.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "TestApp-Macos.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 4B0B05792BE5C451002BC7AF /* TestApp_MacosApp.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = TestApp_MacosApp.swift; sourceTree = ""; }; + 4B0B057D2BE5C452002BC7AF /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 4B0B05802BE5C452002BC7AF /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; + 4B0B05822BE5C452002BC7AF /* TestApp_Macos.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = TestApp_Macos.entitlements; sourceTree = ""; }; + 4B1334F42BE5C4AC0020AB8E /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 4B1334F52BE5C4AC0020AB8E /* ViewModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ViewModel.swift; sourceTree = ""; }; + 4B1334F72BE5C4AC0020AB8E /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; + 4B1334F82BE5C4AC0020AB8E /* TestApp_iOSApp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TestApp_iOSApp.swift; sourceTree = ""; }; + 4B1334F92BE5C4AC0020AB8E /* ContentView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = ""; }; + 4B51A47B2BE7449700F65BFC /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; + 4BEE1DB32BE70024001CE949 /* TestApp-CLI */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "TestApp-CLI"; sourceTree = BUILT_PRODUCTS_DIR; }; + 4BEE1DB52BE70024001CE949 /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; + D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = octopusv2_q4_0.gguf; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 4B0B05572BE5B44E002BC7AF /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 4BB1E3E12BE6464F00F1D21A /* NexaSwift in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 4B0B05742BE5C451002BC7AF /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 4BB1E3E32BE6466A00F1D21A /* NexaSwift in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 4BEE1DB02BE70024001CE949 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 4BEE1DBB2BE7003E001CE949 /* NexaSwift in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 4B0B05512BE5B44E002BC7AF = { + isa = PBXGroup; + children = ( + 4B51A47B2BE7449700F65BFC /* README.md */, + 4BB1E3E42BE646BB00F1D21A /* Shared */, + 4B1334F32BE5C4AC0020AB8E /* TestApp-iOS */, + 4B0B05782BE5C451002BC7AF /* TestApp-Macos */, + 4BEE1DB42BE70024001CE949 /* TestApp-Commandline */, + 4B0B055B2BE5B44E002BC7AF /* Products */, + ); + sourceTree = ""; + }; + 4B0B055B2BE5B44E002BC7AF /* Products */ = { + isa = PBXGroup; + children = ( + 4B0B055A2BE5B44E002BC7AF /* TestApp-iOS.app */, + 
4B0B05772BE5C451002BC7AF /* TestApp-Macos.app */, + 4BEE1DB32BE70024001CE949 /* TestApp-CLI */, + ); + name = Products; + sourceTree = ""; + }; + 4B0B05782BE5C451002BC7AF /* TestApp-Macos */ = { + isa = PBXGroup; + children = ( + 4B0B05792BE5C451002BC7AF /* TestApp_MacosApp.swift */, + 4B0B057D2BE5C452002BC7AF /* Assets.xcassets */, + 4B0B05822BE5C452002BC7AF /* TestApp_Macos.entitlements */, + 4B0B057F2BE5C452002BC7AF /* Preview Content */, + ); + path = "TestApp-Macos"; + sourceTree = ""; + }; + 4B0B057F2BE5C452002BC7AF /* Preview Content */ = { + isa = PBXGroup; + children = ( + 4B0B05802BE5C452002BC7AF /* Preview Assets.xcassets */, + ); + path = "Preview Content"; + sourceTree = ""; + }; + 4B10A3302BE5CD6600BEA6A1 /* Models */ = { + isa = PBXGroup; + children = ( + D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */, + ); + name = Models; + path = Shared/Models; + sourceTree = SOURCE_ROOT; + }; + 4B1334F32BE5C4AC0020AB8E /* TestApp-iOS */ = { + isa = PBXGroup; + children = ( + 4B1334F82BE5C4AC0020AB8E /* TestApp_iOSApp.swift */, + 4B1334F42BE5C4AC0020AB8E /* Assets.xcassets */, + 4B1334F62BE5C4AC0020AB8E /* Preview Content */, + ); + path = "TestApp-iOS"; + sourceTree = ""; + }; + 4B1334F62BE5C4AC0020AB8E /* Preview Content */ = { + isa = PBXGroup; + children = ( + 4B1334F72BE5C4AC0020AB8E /* Preview Assets.xcassets */, + ); + path = "Preview Content"; + sourceTree = ""; + }; + 4BB1E3E42BE646BB00F1D21A /* Shared */ = { + isa = PBXGroup; + children = ( + 4B10A3302BE5CD6600BEA6A1 /* Models */, + 4B1334F92BE5C4AC0020AB8E /* ContentView.swift */, + 4B1334F52BE5C4AC0020AB8E /* ViewModel.swift */, + ); + path = Shared; + sourceTree = ""; + }; + 4BEE1DB42BE70024001CE949 /* TestApp-Commandline */ = { + isa = PBXGroup; + children = ( + 4BEE1DB52BE70024001CE949 /* main.swift */, + ); + path = "TestApp-Commandline"; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 4B0B05592BE5B44E002BC7AF /* TestApp-iOS */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4B0B05682BE5B450002BC7AF /* Build configuration list for PBXNativeTarget "TestApp-iOS" */; + buildPhases = ( + 4B0B05562BE5B44E002BC7AF /* Sources */, + 4B0B05572BE5B44E002BC7AF /* Frameworks */, + 4B0B05582BE5B44E002BC7AF /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "TestApp-iOS"; + packageProductDependencies = ( + 4BB1E3E02BE6464F00F1D21A /* NexaSwift */, + ); + productName = TestApp; + productReference = 4B0B055A2BE5B44E002BC7AF /* TestApp-iOS.app */; + productType = "com.apple.product-type.application"; + }; + 4B0B05762BE5C451002BC7AF /* TestApp-Macos */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4B0B05832BE5C452002BC7AF /* Build configuration list for PBXNativeTarget "TestApp-Macos" */; + buildPhases = ( + 4B0B05732BE5C451002BC7AF /* Sources */, + 4B0B05742BE5C451002BC7AF /* Frameworks */, + 4B0B05752BE5C451002BC7AF /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "TestApp-Macos"; + packageProductDependencies = ( + 4BB1E3E22BE6466A00F1D21A /* NexaSwift */, + ); + productName = "TestApp-Macos"; + productReference = 4B0B05772BE5C451002BC7AF /* TestApp-Macos.app */; + productType = "com.apple.product-type.application"; + }; + 4BEE1DB22BE70024001CE949 /* TestApp-CLI */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4BEE1DB92BE70024001CE949 /* Build configuration list for PBXNativeTarget "TestApp-CLI" */; + buildPhases = ( + 4BEE1DAF2BE70024001CE949 /* Sources */, + 4BEE1DB02BE70024001CE949 /* Frameworks */, + 
4BEE1DB12BE70024001CE949 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "TestApp-CLI"; + packageProductDependencies = ( + 4BEE1DBA2BE7003E001CE949 /* NexaSwift */, + ); + productName = "TestApp-Commandline"; + productReference = 4BEE1DB32BE70024001CE949 /* TestApp-CLI */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 4B0B05522BE5B44E002BC7AF /* Project object */ = { + isa = PBXProject; + attributes = { + BuildIndependentTargetsInParallel = 1; + LastSwiftUpdateCheck = 1520; + LastUpgradeCheck = 1520; + TargetAttributes = { + 4B0B05592BE5B44E002BC7AF = { + CreatedOnToolsVersion = 15.2; + }; + 4B0B05762BE5C451002BC7AF = { + CreatedOnToolsVersion = 15.2; + }; + 4BEE1DB22BE70024001CE949 = { + CreatedOnToolsVersion = 15.2; + }; + }; + }; + buildConfigurationList = 4B0B05552BE5B44E002BC7AF /* Build configuration list for PBXProject "TestApp" */; + compatibilityVersion = "Xcode 14.0"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 4B0B05512BE5B44E002BC7AF; + packageReferences = ( + 4B1335012BE5C58A0020AB8E /* XCLocalSwiftPackageReference "../.." */, + ); + productRefGroup = 4B0B055B2BE5B44E002BC7AF /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 4B0B05592BE5B44E002BC7AF /* TestApp-iOS */, + 4B0B05762BE5C451002BC7AF /* TestApp-Macos */, + 4BEE1DB22BE70024001CE949 /* TestApp-CLI */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 4B0B05582BE5B44E002BC7AF /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4B1334FC2BE5C4AC0020AB8E /* Preview Assets.xcassets in Resources */, + 4B1334FA2BE5C4AC0020AB8E /* Assets.xcassets in Resources */, + D3EA0DDF2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 4B0B05752BE5C451002BC7AF /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4B0B05812BE5C452002BC7AF /* Preview Assets.xcassets in Resources */, + 4B0B057E2BE5C452002BC7AF /* Assets.xcassets in Resources */, + D3EA0DE22CCC97B900EA9CA7 /* octopusv2_q4_0.gguf in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 4B0B05562BE5B44E002BC7AF /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4B1334FB2BE5C4AC0020AB8E /* ViewModel.swift in Sources */, + 4B1334FD2BE5C4AC0020AB8E /* TestApp_iOSApp.swift in Sources */, + 4B1334FE2BE5C4AC0020AB8E /* ContentView.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 4B0B05732BE5C451002BC7AF /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4BB1E3E62BE646CF00F1D21A /* ViewModel.swift in Sources */, + 4BB1E3E52BE646CF00F1D21A /* ContentView.swift in Sources */, + 4B0B057A2BE5C451002BC7AF /* TestApp_MacosApp.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 4BEE1DAF2BE70024001CE949 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4BEE1DB62BE70024001CE949 /* main.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 4B0B05662BE5B450002BC7AF /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + 
ALWAYS_SEARCH_USER_PATHS = NO; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 17.2; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)"; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + 4B0B05672BE5B450002BC7AF /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++20"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + 
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_USER_SCRIPT_SANDBOXING = YES; + GCC_C_LANGUAGE_STANDARD = gnu17; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 17.2; + LOCALIZATION_PREFERS_STRING_CATALOGS = YES; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 4B0B05692BE5B450002BC7AF /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_ASSET_PATHS = "\"TestApp-iOS/Preview Content\""; + DEVELOPMENT_TEAM = 8235LVN3VW; + ENABLE_PREVIEWS = YES; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; + INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; + INFOPLIST_KEY_UILaunchScreen_Generation = YES; + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; + IPHONEOS_DEPLOYMENT_TARGET = 18.0; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = test.tim.TestApp; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 4B0B056A2BE5B450002BC7AF /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_STYLE = Automatic; + CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_ASSET_PATHS = "\"TestApp-iOS/Preview Content\""; + DEVELOPMENT_TEAM = 8235LVN3VW; + ENABLE_PREVIEWS = YES; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES; + INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES; + INFOPLIST_KEY_UILaunchScreen_Generation = YES; + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; + INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight"; + IPHONEOS_DEPLOYMENT_TARGET = 18.0; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = test.tim.TestApp; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; + 4B0B05842BE5C452002BC7AF /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + 
CODE_SIGN_ENTITLEMENTS = "TestApp-Macos/TestApp_Macos.entitlements"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; + CODE_SIGN_STYLE = Automatic; + COMBINE_HIDPI_IMAGES = YES; + CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_ASSET_PATHS = "\"TestApp-Macos/Preview Content\""; + DEVELOPMENT_TEAM = 8235LVN3VW; + ENABLE_HARDENED_RUNTIME = YES; + ENABLE_PREVIEWS = YES; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_NSHumanReadableCopyright = ""; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + ); + MACOSX_DEPLOYMENT_TARGET = 15.0; + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = "test.tim.TestApp-Macos"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_VERSION = 5.0; + }; + name = Debug; + }; + 4B0B05852BE5C452002BC7AF /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_ENTITLEMENTS = "TestApp-Macos/TestApp_Macos.entitlements"; + "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; + CODE_SIGN_STYLE = Automatic; + COMBINE_HIDPI_IMAGES = YES; + CURRENT_PROJECT_VERSION = 1; + DEVELOPMENT_ASSET_PATHS = "\"TestApp-Macos/Preview Content\""; + DEVELOPMENT_TEAM = 8235LVN3VW; + ENABLE_HARDENED_RUNTIME = YES; + ENABLE_PREVIEWS = YES; + GENERATE_INFOPLIST_FILE = YES; + INFOPLIST_KEY_NSHumanReadableCopyright = ""; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + ); + MACOSX_DEPLOYMENT_TARGET = 15.0; + MARKETING_VERSION = 1.0; + PRODUCT_BUNDLE_IDENTIFIER = "test.tim.TestApp-Macos"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; + SWIFT_EMIT_LOC_STRINGS = YES; + SWIFT_VERSION = 5.0; + }; + name = Release; + }; + 4BEE1DB72BE70024001CE949 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = H34TY7DNP5; + ENABLE_HARDENED_RUNTIME = YES; + MACOSX_DEPLOYMENT_TARGET = 15.0; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; + SWIFT_VERSION = 5.0; + }; + name = Debug; + }; + 4BEE1DB82BE70024001CE949 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = H34TY7DNP5; + ENABLE_HARDENED_RUNTIME = YES; + MACOSX_DEPLOYMENT_TARGET = 15.0; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = macosx; + SWIFT_VERSION = 5.0; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 4B0B05552BE5B44E002BC7AF /* Build configuration list for PBXProject "TestApp" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4B0B05662BE5B450002BC7AF /* Debug */, + 4B0B05672BE5B450002BC7AF /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4B0B05682BE5B450002BC7AF /* Build configuration list for PBXNativeTarget "TestApp-iOS" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4B0B05692BE5B450002BC7AF /* Debug */, + 4B0B056A2BE5B450002BC7AF /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4B0B05832BE5C452002BC7AF /* Build configuration list for PBXNativeTarget "TestApp-Macos" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4B0B05842BE5C452002BC7AF /* Debug */, + 4B0B05852BE5C452002BC7AF /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4BEE1DB92BE70024001CE949 /* Build configuration 
list for PBXNativeTarget "TestApp-CLI" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4BEE1DB72BE70024001CE949 /* Debug */, + 4BEE1DB82BE70024001CE949 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + +/* Begin XCLocalSwiftPackageReference section */ + 4B1335012BE5C58A0020AB8E /* XCLocalSwiftPackageReference "../.." */ = { + isa = XCLocalSwiftPackageReference; + relativePath = ../..; + }; +/* End XCLocalSwiftPackageReference section */ + +/* Begin XCSwiftPackageProductDependency section */ + 4BB1E3E02BE6464F00F1D21A /* NexaSwift */ = { + isa = XCSwiftPackageProductDependency; + productName = NexaSwift; + }; + 4BB1E3E22BE6466A00F1D21A /* NexaSwift */ = { + isa = XCSwiftPackageProductDependency; + productName = NexaSwift; + }; + 4BEE1DBA2BE7003E001CE949 /* NexaSwift */ = { + isa = XCSwiftPackageProductDependency; + productName = NexaSwift; + }; +/* End XCSwiftPackageProductDependency section */ + }; + rootObject = 4B0B05522BE5B44E002BC7AF /* Project object */; +} diff --git a/examples/swift-test/TestApp.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/examples/swift-test/TestApp.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 00000000..919434a6 --- /dev/null +++ b/examples/swift-test/TestApp.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/examples/swift-test/TestApp.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/examples/swift-test/TestApp.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved new file mode 100644 index 00000000..7c675f2c --- /dev/null +++ b/examples/swift-test/TestApp.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -0,0 +1,15 @@ +{ + "originHash" : "b2becbcc0254795d9a140b56614e328e9054fc0127630faeab494795dc5b48bf", + "pins" : [ + { + "identity" : "llama.cpp", + "kind" : "remoteSourceControl", + "location" : "https://github.com/ggerganov/llama.cpp.git", + "state" : { + "branch" : "master", + "revision" : "96b69121033d2b6b951d1b6b1b43f8b4f97dac99" + } + } + ], + "version" : 3 +} diff --git a/examples/swift-test/TestApp.xcodeproj/xcshareddata/xcschemes/TestApp-Commandline.xcscheme b/examples/swift-test/TestApp.xcodeproj/xcshareddata/xcschemes/TestApp-Commandline.xcscheme new file mode 100644 index 00000000..6e130b24 --- /dev/null +++ b/examples/swift-test/TestApp.xcodeproj/xcshareddata/xcschemes/TestApp-Commandline.xcscheme @@ -0,0 +1,84 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/swift-test/TestApp.xcodeproj/xcshareddata/xcschemes/TestApp-iOS.xcscheme b/examples/swift-test/TestApp.xcodeproj/xcshareddata/xcschemes/TestApp-iOS.xcscheme new file mode 100644 index 00000000..4d900c28 --- /dev/null +++ b/examples/swift-test/TestApp.xcodeproj/xcshareddata/xcschemes/TestApp-iOS.xcscheme @@ -0,0 +1,77 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/swift/README.md b/swift/README.md new file mode 100644 index 00000000..23ac3bdb --- /dev/null +++ b/swift/README.md @@ -0,0 +1,84 @@ +# NexaSwift + +**NexaSwift** is a Swift wrapper for the [llama.cpp](https://github.com/ggerganov/llama.cpp.git) library. This repository provides a Swifty API, allowing Swift developers to easily integrate and use `llama.cpp` models in their projects. +**NOTE:** Currently, we support text inference capabilities. 
+
+## Installation
+
+To add NexaSwift to your Swift project, add the following dependency in your `Package.swift` file:
+
+```swift
+.package(url: "https://github.com/NexaAI/nexa-sdk/NexaSwift.git", .branch("main"))
+```
+
+## Usage
+
+### 1. Initialize NexaSwift with Model Path
+
+Create a configuration and initialize NexaSwift with the path to your model file:
+
+```swift
+let configuration = NexaSwift.Configuration(
+    maxNewToken: 128,
+    stopTokens: []
+)
+let modelPath = "path/to/your/model"
+let nexaSwift = try NexaSwift.NexaTextInference(modelPath: modelPath, modelConfiguration: configuration)
+```
+
+### 2. Chat Completion API
+
+#### Generate messages
+
+```swift
+var messages:[ChatCompletionRequestMessage] = []
+let userMessage = ChatCompletionRequestMessage.user(
+    ChatCompletionRequestUserMessage(content: .text("user input"))
+)
+messages.append(userMessage)
+```
+
+#### Non-Streaming Mode
+
+For non-streaming mode, call `createChatCompletion` with your messages. This returns the complete response once it’s available.
+
+```swift
+let response = try await nexaSwift.createChatCompletion(for: messages)
+print(response.choices[0].message.content ?? "")
+```
+
+#### Streaming Mode
+
+In streaming mode, you can process the response in real time as it’s generated:
+
+```swift
+for try await response in await nexaSwift.createChatCompletionStream(for: messages) {
+    print(response.choices[0].delta.content ?? "")
+}
+```
+
+### 3. Completion API
+
+#### Non-Streaming Mode
+
+```swift
+if let response = try? await nexaSwift.createCompletion(for: prompt) {
+    print(response.choices[0].text)
+}
+```
+
+#### Streaming Mode
+
+```swift
+for try await response in await nexaSwift.createCompletionStream(for: prompt) {
+    print(response.choices[0].text)
+}
+```
+
+## Quick Start
+
+Open the [swift test project](../examples/swift-test/) folder in Xcode and run the project.
+
+## Download Models
+
+NexaSwift supports all models compatible with llama.cpp. You can download models from the [Nexa AI ModelHub](https://nexa.ai/models).
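Putting the README snippets above together, the following is a minimal end-to-end sketch (an editorial illustration, not part of the patch): it assumes a local llama.cpp-compatible GGUF file at a placeholder path, and it uses the per-call overrides (`temperature`, `topK`, `modelType`, …) that `NexaTextInference` exposes in the source added later in this patch; the specific values and the prompt text are illustrative only.

```swift
import NexaSwift

// Minimal sketch: initialize the model, build a chat history, and stream a reply.
// "path/to/model.gguf" is a placeholder; point it at any llama.cpp-compatible GGUF file.
let configuration = NexaSwift.Configuration(maxNewToken: 256, stopTokens: [])
let inference = try NexaSwift.NexaTextInference(modelPath: "path/to/model.gguf",
                                                modelConfiguration: configuration)

var messages: [ChatCompletionRequestMessage] = [
    .user(ChatCompletionRequestUserMessage(content: .text("Summarize llama.cpp in one sentence.")))
]

// Stream the chat completion, overriding sampling parameters for this call and
// pinning the prompt format instead of relying on auto-detection from the model path.
for try await chunk in await inference.createChatCompletionStream(
    for: messages,
    temperature: 0.7,
    topK: 40,
    modelType: .llama3
) {
    print(chunk.choices[0].delta.content ?? "", terminator: "")
}
print()
```

As in the Commandline example elsewhere in this patch series, the top-level `try`/`await` calls assume an executable target's `main.swift`; inside an app, wrap the same calls in a `Task`.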
diff --git a/swift/Sources/NexaSwift/General.swift b/swift/Sources/NexaSwift/General.swift new file mode 100644 index 00000000..c9956b0c --- /dev/null +++ b/swift/Sources/NexaSwift/General.swift @@ -0,0 +1,123 @@ +import Foundation + +let NEXA_RUN_MODEL_MAP_TEXT: [String: String] = [ + "octopus-v2": "Octopus-v2:q4_0", + "octopus-v4": "Octopus-v4:q4_0", + "gpt2": "gpt2:q4_0", + "tinyllama": "TinyLlama-1.1B-Chat-v1.0:fp16", + "llama2": "Llama-2-7b-chat:q4_0", + "llama3": "Meta-Llama-3-8B-Instruct:q4_0", + "llama3.1": "Meta-Llama-3.1-8B-Instruct:q4_0", + "llama3.2": "Llama3.2-3B-Instruct:q4_0", + "gemma": "gemma-1.1-2b-instruct:q4_0", + "gemma2": "gemma-2-2b-instruct:q4_0", + "qwen1.5": "Qwen1.5-7B-Instruct:q4_0", + "qwen2": "Qwen2-1.5B-Instruct:q4_0", + "qwen2.5": "Qwen2.5-1.5B-Instruct:q4_0", + "mistral": "Mistral-7B-Instruct-v0.3:q4_0", + "codegemma": "codegemma-2b:q4_0", + "codellama": "CodeLlama-7b-Instruct:q4_0", + "codeqwen": "Qwen2.5-Coder-1.5B-Instruct:q4_0", + "mathqwen": "Qwen2.5-Math-1.5B-Instruct:q4_0", + "deepseek-coder": "deepseek-coder-1.3b-instruct:q4_0", + "dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0", + "phi2": "Phi-2:q4_0", + "phi3": "Phi-3-mini-128k-instruct:q4_0", + "phi3.5": "Phi-3.5-mini-instruct:q4_0", + "llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0", + "llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M", + "openelm": "OpenELM-3B:q4_K_M" +] + +let NEXA_RUN_MODEL_MAP_VLM: [String: String] = [ + "nanollava": "nanoLLaVA:model-fp16", + "nanoLLaVA:fp16": "nanoLLaVA:model-fp16", + "llava-phi3": "llava-phi-3-mini:model-q4_0", + "llava-phi-3-mini:q4_0": "llava-phi-3-mini:model-q4_0", + "llava-phi-3-mini:fp16": "llava-phi-3-mini:model-fp16", + "llava-llama3": "llava-llama-3-8b-v1.1:model-q4_0", + "llava-llama-3-8b-v1.1:q4_0": "llava-llama-3-8b-v1.1:model-q4_0", + "llava-llama-3-8b-v1.1:fp16": "llava-llama-3-8b-v1.1:model-fp16", + "llava1.6-mistral": "llava-v1.6-mistral-7b:model-q4_0", + "llava-v1.6-mistral-7b:q4_0": "llava-v1.6-mistral-7b:model-q4_0", + "llava-v1.6-mistral-7b:fp16": "llava-v1.6-mistral-7b:model-fp16", + "llava1.6-vicuna": "llava-v1.6-vicuna-7b:model-q4_0", + "llava-v1.6-vicuna-7b:q4_0": "llava-v1.6-vicuna-7b:model-q4_0", + "llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:model-fp16" +] + +let NEXA_RUN_MODEL_MAP_IMAGE : [String: String] = [ + "sd1-4": "stable-diffusion-v1-4:q4_0", + "sd1-5": "stable-diffusion-v1-5:q4_0", + "sd2-1": "stable-diffusion-v2-1:q4_0", + "sd3": "stable-diffusion-3-medium:q4_0", + "sdxl-turbo": "sdxl-turbo:q8_0", + "flux": "FLUX.1-schnell:q4_0", + "lcm-dreamshaper": "lcm-dreamshaper-v7:fp16", + "anything-lcm": "anything-v30-LCM:fp16", + "hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16", +] + +let NEXA_RUN_MODEL_MAP_VOICE:[String: String] = [ + "whisper-large": "whisper-large:bin-large-v3", + "whisper-tiny": "whisper-tiny:bin-tiny", + "faster-whisper-tiny": "faster-whisper-tiny:bin-cpu-fp16", + "faster-whisper-tiny.en": "faster-whisper-tiny.en:bin-cpu-fp16", + "faster-whisper-small": "faster-whisper-small:bin-cpu-fp16", + "faster-whisper-small.en": "faster-whisper-small.en:bin-cpu-fp16", + "faster-whisper-medium": "faster-whisper-medium:bin-cpu-fp16", + "faster-whisper-medium.en": "faster-whisper-medium.en:bin-cpu-fp16", + "faster-whisper-base": "faster-whisper-base:bin-cpu-fp16", + "faster-whisper-base.en": "faster-whisper-base.en:bin-cpu-fp16", + "faster-whisper-large": "faster-whisper-large-v3:bin-cpu-fp16", + "faster-whisper-large-turbo": "faster-whisper-large-v3-turbo:bin-cpu-fp16", +] + +let 
NEXA_RUN_MODEL_MAP_FUNCTION_CALLING:[String: String] = [ + "llama2-function-calling": "Llama2-7b-function-calling:q4_K_M", + "Llama2-7b-function-calling:fp16": "Llama2-7b-function-calling:fp16", + "Llama2-7b-function-calling:q2_K": "Llama2-7b-function-calling:q2_K", + "Llama2-7b-function-calling:q3_K_L": "Llama2-7b-function-calling:q3_K_L", + "Llama2-7b-function-calling:q3_K_M": "Llama2-7b-function-calling:q3_K_M", + "Llama2-7b-function-calling:q3_K_S": "Llama2-7b-function-calling:q3_K_S", + "Llama2-7b-function-calling:q4_K_M": "Llama2-7b-function-calling:q4_K_M", + "Llama2-7b-function-calling:q4_K_S": "Llama2-7b-function-calling:q4_K_S", + "Llama2-7b-function-calling:q5_K_M": "Llama2-7b-function-calling:q5_K_M", + "Llama2-7b-function-calling:q5_K_S": "Llama2-7b-function-calling:q5_K_S", + "Llama2-7b-function-calling:q6_K": "Llama2-7b-function-calling:q6_K", + "Llama2-7b-function-calling:q8_0": "Llama2-7b-function-calling:q8_0", +] +let NEXA_RUN_MODEL_MAP_FLUX:[String: String] = [ + "flux": "FLUX.1-schnell:flux1-schnell-q4_0", + "FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0", + "FLUX.1-schnell:q5_0": "FLUX.1-schnell:flux1-schnell-q5_0", + "FLUX.1-schnell:q5_1": "FLUX.1-schnell:flux1-schnell-q5_1", + "FLUX.1-schnell:q8_0": "FLUX.1-schnell:flux1-schnell-q8_0", + "FLUX.1-schnell:fp16": "FLUX.1-schnell:flux1-schnell-fp16", +] + +let NEXA_RUN_MODEL_MAP_TEXT_EMBEDDING:[String: String] = [ + "mxbai": "mxbai-embed-large-v1:fp16", + "mxbai-embed-large-v1": "mxbai-embed-large-v1:fp16", + "mxbai-embed-large-v1:fp16": "mxbai-embed-large-v1:fp16", + "nomic": "nomic-embed-text-v1.5:fp16", + "nomic-embed-text-v1.5": "nomic-embed-text-v1.5:fp16", + "nomic-embed-text-v1.5:fp16": "nomic-embed-text-v1.5:fp16", + "all-MiniLM": "all-MiniLM-L6-v2:fp16", + "all-MiniLM-L6-v2": "all-MiniLM-L6-v2:fp16", + "all-MiniLM-L6-v2:fp16": "all-MiniLM-L6-v2:fp16", + "all-MiniLM-L12-v2": "all-MiniLM-L12-v2:fp16", + "all-MiniLM-L12-v2:fp16": "all-MiniLM-L12-v2:fp16", +] + +let NEXA_RUN_MODEL_MAP: [String: String] = { + var combinedMap = NEXA_RUN_MODEL_MAP_TEXT + combinedMap.merge(NEXA_RUN_MODEL_MAP_IMAGE) { (_, new) in new } + combinedMap.merge(NEXA_RUN_MODEL_MAP_VLM) { (_, new) in new } + combinedMap.merge(NEXA_RUN_MODEL_MAP_VOICE) { (_, new) in new } + combinedMap.merge(NEXA_RUN_MODEL_MAP_FUNCTION_CALLING) { (_, new) in new } + combinedMap.merge(NEXA_RUN_MODEL_MAP_FLUX) { (_, new) in new } + combinedMap.merge(NEXA_RUN_MODEL_MAP_TEXT_EMBEDDING) { (_, new) in new } + // Merge other maps as needed + return combinedMap +}() diff --git a/swift/Sources/NexaSwift/LlamaModel.swift b/swift/Sources/NexaSwift/LlamaModel.swift new file mode 100644 index 00000000..d8bc6f68 --- /dev/null +++ b/swift/Sources/NexaSwift/LlamaModel.swift @@ -0,0 +1,207 @@ +import Foundation +import llama + +class LlamaModel { + private let model: Model + public var configuration: Configuration + private let context: OpaquePointer + private var sampler: UnsafeMutablePointer + private var batch: Batch + private var tokens: [Token] + private var temporaryInvalidCChars: [CChar] = [] + private var generatedTokenAccount: Int32 = 0 + private var totalTokensProcessed: Int32 = 0 + private var ended = false + private let n_ctx: Int32 + public var arch: String { + return getModelDetails().arch + } + public var modelType: String { + return getModelDetails().modelType + } + public var modelFtype: String { + return getModelDetails().modelFtype + } + + var shouldContinue: Bool { + generatedTokenAccount < configuration.maxNewToken && !ended + } + + public func 
reset() { + generatedTokenAccount = 0 + ended = false + } + + init(path: String, configuration: Configuration = .init()) throws { + self.configuration = configuration + llama_backend_init() + llama_numa_init(GGML_NUMA_STRATEGY_DISABLED) + var model_params = llama_model_default_params() + #if os(iOS) || targetEnvironment(simulator) + model_params.n_gpu_layers = 0 + #endif + guard let model = llama_load_model_from_file(path, model_params) else { + throw NexaSwiftError.others("Cannot load model at path \(path)") + } + + self.model = model + + guard let context = llama_new_context_with_model(model, configuration.contextParameters) else { + throw NexaSwiftError.others("Cannot load model context") + } + self.context = context + self.n_ctx = Int32(llama_n_ctx(context)) + self.tokens = [] + self.sampler = llama_sampler_chain_init(llama_sampler_chain_default_params()) + self.batch = llama_batch_init(configuration.nTokens, 0, 1) + try checkContextLength() + } + + public func updateSampler() { + self.sampler = llama_sampler_chain_init(llama_sampler_chain_default_params()) + llama_sampler_chain_add(sampler, llama_sampler_init_temp(configuration.temperature)) + llama_sampler_chain_add(sampler, llama_sampler_init_top_k(configuration.topK)) + llama_sampler_chain_add(sampler, llama_sampler_init_top_p(configuration.topP, 1)) + llama_sampler_chain_add(sampler, llama_sampler_init_softmax()) + llama_sampler_chain_add(sampler, llama_sampler_init_dist(configuration.seed)) + } + + private func checkContextLength() throws { + let n_ctx_train = llama_n_ctx_train(model) + if n_ctx > n_ctx_train { + throw NexaSwiftError.others("Model was trained on \(n_ctx_train) context but tokens \(n_ctx) specified") + } + } + + private func getModelDetails() -> (arch: String, modelType: String, modelFtype: String) { + let bufSize = 256 + var buf = [CChar](repeating: 0, count: bufSize) + let result = llama_model_desc(model, &buf, bufSize) + + if result > 0 { + let modelDesc = String(cString: buf) + let components = modelDesc.components(separatedBy: " ") + let arch = components[0] ?? "Unknown" + let modelType = components[1] ?? "Unknown" + let modelFtype = components[2] ?? "Unknown" + return (arch, modelType, modelFtype) + } else { + return ("Unknown", "Unknown", "Unknown") + } + } + + func start(for prompt: String) throws { +// print("arch: \(arch), modelType: \(modelType), modelFtype: \(modelFtype)") + updateSampler() + ended = false + tokens = tokenize(text: prompt, addBos: true) + + // Check for token length + if tokens.count > n_ctx { + let originalCount = tokens.count + tokens = Array(tokens.prefix(Int(n_ctx))) + print(""" + WARNING: Input tokens (\(originalCount)) exceed context length (\(n_ctx)). + Truncating to first \(n_ctx) tokens. Some content at the end will be ignored. + Consider splitting your input into smaller chunks for better results. + """) + } + + temporaryInvalidCChars = [] + batch.clear() + + tokens.enumerated().forEach { index, token in + batch.add(token: token, position: Int32(index), seqIDs: [0], logit: false) + } + batch.logits[Int(batch.n_tokens) - 1] = 1 + + if llama_decode(context, batch) != 0 { + throw NexaSwiftError.decodeError + } + generatedTokenAccount = 0 + totalTokensProcessed = batch.n_tokens + } + + func `continue`() throws -> String { + if totalTokensProcessed >= n_ctx { + print("WARNING: Reached maximum context length (\(n_ctx)). 
Stopping generation.") + temporaryInvalidCChars.removeAll() + ended = true + return "" + } + + let newToken = llama_sampler_sample(sampler, context, batch.n_tokens - 1) + + if llama_token_is_eog(model, newToken) { + temporaryInvalidCChars.removeAll() + ended = true + return "" + } + + + let newTokenCChars = tokenToCChars(token: newToken) + temporaryInvalidCChars.append(contentsOf: newTokenCChars) + + let newTokenStr: String + if let validString = String(validating: temporaryInvalidCChars + [0], as: UTF8.self) { + newTokenStr = validString + temporaryInvalidCChars.removeAll() + } else if let suffixIndex = temporaryInvalidCChars.firstIndex(where: { $0 != 0 }), + let validSuffix = String(validating: Array(temporaryInvalidCChars.suffix(from: suffixIndex)) + [0], + as: UTF8.self) { + newTokenStr = validSuffix + temporaryInvalidCChars.removeAll() + } else { + newTokenStr = "" + } + + batch.clear() + batch.add(token: newToken, position: totalTokensProcessed, seqIDs: [0], logit: true) + generatedTokenAccount += 1 + totalTokensProcessed += 1 + + if llama_decode(context, batch) != 0 { + throw NexaSwiftError.decodeError + } + return newTokenStr.filter { $0 != "\0" } + } + + private func tokenToCChars(token: llama_token) -> [CChar] { + var length: Int32 = 8 + var piece = Array(repeating: 0, count: Int(length)) + + let nTokens = llama_token_to_piece(model, token, &piece, length, 0, false) + if nTokens >= 0 { + return Array(piece.prefix(Int(nTokens))) + } else { + length = -nTokens + piece = Array(repeating: 0, count: Int(length)) + let nNewTokens = llama_token_to_piece(model, token, &piece, length, 0, false) + return Array(piece.prefix(Int(nNewTokens))) + } + } + + private func tokenize(text: String, addBos: Bool) -> [Token] { + let utf8Count = text.utf8.count + let n_tokens = utf8Count + (addBos ? 1 : 0) + 1 + + return Array(unsafeUninitializedCapacity: n_tokens) { buffer, initializedCount in + initializedCount = Int( + llama_tokenize(model, text, Int32(utf8Count), buffer.baseAddress, Int32(n_tokens), addBos, false) + ) + } + } + + func clear() { + tokens.removeAll() + temporaryInvalidCChars.removeAll() + llama_kv_cache_clear(context) + } + + deinit { + llama_batch_free(batch) + llama_free(context) + llama_free_model(model) + llama_backend_free() + } +} diff --git a/swift/Sources/NexaSwift/Models/Batch.swift b/swift/Sources/NexaSwift/Models/Batch.swift new file mode 100644 index 00000000..ca784716 --- /dev/null +++ b/swift/Sources/NexaSwift/Models/Batch.swift @@ -0,0 +1,23 @@ +import Foundation +import llama + +extension Batch { + mutating func clear() { + self.n_tokens = 0 + } + + mutating func add(token: Token, + position: Position, + seqIDs: [SeqID], + logit: Bool) { + let nextIndex = Int(n_tokens) + self.token[nextIndex] = token + self.pos[nextIndex] = position + self.n_seq_id[nextIndex] = Int32(seqIDs.count) + seqIDs.enumerated().forEach { index, id in + seq_id[nextIndex]?[index] = id + } + self.logits[nextIndex] = logit ? 1 : 0 + self.n_tokens += 1 + } +} diff --git a/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift b/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift new file mode 100644 index 00000000..f571170c --- /dev/null +++ b/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift @@ -0,0 +1,497 @@ +import Foundation + + +public struct ChatCompletionRequestSystemMessage: Codable { + public var role: Role = .system + public let content: String? + + public init(content: String?) 
{ + self.content = content + } +} + +public struct ChatCompletionRequestUserMessage: Codable { + public var role: Role = .user + public let content: UserMessageContent + + public init(content: UserMessageContent) { + self.content = content + } +} + +public enum UserMessageContent: Codable { + case text(String) + case image(ImageContent) + + enum CodingKeys: String, CodingKey { + case type, text, imageUrl + } + + enum ContentType: String, Codable { + case text + case imageUrl + } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + let type = try container.decode(ContentType.self, forKey: .type) + + switch type { + case .text: + let text = try container.decode(String.self, forKey: .text) + self = .text(text) + case .imageUrl: + let imageUrl = try container.decode(ImageContent.self, forKey: .imageUrl) + self = .image(imageUrl) + } + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + switch self { + case .text(let text): + try container.encode(ContentType.text, forKey: .type) + try container.encode(text, forKey: .text) + case .image(let imageUrl): + try container.encode(ContentType.imageUrl, forKey: .type) + try container.encode(imageUrl, forKey: .imageUrl) + } + } +} + +public struct ImageContent: Codable { + public let url: String + public let detail: String? + + public init(url: String, detail: String? = nil) { + self.url = url + self.detail = detail + } +} + +public struct ChatCompletionRequestAssistantMessage: Codable { + public var role: Role = .assistant + public let content: String? + public let toolCalls: [ChatCompletionMessageToolCall]? + public let functionCall: ChatCompletionRequestAssistantMessageFunctionCall? + + public init(content: String?, toolCalls: [ChatCompletionMessageToolCall]? = nil, functionCall: ChatCompletionRequestAssistantMessageFunctionCall? = nil) { + self.content = content + self.toolCalls = toolCalls + self.functionCall = functionCall + } +} + +public struct ChatCompletionRequestToolMessage: Codable { + public var role: Role = .tool + public let content: String? + public let toolCallID: String + + public init(content: String?, toolCallID: String) { + self.content = content + self.toolCallID = toolCallID + } +} + +public struct ChatCompletionRequestFunctionMessage: Codable { + public var role: Role = .function + public let content: String? + public let name: String + + public init(content: String?, name: String) { + self.content = content + self.name = name + } +} + +public struct ChatCompletionRequestAssistantMessageFunctionCall: Codable { + public let name: String + public let arguments: String + + public init(name: String, arguments: String) { + self.name = name + self.arguments = arguments + } +} + + +class ChatFormatterRegistry { + private var formatters = [String: ChatFormatter]() + + init() { + register(name: ChatCompletionModel.octopusv2.rawValue, formatter: OctopusV2Formatter()) + register(name: ChatCompletionModel.llama.rawValue, formatter: LlamaFormatter()) + register(name: ChatCompletionModel.llama3.rawValue, formatter: Llama3Formatter()) + register(name: ChatCompletionModel.gemma.rawValue, formatter: GemmaFormatter()) + register(name: ChatCompletionModel.qwen.rawValue, formatter: QwenFormatter()) + register(name: ChatCompletionModel.mistral.rawValue, formatter: MistralFormatter()) + } + + func register(name: String, formatter: ChatFormatter) { + formatters[name] = formatter + } + + func getFormatter(name: String?) 
-> ChatFormatter? { + return formatters[getFormatterName(name: name)] + } + + func getFormatterName(name: String?) -> String { + return name ?? ChatCompletionModel.llama.rawValue + } +} + +//formatter +public struct ChatFormatterResponse { + let prompt: String + let stop: [String] +} + +public protocol ChatFormatter { + func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse +} + + +class OctopusV2Formatter: ChatFormatter { + private let systemMessage = """ + Below is the query from the users, please call the correct function and generate the parameters to call the function. + + """ + private let separator = "\n\n" + + func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse { + var formattedMessages = mapRoles(messages: messages) + + // Assuming the last message should be the assistant's response + formattedMessages.append(("Response:", nil)) + + var prompt = systemMessage + for (role, content) in formattedMessages { + if let content = content { + prompt += "\(role) \(content.trimmingCharacters(in: .whitespacesAndNewlines))\(separator)" + } else { + prompt += "\(role) " + } + } + + return ChatFormatterResponse(prompt: prompt.trimmingCharacters(in: .whitespacesAndNewlines), stop: [separator]) + } + + private func mapRoles(messages: [ChatCompletionRequestMessage]) -> [(String, String?)] { + var mappedMessages = [(String, String?)]() + let roleMapping: [Role: String] = [ + .user: "Query:", + .assistant: "Response:" + ] + + for message in messages { + var rolePrefix = "" + var content: String? = nil + + switch message { + case .system(let systemMessage): + // Include system message if necessary + continue + case .user(let userMessage): + rolePrefix = roleMapping[.user] ?? "Query:" + switch userMessage.content { + case .text(let text): + content = text + case .image(let imageContent): + content = imageContent.detail ?? imageContent.url + } + case .assistant(let assistantMessage): + rolePrefix = roleMapping[.assistant] ?? "Response:" + content = assistantMessage.content + case .tool(let toolMessage): + rolePrefix = "Tool:" + content = toolMessage.content + case .function(let functionMessage): + rolePrefix = "Function:" + content = functionMessage.content + } + + mappedMessages.append((rolePrefix, content)) + } + + return mappedMessages + } +} + + +//https://www.llama.com/docs/model-cards-and-prompt-formats/meta-llama-2/ +class LlamaFormatter: ChatFormatter { + private let systemTemplate = "[INST] <>\n{system_message}\n<>" + private let roles: [String: String] = [ + "user": "[INST]", + "assistant": "[/INST]" + ] + + func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse { + let formattedMessages = mapRoles(messages: messages) + let systemMessage = getSystemMessage(messages) + let formattedSystemMessage = systemMessage.map { msg in + systemTemplate.replacingOccurrences(of: "{system_message}", with: msg) + } + let prompt = formatPrompt(systemMessage: formattedSystemMessage, messages: formattedMessages) + return ChatFormatterResponse(prompt: prompt + "[/INST]", stop: [""]) + } + + private func getSystemMessage(_ messages: [ChatCompletionRequestMessage]) -> String? { + for message in messages { + if case .system(let systemMessage) = message { + return systemMessage.content + } + } + return nil + } + + private func mapRoles(messages: [ChatCompletionRequestMessage]) -> [(String, String?)] { + return messages.compactMap { message in + switch message { + case .system: + return nil + case .user(let userMessage): + let content: String? 
+ switch userMessage.content { + case .text(let text): + content = text + case .image(let imageContent): + content = imageContent.detail + } + return (roles["user"] ?? "", content) + case .assistant(let assistantMessage): + return (roles["assistant"] ?? "", assistantMessage.content) + case .tool, .function: + return nil + } + } + } + + private func formatPrompt(systemMessage: String?, messages: [(String, String?)]) -> String { + var prompt = "" + + if let (firstRole, firstContent) = messages.first, + let content = firstContent { + if let sysMsg = systemMessage { + prompt += "\(firstRole) \(sysMsg)\n\(content)" + } else { + prompt += "\(firstRole) \(content)" + } + } + + for (role, content) in messages.dropFirst() { + if let content = content { + prompt += " \(role) \(content)" + } + } + + return prompt.trimmingCharacters(in: .whitespacesAndNewlines) + } +} + +//https://www.llama.com/docs/model-cards-and-prompt-formats/meta-llama-3/ +class Llama3Formatter: ChatFormatter { + private let roles: [String: String] = [ + "system": "<|start_header_id|>system<|end_header_id|>\n\n", + "user": "<|start_header_id|>user<|end_header_id|>\n\n", + "assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n" + ] + + private let separator = "<|eot_id|>" + + func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse { + var formattedMessages = mapRoles(messages: messages) + + formattedMessages.append((roles["assistant"] ?? "", nil)) + + let prompt = formatPrompt(formattedMessages) + + return ChatFormatterResponse(prompt: prompt, stop: [separator]) + } + + private func mapRoles(messages: [ChatCompletionRequestMessage]) -> [(String, String?)] { + return messages.map { message in + var rolePrefix = "" + var content: String? = "" + + switch message { + case .system(let systemMessage): + rolePrefix = roles["system"] ?? "" + content = systemMessage.content + case .user(let userMessage): + rolePrefix = roles["user"] ?? "" + switch userMessage.content { + case .text(let text): + content = text + case .image(let imageContent): + content = imageContent.detail + } + case .assistant(let assistantMessage): + rolePrefix = roles["assistant"] ?? "" + content = assistantMessage.content + case .tool(let toolMessage): + rolePrefix = roles["tool"] ?? "" + content = toolMessage.content + case .function(let functionMessage): + rolePrefix = roles["function"] ?? 
"" + content = functionMessage.content + } + + return (rolePrefix, content) + } + } + + private func formatPrompt(_ formattedMessages: [(String, String?)]) -> String { + var prompt = "<|begin_of_text|>" + for (role, content) in formattedMessages { + if let content = content { + prompt += "\(role)\(content.trimmingCharacters(in: .whitespacesAndNewlines))\(separator)" + } else { + prompt += "\(role) " + } + } + return prompt.trimmingCharacters(in: .whitespacesAndNewlines) + } +} + + +//https://ai.google.dev/gemma/docs/formatting +class GemmaFormatter: ChatFormatter { + private let roles: [String: String] = [ + "user": "user\n", + "assistant": "model\n" + ] + + private let separator = "\n" + + func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse { + var formattedMessages = mapRoles(messages: messages) + formattedMessages.append((roles["assistant"]!, nil)) + let prompt = formatPrompt(formattedMessages) + + return ChatFormatterResponse(prompt: prompt, stop: [separator]) + } + + private func mapRoles(messages: [ChatCompletionRequestMessage]) -> [(String, String?)] { + return messages.compactMap { message in + switch message { + case .system: + return nil + case .user(let userMessage): + let content: String? + switch userMessage.content { + case .text(let text): + content = text + case .image(let imageContent): + content = imageContent.detail + } + return (roles["user"] ?? "", content) + case .assistant(let assistantMessage): + return (roles["assistant"] ?? "", assistantMessage.content) + case .tool, .function: + return nil + } + } + } + + private func formatPrompt(_ formattedMessages: [(String, String?)]) -> String { + var prompt = "" + + for (index, (role, content)) in formattedMessages.enumerated() { + if index == formattedMessages.count - 1 { + prompt += role + } else if let content = content { + prompt += "\(role)\(content)\(separator)" + } + } + return prompt.trimmingCharacters(in: .whitespacesAndNewlines) + } +} + +class QwenFormatter: ChatFormatter { + private let roles: [String: String] = [ + "user": "<|im_start|>user", + "assistant": "<|im_start|>assistant" + ] + + private let systemTemplate = "<|im_start|>system\n{system_message}" + private let defaultSystemMessage = "You are a helpful assistant." + private let separator = "<|im_end|>" + private let endToken = "<|endoftext|>" + + func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse { + let systemMessage = formatSystemMessage() + var formattedMessages = mapRoles(messages: messages) + formattedMessages.append((roles["assistant"]!, nil)) + let prompt = formatChatML(systemMessage: systemMessage, messages: formattedMessages) + return ChatFormatterResponse(prompt: prompt, stop: [endToken]) + } + + private func formatSystemMessage() -> String { + return systemTemplate.replacingOccurrences(of: "{system_message}", with: defaultSystemMessage) + } + + private func mapRoles(messages: [ChatCompletionRequestMessage]) -> [(String, String?)] { + return messages.compactMap { message in + switch message { + case .user(let userMessage): + let content: String? 
+ switch userMessage.content { + case .text(let text): + content = text + case .image(let imageContent): + content = imageContent.detail + } + return (roles["user"]!, content) + case .assistant(let assistantMessage): + return (roles["assistant"]!, assistantMessage.content) + case .system, .tool, .function: + return nil + } + } + } + + private func formatChatML(systemMessage: String, messages: [(String, String?)]) -> String { + var prompt = systemMessage.isEmpty ? "" : "\(systemMessage)\(separator)\n" + for (role, content) in messages { + if let content = content { + prompt += "\(role)\n\(content)\(separator)\n" + } else { + prompt += "\(role)\n" + } + } + return prompt.trimmingCharacters(in: .whitespacesAndNewlines) + } +} + +//https://www.promptingguide.ai/models/mistral-7b +class MistralFormatter: ChatFormatter { + private let endToken = "" + private let instructStart = "[INST] " + private let instructEnd = " [/INST]" + + func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse { + var prompt = "" + + for message in messages { + switch message { + case .user(let userMessage): + switch userMessage.content { + case .text(let text): + prompt += "\(instructStart)\(text)" + case .image: + continue + } + + case .assistant(let assistantMessage): + if let content = assistantMessage.content { + prompt += "\(instructEnd)\(content)\(endToken)" + } + default: + continue + } + } + prompt += instructEnd + return ChatFormatterResponse(prompt: prompt, stop: [endToken]) + } +} diff --git a/swift/Sources/NexaSwift/Models/ChatCompletionResponse.swift b/swift/Sources/NexaSwift/Models/ChatCompletionResponse.swift new file mode 100644 index 00000000..396dba5c --- /dev/null +++ b/swift/Sources/NexaSwift/Models/ChatCompletionResponse.swift @@ -0,0 +1,91 @@ +import Foundation + + +public struct ChatCompletionMessageToolCallFunction: Codable { + public let name: String + public let arguments: String + + public init(name: String, arguments: String) { + self.name = name + self.arguments = arguments + } +} + +public struct ChatCompletionMessageToolCall: Codable { + public let id: String + public var type: Role = .function + public let function: ChatCompletionMessageToolCallFunction + + public init(id: String, function: ChatCompletionMessageToolCallFunction) { + self.id = id + self.function = function + } +} + +public struct ChatCompletionResponseFunctionCall:Codable{ + public let name: String + public let arguments: String +} + +public struct ChatCompletionResponseMessage: Codable{ + public let content: String? + public let toolCalls: [ChatCompletionMessageToolCall]? + public let role: String? + public let functionCall: ChatCompletionResponseFunctionCall? +} + +public struct ChatCompletionResponseChoice: Codable{ + public let index: Int + public let message: ChatCompletionResponseMessage + public let logprobs: CompletionLogprobs? + public let finishReason: FinishReason? +} + +public struct ChatCompletionResponse: Codable { + public let id: String + public let object: String + public let created: Int + public let model: String + public let choices: [ChatCompletionResponseChoice] + public let usage: CompletionUsage? + + enum CodingKeys: String, CodingKey { + case id + case object + case created + case model + case choices + case usage + } +} + +public struct ChatCompletionStreamResponseDelta: Codable { + public var content: String? + public var functionCall: ChatCompletionStreamResponseDeltaFunctionCall? // DEPRECATED + public var toolCalls: [ChatCompletionMessageToolCallChunk]? 
+ public var role: Role? + +} + +public struct ChatCompletionStreamResponseDeltaFunctionCall: Codable { + +} + +public struct ChatCompletionMessageToolCallChunk: Codable { + +} + +public struct ChatCompletionStreamResponseChoice: Codable { + public var index: Int + public var delta: ChatCompletionStreamResponseDelta + public var finishReason: FinishReason? + public var logprobs: CompletionLogprobs? +} + +public struct CreateChatCompletionStreamResponse: Codable { + public var id: String + public var model: String + public var object: String + public var created: Int + public var choices: [ChatCompletionStreamResponseChoice] +} diff --git a/swift/Sources/NexaSwift/Models/Common.swift b/swift/Sources/NexaSwift/Models/Common.swift new file mode 100644 index 00000000..56cb371c --- /dev/null +++ b/swift/Sources/NexaSwift/Models/Common.swift @@ -0,0 +1,28 @@ +public enum Role: String, Codable { + case system + case user + case assistant + case tool + case function +} + +public enum ChatCompletionRequestMessage { + case system(ChatCompletionRequestSystemMessage) + case user(ChatCompletionRequestUserMessage) + case assistant(ChatCompletionRequestAssistantMessage) + case tool(ChatCompletionRequestToolMessage) + case function(ChatCompletionRequestFunctionMessage) +} + +public enum FinishReason: String, Codable { + case stop, length, toolCalls = "tool_calls", functionCall = "function_call" +} + +public enum ChatCompletionModel: String, Codable { + case octopusv2 + case llama + case llama3 + case gemma + case qwen + case mistral +} diff --git a/swift/Sources/NexaSwift/Models/CompletionResponse.swift b/swift/Sources/NexaSwift/Models/CompletionResponse.swift new file mode 100644 index 00000000..b3d16519 --- /dev/null +++ b/swift/Sources/NexaSwift/Models/CompletionResponse.swift @@ -0,0 +1,52 @@ +import Foundation + +public struct CompletionUsage: Codable { + public let promptTokens: Int + public let completionTokens: Int + public let totalTokens: Int + + enum CodingKeys: String, CodingKey { + case promptTokens = "prompt_tokens" + case completionTokens = "completion_tokens" + case totalTokens = "total_tokens" + } +} + +public struct CompletionLogprobs: Codable { + public let textOffset: [Int]? + public let tokenLogprobs: [Float?]? + public let tokens: [String]? + public let topLogprobs: [Dictionary?]? +} + +public struct CompletionChoice: Codable { + public let text: String + public let index: Int + public let logprobs: CompletionLogprobs? + public let finishReason: FinishReason? + + enum CodingKeys: String, CodingKey { + case text + case index + case logprobs + case finishReason = "finish_reason" + } +} + +public struct CompletionResponse: Codable { + public let id: String + public let object: String + public let created: Int + public let model: String + public let choices: [CompletionChoice] + public let usage: CompletionUsage? 
+ + enum CodingKeys: String, CodingKey { + case id + case object + case created + case model + case choices + case usage + } +} diff --git a/swift/Sources/NexaSwift/Models/Configuration.swift b/swift/Sources/NexaSwift/Models/Configuration.swift new file mode 100644 index 00000000..b8c3757c --- /dev/null +++ b/swift/Sources/NexaSwift/Models/Configuration.swift @@ -0,0 +1,53 @@ +import Foundation +import llama + +public struct Configuration { + public var nTokens:Int32 + public var embd: Int32 + public var nSeqMax: Int32 + public var seed: UInt32 + public var topK: Int32 + public var topP: Float + public var nCTX: Int + public var temperature: Float + public var maxNewToken: Int + public var batchSize: Int + public var stopTokens: [String] + + public init( + nTokens:Int32 = 2048, + embd:Int32 = 512, + nSeqMax:Int32 = 2, + seed: UInt32 = 1234, + topK: Int32 = 50, + topP: Float = 1.0, + nCTX: Int = 2048, + temperature: Float = 0.7, + batchSize: Int = 2048, + stopSequence: String? = nil, + maxNewToken: Int = 128, + stopTokens: [String] = []) { + self.nTokens = nTokens + self.embd = embd + self.nSeqMax = nSeqMax + self.seed = seed + self.topK = topK + self.topP = topP + self.nCTX = nCTX + self.batchSize = batchSize + self.temperature = temperature + self.maxNewToken = maxNewToken + self.stopTokens = stopTokens + } +} + +extension Configuration { + var contextParameters: ContextParameters { + var params = llama_context_default_params() + let processorCount = max(1, min(16, ProcessInfo.processInfo.processorCount - 2)) + params.n_ctx = max(8, UInt32(self.nCTX)) // minimum context size is 8 + params.n_threads = Int32(processorCount) + params.n_threads_batch = Int32(processorCount) + return params + } +} diff --git a/swift/Sources/NexaSwift/Models/SwiftLlamaError.swift b/swift/Sources/NexaSwift/Models/SwiftLlamaError.swift new file mode 100644 index 00000000..37c04296 --- /dev/null +++ b/swift/Sources/NexaSwift/Models/SwiftLlamaError.swift @@ -0,0 +1,6 @@ +import Foundation + +public enum NexaSwiftError: Error { + case decodeError + case others(String) +} diff --git a/swift/Sources/NexaSwift/Models/TypeAlias.swift b/swift/Sources/NexaSwift/Models/TypeAlias.swift new file mode 100644 index 00000000..4711efc8 --- /dev/null +++ b/swift/Sources/NexaSwift/Models/TypeAlias.swift @@ -0,0 +1,10 @@ +import Foundation +import llama + +typealias Batch = llama_batch +typealias Model = OpaquePointer +typealias Context = OpaquePointer +typealias Token = llama_token +typealias Position = llama_pos +typealias SeqID = llama_seq_id +typealias ContextParameters = llama_context_params diff --git a/swift/Sources/NexaSwift/NexaSwiftActor.swift b/swift/Sources/NexaSwift/NexaSwiftActor.swift new file mode 100644 index 00000000..b295dc71 --- /dev/null +++ b/swift/Sources/NexaSwift/NexaSwiftActor.swift @@ -0,0 +1,6 @@ +import Foundation + +@globalActor +public actor NexaSwiftActor { + public static let shared = NexaSwiftActor() +} diff --git a/swift/Sources/NexaSwift/NexaTextInference.swift b/swift/Sources/NexaSwift/NexaTextInference.swift new file mode 100644 index 00000000..895c3f8a --- /dev/null +++ b/swift/Sources/NexaSwift/NexaTextInference.swift @@ -0,0 +1,356 @@ +import Foundation +import llama +import Combine + +public class NexaTextInference { + private let model: LlamaModel + private let modelPath: String + private var generatedTokenCache = "" + private var contentStarted = false + private let chatFormatterRegistry: ChatFormatterRegistry + + var maxLengthOfStopToken: Int { + model.configuration.stopTokens.map { 
$0.count }.max() ?? 0 + } + + public init(modelPath: String, + modelConfiguration: Configuration = .init()) throws { + if modelPath.isEmpty { + throw NSError(domain: "InvalidParameterError", code: 400, userInfo: [NSLocalizedDescriptionKey: "Either modelPath or localPath must be provided."]) + } + self.model = try LlamaModel(path: modelPath, configuration: modelConfiguration) + self.modelPath = modelPath.lowercased() + self.chatFormatterRegistry = .init() + } + + private func updateConfiguration( + temperature: Float?, + maxNewToken: Int?, + topK: Int32?, + topP: Float?, + stopTokens: [String]? + ) { + if let temperature = temperature { + model.configuration.temperature = temperature + } + if let maxNewToken = maxNewToken { + model.configuration.maxNewToken = maxNewToken + } + if let topK = topK { + model.configuration.topK = topK + } + if let topP = topP { + model.configuration.topP = topP + } + if let stopTokens = stopTokens { + model.configuration.stopTokens = stopTokens + } + } + + private func getFormatterForModel() -> ChatFormatter? { + let modelArch = model.arch.lowercased() + + let modelType: ChatCompletionModel? = { + switch modelArch { + case _ where modelArch.contains("gemma"): + return .gemma + case _ where modelArch.contains("qwen"): + return .qwen + case _ where modelArch.contains("llama"): + // For Llama-based models, check the model path + if modelPath.contains("llama-2") || modelPath.contains("llama2") { + return .llama + } else if modelPath.contains("llama-3") || modelPath.contains("llama3") { + return .llama3 + } else if modelPath.contains("mistral") { + return .mistral + } else { + // If can't determine specific version, default to Llama2 + print("Warning: Unable to determine specific Llama model version from path: \(modelPath). Defaulting to Llama2 format.") + return .llama + } + default: + print("Warning: Unknown model architecture: \(modelArch). Defaulting to Llama2 format.") + return .llama + } + }() + + return chatFormatterRegistry.getFormatter(name: modelType?.rawValue) + } + + private func isStopToken() -> Bool { + model.configuration.stopTokens.reduce(false) { partialResult, stopToken in + generatedTokenCache.hasSuffix(stopToken) + } + } + + private func response(for prompt: String, output: (String) -> Void, finish: () -> Void) { + func finaliseOutput() { + model.configuration.stopTokens.forEach { + generatedTokenCache = generatedTokenCache.replacingOccurrences(of: $0, with: "") + } + output(generatedTokenCache) + finish() + generatedTokenCache = "" + } + defer { model.clear() } + do { + try model.start(for: prompt) + while model.shouldContinue { + var delta = try model.continue() + if contentStarted { // remove the prefix empty spaces + if needToStop(after: delta, output: output) { + finish() + break + } + } else { + delta = delta.trimmingCharacters(in: .whitespacesAndNewlines) + if !delta.isEmpty { + contentStarted = true + if needToStop(after: delta, output: output) { + finish() + break + } + } + } + } + finaliseOutput() + } catch { + finaliseOutput() + } + } + + private func needToStop(after delta: String, output: (String) -> Void) -> Bool { + guard maxLengthOfStopToken > 0 else { + output(delta) + return false + } + generatedTokenCache += delta + + if generatedTokenCache.count >= maxLengthOfStopToken * 2 { + if let stopToken = model.configuration.stopTokens.first(where: { generatedTokenCache.contains($0) }), + let index = generatedTokenCache.range(of: stopToken) { + let outputCandidate = String(generatedTokenCache[.. 
AsyncThrowingStream { + return .init { continuation in + Task { + response(for: prompt) { [weak self] delta in + continuation.yield(delta) + } finish: { [weak self] in + continuation.finish() + } + } + } + } + + @NexaSwiftActor + public func createCompletion( + for prompt: String, + temperature: Float? = nil, + maxNewToken: Int? = nil, + topK: Int32? = nil, + topP: Float? = nil, + stopTokens: [String]? = nil) async throws -> CompletionResponse { + updateConfiguration( + temperature: temperature, + maxNewToken: maxNewToken, + topK: topK, + topP: topP, + stopTokens: stopTokens + ) + model.reset() + var result = "" + for try await value in await run(for: prompt) { + result += value + } + + let completionResponse = CompletionResponse( + id: UUID().uuidString, + object: "text_completion", + created: Int(Date().timeIntervalSince1970), + model: "", + choices: [ + CompletionChoice( + text: result, + index: 0, + logprobs: nil, + finishReason: FinishReason.stop + ) + ], + usage: CompletionUsage( + promptTokens: 0, + completionTokens: 0, + totalTokens: 0 + ) + ) + return completionResponse + } + + @NexaSwiftActor + public func createCompletionStream( + for prompt: String, + temperature: Float? = nil, + maxNewToken: Int? = nil, + topK: Int32? = nil, + topP: Float? = nil, + stopTokens: [String]? = nil) -> AsyncThrowingStream { + updateConfiguration( + temperature: temperature, + maxNewToken: maxNewToken, + topK: topK, + topP: topP, + stopTokens: stopTokens + ) + model.reset() + return .init { continuation in + Task { + var index = 0 + response(for: prompt) { text in + let completionResponse = CompletionResponse( + id: UUID().uuidString, + object: "text_completion", + created: Int(Date().timeIntervalSince1970), + model: "", + choices: [ + CompletionChoice( + text: text, + index: 0, + logprobs: nil, + finishReason: FinishReason.stop + ) + ], + usage: CompletionUsage( + promptTokens: 0, + completionTokens: 0, + totalTokens: 0 + ) + ) + + index += 1 + continuation.yield(completionResponse) + } finish: { + continuation.finish() + } + } + } + } + + @NexaSwiftActor + public func createChatCompletion( + for messages: [ChatCompletionRequestMessage], + temperature: Float? = nil, + maxNewToken: Int? = nil, + topK: Int32? = nil, + topP: Float? = nil, + stopTokens: [String]? = nil, + modelType: ChatCompletionModel? = nil) async throws -> ChatCompletionResponse { + let formatter = modelType.map { chatFormatterRegistry.getFormatter(name: $0.rawValue) } ?? getFormatterForModel() + let chatFormatter: ChatFormatterResponse? = formatter?.format(messages: messages) + // let chatFormatter: ChatFormatterResponse? = chatFormatterRegistry.getFormatter(name: modelType?.rawValue)?.format(messages: messages) ?? nil + updateConfiguration( + temperature: temperature, + maxNewToken: maxNewToken, + topK: topK, + topP: topP, + stopTokens: stopTokens ?? (!model.configuration.stopTokens.isEmpty ? model.configuration.stopTokens : chatFormatter?.stop) ?? nil + ) + model.reset() + + var result = "" + for try await value in await run(for: chatFormatter?.prompt ?? 
"") { + result += value + } + + let response = ChatCompletionResponse( + id: UUID().uuidString, + object: "chat.completion", + created: Int(Date().timeIntervalSince1970), + model: chatFormatterRegistry.getFormatterName(name: modelType?.rawValue), + choices: [ + ChatCompletionResponseChoice( + index: 0, + message: ChatCompletionResponseMessage( + content: result, + toolCalls: nil, + role: nil, + functionCall: nil + ), + logprobs: nil, + finishReason: FinishReason.stop + ) + ], + usage: nil + ) + + return response + } + + @NexaSwiftActor + public func createChatCompletionStream( + for messages: [ChatCompletionRequestMessage], + temperature: Float? = nil, + maxNewToken: Int? = nil, + topK: Int32? = nil, + topP: Float? = nil, + stopTokens: [String]? = nil, + modelType: ChatCompletionModel? = nil + ) -> AsyncThrowingStream { + model.reset() + let formatter = modelType.map { chatFormatterRegistry.getFormatter(name: $0.rawValue) } ?? getFormatterForModel() + let chatFormatter: ChatFormatterResponse? = formatter?.format(messages: messages) +// let chatFormatter: ChatFormatterResponse? = chatFormatterRegistry.getFormatter(name: modelType?.rawValue)?.format(messages: messages) ?? nil + updateConfiguration( + temperature: temperature, + maxNewToken: maxNewToken, + topK: topK, + topP: topP, + stopTokens: stopTokens ?? (!model.configuration.stopTokens.isEmpty ? model.configuration.stopTokens : chatFormatter?.stop) ?? nil + ) + return .init { continuation in + Task { + var index = 0 + response(for: chatFormatter?.prompt ?? "") { text in + let response = CreateChatCompletionStreamResponse( + id: UUID().uuidString, + model: chatFormatterRegistry.getFormatterName(name: modelType?.rawValue), + object: "chat.completion.chunk", + created: Int(Date().timeIntervalSince1970), + choices: [ + ChatCompletionStreamResponseChoice( + index: index, + delta: ChatCompletionStreamResponseDelta( + content: text, + functionCall: nil, + toolCalls: nil, + role: nil + ), + finishReason: FinishReason.stop, + logprobs: nil + ) + ] + ) + + index += 1 + continuation.yield(response) + } finish: { + continuation.finish() + } + } + } + } +} diff --git a/swift/Tests/NexaSwiftTests/NexaSwiftTests.swift b/swift/Tests/NexaSwiftTests/NexaSwiftTests.swift new file mode 100644 index 00000000..9661ba10 --- /dev/null +++ b/swift/Tests/NexaSwiftTests/NexaSwiftTests.swift @@ -0,0 +1,6 @@ +import XCTest +@testable import NexaSwift + +final class NexaSwiftTests: XCTestCase { + +} From fd5d206547871ab7727e3a4399a9572eb0ec1eae Mon Sep 17 00:00:00 2001 From: JoyboyBrian Date: Fri, 1 Nov 2024 12:07:14 -0700 Subject: [PATCH 006/160] update some project settingsf --- .../swift-test/TestApp-Commandline/main.swift | 2 +- .../TestApp.xcodeproj/project.pbxproj | 26 +++++++++---------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/examples/swift-test/TestApp-Commandline/main.swift b/examples/swift-test/TestApp-Commandline/main.swift index 8408eac2..8493c22c 100644 --- a/examples/swift-test/TestApp-Commandline/main.swift +++ b/examples/swift-test/TestApp-Commandline/main.swift @@ -6,7 +6,7 @@ let configuration = NexaSwift.Configuration( stopTokens: [] ) -let model_path = Bundle.main.path(forResource: "octopusv2_q4_0", ofType: "gguf") ?? "" +let model_path = "" // For Commandline, please add the local path here. 
let nexaSwift = try NexaSwift.NexaTextInference(modelPath: model_path, modelConfiguration: configuration) var streamMode = false diff --git a/examples/swift-test/TestApp.xcodeproj/project.pbxproj b/examples/swift-test/TestApp.xcodeproj/project.pbxproj index 29e827ba..3d4bddf8 100644 --- a/examples/swift-test/TestApp.xcodeproj/project.pbxproj +++ b/examples/swift-test/TestApp.xcodeproj/project.pbxproj @@ -21,9 +21,8 @@ 4BB1E3E62BE646CF00F1D21A /* ViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B1334F52BE5C4AC0020AB8E /* ViewModel.swift */; }; 4BEE1DB62BE70024001CE949 /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4BEE1DB52BE70024001CE949 /* main.swift */; }; 4BEE1DBB2BE7003E001CE949 /* NexaSwift in Frameworks */ = {isa = PBXBuildFile; productRef = 4BEE1DBA2BE7003E001CE949 /* NexaSwift */; }; - 932931422CD4CA5500B5A8D7 /* octopusv2_q4_0.gguf in CopyFiles */ = {isa = PBXBuildFile; fileRef = D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */; }; - D3EA0DDF2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */; }; - D3EA0DE22CCC97B900EA9CA7 /* octopusv2_q4_0.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */; }; + D38D5E552CD54ED900EB536A /* octopusv2_q4_0.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D38D5E542CD54ED900EB536A /* octopusv2_q4_0.gguf */; }; + D38D5E562CD54ED900EB536A /* octopusv2_q4_0.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D38D5E542CD54ED900EB536A /* octopusv2_q4_0.gguf */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -33,7 +32,6 @@ dstPath = ""; dstSubfolderSpec = 16; files = ( - 932931422CD4CA5500B5A8D7 /* octopusv2_q4_0.gguf in CopyFiles */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -54,7 +52,7 @@ 4B51A47B2BE7449700F65BFC /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; 4BEE1DB32BE70024001CE949 /* TestApp-CLI */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "TestApp-CLI"; sourceTree = BUILT_PRODUCTS_DIR; }; 4BEE1DB52BE70024001CE949 /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; - D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = octopusv2_q4_0.gguf; sourceTree = ""; }; + D38D5E542CD54ED900EB536A /* octopusv2_q4_0.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = octopusv2_q4_0.gguf; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -129,7 +127,7 @@ 4B10A3302BE5CD6600BEA6A1 /* Models */ = { isa = PBXGroup; children = ( - D3EA0DDC2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf */, + D38D5E542CD54ED900EB536A /* octopusv2_q4_0.gguf */, ); name = Models; path = Shared/Models; @@ -284,8 +282,8 @@ buildActionMask = 2147483647; files = ( 4B1334FC2BE5C4AC0020AB8E /* Preview Assets.xcassets in Resources */, + D38D5E562CD54ED900EB536A /* octopusv2_q4_0.gguf in Resources */, 4B1334FA2BE5C4AC0020AB8E /* Assets.xcassets in Resources */, - D3EA0DDF2CCC97B900EA9CA7 /* octopusv2_q4_0.gguf in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -294,8 +292,8 @@ buildActionMask = 2147483647; files = ( 4B0B05812BE5C452002BC7AF /* Preview Assets.xcassets in Resources */, + D38D5E552CD54ED900EB536A /* octopusv2_q4_0.gguf in 
Resources */, 4B0B057E2BE5C452002BC7AF /* Assets.xcassets in Resources */, - D3EA0DE22CCC97B900EA9CA7 /* octopusv2_q4_0.gguf in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -474,7 +472,7 @@ "@executable_path/Frameworks", ); MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = test.tim.TestApp; + PRODUCT_BUNDLE_IDENTIFIER = test.nexaai.TestApp; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_EMIT_LOC_STRINGS = YES; SWIFT_VERSION = 5.0; @@ -504,7 +502,7 @@ "@executable_path/Frameworks", ); MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = test.tim.TestApp; + PRODUCT_BUNDLE_IDENTIFIER = test.nexaai.TestApp; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_EMIT_LOC_STRINGS = YES; SWIFT_VERSION = 5.0; @@ -534,7 +532,7 @@ ); MACOSX_DEPLOYMENT_TARGET = 15.0; MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = "test.tim.TestApp-Macos"; + PRODUCT_BUNDLE_IDENTIFIER = "test.nexaai.TestApp-Macos"; PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = macosx; SWIFT_EMIT_LOC_STRINGS = YES; @@ -564,7 +562,7 @@ ); MACOSX_DEPLOYMENT_TARGET = 15.0; MARKETING_VERSION = 1.0; - PRODUCT_BUNDLE_IDENTIFIER = "test.tim.TestApp-Macos"; + PRODUCT_BUNDLE_IDENTIFIER = "test.nexaai.TestApp-Macos"; PRODUCT_NAME = "$(TARGET_NAME)"; SDKROOT = macosx; SWIFT_EMIT_LOC_STRINGS = YES; @@ -576,7 +574,7 @@ isa = XCBuildConfiguration; buildSettings = { CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = H34TY7DNP5; + DEVELOPMENT_TEAM = 8235LVN3VW; ENABLE_HARDENED_RUNTIME = YES; MACOSX_DEPLOYMENT_TARGET = 15.0; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -589,7 +587,7 @@ isa = XCBuildConfiguration; buildSettings = { CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = H34TY7DNP5; + DEVELOPMENT_TEAM = 8235LVN3VW; ENABLE_HARDENED_RUNTIME = YES; MACOSX_DEPLOYMENT_TARGET = 15.0; PRODUCT_NAME = "$(TARGET_NAME)"; From 59f223da479516dae442be85774d99c4746b1188 Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Fri, 1 Nov 2024 13:36:01 -0700 Subject: [PATCH 007/160] update ViewModel and octopusv2 chatFormatter --- examples/swift-test/Shared/ViewModel.swift | 49 +++++++++++-------- .../swift-test/TestApp-Commandline/main.swift | 2 +- .../Sources/NexaSwift/NexaTextInference.swift | 16 ++++-- 3 files changed, 40 insertions(+), 27 deletions(-) diff --git a/examples/swift-test/Shared/ViewModel.swift b/examples/swift-test/Shared/ViewModel.swift index 4d141691..6e5912de 100644 --- a/examples/swift-test/Shared/ViewModel.swift +++ b/examples/swift-test/Shared/ViewModel.swift @@ -8,43 +8,50 @@ class ViewModel { let nexaSwift: NexaTextInference var result = "" var usingStream = true + private var messages: [ChatCompletionRequestMessage] = [] + private let maxHistory = 1 private var cancallable: Set = [] init() { let configuration = Configuration(maxNewToken: 128, stopTokens: [""]) - let path = Bundle.main.path(forResource: "octopusv2_q4_0", ofType: "gguf") ?? "" - nexaSwift = (try? NexaTextInference(modelPath: path, modelConfiguration: configuration))! - } - - func formatUserMessage(_ message: String) -> String { - let formatted = """ - Below is the query from the users, please call the correct function and generate the parameters to call the function. - - Query: \(message) - - Response: - """ - return formatted + let model_path = Bundle.main.path(forResource: "octopusv2_q4_0", ofType: "gguf") ?? "" + nexaSwift = (try? NexaTextInference(modelPath: model_path, modelConfiguration: configuration))! 
} func run(for userMessage: String) { result = "" - - let formattedUserMessage = formatUserMessage(userMessage) + let userMessageText = ChatCompletionRequestMessage.user( + ChatCompletionRequestUserMessage(content: .text(userMessage)) + ) + + messages.append(userMessageText) + if messages.count > maxHistory * 2 { + messages.removeFirst(2) + } Task { switch usingStream { case true: - for try await value in await nexaSwift.createCompletionStream(for: formattedUserMessage) { - print("Received content: \(value.choices[0].text)") // DEBUG - result += value.choices[0].text + for try await value in await nexaSwift.createChatCompletionStream(for: messages) { + let delta = value.choices[0].delta.content ?? "" + result += delta } case false: - if let completionResponse = try? await nexaSwift.createCompletion(for: formattedUserMessage) { - print("Received completion response: \(completionResponse.choices[0].text)") // DEBUG - result += completionResponse.choices[0].text + if let completionResponse = try? await nexaSwift.createChatCompletion(for: messages) { + let content = completionResponse.choices[0].message.content ?? "" + result += content } } + + // Add assistant's response to history + let assistantMessage = ChatCompletionRequestMessage.assistant( + ChatCompletionRequestAssistantMessage( + content: result, + toolCalls: nil, + functionCall: nil + ) + ) + messages.append(assistantMessage) } } } diff --git a/examples/swift-test/TestApp-Commandline/main.swift b/examples/swift-test/TestApp-Commandline/main.swift index 8493c22c..b50e9ae9 100644 --- a/examples/swift-test/TestApp-Commandline/main.swift +++ b/examples/swift-test/TestApp-Commandline/main.swift @@ -6,7 +6,7 @@ let configuration = NexaSwift.Configuration( stopTokens: [] ) -let model_path = "" // For Commandline, please add the local path here. +let model_path = "path/to/your/model" // For Commandline, please add the local path here. let nexaSwift = try NexaSwift.NexaTextInference(modelPath: model_path, modelConfiguration: configuration) var streamMode = false diff --git a/swift/Sources/NexaSwift/NexaTextInference.swift b/swift/Sources/NexaSwift/NexaTextInference.swift index 895c3f8a..7c137bc7 100644 --- a/swift/Sources/NexaSwift/NexaTextInference.swift +++ b/swift/Sources/NexaSwift/NexaTextInference.swift @@ -19,7 +19,7 @@ public class NexaTextInference { throw NSError(domain: "InvalidParameterError", code: 400, userInfo: [NSLocalizedDescriptionKey: "Either modelPath or localPath must be provided."]) } self.model = try LlamaModel(path: modelPath, configuration: modelConfiguration) - self.modelPath = modelPath.lowercased() + self.modelPath = modelPath self.chatFormatterRegistry = .init() } @@ -49,20 +49,26 @@ public class NexaTextInference { private func getFormatterForModel() -> ChatFormatter? { let modelArch = model.arch.lowercased() + let lowerModelPath = modelPath.lowercased() let modelType: ChatCompletionModel? 
= { switch modelArch { case _ where modelArch.contains("gemma"): - return .gemma + // For Gemma-based models, check the model path + if lowerModelPath.contains("octopus-v2") || lowerModelPath.contains("octopusv2") { + return .octopusv2 + } else { + return .gemma + } case _ where modelArch.contains("qwen"): return .qwen case _ where modelArch.contains("llama"): // For Llama-based models, check the model path - if modelPath.contains("llama-2") || modelPath.contains("llama2") { + if lowerModelPath.contains("llama-2") || lowerModelPath.contains("llama2") { return .llama - } else if modelPath.contains("llama-3") || modelPath.contains("llama3") { + } else if lowerModelPath.contains("llama-3") || lowerModelPath.contains("llama3") { return .llama3 - } else if modelPath.contains("mistral") { + } else if lowerModelPath.contains("mistral") { return .mistral } else { // If can't determine specific version, default to Llama2 From 06cb50ff0e140ea810d540a8af79b390dc6e3a8c Mon Sep 17 00:00:00 2001 From: JoyboyBrian Date: Fri, 1 Nov 2024 13:51:31 -0700 Subject: [PATCH 008/160] update README --- swift/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swift/README.md b/swift/README.md index 23ac3bdb..0b284330 100644 --- a/swift/README.md +++ b/swift/README.md @@ -8,7 +8,7 @@ To add NexaSwift to your Swift project, add the following dependency in your `Package.swift` file: ```swift -.package(url: "https://github.com/NexaAI/nexa-sdk/NexaSwift.git", .branch("main")) +.package(url: "https://github.com/NexaAI/nexa-sdk.git", .branch("main")) ``` ## Usage From 58d7140a7480d6a436e22f1cae16c0db59476e45 Mon Sep 17 00:00:00 2001 From: Davidqian123 Date: Fri, 1 Nov 2024 23:21:44 +0000 Subject: [PATCH 009/160] fix load llava library error --- nexa/gguf/llama/llava_cpp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexa/gguf/llama/llava_cpp.py b/nexa/gguf/llama/llava_cpp.py index fa8eb7c0..9671eafb 100644 --- a/nexa/gguf/llama/llava_cpp.py +++ b/nexa/gguf/llama/llava_cpp.py @@ -33,7 +33,7 @@ from nexa.gguf.lib_utils import load_library # Specify the base name of the shared library to load -_libllava_base_name = "llava" +_libllava_base_name = "llava_shared" # Load the library _libllava = load_library(_libllava_base_name) From 0b2318de291fe74ce698bedc7e74c87de2a3e4f7 Mon Sep 17 00:00:00 2001 From: JoyboyBrian Date: Fri, 1 Nov 2024 16:56:54 -0700 Subject: [PATCH 010/160] update title --- examples/swift-test/Shared/ContentView.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/swift-test/Shared/ContentView.swift b/examples/swift-test/Shared/ContentView.swift index 140ad45a..8b969b54 100644 --- a/examples/swift-test/Shared/ContentView.swift +++ b/examples/swift-test/Shared/ContentView.swift @@ -6,7 +6,7 @@ struct ContentView: View { var body: some View { VStack { - Text("Swift Llama Demo").font(.title) + Text("Nexa Swift Demo").font(.title) Toggle(isOn: $viewModel.usingStream) { Text("Use Stream") From ac47b2f1c356ad1effd9cd6d0bd3031814076517 Mon Sep 17 00:00:00 2001 From: Davidqian123 Date: Sat, 2 Nov 2024 00:00:49 +0000 Subject: [PATCH 011/160] add BUILD_SHARED_LIBS flag in llama.cpp build --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4670bff2..8215ddbb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,6 +123,7 @@ if(LLAMA_BUILD) -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/llama_install -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=17 + -DBUILD_SHARED_LIBS=ON 
-DLLAMA_CUDA=${LLAMA_CUDA} -DLLAMA_METAL=${LLAMA_METAL} -DGGML_AVX=$,$>>,OFF,ON> From f82d2ace1304878554cab00408a77e95f3979cd2 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sat, 2 Nov 2024 01:37:57 +0000 Subject: [PATCH 012/160] audio lm works in SDK --- CMakeLists.txt | 55 +++++++++++----------- docs/README.md | 4 +- nexa/constants.py | 1 + nexa/gguf/lib_utils.py | 1 + nexa/gguf/llama/nexa_audio_lm_cpp.py | 5 +- nexa/gguf/nexa_inference_audio_lm.py | 70 +++++++++++++++++++++------- pyproject.toml | 2 +- 7 files changed, 90 insertions(+), 48 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 96d197be..870a1349 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,20 +113,20 @@ if(STABLE_DIFFUSION_BUILD) ) endif() -# llama_cpp project -option(LLAMA_BUILD "Build llama.cpp" ON) -if(LLAMA_BUILD) - message(STATUS "Configuring llama.cpp build...") + +# nexa_llama_cpp project +option(NEXA_LLAMA_BUILD "Build nexa-llama.cpp" ON) +if(NEXA_LLAMA_BUILD) set(LLAMA_CUDA ${GGML_CUDA}) set(LLAMA_METAL ${GGML_METAL}) - ExternalProject_Add(llama_project - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/llama.cpp - BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/llama_build + ExternalProject_Add(nexa_llama_project + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/nexa_llama.cpp + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build CMAKE_ARGS ${USER_DEFINED_OPTIONS} ${COMMON_CMAKE_OPTIONS} - -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/llama_install + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=17 -DLLAMA_CUDA=${LLAMA_CUDA} @@ -139,28 +139,41 @@ if(LLAMA_BUILD) -DGGML_FMA=$,$>>,OFF,ON> -DGGML_F16C=$,$>>,OFF,ON> -DGGML_METAL_EMBED_LIBRARY=$,ON,OFF> + -DLLAMA_BUILD_INFO=OFF BUILD_ALWAYS 1 BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} INSTALL_COMMAND ${CMAKE_COMMAND} --build . 
--config Release --target install LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 + LOG_OUTPUT_ON_FAILURE 1 + ) + + # Add a custom target to copy build logs to a permanent location + add_custom_command(TARGET nexa_llama_project POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/build_logs + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_project-prefix/src/nexa_llama_project-stamp/nexa_llama_project-*.log + ${CMAKE_CURRENT_SOURCE_DIR}/build_logs/ + COMMENT "Copying nexa_llama build logs to permanent location" ) + endif() -# nexa_llama_cpp project -option(NEXA_LLAMA_BUILD "Build nexa-llama.cpp" ON) -if(NEXA_LLAMA_BUILD) +# llama_cpp project +option(LLAMA_BUILD "Build llama.cpp" ON) +if(LLAMA_BUILD) + message(STATUS "Configuring llama.cpp build...") set(LLAMA_CUDA ${GGML_CUDA}) set(LLAMA_METAL ${GGML_METAL}) - ExternalProject_Add(nexa_llama_project - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/nexa_llama.cpp - BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build + ExternalProject_Add(llama_project + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/llama.cpp + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/llama_build CMAKE_ARGS ${USER_DEFINED_OPTIONS} ${COMMON_CMAKE_OPTIONS} - -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install + -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/llama_install -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=17 -DLLAMA_CUDA=${LLAMA_CUDA} @@ -173,25 +186,13 @@ if(NEXA_LLAMA_BUILD) -DGGML_FMA=$,$>>,OFF,ON> -DGGML_F16C=$,$>>,OFF,ON> -DGGML_METAL_EMBED_LIBRARY=$,ON,OFF> - -DLLAMA_BUILD_INFO=OFF BUILD_ALWAYS 1 BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} INSTALL_COMMAND ${CMAKE_COMMAND} --build . --config Release --target install LOG_CONFIGURE 1 LOG_BUILD 1 LOG_INSTALL 1 - LOG_OUTPUT_ON_FAILURE 1 ) - - # Add a custom target to copy build logs to a permanent location - add_custom_command(TARGET nexa_llama_project POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/build_logs - COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_project-prefix/src/nexa_llama_project-stamp/nexa_llama_project-*.log - ${CMAKE_CURRENT_SOURCE_DIR}/build_logs/ - COMMENT "Copying nexa_llama build logs to permanent location" - ) - endif() # bark_cpp project diff --git a/docs/README.md b/docs/README.md index f093c40e..ddabfb54 100644 --- a/docs/README.md +++ b/docs/README.md @@ -32,7 +32,7 @@ pip install nexaai[onnx] # if you need ONNX support ```bash git clone --recursive https://github.com/NexaAI/nexa-sdk.git cd nexa-sdk -pip install -e . +pip install -e . 
--verbose # use --verbose for debugging pip install -e .[onnx] # if you need ONNX support ``` @@ -75,6 +75,8 @@ wget -O control_normal-fp16.safetensors https://huggingface.co/webui/ControlNet- wget -O controlnet_test.png https://huggingface.co/takuma104/controlnet_dev/resolve/main/gen_compare/control_images/converted/control_human_normal.png python -m nexa.gguf.nexa_inference_image sd1-5 --control_net_path control_normal-fp16.safetensors --control_image_path controlnet_test.png python -m nexa.gguf.nexa_inference_voice whisper-tiny +python -m nexa.gguf.nexa_inference_audio_lm qwen2audio +python -m nexa.gguf.nexa_inference_audio_lm octoaudio ``` ### Test with Streamlit UI diff --git a/nexa/constants.py b/nexa/constants.py index d3e16acd..f3e572ba 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -225,6 +225,7 @@ class ModelType(Enum): NEXA_RUN_MODEL_MAP = { **NEXA_RUN_MODEL_MAP_TEXT, **NEXA_RUN_MODEL_MAP_VLM, + **NEXA_RUN_MODEL_MAP_AUDIO_LM, **NEXA_RUN_MODEL_MAP_IMAGE, **NEXA_RUN_MODEL_MAP_VOICE, **NEXA_RUN_MODEL_MAP_TTS, diff --git a/nexa/gguf/lib_utils.py b/nexa/gguf/lib_utils.py index ec030b9d..8bb4c2a1 100644 --- a/nexa/gguf/lib_utils.py +++ b/nexa/gguf/lib_utils.py @@ -64,6 +64,7 @@ def load_library(lib_base_name: str): # Try to load the shared library, handling potential errors for _lib_path in _lib_paths: + print("Trying to load", _lib_path) if _lib_path.exists(): try: return ctypes.CDLL(str(_lib_path), **cdll_args) # type: ignore diff --git a/nexa/gguf/llama/nexa_audio_lm_cpp.py b/nexa/gguf/llama/nexa_audio_lm_cpp.py index 38bb5c72..95a51ac3 100644 --- a/nexa/gguf/llama/nexa_audio_lm_cpp.py +++ b/nexa/gguf/llama/nexa_audio_lm_cpp.py @@ -34,6 +34,7 @@ def _load_shared_library(lib_base_name: str): # Try to load the shared library, handling potential errors for _lib_path in _lib_paths: + print("Trying to load", _lib_path) if _lib_path.exists(): try: return ctypes.CDLL(str(_lib_path)) @@ -45,9 +46,9 @@ def _load_shared_library(lib_base_name: str): ) # Load both libraries -_lib_base_name = "hf-omni-audio-cli_shared" +_lib_base_name = "nexa-omni-audio-lib" _lib_omni = _load_shared_library(_lib_base_name) -_lib_base_name = "hf-qwen2-audio_shared" +_lib_base_name = "nexa-qwen2-audio-lib" _lib_qwen2 = _load_shared_library(_lib_base_name) diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index 42e8c6e1..29fbf14a 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -1,11 +1,12 @@ import ctypes import logging import os +from pathlib import Path from nexa.constants import ( DEFAULT_TEXT_GEN_PARAMS, - NEXA_RUN_MODEL_MAP_VLM, - NEXA_RUN_PROJECTOR_MAP, + NEXA_RUN_MODEL_MAP_AUDIO_LM, + NEXA_RUN_AUDIO_LM_PROJECTOR_MAP, ) from nexa.gguf.lib_utils import is_gpu_available from nexa.gguf.llama import nexa_audio_lm_cpp @@ -16,6 +17,11 @@ def is_qwen(model_name): return True return False +assert ( + set(NEXA_RUN_MODEL_MAP_AUDIO_LM.keys()) + == set(NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.keys()) +), "Model, projector, and handler should have the same keys" + class NexaAudioLMInference: """ A class used for loading Bark text-to-speech models and running text-to-speech generation. @@ -31,36 +37,66 @@ class NexaAudioLMInference: verbosity (int): Verbosity level for the Bark model. Defaults to 0. 
""" - def __init__(self, model_path: str, mmproj_path: str, verbosity=0, device="auto", **kwargs): + def __init__(self, model_path=None, local_path=None, projector_local_path=None, device="auto", **kwargs): if model_path is None and local_path is None: raise ValueError("Either model_path or local_path must be provided.") + self.params = DEFAULT_TEXT_GEN_PARAMS.copy() self.params.update(kwargs) self.model = None + self.projector = None + self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.get(model_path, None) + self.downloaded_path = local_path + self.projector_downloaded_path = projector_local_path self.device = device - - self.model_path = model_path - self.mmproj_path = mmproj_path if self.device == "auto" or self.device == "gpu": self.n_gpu_layers = -1 if is_gpu_available() else 0 else: self.n_gpu_layers = 0 - self.is_qwen = is_qwen(model_path) - self.ctx_params = nexa_audio_lm_cpp.context_default_params(self.is_qwen) - self.context = None - self.verbosity = verbosity - self.params = { - "output_path": os.path.join(os.getcwd(), "audio-lm"), - } - self.params.update(kwargs) - self.downloaded_path, _ = pull_model(self.model_path, **kwargs) - if self.downloaded_path is None: + + if self.downloaded_path is not None and self.projector_downloaded_path is not None: + # when running from local, both path should be provided + pass + elif self.downloaded_path is not None: + if model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: + self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] + self.projector_downloaded_path, _ = pull_model(self.projector_path, **kwargs) + elif model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: + self.model_path = NEXA_RUN_MODEL_MAP_AUDIO_LM[model_path] + self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] + self.downloaded_path, _ = pull_model(self.model_path, **kwargs) + self.projector_downloaded_path, _ = pull_model(self.projector_path, **kwargs) + elif Path(model_path).parent.exists(): + local_dir = Path(model_path).parent + model_name = Path(model_path).name + tag_and_ext = model_name.split(":")[-1] + self.downloaded_path = local_dir / f"model-{tag_and_ext}" + self.projector_downloaded_path = local_dir / f"projector-{tag_and_ext}" + if not (self.downloaded_path.exists() and self.projector_downloaded_path.exists()): + logging.error( + f"Model or projector not found in {local_dir}. " + "Make sure to name them as 'model-.gguf' and 'projector-.gguf'." + ) + exit(1) + else: + logging.error("VLM user model from hub is not supported yet.") + exit(1) + + if self.downloaded_path is None or self.projector_downloaded_path is None: logging.error( f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.", exc_info=True, ) exit(1) - self._load_model() + self.profiling = kwargs.get("profiling", False) + + if not kwargs.get("streamlit", False): + self._load_model() + if self.model is None: + logging.error( + "Failed to load model or tokenizer. 
Exiting.", exc_info=True + ) + exit(1) def _load_model(self): logging.debug(f"Loading model from {self.model_path} and {self.mmproj_path}") diff --git a/pyproject.toml b/pyproject.toml index 24bd26e4..736a32f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["scikit-build-core"] +requires = ["scikit-build-core", "setuptools>=64.0"] build-backend = "scikit_build_core.build" [project] From c9582c853b8694307b3d27b0710d8f4dc9574f95 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sat, 2 Nov 2024 08:19:32 +0000 Subject: [PATCH 013/160] add pybinding for audio LM --- dependency/nexa_llama.cpp | 2 +- nexa/gguf/llama/nexa_audio_lm_cpp.py | 7 +++++-- nexa/gguf/nexa_inference_audio_lm.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dependency/nexa_llama.cpp b/dependency/nexa_llama.cpp index be24dc9d..ddc52e36 160000 --- a/dependency/nexa_llama.cpp +++ b/dependency/nexa_llama.cpp @@ -1 +1 @@ -Subproject commit be24dc9d1ea55e04175589178ab2277d135ed3a9 +Subproject commit ddc52e3625872f0fa0354e0641f50892faa7cd0d diff --git a/nexa/gguf/llama/nexa_audio_lm_cpp.py b/nexa/gguf/llama/nexa_audio_lm_cpp.py index 95a51ac3..c3cfc1d8 100644 --- a/nexa/gguf/llama/nexa_audio_lm_cpp.py +++ b/nexa/gguf/llama/nexa_audio_lm_cpp.py @@ -3,6 +3,9 @@ import sys import pathlib +# make sure only below functions are exposed +__all__ = ['context_default_params', 'init_context', 'process_full', 'free'] + # Load the library def _load_shared_library(lib_base_name: str): # Determine the file extension based on the platform @@ -48,10 +51,10 @@ def _load_shared_library(lib_base_name: str): # Load both libraries _lib_base_name = "nexa-omni-audio-lib" _lib_omni = _load_shared_library(_lib_base_name) -_lib_base_name = "nexa-qwen2-audio-lib" +# _lib_base_name = "nexa-qwen2-audio-lib" +# _lib_qwen2 = _load_shared_library(_lib_base_name) _lib_qwen2 = _load_shared_library(_lib_base_name) - # struct omni_context_params # { # char *model; diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index 29fbf14a..02062080 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -13,7 +13,7 @@ from nexa.general import pull_model def is_qwen(model_name): - if "qwen2" in model_name: # TEMPORARY SOLUTION : this hardcode can be risky + if "qwen" in model_name.lower(): # TEMPORARY SOLUTION : this hardcode can be risky return True return False From 010d67fdf70d03c8d2a45862ac8a1777d9f10a14 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sat, 2 Nov 2024 08:42:24 +0000 Subject: [PATCH 014/160] audio lm can correctly load model --- nexa/constants.py | 14 ++--- nexa/gguf/nexa_inference_audio_lm.py | 80 ++++++++++++++++++---------- 2 files changed, 60 insertions(+), 34 deletions(-) diff --git a/nexa/constants.py b/nexa/constants.py index f3e572ba..c0943cdb 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -109,12 +109,12 @@ class ModelType(Enum): } NEXA_RUN_MODEL_MAP_AUDIO_LM = { - "qwen2audio": "Qwen2-Audio-7.8B-Instruct:q4_0", - "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:fp16", - "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:q4_0", - "octoaudio": "Octo-omni-audio:fp16", - "octoaudio:fp16": "Octo-omni-audio:fp16", - "octoaudio:q4_0": "Octo-omni-audio:q4_0", + "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-q4_0", + "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", + "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", + "octoaudio": "Octo-omni-audio:model-fp16", + "octoaudio:fp16": 
"Octo-omni-audio:model-fp16", + "octoaudio:q4_0": "Octo-omni-audio:model-q4_0", } NEXA_RUN_MODEL_MAP_VOICE = { @@ -409,6 +409,8 @@ class ModelType(Enum): "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL, "llava-phi-3-mini": ModelType.MULTIMODAL, "llava-llama-3-8b-v1.1": ModelType.MULTIMODAL, + "octoaudio": ModelType.MULTIMODAL, + "qwen2audio": ModelType.MULTIMODAL, "faster-whisper-tiny.en": ModelType.AUDIO, "faster-whisper-tiny": ModelType.AUDIO, "faster-whisper-small.en": ModelType.AUDIO, diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index 02062080..178c284c 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -10,18 +10,21 @@ ) from nexa.gguf.lib_utils import is_gpu_available from nexa.gguf.llama import nexa_audio_lm_cpp +from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr from nexa.general import pull_model + def is_qwen(model_name): - if "qwen" in model_name.lower(): # TEMPORARY SOLUTION : this hardcode can be risky + if "qwen" in model_name.lower(): # TEMPORARY SOLUTION : this hardcode can be risky return True return False -assert ( - set(NEXA_RUN_MODEL_MAP_AUDIO_LM.keys()) - == set(NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.keys()) + +assert set(NEXA_RUN_MODEL_MAP_AUDIO_LM.keys()) == set( + NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.keys() ), "Model, projector, and handler should have the same keys" + class NexaAudioLMInference: """ A class used for loading Bark text-to-speech models and running text-to-speech generation. @@ -37,10 +40,17 @@ class NexaAudioLMInference: verbosity (int): Verbosity level for the Bark model. Defaults to 0. """ - def __init__(self, model_path=None, local_path=None, projector_local_path=None, device="auto", **kwargs): + def __init__( + self, + model_path=None, + local_path=None, + projector_local_path=None, + device="auto", + **kwargs, + ): if model_path is None and local_path is None: raise ValueError("Either model_path or local_path must be provided.") - + self.params = DEFAULT_TEXT_GEN_PARAMS.copy() self.params.update(kwargs) self.model = None @@ -49,30 +59,41 @@ def __init__(self, model_path=None, local_path=None, projector_local_path=None, self.downloaded_path = local_path self.projector_downloaded_path = projector_local_path self.device = device + self.context = None if self.device == "auto" or self.device == "gpu": self.n_gpu_layers = -1 if is_gpu_available() else 0 else: self.n_gpu_layers = 0 - if self.downloaded_path is not None and self.projector_downloaded_path is not None: + if ( + self.downloaded_path is not None + and self.projector_downloaded_path is not None + ): # when running from local, both path should be provided pass elif self.downloaded_path is not None: if model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] - self.projector_downloaded_path, _ = pull_model(self.projector_path, **kwargs) + self.projector_downloaded_path, _ = pull_model( + self.projector_path, **kwargs + ) elif model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: self.model_path = NEXA_RUN_MODEL_MAP_AUDIO_LM[model_path] self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] self.downloaded_path, _ = pull_model(self.model_path, **kwargs) - self.projector_downloaded_path, _ = pull_model(self.projector_path, **kwargs) + self.projector_downloaded_path, _ = pull_model( + self.projector_path, **kwargs + ) elif Path(model_path).parent.exists(): local_dir = Path(model_path).parent model_name = Path(model_path).name tag_and_ext = 
model_name.split(":")[-1] self.downloaded_path = local_dir / f"model-{tag_and_ext}" self.projector_downloaded_path = local_dir / f"projector-{tag_and_ext}" - if not (self.downloaded_path.exists() and self.projector_downloaded_path.exists()): + if not ( + self.downloaded_path.exists() + and self.projector_downloaded_path.exists() + ): logging.error( f"Model or projector not found in {local_dir}. " "Make sure to name them as 'model-.gguf' and 'projector-.gguf'." @@ -88,10 +109,13 @@ def __init__(self, model_path=None, local_path=None, projector_local_path=None, exc_info=True, ) exit(1) + self.is_qwen = is_qwen(self.downloaded_path) # TEMPORARY SOLUTION : this hardcode can be risky + self.ctx_params = nexa_audio_lm_cpp.context_default_params(self.is_qwen) self.profiling = kwargs.get("profiling", False) if not kwargs.get("streamlit", False): - self._load_model() + with suppress_stdout_stderr(): + self._load_model() if self.model is None: logging.error( "Failed to load model or tokenizer. Exiting.", exc_info=True @@ -99,20 +123,25 @@ def __init__(self, model_path=None, local_path=None, projector_local_path=None, exit(1) def _load_model(self): - logging.debug(f"Loading model from {self.model_path} and {self.mmproj_path}") + logging.debug( + f"Loading model from {self.downloaded_path} and {self.projector_downloaded_path}" + ) try: - self.ctx_params.model = ctypes.c_char_p(self.model_path.encode("utf-8")) - self.ctx_params.mmproj = ctypes.c_char_p(self.mmproj_path.encode("utf-8")) + self.ctx_params.model = ctypes.c_char_p( + self.downloaded_path.encode("utf-8") + ) + self.ctx_params.mmproj = ctypes.c_char_p( + self.projector_downloaded_path.encode("utf-8") + ) self.ctx_params.n_gpu_layers = ( 0x7FFFFFFF if self.n_gpu_layers == -1 else self.n_gpu_layers ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers self.context = nexa_audio_lm_cpp.init_context( - ctypes.byref(self.ctx_params), - self.is_qwen + ctypes.byref(self.ctx_params), self.is_qwen ) if not self.context: - raise RuntimeError("Failed to load Bark model") + raise RuntimeError("Failed to load audio language model") logging.debug("Model loaded successfully") except Exception as e: logging.error(f"Error loading model: {e}") @@ -121,18 +150,17 @@ def _load_model(self): def run(self): while True: try: - audio_path = input("Audio Path (leave empty if no audio): ") + audio_path = input("Audio Path (required): ") if audio_path and not os.path.exists(audio_path): print(f"'{audio_path}' is not a path to audio. Will ignore.") - user_input = input("Enter text: ") + user_input = input("Enter text (leave empty if no prompt): ") self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) nexa_audio_lm_cpp.process_full( - self.context, ctypes.byref(self.ctx_params), - self.is_qwen + self.context, ctypes.byref(self.ctx_params), self.is_qwen ) except KeyboardInterrupt: @@ -147,7 +175,7 @@ def __del__(self): Destructor to free the Bark context when the instance is deleted. 
""" if self.context: - nexa_audio_lm_cpp.free_context(self.context) + nexa_audio_lm_cpp.free_context(self.context, self.is_qwen) if __name__ == "__main__": @@ -174,9 +202,5 @@ def __del__(self): model_path = kwargs.pop("model_path") device = kwargs.pop("device", "auto") - inference = NexaAudioLMInference( - model_path, - device=device, - **kwargs - ) - inference.run() \ No newline at end of file + inference = NexaAudioLMInference(model_path, device=device, **kwargs) + inference.run() From 6112c69d98ff7c0104feca67fbc74a0ea940b42c Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 05:08:24 +0000 Subject: [PATCH 015/160] remove nexa-llama.cpp submodule --- .gitmodules | 6 +- CMakeLists.txt | 71 +---- dependency/llama.cpp | 2 +- dependency/nexa_llama.cpp | 1 - docs/README.md | 7 +- nexa/constants.py | 4 +- nexa/gguf/llama/nexa_audio_lm_cpp.py | 1 - nexa/gguf/nexa_inference_audio_lm.py | 412 +++++++++++++-------------- pyproject.toml | 5 +- 9 files changed, 221 insertions(+), 288 deletions(-) delete mode 160000 dependency/nexa_llama.cpp diff --git a/.gitmodules b/.gitmodules index b369db31..77565f68 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,11 +5,7 @@ [submodule "dependency/llama.cpp"] path = dependency/llama.cpp url = https://github.com/NexaAI/llama.cpp.git - branch = master-release -[submodule "dependency/nexa_llama.cpp"] - path = dependency/nexa_llama.cpp - url = https://github.com/NexaAI/llama.cpp - branch = nexa-audio-lm + branch = zack-dev [submodule "nexa/eval/benchmark_tasks"] path = nexa/eval/benchmark_tasks url = https://github.com/NexaAI/benchmark-tasks.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 870a1349..dc785298 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.16) project(nexa_gguf) include(ExternalProject) -find_package(OpenMP REQUIRED) +find_package(OpenMP REQUIRED) # Find OpenMP package which is needed for llama.cpp set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_CXX_STANDARD 17) @@ -56,7 +56,6 @@ endfunction() # Collect all user-defined options get_all_options(USER_DEFINED_OPTIONS) -message(STATUS "USER_DEFINED_OPTIONS: ${USER_DEFINED_OPTIONS}") if(APPLE) set(CMAKE_INSTALL_RPATH "@loader_path") @@ -90,7 +89,6 @@ endif() # stable_diffusion_cpp project option(STABLE_DIFFUSION_BUILD "Build stable-diffusion.cpp" ON) if(STABLE_DIFFUSION_BUILD) - message(STATUS "Configuring stable-diffusion.cpp build...") ExternalProject_Add(stable_diffusion_project SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/stable-diffusion.cpp BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build @@ -107,63 +105,12 @@ if(STABLE_DIFFUSION_BUILD) BUILD_ALWAYS 1 BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} INSTALL_COMMAND ${CMAKE_COMMAND} --build . 
--config Release --target install - LOG_CONFIGURE 1 - LOG_BUILD 1 - LOG_INSTALL 1 ) endif() - -# nexa_llama_cpp project -option(NEXA_LLAMA_BUILD "Build nexa-llama.cpp" ON) -if(NEXA_LLAMA_BUILD) - set(LLAMA_CUDA ${GGML_CUDA}) - set(LLAMA_METAL ${GGML_METAL}) - - ExternalProject_Add(nexa_llama_project - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/nexa_llama.cpp - BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build - CMAKE_ARGS - ${USER_DEFINED_OPTIONS} - ${COMMON_CMAKE_OPTIONS} - -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_CXX_STANDARD=17 - -DLLAMA_CUDA=${LLAMA_CUDA} - -DLLAMA_METAL=${LLAMA_METAL} - -DCMAKE_C_FLAGS=${OpenMP_C_FLAGS} - -DCMAKE_CXX_FLAGS=${OpenMP_CXX_FLAGS} - -DCMAKE_EXE_LINKER_FLAGS=${OpenMP_EXE_LINKER_FLAGS} - -DGGML_AVX=$,$>>,OFF,ON> - -DGGML_AVX2=$,$>>,OFF,ON> - -DGGML_FMA=$,$>>,OFF,ON> - -DGGML_F16C=$,$>>,OFF,ON> - -DGGML_METAL_EMBED_LIBRARY=$,ON,OFF> - -DLLAMA_BUILD_INFO=OFF - BUILD_ALWAYS 1 - BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} - INSTALL_COMMAND ${CMAKE_COMMAND} --build . --config Release --target install - LOG_CONFIGURE 1 - LOG_BUILD 1 - LOG_INSTALL 1 - LOG_OUTPUT_ON_FAILURE 1 - ) - - # Add a custom target to copy build logs to a permanent location - add_custom_command(TARGET nexa_llama_project POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/build_logs - COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_project-prefix/src/nexa_llama_project-stamp/nexa_llama_project-*.log - ${CMAKE_CURRENT_SOURCE_DIR}/build_logs/ - COMMENT "Copying nexa_llama build logs to permanent location" - ) - -endif() - # llama_cpp project option(LLAMA_BUILD "Build llama.cpp" ON) if(LLAMA_BUILD) - message(STATUS "Configuring llama.cpp build...") set(LLAMA_CUDA ${GGML_CUDA}) set(LLAMA_METAL ${GGML_METAL}) @@ -176,6 +123,7 @@ if(LLAMA_BUILD) -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/llama_install -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=17 + -DBUILD_SHARED_LIBS=ON -DLLAMA_CUDA=${LLAMA_CUDA} -DLLAMA_METAL=${LLAMA_METAL} -DCMAKE_C_FLAGS=${OpenMP_C_FLAGS} @@ -189,16 +137,12 @@ if(LLAMA_BUILD) BUILD_ALWAYS 1 BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} INSTALL_COMMAND ${CMAKE_COMMAND} --build . --config Release --target install - LOG_CONFIGURE 1 - LOG_BUILD 1 - LOG_INSTALL 1 ) endif() # bark_cpp project option(BARK_BUILD "Build bark.cpp" ON) if(BARK_BUILD) - message(STATUS "Configuring bark.cpp build...") ExternalProject_Add(bark_project SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/bark.cpp BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bark_build @@ -215,21 +159,15 @@ if(BARK_BUILD) BUILD_ALWAYS 1 BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release -- ${MSBUILD_ARGS} INSTALL_COMMAND ${CMAKE_COMMAND} --build . 
--config Release --target install - LOG_CONFIGURE 1 - LOG_BUILD 1 - LOG_INSTALL 1 - LOG_OUTPUT_ON_FAILURE 1 ) endif() # Install the built libraries to the final destination -message(STATUS "Installing built libraries to final destination...") if(WIN32) install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ - ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/ DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib @@ -237,11 +175,11 @@ if(WIN32) FILES_MATCHING PATTERN "*.dll" ) + install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/llama_build/bin/Release/ - ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/bin/Release/ ${CMAKE_CURRENT_BINARY_DIR}/bark_build/Release/ DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib @@ -254,7 +192,6 @@ else() DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/ ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ - ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/ DESTINATION ${SKBUILD_PLATLIB_DIR}/nexa/gguf/lib USE_SOURCE_PERMISSIONS @@ -262,11 +199,11 @@ else() PATTERN "*.so" PATTERN "*.dylib" ) + install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/stable_diffusion_build/bin/ ${CMAKE_CURRENT_BINARY_DIR}/llama_install/lib/ - ${CMAKE_CURRENT_BINARY_DIR}/nexa_llama_install/lib/ ${CMAKE_CURRENT_BINARY_DIR}/bark_install/lib/ DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/nexa/gguf/lib USE_SOURCE_PERMISSIONS diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 4a29bca8..68060061 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 4a29bca867e2601a2e69e007640ac1abb9f3a381 +Subproject commit 68060061dc37b86557a1e0bc2ec2d3f1075b54aa diff --git a/dependency/nexa_llama.cpp b/dependency/nexa_llama.cpp deleted file mode 160000 index ddc52e36..00000000 --- a/dependency/nexa_llama.cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit ddc52e3625872f0fa0354e0641f50892faa7cd0d diff --git a/docs/README.md b/docs/README.md index ddabfb54..f43852b7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -28,7 +28,12 @@ pip install nexaai[onnx] # if you need ONNX support ``` ### build from source - +To build C++ only +``` +cmake -B build -S . 
+cmake --build build --config Release +``` +To build C++ and install python package from source, run the following commands: ```bash git clone --recursive https://github.com/NexaAI/nexa-sdk.git cd nexa-sdk diff --git a/nexa/constants.py b/nexa/constants.py index c0943cdb..15e8c874 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -109,7 +109,7 @@ class ModelType(Enum): } NEXA_RUN_MODEL_MAP_AUDIO_LM = { - "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-q4_0", + "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-fp16", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "octoaudio": "Octo-omni-audio:model-fp16", @@ -170,7 +170,7 @@ class ModelType(Enum): } NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { - "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", + "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-fp16", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "octoaudio": "Octo-omni-audio:projector-fp16", diff --git a/nexa/gguf/llama/nexa_audio_lm_cpp.py b/nexa/gguf/llama/nexa_audio_lm_cpp.py index c3cfc1d8..37b3e40d 100644 --- a/nexa/gguf/llama/nexa_audio_lm_cpp.py +++ b/nexa/gguf/llama/nexa_audio_lm_cpp.py @@ -52,7 +52,6 @@ def _load_shared_library(lib_base_name: str): _lib_base_name = "nexa-omni-audio-lib" _lib_omni = _load_shared_library(_lib_base_name) # _lib_base_name = "nexa-qwen2-audio-lib" -# _lib_qwen2 = _load_shared_library(_lib_base_name) _lib_qwen2 = _load_shared_library(_lib_base_name) # struct omni_context_params diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index 178c284c..3f5f4e34 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -1,206 +1,206 @@ -import ctypes -import logging -import os -from pathlib import Path - -from nexa.constants import ( - DEFAULT_TEXT_GEN_PARAMS, - NEXA_RUN_MODEL_MAP_AUDIO_LM, - NEXA_RUN_AUDIO_LM_PROJECTOR_MAP, -) -from nexa.gguf.lib_utils import is_gpu_available -from nexa.gguf.llama import nexa_audio_lm_cpp -from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr -from nexa.general import pull_model - - -def is_qwen(model_name): - if "qwen" in model_name.lower(): # TEMPORARY SOLUTION : this hardcode can be risky - return True - return False - - -assert set(NEXA_RUN_MODEL_MAP_AUDIO_LM.keys()) == set( - NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.keys() -), "Model, projector, and handler should have the same keys" - - -class NexaAudioLMInference: - """ - A class used for loading Bark text-to-speech models and running text-to-speech generation. - - Methods: - run: Run the audio LM generation loop. - - Args: - model_path (str): Path to the model file. - mmproj_path (str): Path to the audio projector file. - n_gpu_layers(int): Number of gpu layers to use for processing. Defaults to -1. - output_dir (str): Output directory for tts. Defaults to "tts". - verbosity (int): Verbosity level for the Bark model. Defaults to 0. 
- """ - - def __init__( - self, - model_path=None, - local_path=None, - projector_local_path=None, - device="auto", - **kwargs, - ): - if model_path is None and local_path is None: - raise ValueError("Either model_path or local_path must be provided.") - - self.params = DEFAULT_TEXT_GEN_PARAMS.copy() - self.params.update(kwargs) - self.model = None - self.projector = None - self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.get(model_path, None) - self.downloaded_path = local_path - self.projector_downloaded_path = projector_local_path - self.device = device - self.context = None - if self.device == "auto" or self.device == "gpu": - self.n_gpu_layers = -1 if is_gpu_available() else 0 - else: - self.n_gpu_layers = 0 - - if ( - self.downloaded_path is not None - and self.projector_downloaded_path is not None - ): - # when running from local, both path should be provided - pass - elif self.downloaded_path is not None: - if model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: - self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] - self.projector_downloaded_path, _ = pull_model( - self.projector_path, **kwargs - ) - elif model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: - self.model_path = NEXA_RUN_MODEL_MAP_AUDIO_LM[model_path] - self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] - self.downloaded_path, _ = pull_model(self.model_path, **kwargs) - self.projector_downloaded_path, _ = pull_model( - self.projector_path, **kwargs - ) - elif Path(model_path).parent.exists(): - local_dir = Path(model_path).parent - model_name = Path(model_path).name - tag_and_ext = model_name.split(":")[-1] - self.downloaded_path = local_dir / f"model-{tag_and_ext}" - self.projector_downloaded_path = local_dir / f"projector-{tag_and_ext}" - if not ( - self.downloaded_path.exists() - and self.projector_downloaded_path.exists() - ): - logging.error( - f"Model or projector not found in {local_dir}. " - "Make sure to name them as 'model-.gguf' and 'projector-.gguf'." - ) - exit(1) - else: - logging.error("VLM user model from hub is not supported yet.") - exit(1) - - if self.downloaded_path is None or self.projector_downloaded_path is None: - logging.error( - f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.", - exc_info=True, - ) - exit(1) - self.is_qwen = is_qwen(self.downloaded_path) # TEMPORARY SOLUTION : this hardcode can be risky - self.ctx_params = nexa_audio_lm_cpp.context_default_params(self.is_qwen) - self.profiling = kwargs.get("profiling", False) - - if not kwargs.get("streamlit", False): - with suppress_stdout_stderr(): - self._load_model() - if self.model is None: - logging.error( - "Failed to load model or tokenizer. 
Exiting.", exc_info=True - ) - exit(1) - - def _load_model(self): - logging.debug( - f"Loading model from {self.downloaded_path} and {self.projector_downloaded_path}" - ) - try: - self.ctx_params.model = ctypes.c_char_p( - self.downloaded_path.encode("utf-8") - ) - self.ctx_params.mmproj = ctypes.c_char_p( - self.projector_downloaded_path.encode("utf-8") - ) - self.ctx_params.n_gpu_layers = ( - 0x7FFFFFFF if self.n_gpu_layers == -1 else self.n_gpu_layers - ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers - - self.context = nexa_audio_lm_cpp.init_context( - ctypes.byref(self.ctx_params), self.is_qwen - ) - if not self.context: - raise RuntimeError("Failed to load audio language model") - logging.debug("Model loaded successfully") - except Exception as e: - logging.error(f"Error loading model: {e}") - raise - - def run(self): - while True: - try: - audio_path = input("Audio Path (required): ") - if audio_path and not os.path.exists(audio_path): - print(f"'{audio_path}' is not a path to audio. Will ignore.") - - user_input = input("Enter text (leave empty if no prompt): ") - - self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) - self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) - - nexa_audio_lm_cpp.process_full( - self.context, ctypes.byref(self.ctx_params), self.is_qwen - ) - - except KeyboardInterrupt: - print("\nExiting...") - break - - except Exception as e: - logging.error(f"\nError during audio generation: {e}", exc_info=True) - - def __del__(self): - """ - Destructor to free the Bark context when the instance is deleted. - """ - if self.context: - nexa_audio_lm_cpp.free_context(self.context, self.is_qwen) - - -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser( - description="Run audio-in text-out generation with nexa-omni-audio model" - ) - parser.add_argument( - "model_path", - type=str, - help="Path or identifier for the model in Nexa Model Hub", - ) - parser.add_argument( - "-d", - "--device", - type=str, - choices=["auto", "cpu", "gpu"], - default="auto", - help="Device to use for inference (auto, cpu, or gpu)", - ) - args = parser.parse_args() - kwargs = {k: v for k, v in vars(args).items() if v is not None} - model_path = kwargs.pop("model_path") - device = kwargs.pop("device", "auto") - - inference = NexaAudioLMInference(model_path, device=device, **kwargs) - inference.run() +# import ctypes +# import logging +# import os +# from pathlib import Path + +# from nexa.constants import ( +# DEFAULT_TEXT_GEN_PARAMS, +# NEXA_RUN_MODEL_MAP_AUDIO_LM, +# NEXA_RUN_AUDIO_LM_PROJECTOR_MAP, +# ) +# from nexa.gguf.lib_utils import is_gpu_available +# from nexa.gguf.llama import nexa_audio_lm_cpp +# from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr +# from nexa.general import pull_model + + +# def is_qwen(model_name): +# if "qwen" in model_name.lower(): # TEMPORARY SOLUTION : this hardcode can be risky +# return True +# return False + + +# assert set(NEXA_RUN_MODEL_MAP_AUDIO_LM.keys()) == set( +# NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.keys() +# ), "Model, projector, and handler should have the same keys" + + +# class NexaAudioLMInference: +# """ +# A class used for loading Bark text-to-speech models and running text-to-speech generation. + +# Methods: +# run: Run the audio LM generation loop. + +# Args: +# model_path (str): Path to the model file. +# mmproj_path (str): Path to the audio projector file. +# n_gpu_layers(int): Number of gpu layers to use for processing. Defaults to -1. 
+# output_dir (str): Output directory for tts. Defaults to "tts". +# verbosity (int): Verbosity level for the Bark model. Defaults to 0. +# """ + +# def __init__( +# self, +# model_path=None, +# local_path=None, +# projector_local_path=None, +# device="auto", +# **kwargs, +# ): +# if model_path is None and local_path is None: +# raise ValueError("Either model_path or local_path must be provided.") + +# self.params = DEFAULT_TEXT_GEN_PARAMS.copy() +# self.params.update(kwargs) +# self.model = None +# self.projector = None +# self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.get(model_path, None) +# self.downloaded_path = local_path +# self.projector_downloaded_path = projector_local_path +# self.device = device +# self.context = None +# if self.device == "auto" or self.device == "gpu": +# self.n_gpu_layers = -1 if is_gpu_available() else 0 +# else: +# self.n_gpu_layers = 0 + +# if ( +# self.downloaded_path is not None +# and self.projector_downloaded_path is not None +# ): +# # when running from local, both path should be provided +# pass +# elif self.downloaded_path is not None: +# if model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: +# self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] +# self.projector_downloaded_path, _ = pull_model( +# self.projector_path, **kwargs +# ) +# elif model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: +# self.model_path = NEXA_RUN_MODEL_MAP_AUDIO_LM[model_path] +# self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] +# self.downloaded_path, _ = pull_model(self.model_path, **kwargs) +# self.projector_downloaded_path, _ = pull_model( +# self.projector_path, **kwargs +# ) +# elif Path(model_path).parent.exists(): +# local_dir = Path(model_path).parent +# model_name = Path(model_path).name +# tag_and_ext = model_name.split(":")[-1] +# self.downloaded_path = local_dir / f"model-{tag_and_ext}" +# self.projector_downloaded_path = local_dir / f"projector-{tag_and_ext}" +# if not ( +# self.downloaded_path.exists() +# and self.projector_downloaded_path.exists() +# ): +# logging.error( +# f"Model or projector not found in {local_dir}. " +# "Make sure to name them as 'model-.gguf' and 'projector-.gguf'." +# ) +# exit(1) +# else: +# logging.error("VLM user model from hub is not supported yet.") +# exit(1) + +# if self.downloaded_path is None or self.projector_downloaded_path is None: +# logging.error( +# f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.", +# exc_info=True, +# ) +# exit(1) +# self.is_qwen = is_qwen(self.downloaded_path) # TEMPORARY SOLUTION : this hardcode can be risky +# self.ctx_params = nexa_audio_lm_cpp.context_default_params(self.is_qwen) +# self.profiling = kwargs.get("profiling", False) + +# if not kwargs.get("streamlit", False): +# # with suppress_stdout_stderr(): +# self._load_model() +# if self.model is None: +# logging.error( +# "Failed to load model or tokenizer. 
Exiting.", exc_info=True +# ) +# exit(1) + +# def _load_model(self): +# logging.debug( +# f"Loading model from {self.downloaded_path} and {self.projector_downloaded_path}" +# ) +# try: +# self.ctx_params.model = ctypes.c_char_p( +# self.downloaded_path.encode("utf-8") +# ) +# self.ctx_params.mmproj = ctypes.c_char_p( +# self.projector_downloaded_path.encode("utf-8") +# ) +# self.ctx_params.n_gpu_layers = ( +# 0x7FFFFFFF if self.n_gpu_layers == -1 else self.n_gpu_layers +# ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers + +# self.context = nexa_audio_lm_cpp.init_context( +# ctypes.byref(self.ctx_params), self.is_qwen +# ) +# if not self.context: +# raise RuntimeError("Failed to load audio language model") +# logging.debug("Model loaded successfully") +# except Exception as e: +# logging.error(f"Error loading model: {e}") +# raise + +# def run(self): +# while True: +# try: +# audio_path = input("Audio Path (required): ") +# if audio_path and not os.path.exists(audio_path): +# print(f"'{audio_path}' is not a path to audio. Will ignore.") + +# user_input = input("Enter text (leave empty if no prompt): ") + +# self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) +# self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) + +# nexa_audio_lm_cpp.process_full( +# self.context, ctypes.byref(self.ctx_params), self.is_qwen +# ) + +# except KeyboardInterrupt: +# print("\nExiting...") +# break + +# except Exception as e: +# logging.error(f"\nError during audio generation: {e}", exc_info=True) + +# def __del__(self): +# """ +# Destructor to free the Bark context when the instance is deleted. +# """ +# if self.context: +# nexa_audio_lm_cpp.free_context(self.context, self.is_qwen) + + +# if __name__ == "__main__": +# import argparse + +# parser = argparse.ArgumentParser( +# description="Run audio-in text-out generation with nexa-omni-audio model" +# ) +# parser.add_argument( +# "model_path", +# type=str, +# help="Path or identifier for the model in Nexa Model Hub", +# ) +# parser.add_argument( +# "-d", +# "--device", +# type=str, +# choices=["auto", "cpu", "gpu"], +# default="auto", +# help="Device to use for inference (auto, cpu, or gpu)", +# ) +# args = parser.parse_args() +# kwargs = {k: v for k, v in vars(args).items() if v is not None} +# model_path = kwargs.pop("model_path") +# device = kwargs.pop("device", "auto") + +# inference = NexaAudioLMInference(model_path, device=device, **kwargs) +# inference.run() diff --git a/pyproject.toml b/pyproject.toml index 736a32f0..53aaabcf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,7 +109,6 @@ wheel.packages = [ sdist.include = [ "CMakeLists.txt", "dependency/llama.cpp/*", - "dependency/nexa_llama.cpp/*", "dependency/stable-diffusion.cpp/*", "dependency/bark.cpp/*", ] @@ -119,7 +118,6 @@ sdist.exclude = [ "dist", "nexa.egg-info", "dependency/llama.cpp/build", - "dependency/nexa_llama.cpp/build", "dependency/stable-diffusion.cpp/build", "dependency/bark.cpp/build", ] @@ -130,7 +128,6 @@ cmake.args = [ "-DCMAKE_BUILD_PARALLEL_LEVEL=16", "-DSTABLE_DIFFUSION_BUILD=ON", "-DLLAMA_BUILD=ON", - "-DNEXA_LLAMA_BUIL=ON", "-DBARK_BUILD=ON" ] @@ -139,4 +136,4 @@ provider = "scikit_build_core.metadata.regex" input = "nexa/__init__.py" [tool.pytest.ini_options] -testpaths = ["tests"] +testpaths = ["tests"] \ No newline at end of file From 179ff2b63e7ae63859842d849359d9d2d8d671c9 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 14:51:37 +0000 Subject: [PATCH 016/160] update docs --- docs/README.md | 6 ++++++ 
1 file changed, 6 insertions(+) diff --git a/docs/README.md b/docs/README.md index f43852b7..23813624 100644 --- a/docs/README.md +++ b/docs/README.md @@ -232,3 +232,9 @@ del /s /q *.dll # delete Get-ChildItem -Recurse -Filter *.dll # in PowerShell dumpbin /dependents your_executable_or_dll.dll # in Developer PowerShell for Visual Studio ``` + +### Debug dynamic lib +According to [isse](https://github.com/abetlen/llama-cpp-python/issues/1346), below can check the exported symbols on linux. +``` +readelf -Ws --dyn-syms libllama.so +``` \ No newline at end of file From e2253a61aa33254c6ba49ad253cf73fa69b8e0af Mon Sep 17 00:00:00 2001 From: JoyboyBrian Date: Sun, 3 Nov 2024 11:02:55 -0800 Subject: [PATCH 017/160] delete the `General.swift` since we are not using it for now. --- swift/Sources/NexaSwift/General.swift | 123 -------------------------- 1 file changed, 123 deletions(-) delete mode 100644 swift/Sources/NexaSwift/General.swift diff --git a/swift/Sources/NexaSwift/General.swift b/swift/Sources/NexaSwift/General.swift deleted file mode 100644 index c9956b0c..00000000 --- a/swift/Sources/NexaSwift/General.swift +++ /dev/null @@ -1,123 +0,0 @@ -import Foundation - -let NEXA_RUN_MODEL_MAP_TEXT: [String: String] = [ - "octopus-v2": "Octopus-v2:q4_0", - "octopus-v4": "Octopus-v4:q4_0", - "gpt2": "gpt2:q4_0", - "tinyllama": "TinyLlama-1.1B-Chat-v1.0:fp16", - "llama2": "Llama-2-7b-chat:q4_0", - "llama3": "Meta-Llama-3-8B-Instruct:q4_0", - "llama3.1": "Meta-Llama-3.1-8B-Instruct:q4_0", - "llama3.2": "Llama3.2-3B-Instruct:q4_0", - "gemma": "gemma-1.1-2b-instruct:q4_0", - "gemma2": "gemma-2-2b-instruct:q4_0", - "qwen1.5": "Qwen1.5-7B-Instruct:q4_0", - "qwen2": "Qwen2-1.5B-Instruct:q4_0", - "qwen2.5": "Qwen2.5-1.5B-Instruct:q4_0", - "mistral": "Mistral-7B-Instruct-v0.3:q4_0", - "codegemma": "codegemma-2b:q4_0", - "codellama": "CodeLlama-7b-Instruct:q4_0", - "codeqwen": "Qwen2.5-Coder-1.5B-Instruct:q4_0", - "mathqwen": "Qwen2.5-Math-1.5B-Instruct:q4_0", - "deepseek-coder": "deepseek-coder-1.3b-instruct:q4_0", - "dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0", - "phi2": "Phi-2:q4_0", - "phi3": "Phi-3-mini-128k-instruct:q4_0", - "phi3.5": "Phi-3.5-mini-instruct:q4_0", - "llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0", - "llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M", - "openelm": "OpenELM-3B:q4_K_M" -] - -let NEXA_RUN_MODEL_MAP_VLM: [String: String] = [ - "nanollava": "nanoLLaVA:model-fp16", - "nanoLLaVA:fp16": "nanoLLaVA:model-fp16", - "llava-phi3": "llava-phi-3-mini:model-q4_0", - "llava-phi-3-mini:q4_0": "llava-phi-3-mini:model-q4_0", - "llava-phi-3-mini:fp16": "llava-phi-3-mini:model-fp16", - "llava-llama3": "llava-llama-3-8b-v1.1:model-q4_0", - "llava-llama-3-8b-v1.1:q4_0": "llava-llama-3-8b-v1.1:model-q4_0", - "llava-llama-3-8b-v1.1:fp16": "llava-llama-3-8b-v1.1:model-fp16", - "llava1.6-mistral": "llava-v1.6-mistral-7b:model-q4_0", - "llava-v1.6-mistral-7b:q4_0": "llava-v1.6-mistral-7b:model-q4_0", - "llava-v1.6-mistral-7b:fp16": "llava-v1.6-mistral-7b:model-fp16", - "llava1.6-vicuna": "llava-v1.6-vicuna-7b:model-q4_0", - "llava-v1.6-vicuna-7b:q4_0": "llava-v1.6-vicuna-7b:model-q4_0", - "llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:model-fp16" -] - -let NEXA_RUN_MODEL_MAP_IMAGE : [String: String] = [ - "sd1-4": "stable-diffusion-v1-4:q4_0", - "sd1-5": "stable-diffusion-v1-5:q4_0", - "sd2-1": "stable-diffusion-v2-1:q4_0", - "sd3": "stable-diffusion-3-medium:q4_0", - "sdxl-turbo": "sdxl-turbo:q8_0", - "flux": "FLUX.1-schnell:q4_0", - "lcm-dreamshaper": 
"lcm-dreamshaper-v7:fp16", - "anything-lcm": "anything-v30-LCM:fp16", - "hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16", -] - -let NEXA_RUN_MODEL_MAP_VOICE:[String: String] = [ - "whisper-large": "whisper-large:bin-large-v3", - "whisper-tiny": "whisper-tiny:bin-tiny", - "faster-whisper-tiny": "faster-whisper-tiny:bin-cpu-fp16", - "faster-whisper-tiny.en": "faster-whisper-tiny.en:bin-cpu-fp16", - "faster-whisper-small": "faster-whisper-small:bin-cpu-fp16", - "faster-whisper-small.en": "faster-whisper-small.en:bin-cpu-fp16", - "faster-whisper-medium": "faster-whisper-medium:bin-cpu-fp16", - "faster-whisper-medium.en": "faster-whisper-medium.en:bin-cpu-fp16", - "faster-whisper-base": "faster-whisper-base:bin-cpu-fp16", - "faster-whisper-base.en": "faster-whisper-base.en:bin-cpu-fp16", - "faster-whisper-large": "faster-whisper-large-v3:bin-cpu-fp16", - "faster-whisper-large-turbo": "faster-whisper-large-v3-turbo:bin-cpu-fp16", -] - -let NEXA_RUN_MODEL_MAP_FUNCTION_CALLING:[String: String] = [ - "llama2-function-calling": "Llama2-7b-function-calling:q4_K_M", - "Llama2-7b-function-calling:fp16": "Llama2-7b-function-calling:fp16", - "Llama2-7b-function-calling:q2_K": "Llama2-7b-function-calling:q2_K", - "Llama2-7b-function-calling:q3_K_L": "Llama2-7b-function-calling:q3_K_L", - "Llama2-7b-function-calling:q3_K_M": "Llama2-7b-function-calling:q3_K_M", - "Llama2-7b-function-calling:q3_K_S": "Llama2-7b-function-calling:q3_K_S", - "Llama2-7b-function-calling:q4_K_M": "Llama2-7b-function-calling:q4_K_M", - "Llama2-7b-function-calling:q4_K_S": "Llama2-7b-function-calling:q4_K_S", - "Llama2-7b-function-calling:q5_K_M": "Llama2-7b-function-calling:q5_K_M", - "Llama2-7b-function-calling:q5_K_S": "Llama2-7b-function-calling:q5_K_S", - "Llama2-7b-function-calling:q6_K": "Llama2-7b-function-calling:q6_K", - "Llama2-7b-function-calling:q8_0": "Llama2-7b-function-calling:q8_0", -] -let NEXA_RUN_MODEL_MAP_FLUX:[String: String] = [ - "flux": "FLUX.1-schnell:flux1-schnell-q4_0", - "FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0", - "FLUX.1-schnell:q5_0": "FLUX.1-schnell:flux1-schnell-q5_0", - "FLUX.1-schnell:q5_1": "FLUX.1-schnell:flux1-schnell-q5_1", - "FLUX.1-schnell:q8_0": "FLUX.1-schnell:flux1-schnell-q8_0", - "FLUX.1-schnell:fp16": "FLUX.1-schnell:flux1-schnell-fp16", -] - -let NEXA_RUN_MODEL_MAP_TEXT_EMBEDDING:[String: String] = [ - "mxbai": "mxbai-embed-large-v1:fp16", - "mxbai-embed-large-v1": "mxbai-embed-large-v1:fp16", - "mxbai-embed-large-v1:fp16": "mxbai-embed-large-v1:fp16", - "nomic": "nomic-embed-text-v1.5:fp16", - "nomic-embed-text-v1.5": "nomic-embed-text-v1.5:fp16", - "nomic-embed-text-v1.5:fp16": "nomic-embed-text-v1.5:fp16", - "all-MiniLM": "all-MiniLM-L6-v2:fp16", - "all-MiniLM-L6-v2": "all-MiniLM-L6-v2:fp16", - "all-MiniLM-L6-v2:fp16": "all-MiniLM-L6-v2:fp16", - "all-MiniLM-L12-v2": "all-MiniLM-L12-v2:fp16", - "all-MiniLM-L12-v2:fp16": "all-MiniLM-L12-v2:fp16", -] - -let NEXA_RUN_MODEL_MAP: [String: String] = { - var combinedMap = NEXA_RUN_MODEL_MAP_TEXT - combinedMap.merge(NEXA_RUN_MODEL_MAP_IMAGE) { (_, new) in new } - combinedMap.merge(NEXA_RUN_MODEL_MAP_VLM) { (_, new) in new } - combinedMap.merge(NEXA_RUN_MODEL_MAP_VOICE) { (_, new) in new } - combinedMap.merge(NEXA_RUN_MODEL_MAP_FUNCTION_CALLING) { (_, new) in new } - combinedMap.merge(NEXA_RUN_MODEL_MAP_FLUX) { (_, new) in new } - combinedMap.merge(NEXA_RUN_MODEL_MAP_TEXT_EMBEDDING) { (_, new) in new } - // Merge other maps as needed - return combinedMap -}() From bbf388de6bc8401f2bac275537edaaf0ae305a8d Mon Sep 17 
00:00:00 2001 From: JoyboyBrian Date: Sun, 3 Nov 2024 12:13:13 -0800 Subject: [PATCH 018/160] change test project model to `llama3.2:q4_K_M`; update llama3 formatter --- examples/swift-test/Shared/ViewModel.swift | 4 ++-- .../swift-test/TestApp.xcodeproj/project.pbxproj | 12 ++++++------ .../NexaSwift/Models/ChatCompletionMessage.swift | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/swift-test/Shared/ViewModel.swift b/examples/swift-test/Shared/ViewModel.swift index 6e5912de..b2733ee4 100644 --- a/examples/swift-test/Shared/ViewModel.swift +++ b/examples/swift-test/Shared/ViewModel.swift @@ -13,8 +13,8 @@ class ViewModel { private var cancallable: Set = [] init() { - let configuration = Configuration(maxNewToken: 128, stopTokens: [""]) - let model_path = Bundle.main.path(forResource: "octopusv2_q4_0", ofType: "gguf") ?? "" + let configuration = Configuration(maxNewToken: 128, stopTokens: []) + let model_path = Bundle.main.path(forResource: "llama3_2_3b_q4_K_M", ofType: "gguf") ?? "" nexaSwift = (try? NexaTextInference(modelPath: model_path, modelConfiguration: configuration))! } diff --git a/examples/swift-test/TestApp.xcodeproj/project.pbxproj b/examples/swift-test/TestApp.xcodeproj/project.pbxproj index 3d4bddf8..7a9f5219 100644 --- a/examples/swift-test/TestApp.xcodeproj/project.pbxproj +++ b/examples/swift-test/TestApp.xcodeproj/project.pbxproj @@ -21,8 +21,8 @@ 4BB1E3E62BE646CF00F1D21A /* ViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B1334F52BE5C4AC0020AB8E /* ViewModel.swift */; }; 4BEE1DB62BE70024001CE949 /* main.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4BEE1DB52BE70024001CE949 /* main.swift */; }; 4BEE1DBB2BE7003E001CE949 /* NexaSwift in Frameworks */ = {isa = PBXBuildFile; productRef = 4BEE1DBA2BE7003E001CE949 /* NexaSwift */; }; - D38D5E552CD54ED900EB536A /* octopusv2_q4_0.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D38D5E542CD54ED900EB536A /* octopusv2_q4_0.gguf */; }; - D38D5E562CD54ED900EB536A /* octopusv2_q4_0.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D38D5E542CD54ED900EB536A /* octopusv2_q4_0.gguf */; }; + D3DF75452CD80FBD00FEFA14 /* llama3_2_3b_q4_K_M.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D3DF75442CD80FBD00FEFA14 /* llama3_2_3b_q4_K_M.gguf */; }; + D3DF75462CD80FBD00FEFA14 /* llama3_2_3b_q4_K_M.gguf in Resources */ = {isa = PBXBuildFile; fileRef = D3DF75442CD80FBD00FEFA14 /* llama3_2_3b_q4_K_M.gguf */; }; /* End PBXBuildFile section */ /* Begin PBXCopyFilesBuildPhase section */ @@ -52,7 +52,7 @@ 4B51A47B2BE7449700F65BFC /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; 4BEE1DB32BE70024001CE949 /* TestApp-CLI */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "TestApp-CLI"; sourceTree = BUILT_PRODUCTS_DIR; }; 4BEE1DB52BE70024001CE949 /* main.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = main.swift; sourceTree = ""; }; - D38D5E542CD54ED900EB536A /* octopusv2_q4_0.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = octopusv2_q4_0.gguf; sourceTree = ""; }; + D3DF75442CD80FBD00FEFA14 /* llama3_2_3b_q4_K_M.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = llama3_2_3b_q4_K_M.gguf; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -127,7 +127,7 @@ 4B10A3302BE5CD6600BEA6A1 /* Models */ = { isa = PBXGroup; children = ( - 
D38D5E542CD54ED900EB536A /* octopusv2_q4_0.gguf */, + D3DF75442CD80FBD00FEFA14 /* llama3_2_3b_q4_K_M.gguf */, ); name = Models; path = Shared/Models; @@ -282,8 +282,8 @@ buildActionMask = 2147483647; files = ( 4B1334FC2BE5C4AC0020AB8E /* Preview Assets.xcassets in Resources */, - D38D5E562CD54ED900EB536A /* octopusv2_q4_0.gguf in Resources */, 4B1334FA2BE5C4AC0020AB8E /* Assets.xcassets in Resources */, + D3DF75462CD80FBD00FEFA14 /* llama3_2_3b_q4_K_M.gguf in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -292,8 +292,8 @@ buildActionMask = 2147483647; files = ( 4B0B05812BE5C452002BC7AF /* Preview Assets.xcassets in Resources */, - D38D5E552CD54ED900EB536A /* octopusv2_q4_0.gguf in Resources */, 4B0B057E2BE5C452002BC7AF /* Assets.xcassets in Resources */, + D3DF75452CD80FBD00FEFA14 /* llama3_2_3b_q4_K_M.gguf in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift b/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift index f571170c..c6cae277 100644 --- a/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift +++ b/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift @@ -297,7 +297,7 @@ class Llama3Formatter: ChatFormatter { "assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n" ] - private let separator = "<|eot_id|>" + private let separator = "<|eot_id|>\n" func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse { var formattedMessages = mapRoles(messages: messages) From 8ac89f98b50dc676a4c540044a964c9c043682d3 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 21:44:55 +0000 Subject: [PATCH 019/160] audio LM works E2E --- .gitmodules | 2 +- dependency/llama.cpp | 2 +- docs/README.md | 2 +- nexa/gguf/llama/audio_lm_cpp.py | 131 +++++++++ nexa/gguf/llama/nexa_audio_lm_cpp.py | 115 -------- nexa/gguf/nexa_inference_audio_lm.py | 403 +++++++++++++-------------- 6 files changed, 331 insertions(+), 324 deletions(-) create mode 100644 nexa/gguf/llama/audio_lm_cpp.py delete mode 100644 nexa/gguf/llama/nexa_audio_lm_cpp.py diff --git a/.gitmodules b/.gitmodules index 77565f68..c7b3e555 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,7 +5,7 @@ [submodule "dependency/llama.cpp"] path = dependency/llama.cpp url = https://github.com/NexaAI/llama.cpp.git - branch = zack-dev + branch = master-release-audio-lm [submodule "nexa/eval/benchmark_tasks"] path = nexa/eval/benchmark_tasks url = https://github.com/NexaAI/benchmark-tasks.git diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 68060061..9e67ef75 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 68060061dc37b86557a1e0bc2ec2d3f1075b54aa +Subproject commit 9e67ef75b46b4d267b9df4ac6c1f232681470a4c diff --git a/docs/README.md b/docs/README.md index 23813624..5cfc2488 100644 --- a/docs/README.md +++ b/docs/README.md @@ -31,7 +31,7 @@ pip install nexaai[onnx] # if you need ONNX support To build C++ only ``` cmake -B build -S . 
-cmake --build build --config Release +cmake --build build --config Release -j32 ``` To build C++ and install python package from source, run the following commands: ```bash diff --git a/nexa/gguf/llama/audio_lm_cpp.py b/nexa/gguf/llama/audio_lm_cpp.py new file mode 100644 index 00000000..71bb3888 --- /dev/null +++ b/nexa/gguf/llama/audio_lm_cpp.py @@ -0,0 +1,131 @@ +import ctypes +import os +import sys +from pathlib import Path + + +# Load the library +def _load_shared_library(lib_base_name: str, base_path: Path = None): + # Determine the file extension based on the platform + if sys.platform.startswith("linux"): + lib_ext = ".so" + elif sys.platform == "darwin": + lib_ext = ".dylib" + elif sys.platform == "win32": + lib_ext = ".dll" + else: + raise RuntimeError("Unsupported platform") + # Construct the paths to the possible shared library names + if base_path is None: + _base_path = Path(__file__).parent.parent.resolve() + else: + _base_path = base_path + _lib_paths = [ + _base_path / f"lib{lib_base_name}{lib_ext}", + _base_path / f"{lib_base_name}{lib_ext}", + ] + # Add the library directory to the DLL search path on Windows (if needed) + if sys.platform == "win32" and sys.version_info >= (3, 8): + os.add_dll_directory(str(_base_path)) + # Try to load the shared library, handling potential errors + for _lib_path in _lib_paths: + if _lib_path.exists(): + try: + return ctypes.CDLL(str(_lib_path)) + except Exception as e: + print(f"Failed to load shared library '{_lib_path}': {e}") + raise FileNotFoundError( + f"Shared library with base name '{lib_base_name}' not found" + ) + +def _get_lib(is_qwen: bool = False): + # Specify the base name of the shared library to load + _lib_base_name = "nexa-qwen2-audio-lib_shared" if is_qwen else "nexa-omni-audio-lib_shared" + base_path = ( + Path(__file__).parent.parent.parent.parent.resolve() + / "nexa" + / "gguf" + / "lib" + ) + return _load_shared_library(_lib_base_name, base_path) + +# Initialize both libraries +_lib_omni = _get_lib(is_qwen=False) +_lib_qwen = _get_lib(is_qwen=True) + +# conda config --add channels conda-forge +# conda update libstdcxx-ng +# struct omni_context_params +# { +# char *model; +# char *mmproj; +# char *file; +# char *prompt; +# int32_t n_gpu_layers; +# }; +class omni_context_params(ctypes.Structure): + _fields_ = [ + ("model", ctypes.c_char_p), + ("mmproj", ctypes.c_char_p), + ("file", ctypes.c_char_p), + ("prompt", ctypes.c_char_p), + ("n_gpu_layers", ctypes.c_int32), + ] + +omni_context_params_p = ctypes.POINTER(omni_context_params) +omni_context_p = ctypes.c_void_p + +# OMNI_AUDIO_API omni_context_params omni_context_default_params(); +def context_default_params(is_qwen: bool = False) -> omni_context_params: + _lib = _lib_qwen if is_qwen else _lib_omni + return _lib.omni_context_default_params() + +# _lib_omni.omni_context_default_params.argtypes = [] +# _lib_qwen.omni_context_default_params.argtypes = [] +# _lib_omni.omni_context_default_params.restype = omni_context_params +# _lib_qwen.omni_context_default_params.restype = omni_context_params + +# OMNI_AUDIO_API struct omni_context *omni_init_context(omni_context_params ¶ms); +def init_context(params: omni_context_params_p, is_qwen: bool = False) -> omni_context_p: # type: ignore + _lib = _lib_qwen if is_qwen else _lib_omni + return _lib.omni_init_context(params) + +# _lib_omni.omni_init_context.argtypes = [omni_context_params_p] +# _lib_qwen.omni_init_context.argtypes = [omni_context_params_p] +# _lib_omni.omni_init_context.restype = omni_context_p +# 
_lib_qwen.omni_init_context.restype = omni_context_p + +# OMNI_AUDIO_API void omni_process_full( +# struct omni_context *ctx_omni, +# omni_context_params ¶ms +# ); +def process_full(ctx: omni_context_p, params: omni_context_params_p, is_qwen: bool = False): # type: ignore + _lib = _lib_qwen if is_qwen else _lib_omni + return _lib.omni_process_full(ctx, params) + +# _lib_omni.omni_process_full.argtypes = [omni_context_p, omni_context_params_p] +# _lib_qwen.omni_process_full.argtypes = [omni_context_p, omni_context_params_p] +# _lib_omni.omni_process_full.restype = None +# _lib_qwen.omni_process_full.restype = None + +# OMNI_AUDIO_API void omni_free(struct omni_context *ctx_omni); +def free(ctx: omni_context_p, is_qwen: bool = False): + _lib = _lib_qwen if is_qwen else _lib_omni + return _lib.omni_free(ctx) + +for lib in [_lib_omni, _lib_qwen]: + # Configure context_default_params + lib.omni_context_default_params.argtypes = [] + lib.omni_context_default_params.restype = omni_context_params + + # Configure init_context + lib.omni_init_context.argtypes = [omni_context_params_p] + lib.omni_init_context.restype = omni_context_p + + # Configure process_full + lib.omni_process_full.argtypes = [omni_context_p, omni_context_params_p] + lib.omni_process_full.restype = None + + # Configure free + lib.omni_free.argtypes = [omni_context_p] + lib.omni_free.restype = None \ No newline at end of file diff --git a/nexa/gguf/llama/nexa_audio_lm_cpp.py b/nexa/gguf/llama/nexa_audio_lm_cpp.py deleted file mode 100644 index 37b3e40d..00000000 --- a/nexa/gguf/llama/nexa_audio_lm_cpp.py +++ /dev/null @@ -1,115 +0,0 @@ -import ctypes -import os -import sys -import pathlib - -# make sure only below functions are exposed -__all__ = ['context_default_params', 'init_context', 'process_full', 'free'] - -# Load the library -def _load_shared_library(lib_base_name: str): - # Determine the file extension based on the platform - if sys.platform.startswith("linux"): - lib_ext = ".so" - elif sys.platform == "darwin": - lib_ext = ".dylib" - elif sys.platform == "win32": - lib_ext = ".dll" - else: - raise RuntimeError("Unsupported platform") - - # Construct the paths to the possible shared library names - _base_path = pathlib.Path(__file__).parent.parent.resolve() - _lib_paths = [ - _base_path / f"lib/lib{lib_base_name}{lib_ext}", - _base_path / f"lib/{lib_base_name}{lib_ext}", - ] - - if "NEXA_LLAMA_CPP_LIB" in os.environ: - lib_base_name = os.environ["NEXA_LLAMA_CPP_LIB"] - _lib = pathlib.Path(lib_base_name) - _base_path = _lib.parent.resolve() - _lib_paths = [_lib.resolve()] - - # Add the library directory to the DLL search path on Windows (if needed) - if sys.platform == "win32" and sys.version_info >= (3, 8): - os.add_dll_directory(str(_base_path)) - - # Try to load the shared library, handling potential errors - for _lib_path in _lib_paths: - print("Trying to load", _lib_path) - if _lib_path.exists(): - try: - return ctypes.CDLL(str(_lib_path)) - except Exception as e: - raise RuntimeError(f"Failed to load shared library '{_lib_path}': {e}") - - raise FileNotFoundError( - f"Shared library with base name '{lib_base_name}' not found" - ) - -# Load both libraries -_lib_base_name = "nexa-omni-audio-lib" -_lib_omni = _load_shared_library(_lib_base_name) -# _lib_base_name = "nexa-qwen2-audio-lib" -_lib_qwen2 = _load_shared_library(_lib_base_name) - -# struct omni_context_params -# { -# char *model; -# char *mmproj; -# char *file; -# char *prompt; -# int32_t n_gpu_layers; -# }; -class omni_context_params(ctypes.Structure): - 
_fields_ = [ - ("model", ctypes.c_char_p), - ("mmproj", ctypes.c_char_p), - ("file", ctypes.c_char_p), - ("prompt", ctypes.c_char_p), - ("n_gpu_layers", ctypes.c_int32), - ] - - -omni_context_params_p = ctypes.POINTER(omni_context_params) -omni_context_p = ctypes.c_void_p - - -def get_lib(is_qwen: bool): - return _lib_qwen2 if is_qwen else _lib_omni - - -def context_default_params(is_qwen: bool = False) -> omni_context_params: - lib = get_lib(is_qwen) - return lib.omni_context_default_params() - - -def init_context(params: omni_context_params_p, is_qwen: bool = False) -> omni_context_p: # type: ignore - lib = get_lib(is_qwen) - return lib.omni_init_context(params) - - -def process_full(ctx: omni_context_p, params: omni_context_params_p, is_qwen: bool = False): # type: ignore - lib = get_lib(is_qwen) - return lib.omni_process_full(ctx, params) - - -def free_context(ctx: omni_context_p, is_qwen: bool = False): - lib = get_lib(is_qwen) - return lib.omni_free(ctx) - - -# Set up function signatures for both libraries -for lib in [_lib_omni, _lib_qwen2]: - lib.omni_context_default_params.argtypes = [] - lib.omni_context_default_params.restype = omni_context_params - - lib.omni_init_context.argtypes = [omni_context_params_p] - lib.omni_init_context.restype = omni_context_p - - lib.omni_process_full.argtypes = [omni_context_p, omni_context_params_p] - lib.omni_process_full.restype = None - - lib.omni_free.argtypes = [omni_context_p] - lib.omni_free.restype = None \ No newline at end of file diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index 3f5f4e34..777ae2e6 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -1,206 +1,197 @@ -# import ctypes -# import logging -# import os -# from pathlib import Path - -# from nexa.constants import ( -# DEFAULT_TEXT_GEN_PARAMS, -# NEXA_RUN_MODEL_MAP_AUDIO_LM, -# NEXA_RUN_AUDIO_LM_PROJECTOR_MAP, -# ) -# from nexa.gguf.lib_utils import is_gpu_available -# from nexa.gguf.llama import nexa_audio_lm_cpp -# from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr -# from nexa.general import pull_model - - -# def is_qwen(model_name): -# if "qwen" in model_name.lower(): # TEMPORARY SOLUTION : this hardcode can be risky -# return True -# return False - - -# assert set(NEXA_RUN_MODEL_MAP_AUDIO_LM.keys()) == set( -# NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.keys() -# ), "Model, projector, and handler should have the same keys" - - -# class NexaAudioLMInference: -# """ -# A class used for loading Bark text-to-speech models and running text-to-speech generation. - -# Methods: -# run: Run the audio LM generation loop. - -# Args: -# model_path (str): Path to the model file. -# mmproj_path (str): Path to the audio projector file. -# n_gpu_layers(int): Number of gpu layers to use for processing. Defaults to -1. -# output_dir (str): Output directory for tts. Defaults to "tts". -# verbosity (int): Verbosity level for the Bark model. Defaults to 0. 
-# """ - -# def __init__( -# self, -# model_path=None, -# local_path=None, -# projector_local_path=None, -# device="auto", -# **kwargs, -# ): -# if model_path is None and local_path is None: -# raise ValueError("Either model_path or local_path must be provided.") - -# self.params = DEFAULT_TEXT_GEN_PARAMS.copy() -# self.params.update(kwargs) -# self.model = None -# self.projector = None -# self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.get(model_path, None) -# self.downloaded_path = local_path -# self.projector_downloaded_path = projector_local_path -# self.device = device -# self.context = None -# if self.device == "auto" or self.device == "gpu": -# self.n_gpu_layers = -1 if is_gpu_available() else 0 -# else: -# self.n_gpu_layers = 0 - -# if ( -# self.downloaded_path is not None -# and self.projector_downloaded_path is not None -# ): -# # when running from local, both path should be provided -# pass -# elif self.downloaded_path is not None: -# if model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: -# self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] -# self.projector_downloaded_path, _ = pull_model( -# self.projector_path, **kwargs -# ) -# elif model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: -# self.model_path = NEXA_RUN_MODEL_MAP_AUDIO_LM[model_path] -# self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] -# self.downloaded_path, _ = pull_model(self.model_path, **kwargs) -# self.projector_downloaded_path, _ = pull_model( -# self.projector_path, **kwargs -# ) -# elif Path(model_path).parent.exists(): -# local_dir = Path(model_path).parent -# model_name = Path(model_path).name -# tag_and_ext = model_name.split(":")[-1] -# self.downloaded_path = local_dir / f"model-{tag_and_ext}" -# self.projector_downloaded_path = local_dir / f"projector-{tag_and_ext}" -# if not ( -# self.downloaded_path.exists() -# and self.projector_downloaded_path.exists() -# ): -# logging.error( -# f"Model or projector not found in {local_dir}. " -# "Make sure to name them as 'model-.gguf' and 'projector-.gguf'." -# ) -# exit(1) -# else: -# logging.error("VLM user model from hub is not supported yet.") -# exit(1) - -# if self.downloaded_path is None or self.projector_downloaded_path is None: -# logging.error( -# f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.", -# exc_info=True, -# ) -# exit(1) -# self.is_qwen = is_qwen(self.downloaded_path) # TEMPORARY SOLUTION : this hardcode can be risky -# self.ctx_params = nexa_audio_lm_cpp.context_default_params(self.is_qwen) -# self.profiling = kwargs.get("profiling", False) - -# if not kwargs.get("streamlit", False): -# # with suppress_stdout_stderr(): -# self._load_model() -# if self.model is None: -# logging.error( -# "Failed to load model or tokenizer. 
Exiting.", exc_info=True -# ) -# exit(1) - -# def _load_model(self): -# logging.debug( -# f"Loading model from {self.downloaded_path} and {self.projector_downloaded_path}" -# ) -# try: -# self.ctx_params.model = ctypes.c_char_p( -# self.downloaded_path.encode("utf-8") -# ) -# self.ctx_params.mmproj = ctypes.c_char_p( -# self.projector_downloaded_path.encode("utf-8") -# ) -# self.ctx_params.n_gpu_layers = ( -# 0x7FFFFFFF if self.n_gpu_layers == -1 else self.n_gpu_layers -# ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers - -# self.context = nexa_audio_lm_cpp.init_context( -# ctypes.byref(self.ctx_params), self.is_qwen -# ) -# if not self.context: -# raise RuntimeError("Failed to load audio language model") -# logging.debug("Model loaded successfully") -# except Exception as e: -# logging.error(f"Error loading model: {e}") -# raise - -# def run(self): -# while True: -# try: -# audio_path = input("Audio Path (required): ") -# if audio_path and not os.path.exists(audio_path): -# print(f"'{audio_path}' is not a path to audio. Will ignore.") - -# user_input = input("Enter text (leave empty if no prompt): ") - -# self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) -# self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) - -# nexa_audio_lm_cpp.process_full( -# self.context, ctypes.byref(self.ctx_params), self.is_qwen -# ) - -# except KeyboardInterrupt: -# print("\nExiting...") -# break - -# except Exception as e: -# logging.error(f"\nError during audio generation: {e}", exc_info=True) - -# def __del__(self): -# """ -# Destructor to free the Bark context when the instance is deleted. -# """ -# if self.context: -# nexa_audio_lm_cpp.free_context(self.context, self.is_qwen) - - -# if __name__ == "__main__": -# import argparse - -# parser = argparse.ArgumentParser( -# description="Run audio-in text-out generation with nexa-omni-audio model" -# ) -# parser.add_argument( -# "model_path", -# type=str, -# help="Path or identifier for the model in Nexa Model Hub", -# ) -# parser.add_argument( -# "-d", -# "--device", -# type=str, -# choices=["auto", "cpu", "gpu"], -# default="auto", -# help="Device to use for inference (auto, cpu, or gpu)", -# ) -# args = parser.parse_args() -# kwargs = {k: v for k, v in vars(args).items() if v is not None} -# model_path = kwargs.pop("model_path") -# device = kwargs.pop("device", "auto") - -# inference = NexaAudioLMInference(model_path, device=device, **kwargs) -# inference.run() +import ctypes +import logging +import os +from pathlib import Path + +from nexa.constants import ( + DEFAULT_TEXT_GEN_PARAMS, + NEXA_RUN_MODEL_MAP_AUDIO_LM, + NEXA_RUN_AUDIO_LM_PROJECTOR_MAP, +) +from nexa.gguf.lib_utils import is_gpu_available +from nexa.gguf.llama import audio_lm_cpp +from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr +from nexa.general import pull_model + + +def is_qwen(model_name): + if "qwen" in model_name.lower(): # TEMPORARY SOLUTION : this hardcode can be risky + return True + return False + + +assert set(NEXA_RUN_MODEL_MAP_AUDIO_LM.keys()) == set( + NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.keys() +), "Model, projector, and handler should have the same keys" + + +class NexaAudioLMInference: + """ + A class used for loading Bark text-to-speech models and running text-to-speech generation. + + Methods: + run: Run the audio LM generation loop. + + Args: + model_path (str): Path to the model file. + mmproj_path (str): Path to the audio projector file. + n_gpu_layers(int): Number of gpu layers to use for processing. 
Defaults to -1. + output_dir (str): Output directory for tts. Defaults to "tts". + verbosity (int): Verbosity level for the Bark model. Defaults to 0. + """ + + def __init__( + self, + model_path=None, + local_path=None, + projector_local_path=None, + device="auto", + **kwargs, + ): + if model_path is None and local_path is None: + raise ValueError("Either model_path or local_path must be provided.") + + self.params = DEFAULT_TEXT_GEN_PARAMS.copy() + self.params.update(kwargs) + self.model = None + self.projector = None + self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP.get(model_path, None) + self.downloaded_path = local_path + self.projector_downloaded_path = projector_local_path + self.device = device + self.context = None + if self.device == "auto" or self.device == "gpu": + self.n_gpu_layers = -1 if is_gpu_available() else 0 + else: + self.n_gpu_layers = 0 + + if ( + self.downloaded_path is not None + and self.projector_downloaded_path is not None + ): + # when running from local, both path should be provided + pass + elif self.downloaded_path is not None: + if model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: + self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] + self.projector_downloaded_path, _ = pull_model( + self.projector_path, **kwargs + ) + elif model_path in NEXA_RUN_MODEL_MAP_AUDIO_LM: + self.model_path = NEXA_RUN_MODEL_MAP_AUDIO_LM[model_path] + self.projector_path = NEXA_RUN_AUDIO_LM_PROJECTOR_MAP[model_path] + self.downloaded_path, _ = pull_model(self.model_path, **kwargs) + self.projector_downloaded_path, _ = pull_model( + self.projector_path, **kwargs + ) + elif Path(model_path).parent.exists(): + local_dir = Path(model_path).parent + model_name = Path(model_path).name + tag_and_ext = model_name.split(":")[-1] + self.downloaded_path = local_dir / f"model-{tag_and_ext}" + self.projector_downloaded_path = local_dir / f"projector-{tag_and_ext}" + if not ( + self.downloaded_path.exists() + and self.projector_downloaded_path.exists() + ): + logging.error( + f"Model or projector not found in {local_dir}. " + "Make sure to name them as 'model-.gguf' and 'projector-.gguf'." + ) + exit(1) + else: + logging.error("VLM user model from hub is not supported yet.") + exit(1) + + if self.downloaded_path is None or self.projector_downloaded_path is None: + logging.error( + f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.", + exc_info=True, + ) + exit(1) + self.is_qwen = is_qwen(self.downloaded_path) # TEMPORARY SOLUTION : this hardcode can be risky + self.ctx_params = audio_lm_cpp.context_default_params(self.is_qwen) + with suppress_stdout_stderr(): + self._load_model() + + def _load_model(self): + try: + self.ctx_params.model = ctypes.c_char_p( + self.downloaded_path.encode("utf-8") + ) + self.ctx_params.mmproj = ctypes.c_char_p( + self.projector_downloaded_path.encode("utf-8") + ) + self.ctx_params.n_gpu_layers = ( + 0x7FFFFFFF if self.n_gpu_layers == -1 else self.n_gpu_layers + ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers + + self.context = audio_lm_cpp.init_context( + ctypes.byref(self.ctx_params), is_qwen=self.is_qwen + ) + if not self.context: + raise RuntimeError("Failed to load audio language model") + logging.debug("Model loaded successfully") + except Exception as e: + logging.error(f"Error loading model: {e}") + raise + + def run(self): + while True: + try: + while True: + audio_path = input("Audio Path (required): ") + if os.path.exists(audio_path): + break + print(f"'{audio_path}' is not a valid audio path. 
Please try again.") + + user_input = input("Enter text (leave empty if no prompt): ") + + self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) + self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) + + audio_lm_cpp.process_full( + self.context, ctypes.byref(self.ctx_params), is_qwen=self.is_qwen + ) + + except KeyboardInterrupt: + print("\nExiting...") + break + + except Exception as e: + logging.error(f"\nError during audio generation: {e}", exc_info=True) + + def __del__(self): + """ + Destructor to free the Bark context when the instance is deleted. + """ + if self.context: + audio_lm_cpp.free(self.context, is_qwen=self.is_qwen) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Run audio-in text-out generation with nexa-omni-audio model" + ) + parser.add_argument( + "model_path", + type=str, + help="Path or identifier for the model in Nexa Model Hub", + ) + parser.add_argument( + "-d", + "--device", + type=str, + choices=["auto", "cpu", "gpu"], + default="auto", + help="Device to use for inference (auto, cpu, or gpu)", + ) + args = parser.parse_args() + kwargs = {k: v for k, v in vars(args).items() if v is not None} + model_path = kwargs.pop("model_path") + device = kwargs.pop("device", "auto") + + inference = NexaAudioLMInference(model_path, device=device, **kwargs) + inference.run() From 16cdddb83370c1737b47faba8ceab555668cfbf3 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 21:52:57 +0000 Subject: [PATCH 020/160] make OpenMP optional for Mac --- CMakeLists.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dc785298..9568b2b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,19 @@ cmake_minimum_required(VERSION 3.16) project(nexa_gguf) include(ExternalProject) -find_package(OpenMP REQUIRED) # Find OpenMP package which is needed for llama.cpp + +# makes OpenMP optional on macOS +if(APPLE) + find_package(OpenMP QUIET) + if(NOT OpenMP_FOUND) + message(STATUS "OpenMP not found - OpenMP support will be disabled") + set(OpenMP_C_FLAGS "") + set(OpenMP_CXX_FLAGS "") + set(OpenMP_EXE_LINKER_FLAGS "") + endif() +else() + find_package(OpenMP REQUIRED) +endif()# Find OpenMP package which is needed for llama.cpp set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_CXX_STANDARD 17) From 748a62ec5d2f6eda19ca7bf93a92d221cd7fa1ab Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 22:57:25 +0000 Subject: [PATCH 021/160] update llama.cpp --- dependency/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 9e67ef75..4bdc70aa 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 9e67ef75b46b4d267b9df4ac6c1f232681470a4c +Subproject commit 4bdc70aaac8884df987f4b079b3d063f2f31e076 From 704ff643d4c27cb902a1b4ee94544d0169981df7 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 23:11:33 +0000 Subject: [PATCH 022/160] remove unessary comments --- nexa/gguf/llama/audio_lm_cpp.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/nexa/gguf/llama/audio_lm_cpp.py b/nexa/gguf/llama/audio_lm_cpp.py index 71bb3888..5d53a484 100644 --- a/nexa/gguf/llama/audio_lm_cpp.py +++ b/nexa/gguf/llama/audio_lm_cpp.py @@ -80,21 +80,11 @@ def context_default_params(is_qwen: bool = False) -> omni_context_params: _lib = _lib_qwen if is_qwen else _lib_omni return 
_lib.omni_context_default_params() -# _lib_omni.omni_context_default_params.argtypes = [] -# _lib_qwen.omni_context_default_params.argtypes = [] -# _lib_omni.omni_context_default_params.restype = omni_context_params -# _lib_qwen.omni_context_default_params.restype = omni_context_params - # OMNI_AUDIO_API struct omni_context *omni_init_context(omni_context_params ¶ms); def init_context(params: omni_context_params_p, is_qwen: bool = False) -> omni_context_p: # type: ignore _lib = _lib_qwen if is_qwen else _lib_omni return _lib.omni_init_context(params) -# _lib_omni.omni_init_context.argtypes = [omni_context_params_p] -# _lib_qwen.omni_init_context.argtypes = [omni_context_params_p] -# _lib_omni.omni_init_context.restype = omni_context_p -# _lib_qwen.omni_init_context.restype = omni_context_p - # OMNI_AUDIO_API void omni_process_full( # struct omni_context *ctx_omni, # omni_context_params ¶ms @@ -103,11 +93,6 @@ def process_full(ctx: omni_context_p, params: omni_context_params_p, is_qwen: bo _lib = _lib_qwen if is_qwen else _lib_omni return _lib.omni_process_full(ctx, params) -# _lib_omni.omni_process_full.argtypes = [omni_context_p, omni_context_params_p] -# _lib_qwen.omni_process_full.argtypes = [omni_context_p, omni_context_params_p] -# _lib_omni.omni_process_full.restype = None -# _lib_qwen.omni_process_full.restype = None - # OMNI_AUDIO_API void omni_free(struct omni_context *ctx_omni); def free(ctx: omni_context_p, is_qwen: bool = False): _lib = _lib_qwen if is_qwen else _lib_omni From 5c67f2e305a4e656115c0a9ddf6b6765342ba98d Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 15:12:31 -0800 Subject: [PATCH 023/160] update default bool --- nexa/gguf/llama/audio_lm_cpp.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nexa/gguf/llama/audio_lm_cpp.py b/nexa/gguf/llama/audio_lm_cpp.py index 5d53a484..d6a877e1 100644 --- a/nexa/gguf/llama/audio_lm_cpp.py +++ b/nexa/gguf/llama/audio_lm_cpp.py @@ -38,7 +38,7 @@ def _load_shared_library(lib_base_name: str, base_path: Path = None): f"Shared library with base name '{lib_base_name}' not found" ) -def _get_lib(is_qwen: bool = False): +def _get_lib(is_qwen: bool = True): # Specify the base name of the shared library to load _lib_base_name = "nexa-qwen2-audio-lib_shared" if is_qwen else "nexa-omni-audio-lib_shared" base_path = ( @@ -76,12 +76,12 @@ class omni_context_params(ctypes.Structure): omni_context_p = ctypes.c_void_p # OMNI_AUDIO_API omni_context_params omni_context_default_params(); -def context_default_params(is_qwen: bool = False) -> omni_context_params: +def context_default_params(is_qwen: bool = True) -> omni_context_params: _lib = _lib_qwen if is_qwen else _lib_omni return _lib.omni_context_default_params() # OMNI_AUDIO_API struct omni_context *omni_init_context(omni_context_params ¶ms); -def init_context(params: omni_context_params_p, is_qwen: bool = False) -> omni_context_p: # type: ignore +def init_context(params: omni_context_params_p, is_qwen: bool = True) -> omni_context_p: # type: ignore _lib = _lib_qwen if is_qwen else _lib_omni return _lib.omni_init_context(params) @@ -89,12 +89,12 @@ def init_context(params: omni_context_params_p, is_qwen: bool = False) -> omni_c # struct omni_context *ctx_omni, # omni_context_params ¶ms # ); -def process_full(ctx: omni_context_p, params: omni_context_params_p, is_qwen: bool = False): # type: ignore +def process_full(ctx: omni_context_p, params: omni_context_params_p, is_qwen: bool = True): # type: ignore _lib = _lib_qwen if is_qwen else 
_lib_omni return _lib.omni_process_full(ctx, params) # OMNI_AUDIO_API void omni_free(struct omni_context *ctx_omni); -def free(ctx: omni_context_p, is_qwen: bool = False): +def free(ctx: omni_context_p, is_qwen: bool = True): _lib = _lib_qwen if is_qwen else _lib_omni return _lib.omni_free(ctx) From 155260a7ce0699deea94a82e678913e072bdf401 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 23:31:57 +0000 Subject: [PATCH 024/160] enable nexa cli --- nexa/cli/entry.py | 3 +++ nexa/constants.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index dff1526a..9bf70147 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -107,6 +107,9 @@ def run_ggml_inference(args): elif run_type == "TTS": from nexa.gguf.nexa_inference_tts import NexaTTSInference inference = NexaTTSInference(model_path=model_path, local_path=local_path, **kwargs) + elif run_type == "AudioLM": + from nexa.gguf.nexa_inference_audio_lm import NexaAudioLMInference + inference = NexaAudioLMInference(model_path=model_path, local_path=local_path, **kwargs) else: print(f"Unknown task: {run_type}. Skipping inference.") return diff --git a/nexa/constants.py b/nexa/constants.py index 15e8c874..c6c261fb 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -38,6 +38,7 @@ class ModelType(Enum): TTS = "TTS" MULTIMODAL = "Multimodal" TEXT_EMBEDDING = "Text Embedding" + AUDIOLM = "AudioLM" NEXA_RUN_MODEL_MAP_TEXT = { @@ -409,8 +410,8 @@ class ModelType(Enum): "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL, "llava-phi-3-mini": ModelType.MULTIMODAL, "llava-llama-3-8b-v1.1": ModelType.MULTIMODAL, - "octoaudio": ModelType.MULTIMODAL, - "qwen2audio": ModelType.MULTIMODAL, + "octoaudio": ModelType.AUDIOLM, + "qwen2audio": ModelType.AUDIOLM, "faster-whisper-tiny.en": ModelType.AUDIO, "faster-whisper-tiny": ModelType.AUDIO, "faster-whisper-small.en": ModelType.AUDIO, From c98758e7f73a3c250bb8111396602a86f249585a Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 23:35:39 +0000 Subject: [PATCH 025/160] use omniaudio --- nexa/constants.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nexa/constants.py b/nexa/constants.py index c6c261fb..cd354706 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -113,9 +113,9 @@ class ModelType(Enum): "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-fp16", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", - "octoaudio": "Octo-omni-audio:model-fp16", - "octoaudio:fp16": "Octo-omni-audio:model-fp16", - "octoaudio:q4_0": "Octo-omni-audio:model-q4_0", + "omniaudio": "Octo-omni-audio:model-fp16", + "omniaudio:fp16": "Octo-omni-audio:model-fp16", + "omniaudio:q4_0": "Octo-omni-audio:model-q4_0", } NEXA_RUN_MODEL_MAP_VOICE = { @@ -174,9 +174,9 @@ class ModelType(Enum): "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-fp16", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", - "octoaudio": "Octo-omni-audio:projector-fp16", - "octoaudio:fp16": "Octo-omni-audio:projector-fp16", - "octoaudio:q4_0": "Octo-omni-audio:projector-q4_0", + "omniaudio": "Octo-omni-audio:projector-fp16", + "omniaudio:fp16": "Octo-omni-audio:projector-fp16", + "omniaudio:q4_0": "Octo-omni-audio:projector-q4_0", } NEXA_RUN_T5XXL_MAP = { @@ -410,7 +410,7 @@ class ModelType(Enum): "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL, "llava-phi-3-mini": ModelType.MULTIMODAL, 
"llava-llama-3-8b-v1.1": ModelType.MULTIMODAL, - "octoaudio": ModelType.AUDIOLM, + "omniaudio": ModelType.AUDIOLM, "qwen2audio": ModelType.AUDIOLM, "faster-whisper-tiny.en": ModelType.AUDIO, "faster-whisper-tiny": ModelType.AUDIO, From 12e4901d2edac2df89ffa5d78d5de40f33c75db4 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sun, 3 Nov 2024 23:47:11 +0000 Subject: [PATCH 026/160] moved llama_cpp imports from outer constants to inner converter constants --- nexa/constants.py | 150 ---------------------------- nexa/gguf/converter/constants.py | 149 +++++++++++++++++++++++++++ nexa/gguf/converter/nexa_convert.py | 2 +- 3 files changed, 150 insertions(+), 151 deletions(-) create mode 100644 nexa/gguf/converter/constants.py diff --git a/nexa/constants.py b/nexa/constants.py index c78c4a43..a363551b 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -403,154 +403,4 @@ class ModelType(Enum): "nomic-embed-text-v1.5": ModelType.TEXT_EMBEDDING, "all-MiniLM-L6-v2": ModelType.TEXT_EMBEDDING, "all-MiniLM-L12-v2": ModelType.TEXT_EMBEDDING, -} - -from nexa.gguf.llama.llama_cpp import ( - LLAMA_FTYPE_ALL_F32, - LLAMA_FTYPE_MOSTLY_F16, - LLAMA_FTYPE_MOSTLY_Q4_0, - LLAMA_FTYPE_MOSTLY_Q4_1, - LLAMA_FTYPE_MOSTLY_Q8_0, - LLAMA_FTYPE_MOSTLY_Q5_0, - LLAMA_FTYPE_MOSTLY_Q5_1, - LLAMA_FTYPE_MOSTLY_Q2_K, - LLAMA_FTYPE_MOSTLY_Q3_K_S, - LLAMA_FTYPE_MOSTLY_Q3_K_M, - LLAMA_FTYPE_MOSTLY_Q3_K_L, - LLAMA_FTYPE_MOSTLY_Q4_K_S, - LLAMA_FTYPE_MOSTLY_Q4_K_M, - LLAMA_FTYPE_MOSTLY_Q5_K_S, - LLAMA_FTYPE_MOSTLY_Q5_K_M, - LLAMA_FTYPE_MOSTLY_Q6_K, - LLAMA_FTYPE_MOSTLY_IQ2_XXS, - LLAMA_FTYPE_MOSTLY_IQ2_XS, - LLAMA_FTYPE_MOSTLY_Q2_K_S, - LLAMA_FTYPE_MOSTLY_IQ3_XS, - LLAMA_FTYPE_MOSTLY_IQ3_XXS, - LLAMA_FTYPE_MOSTLY_IQ1_S, - LLAMA_FTYPE_MOSTLY_IQ4_NL, - LLAMA_FTYPE_MOSTLY_IQ3_S, - LLAMA_FTYPE_MOSTLY_IQ3_M, - LLAMA_FTYPE_MOSTLY_IQ2_S, - LLAMA_FTYPE_MOSTLY_IQ2_M, - LLAMA_FTYPE_MOSTLY_IQ4_XS, - LLAMA_FTYPE_MOSTLY_IQ1_M, - LLAMA_FTYPE_MOSTLY_BF16, - LLAMA_FTYPE_MOSTLY_Q4_0_4_4, - LLAMA_FTYPE_MOSTLY_Q4_0_4_8, - LLAMA_FTYPE_MOSTLY_Q4_0_8_8, - LLAMA_FTYPE_MOSTLY_TQ1_0, - LLAMA_FTYPE_MOSTLY_TQ2_0, -) -from nexa.gguf.llama.llama_cpp import ( - GGML_TYPE_F32, - GGML_TYPE_F16, - GGML_TYPE_Q4_0, - GGML_TYPE_Q4_1, - GGML_TYPE_Q5_0, - GGML_TYPE_Q5_1, - GGML_TYPE_Q8_0, - GGML_TYPE_Q8_1, - GGML_TYPE_Q2_K, - GGML_TYPE_Q3_K, - GGML_TYPE_Q4_K, - GGML_TYPE_Q5_K, - GGML_TYPE_Q6_K, - GGML_TYPE_Q8_K, - GGML_TYPE_IQ2_XXS, - GGML_TYPE_IQ2_XS, - GGML_TYPE_IQ3_XXS, - GGML_TYPE_IQ1_S, - GGML_TYPE_IQ4_NL, - GGML_TYPE_IQ3_S, - GGML_TYPE_IQ2_S, - GGML_TYPE_IQ4_XS, - GGML_TYPE_I8, - GGML_TYPE_I16, - GGML_TYPE_I32, - GGML_TYPE_I64, - GGML_TYPE_F64, - GGML_TYPE_IQ1_M, - GGML_TYPE_BF16, - GGML_TYPE_Q4_0_4_4, - GGML_TYPE_Q4_0_4_8, - GGML_TYPE_Q4_0_8_8, - GGML_TYPE_COUNT, -) - -# From quantize.cpp -# For mapping of general quantization options (ftypes) -LLAMA_QUANTIZATION_TYPES = { - "q4_0": LLAMA_FTYPE_MOSTLY_Q4_0, - "q4_1": LLAMA_FTYPE_MOSTLY_Q4_1, - "q5_0": LLAMA_FTYPE_MOSTLY_Q5_0, - "q5_1": LLAMA_FTYPE_MOSTLY_Q5_1, - "q8_0": LLAMA_FTYPE_MOSTLY_Q8_0, - "q2_k": LLAMA_FTYPE_MOSTLY_Q2_K, - "q3_k_s": LLAMA_FTYPE_MOSTLY_Q3_K_S, - "q3_k_m": LLAMA_FTYPE_MOSTLY_Q3_K_M, - "q3_k_l": LLAMA_FTYPE_MOSTLY_Q3_K_L, - "q4_k_s": LLAMA_FTYPE_MOSTLY_Q4_K_S, - "q4_k_m": LLAMA_FTYPE_MOSTLY_Q4_K_M, - "q5_k_s": LLAMA_FTYPE_MOSTLY_Q5_K_S, - "q5_k_m": LLAMA_FTYPE_MOSTLY_Q5_K_M, - "q6_k": LLAMA_FTYPE_MOSTLY_Q6_K, - "iq2_xxs": LLAMA_FTYPE_MOSTLY_IQ2_XXS, - "iq2_xs": LLAMA_FTYPE_MOSTLY_IQ2_XS, - "q2_k_s": LLAMA_FTYPE_MOSTLY_Q2_K_S, - "iq3_xs": LLAMA_FTYPE_MOSTLY_IQ3_XS, - "iq3_xxs": 
LLAMA_FTYPE_MOSTLY_IQ3_XXS, - "iq1_s": LLAMA_FTYPE_MOSTLY_IQ1_S, - "iq4_nl": LLAMA_FTYPE_MOSTLY_IQ4_NL, - "iq3_s": LLAMA_FTYPE_MOSTLY_IQ3_S, - "iq3_m": LLAMA_FTYPE_MOSTLY_IQ3_M, - "iq2_s": LLAMA_FTYPE_MOSTLY_IQ2_S, - "iq2_m": LLAMA_FTYPE_MOSTLY_IQ2_M, - "iq4_xs": LLAMA_FTYPE_MOSTLY_IQ4_XS, - "iq1_m": LLAMA_FTYPE_MOSTLY_IQ1_M, - "f16": LLAMA_FTYPE_MOSTLY_F16, - "f32": LLAMA_FTYPE_ALL_F32, - "bf16": LLAMA_FTYPE_MOSTLY_BF16, - "q4_0_4_4": LLAMA_FTYPE_MOSTLY_Q4_0_4_4, - "q4_0_4_8": LLAMA_FTYPE_MOSTLY_Q4_0_4_8, - "q4_0_8_8": LLAMA_FTYPE_MOSTLY_Q4_0_8_8, - "tq1_0": LLAMA_FTYPE_MOSTLY_TQ1_0, - "tq2_0": LLAMA_FTYPE_MOSTLY_TQ2_0, -} - -# From ggml.h -# For mapping of output_tensor_type and token_embedding_type only -GGML_TYPES = { - "f32": GGML_TYPE_F32, - "f16": GGML_TYPE_F16, - "q4_0": GGML_TYPE_Q4_0, - "q4_1": GGML_TYPE_Q4_1, - "q5_0": GGML_TYPE_Q5_0, - "q5_1": GGML_TYPE_Q5_1, - "q8_0": GGML_TYPE_Q8_0, - "q8_1": GGML_TYPE_Q8_1, - "q2_k": GGML_TYPE_Q2_K, - "q3_k": GGML_TYPE_Q3_K, - "q4_k": GGML_TYPE_Q4_K, - "q5_k": GGML_TYPE_Q5_K, - "q6_k": GGML_TYPE_Q6_K, - "q8_k": GGML_TYPE_Q8_K, - "iq2_xxs": GGML_TYPE_IQ2_XXS, - "iq2_xs": GGML_TYPE_IQ2_XS, - "iq3_xxs": GGML_TYPE_IQ3_XXS, - "iq1_s": GGML_TYPE_IQ1_S, - "iq4_nl": GGML_TYPE_IQ4_NL, - "iq3_s": GGML_TYPE_IQ3_S, - "iq2_s": GGML_TYPE_IQ2_S, - "iq4_xs": GGML_TYPE_IQ4_XS, - "i8": GGML_TYPE_I8, - "i16": GGML_TYPE_I16, - "i32": GGML_TYPE_I32, - "i64": GGML_TYPE_I64, - "f64": GGML_TYPE_F64, - "iq1_m": GGML_TYPE_IQ1_M, - "bf16": GGML_TYPE_BF16, - "q4_0_4_4": GGML_TYPE_Q4_0_4_4, - "q4_0_4_8": GGML_TYPE_Q4_0_4_8, - "q4_0_8_8": GGML_TYPE_Q4_0_8_8, } \ No newline at end of file diff --git a/nexa/gguf/converter/constants.py b/nexa/gguf/converter/constants.py new file mode 100644 index 00000000..d2869ab1 --- /dev/null +++ b/nexa/gguf/converter/constants.py @@ -0,0 +1,149 @@ +from nexa.gguf.llama.llama_cpp import ( + LLAMA_FTYPE_ALL_F32, + LLAMA_FTYPE_MOSTLY_F16, + LLAMA_FTYPE_MOSTLY_Q4_0, + LLAMA_FTYPE_MOSTLY_Q4_1, + LLAMA_FTYPE_MOSTLY_Q8_0, + LLAMA_FTYPE_MOSTLY_Q5_0, + LLAMA_FTYPE_MOSTLY_Q5_1, + LLAMA_FTYPE_MOSTLY_Q2_K, + LLAMA_FTYPE_MOSTLY_Q3_K_S, + LLAMA_FTYPE_MOSTLY_Q3_K_M, + LLAMA_FTYPE_MOSTLY_Q3_K_L, + LLAMA_FTYPE_MOSTLY_Q4_K_S, + LLAMA_FTYPE_MOSTLY_Q4_K_M, + LLAMA_FTYPE_MOSTLY_Q5_K_S, + LLAMA_FTYPE_MOSTLY_Q5_K_M, + LLAMA_FTYPE_MOSTLY_Q6_K, + LLAMA_FTYPE_MOSTLY_IQ2_XXS, + LLAMA_FTYPE_MOSTLY_IQ2_XS, + LLAMA_FTYPE_MOSTLY_Q2_K_S, + LLAMA_FTYPE_MOSTLY_IQ3_XS, + LLAMA_FTYPE_MOSTLY_IQ3_XXS, + LLAMA_FTYPE_MOSTLY_IQ1_S, + LLAMA_FTYPE_MOSTLY_IQ4_NL, + LLAMA_FTYPE_MOSTLY_IQ3_S, + LLAMA_FTYPE_MOSTLY_IQ3_M, + LLAMA_FTYPE_MOSTLY_IQ2_S, + LLAMA_FTYPE_MOSTLY_IQ2_M, + LLAMA_FTYPE_MOSTLY_IQ4_XS, + LLAMA_FTYPE_MOSTLY_IQ1_M, + LLAMA_FTYPE_MOSTLY_BF16, + LLAMA_FTYPE_MOSTLY_Q4_0_4_4, + LLAMA_FTYPE_MOSTLY_Q4_0_4_8, + LLAMA_FTYPE_MOSTLY_Q4_0_8_8, + LLAMA_FTYPE_MOSTLY_TQ1_0, + LLAMA_FTYPE_MOSTLY_TQ2_0, +) +from nexa.gguf.llama.llama_cpp import ( + GGML_TYPE_F32, + GGML_TYPE_F16, + GGML_TYPE_Q4_0, + GGML_TYPE_Q4_1, + GGML_TYPE_Q5_0, + GGML_TYPE_Q5_1, + GGML_TYPE_Q8_0, + GGML_TYPE_Q8_1, + GGML_TYPE_Q2_K, + GGML_TYPE_Q3_K, + GGML_TYPE_Q4_K, + GGML_TYPE_Q5_K, + GGML_TYPE_Q6_K, + GGML_TYPE_Q8_K, + GGML_TYPE_IQ2_XXS, + GGML_TYPE_IQ2_XS, + GGML_TYPE_IQ3_XXS, + GGML_TYPE_IQ1_S, + GGML_TYPE_IQ4_NL, + GGML_TYPE_IQ3_S, + GGML_TYPE_IQ2_S, + GGML_TYPE_IQ4_XS, + GGML_TYPE_I8, + GGML_TYPE_I16, + GGML_TYPE_I32, + GGML_TYPE_I64, + GGML_TYPE_F64, + GGML_TYPE_IQ1_M, + GGML_TYPE_BF16, + GGML_TYPE_Q4_0_4_4, + GGML_TYPE_Q4_0_4_8, + GGML_TYPE_Q4_0_8_8, + GGML_TYPE_COUNT, +) + +# From quantize.cpp +# For 
mapping of general quantization options (ftypes) +LLAMA_QUANTIZATION_TYPES = { + "q4_0": LLAMA_FTYPE_MOSTLY_Q4_0, + "q4_1": LLAMA_FTYPE_MOSTLY_Q4_1, + "q5_0": LLAMA_FTYPE_MOSTLY_Q5_0, + "q5_1": LLAMA_FTYPE_MOSTLY_Q5_1, + "q8_0": LLAMA_FTYPE_MOSTLY_Q8_0, + "q2_k": LLAMA_FTYPE_MOSTLY_Q2_K, + "q3_k_s": LLAMA_FTYPE_MOSTLY_Q3_K_S, + "q3_k_m": LLAMA_FTYPE_MOSTLY_Q3_K_M, + "q3_k_l": LLAMA_FTYPE_MOSTLY_Q3_K_L, + "q4_k_s": LLAMA_FTYPE_MOSTLY_Q4_K_S, + "q4_k_m": LLAMA_FTYPE_MOSTLY_Q4_K_M, + "q5_k_s": LLAMA_FTYPE_MOSTLY_Q5_K_S, + "q5_k_m": LLAMA_FTYPE_MOSTLY_Q5_K_M, + "q6_k": LLAMA_FTYPE_MOSTLY_Q6_K, + "iq2_xxs": LLAMA_FTYPE_MOSTLY_IQ2_XXS, + "iq2_xs": LLAMA_FTYPE_MOSTLY_IQ2_XS, + "q2_k_s": LLAMA_FTYPE_MOSTLY_Q2_K_S, + "iq3_xs": LLAMA_FTYPE_MOSTLY_IQ3_XS, + "iq3_xxs": LLAMA_FTYPE_MOSTLY_IQ3_XXS, + "iq1_s": LLAMA_FTYPE_MOSTLY_IQ1_S, + "iq4_nl": LLAMA_FTYPE_MOSTLY_IQ4_NL, + "iq3_s": LLAMA_FTYPE_MOSTLY_IQ3_S, + "iq3_m": LLAMA_FTYPE_MOSTLY_IQ3_M, + "iq2_s": LLAMA_FTYPE_MOSTLY_IQ2_S, + "iq2_m": LLAMA_FTYPE_MOSTLY_IQ2_M, + "iq4_xs": LLAMA_FTYPE_MOSTLY_IQ4_XS, + "iq1_m": LLAMA_FTYPE_MOSTLY_IQ1_M, + "f16": LLAMA_FTYPE_MOSTLY_F16, + "f32": LLAMA_FTYPE_ALL_F32, + "bf16": LLAMA_FTYPE_MOSTLY_BF16, + "q4_0_4_4": LLAMA_FTYPE_MOSTLY_Q4_0_4_4, + "q4_0_4_8": LLAMA_FTYPE_MOSTLY_Q4_0_4_8, + "q4_0_8_8": LLAMA_FTYPE_MOSTLY_Q4_0_8_8, + "tq1_0": LLAMA_FTYPE_MOSTLY_TQ1_0, + "tq2_0": LLAMA_FTYPE_MOSTLY_TQ2_0, +} + +# From ggml.h +# For mapping of output_tensor_type and token_embedding_type only +GGML_TYPES = { + "f32": GGML_TYPE_F32, + "f16": GGML_TYPE_F16, + "q4_0": GGML_TYPE_Q4_0, + "q4_1": GGML_TYPE_Q4_1, + "q5_0": GGML_TYPE_Q5_0, + "q5_1": GGML_TYPE_Q5_1, + "q8_0": GGML_TYPE_Q8_0, + "q8_1": GGML_TYPE_Q8_1, + "q2_k": GGML_TYPE_Q2_K, + "q3_k": GGML_TYPE_Q3_K, + "q4_k": GGML_TYPE_Q4_K, + "q5_k": GGML_TYPE_Q5_K, + "q6_k": GGML_TYPE_Q6_K, + "q8_k": GGML_TYPE_Q8_K, + "iq2_xxs": GGML_TYPE_IQ2_XXS, + "iq2_xs": GGML_TYPE_IQ2_XS, + "iq3_xxs": GGML_TYPE_IQ3_XXS, + "iq1_s": GGML_TYPE_IQ1_S, + "iq4_nl": GGML_TYPE_IQ4_NL, + "iq3_s": GGML_TYPE_IQ3_S, + "iq2_s": GGML_TYPE_IQ2_S, + "iq4_xs": GGML_TYPE_IQ4_XS, + "i8": GGML_TYPE_I8, + "i16": GGML_TYPE_I16, + "i32": GGML_TYPE_I32, + "i64": GGML_TYPE_I64, + "f64": GGML_TYPE_F64, + "iq1_m": GGML_TYPE_IQ1_M, + "bf16": GGML_TYPE_BF16, + "q4_0_4_4": GGML_TYPE_Q4_0_4_4, + "q4_0_4_8": GGML_TYPE_Q4_0_4_8, + "q4_0_8_8": GGML_TYPE_Q4_0_8_8, +} \ No newline at end of file diff --git a/nexa/gguf/converter/nexa_convert.py b/nexa/gguf/converter/nexa_convert.py index c38970cd..5d5d7735 100644 --- a/nexa/gguf/converter/nexa_convert.py +++ b/nexa/gguf/converter/nexa_convert.py @@ -6,7 +6,7 @@ import json from nexa.gguf.llama.llama_cpp import GGML_TYPE_COUNT, LLAMA_FTYPE_MOSTLY_Q4_0 -from nexa.constants import LLAMA_QUANTIZATION_TYPES, GGML_TYPES +from nexa.gguf.converter.constants import LLAMA_QUANTIZATION_TYPES, GGML_TYPES from nexa.gguf.llama.llama_cpp import llama_model_quantize_params, llama_model_quantize # Set up logging From 40f7e3cc564bf87a0a2f61b471fc131dd19768af Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sun, 3 Nov 2024 23:52:54 +0000 Subject: [PATCH 027/160] enable nexa cli download works --- docs/README.md | 1 - nexa/constants.py | 5 +++-- nexa/gguf/lib_utils.py | 1 - pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/README.md b/docs/README.md index 5cfc2488..096856df 100644 --- a/docs/README.md +++ b/docs/README.md @@ -200,7 +200,6 @@ Test individual modules with downloaded GGUF files: ```shell python tests/verify_text_generation.py python 
tests/verify_vlm.py -python tests/verify_audio_lm.py python tests/verify_image_generation.py python tests/verify_tts_generation.py ``` diff --git a/nexa/constants.py b/nexa/constants.py index cd354706..c91d4286 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -339,6 +339,7 @@ class ModelType(Enum): "compute_type": "default", } +# key is the repo name on Nexa model hub NEXA_OFFICIAL_MODELS_TYPE = { "gemma-2b": ModelType.NLP, "Llama-2-7b-chat": ModelType.NLP, @@ -410,8 +411,8 @@ class ModelType(Enum): "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL, "llava-phi-3-mini": ModelType.MULTIMODAL, "llava-llama-3-8b-v1.1": ModelType.MULTIMODAL, - "omniaudio": ModelType.AUDIOLM, - "qwen2audio": ModelType.AUDIOLM, + "Octo-omni-audio": ModelType.AUDIOLM, + "Qwen2-Audio-7.8B-Instruct": ModelType.AUDIOLM, "faster-whisper-tiny.en": ModelType.AUDIO, "faster-whisper-tiny": ModelType.AUDIO, "faster-whisper-small.en": ModelType.AUDIO, diff --git a/nexa/gguf/lib_utils.py b/nexa/gguf/lib_utils.py index 8bb4c2a1..ec030b9d 100644 --- a/nexa/gguf/lib_utils.py +++ b/nexa/gguf/lib_utils.py @@ -64,7 +64,6 @@ def load_library(lib_base_name: str): # Try to load the shared library, handling potential errors for _lib_path in _lib_paths: - print("Trying to load", _lib_path) if _lib_path.exists(): try: return ctypes.CDLL(str(_lib_path), **cdll_args) # type: ignore diff --git a/pyproject.toml b/pyproject.toml index 53aaabcf..e80e093e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,4 +136,4 @@ provider = "scikit_build_core.metadata.regex" input = "nexa/__init__.py" [tool.pytest.ini_options] -testpaths = ["tests"] \ No newline at end of file +testpaths = ["tests"] From 46f177abe2e11fdc8b73769a50c89fc2180c44be Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 03:27:30 +0000 Subject: [PATCH 028/160] fix crash with the llama.cpp project trying to include cxxabi.h, which is a GCC-specific header not available on Windows/MSVC --- CMakeLists.txt | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9568b2b4..ec53e763 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,8 +3,20 @@ project(nexa_gguf) include(ExternalProject) -# makes OpenMP optional on macOS -if(APPLE) +# Platform-specific settings +if(WIN32) + # Windows-specific settings + add_definitions(-D_CRT_SECURE_NO_WARNINGS) + # OpenMP is optional on Windows + find_package(OpenMP QUIET) + if(NOT OpenMP_FOUND) + message(STATUS "OpenMP not found - OpenMP support will be disabled") + set(OpenMP_C_FLAGS "") + set(OpenMP_CXX_FLAGS "") + set(OpenMP_EXE_LINKER_FLAGS "") + endif() +elseif(APPLE) + # macOS-specific settings find_package(OpenMP QUIET) if(NOT OpenMP_FOUND) message(STATUS "OpenMP not found - OpenMP support will be disabled") @@ -13,12 +25,21 @@ if(APPLE) set(OpenMP_EXE_LINKER_FLAGS "") endif() else() + # Linux and other Unix systems find_package(OpenMP REQUIRED) -endif()# Find OpenMP package which is needed for llama.cpp +endif() set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_CXX_STANDARD 17) +# Windows-specific configurations +if(WIN32) + add_definitions(-D_CRT_SECURE_NO_WARNINGS) + add_definitions(-DNOMINMAX) + add_definitions(-D_WIN32_WINNT=0x0A00) # Target Windows 10 or later + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif() + # Function to collect all user-defined options function(get_all_options output_var) get_cmake_property(variables VARIABLES) @@ -126,6 +147,14 @@ if(LLAMA_BUILD) set(LLAMA_CUDA ${GGML_CUDA}) set(LLAMA_METAL 
${GGML_METAL}) + if(WIN32) + # Add Windows-specific definitions and flags for llama.cpp + list(APPEND USER_DEFINED_OPTIONS + "-DLLAMA_DISABLE_CXXABI=ON" # Disable cxxabi.h dependency + "-DLLAMA_NATIVE=OFF" # Disable native CPU optimizations on Windows + ) + endif() + ExternalProject_Add(llama_project SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/dependency/llama.cpp BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/llama_build From 3c8a7d541297ad749e86b770cca2d5839f3bf145 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 03:36:38 +0000 Subject: [PATCH 029/160] llama build crash fix on windows --- CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ec53e763..3336bc51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -148,11 +148,11 @@ if(LLAMA_BUILD) set(LLAMA_METAL ${GGML_METAL}) if(WIN32) - # Add Windows-specific definitions and flags for llama.cpp - list(APPEND USER_DEFINED_OPTIONS - "-DLLAMA_DISABLE_CXXABI=ON" # Disable cxxabi.h dependency - "-DLLAMA_NATIVE=OFF" # Disable native CPU optimizations on Windows - ) + # Add Windows-specific definitions and flags for llama.cpp + list(APPEND COMMON_CMAKE_OPTIONS + -DLLAMA_DISABLE_CXXABI=ON # Disable cxxabi.h dependency + -DLLAMA_NATIVE=OFF # Disable native CPU optimizations on Windows + ) endif() ExternalProject_Add(llama_project From 3d484450079714d057e1b171c968609423f6e419 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 03:48:40 +0000 Subject: [PATCH 030/160] Disable cxxabi.h dependency on Windows --- CMakeLists.txt | 6 +++++- dependency/llama.cpp | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3336bc51..b79f5577 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,9 +150,13 @@ if(LLAMA_BUILD) if(WIN32) # Add Windows-specific definitions and flags for llama.cpp list(APPEND COMMON_CMAKE_OPTIONS - -DLLAMA_DISABLE_CXXABI=ON # Disable cxxabi.h dependency + -DCMAKE_WINDOWS_EXPORT_ALL_SYMBOLS=ON -DLLAMA_NATIVE=OFF # Disable native CPU optimizations on Windows + -DLLAMA_DISABLE_CXXABI=ON # Disable cxxabi.h dependency ) + + # Add compile definition for all targets + add_compile_definitions(LLAMA_DISABLE_CXXABI) endif() ExternalProject_Add(llama_project diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 4bdc70aa..995baefe 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 4bdc70aaac8884df987f4b079b3d063f2f31e076 +Subproject commit 995baefeed7407cdbaf4e8aef4debfeb2621a12b From cd6f683e26d898e24bad52cb15fd8ffa45aabf3e Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 04:03:38 +0000 Subject: [PATCH 031/160] update ci --- .github/workflows/ci.yaml | 41 ++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8ce64f0f..30d703f3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,6 +13,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: + fail-fast: false # This allows all matrix jobs to complete even if one fails matrix: os: [macos-latest, windows-latest] python-version: ["3.10"] @@ -29,27 +30,49 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Cache pip packages + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install dependencies run: | python -m pip 
install --upgrade pip python -m pip install build pytest shell: bash - - name: Install CMake (if needed) - if: runner.os == 'macOS' || runner.os == 'Windows' + - name: Install CMake (macOS) + if: runner.os == 'macOS' + run: | + brew install cmake + shell: bash + + - name: Install CMake (Windows) + if: runner.os == 'Windows' run: | - if [[ "$RUNNER_OS" == "Windows" ]]; then - choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' || exit 1 - elif [[ "$RUNNER_OS" == "macOS" ]]; then - brew install cmake - fi + choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' shell: bash + - name: Setup MinGW (Windows) + if: runner.os == 'Windows' + uses: msys2/setup-msys2@v2 + with: + msystem: MINGW64 + update: true + install: >- + mingw-w64-x86_64-gcc + mingw-w64-x86_64-cmake + mingw-w64-x86_64-make + - name: Build DLL (Windows) if: runner.os == 'Windows' + shell: msys2 {0} run: | + CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" python -m pip install -e . - shell: bash - name: Build (macOS) if: runner.os == 'macOS' @@ -61,4 +84,4 @@ jobs: - name: Run tests run: | python -m pytest tests - shell: bash + shell: bash \ No newline at end of file From 932cde3c6ca5243e3800146e066f013210997174 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 04:11:27 +0000 Subject: [PATCH 032/160] update ci --- .github/workflows/ci.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 30d703f3..06e8644f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -71,15 +71,17 @@ jobs: if: runner.os == 'Windows' shell: msys2 {0} run: | - CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" + export PATH="/c/Python${{ matrix.python-version }}/:/c/Python${{ matrix.python-version }}/Scripts:$PATH" python -m pip install -e . + env: + CMAKE_ARGS: "-DCMAKE_CXX_FLAGS=-fopenmp" - name: Build (macOS) if: runner.os == 'macOS' run: | - CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" python -m pip install -e . - shell: bash + env: + CMAKE_ARGS: "-DCMAKE_CXX_FLAGS=-fopenmp" - name: Run tests run: | From da1a6634595d0a7751be08ca159bdea24dadf03d Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 04:20:37 +0000 Subject: [PATCH 033/160] update ci --- .github/workflows/ci.yaml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 06e8644f..12c81242 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -71,17 +71,16 @@ jobs: if: runner.os == 'Windows' shell: msys2 {0} run: | - export PATH="/c/Python${{ matrix.python-version }}/:/c/Python${{ matrix.python-version }}/Scripts:$PATH" - python -m pip install -e . - env: - CMAKE_ARGS: "-DCMAKE_CXX_FLAGS=-fopenmp" + export PATH="/c/hostedtoolcache/windows/Python/${{ matrix.python-version }}.11/x64:/c/hostedtoolcache/windows/Python/${{ matrix.python-version }}.11/x64/Scripts:$PATH" + CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" + /c/hostedtoolcache/windows/Python/${{ matrix.python-version }}.11/x64/python.exe -m pip install -e . - name: Build (macOS) if: runner.os == 'macOS' run: | + CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" python -m pip install -e . 
- env: - CMAKE_ARGS: "-DCMAKE_CXX_FLAGS=-fopenmp" + shell: bash - name: Run tests run: | From 28b9cab57a28a4bb6eecc4f9822a6020aece3c2c Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 04:41:24 +0000 Subject: [PATCH 034/160] fix error on windows qwen2-audio/whisper.cpp:9935:38: err or: '_O_BINARY' was not declared in this scope --- dependency/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 995baefe..a4747b2e 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 995baefeed7407cdbaf4e8aef4debfeb2621a12b +Subproject commit a4747b2edb90b9fbf8cb7c3108ba973fc79d7152 From a734d253e95f0732633f6356e4642e4ee96d682d Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 04:55:23 +0000 Subject: [PATCH 035/160] Adding #include & --- dependency/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index a4747b2e..6f1ed6e5 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit a4747b2edb90b9fbf8cb7c3108ba973fc79d7152 +Subproject commit 6f1ed6e5cb1e8003b1b7146bc5aaf1e525bf9096 From dc445f8aad233178353dcb35e29bad0709d5e73f Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 05:15:18 +0000 Subject: [PATCH 036/160] update ci --- .github/workflows/ci.yaml | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 12c81242..c63c3e1f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -13,7 +13,6 @@ jobs: runs-on: ${{ matrix.os }} strategy: - fail-fast: false # This allows all matrix jobs to complete even if one fails matrix: os: [macos-latest, windows-latest] python-version: ["3.10"] @@ -44,36 +43,21 @@ jobs: python -m pip install build pytest shell: bash - - name: Install CMake (macOS) - if: runner.os == 'macOS' - run: | - brew install cmake - shell: bash - - - name: Install CMake (Windows) - if: runner.os == 'Windows' + - name: Install CMake (if needed) + if: runner.os == 'macOS' || runner.os == 'Windows' run: | - choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' + if [[ "$RUNNER_OS" == "Windows" ]]; then + choco install cmake --installargs 'ADD_CMAKE_TO_PATH=System' || exit 1 + elif [[ "$RUNNER_OS" == "macOS" ]]; then + brew install cmake + fi shell: bash - - name: Setup MinGW (Windows) - if: runner.os == 'Windows' - uses: msys2/setup-msys2@v2 - with: - msystem: MINGW64 - update: true - install: >- - mingw-w64-x86_64-gcc - mingw-w64-x86_64-cmake - mingw-w64-x86_64-make - - name: Build DLL (Windows) if: runner.os == 'Windows' - shell: msys2 {0} run: | - export PATH="/c/hostedtoolcache/windows/Python/${{ matrix.python-version }}.11/x64:/c/hostedtoolcache/windows/Python/${{ matrix.python-version }}.11/x64/Scripts:$PATH" - CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-fopenmp" - /c/hostedtoolcache/windows/Python/${{ matrix.python-version }}.11/x64/python.exe -m pip install -e . + python -m pip install -e . 
+ shell: bash - name: Build (macOS) if: runner.os == 'macOS' From 82ad5b039358792d2a878e73f29d66d0dcf7f5de Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 05:46:04 +0000 Subject: [PATCH 037/160] disable for MSC_VER --- dependency/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 6f1ed6e5..14196810 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 6f1ed6e5cb1e8003b1b7146bc5aaf1e525bf9096 +Subproject commit 141968108994905dc481863b75e0837cb693f5e3 From b4e93b4a7c354a9bc3ccc116ade468f3998d9430 Mon Sep 17 00:00:00 2001 From: Paul Zhu Date: Sun, 3 Nov 2024 21:49:28 -0800 Subject: [PATCH 038/160] Pass n_gpu_layers and turn off Metal for bark.cpp --- CMakeLists.txt | 2 +- nexa/gguf/bark/bark_cpp.py | 1 + nexa/gguf/nexa_inference_tts.py | 29 +++++++++++++++++++++++++++-- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4670bff2..539522ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -149,7 +149,7 @@ if(BARK_BUILD) -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_CXX_STANDARD=17 -DGGML_CUDA=${GGML_CUDA} - -DGGML_METAL=${GGML_METAL} + -DGGML_METAL=OFF -DBUILD_SHARED_LIBS=ON -DBARK_BUILD_EXAMPLES=OFF BUILD_ALWAYS 1 diff --git a/nexa/gguf/bark/bark_cpp.py b/nexa/gguf/bark/bark_cpp.py index d2e2a250..e015cf44 100644 --- a/nexa/gguf/bark/bark_cpp.py +++ b/nexa/gguf/bark/bark_cpp.py @@ -80,6 +80,7 @@ class bark_context_params(ctypes.Structure): ("verbosity", ctypes.c_int), ("temp", ctypes.c_float), ("fine_temp", ctypes.c_float), + ("n_gpu_layers", ctypes.c_int32), ("min_eos_p", ctypes.c_float), ("sliding_window_size", ctypes.c_int32), ("max_coarse_history", ctypes.c_int32), diff --git a/nexa/gguf/nexa_inference_tts.py b/nexa/gguf/nexa_inference_tts.py index 173996be..04488637 100644 --- a/nexa/gguf/nexa_inference_tts.py +++ b/nexa/gguf/nexa_inference_tts.py @@ -7,7 +7,9 @@ import platform import functools from .bark import bark_cpp + from nexa.general import pull_model +from nexa.gguf.lib_utils import is_gpu_available class NexaTTSInference: """ @@ -27,7 +29,8 @@ class NexaTTSInference: """ def __init__(self, model_path=None, local_path=None, n_threads=1, seed=0, - sampling_rate=24000, verbosity=0, win_stack_size=16*1024*1024, **kwargs): + sampling_rate=24000, verbosity=0, win_stack_size=16*1024*1024, + device="auto", n_gpu_layers=4, **kwargs): if model_path is None and local_path is None: raise ValueError("Either model_path or local_path must be provided.") @@ -38,6 +41,8 @@ def __init__(self, model_path=None, local_path=None, n_threads=1, seed=0, self.sampling_rate = sampling_rate self.verbosity = verbosity self.win_stack_size = win_stack_size + self.device = device + self.n_gpu_layers = n_gpu_layers self.params = { "output_path": os.path.join(os.getcwd(), "tts"), } @@ -93,9 +98,29 @@ def _load_model(self): params = bark_cpp.bark_context_default_params() params.sample_rate = self.sampling_rate params.verbosity = self.verbosity + + # Use configured n_gpu_layers when device is auto/gpu and GPU is available + if self.device == "auto" or self.device == "gpu": + if is_gpu_available(): + params.n_gpu_layers = self.n_gpu_layers + logging.info(f"Using GPU acceleration with {self.n_gpu_layers} layers") + else: + params.n_gpu_layers = 0 + logging.info("GPU not available, falling back to CPU") + else: + params.n_gpu_layers = 0 + logging.info("Using CPU mode") + c_model_path = 
ctypes.c_char_p(self.downloaded_path.encode('utf-8')) c_seed = ctypes.c_uint32(self.seed) - self.context = bark_cpp.bark_load_model(c_model_path, params, c_seed) + + try: + self.context = bark_cpp.bark_load_model(c_model_path, params, c_seed) + except Exception as e: + logging.error(f"Failed to load model with GPU. Falling back to CPU: {e}") + params.n_gpu_layers = 0 + self.context = bark_cpp.bark_load_model(c_model_path, params, c_seed) + if not self.context: raise RuntimeError("Failed to load Bark model") logging.debug("Model loaded successfully") From eeab9e940e226b56efdff2abfb8aa41855e272f0 Mon Sep 17 00:00:00 2001 From: Paul Zhu Date: Sun, 3 Nov 2024 22:16:24 -0800 Subject: [PATCH 039/160] bump up bark.cpp --- dependency/bark.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/bark.cpp b/dependency/bark.cpp index 451a7290..1c228860 160000 --- a/dependency/bark.cpp +++ b/dependency/bark.cpp @@ -1 +1 @@ -Subproject commit 451a7290c50fb41ab7113667f3c7854b4a271da2 +Subproject commit 1c22886058af2ff72b92624bc86f88cc11a3dfc6 From cd7b1bce56b0940d3135f86d2ab784e224242a39 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 22:51:20 +0000 Subject: [PATCH 040/160] fix C++20 issue on windows CI --- dependency/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 14196810..d42e0371 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 141968108994905dc481863b75e0837cb693f5e3 +Subproject commit d42e0371f84b413c25511328d75f079962c6fbbb From b63881469450f4426210d8f6a2cf81d0215a1a35 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Mon, 4 Nov 2024 23:04:11 +0000 Subject: [PATCH 041/160] remove C++20 syntax --- dependency/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index d42e0371..05853eb8 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit d42e0371f84b413c25511328d75f079962c6fbbb +Subproject commit 05853eb861d522cc51c450efbabdc1470118cf5b From e8be4042b3cfb8d3c478db5942b6cdbb7e686727 Mon Sep 17 00:00:00 2001 From: Davidqian123 Date: Tue, 5 Nov 2024 01:08:01 +0000 Subject: [PATCH 042/160] fix onnx python interface loading bug --- nexa/onnx/nexa_inference_image.py | 16 ++++++++-------- nexa/onnx/nexa_inference_text.py | 26 +++++++++++++------------- nexa/onnx/nexa_inference_tts.py | 6 ++---- nexa/onnx/nexa_inference_voice.py | 15 ++++++++------- 4 files changed, 31 insertions(+), 32 deletions(-) diff --git a/nexa/onnx/nexa_inference_image.py b/nexa/onnx/nexa_inference_image.py index 8392aa31..b38be335 100644 --- a/nexa/onnx/nexa_inference_image.py +++ b/nexa/onnx/nexa_inference_image.py @@ -64,10 +64,8 @@ def __init__(self, model_path=None, local_path=None, **kwargs): self.params.update(kwargs) self.pipeline = None - def run(self): - if self.download_onnx_folder is None: - self.download_onnx_folder, run_type = pull_model(self.model_path, **kwargs) + self.download_onnx_folder, _ = pull_model(self.model_path, **kwargs) if self.download_onnx_folder is None: logging.error( @@ -76,17 +74,19 @@ def run(self): ) exit(1) - self._load_model(self.download_onnx_folder) + self._load_model() + + def run(self): self._dialogue_mode() @SpinningCursorAnimation() - def _load_model(self, model_path): + def _load_model(self): """ Load the model from the given model path using the appropriate pipeline. 
""" - logging.debug(f"Loading model from {model_path}") + logging.debug(f"Loading model from {self.download_onnx_folder}") try: - model_index_path = os.path.join(model_path, "model_index.json") + model_index_path = os.path.join(self.download_onnx_folder, "model_index.json") with open(model_index_path, "r") as index_file: model_index = json.load(index_file) @@ -96,7 +96,7 @@ def _load_model(self, model_path): PipelineClass = ORT_PIPELINES_MAPPING.get( pipeline_class_name, ORTStableDiffusionPipeline ) - self.pipeline = PipelineClass.from_pretrained(model_path) + self.pipeline = PipelineClass.from_pretrained(self.download_onnx_folder) logging.debug(f"Model loaded successfully using {pipeline_class_name}") except Exception as e: logging.error(f"Error loading model: {e}") diff --git a/nexa/onnx/nexa_inference_text.py b/nexa/onnx/nexa_inference_text.py index fdb6db5f..f9a767e9 100644 --- a/nexa/onnx/nexa_inference_text.py +++ b/nexa/onnx/nexa_inference_text.py @@ -53,9 +53,21 @@ def __init__(self, model_path=None, local_path=None, **kwargs): self.downloaded_onnx_folder = local_path self.timings = kwargs.get("timings", False) self.device = "cpu" + + if self.downloaded_onnx_folder is None: + self.downloaded_onnx_folder, _ = pull_model(self.model_path, **kwargs) + + if self.downloaded_onnx_folder is None: + logging.error( + f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.", + exc_info=True, + ) + exit(1) + + self._load_model_and_tokenizer() @SpinningCursorAnimation() - def _load_model_and_tokenizer(self) -> Tuple[Any, Any, Any, bool]: + def _load_model_and_tokenizer(self): logging.debug(f"Loading model from {self.downloaded_onnx_folder}") start_time = time.time() self.tokenizer = AutoTokenizer.from_pretrained(self.downloaded_onnx_folder) @@ -148,18 +160,6 @@ def run(self): if self.params.get("streamlit"): self.run_streamlit() else: - if self.downloaded_onnx_folder is None: - self.downloaded_onnx_folder, run_type = pull_model(self.model_path, **kwargs) - - if self.downloaded_onnx_folder is None: - logging.error( - f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.", - exc_info=True, - ) - exit(1) - - self._load_model_and_tokenizer() - if self.model is None or self.tokenizer is None or self.streamer is None: logging.error( "Failed to load model or tokenizer. Exiting.", exc_info=True diff --git a/nexa/onnx/nexa_inference_tts.py b/nexa/onnx/nexa_inference_tts.py index fb1f2f9a..26c6d3e4 100644 --- a/nexa/onnx/nexa_inference_tts.py +++ b/nexa/onnx/nexa_inference_tts.py @@ -50,8 +50,8 @@ def __init__(self, model_path=None, local_path=None, **kwargs): self.downloaded_onnx_folder = local_path if self.downloaded_onnx_folder is None: - self.downloaded_onnx_folder, run_type = pull_model(self.model_path, **kwargs) - + self.downloaded_onnx_folder, _ = pull_model(self.model_path, **kwargs) + if self.downloaded_onnx_folder is None: logging.error( f"Model ({model_path}) is not applicable. 
Please refer to our docs for proper usage.", @@ -69,12 +69,10 @@ def _load_model(self): logging.debug(f"Loading model from {self.downloaded_onnx_folder}") try: self.tokenizer = TTSTokenizer(self.config["token"]["list"]) - print(self.tokenizer) self.model = onnxruntime.InferenceSession( os.path.join(self.downloaded_onnx_folder, "model.onnx"), providers=["CPUExecutionProvider"], ) - print(self.model) logging.debug("Model and tokenizer loaded successfully") except Exception as e: logging.error(f"Error loading model or tokenizer: {e}") diff --git a/nexa/onnx/nexa_inference_voice.py b/nexa/onnx/nexa_inference_voice.py index e6d7d696..c0f56ab4 100644 --- a/nexa/onnx/nexa_inference_voice.py +++ b/nexa/onnx/nexa_inference_voice.py @@ -43,9 +43,8 @@ def __init__(self, model_path=None, local_path=None, **kwargs): self.model = None self.processor = None - def run(self): if self.downloaded_onnx_folder is None: - self.downloaded_onnx_folder, run_type = pull_model(self.model_path, **kwargs) + self.downloaded_onnx_folder, _ = pull_model(self.model_path, **kwargs) if self.downloaded_onnx_folder is None: logging.error( @@ -54,14 +53,16 @@ def run(self): ) exit(1) - self._load_model(self.downloaded_onnx_folder) + self._load_model() + + def run(self): self._dialogue_mode() - def _load_model(self, model_path): - logging.debug(f"Loading model from {model_path}") + def _load_model(self): + logging.debug(f"Loading model from {self.downloaded_onnx_folder}") try: - self.processor = AutoProcessor.from_pretrained(model_path) - self.model = ORTModelForSpeechSeq2Seq.from_pretrained(model_path) + self.processor = AutoProcessor.from_pretrained(self.downloaded_onnx_folder) + self.model = ORTModelForSpeechSeq2Seq.from_pretrained(self.downloaded_onnx_folder) logging.debug("Model and processor loaded successfully") except Exception as e: logging.error(f"Error loading model or processor: {e}") From 165cbdd54e5d86def8c811c0ac2b7c7cef986781 Mon Sep 17 00:00:00 2001 From: Davidqian123 Date: Tue, 5 Nov 2024 01:26:46 +0000 Subject: [PATCH 043/160] add bark.cpp in Acknowledgements --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 60e21792..73f517e6 100644 --- a/README.md +++ b/README.md @@ -340,4 +340,5 @@ We would like to thank the following projects: - [llama.cpp](https://github.com/ggerganov/llama.cpp) - [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp) +- [bark.cpp](https://github.com/PABannier/bark.cpp) - [optimum](https://github.com/huggingface/optimum) From 9fe7d61b2fd6f0ce31679c1039fd6d6bc2ae7c70 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Tue, 5 Nov 2024 20:55:31 +0000 Subject: [PATCH 044/160] update llama.cpp for vlm --- .gitmodules | 2 +- dependency/llama.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index c7b3e555..819ee30d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,7 +5,7 @@ [submodule "dependency/llama.cpp"] path = dependency/llama.cpp url = https://github.com/NexaAI/llama.cpp.git - branch = master-release-audio-lm + branch = master-release [submodule "nexa/eval/benchmark_tasks"] path = nexa/eval/benchmark_tasks url = https://github.com/NexaAI/benchmark-tasks.git diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 05853eb8..b535cd94 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 05853eb861d522cc51c450efbabdc1470118cf5b +Subproject commit b535cd941e657ac1984d8022dd5f0c98f2b9e265 From 3687820586b88ed9db361330eef532d2b9990626 Mon Sep 17 
00:00:00 2001 From: Zack Zhiyuan Li Date: Tue, 5 Nov 2024 20:57:07 +0000 Subject: [PATCH 045/160] update llama.cpp --- dependency/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index b535cd94..38c6fa3b 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit b535cd941e657ac1984d8022dd5f0c98f2b9e265 +Subproject commit 38c6fa3b8fb6c88075102fd859d04eaea27aa87c From 27c5a92ce192ad3c945df500cc90ba4eec7533cb Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Tue, 5 Nov 2024 20:57:20 +0000 Subject: [PATCH 046/160] enable omni-vlm --- docs/README.md | 3 +- nexa/cli/entry.py | 14 ++- nexa/constants.py | 13 +++ nexa/gguf/llama/omni_vlm_cpp.py | 79 ++++++++++++++ nexa/gguf/nexa_inference_vlm_omni.py | 147 +++++++++++++++++++++++++++ 5 files changed, 253 insertions(+), 3 deletions(-) create mode 100644 nexa/gguf/llama/omni_vlm_cpp.py create mode 100644 nexa/gguf/nexa_inference_vlm_omni.py diff --git a/docs/README.md b/docs/README.md index 096856df..c6481515 100644 --- a/docs/README.md +++ b/docs/README.md @@ -74,6 +74,7 @@ python -m nexa.gguf.nexa_inference_text gemma python -m nexa.gguf.nexa_inference_text octopusv2 --stop_words "" wget https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png -O test.png python -m nexa.gguf.nexa_inference_vlm nanollava +python -m nexa.gguf.nexa_inference_vlm_omni omnivision python -m nexa.gguf.nexa_inference_image sd1-4 python -m nexa.gguf.nexa_inference_image sd1-4 --img2img wget -O control_normal-fp16.safetensors https://huggingface.co/webui/ControlNet-modules-safetensors/resolve/main/control_normal-fp16.safetensors @@ -81,7 +82,7 @@ wget -O controlnet_test.png https://huggingface.co/takuma104/controlnet_dev/reso python -m nexa.gguf.nexa_inference_image sd1-5 --control_net_path control_normal-fp16.safetensors --control_image_path controlnet_test.png python -m nexa.gguf.nexa_inference_voice whisper-tiny python -m nexa.gguf.nexa_inference_audio_lm qwen2audio -python -m nexa.gguf.nexa_inference_audio_lm octoaudio +python -m nexa.gguf.nexa_inference_audio_lm omniaudio ``` ### Test with Streamlit UI diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 9bf70147..48346e5c 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -98,9 +98,19 @@ def run_ggml_inference(args): elif run_type == "Multimodal": from nexa.gguf.nexa_inference_vlm import NexaVLMInference if is_local_path: - inference = NexaVLMInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) + if "omni" in local_path: + from nexa.gguf.nexa_inference_vlm_omni import NexaOmniVlmInference + inference = NexaOmniVlmInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) + else: + from nexa.gguf.nexa_inference_vlm import NexaVLMInference + inference = NexaVLMInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) else: - inference = NexaVLMInference(model_path=model_path, local_path=local_path, stop_words=stop_words, **kwargs) + if "omni" in local_path: + from nexa.gguf.nexa_inference_vlm_omni import NexaOmniVlmInference + inference = NexaOmniVlmInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) + else: + from nexa.gguf.nexa_inference_vlm import NexaVLMInference + 
inference = NexaVLMInference(model_path=model_path, local_path=local_path, stop_words=stop_words, **kwargs) elif run_type == "Audio": from nexa.gguf.nexa_inference_voice import NexaVoiceInference inference = NexaVoiceInference(model_path=model_path, local_path=local_path, **kwargs) diff --git a/nexa/constants.py b/nexa/constants.py index 68e0fecb..8eb4048b 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -170,6 +170,18 @@ class ModelType(Enum): "llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16", } +NEXA_RUN_OMNI_VLM_PROJECTOR_MAP = { + "omnivision": "Octo-omni-vision:projector-fp16", + "omnivision:fp16": "Octo-omni-vision:projector-fp16", + "omnivision:q4_0": "Octo-omni-vision:projector-q4_0", +} + +NEXA_RUN_OMNI_VLM_MAP = { + "omnivision": "Octo-omni-vision:model-fp16", + "omnivision:fp16": "Octo-omni-vision:model-fp16", + "omnivision:q4_0": "Octo-omni-vision:model-q4_0", +} + NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-fp16", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", @@ -406,6 +418,7 @@ class ModelType(Enum): "anything-v30-LCM": ModelType.COMPUTER_VISION, "FLUX.1-schnell": ModelType.COMPUTER_VISION, "Phi-3-vision-128k-instruct": ModelType.MULTIMODAL, + "omnivision": ModelType.MULTIMODAL, "nanoLLaVA": ModelType.MULTIMODAL, "llava-v1.6-mistral-7b": ModelType.MULTIMODAL, "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL, diff --git a/nexa/gguf/llama/omni_vlm_cpp.py b/nexa/gguf/llama/omni_vlm_cpp.py new file mode 100644 index 00000000..9ab251b9 --- /dev/null +++ b/nexa/gguf/llama/omni_vlm_cpp.py @@ -0,0 +1,79 @@ +import ctypes +import os +import sys +from pathlib import Path + + +# Load the library +def _load_shared_library(lib_base_name: str, base_path: Path = None): + # Determine the file extension based on the platform + if sys.platform.startswith("linux"): + lib_ext = ".so" + elif sys.platform == "darwin": + lib_ext = ".dylib" + elif sys.platform == "win32": + lib_ext = ".dll" + else: + raise RuntimeError("Unsupported platform") + # Construct the paths to the possible shared library names + if base_path is None: + _base_path = Path(__file__).parent.parent.resolve() + else: + _base_path = base_path + _lib_paths = [ + _base_path / f"lib{lib_base_name}{lib_ext}", + _base_path / f"{lib_base_name}{lib_ext}", + ] + # Add the library directory to the DLL search path on Windows (if needed) + if sys.platform == "win32" and sys.version_info >= (3, 8): + os.add_dll_directory(str(_base_path)) + # Try to load the shared library, handling potential errors + for _lib_path in _lib_paths: + if _lib_path.exists(): + try: + return ctypes.CDLL(str(_lib_path)) + except Exception as e: + print(f"Failed to load shared library '{_lib_path}': {e}") + raise FileNotFoundError( + f"Shared library with base name '{lib_base_name}' not found" + ) + +def _get_lib(): + # Specify the base name of the shared library to load + _lib_base_name = "omni_vlm_wrapper_shared" + base_path = ( + Path(__file__).parent.parent.parent.parent.resolve() + / "nexa" + / "gguf" + / "lib" + ) + return _load_shared_library(_lib_base_name, base_path) + +# Initialize both libraries +_lib = _get_lib() + +omni_char_p = ctypes.c_char_p + + +def omnivlm_init(llm_model_path: omni_char_p, mmproj_model_path: omni_char_p): + return _lib.omnivlm_init(llm_model_path, mmproj_model_path) + + +_lib.omnivlm_init.argtypes = [omni_char_p, omni_char_p] +_lib.omnivlm_init.restype = None + + +def omnivlm_inference(prompt: omni_char_p, image_path: omni_char_p): + return 
_lib.omnivlm_inference(prompt, image_path) + + +_lib.omnivlm_inference.argtypes = [omni_char_p, omni_char_p] +_lib.omnivlm_inference.restype = None + + +def omnivlm_free(): + return _lib.omnivlm_free() + + +_lib.omnivlm_free.argtypes = [] +_lib.omnivlm_free.restype = None \ No newline at end of file diff --git a/nexa/gguf/nexa_inference_vlm_omni.py b/nexa/gguf/nexa_inference_vlm_omni.py new file mode 100644 index 00000000..91a31314 --- /dev/null +++ b/nexa/gguf/nexa_inference_vlm_omni.py @@ -0,0 +1,147 @@ + +import ctypes +import logging +import os +from pathlib import Path + +from nexa.constants import ( + DEFAULT_TEXT_GEN_PARAMS, + NEXA_RUN_OMNI_VLM_PROJECTOR_MAP, + NEXA_RUN_OMNI_VLM_MAP +) +from nexa.gguf.lib_utils import is_gpu_available +from nexa.gguf.llama import omni_vlm_cpp +from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr +from nexa.general import pull_model + +class NexaOmniVlmInference: + """ + A class used for vision language model inference. + """ + + def __init__( + self, + model_path=None, + local_path=None, + projector_local_path=None, + device="auto", + **kwargs, + ): + if model_path is None and local_path is None: + raise ValueError("Either model_path or local_path must be provided.") + + self.params = DEFAULT_TEXT_GEN_PARAMS.copy() + self.params.update(kwargs) + self.model = None + self.projector = None + self.projector_path = NEXA_RUN_OMNI_VLM_PROJECTOR_MAP.get(model_path, None) + self.downloaded_path = local_path + self.projector_downloaded_path = projector_local_path + self.device = device + self.context = None + if self.device == "auto" or self.device == "gpu": + self.n_gpu_layers = -1 if is_gpu_available() else 0 + else: + self.n_gpu_layers = 0 + + if ( + self.downloaded_path is not None + and self.projector_downloaded_path is not None + ): + # when running from local, both path should be provided + pass + elif self.downloaded_path is not None: + if model_path in NEXA_RUN_OMNI_VLM_MAP: + self.projector_path = NEXA_RUN_OMNI_VLM_PROJECTOR_MAP[model_path] + self.projector_downloaded_path, _ = pull_model( + self.projector_path, **kwargs + ) + elif model_path in NEXA_RUN_OMNI_VLM_MAP: + self.model_path = NEXA_RUN_OMNI_VLM_MAP[model_path] + self.projector_path = NEXA_RUN_OMNI_VLM_PROJECTOR_MAP[model_path] + self.downloaded_path, _ = pull_model(self.model_path, **kwargs) + self.projector_downloaded_path, _ = pull_model( + self.projector_path, **kwargs + ) + elif Path(model_path).parent.exists(): + local_dir = Path(model_path).parent + model_name = Path(model_path).name + tag_and_ext = model_name.split(":")[-1] + self.downloaded_path = local_dir / f"model-{tag_and_ext}" + self.projector_downloaded_path = local_dir / f"projector-{tag_and_ext}" + if not ( + self.downloaded_path.exists() + and self.projector_downloaded_path.exists() + ): + logging.error( + f"Model or projector not found in {local_dir}. " + "Make sure to name them as 'model-.gguf' and 'projector-.gguf'." 
+ ) + exit(1) + else: + logging.error("VLM user model from hub is not supported yet.") + exit(1) + with suppress_stdout_stderr(): + self._load_model() + + def _load_model(self): + try: + self.ctx_params_model = ctypes.c_char_p( + self.downloaded_path.encode("utf-8") + ) + self.ctx_params_mmproj = ctypes.c_char_p( + self.projector_downloaded_path.encode("utf-8") + ) + omni_vlm_cpp.omnivlm_init(self.ctx_params_model, self.ctx_params_mmproj) + except Exception as e: + logging.error(f"Error loading model: {e}") + raise + + def run(self): + while True: + try: + image_path = input("Image Path: ") + if not os.path.exists(image_path): + print(f"Image path: {image_path} not found, running omni VLM without image input.") + user_input = input("Enter text: ") + image_path = ctypes.c_char_p(image_path.encode("utf-8")) + user_input = ctypes.c_char_p(user_input.encode("utf-8")) + omni_vlm_cpp.omnivlm_inference(user_input, image_path) + + except KeyboardInterrupt: + print("\nExiting...") + break + + except Exception as e: + logging.error(f"\nError during audio generation: {e}", exc_info=True) + + def __del__(self): + omni_vlm_cpp.omnivlm_free() + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Run audio-in text-out generation with nexa-omni-audio model" + ) + parser.add_argument( + "model_path", + type=str, + help="Path or identifier for the model in Nexa Model Hub", + ) + parser.add_argument( + "-d", + "--device", + type=str, + choices=["auto", "cpu", "gpu"], + default="auto", + help="Device to use for inference (auto, cpu, or gpu)", + ) + args = parser.parse_args() + kwargs = {k: v for k, v in vars(args).items() if v is not None} + model_path = kwargs.pop("model_path") + device = kwargs.pop("device", "auto") + + inference = NexaOmniVlmInference(model_path, device=device, **kwargs) + inference.run() \ No newline at end of file From 7b241714479ba81dcf6abe929a14aa67030399e6 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Tue, 5 Nov 2024 21:15:59 +0000 Subject: [PATCH 047/160] name nexa cli work E2E --- README.md | 3 ++- nexa/cli/entry.py | 9 +++++---- nexa/constants.py | 7 +++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 73f517e6..e27f2e26 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,8 @@ Nexa SDK is a comprehensive toolkit for supporting **ONNX** and **GGML** models. ## Latest News 🔥 - +- [2024/11] Support Nexa AI's own vision language model (0.9B parameters): `nexa run omnivision` and audio language model (2.9B): `nexa run omniaudio` +- [2024/11] Support audio language model: `nexa run qwen2audio`, **we are the first open-source toolkit to support audio language model with GGML tensor library.** - [2024/10] Support embedding model: `nexa embed ` - [2024/10] Support pull and run supported Computer Vision models in GGUF format from HuggingFace: `nexa run -hf -mt COMPUTER_VISION` - [2024/10] Support VLM in local server. 
diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 48346e5c..3afeea96 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -96,18 +96,19 @@ def run_ggml_inference(args): inference.run_txt2img() return elif run_type == "Multimodal": - from nexa.gguf.nexa_inference_vlm import NexaVLMInference + print("debug run_type", run_type, "is_local_path", is_local_path) if is_local_path: if "omni" in local_path: from nexa.gguf.nexa_inference_vlm_omni import NexaOmniVlmInference - inference = NexaOmniVlmInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) + inference = NexaOmniVlmInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, **kwargs) else: from nexa.gguf.nexa_inference_vlm import NexaVLMInference inference = NexaVLMInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) else: + print("debug: local_path", local_path) if "omni" in local_path: from nexa.gguf.nexa_inference_vlm_omni import NexaOmniVlmInference - inference = NexaOmniVlmInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) + inference = NexaOmniVlmInference(model_path=model_path, local_path=local_path, **kwargs) else: from nexa.gguf.nexa_inference_vlm import NexaVLMInference inference = NexaVLMInference(model_path=model_path, local_path=local_path, stop_words=stop_words, **kwargs) @@ -124,7 +125,7 @@ def run_ggml_inference(args): print(f"Unknown task: {run_type}. Skipping inference.") return except Exception as e: - print(f"Error loading GGUF models, please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") + print(f"Error {e}, please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") return if hasattr(args, 'streamlit') and args.streamlit: diff --git a/nexa/constants.py b/nexa/constants.py index 8eb4048b..7e1345fa 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -238,6 +238,7 @@ class ModelType(Enum): NEXA_RUN_MODEL_MAP = { **NEXA_RUN_MODEL_MAP_TEXT, **NEXA_RUN_MODEL_MAP_VLM, + **NEXA_RUN_OMNI_VLM_MAP, **NEXA_RUN_MODEL_MAP_AUDIO_LM, **NEXA_RUN_MODEL_MAP_IMAGE, **NEXA_RUN_MODEL_MAP_VOICE, @@ -351,7 +352,9 @@ class ModelType(Enum): "compute_type": "default", } -# key is the repo name on Nexa model hub +# key is the repo name on Nexa model hub, NOT model abbreviation +# For example : https://nexa.ai/NexaAI/Octo-omni-vision/gguf-fp16/readme +# We need to register key : Octo-omni-vision NEXA_OFFICIAL_MODELS_TYPE = { "gemma-2b": ModelType.NLP, "Llama-2-7b-chat": ModelType.NLP, @@ -418,7 +421,7 @@ class ModelType(Enum): "anything-v30-LCM": ModelType.COMPUTER_VISION, "FLUX.1-schnell": ModelType.COMPUTER_VISION, "Phi-3-vision-128k-instruct": ModelType.MULTIMODAL, - "omnivision": ModelType.MULTIMODAL, + "Octo-omni-vision": ModelType.MULTIMODAL, "nanoLLaVA": ModelType.MULTIMODAL, "llava-v1.6-mistral-7b": ModelType.MULTIMODAL, "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL, From 2519e1596c300021215930a0641ac8524667fdc4 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Tue, 5 Nov 2024 21:19:38 +0000 Subject: [PATCH 048/160] use q4_0 for audioLM --- nexa/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nexa/constants.py b/nexa/constants.py index 7e1345fa..32e6a53a 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ 
-110,7 +110,7 @@ class ModelType(Enum): } NEXA_RUN_MODEL_MAP_AUDIO_LM = { - "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-fp16", + "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "omniaudio": "Octo-omni-audio:model-fp16", @@ -183,7 +183,7 @@ class ModelType(Enum): } NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { - "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-fp16", + "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "omniaudio": "Octo-omni-audio:projector-fp16", From 6f04030549392dde9f9790b6f83b125217f31320 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Tue, 5 Nov 2024 21:26:40 +0000 Subject: [PATCH 049/160] update README --- README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index e27f2e26..f806faa8 100644 --- a/README.md +++ b/README.md @@ -249,8 +249,16 @@ Our on-device model hub offers all types of quantized models (text, image, audio Supported models (full list at [Model Hub](https://nexa.ai/models)): | Model | Type | Format | Command | | ------------------------------------------------------------------------------------------------------- | --------------- | --------- | -------------------------------------- | +| [omniaudio](https://nexa.ai/NexaAI/Octo-omni-audio/gguf-q4_0/readme) | AudioLM | GGUF | `nexa run omniaudio` | +| [qwen2audio](https://www.nexaai.com/qnguyen3/nanoLLaVA/gguf-fp16/readme) | AudioLM | GGUF | `nexa run qwen2audio` | | [octopus-v2](https://www.nexaai.com/NexaAI/Octopus-v2/gguf-q4_0/readme) | NLP | GGUF | `nexa run octopus-v2` | -| [octopus-v4](https://www.nexaai.com/NexaAI/Octopus-v4/gguf-q4_0/readme) | NLP | GGUF | `nexa run octopus-v4` | +| [octo-net](https://www.nexaai.com/NexaAI/Octo-net/gguf-q4_0/readme) | NLP | GGUF | `nexa run octo-net` | +| [omnivision](https://nexa.ai/NexaAI/Octo-omni-vision/gguf-fp16/readme) | Multimodal | GGUF | `nexa run omnivision` | +| [nanollava](https://www.nexaai.com/qnguyen3/nanoLLaVA/gguf-fp16/readme) | Multimodal | GGUF | `nexa run nanollava` | +| [llava-phi3](https://www.nexaai.com/xtuner/llava-phi-3-mini/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava-phi3` | +| [llava-llama3](https://www.nexaai.com/xtuner/llava-llama-3-8b-v1.1/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava-llama3` | +| [llava1.6-mistral](https://www.nexaai.com/liuhaotian/llava-v1.6-mistral-7b/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava1.6-mistral` | +| [llava1.6-vicuna](https://www.nexaai.com/liuhaotian/llava-v1.6-vicuna-7b/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava1.6-vicuna` | | [gpt2](https://nexaai.com/openai/gpt2/gguf-q4_0/readme) | NLP | GGUF | `nexa run gpt2` | | [tinyllama](https://www.nexaai.com/TinyLlama/TinyLlama-1.1B-Chat-v1.0/gguf-fp16/readme) | NLP | GGUF | `nexa run tinyllama` | | [llama2](https://www.nexaai.com/meta/Llama2-7b-chat/gguf-q4_0/readme) | NLP | GGUF/ONNX | `nexa run llama2` | @@ -277,11 +285,6 @@ Supported models (full list at [Model Hub](https://nexa.ai/models)): | [phi3.5](https://nexaai.com/microsoft/Phi-3.5-mini-instruct/gguf-q4_0/readme) | NLP | GGUF | `nexa run phi3.5` | | [openelm](https://nexaai.com/apple/OpenELM-3B/gguf-q4_K_M/readme) | NLP | GGUF | `nexa run openelm` | | [AMD-Llama-135m](https://nexaai.com/amd/AMD-Llama-135m/gguf-fp16/readme) | NLP | GGUF | 
`nexa run AMD-Llama-135m:fp16` | -| [nanollava](https://www.nexaai.com/qnguyen3/nanoLLaVA/gguf-fp16/readme) | Multimodal | GGUF | `nexa run nanollava` | -| [llava-phi3](https://www.nexaai.com/xtuner/llava-phi-3-mini/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava-phi3` | -| [llava-llama3](https://www.nexaai.com/xtuner/llava-llama-3-8b-v1.1/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava-llama3` | -| [llava1.6-mistral](https://www.nexaai.com/liuhaotian/llava-v1.6-mistral-7b/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava1.6-mistral` | -| [llava1.6-vicuna](https://www.nexaai.com/liuhaotian/llava-v1.6-vicuna-7b/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava1.6-vicuna` | | [stable-diffusion-v1-4](https://www.nexaai.com/runwayml/stable-diffusion-v1-4/gguf-q4_0/readme) | Computer Vision | GGUF | `nexa run sd1-4` | | [stable-diffusion-v1-5](https://www.nexaai.com/runwayml/stable-diffusion-v1-5/gguf-q4_0/readme) | Computer Vision | GGUF/ONNX | `nexa run sd1-5` | | [stable-diffusion-v2-1](https://nexaai.com/StabilityAI/stable-diffusion-v2-1/gguf-q4_0/readme) | Computer Vision | GGUF | `nexa run sd2-1` | From a5ff3e07b69b1c3eda60ee28c69191d9c6fbd4d0 Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Tue, 5 Nov 2024 14:29:51 -0800 Subject: [PATCH 050/160] update SpinningCursorAnimation and prompt instructions --- nexa/cli/entry.py | 8 ++++---- nexa/constants.py | 24 ++++++++++++------------ nexa/gguf/nexa_inference_audio_lm.py | 7 ++++--- nexa/gguf/nexa_inference_vlm_omni.py | 10 ++++++---- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 3afeea96..9ecd4b04 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -96,7 +96,6 @@ def run_ggml_inference(args): inference.run_txt2img() return elif run_type == "Multimodal": - print("debug run_type", run_type, "is_local_path", is_local_path) if is_local_path: if "omni" in local_path: from nexa.gguf.nexa_inference_vlm_omni import NexaOmniVlmInference @@ -105,7 +104,6 @@ def run_ggml_inference(args): from nexa.gguf.nexa_inference_vlm import NexaVLMInference inference = NexaVLMInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) else: - print("debug: local_path", local_path) if "omni" in local_path: from nexa.gguf.nexa_inference_vlm_omni import NexaOmniVlmInference inference = NexaOmniVlmInference(model_path=model_path, local_path=local_path, **kwargs) @@ -125,7 +123,8 @@ def run_ggml_inference(args): print(f"Unknown task: {run_type}. Skipping inference.") return except Exception as e: - print(f"Error {e}, please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") + print(f"Error running ggml inference: {e}") + print(f"Please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") return if hasattr(args, 'streamlit') and args.streamlit: @@ -216,7 +215,8 @@ def run_onnx_inference(args): print(f"Unknown task: {run_type}. 
Skipping inference.") return except Exception as e: - print(f"Error loading ONNX models, please refer to our docs to install nexaai[onnx] package: https://docs.nexaai.com/getting-started/installation ") + print(f"Error running onnx inference: {e}") + print(f"Please refer to our docs to install nexaai[onnx] package: https://docs.nexaai.com/getting-started/installation ") return if hasattr(args, 'streamlit') and args.streamlit: diff --git a/nexa/constants.py b/nexa/constants.py index 32e6a53a..04ec51fb 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -111,11 +111,11 @@ class ModelType(Enum): NEXA_RUN_MODEL_MAP_AUDIO_LM = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-q4_0", - "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", - "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", + "Qwen2-Audio-7.8B-Instruct:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", + "Qwen2-Audio-7.8B-Instruct:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "omniaudio": "Octo-omni-audio:model-fp16", - "omniaudio:fp16": "Octo-omni-audio:model-fp16", - "omniaudio:q4_0": "Octo-omni-audio:model-q4_0", + "Octo-omni-audio:fp16": "Octo-omni-audio:model-fp16", + "Octo-omni-audio:q4_0": "Octo-omni-audio:model-q4_0", } NEXA_RUN_MODEL_MAP_VOICE = { @@ -172,23 +172,23 @@ class ModelType(Enum): NEXA_RUN_OMNI_VLM_PROJECTOR_MAP = { "omnivision": "Octo-omni-vision:projector-fp16", - "omnivision:fp16": "Octo-omni-vision:projector-fp16", - "omnivision:q4_0": "Octo-omni-vision:projector-q4_0", + "Octo-omni-vision:fp16": "Octo-omni-vision:projector-fp16", + "Octo-omni-vision:q4_0": "Octo-omni-vision:projector-q4_0", } NEXA_RUN_OMNI_VLM_MAP = { "omnivision": "Octo-omni-vision:model-fp16", - "omnivision:fp16": "Octo-omni-vision:model-fp16", - "omnivision:q4_0": "Octo-omni-vision:model-q4_0", + "Octo-omni-vision:fp16": "Octo-omni-vision:model-fp16", + "Octo-omni-vision:q4_0": "Octo-omni-vision:model-q4_0", } NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", - "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", - "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", + "Qwen2-Audio-7.8B-Instruct:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", + "Qwen2-Audio-7.8B-Instruct:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "omniaudio": "Octo-omni-audio:projector-fp16", - "omniaudio:fp16": "Octo-omni-audio:projector-fp16", - "omniaudio:q4_0": "Octo-omni-audio:projector-q4_0", + "Octo-omni-audio:fp16": "Octo-omni-audio:projector-fp16", + "Octo-omni-audio:q4_0": "Octo-omni-audio:projector-q4_0", } NEXA_RUN_T5XXL_MAP = { diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index 777ae2e6..101f62e1 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -2,7 +2,7 @@ import logging import os from pathlib import Path - +from nexa.utils import SpinningCursorAnimation, nexa_prompt from nexa.constants import ( DEFAULT_TEXT_GEN_PARAMS, NEXA_RUN_MODEL_MAP_AUDIO_LM, @@ -114,6 +114,7 @@ def __init__( with suppress_stdout_stderr(): self._load_model() + @SpinningCursorAnimation() def _load_model(self): try: self.ctx_params.model = ctypes.c_char_p( @@ -140,12 +141,12 @@ def run(self): while True: try: while True: - audio_path = input("Audio Path (required): ") + audio_path = nexa_prompt("Enter the path to your audio file (required): ") if os.path.exists(audio_path): break print(f"'{audio_path}' is not a valid audio path. 
Please try again.") - user_input = input("Enter text (leave empty if no prompt): ") + user_input = nexa_prompt("Enter text (leave empty if no prompt): ") self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) diff --git a/nexa/gguf/nexa_inference_vlm_omni.py b/nexa/gguf/nexa_inference_vlm_omni.py index 91a31314..0a9ca737 100644 --- a/nexa/gguf/nexa_inference_vlm_omni.py +++ b/nexa/gguf/nexa_inference_vlm_omni.py @@ -3,7 +3,7 @@ import logging import os from pathlib import Path - +from nexa.utils import nexa_prompt, SpinningCursorAnimation from nexa.constants import ( DEFAULT_TEXT_GEN_PARAMS, NEXA_RUN_OMNI_VLM_PROJECTOR_MAP, @@ -84,6 +84,7 @@ def __init__( with suppress_stdout_stderr(): self._load_model() + @SpinningCursorAnimation() def _load_model(self): try: self.ctx_params_model = ctypes.c_char_p( @@ -100,10 +101,11 @@ def _load_model(self): def run(self): while True: try: - image_path = input("Image Path: ") + image_path = nexa_prompt("Image Path (required): ") if not os.path.exists(image_path): print(f"Image path: {image_path} not found, running omni VLM without image input.") - user_input = input("Enter text: ") + + user_input = nexa_prompt() image_path = ctypes.c_char_p(image_path.encode("utf-8")) user_input = ctypes.c_char_p(user_input.encode("utf-8")) omni_vlm_cpp.omnivlm_inference(user_input, image_path) @@ -111,9 +113,9 @@ def run(self): except KeyboardInterrupt: print("\nExiting...") break - except Exception as e: logging.error(f"\nError during audio generation: {e}", exc_info=True) + print("\n") def __del__(self): omni_vlm_cpp.omnivlm_free() From 329590bd5889be33dde3ba65ae856b27a05f7891 Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Tue, 5 Nov 2024 16:29:48 -0800 Subject: [PATCH 051/160] fix vlm streamlit bug --- nexa/cli/entry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 9ecd4b04..7898a774 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -46,6 +46,8 @@ def run_ggml_inference(args): run_type = ModelType[model_type].value local_path = None + projector_local_path = None + if is_local_path or hf: if not model_type: print("Error: --model_type must be provided when using --local_path or --huggingface") From 8d4ffff464b53dd03b0df863632fc4686dd9e8a5 Mon Sep 17 00:00:00 2001 From: Davidqian123 Date: Wed, 6 Nov 2024 00:57:07 +0000 Subject: [PATCH 052/160] release v0.0.9.1 --- nexa/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexa/__init__.py b/nexa/__init__.py index 237c12f8..5e8c630d 100644 --- a/nexa/__init__.py +++ b/nexa/__init__.py @@ -1 +1 @@ -__version__ = "0.0.9.0" +__version__ = "0.0.9.1" From 0d7e8038f496875f3fba76585be0bc704284a987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E4=B8=BA?= Date: Wed, 6 Nov 2024 16:09:44 +0800 Subject: [PATCH 053/160] add returned string for qwen2audio inference api --- dependency/llama.cpp | 2 +- nexa/gguf/llama/audio_lm_cpp.py | 4 ++-- nexa/gguf/nexa_inference_audio_lm.py | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 38c6fa3b..6a4cf0b9 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 38c6fa3b8fb6c88075102fd859d04eaea27aa87c +Subproject commit 6a4cf0b983195c7f32251e6d550f3c65b854ca6b diff --git a/nexa/gguf/llama/audio_lm_cpp.py b/nexa/gguf/llama/audio_lm_cpp.py index d6a877e1..88db2a33 100644 --- a/nexa/gguf/llama/audio_lm_cpp.py +++ 
b/nexa/gguf/llama/audio_lm_cpp.py @@ -109,8 +109,8 @@ def free(ctx: omni_context_p, is_qwen: bool = True): # Configure process_full lib.omni_process_full.argtypes = [omni_context_p, omni_context_params_p] - lib.omni_process_full.restype = None + lib.omni_process_full.restype = ctypes.c_char_p # Configure free lib.omni_free.argtypes = [omni_context_p] - lib.omni_free.restype = None \ No newline at end of file + lib.omni_free.restype = None diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index 101f62e1..4a10c507 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -151,9 +151,10 @@ def run(self): self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) - audio_lm_cpp.process_full( + response = audio_lm_cpp.process_full( self.context, ctypes.byref(self.ctx_params), is_qwen=self.is_qwen - ) + ).decode("utf-8") + print(response) except KeyboardInterrupt: print("\nExiting...") From 0257acaa66c1e39dfd693bf2cdaf27654d817e0f Mon Sep 17 00:00:00 2001 From: Brian <122925040+JoyboyBrian@users.noreply.github.com> Date: Wed, 6 Nov 2024 15:58:10 -0800 Subject: [PATCH 054/160] Update README.md for swift package --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f806faa8..f29b8db9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,9 @@ [![MacOS][MacOS-image]][release-url] [![Linux][Linux-image]][release-url] [![Windows][Windows-image]][release-url] -[![GitHub Release](https://img.shields.io/github/v/release/NexaAI/nexa-sdk)](https://github.com/NexaAI/nexa-sdk/releases/latest) [![Build workflow](https://img.shields.io/github/actions/workflow/status/NexaAI/nexa-sdk/ci.yaml?label=CI&logo=github)](https://github.com/NexaAI/nexa-sdk/actions/workflows/ci.yaml?query=branch%3Amain) ![GitHub License](https://img.shields.io/github/license/NexaAI/nexa-sdk) +[![GitHub Release](https://img.shields.io/github/v/release/NexaAI/nexa-sdk)](https://github.com/NexaAI/nexa-sdk/releases/latest) [![Build workflow](https://img.shields.io/github/actions/workflow/status/NexaAI/nexa-sdk/ci.yaml?label=CI&logo=github)](https://github.com/NexaAI/nexa-sdk/actions/workflows/ci.yaml?query=branch%3Amain) ![GitHub License](https://img.shields.io/github/license/NexaAI/nexa-sdk) + +[![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FNexaAI%2Fnexa-sdk%2Fbadge%3Ftype%3Dswift-versions)](https://swiftpackageindex.com/NexaAI/nexa-sdk) [![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FNexaAI%2Fnexa-sdk%2Fbadge%3Ftype%3Dplatforms)](https://swiftpackageindex.com/NexaAI/nexa-sdk) From 30d4ae7444dcb197d86f7367de4c7cf7852f1bf1 Mon Sep 17 00:00:00 2001 From: JoyboyBrian Date: Wed, 6 Nov 2024 16:46:14 -0800 Subject: [PATCH 055/160] add a send button for ios demo app --- examples/swift-test/Shared/ContentView.swift | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/examples/swift-test/Shared/ContentView.swift b/examples/swift-test/Shared/ContentView.swift index 8b969b54..ad686c70 100644 --- a/examples/swift-test/Shared/ContentView.swift +++ b/examples/swift-test/Shared/ContentView.swift @@ -3,6 +3,7 @@ import SwiftUI struct ContentView: View { @State private var viewModel = ViewModel() @State private var prompt = "" + @FocusState private var isInputActive: Bool var body: some View { VStack { @@ -21,6 +22,18 @@ struct ContentView: View { 
guard !prompt.isEmpty else { return } viewModel.run(for: prompt) } + .focused($isInputActive) + + Button(action: { + guard !prompt.isEmpty else { return } + viewModel.run(for: prompt) + isInputActive = false + }) { + Text("Send") + .frame(maxWidth: .infinity) + } + .buttonStyle(.borderedProminent) + .padding(.bottom) ScrollView { Text(viewModel.result) From a9f4fab0acd2f88e9fcef0347553afd553983a79 Mon Sep 17 00:00:00 2001 From: qmeng222 Date: Thu, 7 Nov 2024 03:55:17 +0000 Subject: [PATCH 056/160] add GGUF model router support across all model types (NLP, Vision, Multimodal, and Audio) in Streamlit & refactor the code --- nexa/gguf/streamlit/streamlit_image_chat.py | 397 +++++++++++++++----- nexa/gguf/streamlit/streamlit_text_chat.py | 387 ++++++++++++++----- nexa/gguf/streamlit/streamlit_vlm.py | 344 ++++++++++++----- nexa/gguf/streamlit/streamlit_voice_chat.py | 358 ++++++++++++------ nexa/utils.py | 99 ++++- 5 files changed, 1199 insertions(+), 386 deletions(-) diff --git a/nexa/gguf/streamlit/streamlit_image_chat.py b/nexa/gguf/streamlit/streamlit_image_chat.py index 9bb7e8f8..a54def8e 100644 --- a/nexa/gguf/streamlit/streamlit_image_chat.py +++ b/nexa/gguf/streamlit/streamlit_image_chat.py @@ -1,29 +1,97 @@ import os import sys from PIL import Image -from nexa.general import pull_model import streamlit as st +from nexa.general import pull_model from nexa.gguf.nexa_inference_image import NexaImageInference +from nexa.utils import ( + get_model_options, + update_model_options, +) +from nexa.constants import ( + DEFAULT_IMG_GEN_PARAMS_LCM, + DEFAULT_IMG_GEN_PARAMS_TURBO, + DEFAULT_IMG_GEN_PARAMS, + NEXA_RUN_MODEL_MAP_IMAGE, + NEXA_RUN_MODEL_MAP_FLUX, +) import io -default_model = sys.argv[1] -is_local_path = False if sys.argv[2] == "False" else True -hf = False if sys.argv[3] == "False" else True - +specified_run_type = 'Computer Vision' +model_map = NEXA_RUN_MODEL_MAP_IMAGE | NEXA_RUN_MODEL_MAP_FLUX -@st.cache_resource -def load_model(model_path): - if is_local_path: - local_path = model_path - elif hf: - local_path, _ = pull_model(model_path, hf=True) +def get_default_params(model_path: str) -> dict: + """Get default parameters based on model type.""" + if "lcm-dreamshaper" in model_path or "flux" in model_path: + return DEFAULT_IMG_GEN_PARAMS_LCM.copy() # fast LCM models: 4 steps @ 1.0 guidance + elif "sdxl-turbo" in model_path: + return DEFAULT_IMG_GEN_PARAMS_TURBO.copy() # sdxl-turbo: 5 steps @ 5.0 guidance else: - local_path, run_type = pull_model(model_path) - nexa_model = NexaImageInference(model_path=model_path, local_path=local_path) - return nexa_model + return DEFAULT_IMG_GEN_PARAMS.copy() # standard SD models: 20 steps @ 7.5 guidance + +@st.cache_resource(show_spinner=False) +def load_model(model_path: str, is_local: bool = False, is_hf: bool = False): + """Load model with proper error handling.""" + try: + if is_local: + local_path = model_path + elif is_hf: + try: + local_path, _ = pull_model(model_path, hf=True) + update_model_options(specified_run_type, model_map) + except Exception as e: + st.error(f"Error pulling HuggingFace model: {str(e)}") + return None + else: + try: + # model hub case: + local_path, run_type = pull_model(model_path) + if not local_path or not run_type: + st.error(f"Failed to pull model {model_path} from Nexa Model Hub") + return None + update_model_options(specified_run_type, model_map) # update options after successful pull + except ValueError as e: + st.error(f"Error pulling model from Nexa Model Hub: {str(e)}") + return None + except Exception 
as e: + st.error(f"Unexpected error while pulling model: {str(e)}") + return None + + try: + nexa_model = NexaImageInference( + model_path=model_path, + local_path=local_path + ) + + # force refresh of model options after successful load: + update_model_options(specified_run_type, model_map) + + return nexa_model + + except Exception as e: + st.error(f"Error initializing model: {str(e)}") + return None + + except Exception as e: + st.error(f"Error in load_model: {str(e)}") + return None +@st.cache_resource(show_spinner=False) +def load_local_model(local_path: str): + """Load local model with default parameters.""" + try: + nexa_model = NexaImageInference( + model_path="local_model", + local_path=local_path + ) + update_model_options(specified_run_type, model_map) # update options after successful local model load + return nexa_model + except Exception as e: + st.error(f"Error loading local model: {str(e)}") + return None def generate_images(nexa_model: NexaImageInference, prompt: str, negative_prompt: str): + """Generate images using the model.""" output_dir = os.path.dirname(nexa_model.params["output_path"]) if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -37,93 +105,236 @@ def generate_images(nexa_model: NexaImageInference, prompt: str, negative_prompt sample_steps=nexa_model.params["num_inference_steps"], seed=nexa_model.params["random_seed"] ) - + return images +# main execution: +try: + # get command line arguments with proper error handling: + if len(sys.argv) < 4: + st.error("Missing required command line arguments.") + sys.exit(1) # program terminated with an error -st.markdown( - r""" - - """, - unsafe_allow_html=True, -) -st.title("Nexa AI Image Generation") -st.caption("Powered by Nexa AI SDK🐙") + default_model = sys.argv[1] + is_local_path = sys.argv[2].lower() == "true" + hf = sys.argv[3].lower() == "true" + + # UI setup: + st.set_page_config(page_title="Nexa AI Image Generation", layout="wide") + st.markdown( + r""" + + """, + unsafe_allow_html=True, + ) + st.title("Nexa AI Image Generation") + st.caption("Powered by Nexa AI SDK🐙") + + # force refresh model options on every page load: + if 'model_options' not in st.session_state: + st.session_state.model_options = get_model_options(specified_run_type, model_map) + else: + update_model_options(specified_run_type, model_map) -st.sidebar.header("Model Configuration") -model_path = st.sidebar.text_input("Model path", default_model) + # init session state variables: + if 'initialized' not in st.session_state: + st.session_state.current_model_path = None + st.session_state.current_local_path = None + st.session_state.current_hub_model = None + + if not is_local_path and not hf: + try: + with st.spinner(f"Loading model: {default_model}"): + st.session_state.nexa_model = load_model(default_model) + if st.session_state.nexa_model: + st.session_state.current_hub_model = default_model + except Exception as e: + st.error(f"Error loading default model: {str(e)}") + + if default_model not in st.session_state.model_options: + st.session_state.current_model_index = st.session_state.model_options.index("Use Model From Nexa Model Hub 🔍") + else: + try: + st.session_state.current_model_index = st.session_state.model_options.index(default_model) + except ValueError: + st.session_state.current_model_index = 0 + + st.session_state.initialized = True + + # model selection sidebar: + st.sidebar.header("Model Configuration") + + # update selectbox index based on current model + if 'nexa_model' in st.session_state: + if 
st.session_state.current_hub_model: + current_index = st.session_state.model_options.index("Use Model From Nexa Model Hub 🔍") + elif st.session_state.current_local_path: + current_index = st.session_state.model_options.index("Local Model 📁") + elif st.session_state.current_model_path: + current_index = st.session_state.model_options.index(st.session_state.current_model_path) + else: + current_index = st.session_state.current_model_index + else: + current_index = st.session_state.current_model_index -if not model_path: - st.warning( - "Please enter a valid path or identifier for the model in Nexa Model Hub to proceed." + model_path = st.sidebar.selectbox( + "Select a Model", + st.session_state.model_options, + index=current_index, + key='model_selectbox' ) - st.stop() - -if ( - "current_model_path" not in st.session_state - or st.session_state.current_model_path != model_path -): - st.session_state.current_model_path = model_path - st.session_state.nexa_model = load_model(model_path) - if st.session_state.nexa_model is None: - st.stop() - -st.sidebar.header("Generation Parameters") -num_inference_steps = st.sidebar.slider( - "Number of Inference Steps", - 1, - 100, - st.session_state.nexa_model.params["num_inference_steps"], -) -height = st.sidebar.slider( - "Height", 64, 1024, st.session_state.nexa_model.params["height"] -) -width = st.sidebar.slider( - "Width", 64, 1024, st.session_state.nexa_model.params["width"] -) -guidance_scale = st.sidebar.slider( - "Guidance Scale", 0.0, 20.0, st.session_state.nexa_model.params["guidance_scale"] -) -random_seed = st.sidebar.slider( - "Random Seed", 0, 10000, st.session_state.nexa_model.params["random_seed"] -) -st.session_state.nexa_model.params.update( - { - "num_inference_steps": num_inference_steps, - "height": height, - "width": width, - "guidance_scale": guidance_scale, - "random_seed": random_seed, - } -) + # handle model path input: + if model_path == "Local Model 📁": + local_model_path = st.sidebar.text_input("Enter local model path") + if not local_model_path: + st.warning("Please enter a valid local model path to proceed.") + st.stop() + local_model_path = local_model_path.strip() # remove spaces -prompt = st.text_input("Enter your prompt:") -negative_prompt = st.text_input("Enter your negative prompt (optional):") + # handle local model path changes: + if 'nexa_model' not in st.session_state or st.session_state.current_local_path != local_model_path: + with st.spinner("Loading local model..."): + st.session_state.nexa_model = load_local_model(local_model_path) + st.session_state.current_local_path = local_model_path + + elif model_path == "Use Model From Nexa Model Hub 🔍": + initial_value = default_model if not is_local_path and not hf else "" + hub_model_name = st.sidebar.text_input( + "Enter model name from Nexa Model Hub", + value=initial_value + ) + + # empty string check: + if not hub_model_name: + st.warning(""" + How to add a model from Nexa Model Hub: + \n1. Visit [Nexa Model Hub](https://nexaai.com/models) + \n2. Find a vision model using the task filters + \n3. Select your desired model and copy either: + \n - The full nexa run command, or (e.g., nexa run stable-diffusion-v1-4:q4_0) + \n - Simply the model name (e.g., stable-diffusion-v1-4:q4_0) + \n4. 
Paste it into the field on the sidebar and press enter + """) + st.stop() + + # process the input after checking it's not empty: + if hub_model_name.startswith("nexa run"): + hub_model_name = hub_model_name.split("nexa run")[-1].strip() + else: + hub_model_name = hub_model_name.strip() + + # handle hub model name changes: + if 'nexa_model' not in st.session_state or st.session_state.current_hub_model != hub_model_name: + with st.spinner("Loading model from hub..."): + st.session_state.nexa_model = load_model(hub_model_name) + if st.session_state.nexa_model: # only update if load was successful + st.session_state.current_hub_model = hub_model_name -if st.button("Generate Image"): - if not prompt: - st.warning("Please enter a prompt to proceed.") else: - with st.spinner("Generating images..."): - images = generate_images( - st.session_state.nexa_model, prompt, negative_prompt - ) - st.success("Images generated successfully!") - for i, image in enumerate(images): - st.image(image, caption=f"Generated Image", use_column_width=True) - - img_byte_arr = io.BytesIO() - image.save(img_byte_arr, format='PNG') - img_byte_arr = img_byte_arr.getvalue() - - st.download_button( - label=f"Download Image", - data=img_byte_arr, - file_name=f"generated_image.png", - mime="image/png" + # load selected model if it's not already loaded: + if ('nexa_model' not in st.session_state or getattr(st.session_state, 'current_model_path', None) != model_path): + with st.spinner(f"Loading model: {model_path}"): + st.session_state.nexa_model = load_model(model_path) + if st.session_state.nexa_model: # only update if load was successful + st.session_state.current_model_path = model_path + + # generation params: + if 'nexa_model' in st.session_state and st.session_state.nexa_model: + st.sidebar.header("Generation Parameters") + + model_to_check = (st.session_state.current_hub_model if st.session_state.current_hub_model else st.session_state.current_local_path if st.session_state.current_local_path else st.session_state.current_model_path) + + # get model specific defaults: + default_params = get_default_params(model_to_check) + + # adjust step range based on model type: + max_steps = 100 + if "lcm-dreamshaper" in model_to_check or "flux" in model_to_check: + max_steps = 8 # 4-8 steps + elif "sdxl-turbo" in model_to_check: + max_steps = 10 # 5-10 steps + + # adjust guidance scale range based on model type: + max_guidance = 20.0 + if "lcm-dreamshaper" in model_to_check or "flux" in model_to_check: + max_guidance = 2.0 # 1.0-2.0 + elif "sdxl-turbo" in model_to_check: + max_guidance = 10.0 # 5.0-10.0 + + num_inference_steps = st.sidebar.slider( + "Number of Inference Steps", + 1, + max_steps, + default_params["num_inference_steps"] + ) + height = st.sidebar.slider( + "Height", + 64, + 1024, + default_params["height"] + ) + width = st.sidebar.slider( + "Width", + 64, + 1024, + default_params["width"] + ) + guidance_scale = st.sidebar.slider( + "Guidance Scale", + 0.0, + max_guidance, + default_params["guidance_scale"] + ) + random_seed = st.sidebar.slider( + "Random Seed", + 0, + 10000, + default_params["random_seed"] + ) + + st.session_state.nexa_model.params.update({ + "num_inference_steps": num_inference_steps, + "height": height, + "width": width, + "guidance_scale": guidance_scale, + "random_seed": random_seed, + }) + + # image generation interface: + prompt = st.text_input("Enter your prompt:") + negative_prompt = st.text_input("Enter your negative prompt (optional):") + + if st.button("Generate Image"): + if not prompt: + 
st.warning("Please enter a prompt to proceed.") + else: + with st.spinner("Generating images..."): + images = generate_images( + st.session_state.nexa_model, + prompt, + negative_prompt ) + st.success("Images generated successfully!") + for i, image in enumerate(images): + st.image(image, caption=f"Generated Image", use_column_width=True) + + img_byte_arr = io.BytesIO() + image.save(img_byte_arr, format='PNG') + img_byte_arr = img_byte_arr.getvalue() + + st.download_button( + label=f"Download Image", + data=img_byte_arr, + file_name=f"generated_image.png", + mime="image/png" + ) + +except Exception as e: + st.error(f"An unexpected error occurred: {str(e)}") + import traceback + st.error(f"Traceback: {traceback.format_exc()}") diff --git a/nexa/gguf/streamlit/streamlit_text_chat.py b/nexa/gguf/streamlit/streamlit_text_chat.py index 542e8059..4adf60e3 100644 --- a/nexa/gguf/streamlit/streamlit_text_chat.py +++ b/nexa/gguf/streamlit/streamlit_text_chat.py @@ -1,112 +1,313 @@ import sys -from typing import Iterator - +import subprocess +import re +from typing import Iterator, List import streamlit as st from nexa.general import pull_model from nexa.gguf.nexa_inference_text import NexaTextInference +from nexa.utils import ( + get_model_options, + update_model_options, +) +from nexa.constants import ( + DEFAULT_TEXT_GEN_PARAMS, + NEXA_RUN_MODEL_MAP_TEXT, +) -default_model = sys.argv[1] -is_local_path = False if sys.argv[2] == "False" else True -hf = False if sys.argv[3] == "False" else True - -@st.cache_resource -def load_model(model_path): - st.session_state.messages = [] - if is_local_path: - local_path = model_path - elif hf: - local_path, _ = pull_model(model_path, hf=True) - else: - local_path, run_type = pull_model(model_path) - nexa_model = NexaTextInference(model_path=model_path, local_path=local_path) - return nexa_model +specified_run_type = 'NLP' +model_map = NEXA_RUN_MODEL_MAP_TEXT + +# init: +DEFAULT_PARAMS = DEFAULT_TEXT_GEN_PARAMS.copy() + +@st.cache_resource(show_spinner=False) +def load_model(model_path: str, is_local: bool = False, is_hf: bool = False): + """Load model with proper error handling and state management.""" + try: + st.session_state.messages = [] + + if is_local: + local_path = model_path + elif is_hf: + try: + local_path, _ = pull_model(model_path, hf=True) + update_model_options(specified_run_type, model_map) # update options after successful pull + except Exception as e: + st.error(f"Error pulling HuggingFace model: {str(e)}") + return None + else: + try: + # model hub case: + local_path, run_type = pull_model(model_path) + if not local_path or not run_type: + st.error(f"Failed to pull model {model_path} from Nexa Model Hub") + return None + update_model_options(specified_run_type, model_map) # update options after successful pull + except ValueError as e: + st.error(f"Error pulling model from Nexa Model Hub: {str(e)}") + return None + except Exception as e: + st.error(f"Unexpected error while pulling model: {str(e)}") + return None + + try: + nexa_model = NexaTextInference( + model_path=model_path, + local_path=local_path, + **DEFAULT_PARAMS + ) + + # force refresh of model options after successful load: + update_model_options(specified_run_type, model_map) + + # reset the model index to include the new model: + if model_path in st.session_state.model_options: + st.session_state.current_model_index = st.session_state.model_options.index(model_path) + return nexa_model + + except Exception as e: + st.error(f"Error initializing model: {str(e)}") + return None + 
except Exception as e: + st.error(f"Error in load_model: {str(e)}") + return None + +@st.cache_resource(show_spinner=False) +def load_local_model(local_path: str): + """Load local model with default parameters.""" + try: + st.session_state.messages = [] + nexa_model = NexaTextInference( + model_path="local_model", + local_path=local_path, + **DEFAULT_PARAMS + ) + update_model_options(specified_run_type, model_map) # update options after successful local model load + return nexa_model + except Exception as e: + st.error(f"Error loading local model: {str(e)}") + return None def generate_response(nexa_model: NexaTextInference) -> Iterator: + """Generate response from the model.""" user_input = st.session_state.messages[-1]["content"] if hasattr(nexa_model, "chat_format") and nexa_model.chat_format: return nexa_model._chat(user_input) else: return nexa_model._complete(user_input) -st.markdown( - r""" - - """, - unsafe_allow_html=True, -) -st.title("Nexa AI Text Generation") -st.caption("Powered by Nexa AI SDK🐙") +# main execution: +try: + # get command line arguments with proper error handling: + if len(sys.argv) < 4: + st.error("Missing required command line arguments.") + sys.exit(1) # program terminated with an error -st.sidebar.header("Model Configuration") -model_path = st.sidebar.text_input("Model path", default_model) + default_model = sys.argv[1] + is_local_path = sys.argv[2].lower() == "true" + hf = sys.argv[3].lower() == "true" -if not model_path: - st.warning( - "Please enter a valid path or identifier for the model in Nexa Model Hub to proceed." + # UI setup: + st.set_page_config(page_title="Nexa AI Text Generation", layout="wide") + st.markdown( + r""" + + """, + unsafe_allow_html=True, ) - st.stop() - -if ( - "current_model_path" not in st.session_state - or st.session_state.current_model_path != model_path -): - st.session_state.current_model_path = model_path - st.session_state.nexa_model = load_model(model_path) - if st.session_state.nexa_model is None: - st.stop() - -st.sidebar.header("Generation Parameters") -temperature = st.sidebar.slider( - "Temperature", 0.0, 1.0, st.session_state.nexa_model.params["temperature"] -) -max_new_tokens = st.sidebar.slider( - "Max New Tokens", 1, 500, st.session_state.nexa_model.params["max_new_tokens"] -) -top_k = st.sidebar.slider("Top K", 1, 100, st.session_state.nexa_model.params["top_k"]) -top_p = st.sidebar.slider( - "Top P", 0.0, 1.0, st.session_state.nexa_model.params["top_p"] -) + st.title("Nexa AI Text Generation") + st.caption("Powered by Nexa AI SDK🐙") -st.session_state.nexa_model.params.update( - { - "temperature": temperature, - "max_new_tokens": max_new_tokens, - "top_k": top_k, - "top_p": top_p, - } -) + # force refresh model options on every page load: + if 'model_options' not in st.session_state: + st.session_state.model_options = get_model_options(specified_run_type, model_map) + else: + update_model_options(specified_run_type, model_map) + + # init session state variables: + if 'initialized' not in st.session_state: + st.session_state.messages = [] + st.session_state.current_model_path = None + st.session_state.current_local_path = None + st.session_state.current_hub_model = None + + if not is_local_path and not hf: + try: + with st.spinner(f"Loading model: {default_model}"): + st.session_state.nexa_model = load_model(default_model) + if st.session_state.nexa_model: + st.session_state.current_hub_model = default_model + except Exception as e: + st.error(f"Error loading default model: {str(e)}") + + # set to model hub 
option if not found in list: + if default_model not in st.session_state.model_options: + st.session_state.current_model_index = st.session_state.model_options.index("Use Model From Nexa Model Hub 🔍") + else: + try: + st.session_state.current_model_index = st.session_state.model_options.index(default_model) + except ValueError: + st.session_state.current_model_index = 0 + + st.session_state.initialized = True + + # model selection sidebar: + st.sidebar.header("Model Configuration") + + # update the selectbox index based on the currently loaded model: + if 'nexa_model' in st.session_state: + if st.session_state.current_hub_model: + # if we have a hub model loaded, select the hub option: + current_index = st.session_state.model_options.index("Use Model From Nexa Model Hub 🔍") + elif st.session_state.current_local_path: + # if we have a local model loaded, select the local option: + current_index = st.session_state.model_options.index("Local Model 📁") + elif st.session_state.current_model_path: + # if we have a listed model loaded, find its index: + current_index = st.session_state.model_options.index(st.session_state.current_model_path) + else: + current_index = st.session_state.current_model_index + else: + current_index = st.session_state.current_model_index + + model_path = st.sidebar.selectbox( + "Select a Model", + st.session_state.model_options, + index=current_index, + key='model_selectbox' + ) + + # update current model index when selection changes: + current_index = st.session_state.model_options.index(model_path) + if current_index != st.session_state.current_model_index: + st.session_state.current_model_index = current_index + if 'nexa_model' in st.session_state: + del st.session_state.nexa_model + st.session_state.messages = [] + st.session_state.current_model_path = None + st.session_state.current_local_path = None + st.session_state.current_hub_model = None + + # handle model loading based on selection: + if model_path == "Local Model 📁": + local_model_path = st.sidebar.text_input("Enter local model path") + if not local_model_path: + st.warning("Please enter a valid local model path to proceed.") + st.stop() + + local_model_path = local_model_path.strip() # remove spaces + if 'nexa_model' not in st.session_state or st.session_state.current_local_path != local_model_path: + with st.spinner("Loading local model..."): + st.session_state.nexa_model = load_local_model(local_model_path) + st.session_state.current_local_path = local_model_path + + elif model_path == "Use Model From Nexa Model Hub 🔍": + initial_value = default_model if not is_local_path and not hf else "" + hub_model_name = st.sidebar.text_input( + "Enter model name from Nexa Model Hub", + value=initial_value + ) + + # empty string check: + if not hub_model_name: + st.warning(f""" + How to add a model from Nexa Model Hub: + \n1. Visit [Nexa Model Hub](https://nexaai.com/models) + \n2. Find a NLP model using the task filters (chat, uncensored, etc.) + \n3. Select your desired model and copy either: + \n - The full nexa run command (e.g., nexa run Sao10K/MN-BackyardAI-Party-12B-v1:gguf-q4_K_M), or + \n - Simply the model name (e.g., Sao10K/MN-BackyardAI-Party-12B-v1:gguf-q4_K_M) + \n4. 
Paste it into the "Enter model name from Nexa Model Hub" field on the sidebar and press enter + """) + st.stop() + + # process the input after checking it's not empty: + if hub_model_name.startswith("nexa run"): + hub_model_name = hub_model_name.split("nexa run")[-1].strip() + else: + hub_model_name = hub_model_name.strip() + + if 'nexa_model' not in st.session_state or st.session_state.current_hub_model != hub_model_name: + with st.spinner("Loading model from hub..."): + st.session_state.nexa_model = load_model(hub_model_name) + if st.session_state.nexa_model: # only update if load was successful + st.session_state.current_hub_model = hub_model_name + + else: + # load selected model if it's not already loaded: + if ('nexa_model' not in st.session_state or + getattr(st.session_state, 'current_model_path', None) != model_path): + with st.spinner(f"Loading model: {model_path}"): + st.session_state.nexa_model = load_model(model_path) + if st.session_state.nexa_model: # only update if load was successful + st.session_state.current_model_path = model_path + + # generation params: + if 'nexa_model' in st.session_state and st.session_state.nexa_model: + st.sidebar.header("Generation Parameters") + model_params = st.session_state.nexa_model.params + + temperature = st.sidebar.slider( + "Temperature", 0.0, 1.0, model_params.get("temperature", DEFAULT_PARAMS["temperature"]) + ) + max_new_tokens = st.sidebar.slider( + "Max New Tokens", 1, 500, model_params.get("max_new_tokens", DEFAULT_PARAMS["max_new_tokens"]) + ) + top_k = st.sidebar.slider( + "Top K", 1, 100, model_params.get("top_k", DEFAULT_PARAMS["top_k"]) + ) + top_p = st.sidebar.slider( + "Top P", 0.0, 1.0, model_params.get("top_p", DEFAULT_PARAMS["top_p"]) + ) + nctx = st.sidebar.slider( + "Context length", 1000, 9999, model_params.get("nctx", DEFAULT_PARAMS["nctx"]) + ) + + st.session_state.nexa_model.params.update({ + "temperature": temperature, + "max_new_tokens": max_new_tokens, + "top_k": top_k, + "top_p": top_p, + "nctx": nctx, + }) + + # chat interface: + for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + + if prompt := st.chat_input("Say something..."): + if 'nexa_model' not in st.session_state or not st.session_state.nexa_model: + st.error("Please wait for the model to load or select a valid model.") + else: + st.session_state.messages.append({"role": "user", "content": prompt}) + with st.chat_message("user"): + st.markdown(prompt) + + with st.chat_message("assistant"): + response_placeholder = st.empty() + full_response = "" + for chunk in generate_response(st.session_state.nexa_model): + choice = chunk["choices"][0] + if "delta" in choice: + delta = choice["delta"] + content = delta.get("content", "") + elif "text" in choice: + delta = choice["text"] + content = delta + + full_response += content + response_placeholder.markdown(full_response, unsafe_allow_html=True) + response_placeholder.markdown(full_response) + + st.session_state.messages.append({"role": "assistant", "content": full_response}) -if "messages" not in st.session_state: - st.session_state.messages = [] - -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - -if prompt := st.chat_input("Say something..."): - st.session_state.messages.append({"role": "user", "content": prompt}) - with st.chat_message("user"): - st.markdown(prompt) - - with st.chat_message("assistant"): - response_placeholder = st.empty() - full_response = "" - for 
chunk in generate_response(st.session_state.nexa_model): - choice = chunk["choices"][0] - if "delta" in choice: - delta = choice["delta"] - content = delta.get("content", "") - elif "text" in choice: - delta = choice["text"] - content = delta - - full_response += content - response_placeholder.markdown(full_response, unsafe_allow_html=True) - response_placeholder.markdown(full_response) - - st.session_state.messages.append({"role": "assistant", "content": full_response}) +except Exception as e: + st.error(f"An unexpected error occurred: {str(e)}") + import traceback + st.error(f"Traceback: {traceback.format_exc()}") diff --git a/nexa/gguf/streamlit/streamlit_vlm.py b/nexa/gguf/streamlit/streamlit_vlm.py index 25f48d0e..a581b167 100644 --- a/nexa/gguf/streamlit/streamlit_vlm.py +++ b/nexa/gguf/streamlit/streamlit_vlm.py @@ -1,40 +1,65 @@ import sys import tempfile -from typing import Iterator - +import subprocess +import re +from typing import List, Iterator import streamlit as st from PIL import Image from nexa.general import pull_model from nexa.gguf.nexa_inference_vlm import NexaVLMInference +from nexa.utils import ( + get_model_options, + update_model_options, +) +from nexa.constants import NEXA_RUN_MODEL_MAP_VLM -default_model = sys.argv[1] -is_local_path = False if sys.argv[2] == "False" else True -hf = False if sys.argv[3] == "False" else True -projector_local_path = sys.argv[4] if len(sys.argv) > 4 else None +specified_run_type = 'Multimodal' +model_map = NEXA_RUN_MODEL_MAP_VLM +# init from command line args: +try: + default_model = sys.argv[1] + is_local_path = sys.argv[2].lower() == "true" + hf = sys.argv[3].lower() == "true" + projector_local_path = sys.argv[4] if len(sys.argv) > 4 else None +except IndexError: + st.error("Missing required command line arguments.") + sys.exit(1) # terminate with an error -@st.cache_resource -def load_model(model_path): - if is_local_path: - local_path = model_path - elif hf: - local_path, _ = pull_model(model_path, hf=True) - else: - local_path, run_type = pull_model(model_path) - - if is_local_path: - nexa_model = NexaVLMInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path) - else: - nexa_model = NexaVLMInference(model_path=model_path, local_path=local_path) - return nexa_model - +@st.cache_resource(show_spinner=False) +def load_model(model_path, is_local=False, is_hf=False, projector_path=None): + """Load model with model mapping logic.""" + try: + if is_local: + local_path = model_path + nexa_model = NexaVLMInference( + model_path=model_path, + local_path=local_path, + projector_local_path=projector_path + ) + elif is_hf: + local_path, _ = pull_model(model_path, hf=True) + nexa_model = NexaVLMInference(model_path=model_path, local_path=local_path) + else: + # get the actual model name from the mapping if it exists: + if model_path in NEXA_RUN_MODEL_MAP_VLM: + real_model_path = NEXA_RUN_MODEL_MAP_VLM[model_path] + local_path, run_type = pull_model(real_model_path) + else: + local_path, run_type = pull_model(model_path) + nexa_model = NexaVLMInference(model_path=model_path, local_path=local_path) + return nexa_model + except Exception as e: + st.error(f"Error loading model: {str(e)}") + return None def generate_response( nexa_model: NexaVLMInference, image_path: str, user_input: str ) -> Iterator: return nexa_model._chat(user_input, image_path) - +# UI setup: +st.set_page_config(page_title="Nexa AI Multimodal Generation", layout="wide") st.markdown( r""" + """, + unsafe_allow_html=True, +) 
+st.title("Nexa AI Omni VLM Generation") +st.caption("Powered by Nexa AI SDK🐙") + +st.sidebar.header("Model Configuration") +model_path = st.sidebar.text_input("Model path", default_model) + +if not model_path: + st.warning( + "Please enter a valid path or identifier for the model in Nexa Model Hub to proceed." + ) + st.stop() + +if ( + "current_model_path" not in st.session_state + or st.session_state.current_model_path != model_path +): + st.session_state.current_model_path = model_path + st.session_state.nexa_model = load_model(model_path) + if st.session_state.nexa_model is None: + st.stop() + +user_input = st.text_input("Enter your text input:") +uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"]) + +generate_button = st.button("Send") +spinner_placeholder = st.empty() +success_label = st.empty() +response_placeholder = st.empty() +image_placeholder = st.empty() + +if uploaded_file: + image_placeholder.image( + uploaded_file, caption="Uploaded Image", use_column_width=True + ) + +if generate_button: + if not user_input and not uploaded_file: + st.warning("Please enter text input and upload an image to proceed.") + else: + with spinner_placeholder: + with st.spinner("Generating description..."): + with tempfile.NamedTemporaryFile() as image_path: + full_path = None + if uploaded_file: + ext = uploaded_file.name.split(".")[-1] + full_path = f"{image_path.name}.{ext}" + with Image.open(uploaded_file) as img: + img.save(full_path) + + response = generate_response( + st.session_state.nexa_model, full_path, user_input + ) + + response_placeholder.write(response) + success_label.success("Response generated successfully.") \ No newline at end of file From 5cbf13d3291a1c215ff633851e4c59ef0df2f932 Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Thu, 7 Nov 2024 00:54:17 -0800 Subject: [PATCH 058/160] update --- nexa/gguf/nexa_inference_audio_lm.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index a349cbde..f264cd3a 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -291,10 +291,20 @@ def run_streamlit(self, model_path: str, is_local_path = False, hf = False, proj default="auto", help="Device to use for inference (auto, cpu, or gpu)", ) + parser.add_argument( + "-st", + "--streamlit", + action="store_true", + help="Run the inference in Streamlit UI", + ) + args = parser.parse_args() kwargs = {k: v for k, v in vars(args).items() if v is not None} model_path = kwargs.pop("model_path") device = kwargs.pop("device", "auto") inference = NexaAudioLMInference(model_path, device=device, **kwargs) - inference.run() + if args.streamlit: + inference.run_streamlit(model_path) + else: + inference.run() From 8016aacdff52aa2c1795fc0c1a3528dfd10aa303 Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Thu, 7 Nov 2024 14:47:00 -0800 Subject: [PATCH 059/160] streamlit bugfix --- nexa/gguf/streamlit/streamlit_audio_lm.py | 1 - nexa/gguf/streamlit/streamlit_vlm.py | 4 +--- nexa/gguf/streamlit/streamlit_vlm_omni.py | 4 +--- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/nexa/gguf/streamlit/streamlit_audio_lm.py b/nexa/gguf/streamlit/streamlit_audio_lm.py index 4d81b19c..ec4f4dac 100644 --- a/nexa/gguf/streamlit/streamlit_audio_lm.py +++ b/nexa/gguf/streamlit/streamlit_audio_lm.py @@ -8,7 +8,6 @@ from nexa.general import pull_model from nexa.gguf.nexa_inference_audio_lm import NexaAudioLMInference -from nexa.constants 
import NEXA_RUN_MODEL_MAP_AUDIO_LM
 
 default_model = sys.argv[1]
 is_local_path = False if sys.argv[2] == "False" else True
diff --git a/nexa/gguf/streamlit/streamlit_vlm.py b/nexa/gguf/streamlit/streamlit_vlm.py
index 25f48d0e..c9357904 100644
--- a/nexa/gguf/streamlit/streamlit_vlm.py
+++ b/nexa/gguf/streamlit/streamlit_vlm.py
@@ -97,9 +97,7 @@ def generate_response(
 image_placeholder = st.empty()
 
 if uploaded_file:
-    image_placeholder.image(
-        uploaded_file, caption="Uploaded Image", use_column_width=True
-    )
+    image_placeholder.image(uploaded_file, caption="Uploaded Image")
 
 if generate_button:
     if not user_input and not uploaded_file:
diff --git a/nexa/gguf/streamlit/streamlit_vlm_omni.py b/nexa/gguf/streamlit/streamlit_vlm_omni.py
index 1bfb7931..3977597c 100644
--- a/nexa/gguf/streamlit/streamlit_vlm_omni.py
+++ b/nexa/gguf/streamlit/streamlit_vlm_omni.py
@@ -71,9 +71,7 @@ def generate_response(nexa_model: NexaOmniVlmInference, image_path: str, user_in
 image_placeholder = st.empty()
 
 if uploaded_file:
-    image_placeholder.image(
-        uploaded_file, caption="Uploaded Image", use_column_width=True
-    )
+    image_placeholder.image(uploaded_file, caption="Uploaded Image")
 
 if generate_button:
     if not user_input and not uploaded_file:

From c3c959fecf360a78f74d0bd64e39e43abc97af39 Mon Sep 17 00:00:00 2001
From: Yicheng Qian
Date: Thu, 7 Nov 2024 16:11:15 -0800
Subject: [PATCH 060/160] modify to apply correct chat format

---
 .../Models/ChatCompletionMessage.swift        | 72 ++++++++++++-------
 1 file changed, 46 insertions(+), 26 deletions(-)

diff --git a/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift b/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift
index c6cae277..bae89664 100644
--- a/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift
+++ b/swift/Sources/NexaSwift/Models/ChatCompletionMessage.swift
@@ -220,11 +220,12 @@ class OctopusV2Formatter: ChatFormatter {
 
 //https://www.llama.com/docs/model-cards-and-prompt-formats/meta-llama-2/
 class LlamaFormatter: ChatFormatter {
-    private let systemTemplate = "[INST] <<SYS>>\n{system_message}\n<</SYS>>"
+    private let systemTemplate = "<<SYS>>\n{system_message}\n<</SYS>>\n\n"
     private let roles: [String: String] = [
-        "user": "[INST]",
-        "assistant": "[/INST]"
+        "user": "[INST] ",
+        "assistant": " [/INST] "
     ]
+    private let endToken = "</s>"
 
     func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse {
         let formattedMessages = mapRoles(messages: messages)
@@ -233,7 +234,7 @@ class LlamaFormatter: ChatFormatter {
             systemTemplate.replacingOccurrences(of: "{system_message}", with: msg)
         }
         let prompt = formatPrompt(systemMessage: formattedSystemMessage, messages: formattedMessages)
-        return ChatFormatterResponse(prompt: prompt + "[/INST]", stop: ["</s>"])
+        return ChatFormatterResponse(prompt: prompt, stop: [endToken])
     }
 
     private func getSystemMessage(_ messages: [ChatCompletionRequestMessage]) -> String? 
{
@@ -268,24 +269,36 @@ class LlamaFormatter: ChatFormatter {
     }
 
     private func formatPrompt(systemMessage: String?, messages: [(String, String?)]) -> String {
-        var prompt = ""
+        var conversations: [String] = []
+        var currentConversation = ""
-        if let (firstRole, firstContent) = messages.first,
-           let content = firstContent {
-            if let sysMsg = systemMessage {
-                prompt += "\(firstRole) \(sysMsg)\n\(content)"
-            } else {
-                prompt += "\(firstRole) \(content)"
+        for (index, (role, content)) in messages.enumerated() {
+            if index % 2 == 0 { // User message
+                if !currentConversation.isEmpty {
+                    conversations.append(currentConversation + " " + endToken)
+                }
+                currentConversation = role // [INST]
+                if index == 0 && systemMessage != nil {
+                    currentConversation += systemMessage! + content!
+                } else {
+                    currentConversation += content ?? ""
+                }
+            } else { // Assistant message
+                if let content = content {
+                    currentConversation += role + content // [/INST] response
+                }
             }
         }
-        for (role, content) in messages.dropFirst() {
-            if let content = content {
-                prompt += " \(role) \(content)"
-            }
+        // Add the last conversation if it's a user message without response
+        if messages.count % 2 != 0 {
+            currentConversation += roles["assistant"]!
+            conversations.append(currentConversation)
+        } else if !currentConversation.isEmpty {
+            conversations.append(currentConversation + endToken)
         }
-        return prompt.trimmingCharacters(in: .whitespacesAndNewlines)
+        return conversations.joined(separator: "\n")
     }
 }
 
@@ -296,8 +309,7 @@ class Llama3Formatter: ChatFormatter {
         "user": "<|start_header_id|>user<|end_header_id|>\n\n",
         "assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n"
     ]
-
-    private let separator = "<|eot_id|>\n"
+    private let endToken = "<|eot_id|>"
 
     func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse {
         var formattedMessages = mapRoles(messages: messages)
@@ -306,7 +318,7 @@ class Llama3Formatter: ChatFormatter {
 
         let prompt = formatPrompt(formattedMessages)
 
-        return ChatFormatterResponse(prompt: prompt, stop: [separator])
+        return ChatFormatterResponse(prompt: prompt, stop: [endToken])
     }
 
     private func mapRoles(messages: [ChatCompletionRequestMessage]) -> [(String, String?)] {
@@ -345,7 +357,7 @@ class Llama3Formatter: ChatFormatter {
         var prompt = "<|begin_of_text|>"
         for (role, content) in formattedMessages {
             if let content = content {
-                prompt += "\(role)\(content.trimmingCharacters(in: .whitespacesAndNewlines))\(separator)"
+                prompt += "\(role)\(content.trimmingCharacters(in: .whitespacesAndNewlines))\(endToken)"
             } else {
                 prompt += "\(role) "
             }
@@ -362,6 +374,7 @@ class GemmaFormatter: ChatFormatter {
         "assistant": "<start_of_turn>model\n"
     ]
 
+    private let endToken = "<end_of_turn>"
     private let separator = "<end_of_turn>\n"
 
     func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse {
@@ -369,7 +382,7 @@ class GemmaFormatter: ChatFormatter {
         formattedMessages.append((roles["assistant"]!, nil))
 
         let prompt = formatPrompt(formattedMessages)
-        return ChatFormatterResponse(prompt: prompt, stop: [separator])
+        return ChatFormatterResponse(prompt: prompt, stop: [endToken])
     }
 
     private func mapRoles(messages: [ChatCompletionRequestMessage]) -> [(String, String?)] {
@@ -408,6 +421,7 @@ class GemmaFormatter: ChatFormatter {
     }
 }
 
+// https://qwen.readthedocs.io/zh-cn/latest/getting_started/concepts.html#control-tokens-chat-template
 class QwenFormatter: ChatFormatter {
     private let roles: [String: String] = [
         "user": "<|im_start|>user",
@@ -464,16 +478,17 @@ class QwenFormatter: ChatFormatter {
     }
 }
 
-//https://www.promptingguide.ai/models/mistral-7b
+// https://www.promptingguide.ai/models/mistral-7b#chat-template-for-mistral-7b-instruct
 class MistralFormatter: ChatFormatter {
     private let endToken = "</s>"
+    private let conversationStart = "<s>"
     private let instructStart = "[INST] "
-    private let instructEnd = " [/INST]"
+    private let instructEnd = " [/INST] "
 
     func format(messages: [ChatCompletionRequestMessage]) -> ChatFormatterResponse {
-        var prompt = ""
+        var prompt = conversationStart // Add only once at the start
 
-        for message in messages {
+        for (index, message) in messages.enumerated() {
             switch message {
             case .user(let userMessage):
                 switch userMessage.content {
@@ -491,7 +506,12 @@ class MistralFormatter: ChatFormatter {
                 continue
             }
         }
-        prompt += instructEnd
+
+        // Add instructEnd if the last message was from user (waiting for AI response)
+        if messages.last.map({ if case .user = $0 { return true } else { return false } }) ?? false {
+            prompt += instructEnd
+        }
+
         return ChatFormatterResponse(prompt: prompt, stop: [endToken])
     }
 }

From e34ad7226619a3e1d28f670f0abe0e04cc2f9955 Mon Sep 17 00:00:00 2001
From: Yicheng Qian
Date: Fri, 8 Nov 2024 11:30:08 -0800
Subject: [PATCH 061/160] update omni_vlm_cpp

---
 nexa/gguf/llama/omni_vlm_cpp.py      | 6 +++---
 nexa/gguf/nexa_inference_vlm_omni.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/nexa/gguf/llama/omni_vlm_cpp.py b/nexa/gguf/llama/omni_vlm_cpp.py
index 984ba71b..ab7a5699 100644
--- a/nexa/gguf/llama/omni_vlm_cpp.py
+++ b/nexa/gguf/llama/omni_vlm_cpp.py
@@ -55,11 +55,11 @@ def _get_lib():
 omni_char_p = ctypes.c_char_p
 
 
-def omnivlm_init(llm_model_path: omni_char_p, mmproj_model_path: omni_char_p):
-    return _lib.omnivlm_init(llm_model_path, mmproj_model_path)
+def omnivlm_init(llm_model_path: omni_char_p, mmproj_model_path: omni_char_p, vlm_version: omni_char_p):
+    return _lib.omnivlm_init(llm_model_path, mmproj_model_path, vlm_version)
 
 
-_lib.omnivlm_init.argtypes = [omni_char_p, omni_char_p]
+_lib.omnivlm_init.argtypes = [omni_char_p, omni_char_p, omni_char_p]
 _lib.omnivlm_init.restype = None
 
 
diff --git a/nexa/gguf/nexa_inference_vlm_omni.py b/nexa/gguf/nexa_inference_vlm_omni.py
index 23ce6c88..56f3380a 100644
--- a/nexa/gguf/nexa_inference_vlm_omni.py
+++ b/nexa/gguf/nexa_inference_vlm_omni.py
@@ -95,7 +95,7 @@ def _load_model(self):
             self.ctx_params_mmproj = ctypes.c_char_p(
                 self.projector_downloaded_path.encode("utf-8")
             )
-            omni_vlm_cpp.omnivlm_init(self.ctx_params_model, self.ctx_params_mmproj)
+            omni_vlm_cpp.omnivlm_init(self.ctx_params_model, self.ctx_params_mmproj, self.omni_vlm_version)
         except Exception as e:
             logging.error(f"Error loading model: {e}")
             raise

From e80a8764f15c6b48286088af771c8c439394b66a Mon Sep 17 00:00:00 2001
From: Yicheng Qian
Date: Fri, 8 Nov 2024 13:47:26 -0800
Subject: [PATCH 062/160] update NexaOmniVlmInference class

---
 nexa/constants.py                    |  8 +++++
 nexa/gguf/nexa_inference_vlm_omni.py | 53 +++++++++++++++++-----------
 2 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/nexa/constants.py b/nexa/constants.py
index 04ec51fb..d59c4af9 100644
--- a/nexa/constants.py
+++ b/nexa/constants.py
@@ -174,12 +174,20 @@ class ModelType(Enum):
     "omnivision": "Octo-omni-vision:projector-fp16",
     "Octo-omni-vision:fp16": "Octo-omni-vision:projector-fp16",
     "Octo-omni-vision:q4_0": "Octo-omni-vision:projector-q4_0",
+    "omnivision-v2": "Octo-omni-vision-v2:projector-fp16",
+    "Octo-omni-vision-v2:fp16": "Octo-omni-vision-v2:projector-fp16",
+    "omnivision-v2-ocr": 
"Octo-omni-vision-v2-ocr:projector-fp16", + "Octo-omni-vision-v2-ocr:fp16": "Octo-omni-vision-v2-ocr:projector-fp16", } NEXA_RUN_OMNI_VLM_MAP = { "omnivision": "Octo-omni-vision:model-fp16", "Octo-omni-vision:fp16": "Octo-omni-vision:model-fp16", "Octo-omni-vision:q4_0": "Octo-omni-vision:model-q4_0", + "omnivision-v2": "Octo-omni-vision-v2:model-fp16", + "Octo-omni-vision-v2:fp16": "Octo-omni-vision-v2:model-fp16", + "omnivision-v2-ocr": "Octo-omni-vision-v2-ocr:model-fp16", + "Octo-omni-vision-v2-ocr:fp16": "Octo-omni-vision-v2-ocr:model-fp16", } NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { diff --git a/nexa/gguf/nexa_inference_vlm_omni.py b/nexa/gguf/nexa_inference_vlm_omni.py index 56f3380a..4e83b674 100644 --- a/nexa/gguf/nexa_inference_vlm_omni.py +++ b/nexa/gguf/nexa_inference_vlm_omni.py @@ -7,7 +7,6 @@ from streamlit.web import cli as stcli from nexa.utils import nexa_prompt, SpinningCursorAnimation from nexa.constants import ( - DEFAULT_TEXT_GEN_PARAMS, NEXA_RUN_OMNI_VLM_PROJECTOR_MAP, NEXA_RUN_OMNI_VLM_MAP ) @@ -32,8 +31,6 @@ def __init__( if model_path is None and local_path is None: raise ValueError("Either model_path or local_path must be provided.") - self.params = DEFAULT_TEXT_GEN_PARAMS.copy() - self.params.update(kwargs) self.model = None self.projector = None self.projector_path = NEXA_RUN_OMNI_VLM_PROJECTOR_MAP.get(model_path, None) @@ -41,30 +38,26 @@ def __init__( self.projector_downloaded_path = projector_local_path self.device = device self.context = None + self.omni_vlm_version = None if self.device == "auto" or self.device == "gpu": self.n_gpu_layers = -1 if is_gpu_available() else 0 else: self.n_gpu_layers = 0 - if ( - self.downloaded_path is not None - and self.projector_downloaded_path is not None - ): + if self.downloaded_path is not None and self.projector_downloaded_path is not None: # when running from local, both path should be provided - pass + self.omni_vlm_version = self._determine_vlm_version(str(self.downloaded_path)) elif self.downloaded_path is not None: if model_path in NEXA_RUN_OMNI_VLM_MAP: self.projector_path = NEXA_RUN_OMNI_VLM_PROJECTOR_MAP[model_path] - self.projector_downloaded_path, _ = pull_model( - self.projector_path, **kwargs - ) + self.projector_downloaded_path, _ = pull_model(self.projector_path, **kwargs) + self.omni_vlm_version = self._determine_vlm_version(model_path) elif model_path in NEXA_RUN_OMNI_VLM_MAP: self.model_path = NEXA_RUN_OMNI_VLM_MAP[model_path] self.projector_path = NEXA_RUN_OMNI_VLM_PROJECTOR_MAP[model_path] self.downloaded_path, _ = pull_model(self.model_path, **kwargs) - self.projector_downloaded_path, _ = pull_model( - self.projector_path, **kwargs - ) + self.projector_downloaded_path, _ = pull_model(self.projector_path, **kwargs) + self.omni_vlm_version = self._determine_vlm_version(model_path) elif Path(model_path).parent.exists(): local_dir = Path(model_path).parent model_name = Path(model_path).name @@ -80,22 +73,33 @@ def __init__( "Make sure to name them as 'model-.gguf' and 'projector-.gguf'." 
) exit(1) + self.omni_vlm_version = self._determine_vlm_version(model_path) else: logging.error("VLM user model from hub is not supported yet.") exit(1) + + # Override version if specified in kwargs + if 'omni_vlm_version' in kwargs: + self.omni_vlm_version = kwargs.get('omni_vlm_version') + with suppress_stdout_stderr(): self._load_model() + def _determine_vlm_version(self, path_str: str) -> str: + """Helper function to determine VLM version from path string.""" + if 'v2-ocr' in path_str: + return "vlm-81-ocr" + elif 'v2' in path_str: + return "vlm-81-instruct" + return "nano-vlm-instruct" + @SpinningCursorAnimation() def _load_model(self): try: - self.ctx_params_model = ctypes.c_char_p( - self.downloaded_path.encode("utf-8") - ) - self.ctx_params_mmproj = ctypes.c_char_p( - self.projector_downloaded_path.encode("utf-8") - ) - omni_vlm_cpp.omnivlm_init(self.ctx_params_model, self.ctx_params_mmproj, self.omni_vlm_version) + self.ctx_params_model = ctypes.c_char_p(self.downloaded_path.encode("utf-8")) + self.ctx_params_mmproj = ctypes.c_char_p(self.projector_downloaded_path.encode("utf-8")) + self.ctx_params_omni_vlm_version = ctypes.c_char_p(self.omni_vlm_version.encode("utf-8")) + omni_vlm_cpp.omnivlm_init(self.ctx_params_model, self.ctx_params_mmproj, self.ctx_params_omni_vlm_version) except Exception as e: logging.error(f"Error loading model: {e}") raise @@ -170,6 +174,13 @@ def run_streamlit(self, model_path: str, is_local_path = False, hf = False, proj action="store_true", help="Run the inference in Streamlit UI", ) + parser.add_argument( + "--omni_vlm_version", + type=str, + choices=["vlm-81-ocr", "vlm-81-instruct", "nano-vlm-instruct"], + default="vlm-81-instruct", + help="omni-vlm-version to use", + ) args = parser.parse_args() kwargs = {k: v for k, v in vars(args).items() if v is not None} From 8563977a107a1b1023beb63cf730d408ec61dd6b Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sat, 9 Nov 2024 00:42:54 +0000 Subject: [PATCH 063/160] add q8_0 for qwenaudio --- nexa/constants.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nexa/constants.py b/nexa/constants.py index 32e6a53a..3c7df173 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -112,6 +112,7 @@ class ModelType(Enum): NEXA_RUN_MODEL_MAP_AUDIO_LM = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", + "qwen2audio:q8_0": "Qwen2-Audio-7.8B-Instruct:model-q8_0", "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "omniaudio": "Octo-omni-audio:model-fp16", "omniaudio:fp16": "Octo-omni-audio:model-fp16", @@ -185,6 +186,7 @@ class ModelType(Enum): NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", + "qwen2audio:q8_0": "Qwen2-Audio-7.8B-Instruct:projector-q8_0", "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "omniaudio": "Octo-omni-audio:projector-fp16", "omniaudio:fp16": "Octo-omni-audio:projector-fp16", From 47796f5f5e1072ef0a0330d998273f3d6be21bd8 Mon Sep 17 00:00:00 2001 From: Zack Zhiyuan Li Date: Sat, 9 Nov 2024 00:48:00 +0000 Subject: [PATCH 064/160] make llama.cpp in master-release branch --- dependency/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 38c6fa3b..b535cd94 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 38c6fa3b8fb6c88075102fd859d04eaea27aa87c +Subproject commit 
b535cd941e657ac1984d8022dd5f0c98f2b9e265 From 9ecb4ea16977b1f925bc77249f6c1c9d52feaa44 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 01:38:25 +0000 Subject: [PATCH 065/160] set default model type when lp or hf, for run and server --- nexa/cli/entry.py | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 6736cff1..53b947b4 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -44,14 +44,13 @@ def run_ggml_inference(args): run_type = None if model_type: run_type = ModelType[model_type].value + elif is_local_path or hf: + run_type = ModelType["NLP"].value local_path = None projector_local_path = None if is_local_path or hf: - if not model_type: - print("Error: --model_type must be provided when using --local_path or --huggingface") - return if is_local_path: local_path = os.path.abspath(model_path) model_path = local_path @@ -80,7 +79,7 @@ def run_ggml_inference(args): else: # Model Hub from nexa.general import pull_model local_path, run_type = pull_model(model_path) - + stop_words = kwargs.pop("stop_words", None) try: @@ -124,16 +123,21 @@ def run_ggml_inference(args): else: print(f"Unknown task: {run_type}. Skipping inference.") return + except Exception as e: print(f"Error running ggml inference: {e}") print(f"Please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") return + if (is_local_path or hf) and not model_type: + print("No model type specified. Running with default model type: NLP") + print("You can specify a different model type using the -mt flag") + if hasattr(args, 'streamlit') and args.streamlit: if run_type == "Multimodal" or run_type == "AudioLM": - inference.run_streamlit(model_path, is_local_path = is_local_path, hf = hf, projector_local_path = projector_local_path) + inference.run_streamlit(model_path, is_local_path=is_local_path, hf=hf, projector_local_path=projector_local_path) else: - inference.run_streamlit(model_path, is_local_path = is_local_path, hf = hf) + inference.run_streamlit(model_path, is_local_path=is_local_path, hf=hf) else: inference.run() @@ -149,6 +153,8 @@ def run_ggml_server(args): run_type = None if model_type: run_type = ModelType[model_type].value + elif is_local_path or hf: + run_type = ModelType["NLP"].value projector_local_path = None if run_type == "Multimodal" and is_local_path: @@ -167,6 +173,10 @@ def run_ggml_server(args): print("Error: For Audio models with --local_path, the provided path must be a directory containing all related files.") return + if (is_local_path or hf) and not model_type: + print("No model type specified. 
Running with default model type: NLP") + print("You can specify a different model type using the -mt flag") + NexaServer( model_path_arg=model_path, is_local_path_arg=is_local_path, @@ -185,12 +195,11 @@ def run_onnx_inference(args): run_type = None if model_type: run_type = ModelType[model_type].value + elif is_local_path: + run_type = ModelType["NLP"].value local_path = None if is_local_path: - if not model_type: - print("Error: --model_type must be provided when using --local_path") - return local_path = os.path.abspath(model_path) if not os.path.isdir(local_path): print("Error: For ONNX models, the provided path must be a directory.") @@ -221,6 +230,10 @@ def run_onnx_inference(args): print(f"Please refer to our docs to install nexaai[onnx] package: https://docs.nexaai.com/getting-started/installation ") return + if is_local_path and not model_type: + print("No model type specified. Running with default model type: NLP") + print("You can specify a different model type using the -mt flag") + if hasattr(args, 'streamlit') and args.streamlit: inference.run_streamlit(model_path, is_local_path=is_local_path) else: @@ -338,9 +351,9 @@ def main(): run_parser.add_argument("model_path", type=str, help="Path or identifier for the model in Nexa Model Hub") run_parser.add_argument("-st", "--streamlit", action="store_true", help="Run the inference in Streamlit UI") run_parser.add_argument("-pf", "--profiling", action="store_true", help="Enable profiling logs for the inference process") - run_parser.add_argument("-lp", "--local_path", action="store_true", help="Indicate that the model path provided is the local path, must be used with -mt") - run_parser.add_argument("-mt", "--model_type", type=str, choices=[e.name for e in ModelType], help="Indicate the model running type, must be used with -lp or -hf") - run_parser.add_argument("-hf", "--huggingface", action="store_true", help="Load model from Hugging Face Hub, must be used with -mt") + run_parser.add_argument("-lp", "--local_path", action="store_true", help="Indicate that the model path provided is the local path") + run_parser.add_argument("-mt", "--model_type", type=str, choices=[e.name for e in ModelType], help="Indicate the model running type (default: NLP)") + run_parser.add_argument("-hf", "--huggingface", action="store_true", help="Load model from Hugging Face Hub") # Text generation/vlm arguments text_group = run_parser.add_argument_group('Text generation/VLM options') @@ -498,22 +511,13 @@ def main(): if args.local_path and args.huggingface: print("Error: --local_path and --huggingface flags cannot be used together") return - if (args.local_path or args.huggingface) and not args.model_type: - print("Error: --model_type must be provided when using --local_path or --huggingface") - return run_ggml_inference(args) elif args.command == "server": if args.local_path and args.huggingface: print("Error: --local_path and --huggingface flags cannot be used together") return - if (args.local_path or args.huggingface) and not args.model_type: - print("Error: --model_type must be provided when using --local_path or --huggingface") - return run_ggml_server(args) elif args.command == "onnx": - if args.local_path and not args.model_type: - print("Error: --model_type must be provided when using --local_path") - return run_onnx_inference(args) elif args.command == "eval": run_eval_tasks(args) From fa56bd0a2d4df8cf803d6552b37e8f7ca451f799 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 01:43:23 +0000 Subject: [PATCH 066/160] update cli and 
server readme for lp and hf flag change --- CLI.md | 24 ++++++++++++------------ SERVER.md | 4 ++-- nexa/cli/entry.py | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CLI.md b/CLI.md index 5c4f4ab4..8621b47a 100644 --- a/CLI.md +++ b/CLI.md @@ -109,9 +109,9 @@ options: -h, --help show this help message and exit -pf, --profiling Enable profiling logs for the inference process -st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub Text generation options: -t, --temperature TEMPERATURE @@ -143,9 +143,9 @@ positional arguments: options: -h, --help show this help message and exit -st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub Image generation options: -i2i, --img2img Whether to run image-to-image generation @@ -189,9 +189,9 @@ options: -h, --help show this help message and exit -pf, --profiling Enable profiling logs for the inference process -st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub VLM generation options: -t, --temperature TEMPERATURE @@ -223,9 +223,9 @@ positional arguments: options: -h, --help show this help message and exit -st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub Automatic Speech Recognition options: -b, --beam_size BEAM_SIZE @@ -257,8 +257,8 @@ positional arguments: options: -h, --help show this help message and exit - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path + -hf, --huggingface Load model from Hugging Face Hub -n, --normalize Normalize the embeddings -nt, --no_truncate Not truncate the embeddings 
``` @@ -342,9 +342,9 @@ positional arguments: options: -h, --help show this help message and exit - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub --host HOST Host to bind the server to --port PORT Port to bind the server to --reload Enable automatic reloading on code changes diff --git a/SERVER.md b/SERVER.md index 10462e8c..77c9b0b5 100644 --- a/SERVER.md +++ b/SERVER.md @@ -8,9 +8,9 @@ usage: nexa server [-h] [--host HOST] [--port PORT] [--reload] model_path ### Options: -- `-lp, --local_path`: Indicate that the model path provided is the local path, must be used with -mt +- `-lp, --local_path`: Indicate that the model path provided is the local path - `-mt, --model_type`: Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] -- `-hf, --huggingface`: Load model from Hugging Face Hub, must be used with -mt +- `-hf, --huggingface`: Load model from Hugging Face Hub - `--host`: Host to bind the server to - `--port`: Port to bind the server to - `--reload`: Enable automatic reloading on code changes diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 53b947b4..6f0f8bf3 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -466,9 +466,9 @@ def main(): # GGML server parser server_parser = subparsers.add_parser("server", help="Run the Nexa AI Text Generation Service") server_parser.add_argument("model_path", type=str, nargs='?', help="Path or identifier for the model in Nexa Model Hub") - server_parser.add_argument("-lp", "--local_path", action="store_true", help="Indicate that the model path provided is the local path, must be used with -mt") + server_parser.add_argument("-lp", "--local_path", action="store_true", help="Indicate that the model path provided is the local path") server_parser.add_argument("-mt", "--model_type", type=str, choices=[e.name for e in ModelType], help="Indicate the model running type, must be used with -lp or -hf") - server_parser.add_argument("-hf", "--huggingface", action="store_true", help="Load model from Hugging Face Hub, must be used with -mt") + server_parser.add_argument("-hf", "--huggingface", action="store_true", help="Load model from Hugging Face Hub") server_parser.add_argument("--host", type=str, default="localhost", help="Host to bind the server to") server_parser.add_argument("--port", type=int, default=8000, help="Port to bind the server to") server_parser.add_argument("--reload", action="store_true", help="Enable automatic reloading on code changes") From 9b6afddd1013a12add80afeff82b8afa8cbab0a7 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 01:58:09 +0000 Subject: [PATCH 067/160] moved lp and hf print place --- nexa/cli/entry.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 6f0f8bf3..44db9811 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -83,6 +83,10 @@ def run_ggml_inference(args): stop_words = kwargs.pop("stop_words", None) try: + if (is_local_path or hf) and not model_type: + print("No model type specified. 
Running with default model type: NLP") + print("You can specify a different model type using the -mt flag") + if run_type == "NLP": from nexa.gguf.nexa_inference_text import NexaTextInference inference = NexaTextInference(model_path=model_path, local_path=local_path, stop_words=stop_words, **kwargs) @@ -210,6 +214,10 @@ def run_onnx_inference(args): local_path, run_type = pull_model(model_path) try: + if is_local_path and not model_type: + print("No model type specified. Running with default model type: NLP") + print("You can specify a different model type using the -mt flag") + if run_type == "NLP": from nexa.onnx.nexa_inference_text import NexaTextInference as NexaTextOnnxInference inference = NexaTextOnnxInference(model_path=model_path, local_path=local_path, **kwargs) From a6134d2f59a2cac86adbedb435e36b410ea6b5e1 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 02:01:03 +0000 Subject: [PATCH 068/160] fixed duplicate printing issue --- nexa/cli/entry.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 44db9811..2b167325 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -133,10 +133,6 @@ def run_ggml_inference(args): print(f"Please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") return - if (is_local_path or hf) and not model_type: - print("No model type specified. Running with default model type: NLP") - print("You can specify a different model type using the -mt flag") - if hasattr(args, 'streamlit') and args.streamlit: if run_type == "Multimodal" or run_type == "AudioLM": inference.run_streamlit(model_path, is_local_path=is_local_path, hf=hf, projector_local_path=projector_local_path) @@ -238,10 +234,6 @@ def run_onnx_inference(args): print(f"Please refer to our docs to install nexaai[onnx] package: https://docs.nexaai.com/getting-started/installation ") return - if is_local_path and not model_type: - print("No model type specified. Running with default model type: NLP") - print("You can specify a different model type using the -mt flag") - if hasattr(args, 'streamlit') and args.streamlit: inference.run_streamlit(model_path, is_local_path=is_local_path) else: From 271f43477be2d510435dca84147fa1cc0735a14a Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 02:52:53 +0000 Subject: [PATCH 069/160] applied nexa list table width limit --- nexa/general.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nexa/general.py b/nexa/general.py index f522446d..ab437614 100644 --- a/nexa/general.py +++ b/nexa/general.py @@ -567,7 +567,11 @@ def list_models(): print( tabulate( - table, headers, tablefmt="pretty", colalign=("left", "left", "left", "left") + table, + headers, + tablefmt="pretty", + colalign=("left", "left", "left", "left"), + maxcolwidths=[50, 15, 20, 90] ) ) except Exception as e: From 86ac246c1617f46eb213cea53a98658cbcb40305 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 03:08:00 +0000 Subject: [PATCH 070/160] flexible run type when running from hf --- nexa/cli/entry.py | 4 ++-- nexa/general.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 2b167325..482c59bc 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -75,7 +75,7 @@ def run_ggml_inference(args): print("Running multimodal model or audio model or TTS model from Hugging Face is currently not supported in CLI mode. 
Please use SDK to run Multimodal model or Audio model or TTS model.") return from nexa.general import pull_model - local_path, _ = pull_model(model_path, hf=True) + local_path, _ = pull_model(model_path, hf=True, run_type=run_type) else: # Model Hub from nexa.general import pull_model local_path, run_type = pull_model(model_path) @@ -279,7 +279,7 @@ def run_embedding_generation(args): model_path = local_path else: # hf case from nexa.general import pull_model - local_path, _ = pull_model(model_path, hf=True) + local_path, _ = pull_model(model_path, hf=True, run_type="Text Embedding") else: # Model Hub from nexa.general import pull_model local_path, _ = pull_model(model_path) diff --git a/nexa/general.py b/nexa/general.py index ab437614..d0a6987d 100644 --- a/nexa/general.py +++ b/nexa/general.py @@ -230,7 +230,7 @@ def pull_model_from_official(model_path, **kwargs): "run_type": run_type_str } -def pull_model_from_hf(repo_id, **kwargs): +def pull_model_from_hf(repo_id, run_type = "NLP", **kwargs): repo_id, filename = select_gguf_in_hf_repo(repo_id) success, model_path = download_gguf_from_hf(repo_id, filename, **kwargs) @@ -239,7 +239,7 @@ def pull_model_from_hf(repo_id, **kwargs): "success": success, "local_path": model_path, "model_type": "gguf", - "run_type": "NLP" # Just for compatibility, hf model type will be passed from outsice + "run_type": run_type } From b3d795dc703f79e7c4e4ed1453f45edb1b547dba Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 20:21:00 +0000 Subject: [PATCH 071/160] updated hf store to nexa list logic --- nexa/general.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nexa/general.py b/nexa/general.py index d0a6987d..99a6be70 100644 --- a/nexa/general.py +++ b/nexa/general.py @@ -126,6 +126,7 @@ def pull_model(model_path, hf = False, **kwargs): if result["success"]: # Only add to model list if not using custom download path + model_path = model_path if not hf else f"{model_path}:{result['local_path'].split('/')[-1]}" if not kwargs.get('local_download_path'): add_model_to_list(model_path, result["local_path"], result["model_type"], result["run_type"]) @@ -571,7 +572,7 @@ def list_models(): headers, tablefmt="pretty", colalign=("left", "left", "left", "left"), - maxcolwidths=[50, 15, 20, 90] + maxcolwidths=[150, 15, 20, 90] ) ) except Exception as e: From 71dcfb85bc3bfd63f35fd49d1634a59ed7944e91 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 20:28:12 +0000 Subject: [PATCH 072/160] updated cli to support run listed models --- CLI.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CLI.md b/CLI.md index 8621b47a..45298791 100644 --- a/CLI.md +++ b/CLI.md @@ -31,7 +31,7 @@ options: ### List Local Models -List all models on your local computer. +List all models on your local computer. You can use `nexa run ` to run any model shown in the list. ``` nexa list @@ -96,6 +96,8 @@ Run a model on your local computer. If the model file is not yet downloaded, it By default, `nexa` will run gguf models. To run onnx models, use `nexa onnx MODEL_PATH` +You can run any model shown in `nexa list` command. 
+ #### Run Text-Generation Model ``` From 9eb767ec907a4a46bb09d6b2f7f1c5e4d638ac1c Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 21:14:48 +0000 Subject: [PATCH 073/160] nexa convert high level cli interface refinement --- nexa/cli/entry.py | 90 +++++++++++++++++++++++++++----- nexa/gguf/converter/constants.py | 9 +++- 2 files changed, 84 insertions(+), 15 deletions(-) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 482c59bc..b7b3a099 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -293,7 +293,71 @@ def run_embedding_generation(args): print(f"Error generating embedding: {e}") print("Please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation") +def _select_model_type(): + """Helper function to get model type selection from user.""" + print("\nSelect model type:") + print("1. NLP (text generation)") + print("2. COMPUTER_VISION (image generation)") + + while True: + try: + choice = int(input("\nSelect model type (enter number): ")) + if choice == 1: + return "NLP" + elif choice == 2: + return "COMPUTER_VISION" + print("Invalid selection. Please try again.") + except ValueError: + print("Please enter a valid number.") + +def _select_quantization_type(): + """Helper function to get quantization type selection from user.""" + from nexa.gguf.converter.constants import LLAMA_QUANTIZATION_TYPES + print("\nAvailable quantization types:") + for i, qt in enumerate(LLAMA_QUANTIZATION_TYPES.keys(), 1): + print(f"{i}. {qt}") + + while True: + try: + choice = int(input("\nSelect quantization type (enter number): ")) - 1 + if 0 <= choice < len(LLAMA_QUANTIZATION_TYPES): + return list(LLAMA_QUANTIZATION_TYPES.keys())[choice] + print("Invalid selection. Please try again.") + except ValueError: + print("Please enter a valid number.") + +def _store_in_nexa_list(converted_path, model_type): + """Helper function to store converted model in nexa list.""" + import shutil + from nexa.general import add_model_to_list + from nexa.gguf.converter.constants import NEXA_MODELS_HUB_CONVERTED_DIR + + # Create the converted directory if it doesn't exist + os.makedirs(NEXA_MODELS_HUB_CONVERTED_DIR, exist_ok=True) + + # Copy the file to the converted directory + nexa_list_path = os.path.join(NEXA_MODELS_HUB_CONVERTED_DIR, os.path.basename(converted_path)) + shutil.copy2(converted_path, nexa_list_path) + + # Add the new path to the model list + add_model_to_list(os.path.basename(converted_path), nexa_list_path, "gguf", model_type) + +def _run_converted_model(converted_path, model_type): + """Helper function to run the converted model.""" + try: + import subprocess + command = f"nexa run {converted_path} -lp -mt {model_type}" + print(f"Running command: {command}") + subprocess.run(command.split(), check=True, text=True) + except subprocess.CalledProcessError as e: + print("Error running the converted model.") + print("Change model type with -mt to run the model correctly. Or refer to our docs: https://docs.nexa.ai/sdk/cli-reference") + def run_convert(args): + # Get model type and quantization type + model_type = _select_model_type() + ftype = args.ftype or _select_quantization_type() + input_path = args.input_path # Check if input_path is a valid directory @@ -315,24 +379,22 @@ def run_convert(args): converted_path = convert_hf_to_quantized_gguf( input_path, output_file=args.output_file, - ftype=args.ftype, + ftype=ftype, convert_type=args.convert_type, **kwargs ) if converted_path: - print(f"Conversion completed successfully. 
Output file: {converted_path}") + print(f"\nConversion completed successfully. Output file: {converted_path}") + + # Ask if user wants to store in nexa list + store_choice = input("\nWould you like to store this model in nexa list so you can run it with `nexa run ` anywhere and anytime? (y/N): ").strip().lower() + if store_choice == 'y': + _store_in_nexa_list(converted_path, model_type) - # Ask user if they want to run the converted model - user_choice = input("Would you like to run the converted model? (y/N) (Currently only supports NLP): ").strip().lower() - if user_choice == 'y': - try: - import subprocess - command = f"nexa run {converted_path} -lp -mt NLP" - print(f"Running command: {command}") - subprocess.run(command.split(), check=True, text=True) - except subprocess.CalledProcessError as e: - print("Error running the converted model.") - print("Change model type with -mt to run the model correctly. Or refer to our docs: https://docs.nexa.ai/sdk/cli-reference") + # Ask if user wants to run the model + run_choice = input("\nWould you like to run the converted model? (y/N): ").strip().lower() + if run_choice == 'y': + _run_converted_model(converted_path, model_type) else: print("Exiting without running the model.") return @@ -438,7 +500,7 @@ def main(): # Convert command convert_parser = subparsers.add_parser("convert", help="Convert and quantize a Hugging Face model to GGUF format.") convert_parser.add_argument("input_path", type=str, help="Path to the input Hugging Face model directory or GGUF file") - convert_parser.add_argument("ftype", nargs='?', type=str, default="q4_0", help="Quantization type (default: q4_0)") + convert_parser.add_argument("ftype", nargs='?', type=str, help="Quantization type") convert_parser.add_argument("output_file", nargs='?', type=str, help="Path to the output quantized GGUF file") convert_hf_parser = convert_parser.add_argument_group('Convert from safetensors options') diff --git a/nexa/gguf/converter/constants.py b/nexa/gguf/converter/constants.py index d2869ab1..2512f4d5 100644 --- a/nexa/gguf/converter/constants.py +++ b/nexa/gguf/converter/constants.py @@ -1,3 +1,6 @@ +import os +from pathlib import Path + from nexa.gguf.llama.llama_cpp import ( LLAMA_FTYPE_ALL_F32, LLAMA_FTYPE_MOSTLY_F16, @@ -146,4 +149,8 @@ "q4_0_4_4": GGML_TYPE_Q4_0_4_4, "q4_0_4_8": GGML_TYPE_Q4_0_4_8, "q4_0_8_8": GGML_TYPE_Q4_0_8_8, -} \ No newline at end of file +} + +NEXA_CACHE_ROOT = Path(os.getenv("NEXA_CACHE_ROOT") or "~/.cache/nexa").expanduser() +NEXA_MODELS_HUB_DIR = NEXA_CACHE_ROOT / "hub" +NEXA_MODELS_HUB_CONVERTED_DIR = NEXA_MODELS_HUB_DIR / "converted" From 2a93e96f1a344218446b047bf88db97ffc67d977 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 21:27:44 +0000 Subject: [PATCH 074/160] added instructions after convert & run --- nexa/cli/entry.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index b7b3a099..793bb46f 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -397,7 +397,11 @@ def run_convert(args): _run_converted_model(converted_path, model_type) else: print("Exiting without running the model.") - return + + print(f"\nConverted model stored at {converted_path}") + running_command = f"nexa run {converted_path.split('/')[-1]}"\ + if store_choice == 'y' else f"nexa run {converted_path} -lp -mt {model_type}" + print(f"\nYou can run the converted model with command: {running_command}") else: print("Conversion failed.") except Exception as e: From 
44d2aa514a8f3187dc1bdedc34f9c7cddd007ef4 Mon Sep 17 00:00:00 2001 From: zhycheng614 Date: Sat, 9 Nov 2024 21:38:55 +0000 Subject: [PATCH 075/160] update readmes --- CLI.md | 4 ++++ README.md | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CLI.md b/CLI.md index 45298791..06ed3a33 100644 --- a/CLI.md +++ b/CLI.md @@ -276,6 +276,10 @@ nexa embed sentence-transformers/all-MiniLM-L6-v2:gguf-fp16 "I love Nexa AI." >> ### Convert and quantize a Hugging Face Model to GGUF +Additional package `nexa-gguf` is required to run this command. + +You can install it by `pip install "nexaai[convert]"` or `pip install nexa-gguf`. + ``` nexa convert HF_MODEL_PATH [ftype] [output_file] usage: nexa convert [-h] [-t NTHREAD] [--convert_type CONVERT_TYPE] [--bigendian] [--use_temp_file] [--no_lazy] diff --git a/README.md b/README.md index f29b8db9..4551a14e 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![MacOS][MacOS-image]][release-url] [![Linux][Linux-image]][release-url] [![Windows][Windows-image]][release-url] -[![GitHub Release](https://img.shields.io/github/v/release/NexaAI/nexa-sdk)](https://github.com/NexaAI/nexa-sdk/releases/latest) [![Build workflow](https://img.shields.io/github/actions/workflow/status/NexaAI/nexa-sdk/ci.yaml?label=CI&logo=github)](https://github.com/NexaAI/nexa-sdk/actions/workflows/ci.yaml?query=branch%3Amain) ![GitHub License](https://img.shields.io/github/license/NexaAI/nexa-sdk) +[![GitHub Release](https://img.shields.io/github/v/release/NexaAI/nexa-sdk)](https://github.com/NexaAI/nexa-sdk/releases/latest) [![Build workflow](https://img.shields.io/github/actions/workflow/status/NexaAI/nexa-sdk/ci.yaml?label=CI&logo=github)](https://github.com/NexaAI/nexa-sdk/actions/workflows/ci.yaml?query=branch%3Amain) ![GitHub License](https://img.shields.io/github/license/NexaAI/nexa-sdk) [![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FNexaAI%2Fnexa-sdk%2Fbadge%3Ftype%3Dswift-versions)](https://swiftpackageindex.com/NexaAI/nexa-sdk) [![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FNexaAI%2Fnexa-sdk%2Fbadge%3Ftype%3Dplatforms)](https://swiftpackageindex.com/NexaAI/nexa-sdk) @@ -26,6 +26,7 @@ Nexa SDK is a comprehensive toolkit for supporting **ONNX** and **GGML** models. ## Latest News 🔥 + - [2024/11] Support Nexa AI's own vision language model (0.9B parameters): `nexa run omnivision` and audio language model (2.9B): `nexa run omniaudio` - [2024/11] Support audio language model: `nexa run qwen2audio`, **we are the first open-source toolkit to support audio language model with GGML tensor library.** - [2024/10] Support embedding model: `nexa embed ` @@ -84,8 +85,9 @@ We have released pre-built wheels for various Python versions, platforms, and ba > [!NOTE] > > 1. If you want to use ONNX model, just replace `pip install nexaai` with `pip install "nexaai[onnx]"` in provided commands. -> 2. If you want to convert and quantize huggingface models to GGUF models, just replace `pip install nexaai` with `pip install "nexaai[nexa-gguf]"`. -> 3. For Chinese developers, we recommend you to use Tsinghua Open Source Mirror as extra index url, just replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in provided commands. +> 2. If you want to run benchmark evaluation, just replace `pip install nexaai` with `pip install "nexaai[eval]"` in provided commands. +> 3. 
If you want to convert and quantize huggingface models to GGUF models, just replace `pip install nexaai` with `pip install "nexaai[nexa-gguf]"` in provided commands. +> 4. For Chinese developers, we recommend you to use Tsinghua Open Source Mirror as extra index url, just replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in provided commands. #### CPU From 40ce3cde00190753ec3b00a19873d8659c02f232 Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Sun, 10 Nov 2024 14:03:32 -0800 Subject: [PATCH 076/160] wip --- nexa/constants.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nexa/constants.py b/nexa/constants.py index d59c4af9..7e61d041 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -112,6 +112,7 @@ class ModelType(Enum): NEXA_RUN_MODEL_MAP_AUDIO_LM = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "Qwen2-Audio-7.8B-Instruct:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", + "Qwen2-Audio-7.8B-Instruct:q8_0": "Qwen2-Audio-7.8B-Instruct:model-q8_0", "Qwen2-Audio-7.8B-Instruct:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "omniaudio": "Octo-omni-audio:model-fp16", "Octo-omni-audio:fp16": "Octo-omni-audio:model-fp16", @@ -194,6 +195,7 @@ class ModelType(Enum): "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "Qwen2-Audio-7.8B-Instruct:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", "Qwen2-Audio-7.8B-Instruct:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", + "Qwen2-Audio-7.8B-Instruct:q8_0": "Qwen2-Audio-7.8B-Instruct:projector-q8_0", "omniaudio": "Octo-omni-audio:projector-fp16", "Octo-omni-audio:fp16": "Octo-omni-audio:projector-fp16", "Octo-omni-audio:q4_0": "Octo-omni-audio:projector-q4_0", @@ -430,6 +432,8 @@ class ModelType(Enum): "FLUX.1-schnell": ModelType.COMPUTER_VISION, "Phi-3-vision-128k-instruct": ModelType.MULTIMODAL, "Octo-omni-vision": ModelType.MULTIMODAL, + "Octo-omni-vision-v2": ModelType.MULTIMODAL, + "Octo-omni-vision-v2-ocr": ModelType.MULTIMODAL, "nanoLLaVA": ModelType.MULTIMODAL, "llava-v1.6-mistral-7b": ModelType.MULTIMODAL, "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL, From 7cb709f64a1e419f43eb62bc560994945b94792a Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Sun, 10 Nov 2024 14:04:38 -0800 Subject: [PATCH 077/160] merge origin main --- CLI.md | 32 +- README.md | 8 +- SERVER.md | 4 +- dependency/llama.cpp | 2 +- nexa/cli/entry.py | 150 ++++++-- nexa/constants.py | 1 + nexa/general.py | 11 +- nexa/gguf/converter/constants.py | 9 +- nexa/gguf/streamlit/streamlit_image_chat.py | 397 +++++++++++++++----- nexa/gguf/streamlit/streamlit_text_chat.py | 387 ++++++++++++++----- nexa/gguf/streamlit/streamlit_vlm.py | 342 ++++++++++++----- nexa/gguf/streamlit/streamlit_voice_chat.py | 358 ++++++++++++------ nexa/utils.py | 99 ++++- 13 files changed, 1353 insertions(+), 447 deletions(-) diff --git a/CLI.md b/CLI.md index 5c4f4ab4..06ed3a33 100644 --- a/CLI.md +++ b/CLI.md @@ -31,7 +31,7 @@ options: ### List Local Models -List all models on your local computer. +List all models on your local computer. You can use `nexa run ` to run any model shown in the list. ``` nexa list @@ -96,6 +96,8 @@ Run a model on your local computer. If the model file is not yet downloaded, it By default, `nexa` will run gguf models. To run onnx models, use `nexa onnx MODEL_PATH` +You can run any model shown in `nexa list` command. 
+ #### Run Text-Generation Model ``` @@ -109,9 +111,9 @@ options: -h, --help show this help message and exit -pf, --profiling Enable profiling logs for the inference process -st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub Text generation options: -t, --temperature TEMPERATURE @@ -143,9 +145,9 @@ positional arguments: options: -h, --help show this help message and exit -st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub Image generation options: -i2i, --img2img Whether to run image-to-image generation @@ -189,9 +191,9 @@ options: -h, --help show this help message and exit -pf, --profiling Enable profiling logs for the inference process -st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub VLM generation options: -t, --temperature TEMPERATURE @@ -223,9 +225,9 @@ positional arguments: options: -h, --help show this help message and exit -st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub Automatic Speech Recognition options: -b, --beam_size BEAM_SIZE @@ -257,8 +259,8 @@ positional arguments: options: -h, --help show this help message and exit - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path + -hf, --huggingface Load model from Hugging Face Hub -n, --normalize Normalize the embeddings -nt, --no_truncate Not truncate the embeddings ``` @@ -274,6 +276,10 @@ nexa embed sentence-transformers/all-MiniLM-L6-v2:gguf-fp16 "I love Nexa AI." >> ### Convert and quantize a Hugging Face Model to GGUF +Additional package `nexa-gguf` is required to run this command. 
+ +You can install it by `pip install "nexaai[convert]"` or `pip install nexa-gguf`. + ``` nexa convert HF_MODEL_PATH [ftype] [output_file] usage: nexa convert [-h] [-t NTHREAD] [--convert_type CONVERT_TYPE] [--bigendian] [--use_temp_file] [--no_lazy] @@ -342,9 +348,9 @@ positional arguments: options: -h, --help show this help message and exit - -lp, --local_path Indicate that the model path provided is the local path, must be used with -mt + -lp, --local_path Indicate that the model path provided is the local path -mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] - -hf, --huggingface Load model from Hugging Face Hub, must be used with -mt + -hf, --huggingface Load model from Hugging Face Hub --host HOST Host to bind the server to --port PORT Port to bind the server to --reload Enable automatic reloading on code changes diff --git a/README.md b/README.md index f29b8db9..4551a14e 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![MacOS][MacOS-image]][release-url] [![Linux][Linux-image]][release-url] [![Windows][Windows-image]][release-url] -[![GitHub Release](https://img.shields.io/github/v/release/NexaAI/nexa-sdk)](https://github.com/NexaAI/nexa-sdk/releases/latest) [![Build workflow](https://img.shields.io/github/actions/workflow/status/NexaAI/nexa-sdk/ci.yaml?label=CI&logo=github)](https://github.com/NexaAI/nexa-sdk/actions/workflows/ci.yaml?query=branch%3Amain) ![GitHub License](https://img.shields.io/github/license/NexaAI/nexa-sdk) +[![GitHub Release](https://img.shields.io/github/v/release/NexaAI/nexa-sdk)](https://github.com/NexaAI/nexa-sdk/releases/latest) [![Build workflow](https://img.shields.io/github/actions/workflow/status/NexaAI/nexa-sdk/ci.yaml?label=CI&logo=github)](https://github.com/NexaAI/nexa-sdk/actions/workflows/ci.yaml?query=branch%3Amain) ![GitHub License](https://img.shields.io/github/license/NexaAI/nexa-sdk) [![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FNexaAI%2Fnexa-sdk%2Fbadge%3Ftype%3Dswift-versions)](https://swiftpackageindex.com/NexaAI/nexa-sdk) [![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FNexaAI%2Fnexa-sdk%2Fbadge%3Ftype%3Dplatforms)](https://swiftpackageindex.com/NexaAI/nexa-sdk) @@ -26,6 +26,7 @@ Nexa SDK is a comprehensive toolkit for supporting **ONNX** and **GGML** models. ## Latest News 🔥 + - [2024/11] Support Nexa AI's own vision language model (0.9B parameters): `nexa run omnivision` and audio language model (2.9B): `nexa run omniaudio` - [2024/11] Support audio language model: `nexa run qwen2audio`, **we are the first open-source toolkit to support audio language model with GGML tensor library.** - [2024/10] Support embedding model: `nexa embed ` @@ -84,8 +85,9 @@ We have released pre-built wheels for various Python versions, platforms, and ba > [!NOTE] > > 1. If you want to use ONNX model, just replace `pip install nexaai` with `pip install "nexaai[onnx]"` in provided commands. -> 2. If you want to convert and quantize huggingface models to GGUF models, just replace `pip install nexaai` with `pip install "nexaai[nexa-gguf]"`. -> 3. For Chinese developers, we recommend you to use Tsinghua Open Source Mirror as extra index url, just replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in provided commands. +> 2. 
If you want to run benchmark evaluation, just replace `pip install nexaai` with `pip install "nexaai[eval]"` in provided commands. +> 3. If you want to convert and quantize huggingface models to GGUF models, just replace `pip install nexaai` with `pip install "nexaai[nexa-gguf]"` in provided commands. +> 4. For Chinese developers, we recommend you to use Tsinghua Open Source Mirror as extra index url, just replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in provided commands. #### CPU diff --git a/SERVER.md b/SERVER.md index 10462e8c..77c9b0b5 100644 --- a/SERVER.md +++ b/SERVER.md @@ -8,9 +8,9 @@ usage: nexa server [-h] [--host HOST] [--port PORT] [--reload] model_path ### Options: -- `-lp, --local_path`: Indicate that the model path provided is the local path, must be used with -mt +- `-lp, --local_path`: Indicate that the model path provided is the local path - `-mt, --model_type`: Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO] -- `-hf, --huggingface`: Load model from Hugging Face Hub, must be used with -mt +- `-hf, --huggingface`: Load model from Hugging Face Hub - `--host`: Host to bind the server to - `--port`: Port to bind the server to - `--reload`: Enable automatic reloading on code changes diff --git a/dependency/llama.cpp b/dependency/llama.cpp index 20b9f02c..b535cd94 160000 --- a/dependency/llama.cpp +++ b/dependency/llama.cpp @@ -1 +1 @@ -Subproject commit 20b9f02cee483d09d15832c35e6117e5a020f517 +Subproject commit b535cd941e657ac1984d8022dd5f0c98f2b9e265 diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 6736cff1..793bb46f 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -44,14 +44,13 @@ def run_ggml_inference(args): run_type = None if model_type: run_type = ModelType[model_type].value + elif is_local_path or hf: + run_type = ModelType["NLP"].value local_path = None projector_local_path = None if is_local_path or hf: - if not model_type: - print("Error: --model_type must be provided when using --local_path or --huggingface") - return if is_local_path: local_path = os.path.abspath(model_path) model_path = local_path @@ -76,14 +75,18 @@ def run_ggml_inference(args): print("Running multimodal model or audio model or TTS model from Hugging Face is currently not supported in CLI mode. Please use SDK to run Multimodal model or Audio model or TTS model.") return from nexa.general import pull_model - local_path, _ = pull_model(model_path, hf=True) + local_path, _ = pull_model(model_path, hf=True, run_type=run_type) else: # Model Hub from nexa.general import pull_model local_path, run_type = pull_model(model_path) - + stop_words = kwargs.pop("stop_words", None) try: + if (is_local_path or hf) and not model_type: + print("No model type specified. Running with default model type: NLP") + print("You can specify a different model type using the -mt flag") + if run_type == "NLP": from nexa.gguf.nexa_inference_text import NexaTextInference inference = NexaTextInference(model_path=model_path, local_path=local_path, stop_words=stop_words, **kwargs) @@ -124,6 +127,7 @@ def run_ggml_inference(args): else: print(f"Unknown task: {run_type}. 
Skipping inference.") return + except Exception as e: print(f"Error running ggml inference: {e}") print(f"Please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") @@ -131,9 +135,9 @@ def run_ggml_inference(args): if hasattr(args, 'streamlit') and args.streamlit: if run_type == "Multimodal" or run_type == "AudioLM": - inference.run_streamlit(model_path, is_local_path = is_local_path, hf = hf, projector_local_path = projector_local_path) + inference.run_streamlit(model_path, is_local_path=is_local_path, hf=hf, projector_local_path=projector_local_path) else: - inference.run_streamlit(model_path, is_local_path = is_local_path, hf = hf) + inference.run_streamlit(model_path, is_local_path=is_local_path, hf=hf) else: inference.run() @@ -149,6 +153,8 @@ def run_ggml_server(args): run_type = None if model_type: run_type = ModelType[model_type].value + elif is_local_path or hf: + run_type = ModelType["NLP"].value projector_local_path = None if run_type == "Multimodal" and is_local_path: @@ -167,6 +173,10 @@ def run_ggml_server(args): print("Error: For Audio models with --local_path, the provided path must be a directory containing all related files.") return + if (is_local_path or hf) and not model_type: + print("No model type specified. Running with default model type: NLP") + print("You can specify a different model type using the -mt flag") + NexaServer( model_path_arg=model_path, is_local_path_arg=is_local_path, @@ -185,12 +195,11 @@ def run_onnx_inference(args): run_type = None if model_type: run_type = ModelType[model_type].value + elif is_local_path: + run_type = ModelType["NLP"].value local_path = None if is_local_path: - if not model_type: - print("Error: --model_type must be provided when using --local_path") - return local_path = os.path.abspath(model_path) if not os.path.isdir(local_path): print("Error: For ONNX models, the provided path must be a directory.") @@ -201,6 +210,10 @@ def run_onnx_inference(args): local_path, run_type = pull_model(model_path) try: + if is_local_path and not model_type: + print("No model type specified. Running with default model type: NLP") + print("You can specify a different model type using the -mt flag") + if run_type == "NLP": from nexa.onnx.nexa_inference_text import NexaTextInference as NexaTextOnnxInference inference = NexaTextOnnxInference(model_path=model_path, local_path=local_path, **kwargs) @@ -266,7 +279,7 @@ def run_embedding_generation(args): model_path = local_path else: # hf case from nexa.general import pull_model - local_path, _ = pull_model(model_path, hf=True) + local_path, _ = pull_model(model_path, hf=True, run_type="Text Embedding") else: # Model Hub from nexa.general import pull_model local_path, _ = pull_model(model_path) @@ -280,7 +293,71 @@ def run_embedding_generation(args): print(f"Error generating embedding: {e}") print("Please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation") +def _select_model_type(): + """Helper function to get model type selection from user.""" + print("\nSelect model type:") + print("1. NLP (text generation)") + print("2. COMPUTER_VISION (image generation)") + + while True: + try: + choice = int(input("\nSelect model type (enter number): ")) + if choice == 1: + return "NLP" + elif choice == 2: + return "COMPUTER_VISION" + print("Invalid selection. 
Please try again.") + except ValueError: + print("Please enter a valid number.") + +def _select_quantization_type(): + """Helper function to get quantization type selection from user.""" + from nexa.gguf.converter.constants import LLAMA_QUANTIZATION_TYPES + print("\nAvailable quantization types:") + for i, qt in enumerate(LLAMA_QUANTIZATION_TYPES.keys(), 1): + print(f"{i}. {qt}") + + while True: + try: + choice = int(input("\nSelect quantization type (enter number): ")) - 1 + if 0 <= choice < len(LLAMA_QUANTIZATION_TYPES): + return list(LLAMA_QUANTIZATION_TYPES.keys())[choice] + print("Invalid selection. Please try again.") + except ValueError: + print("Please enter a valid number.") + +def _store_in_nexa_list(converted_path, model_type): + """Helper function to store converted model in nexa list.""" + import shutil + from nexa.general import add_model_to_list + from nexa.gguf.converter.constants import NEXA_MODELS_HUB_CONVERTED_DIR + + # Create the converted directory if it doesn't exist + os.makedirs(NEXA_MODELS_HUB_CONVERTED_DIR, exist_ok=True) + + # Copy the file to the converted directory + nexa_list_path = os.path.join(NEXA_MODELS_HUB_CONVERTED_DIR, os.path.basename(converted_path)) + shutil.copy2(converted_path, nexa_list_path) + + # Add the new path to the model list + add_model_to_list(os.path.basename(converted_path), nexa_list_path, "gguf", model_type) + +def _run_converted_model(converted_path, model_type): + """Helper function to run the converted model.""" + try: + import subprocess + command = f"nexa run {converted_path} -lp -mt {model_type}" + print(f"Running command: {command}") + subprocess.run(command.split(), check=True, text=True) + except subprocess.CalledProcessError as e: + print("Error running the converted model.") + print("Change model type with -mt to run the model correctly. Or refer to our docs: https://docs.nexa.ai/sdk/cli-reference") + def run_convert(args): + # Get model type and quantization type + model_type = _select_model_type() + ftype = args.ftype or _select_quantization_type() + input_path = args.input_path # Check if input_path is a valid directory @@ -302,27 +379,29 @@ def run_convert(args): converted_path = convert_hf_to_quantized_gguf( input_path, output_file=args.output_file, - ftype=args.ftype, + ftype=ftype, convert_type=args.convert_type, **kwargs ) if converted_path: - print(f"Conversion completed successfully. Output file: {converted_path}") + print(f"\nConversion completed successfully. Output file: {converted_path}") + + # Ask if user wants to store in nexa list + store_choice = input("\nWould you like to store this model in nexa list so you can run it with `nexa run ` anywhere and anytime? (y/N): ").strip().lower() + if store_choice == 'y': + _store_in_nexa_list(converted_path, model_type) - # Ask user if they want to run the converted model - user_choice = input("Would you like to run the converted model? (y/N) (Currently only supports NLP): ").strip().lower() - if user_choice == 'y': - try: - import subprocess - command = f"nexa run {converted_path} -lp -mt NLP" - print(f"Running command: {command}") - subprocess.run(command.split(), check=True, text=True) - except subprocess.CalledProcessError as e: - print("Error running the converted model.") - print("Change model type with -mt to run the model correctly. Or refer to our docs: https://docs.nexa.ai/sdk/cli-reference") + # Ask if user wants to run the model + run_choice = input("\nWould you like to run the converted model? 
(y/N): ").strip().lower() + if run_choice == 'y': + _run_converted_model(converted_path, model_type) else: print("Exiting without running the model.") - return + + print(f"\nConverted model stored at {converted_path}") + running_command = f"nexa run {converted_path.split('/')[-1]}"\ + if store_choice == 'y' else f"nexa run {converted_path} -lp -mt {model_type}" + print(f"\nYou can run the converted model with command: {running_command}") else: print("Conversion failed.") except Exception as e: @@ -338,9 +417,9 @@ def main(): run_parser.add_argument("model_path", type=str, help="Path or identifier for the model in Nexa Model Hub") run_parser.add_argument("-st", "--streamlit", action="store_true", help="Run the inference in Streamlit UI") run_parser.add_argument("-pf", "--profiling", action="store_true", help="Enable profiling logs for the inference process") - run_parser.add_argument("-lp", "--local_path", action="store_true", help="Indicate that the model path provided is the local path, must be used with -mt") - run_parser.add_argument("-mt", "--model_type", type=str, choices=[e.name for e in ModelType], help="Indicate the model running type, must be used with -lp or -hf") - run_parser.add_argument("-hf", "--huggingface", action="store_true", help="Load model from Hugging Face Hub, must be used with -mt") + run_parser.add_argument("-lp", "--local_path", action="store_true", help="Indicate that the model path provided is the local path") + run_parser.add_argument("-mt", "--model_type", type=str, choices=[e.name for e in ModelType], help="Indicate the model running type (default: NLP)") + run_parser.add_argument("-hf", "--huggingface", action="store_true", help="Load model from Hugging Face Hub") # Text generation/vlm arguments text_group = run_parser.add_argument_group('Text generation/VLM options') @@ -425,7 +504,7 @@ def main(): # Convert command convert_parser = subparsers.add_parser("convert", help="Convert and quantize a Hugging Face model to GGUF format.") convert_parser.add_argument("input_path", type=str, help="Path to the input Hugging Face model directory or GGUF file") - convert_parser.add_argument("ftype", nargs='?', type=str, default="q4_0", help="Quantization type (default: q4_0)") + convert_parser.add_argument("ftype", nargs='?', type=str, help="Quantization type") convert_parser.add_argument("output_file", nargs='?', type=str, help="Path to the output quantized GGUF file") convert_hf_parser = convert_parser.add_argument_group('Convert from safetensors options') @@ -453,9 +532,9 @@ def main(): # GGML server parser server_parser = subparsers.add_parser("server", help="Run the Nexa AI Text Generation Service") server_parser.add_argument("model_path", type=str, nargs='?', help="Path or identifier for the model in Nexa Model Hub") - server_parser.add_argument("-lp", "--local_path", action="store_true", help="Indicate that the model path provided is the local path, must be used with -mt") + server_parser.add_argument("-lp", "--local_path", action="store_true", help="Indicate that the model path provided is the local path") server_parser.add_argument("-mt", "--model_type", type=str, choices=[e.name for e in ModelType], help="Indicate the model running type, must be used with -lp or -hf") - server_parser.add_argument("-hf", "--huggingface", action="store_true", help="Load model from Hugging Face Hub, must be used with -mt") + server_parser.add_argument("-hf", "--huggingface", action="store_true", help="Load model from Hugging Face Hub") server_parser.add_argument("--host", 
type=str, default="localhost", help="Host to bind the server to") server_parser.add_argument("--port", type=int, default=8000, help="Port to bind the server to") server_parser.add_argument("--reload", action="store_true", help="Enable automatic reloading on code changes") @@ -498,22 +577,13 @@ def main(): if args.local_path and args.huggingface: print("Error: --local_path and --huggingface flags cannot be used together") return - if (args.local_path or args.huggingface) and not args.model_type: - print("Error: --model_type must be provided when using --local_path or --huggingface") - return run_ggml_inference(args) elif args.command == "server": if args.local_path and args.huggingface: print("Error: --local_path and --huggingface flags cannot be used together") return - if (args.local_path or args.huggingface) and not args.model_type: - print("Error: --model_type must be provided when using --local_path or --huggingface") - return run_ggml_server(args) elif args.command == "onnx": - if args.local_path and not args.model_type: - print("Error: --model_type must be provided when using --local_path") - return run_onnx_inference(args) elif args.command == "eval": run_eval_tasks(args) diff --git a/nexa/constants.py b/nexa/constants.py index 7e61d041..bed3739d 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -194,6 +194,7 @@ class ModelType(Enum): NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "Qwen2-Audio-7.8B-Instruct:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", + "Qwen2-Audio-7.8B-Instruct:q8_0": "Qwen2-Audio-7.8B-Instruct:projector-q8_0", "Qwen2-Audio-7.8B-Instruct:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "Qwen2-Audio-7.8B-Instruct:q8_0": "Qwen2-Audio-7.8B-Instruct:projector-q8_0", "omniaudio": "Octo-omni-audio:projector-fp16", diff --git a/nexa/general.py b/nexa/general.py index f522446d..99a6be70 100644 --- a/nexa/general.py +++ b/nexa/general.py @@ -126,6 +126,7 @@ def pull_model(model_path, hf = False, **kwargs): if result["success"]: # Only add to model list if not using custom download path + model_path = model_path if not hf else f"{model_path}:{result['local_path'].split('/')[-1]}" if not kwargs.get('local_download_path'): add_model_to_list(model_path, result["local_path"], result["model_type"], result["run_type"]) @@ -230,7 +231,7 @@ def pull_model_from_official(model_path, **kwargs): "run_type": run_type_str } -def pull_model_from_hf(repo_id, **kwargs): +def pull_model_from_hf(repo_id, run_type = "NLP", **kwargs): repo_id, filename = select_gguf_in_hf_repo(repo_id) success, model_path = download_gguf_from_hf(repo_id, filename, **kwargs) @@ -239,7 +240,7 @@ def pull_model_from_hf(repo_id, **kwargs): "success": success, "local_path": model_path, "model_type": "gguf", - "run_type": "NLP" # Just for compatibility, hf model type will be passed from outsice + "run_type": run_type } @@ -567,7 +568,11 @@ def list_models(): print( tabulate( - table, headers, tablefmt="pretty", colalign=("left", "left", "left", "left") + table, + headers, + tablefmt="pretty", + colalign=("left", "left", "left", "left"), + maxcolwidths=[150, 15, 20, 90] ) ) except Exception as e: diff --git a/nexa/gguf/converter/constants.py b/nexa/gguf/converter/constants.py index d2869ab1..2512f4d5 100644 --- a/nexa/gguf/converter/constants.py +++ b/nexa/gguf/converter/constants.py @@ -1,3 +1,6 @@ +import os +from pathlib import Path + from nexa.gguf.llama.llama_cpp import ( LLAMA_FTYPE_ALL_F32, LLAMA_FTYPE_MOSTLY_F16, @@ -146,4 +149,8 @@ "q4_0_4_4": 
GGML_TYPE_Q4_0_4_4, "q4_0_4_8": GGML_TYPE_Q4_0_4_8, "q4_0_8_8": GGML_TYPE_Q4_0_8_8, -} \ No newline at end of file +} + +NEXA_CACHE_ROOT = Path(os.getenv("NEXA_CACHE_ROOT") or "~/.cache/nexa").expanduser() +NEXA_MODELS_HUB_DIR = NEXA_CACHE_ROOT / "hub" +NEXA_MODELS_HUB_CONVERTED_DIR = NEXA_MODELS_HUB_DIR / "converted" diff --git a/nexa/gguf/streamlit/streamlit_image_chat.py b/nexa/gguf/streamlit/streamlit_image_chat.py index 9bb7e8f8..a54def8e 100644 --- a/nexa/gguf/streamlit/streamlit_image_chat.py +++ b/nexa/gguf/streamlit/streamlit_image_chat.py @@ -1,29 +1,97 @@ import os import sys from PIL import Image -from nexa.general import pull_model import streamlit as st +from nexa.general import pull_model from nexa.gguf.nexa_inference_image import NexaImageInference +from nexa.utils import ( + get_model_options, + update_model_options, +) +from nexa.constants import ( + DEFAULT_IMG_GEN_PARAMS_LCM, + DEFAULT_IMG_GEN_PARAMS_TURBO, + DEFAULT_IMG_GEN_PARAMS, + NEXA_RUN_MODEL_MAP_IMAGE, + NEXA_RUN_MODEL_MAP_FLUX, +) import io -default_model = sys.argv[1] -is_local_path = False if sys.argv[2] == "False" else True -hf = False if sys.argv[3] == "False" else True - +specified_run_type = 'Computer Vision' +model_map = NEXA_RUN_MODEL_MAP_IMAGE | NEXA_RUN_MODEL_MAP_FLUX -@st.cache_resource -def load_model(model_path): - if is_local_path: - local_path = model_path - elif hf: - local_path, _ = pull_model(model_path, hf=True) +def get_default_params(model_path: str) -> dict: + """Get default parameters based on model type.""" + if "lcm-dreamshaper" in model_path or "flux" in model_path: + return DEFAULT_IMG_GEN_PARAMS_LCM.copy() # fast LCM models: 4 steps @ 1.0 guidance + elif "sdxl-turbo" in model_path: + return DEFAULT_IMG_GEN_PARAMS_TURBO.copy() # sdxl-turbo: 5 steps @ 5.0 guidance else: - local_path, run_type = pull_model(model_path) - nexa_model = NexaImageInference(model_path=model_path, local_path=local_path) - return nexa_model + return DEFAULT_IMG_GEN_PARAMS.copy() # standard SD models: 20 steps @ 7.5 guidance + +@st.cache_resource(show_spinner=False) +def load_model(model_path: str, is_local: bool = False, is_hf: bool = False): + """Load model with proper error handling.""" + try: + if is_local: + local_path = model_path + elif is_hf: + try: + local_path, _ = pull_model(model_path, hf=True) + update_model_options(specified_run_type, model_map) + except Exception as e: + st.error(f"Error pulling HuggingFace model: {str(e)}") + return None + else: + try: + # model hub case: + local_path, run_type = pull_model(model_path) + if not local_path or not run_type: + st.error(f"Failed to pull model {model_path} from Nexa Model Hub") + return None + update_model_options(specified_run_type, model_map) # update options after successful pull + except ValueError as e: + st.error(f"Error pulling model from Nexa Model Hub: {str(e)}") + return None + except Exception as e: + st.error(f"Unexpected error while pulling model: {str(e)}") + return None + + try: + nexa_model = NexaImageInference( + model_path=model_path, + local_path=local_path + ) + + # force refresh of model options after successful load: + update_model_options(specified_run_type, model_map) + + return nexa_model + + except Exception as e: + st.error(f"Error initializing model: {str(e)}") + return None + + except Exception as e: + st.error(f"Error in load_model: {str(e)}") + return None +@st.cache_resource(show_spinner=False) +def load_local_model(local_path: str): + """Load local model with default parameters.""" + try: + nexa_model = 
NexaImageInference( + model_path="local_model", + local_path=local_path + ) + update_model_options(specified_run_type, model_map) # update options after successful local model load + return nexa_model + except Exception as e: + st.error(f"Error loading local model: {str(e)}") + return None def generate_images(nexa_model: NexaImageInference, prompt: str, negative_prompt: str): + """Generate images using the model.""" output_dir = os.path.dirname(nexa_model.params["output_path"]) if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -37,93 +105,236 @@ def generate_images(nexa_model: NexaImageInference, prompt: str, negative_prompt sample_steps=nexa_model.params["num_inference_steps"], seed=nexa_model.params["random_seed"] ) - + return images +# main execution: +try: + # get command line arguments with proper error handling: + if len(sys.argv) < 4: + st.error("Missing required command line arguments.") + sys.exit(1) # program terminated with an error -st.markdown( - r""" - - """, - unsafe_allow_html=True, -) -st.title("Nexa AI Image Generation") -st.caption("Powered by Nexa AI SDK🐙") + default_model = sys.argv[1] + is_local_path = sys.argv[2].lower() == "true" + hf = sys.argv[3].lower() == "true" + + # UI setup: + st.set_page_config(page_title="Nexa AI Image Generation", layout="wide") + st.markdown( + r""" + + """, + unsafe_allow_html=True, + ) + st.title("Nexa AI Image Generation") + st.caption("Powered by Nexa AI SDK🐙") + + # force refresh model options on every page load: + if 'model_options' not in st.session_state: + st.session_state.model_options = get_model_options(specified_run_type, model_map) + else: + update_model_options(specified_run_type, model_map) -st.sidebar.header("Model Configuration") -model_path = st.sidebar.text_input("Model path", default_model) + # init session state variables: + if 'initialized' not in st.session_state: + st.session_state.current_model_path = None + st.session_state.current_local_path = None + st.session_state.current_hub_model = None + + if not is_local_path and not hf: + try: + with st.spinner(f"Loading model: {default_model}"): + st.session_state.nexa_model = load_model(default_model) + if st.session_state.nexa_model: + st.session_state.current_hub_model = default_model + except Exception as e: + st.error(f"Error loading default model: {str(e)}") + + if default_model not in st.session_state.model_options: + st.session_state.current_model_index = st.session_state.model_options.index("Use Model From Nexa Model Hub 🔍") + else: + try: + st.session_state.current_model_index = st.session_state.model_options.index(default_model) + except ValueError: + st.session_state.current_model_index = 0 + + st.session_state.initialized = True + + # model selection sidebar: + st.sidebar.header("Model Configuration") + + # update selectbox index based on current model + if 'nexa_model' in st.session_state: + if st.session_state.current_hub_model: + current_index = st.session_state.model_options.index("Use Model From Nexa Model Hub 🔍") + elif st.session_state.current_local_path: + current_index = st.session_state.model_options.index("Local Model 📁") + elif st.session_state.current_model_path: + current_index = st.session_state.model_options.index(st.session_state.current_model_path) + else: + current_index = st.session_state.current_model_index + else: + current_index = st.session_state.current_model_index -if not model_path: - st.warning( - "Please enter a valid path or identifier for the model in Nexa Model Hub to proceed." 
+ model_path = st.sidebar.selectbox( + "Select a Model", + st.session_state.model_options, + index=current_index, + key='model_selectbox' ) - st.stop() - -if ( - "current_model_path" not in st.session_state - or st.session_state.current_model_path != model_path -): - st.session_state.current_model_path = model_path - st.session_state.nexa_model = load_model(model_path) - if st.session_state.nexa_model is None: - st.stop() - -st.sidebar.header("Generation Parameters") -num_inference_steps = st.sidebar.slider( - "Number of Inference Steps", - 1, - 100, - st.session_state.nexa_model.params["num_inference_steps"], -) -height = st.sidebar.slider( - "Height", 64, 1024, st.session_state.nexa_model.params["height"] -) -width = st.sidebar.slider( - "Width", 64, 1024, st.session_state.nexa_model.params["width"] -) -guidance_scale = st.sidebar.slider( - "Guidance Scale", 0.0, 20.0, st.session_state.nexa_model.params["guidance_scale"] -) -random_seed = st.sidebar.slider( - "Random Seed", 0, 10000, st.session_state.nexa_model.params["random_seed"] -) -st.session_state.nexa_model.params.update( - { - "num_inference_steps": num_inference_steps, - "height": height, - "width": width, - "guidance_scale": guidance_scale, - "random_seed": random_seed, - } -) + # handle model path input: + if model_path == "Local Model 📁": + local_model_path = st.sidebar.text_input("Enter local model path") + if not local_model_path: + st.warning("Please enter a valid local model path to proceed.") + st.stop() + local_model_path = local_model_path.strip() # remove spaces -prompt = st.text_input("Enter your prompt:") -negative_prompt = st.text_input("Enter your negative prompt (optional):") + # handle local model path changes: + if 'nexa_model' not in st.session_state or st.session_state.current_local_path != local_model_path: + with st.spinner("Loading local model..."): + st.session_state.nexa_model = load_local_model(local_model_path) + st.session_state.current_local_path = local_model_path + + elif model_path == "Use Model From Nexa Model Hub 🔍": + initial_value = default_model if not is_local_path and not hf else "" + hub_model_name = st.sidebar.text_input( + "Enter model name from Nexa Model Hub", + value=initial_value + ) + + # empty string check: + if not hub_model_name: + st.warning(""" + How to add a model from Nexa Model Hub: + \n1. Visit [Nexa Model Hub](https://nexaai.com/models) + \n2. Find a vision model using the task filters + \n3. Select your desired model and copy either: + \n - The full nexa run command (e.g., nexa run stable-diffusion-v1-4:q4_0), or + \n - Simply the model name (e.g., stable-diffusion-v1-4:q4_0) + \n4. 
Paste it into the field on the sidebar and press enter + """) + st.stop() + + # process the input after checking it's not empty: + if hub_model_name.startswith("nexa run"): + hub_model_name = hub_model_name.split("nexa run")[-1].strip() + else: + hub_model_name = hub_model_name.strip() + + # handle hub model name changes: + if 'nexa_model' not in st.session_state or st.session_state.current_hub_model != hub_model_name: + with st.spinner("Loading model from hub..."): + st.session_state.nexa_model = load_model(hub_model_name) + if st.session_state.nexa_model: # only update if load was successful + st.session_state.current_hub_model = hub_model_name -if st.button("Generate Image"): - if not prompt: - st.warning("Please enter a prompt to proceed.") else: - with st.spinner("Generating images..."): - images = generate_images( - st.session_state.nexa_model, prompt, negative_prompt - ) - st.success("Images generated successfully!") - for i, image in enumerate(images): - st.image(image, caption=f"Generated Image", use_column_width=True) - - img_byte_arr = io.BytesIO() - image.save(img_byte_arr, format='PNG') - img_byte_arr = img_byte_arr.getvalue() - - st.download_button( - label=f"Download Image", - data=img_byte_arr, - file_name=f"generated_image.png", - mime="image/png" + # load selected model if it's not already loaded: + if ('nexa_model' not in st.session_state or getattr(st.session_state, 'current_model_path', None) != model_path): + with st.spinner(f"Loading model: {model_path}"): + st.session_state.nexa_model = load_model(model_path) + if st.session_state.nexa_model: # only update if load was successful + st.session_state.current_model_path = model_path + + # generation params: + if 'nexa_model' in st.session_state and st.session_state.nexa_model: + st.sidebar.header("Generation Parameters") + + model_to_check = (st.session_state.current_hub_model if st.session_state.current_hub_model else st.session_state.current_local_path if st.session_state.current_local_path else st.session_state.current_model_path) + + # get model specific defaults: + default_params = get_default_params(model_to_check) + + # adjust step range based on model type: + max_steps = 100 + if "lcm-dreamshaper" in model_to_check or "flux" in model_to_check: + max_steps = 8 # 4-8 steps + elif "sdxl-turbo" in model_to_check: + max_steps = 10 # 5-10 steps + + # adjust guidance scale range based on model type: + max_guidance = 20.0 + if "lcm-dreamshaper" in model_to_check or "flux" in model_to_check: + max_guidance = 2.0 # 1.0-2.0 + elif "sdxl-turbo" in model_to_check: + max_guidance = 10.0 # 5.0-10.0 + + num_inference_steps = st.sidebar.slider( + "Number of Inference Steps", + 1, + max_steps, + default_params["num_inference_steps"] + ) + height = st.sidebar.slider( + "Height", + 64, + 1024, + default_params["height"] + ) + width = st.sidebar.slider( + "Width", + 64, + 1024, + default_params["width"] + ) + guidance_scale = st.sidebar.slider( + "Guidance Scale", + 0.0, + max_guidance, + default_params["guidance_scale"] + ) + random_seed = st.sidebar.slider( + "Random Seed", + 0, + 10000, + default_params["random_seed"] + ) + + st.session_state.nexa_model.params.update({ + "num_inference_steps": num_inference_steps, + "height": height, + "width": width, + "guidance_scale": guidance_scale, + "random_seed": random_seed, + }) + + # image generation interface: + prompt = st.text_input("Enter your prompt:") + negative_prompt = st.text_input("Enter your negative prompt (optional):") + + if st.button("Generate Image"): + if not prompt: + 
st.warning("Please enter a prompt to proceed.") + else: + with st.spinner("Generating images..."): + images = generate_images( + st.session_state.nexa_model, + prompt, + negative_prompt ) + st.success("Images generated successfully!") + for i, image in enumerate(images): + st.image(image, caption=f"Generated Image", use_column_width=True) + + img_byte_arr = io.BytesIO() + image.save(img_byte_arr, format='PNG') + img_byte_arr = img_byte_arr.getvalue() + + st.download_button( + label=f"Download Image", + data=img_byte_arr, + file_name=f"generated_image.png", + mime="image/png" + ) + +except Exception as e: + st.error(f"An unexpected error occurred: {str(e)}") + import traceback + st.error(f"Traceback: {traceback.format_exc()}") diff --git a/nexa/gguf/streamlit/streamlit_text_chat.py b/nexa/gguf/streamlit/streamlit_text_chat.py index 542e8059..4adf60e3 100644 --- a/nexa/gguf/streamlit/streamlit_text_chat.py +++ b/nexa/gguf/streamlit/streamlit_text_chat.py @@ -1,112 +1,313 @@ import sys -from typing import Iterator - +import subprocess +import re +from typing import Iterator, List import streamlit as st from nexa.general import pull_model from nexa.gguf.nexa_inference_text import NexaTextInference +from nexa.utils import ( + get_model_options, + update_model_options, +) +from nexa.constants import ( + DEFAULT_TEXT_GEN_PARAMS, + NEXA_RUN_MODEL_MAP_TEXT, +) -default_model = sys.argv[1] -is_local_path = False if sys.argv[2] == "False" else True -hf = False if sys.argv[3] == "False" else True - -@st.cache_resource -def load_model(model_path): - st.session_state.messages = [] - if is_local_path: - local_path = model_path - elif hf: - local_path, _ = pull_model(model_path, hf=True) - else: - local_path, run_type = pull_model(model_path) - nexa_model = NexaTextInference(model_path=model_path, local_path=local_path) - return nexa_model +specified_run_type = 'NLP' +model_map = NEXA_RUN_MODEL_MAP_TEXT + +# init: +DEFAULT_PARAMS = DEFAULT_TEXT_GEN_PARAMS.copy() + +@st.cache_resource(show_spinner=False) +def load_model(model_path: str, is_local: bool = False, is_hf: bool = False): + """Load model with proper error handling and state management.""" + try: + st.session_state.messages = [] + + if is_local: + local_path = model_path + elif is_hf: + try: + local_path, _ = pull_model(model_path, hf=True) + update_model_options(specified_run_type, model_map) # update options after successful pull + except Exception as e: + st.error(f"Error pulling HuggingFace model: {str(e)}") + return None + else: + try: + # model hub case: + local_path, run_type = pull_model(model_path) + if not local_path or not run_type: + st.error(f"Failed to pull model {model_path} from Nexa Model Hub") + return None + update_model_options(specified_run_type, model_map) # update options after successful pull + except ValueError as e: + st.error(f"Error pulling model from Nexa Model Hub: {str(e)}") + return None + except Exception as e: + st.error(f"Unexpected error while pulling model: {str(e)}") + return None + + try: + nexa_model = NexaTextInference( + model_path=model_path, + local_path=local_path, + **DEFAULT_PARAMS + ) + + # force refresh of model options after successful load: + update_model_options(specified_run_type, model_map) + + # reset the model index to include the new model: + if model_path in st.session_state.model_options: + st.session_state.current_model_index = st.session_state.model_options.index(model_path) + return nexa_model + + except Exception as e: + st.error(f"Error initializing model: {str(e)}") + return None + 
except Exception as e: + st.error(f"Error in load_model: {str(e)}") + return None + +@st.cache_resource(show_spinner=False) +def load_local_model(local_path: str): + """Load local model with default parameters.""" + try: + st.session_state.messages = [] + nexa_model = NexaTextInference( + model_path="local_model", + local_path=local_path, + **DEFAULT_PARAMS + ) + update_model_options(specified_run_type, model_map) # update options after successful local model load + return nexa_model + except Exception as e: + st.error(f"Error loading local model: {str(e)}") + return None def generate_response(nexa_model: NexaTextInference) -> Iterator: + """Generate response from the model.""" user_input = st.session_state.messages[-1]["content"] if hasattr(nexa_model, "chat_format") and nexa_model.chat_format: return nexa_model._chat(user_input) else: return nexa_model._complete(user_input) -st.markdown( - r""" - - """, - unsafe_allow_html=True, -) -st.title("Nexa AI Text Generation") -st.caption("Powered by Nexa AI SDK🐙") +# main execution: +try: + # get command line arguments with proper error handling: + if len(sys.argv) < 4: + st.error("Missing required command line arguments.") + sys.exit(1) # program terminated with an error -st.sidebar.header("Model Configuration") -model_path = st.sidebar.text_input("Model path", default_model) + default_model = sys.argv[1] + is_local_path = sys.argv[2].lower() == "true" + hf = sys.argv[3].lower() == "true" -if not model_path: - st.warning( - "Please enter a valid path or identifier for the model in Nexa Model Hub to proceed." + # UI setup: + st.set_page_config(page_title="Nexa AI Text Generation", layout="wide") + st.markdown( + r""" + + """, + unsafe_allow_html=True, ) - st.stop() - -if ( - "current_model_path" not in st.session_state - or st.session_state.current_model_path != model_path -): - st.session_state.current_model_path = model_path - st.session_state.nexa_model = load_model(model_path) - if st.session_state.nexa_model is None: - st.stop() - -st.sidebar.header("Generation Parameters") -temperature = st.sidebar.slider( - "Temperature", 0.0, 1.0, st.session_state.nexa_model.params["temperature"] -) -max_new_tokens = st.sidebar.slider( - "Max New Tokens", 1, 500, st.session_state.nexa_model.params["max_new_tokens"] -) -top_k = st.sidebar.slider("Top K", 1, 100, st.session_state.nexa_model.params["top_k"]) -top_p = st.sidebar.slider( - "Top P", 0.0, 1.0, st.session_state.nexa_model.params["top_p"] -) + st.title("Nexa AI Text Generation") + st.caption("Powered by Nexa AI SDK🐙") -st.session_state.nexa_model.params.update( - { - "temperature": temperature, - "max_new_tokens": max_new_tokens, - "top_k": top_k, - "top_p": top_p, - } -) + # force refresh model options on every page load: + if 'model_options' not in st.session_state: + st.session_state.model_options = get_model_options(specified_run_type, model_map) + else: + update_model_options(specified_run_type, model_map) + + # init session state variables: + if 'initialized' not in st.session_state: + st.session_state.messages = [] + st.session_state.current_model_path = None + st.session_state.current_local_path = None + st.session_state.current_hub_model = None + + if not is_local_path and not hf: + try: + with st.spinner(f"Loading model: {default_model}"): + st.session_state.nexa_model = load_model(default_model) + if st.session_state.nexa_model: + st.session_state.current_hub_model = default_model + except Exception as e: + st.error(f"Error loading default model: {str(e)}") + + # set to model hub 
option if not found in list: + if default_model not in st.session_state.model_options: + st.session_state.current_model_index = st.session_state.model_options.index("Use Model From Nexa Model Hub 🔍") + else: + try: + st.session_state.current_model_index = st.session_state.model_options.index(default_model) + except ValueError: + st.session_state.current_model_index = 0 + + st.session_state.initialized = True + + # model selection sidebar: + st.sidebar.header("Model Configuration") + + # update the selectbox index based on the currently loaded model: + if 'nexa_model' in st.session_state: + if st.session_state.current_hub_model: + # if we have a hub model loaded, select the hub option: + current_index = st.session_state.model_options.index("Use Model From Nexa Model Hub 🔍") + elif st.session_state.current_local_path: + # if we have a local model loaded, select the local option: + current_index = st.session_state.model_options.index("Local Model 📁") + elif st.session_state.current_model_path: + # if we have a listed model loaded, find its index: + current_index = st.session_state.model_options.index(st.session_state.current_model_path) + else: + current_index = st.session_state.current_model_index + else: + current_index = st.session_state.current_model_index + + model_path = st.sidebar.selectbox( + "Select a Model", + st.session_state.model_options, + index=current_index, + key='model_selectbox' + ) + + # update current model index when selection changes: + current_index = st.session_state.model_options.index(model_path) + if current_index != st.session_state.current_model_index: + st.session_state.current_model_index = current_index + if 'nexa_model' in st.session_state: + del st.session_state.nexa_model + st.session_state.messages = [] + st.session_state.current_model_path = None + st.session_state.current_local_path = None + st.session_state.current_hub_model = None + + # handle model loading based on selection: + if model_path == "Local Model 📁": + local_model_path = st.sidebar.text_input("Enter local model path") + if not local_model_path: + st.warning("Please enter a valid local model path to proceed.") + st.stop() + + local_model_path = local_model_path.strip() # remove spaces + if 'nexa_model' not in st.session_state or st.session_state.current_local_path != local_model_path: + with st.spinner("Loading local model..."): + st.session_state.nexa_model = load_local_model(local_model_path) + st.session_state.current_local_path = local_model_path + + elif model_path == "Use Model From Nexa Model Hub 🔍": + initial_value = default_model if not is_local_path and not hf else "" + hub_model_name = st.sidebar.text_input( + "Enter model name from Nexa Model Hub", + value=initial_value + ) + + # empty string check: + if not hub_model_name: + st.warning(f""" + How to add a model from Nexa Model Hub: + \n1. Visit [Nexa Model Hub](https://nexaai.com/models) + \n2. Find an NLP model using the task filters (chat, uncensored, etc.) + \n3. Select your desired model and copy either: + \n - The full nexa run command (e.g., nexa run Sao10K/MN-BackyardAI-Party-12B-v1:gguf-q4_K_M), or + \n - Simply the model name (e.g., Sao10K/MN-BackyardAI-Party-12B-v1:gguf-q4_K_M) + \n4. 
Paste it into the "Enter model name from Nexa Model Hub" field on the sidebar and press enter + """) + st.stop() + + # process the input after checking it's not empty: + if hub_model_name.startswith("nexa run"): + hub_model_name = hub_model_name.split("nexa run")[-1].strip() + else: + hub_model_name = hub_model_name.strip() + + if 'nexa_model' not in st.session_state or st.session_state.current_hub_model != hub_model_name: + with st.spinner("Loading model from hub..."): + st.session_state.nexa_model = load_model(hub_model_name) + if st.session_state.nexa_model: # only update if load was successful + st.session_state.current_hub_model = hub_model_name + + else: + # load selected model if it's not already loaded: + if ('nexa_model' not in st.session_state or + getattr(st.session_state, 'current_model_path', None) != model_path): + with st.spinner(f"Loading model: {model_path}"): + st.session_state.nexa_model = load_model(model_path) + if st.session_state.nexa_model: # only update if load was successful + st.session_state.current_model_path = model_path + + # generation params: + if 'nexa_model' in st.session_state and st.session_state.nexa_model: + st.sidebar.header("Generation Parameters") + model_params = st.session_state.nexa_model.params + + temperature = st.sidebar.slider( + "Temperature", 0.0, 1.0, model_params.get("temperature", DEFAULT_PARAMS["temperature"]) + ) + max_new_tokens = st.sidebar.slider( + "Max New Tokens", 1, 500, model_params.get("max_new_tokens", DEFAULT_PARAMS["max_new_tokens"]) + ) + top_k = st.sidebar.slider( + "Top K", 1, 100, model_params.get("top_k", DEFAULT_PARAMS["top_k"]) + ) + top_p = st.sidebar.slider( + "Top P", 0.0, 1.0, model_params.get("top_p", DEFAULT_PARAMS["top_p"]) + ) + nctx = st.sidebar.slider( + "Context length", 1000, 9999, model_params.get("nctx", DEFAULT_PARAMS["nctx"]) + ) + + st.session_state.nexa_model.params.update({ + "temperature": temperature, + "max_new_tokens": max_new_tokens, + "top_k": top_k, + "top_p": top_p, + "nctx": nctx, + }) + + # chat interface: + for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + + if prompt := st.chat_input("Say something..."): + if 'nexa_model' not in st.session_state or not st.session_state.nexa_model: + st.error("Please wait for the model to load or select a valid model.") + else: + st.session_state.messages.append({"role": "user", "content": prompt}) + with st.chat_message("user"): + st.markdown(prompt) + + with st.chat_message("assistant"): + response_placeholder = st.empty() + full_response = "" + for chunk in generate_response(st.session_state.nexa_model): + choice = chunk["choices"][0] + if "delta" in choice: + delta = choice["delta"] + content = delta.get("content", "") + elif "text" in choice: + delta = choice["text"] + content = delta + + full_response += content + response_placeholder.markdown(full_response, unsafe_allow_html=True) + response_placeholder.markdown(full_response) + + st.session_state.messages.append({"role": "assistant", "content": full_response}) -if "messages" not in st.session_state: - st.session_state.messages = [] - -for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - -if prompt := st.chat_input("Say something..."): - st.session_state.messages.append({"role": "user", "content": prompt}) - with st.chat_message("user"): - st.markdown(prompt) - - with st.chat_message("assistant"): - response_placeholder = st.empty() - full_response = "" - for 
chunk in generate_response(st.session_state.nexa_model): - choice = chunk["choices"][0] - if "delta" in choice: - delta = choice["delta"] - content = delta.get("content", "") - elif "text" in choice: - delta = choice["text"] - content = delta - - full_response += content - response_placeholder.markdown(full_response, unsafe_allow_html=True) - response_placeholder.markdown(full_response) - - st.session_state.messages.append({"role": "assistant", "content": full_response}) +except Exception as e: + st.error(f"An unexpected error occurred: {str(e)}") + import traceback + st.error(f"Traceback: {traceback.format_exc()}") diff --git a/nexa/gguf/streamlit/streamlit_vlm.py b/nexa/gguf/streamlit/streamlit_vlm.py index c9357904..a581b167 100644 --- a/nexa/gguf/streamlit/streamlit_vlm.py +++ b/nexa/gguf/streamlit/streamlit_vlm.py @@ -1,40 +1,65 @@ import sys import tempfile -from typing import Iterator - +import subprocess +import re +from typing import List, Iterator import streamlit as st from PIL import Image from nexa.general import pull_model from nexa.gguf.nexa_inference_vlm import NexaVLMInference +from nexa.utils import ( + get_model_options, + update_model_options, +) +from nexa.constants import NEXA_RUN_MODEL_MAP_VLM -default_model = sys.argv[1] -is_local_path = False if sys.argv[2] == "False" else True -hf = False if sys.argv[3] == "False" else True -projector_local_path = sys.argv[4] if len(sys.argv) > 4 else None +specified_run_type = 'Multimodal' +model_map = NEXA_RUN_MODEL_MAP_VLM +# init from command line args: +try: + default_model = sys.argv[1] + is_local_path = sys.argv[2].lower() == "true" + hf = sys.argv[3].lower() == "true" + projector_local_path = sys.argv[4] if len(sys.argv) > 4 else None +except IndexError: + st.error("Missing required command line arguments.") + sys.exit(1) # terminate with an error -@st.cache_resource -def load_model(model_path): - if is_local_path: - local_path = model_path - elif hf: - local_path, _ = pull_model(model_path, hf=True) - else: - local_path, run_type = pull_model(model_path) - - if is_local_path: - nexa_model = NexaVLMInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path) - else: - nexa_model = NexaVLMInference(model_path=model_path, local_path=local_path) - return nexa_model - +@st.cache_resource(show_spinner=False) +def load_model(model_path, is_local=False, is_hf=False, projector_path=None): + """Load model with model mapping logic.""" + try: + if is_local: + local_path = model_path + nexa_model = NexaVLMInference( + model_path=model_path, + local_path=local_path, + projector_local_path=projector_path + ) + elif is_hf: + local_path, _ = pull_model(model_path, hf=True) + nexa_model = NexaVLMInference(model_path=model_path, local_path=local_path) + else: + # get the actual model name from the mapping if it exists: + if model_path in NEXA_RUN_MODEL_MAP_VLM: + real_model_path = NEXA_RUN_MODEL_MAP_VLM[model_path] + local_path, run_type = pull_model(real_model_path) + else: + local_path, run_type = pull_model(model_path) + nexa_model = NexaVLMInference(model_path=model_path, local_path=local_path) + return nexa_model + except Exception as e: + st.error(f"Error loading model: {str(e)}") + return None def generate_response( nexa_model: NexaVLMInference, image_path: str, user_input: str ) -> Iterator: return nexa_model._chat(user_input, image_path) - +# UI setup: +st.set_page_config(page_title="Nexa AI Multimodal Generation", layout="wide") st.markdown( r""" + \ No newline at end of file diff --git 
a/android/llama.android/app-java/src/main/res/values/colors.xml b/android/llama.android/app-java/src/main/res/values/colors.xml new file mode 100644 index 00000000..b15af47b --- /dev/null +++ b/android/llama.android/app-java/src/main/res/values/colors.xml @@ -0,0 +1,17 @@ + + + #FF000000 + #FFFFFFFF + #813BBA + #FF202020 + #17CE92 + #E5E5E5 + #0A1528 + #313D50 + #03070D + #03070D + #03070D + #03070D + #FFFFFF + #B00020 + \ No newline at end of file diff --git a/android/llama.android/app-java/src/main/res/values/font_certs.xml b/android/llama.android/app-java/src/main/res/values/font_certs.xml new file mode 100644 index 00000000..d2226ac0 --- /dev/null +++ b/android/llama.android/app-java/src/main/res/values/font_certs.xml @@ -0,0 +1,17 @@ + + + + @array/com_google_android_gms_fonts_certs_dev + @array/com_google_android_gms_fonts_certs_prod + + + + MIIEqDCCA5CgAwIBAgIJANWFuGx90071MA0GCSqGSIb3DQEBBAUAMIGUMQswCQYDVQQGEwJVUzETMBEGA1UECBMKQ2FsaWZvcm5pYTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEQMA4GA1UEChMHQW5kcm9pZDEQMA4GA1UECxMHQW5kcm9pZDEQMA4GA1UEAxMHQW5kcm9pZDEiMCAGCSqGSIb3DQEJARYTYW5kcm9pZEBhbmRyb2lkLmNvbTAeFw0wODA0MTUyMzM2NTZaFw0zNTA5MDEyMzM2NTZaMIGUMQswCQYDVQQGEwJVUzETMBEGA1UECBMKQ2FsaWZvcm5pYTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEQMA4GA1UEChMHQW5kcm9pZDEQMA4GA1UECxMHQW5kcm9pZDEQMA4GA1UEAxMHQW5kcm9pZDEiMCAGCSqGSIb3DQEJARYTYW5kcm9pZEBhbmRyb2lkLmNvbTCCASAwDQYJKoZIhvcNAQEBBQADggENADCCAQgCggEBANbOLggKv+IxTdGNs8/TGFy0PTP6DHThvbbR24kT9ixcOd9W+EaBPWW+wPPKQmsHxajtWjmQwWfna8mZuSeJS48LIgAZlKkpFeVyxW0qMBujb8X8ETrWy550NaFtI6t9+u7hZeTfHwqNvacKhp1RbE6dBRGWynwMVX8XW8N1+UjFaq6GCJukT4qmpN2afb8sCjUigq0GuMwYXrFVee74bQgLHWGJwPmvmLHC69EH6kWr22ijx4OKXlSIx2xT1AsSHee70w5iDBiK4aph27yH3TxkXy9V89TDdexAcKk/cVHYNnDBapcavl7y0RiQ4biu8ymM8Ga/nmzhRKya6G0cGw8CAQOjgfwwgfkwHQYDVR0OBBYEFI0cxb6VTEM8YYY6FbBMvAPyT+CyMIHJBgNVHSMEgcEwgb6AFI0cxb6VTEM8YYY6FbBMvAPyT+CyoYGapIGXMIGUMQswCQYDVQQGEwJVUzETMBEGA1UECBMKQ2FsaWZvcm5pYTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEQMA4GA1UEChMHQW5kcm9pZDEQMA4GA1UECxMHQW5kcm9pZDEQMA4GA1UEAxMHQW5kcm9pZDEiMCAGCSqGSIb3DQEJARYTYW5kcm9pZEBhbmRyb2lkLmNvbYIJANWFuGx90071MAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEEBQADggEBABnTDPEF+3iSP0wNfdIjIz1AlnrPzgAIHVvXxunW7SBrDhEglQZBbKJEk5kT0mtKoOD1JMrSu1xuTKEBahWRbqHsXclaXjoBADb0kkjVEJu/Lh5hgYZnOjvlba8Ld7HCKePCVePoTJBdI4fvugnL8TsgK05aIskyY0hKI9L8KfqfGTl1lzOv2KoWD0KWwtAWPoGChZxmQ+nBli+gwYMzM1vAkP+aayLe0a1EQimlOalO762r0GXO0ks+UeXde2Z4e+8S/pf7pITEI/tP+MxJTALw9QUWEv9lKTk+jkbqxbsh8nfBUapfKqYn0eidpwq2AzVp3juYl7//fKnaPhJD9gs= + + + + + 
MIIEQzCCAyugAwIBAgIJAMLgh0ZkSjCNMA0GCSqGSIb3DQEBBAUAMHQxCzAJBgNVBAYTAlVTMRMwEQYDVQQIEwpDYWxpZm9ybmlhMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtHb29nbGUgSW5jLjEQMA4GA1UECxMHQW5kcm9pZDEQMA4GA1UEAxMHQW5kcm9pZDAeFw0wODA4MjEyMzEzMzRaFw0zNjAxMDcyMzEzMzRaMHQxCzAJBgNVBAYTAlVTMRMwEQYDVQQIEwpDYWxpZm9ybmlhMRYwFAYDVQQHEw1Nb3VudGFpbiBWaWV3MRQwEgYDVQQKEwtHb29nbGUgSW5jLjEQMA4GA1UECxMHQW5kcm9pZDEQMA4GA1UEAxMHQW5kcm9pZDCCASAwDQYJKoZIhvcNAQEBBQADggENADCCAQgCggEBAKtWLgDYO6IIrgqWbxJOKdoR8qtW0I9Y4sypEwPpt1TTcvZApxsdyxMJZ2JORland2qSGT2y5b+3JKkedxiLDmpHpDsz2WCbdxgxRczfey5YZnTJ4VZbH0xqWVW/8lGmPav5xVwnIiJS6HXk+BVKZF+JcWjAsb/GEuq/eFdpuzSqeYTcfi6idkyugwfYwXFU1+5fZKUaRKYCwkkFQVfcAs1fXA5V+++FGfvjJ/CxURaSxaBvGdGDhfXE28LWuT9ozCl5xw4Yq5OGazvV24mZVSoOO0yZ31j7kYvtwYK6NeADwbSxDdJEqO4k//0zOHKrUiGYXtqw/A0LFFtqoZKFjnkCAQOjgdkwgdYwHQYDVR0OBBYEFMd9jMIhF1Ylmn/Tgt9r45jk14alMIGmBgNVHSMEgZ4wgZuAFMd9jMIhF1Ylmn/Tgt9r45jk14aloXikdjB0MQswCQYDVQQGEwJVUzETMBEGA1UECBMKQ2FsaWZvcm5pYTEWMBQGA1UEBxMNTW91bnRhaW4gVmlldzEUMBIGA1UEChMLR29vZ2xlIEluYy4xEDAOBgNVBAsTB0FuZHJvaWQxEDAOBgNVBAMTB0FuZHJvaWSCCQDC4IdGZEowjTAMBgNVHRMEBTADAQH/MA0GCSqGSIb3DQEBBAUAA4IBAQBt0lLO74UwLDYKqs6Tm8/yzKkEu116FmH4rkaymUIE0P9KaMftGlMexFlaYjzmB2OxZyl6euNXEsQH8gjwyxCUKRJNexBiGcCEyj6z+a1fuHHvkiaai+KL8W1EyNmgjmyy8AW7P+LLlkR+ho5zEHatRbM/YAnqGcFh5iZBqpknHf1SKMXFh4dd239FJ1jWYfbMDMy3NS5CTMQ2XFI1MvcyUTdZPErjQfTbQe3aDQsQcafEQPD+nqActifKZ0Np0IS9L9kR/wbNvyz6ENwPiTrjV2KRkEjH78ZMcUQXg0L3BYHJ3lc69Vs5Ddf9uUGGMYldX3WfMBEmh/9iFBDAaTCK + + + diff --git a/android/llama.android/app-java/src/main/res/values/ic_launcher_background.xml b/android/llama.android/app-java/src/main/res/values/ic_launcher_background.xml new file mode 100644 index 00000000..c5d5899f --- /dev/null +++ b/android/llama.android/app-java/src/main/res/values/ic_launcher_background.xml @@ -0,0 +1,4 @@ + + + #FFFFFF + \ No newline at end of file diff --git a/android/llama.android/app-java/src/main/res/values/preloaded_fonts.xml b/android/llama.android/app-java/src/main/res/values/preloaded_fonts.xml new file mode 100644 index 00000000..56657f17 --- /dev/null +++ b/android/llama.android/app-java/src/main/res/values/preloaded_fonts.xml @@ -0,0 +1,6 @@ + + + + @font/alegreya_sans_sc_extrabold + + diff --git a/android/llama.android/app-java/src/main/res/values/strings.xml b/android/llama.android/app-java/src/main/res/values/strings.xml new file mode 100644 index 00000000..2ff67712 --- /dev/null +++ b/android/llama.android/app-java/src/main/res/values/strings.xml @@ -0,0 +1,4 @@ + + LayoutTest + User Message + \ No newline at end of file diff --git a/android/llama.android/app-java/src/main/res/values/styles.xml b/android/llama.android/app-java/src/main/res/values/styles.xml new file mode 100644 index 00000000..864fcf30 --- /dev/null +++ b/android/llama.android/app-java/src/main/res/values/styles.xml @@ -0,0 +1,16 @@ + + + + diff --git a/android/llama.android/app-java/src/main/res/values/themes.xml b/android/llama.android/app-java/src/main/res/values/themes.xml new file mode 100644 index 00000000..2ef46f0c --- /dev/null +++ b/android/llama.android/app-java/src/main/res/values/themes.xml @@ -0,0 +1,13 @@ + + + + +