Merge pull request #312 from NexaAI/nexa-sdk-upgrade-llama-cpp-python-new

Nexa sdk upgrade
zhiyuan8 authored Dec 12, 2024
2 parents 9ac4a17 + 8c6d8be commit aa45f02
Showing 26 changed files with 1,514 additions and 2,254 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-wheels-metal.yaml
@@ -11,7 +11,7 @@ jobs:
    runs-on: macos-${{ matrix.os }}
    strategy:
      matrix:
-        os: [12, 13, 14]
+        os: [13, 14, 15]

    steps:
      - uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion .gitmodules
@@ -5,7 +5,7 @@
[submodule "dependency/llama.cpp"]
path = dependency/llama.cpp
url = https://github.com/NexaAI/llama.cpp.git
branch = master
branch = release
[submodule "nexa/eval/benchmark_tasks"]
path = nexa/eval/benchmark_tasks
url = https://github.com/NexaAI/benchmark-tasks.git
8 changes: 4 additions & 4 deletions README.md
@@ -21,7 +21,7 @@

## Latest News 🔥

-- Support Nexa AI's own vision language model (0.9B parameters): `nexa run omnivision` and audio language model (2.9B parameters): `nexa run omniaudio`
+- Support Nexa AI's own vision language model (0.9B parameters): `nexa run omniVLM` and audio language model (2.9B parameters): `nexa run omniaudio`
- Support audio language model: `nexa run qwen2audio`; **we are the first open-source toolkit to support audio language models with the GGML tensor library.**
- Support iOS Swift binding for local inference on **iOS mobile** devices.
- Support embedding model: `nexa embed <model_path> <prompt>`
@@ -33,13 +33,13 @@ Welcome to submit your requests through [issues](https://github.com/NexaAI/nexa-
## Install Option 1: Executable Installer

<p>
<a href="https://public-storage.nexa4ai.com/nexa-sdk-executable-installer/nexa-sdk-0.0.9.5-macos-installer.pkg">
<a href="https://public-storage.nexa4ai.com/nexa-sdk-executable-installer/nexa-sdk-0.0.9.6-macos-installer.pkg">
<img src="./assets/mac.png" style="height: 1em; width: auto" /> <strong> macOS Installer </strong>
</a>
</p>

<p>
<a href="https://public-storage.nexa4ai.com/nexa-sdk-executable-installer/nexa-sdk-0.0.9.5-windows-setup.exe">
<a href="https://public-storage.nexa4ai.com/nexa-sdk-executable-installer/nexa-sdk-0.0.9.6-windows-setup.exe">
<img src="./assets/windows.png" style="height: 1em; width: auto" /> <strong>Windows Installer</strong>
</a>
</p>
@@ -228,7 +228,7 @@ Supported model examples (full list at [Model Hub](https://nexa.ai/models)):
| [qwen2audio](https://nexa.ai/Qwen/Qwen2-Audio-7.8B-Instruct/gguf-q4_K_M/readme) | AudioLM | GGUF | `nexa run qwen2audio` |
| [octopus-v2](https://www.nexaai.com/NexaAI/Octopus-v2/gguf-q4_0/readme) | Function Call | GGUF | `nexa run octopus-v2` |
| [octo-net](https://www.nexaai.com/NexaAI/Octo-net/gguf-q4_0/readme) | Text | GGUF | `nexa run octo-net` |
-| [omnivision](https://nexa.ai/NexaAI/omnivision/gguf-fp16/readme) | Multimodal | GGUF | `nexa run omnivision` |
+| [omniVLM](https://nexa.ai/NexaAI/omniVLM/gguf-fp16/readme) | Multimodal | GGUF | `nexa run omniVLM` |
| [nanollava](https://www.nexaai.com/qnguyen3/nanoLLaVA/gguf-fp16/readme) | Multimodal | GGUF | `nexa run nanollava` |
| [llava-phi3](https://www.nexaai.com/xtuner/llava-phi-3-mini/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava-phi3` |
| [llava-llama3](https://www.nexaai.com/xtuner/llava-llama-3-8b-v1.1/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava-llama3` |
2 changes: 1 addition & 1 deletion dependency/llama.cpp
10 changes: 8 additions & 2 deletions docs/README.md
@@ -28,12 +28,16 @@ pip install nexaai[onnx] # if you need ONNX support
```

### build from source

To build C++ only:

```
cmake -B build -S .
cmake --build build --config Release -j32
```

To build C++ and install the Python package from source, run the following commands:

```bash
git clone --recursive https://github.com/NexaAI/nexa-sdk.git
cd nexa-sdk
@@ -75,7 +79,7 @@ python -m nexa.gguf.nexa_inference_text gemma
python -m nexa.gguf.nexa_inference_text octopusv2 --stop_words "<nexa_end>"
wget https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png -O test.png
python -m nexa.gguf.nexa_inference_vlm nanollava
-python -m nexa.gguf.nexa_inference_vlm_omni omnivision
+python -m nexa.gguf.nexa_inference_vlm_omni omniVLM
python -m nexa.gguf.nexa_inference_image sd1-4
python -m nexa.gguf.nexa_inference_image sd1-4 --img2img
wget -O control_normal-fp16.safetensors https://huggingface.co/webui/ControlNet-modules-safetensors/resolve/main/control_normal-fp16.safetensors
@@ -235,7 +239,9 @@ dumpbin /dependents your_executable_or_dll.dll # in Developer PowerShell for Vi
```

### Debug dynamic lib

According to this [issue](https://github.com/abetlen/llama-cpp-python/issues/1346), the following can check the exported symbols on Linux.

```
readelf -Ws --dyn-syms libllama.so
```
2 changes: 1 addition & 1 deletion nexa/__init__.py
@@ -1 +1 @@
__version__ = "0.0.9.5"
__version__ = "0.0.9.6"
10 changes: 5 additions & 5 deletions nexa/constants.py
@@ -188,8 +188,8 @@ class ModelType(Enum):
"omnivision-preview": "omnivision-preview:projector-fp16",
"omnivision-preview:fp16": "omnivision-preview:projector-fp16",
"omnivision-preview:q4_0": "omnivision-preview:projector-q4_0",
"omnivision": "omnivision:projector-fp16",
"omnivision:fp16": "omnivision:projector-fp16",
"omniVLM": "omniVLM:projector-fp16",
"omniVLM:fp16": "omniVLM:projector-fp16",
"omnivision-ocr": "omnivision-ocr:projector-fp16",
"omnivision-ocr:fp16": "omnivision-ocr:projector-fp16",
}
@@ -198,8 +198,8 @@ class ModelType(Enum):
"omnivision-preview": "omnivision-preview:model-fp16",
"omnivision-preview:fp16": "omnivision-preview:model-fp16",
"omnivision-preview:q4_0": "omnivision-preview:model-q4_0",
"omnivision": "omnivision:model-fp16",
"omnivision:fp16": "omnivision:model-fp16",
"omniVLM": "omniVLM:model-fp16",
"omniVLM:fp16": "omniVLM:model-fp16",
"omnivision-ocr": "omnivision-ocr:model-fp16",
"omnivision-ocr:fp16": "omnivision-ocr:model-fp16",
}
@@ -461,7 +461,7 @@ class ModelType(Enum):
"FLUX.1-schnell": ModelType.COMPUTER_VISION,
"Phi-3-vision-128k-instruct": ModelType.MULTIMODAL,
"omnivision-preview": ModelType.MULTIMODAL,
"omnivision": ModelType.MULTIMODAL,
"omniVLM": ModelType.MULTIMODAL,
"omnivision-ocr": ModelType.MULTIMODAL,
"nanoLLaVA": ModelType.MULTIMODAL,
"llava-v1.6-mistral-7b": ModelType.MULTIMODAL,
2 changes: 2 additions & 0 deletions nexa/gguf/llama/__init__.py
@@ -0,0 +1,2 @@
from nexa.gguf.llama.llama_cpp import *
from nexa.gguf.llama.llama import *
77 changes: 77 additions & 0 deletions nexa/gguf/llama/_ctypes_extensions.py
@@ -0,0 +1,77 @@
from __future__ import annotations

import sys
import os
import ctypes
import functools
import pathlib

from typing import (
    Any,
    Callable,
    List,
    Union,
    Optional,
    TYPE_CHECKING,
    TypeVar,
    Generic,
)
from typing_extensions import TypeAlias


# ctypes sane type hint helpers
#
# - Generic Pointer and Array types
# - PointerOrRef type with a type hinted byref function
#
# NOTE: Only use these for static type checking not for runtime checks
# no good will come of that

if TYPE_CHECKING:
    CtypesCData = TypeVar("CtypesCData", bound=ctypes._CData)  # type: ignore

    CtypesArray: TypeAlias = ctypes.Array[CtypesCData]  # type: ignore

    CtypesPointer: TypeAlias = ctypes._Pointer[CtypesCData]  # type: ignore

    CtypesVoidPointer: TypeAlias = ctypes.c_void_p

    class CtypesRef(Generic[CtypesCData]):
        pass

    CtypesPointerOrRef: TypeAlias = Union[
        CtypesPointer[CtypesCData], CtypesRef[CtypesCData]
    ]

    CtypesFuncPointer: TypeAlias = ctypes._FuncPointer  # type: ignore

F = TypeVar("F", bound=Callable[..., Any])


def ctypes_function_for_shared_library(lib: ctypes.CDLL):
    """Decorator for defining ctypes functions with type hints"""

    def ctypes_function(
        name: str, argtypes: List[Any], restype: Any, enabled: bool = True
    ):
        def decorator(f: F) -> F:
            if enabled:
                func = getattr(lib, name)
                func.argtypes = argtypes
                func.restype = restype
                functools.wraps(f)(func)
                return func
            else:
                return f

        return decorator

    return ctypes_function


def _byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
    """Type-annotated version of ctypes.byref"""
    ...


byref = _byref if TYPE_CHECKING else ctypes.byref
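For readers unfamiliar with this pattern, here is a minimal usage sketch (not part of this commit) of how `ctypes_function_for_shared_library` turns a typed Python stub into a binding for a C symbol. The `load_library("llama")` call and the `llama_n_ctx` signature are assumptions for illustration only:

```python
# Sketch only (assumed names): bind a C function with typed argtypes/restype.
import ctypes

from nexa.gguf.lib_utils import load_library
from nexa.gguf.llama._ctypes_extensions import ctypes_function_for_shared_library

lib = load_library("llama")  # assumption: the bundled llama shared library
ctypes_function = ctypes_function_for_shared_library(lib)

# Assumed C signature, for illustration:
#   uint32_t llama_n_ctx(const struct llama_context *ctx);
@ctypes_function("llama_n_ctx", [ctypes.c_void_p], ctypes.c_uint32)
def llama_n_ctx(ctx: ctypes.c_void_p, /) -> int:
    """Stub; the decorator replaces it with the configured CDLL function."""
    ...
```

When `enabled` is true the decorator discards the stub body and returns the CDLL function itself, with `argtypes`/`restype` set and the stub's metadata copied over via `functools.wraps`, so static type checkers see the annotated signature while runtime calls go straight to the C library.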
11 changes: 11 additions & 0 deletions nexa/gguf/llama/_ggml.py
@@ -0,0 +1,11 @@
"""Internal module use at your own risk
This module provides a minimal interface for working with ggml tensors from llama-cpp-python
"""
import os
import pathlib

from nexa.gguf.lib_utils import load_library

libggml = load_library("ggml")
