diff --git a/README.md b/README.md
index a61f4633..17887873 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@
 
 ## Latest News 🔥
 
-- Support Nexa AI's own vision language model (0.9B parameters): `nexa run omnivision` and audio language model (2.9B parameters): `nexa run omniaudio`
+- Support Nexa AI's own vision language model (0.9B parameters): `nexa run omniVLM` and audio language model (2.9B parameters): `nexa run omniaudio`
 - Support audio language model: `nexa run qwen2audio`, **we are the first open-source toolkit to support audio language model with GGML tensor library.**
 - Support iOS Swift binding for local inference on **iOS mobile** devices.
 - Support embedding model: `nexa embed `
@@ -228,7 +228,7 @@ Supported model examples (full list at [Model Hub](https://nexa.ai/models)):
 | [qwen2audio](https://nexa.ai/Qwen/Qwen2-Audio-7.8B-Instruct/gguf-q4_K_M/readme) | AudioLM | GGUF | `nexa run qwen2audio` |
 | [octopus-v2](https://www.nexaai.com/NexaAI/Octopus-v2/gguf-q4_0/readme) | Function Call | GGUF | `nexa run octopus-v2` |
 | [octo-net](https://www.nexaai.com/NexaAI/Octo-net/gguf-q4_0/readme) | Text | GGUF | `nexa run octo-net` |
-| [omnivision](https://nexa.ai/NexaAI/omnivision/gguf-fp16/readme) | Multimodal | GGUF | `nexa run omnivision` |
+| [omniVLM](https://nexa.ai/NexaAI/omniVLM/gguf-fp16/readme) | Multimodal | GGUF | `nexa run omniVLM` |
 | [nanollava](https://www.nexaai.com/qnguyen3/nanoLLaVA/gguf-fp16/readme) | Multimodal | GGUF | `nexa run nanollava` |
 | [llava-phi3](https://www.nexaai.com/xtuner/llava-phi-3-mini/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava-phi3` |
 | [llava-llama3](https://www.nexaai.com/xtuner/llava-llama-3-8b-v1.1/gguf-q4_0/readme) | Multimodal | GGUF | `nexa run llava-llama3` |
diff --git a/dependency/llama.cpp b/dependency/llama.cpp
index ed459776..bb33473f 160000
--- a/dependency/llama.cpp
+++ b/dependency/llama.cpp
@@ -1 +1 @@
-Subproject commit ed459776811d0928ce55a001e9e5a6bc3bf22ca4
+Subproject commit bb33473f08db604e1f30334366032f0904e2a722
diff --git a/docs/README.md b/docs/README.md
index 252116f7..d4081d2e 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -28,12 +28,16 @@ pip install nexaai[onnx] # if you need ONNX support
 ```
 
 ### build from source
+
 To build C++ only
+
 ```
 cmake -B build -S .
 cmake --build build --config Release -j32
 ```
+
 To build C++ and install python package from source, run the following commands:
+
 ```bash
 git clone --recursive https://github.com/NexaAI/nexa-sdk.git
 cd nexa-sdk
@@ -75,7 +79,7 @@ python -m nexa.gguf.nexa_inference_text gemma
 python -m nexa.gguf.nexa_inference_text octopusv2 --stop_words ""
 wget https://assets-c4akfrf5b4d3f4b7.z01.azurefd.net/assets/2024/04/BMDataViz_661fb89f3845e.png -O test.png
 python -m nexa.gguf.nexa_inference_vlm nanollava
-python -m nexa.gguf.nexa_inference_vlm_omni omnivision
+python -m nexa.gguf.nexa_inference_vlm_omni omniVLM
 python -m nexa.gguf.nexa_inference_image sd1-4
 python -m nexa.gguf.nexa_inference_image sd1-4 --img2img
 wget -O control_normal-fp16.safetensors https://huggingface.co/webui/ControlNet-modules-safetensors/resolve/main/control_normal-fp16.safetensors
@@ -235,7 +239,9 @@ dumpbin /dependents your_executable_or_dll.dll # in Developer PowerShell for Vi
 ```
 
 ### Debug dynamic lib
+
 According to [this issue](https://github.com/abetlen/llama-cpp-python/issues/1346), the command below checks the exported symbols on Linux.
+
 ```
 readelf -Ws --dyn-syms libllama.so
-```
\ No newline at end of file
+```
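
Reviewer note: the `readelf -Ws --dyn-syms libllama.so` check added to the docs above is easy to script as a smoke test. Below is a minimal sketch, assuming `readelf` from binutils is on PATH; the `exported_symbols` helper name and the library path are illustrative, not part of the SDK.

```python
# Hedged sketch: wraps the readelf check from the docs hunk above so it can
# run as an automated smoke test. Assumes readelf (binutils) is installed;
# the helper name and the libllama.so path are illustrative only.
import subprocess

def exported_symbols(lib_path: str) -> list:
    """Return the dynamic-symbol lines from `readelf -Ws --dyn-syms <lib>`."""
    out = subprocess.run(
        ["readelf", "-Ws", "--dyn-syms", lib_path],
        capture_output=True, text=True, check=True,
    ).stdout
    # Symbol table rows start with a numeric index; headers and blanks do not.
    return [line for line in out.splitlines() if line.lstrip()[:1].isdigit()]

if __name__ == "__main__":
    symbols = exported_symbols("libllama.so")  # path taken from the docs above
    print(f"{len(symbols)} dynamic symbols found")
    print("llama_* API exported:", any("llama_" in s for s in symbols))
```
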
diff --git a/nexa/constants.py b/nexa/constants.py
index 24acd195..51d6e051 100644
--- a/nexa/constants.py
+++ b/nexa/constants.py
@@ -188,8 +188,8 @@ class ModelType(Enum):
     "omnivision-preview": "omnivision-preview:projector-fp16",
     "omnivision-preview:fp16": "omnivision-preview:projector-fp16",
     "omnivision-preview:q4_0": "omnivision-preview:projector-q4_0",
-    "omnivision": "omnivision:projector-fp16",
-    "omnivision:fp16": "omnivision:projector-fp16",
+    "omniVLM": "omniVLM:projector-fp16",
+    "omniVLM:fp16": "omniVLM:projector-fp16",
     "omnivision-ocr": "omnivision-ocr:projector-fp16",
     "omnivision-ocr:fp16": "omnivision-ocr:projector-fp16",
 }
@@ -198,8 +198,8 @@ class ModelType(Enum):
     "omnivision-preview": "omnivision-preview:model-fp16",
     "omnivision-preview:fp16": "omnivision-preview:model-fp16",
     "omnivision-preview:q4_0": "omnivision-preview:model-q4_0",
-    "omnivision": "omnivision:model-fp16",
-    "omnivision:fp16": "omnivision:model-fp16",
+    "omniVLM": "omniVLM:model-fp16",
+    "omniVLM:fp16": "omniVLM:model-fp16",
     "omnivision-ocr": "omnivision-ocr:model-fp16",
     "omnivision-ocr:fp16": "omnivision-ocr:model-fp16",
 }
@@ -461,7 +461,7 @@ class ModelType(Enum):
     "FLUX.1-schnell": ModelType.COMPUTER_VISION,
     "Phi-3-vision-128k-instruct": ModelType.MULTIMODAL,
     "omnivision-preview": ModelType.MULTIMODAL,
-    "omnivision": ModelType.MULTIMODAL,
+    "omniVLM": ModelType.MULTIMODAL,
     "omnivision-ocr": ModelType.MULTIMODAL,
     "nanoLLaVA": ModelType.MULTIMODAL,
     "llava-v1.6-mistral-7b": ModelType.MULTIMODAL,
diff --git a/nexa/gguf/nexa_inference_vlm_omni.py b/nexa/gguf/nexa_inference_vlm_omni.py
index bd5b6b29..4a76a4eb 100644
--- a/nexa/gguf/nexa_inference_vlm_omni.py
+++ b/nexa/gguf/nexa_inference_vlm_omni.py
@@ -40,7 +40,7 @@ def __init__(
         else:
             self.n_gpu_layers = 0
 
-        # Handle direct model file paths (e.g., omnivision:model-fp16)
+        # Handle direct model file paths (e.g., omniVLM:model-fp16)
         if model_path and ':model-' in model_path:
             base_name = model_path.split(':')[0]
             model_type = model_path.split('model-')[1]
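
Reviewer note: for anyone auditing the rename end to end, here is a minimal sketch of the direct-tag parsing that the last hunk touches. `parse_direct_model_tag` is a hypothetical standalone helper that mirrors the inline logic in `nexa/gguf/nexa_inference_vlm_omni.py` shown above; the real code performs these splits inside `__init__`.

```python
# Hypothetical helper for illustration only: mirrors the inline parsing in
# nexa/gguf/nexa_inference_vlm_omni.py from the hunk above.
from typing import Optional, Tuple

def parse_direct_model_tag(model_path: str) -> Optional[Tuple[str, str]]:
    """Split a direct tag like 'omniVLM:model-fp16' into (base_name, quant).

    Plain names such as 'omniVLM' return None and fall through to the
    projector/model lookup tables in nexa/constants.py instead.
    """
    if model_path and ":model-" in model_path:
        base_name = model_path.split(":")[0]        # e.g. 'omniVLM'
        model_type = model_path.split("model-")[1]  # e.g. 'fp16'
        return base_name, model_type
    return None

# Quick checks against the renamed tags used in this PR.
assert parse_direct_model_tag("omniVLM:model-fp16") == ("omniVLM", "fp16")
assert parse_direct_model_tag("omniVLM") is None
```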