From a5ff3e07b69b1c3eda60ee28c69191d9c6fbd4d0 Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Tue, 5 Nov 2024 14:29:51 -0800 Subject: [PATCH 1/2] update SpinningCursorAnimation and prompt instructions --- nexa/cli/entry.py | 8 ++++---- nexa/constants.py | 24 ++++++++++++------------ nexa/gguf/nexa_inference_audio_lm.py | 7 ++++--- nexa/gguf/nexa_inference_vlm_omni.py | 10 ++++++---- 4 files changed, 26 insertions(+), 23 deletions(-) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 3afeea96..9ecd4b04 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -96,7 +96,6 @@ def run_ggml_inference(args): inference.run_txt2img() return elif run_type == "Multimodal": - print("debug run_type", run_type, "is_local_path", is_local_path) if is_local_path: if "omni" in local_path: from nexa.gguf.nexa_inference_vlm_omni import NexaOmniVlmInference @@ -105,7 +104,6 @@ def run_ggml_inference(args): from nexa.gguf.nexa_inference_vlm import NexaVLMInference inference = NexaVLMInference(model_path=model_path, local_path=local_path, projector_local_path=projector_local_path, stop_words=stop_words, **kwargs) else: - print("debug: local_path", local_path) if "omni" in local_path: from nexa.gguf.nexa_inference_vlm_omni import NexaOmniVlmInference inference = NexaOmniVlmInference(model_path=model_path, local_path=local_path, **kwargs) @@ -125,7 +123,8 @@ def run_ggml_inference(args): print(f"Unknown task: {run_type}. Skipping inference.") return except Exception as e: - print(f"Error {e}, please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") + print(f"Error running ggml inference: {e}") + print(f"Please refer to our docs to install nexaai package: https://docs.nexaai.com/getting-started/installation ") return if hasattr(args, 'streamlit') and args.streamlit: @@ -216,7 +215,8 @@ def run_onnx_inference(args): print(f"Unknown task: {run_type}. Skipping inference.") return except Exception as e: - print(f"Error loading ONNX models, please refer to our docs to install nexaai[onnx] package: https://docs.nexaai.com/getting-started/installation ") + print(f"Error running onnx inference: {e}") + print(f"Please refer to our docs to install nexaai[onnx] package: https://docs.nexaai.com/getting-started/installation ") return if hasattr(args, 'streamlit') and args.streamlit: diff --git a/nexa/constants.py b/nexa/constants.py index 32e6a53a..04ec51fb 100644 --- a/nexa/constants.py +++ b/nexa/constants.py @@ -111,11 +111,11 @@ class ModelType(Enum): NEXA_RUN_MODEL_MAP_AUDIO_LM = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:model-q4_0", - "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", - "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", + "Qwen2-Audio-7.8B-Instruct:fp16": "Qwen2-Audio-7.8B-Instruct:model-fp16", + "Qwen2-Audio-7.8B-Instruct:q4_0": "Qwen2-Audio-7.8B-Instruct:model-q4_0", "omniaudio": "Octo-omni-audio:model-fp16", - "omniaudio:fp16": "Octo-omni-audio:model-fp16", - "omniaudio:q4_0": "Octo-omni-audio:model-q4_0", + "Octo-omni-audio:fp16": "Octo-omni-audio:model-fp16", + "Octo-omni-audio:q4_0": "Octo-omni-audio:model-q4_0", } NEXA_RUN_MODEL_MAP_VOICE = { @@ -172,23 +172,23 @@ class ModelType(Enum): NEXA_RUN_OMNI_VLM_PROJECTOR_MAP = { "omnivision": "Octo-omni-vision:projector-fp16", - "omnivision:fp16": "Octo-omni-vision:projector-fp16", - "omnivision:q4_0": "Octo-omni-vision:projector-q4_0", + "Octo-omni-vision:fp16": "Octo-omni-vision:projector-fp16", + "Octo-omni-vision:q4_0": "Octo-omni-vision:projector-q4_0", } NEXA_RUN_OMNI_VLM_MAP = { "omnivision": "Octo-omni-vision:model-fp16", - "omnivision:fp16": "Octo-omni-vision:model-fp16", - "omnivision:q4_0": "Octo-omni-vision:model-q4_0", + "Octo-omni-vision:fp16": "Octo-omni-vision:model-fp16", + "Octo-omni-vision:q4_0": "Octo-omni-vision:model-q4_0", } NEXA_RUN_AUDIO_LM_PROJECTOR_MAP = { "qwen2audio": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", - "qwen2audio:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", - "qwen2audio:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", + "Qwen2-Audio-7.8B-Instruct:fp16": "Qwen2-Audio-7.8B-Instruct:projector-fp16", + "Qwen2-Audio-7.8B-Instruct:q4_0": "Qwen2-Audio-7.8B-Instruct:projector-q4_0", "omniaudio": "Octo-omni-audio:projector-fp16", - "omniaudio:fp16": "Octo-omni-audio:projector-fp16", - "omniaudio:q4_0": "Octo-omni-audio:projector-q4_0", + "Octo-omni-audio:fp16": "Octo-omni-audio:projector-fp16", + "Octo-omni-audio:q4_0": "Octo-omni-audio:projector-q4_0", } NEXA_RUN_T5XXL_MAP = { diff --git a/nexa/gguf/nexa_inference_audio_lm.py b/nexa/gguf/nexa_inference_audio_lm.py index 777ae2e6..101f62e1 100644 --- a/nexa/gguf/nexa_inference_audio_lm.py +++ b/nexa/gguf/nexa_inference_audio_lm.py @@ -2,7 +2,7 @@ import logging import os from pathlib import Path - +from nexa.utils import SpinningCursorAnimation, nexa_prompt from nexa.constants import ( DEFAULT_TEXT_GEN_PARAMS, NEXA_RUN_MODEL_MAP_AUDIO_LM, @@ -114,6 +114,7 @@ def __init__( with suppress_stdout_stderr(): self._load_model() + @SpinningCursorAnimation() def _load_model(self): try: self.ctx_params.model = ctypes.c_char_p( @@ -140,12 +141,12 @@ def run(self): while True: try: while True: - audio_path = input("Audio Path (required): ") + audio_path = nexa_prompt("Enter the path to your audio file (required): ") if os.path.exists(audio_path): break print(f"'{audio_path}' is not a valid audio path. Please try again.") - user_input = input("Enter text (leave empty if no prompt): ") + user_input = nexa_prompt("Enter text (leave empty if no prompt): ") self.ctx_params.file = ctypes.c_char_p(audio_path.encode("utf-8")) self.ctx_params.prompt = ctypes.c_char_p(user_input.encode("utf-8")) diff --git a/nexa/gguf/nexa_inference_vlm_omni.py b/nexa/gguf/nexa_inference_vlm_omni.py index 91a31314..0a9ca737 100644 --- a/nexa/gguf/nexa_inference_vlm_omni.py +++ b/nexa/gguf/nexa_inference_vlm_omni.py @@ -3,7 +3,7 @@ import logging import os from pathlib import Path - +from nexa.utils import nexa_prompt, SpinningCursorAnimation from nexa.constants import ( DEFAULT_TEXT_GEN_PARAMS, NEXA_RUN_OMNI_VLM_PROJECTOR_MAP, @@ -84,6 +84,7 @@ def __init__( with suppress_stdout_stderr(): self._load_model() + @SpinningCursorAnimation() def _load_model(self): try: self.ctx_params_model = ctypes.c_char_p( @@ -100,10 +101,11 @@ def _load_model(self): def run(self): while True: try: - image_path = input("Image Path: ") + image_path = nexa_prompt("Image Path (required): ") if not os.path.exists(image_path): print(f"Image path: {image_path} not found, running omni VLM without image input.") - user_input = input("Enter text: ") + + user_input = nexa_prompt() image_path = ctypes.c_char_p(image_path.encode("utf-8")) user_input = ctypes.c_char_p(user_input.encode("utf-8")) omni_vlm_cpp.omnivlm_inference(user_input, image_path) @@ -111,9 +113,9 @@ def run(self): except KeyboardInterrupt: print("\nExiting...") break - except Exception as e: logging.error(f"\nError during audio generation: {e}", exc_info=True) + print("\n") def __del__(self): omni_vlm_cpp.omnivlm_free() From 329590bd5889be33dde3ba65ae856b27a05f7891 Mon Sep 17 00:00:00 2001 From: Yicheng Qian Date: Tue, 5 Nov 2024 16:29:48 -0800 Subject: [PATCH 2/2] fix vlm streamlit bug --- nexa/cli/entry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nexa/cli/entry.py b/nexa/cli/entry.py index 9ecd4b04..7898a774 100644 --- a/nexa/cli/entry.py +++ b/nexa/cli/entry.py @@ -46,6 +46,8 @@ def run_ggml_inference(args): run_type = ModelType[model_type].value local_path = None + projector_local_path = None + if is_local_path or hf: if not model_type: print("Error: --model_type must be provided when using --local_path or --huggingface")