From 1489bf481c12b9e5039e8024b7da2dabe941c6e2 Mon Sep 17 00:00:00 2001
From: Davidqian123
Date: Tue, 10 Sep 2024 17:19:26 +0000
Subject: [PATCH 1/4] update onnx installation in README

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ff9fc8d2..4e29f91e 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,8 @@ Below is our differentiation from other similar tools:
 
 We have released pre-built wheels for various Python versions, platforms, and backends for convenient installation on our [index page](https://nexaai.github.io/nexa-sdk/whl/).
 
+> [!NOTE]
+> If you want to use an ONNX model, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
 
 #### CPU
@@ -117,9 +119,6 @@ For **Windows Git Bash**:
 ```bash
 CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir
 ```
-> [!NOTE]
-> If you want to use ONNX model, just replace `pip install nexaai` with `pip install nexaai[onnx]` in above commands
-
 
 FAQ: Building Issues for llava

From 7ab42db3aa405eea69ba1ae5fe0deeddadf8198c Mon Sep 17 00:00:00 2001
From: Davidqian123
Date: Tue, 10 Sep 2024 19:33:43 +0000
Subject: [PATCH 2/4] update tsinghua mirror

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4e29f91e..602dd85c 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,8 @@
 We have released pre-built wheels for various Python versions, platforms, and backends for convenient installation on our [index page](https://nexaai.github.io/nexa-sdk/whl/).
 
 > [!NOTE]
-> If you want to use an ONNX model, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
+> 1. If you want to use an ONNX model, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
+> 2. For Chinese developers, we recommend using the Tsinghua Open Source Mirror as the extra index URL: replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in the provided commands.
 
 #### CPU

From 5ace4a061434659e3b883d3b068404100e5520fd Mon Sep 17 00:00:00 2001
From: Davidqian123
Date: Tue, 10 Sep 2024 20:28:38 +0000
Subject: [PATCH 3/4] add new models

---
 nexa/constants.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/nexa/constants.py b/nexa/constants.py
index bfe48218..9e9cf558 100644
--- a/nexa/constants.py
+++ b/nexa/constants.py
@@ -44,8 +44,10 @@
     "dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0",
     "phi2": "Phi-2:q4_0",
     "phi3": "Phi-3-mini-128k-instruct:q4_0",
+    "phi3.5": "Phi-3.5-mini-instruct:q4_0",
     "llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0",
     "llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M",
+    "openelm": "OpenELM-3B:q4_K_M",
 }
 
 NEXA_RUN_MODEL_MAP_ONNX = {
@@ -139,10 +141,12 @@
     "sd1-4": "stable-diffusion-v1-4:q4_0",
     "sd1-5": "stable-diffusion-v1-5:q4_0",
     "sd2-1": "stable-diffusion-v2-1:fp16",
+    "sd3": "stable-diffusion-3-medium:q4_0",
     "sdxl-turbo": "sdxl-turbo:q8_0",
     "lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
     "anything-lcm": "anything-v30-LCM:fp16",
     "hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
+    "flux": "FLUX.1-schnell:q4_0",
 }
 
 NEXA_RUN_MODEL_MAP = {
@@ -172,10 +176,12 @@
     "sd1-4": "q4_0",
     "sd1-5": "q4_0",
     "sd2-1": "q4_0",
+    "sd3": "q4_0",
     "lcm-dreamshaper": "f16",
     "sdxl-turbo": "q8_0",
     "anything-lcm": "f16",
     "hassaku-lcm": "f16",
+    "flux": "q4_0",
 }
 
 EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
@@ -236,7 +242,7 @@
     'Mistral-7B-Instruct-v0.2': 'NLP',
     'Phi-3-mini-128k-instruct': 'NLP',
     'Phi-3-mini-4k-instruct': 'NLP',
-    "Phi-3.5-mini-instruct": "NLP",
+    'Phi-3.5-mini-instruct': 'NLP',
     'CodeQwen1.5-7B-Instruct': 'NLP',
     'Qwen2-0.5B-Instruct': 'NLP',
     'Qwen2-1.5B-Instruct': 'NLP',
@@ -270,13 +276,17 @@
     'Llama2-7b-chat-uncensored': 'NLP',
     'Llama3-8B-Lexi-Uncensored': 'NLP',
     'Llama2-7b-function-calling': 'NLP',
+    'OpenELM-1_1B': 'NLP',
+    'OpenELM-3B': 'NLP',
     'lcm-dreamshaper-v7': 'Computer Vision',
     'stable-diffusion-v1-5': 'Computer Vision',
     'stable-diffusion-v1-4': 'Computer Vision',
     'stable-diffusion-v2-1': 'Computer Vision',
+    'stable-diffusion-3-medium': 'Computer Vision',
     'sdxl-turbo': 'Computer Vision',
     'hassaku-hentai-model-v13-LCM': 'Computer Vision',
     'anything-v30-LCM': 'Computer Vision',
+    'FLUX.1-schnell': 'Computer Vision',
     'Phi-3-vision-128k-instruct': 'Multimodal',
     'nanoLLaVA': 'Multimodal',
     'llava-v1.6-mistral-7b': 'Multimodal',
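
PATCH 3/4 only touches the model registry: each CLI short name (`phi3.5`, `openelm`, `sd3`, `flux`) maps to a `model:tag` identifier on the official hub, with a matching default quantization for the new image models. As a hedged illustration of the contract these entries encode (standalone Python, not SDK code; `resolve` is a hypothetical helper, but the `split(":")` mirrors what `pull_model_from_official` does in PATCH 4/4 below):

```python
# Hypothetical helper illustrating the registry contract added in PATCH 3/4.
NEXA_RUN_MODEL_MAP_IMAGE = {
    "sd3": "stable-diffusion-3-medium:q4_0",
    "flux": "FLUX.1-schnell:q4_0",
}

def resolve(short_name: str) -> tuple[str, str]:
    """Expand a CLI short name into (model_name, tag)."""
    tagged = NEXA_RUN_MODEL_MAP_IMAGE[short_name]
    model_name, tag = tagged.split(":")  # same split pull_model_from_official performs
    return model_name, tag

assert resolve("flux") == ("FLUX.1-schnell", "q4_0")
assert resolve("sd3") == ("stable-diffusion-3-medium", "q4_0")
```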
From 2515ebedd3d6246a82e613ab7708f4eda9b1b233 Mon Sep 17 00:00:00 2001
From: Davidqian123
Date: Thu, 12 Sep 2024 04:58:44 +0000
Subject: [PATCH 4/4] add pull flux model logic

---
 nexa/constants.py                      | 206 ++++++++++++++-----------
 nexa/general.py                        |  11 +-
 nexa/gguf/llama/_utils_transformers.py |   2 +-
 nexa/gguf/llama/llama_cpp.py           |   8 +-
 nexa/gguf/nexa_inference_image.py      |  44 +++++-
 nexa/gguf/nexa_inference_text.py       |   3 +-
 nexa/gguf/nexa_inference_voice.py      |   3 +-
 7 files changed, 167 insertions(+), 110 deletions(-)

diff --git a/nexa/constants.py b/nexa/constants.py
index 9e9cf558..ce5a4915 100644
--- a/nexa/constants.py
+++ b/nexa/constants.py
@@ -1,14 +1,19 @@
 import os
 from pathlib import Path
+from enum import Enum
 
+# Paths for caching, model hub, and tokens
 NEXA_CACHE_ROOT = Path(os.getenv("NEXA_CACHE_ROOT") or "~/.cache/nexa").expanduser()
 NEXA_TOKEN_PATH = NEXA_CACHE_ROOT / "token"
 NEXA_MODELS_HUB_DIR = NEXA_CACHE_ROOT / "hub"
 NEXA_MODELS_HUB_OFFICIAL_DIR = NEXA_MODELS_HUB_DIR / "official"
 NEXA_MODEL_LIST_PATH = NEXA_MODELS_HUB_DIR / "model_list.json"
+
+# URLs and buckets
 NEXA_API_URL = "https://model-hub-backend.nexa4ai.com"
 NEXA_OFFICIAL_BUCKET = "https://public-storage.nexa4ai.com/"
 
+# Nexa logo
 NEXA_LOGO = """
 _| _| _|_|_| _| _| _|_| _|_| _|_|_|_| _|_| _| _| _| _| _| _| _| _| _|
 _| _|_| _| _| _| _| _| _| _| _| _| _| _|_|_| _| _| _| _| _| _| _|_|_|_|
 """
-# Maokun TODO: Update the model info and find a good default precision for each model
-PRODUCER_INFO = dict(
-    # producer_name="nexa.ai",  # onnxruntime: Model producer not matched: Expected "pytorch"
-    producer_version="0.0.0",
-    doc_string="Model exported by Nexa.ai",
-)
+# Model producer info
+PRODUCER_INFO = {
+    "producer_version": "0.0.0",
+    "doc_string": "Model exported by Nexa.ai",
+}
+
+class ModelType(Enum):
+    NLP = "NLP"
+    COMPUTER_VISION = "Computer Vision"
+    AUDIO = "Audio"
+    MULTIMODAL = "Multimodal"
 
 NEXA_RUN_MODEL_MAP_TEXT = {
     "octopus-v2": "Octopus-v2:q4_0",
@@ -118,8 +128,6 @@
     "Llama2-7b-function-calling:q8_0": "Llama2-7b-function-calling:q8_0",
 }
 
-
-
 NEXA_RUN_PROJECTOR_MAP = {
     "nanollava": "nanoLLaVA:projector-fp16",
     "nanoLLaVA:fp16": "nanoLLaVA:projector-fp16",
@@ -137,16 +145,34 @@
     "llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16",
 }
 
+NEXA_RUN_T5XXL_MAP = {
+    "flux": "FLUX.1-schnell:t5xxl-q4_0",
+    "FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0",
+    "FLUX.1-schnell:q5_0": "FLUX.1-schnell:t5xxl-q5_0",
+    "FLUX.1-schnell:q5_1": "FLUX.1-schnell:t5xxl-q5_1",
+    "FLUX.1-schnell:q8_0": "FLUX.1-schnell:t5xxl-q8_0",
+    "FLUX.1-schnell:fp16": "FLUX.1-schnell:t5xxl-fp16",
+}
+
 NEXA_RUN_MODEL_MAP_IMAGE = {
     "sd1-4": "stable-diffusion-v1-4:q4_0",
     "sd1-5": "stable-diffusion-v1-5:q4_0",
-    "sd2-1": "stable-diffusion-v2-1:fp16",
+    "sd2-1": "stable-diffusion-v2-1:q4_0",
     "sd3": "stable-diffusion-3-medium:q4_0",
     "sdxl-turbo": "sdxl-turbo:q8_0",
+    "flux": "FLUX.1-schnell:q4_0",
     "lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
     "anything-lcm": "anything-v30-LCM:fp16",
     "hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
-    "flux": "FLUX.1-schnell:q4_0",
+}
+
+NEXA_RUN_MODEL_MAP_FLUX = {
+    "flux": "FLUX.1-schnell:flux1-schnell-q4_0",
+    "FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0",
+    "FLUX.1-schnell:q5_0": "FLUX.1-schnell:flux1-schnell-q5_0",
+    "FLUX.1-schnell:q5_1": "FLUX.1-schnell:flux1-schnell-q5_1",
+    "FLUX.1-schnell:q8_0": "FLUX.1-schnell:flux1-schnell-q8_0",
+    "FLUX.1-schnell:fp16": "FLUX.1-schnell:flux1-schnell-fp16",
 }
 
 NEXA_RUN_MODEL_MAP = {
@@ -155,6 +181,7 @@
     **NEXA_RUN_MODEL_MAP_IMAGE,
     **NEXA_RUN_MODEL_MAP_VOICE,
     **NEXA_RUN_MODEL_MAP_FUNCTION_CALLING,
+    **NEXA_RUN_MODEL_MAP_FLUX,
 }
 
 NEXA_RUN_CHAT_TEMPLATE_MAP = {
@@ -177,17 +204,20 @@
     "sd1-5": "q4_0",
     "sd2-1": "q4_0",
     "sd3": "q4_0",
+    "flux": "q4_0",
     "lcm-dreamshaper": "f16",
     "sdxl-turbo": "q8_0",
     "anything-lcm": "f16",
     "hassaku-lcm": "f16",
-    "flux": "q4_0",
 }
 
 EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
 EXIT_REMINDER = f"Please use Ctrl + d or one of {EXIT_COMMANDS} to exit.\n"
 
-NEXA_STOP_WORDS_MAP = {"octopus-v2": ["<nexa_end>"]}
+NEXA_STOP_WORDS_MAP = {
+    "octopus-v2": ["<nexa_end>"],
+    "octopus-v4": ["<nexa_end>"]
+}
 
 DEFAULT_TEXT_GEN_PARAMS = {
     "temperature": 0.7,
@@ -229,85 +259,85 @@
     "language": None,
     "task": "transcribe",
     "temperature": 0.0,
-    "compute_type": "default"
+    "compute_type": "default",
 }
 
 NEXA_OFFICIAL_MODELS_TYPE = {
-    'gemma-2b': 'NLP',
-    'Llama-2-7b-chat': 'NLP',
-    'Llama-2-7b': 'NLP',
-    'Meta-Llama-3-8B-Instruct': 'NLP',
-    'Meta-Llama-3.1-8B-Instruct': 'NLP',
-    'Mistral-7B-Instruct-v0.3': 'NLP',
-    'Mistral-7B-Instruct-v0.2': 'NLP',
-    'Phi-3-mini-128k-instruct': 'NLP',
-    'Phi-3-mini-4k-instruct': 'NLP',
-    'Phi-3.5-mini-instruct': 'NLP',
-    'CodeQwen1.5-7B-Instruct': 'NLP',
-    'Qwen2-0.5B-Instruct': 'NLP',
-    'Qwen2-1.5B-Instruct': 'NLP',
-    'Qwen2-7B-Instruct': 'NLP',
-    'codegemma-2b': 'NLP',
-    'gemma-1.1-2b-instruct': 'NLP',
-    'gemma-2b-instruct': 'NLP',
-    'gemma-2-9b-instruct': 'NLP',
-    'gemma-1.1-7b-instruct': 'NLP',
-    'gemma-7b-instruct': 'NLP',
-    'gemma-7b': 'NLP',
-    'Qwen2-1.5B': 'NLP',
-    'codegemma-7b': 'NLP',
-    'TinyLlama-1.1B-Chat-v1.0': 'NLP',
-    'CodeLlama-7b-Instruct': 'NLP',
-    'gpt2': 'NLP',
-    'CodeLlama-7b': 'NLP',
-    'CodeLlama-7b-Python': 'NLP',
-    'Qwen1.5-7B-Instruct': 'NLP',
-    'Qwen1.5-7B': 'NLP',
-    'Phi-2': 'NLP',
-    'deepseek-coder-1.3b-instruct': 'NLP',
-    'deepseek-coder-1.3b-base': 'NLP',
-    'deepseek-coder-6.7b-instruct': 'NLP',
-    'dolphin-2.8-mistral-7b': 'NLP',
-    'gemma-2-2b-instruct': 'NLP',
-    'Octopus-v2': 'NLP',
-    'Octopus-v4': 'NLP',
-    'Octo-planner': 'NLP',
-    'deepseek-coder-6.7b-base': 'NLP',
-    'Llama2-7b-chat-uncensored': 'NLP',
-    'Llama3-8B-Lexi-Uncensored': 'NLP',
-    'Llama2-7b-function-calling': 'NLP',
-    'OpenELM-1_1B': 'NLP',
-    'OpenELM-3B': 'NLP',
-    'lcm-dreamshaper-v7': 'Computer Vision',
-    'stable-diffusion-v1-5': 'Computer Vision',
-    'stable-diffusion-v1-4': 'Computer Vision',
-    'stable-diffusion-v2-1': 'Computer Vision',
-    'stable-diffusion-3-medium': 'Computer Vision',
-    'sdxl-turbo': 'Computer Vision',
-    'hassaku-hentai-model-v13-LCM': 'Computer Vision',
-    'anything-v30-LCM': 'Computer Vision',
-    'FLUX.1-schnell': 'Computer Vision',
-    'Phi-3-vision-128k-instruct': 'Multimodal',
-    'nanoLLaVA': 'Multimodal',
-    'llava-v1.6-mistral-7b': 'Multimodal',
-    'llava-v1.6-vicuna-7b': 'Multimodal',
-    'llava-phi-3-mini': 'Multimodal',
-    'llava-llama-3-8b-v1.1': 'Multimodal',
-    'faster-whisper-tiny.en': 'Audio',
-    'faster-whisper-tiny': 'Audio',
-    'faster-whisper-small.en': 'Audio',
-    'faster-whisper-small': 'Audio',
-    'faster-whisper-medium.en': 'Audio',
-    'faster-whisper-medium': 'Audio',
-    'faster-whisper-base.en': 'Audio',
-    'faster-whisper-base': 'Audio',
-    'faster-whisper-large-v3': 'Audio',
-    'whisper-tiny.en': 'Audio',
-    'whisper-tiny': 'Audio',
-    'whisper-small.en': 'Audio',
-    'whisper-small': 'Audio',
-    'whisper-base.en': 'Audio',
-    'whisper-base': 'Audio',
+    "gemma-2b": ModelType.NLP,
+    "Llama-2-7b-chat": ModelType.NLP,
+    "Llama-2-7b": ModelType.NLP,
+    "Meta-Llama-3-8B-Instruct": ModelType.NLP,
+    "Meta-Llama-3.1-8B-Instruct": ModelType.NLP,
+    "Mistral-7B-Instruct-v0.3": ModelType.NLP,
+    "Mistral-7B-Instruct-v0.2": ModelType.NLP,
+    "Phi-3-mini-128k-instruct": ModelType.NLP,
+    "Phi-3-mini-4k-instruct": ModelType.NLP,
+    "Phi-3.5-mini-instruct": ModelType.NLP,
+    "CodeQwen1.5-7B-Instruct": ModelType.NLP,
+    "Qwen2-0.5B-Instruct": ModelType.NLP,
+    "Qwen2-1.5B-Instruct": ModelType.NLP,
+    "Qwen2-7B-Instruct": ModelType.NLP,
+    "codegemma-2b": ModelType.NLP,
+    "gemma-1.1-2b-instruct": ModelType.NLP,
+    "gemma-2b-instruct": ModelType.NLP,
+    "gemma-2-9b-instruct": ModelType.NLP,
+    "gemma-1.1-7b-instruct": ModelType.NLP,
+    "gemma-7b-instruct": ModelType.NLP,
+    "gemma-7b": ModelType.NLP,
+    "Qwen2-1.5B": ModelType.NLP,
+    "codegemma-7b": ModelType.NLP,
+    "TinyLlama-1.1B-Chat-v1.0": ModelType.NLP,
+    "CodeLlama-7b-Instruct": ModelType.NLP,
+    "gpt2": ModelType.NLP,
+    "CodeLlama-7b": ModelType.NLP,
+    "CodeLlama-7b-Python": ModelType.NLP,
+    "Qwen1.5-7B-Instruct": ModelType.NLP,
+    "Qwen1.5-7B": ModelType.NLP,
+    "Phi-2": ModelType.NLP,
+    "deepseek-coder-1.3b-instruct": ModelType.NLP,
+    "deepseek-coder-1.3b-base": ModelType.NLP,
+    "deepseek-coder-6.7b-instruct": ModelType.NLP,
+    "dolphin-2.8-mistral-7b": ModelType.NLP,
+    "gemma-2-2b-instruct": ModelType.NLP,
+    "Octopus-v2": ModelType.NLP,
+    "Octopus-v4": ModelType.NLP,
+    "Octo-planner": ModelType.NLP,
+    "deepseek-coder-6.7b-base": ModelType.NLP,
+    "Llama2-7b-chat-uncensored": ModelType.NLP,
+    "Llama3-8B-Lexi-Uncensored": ModelType.NLP,
+    "Llama2-7b-function-calling": ModelType.NLP,
+    "OpenELM-1_1B": ModelType.NLP,
+    "OpenELM-3B": ModelType.NLP,
+    "lcm-dreamshaper-v7": ModelType.COMPUTER_VISION,
+    "stable-diffusion-v1-5": ModelType.COMPUTER_VISION,
+    "stable-diffusion-v1-4": ModelType.COMPUTER_VISION,
+    "stable-diffusion-v2-1": ModelType.COMPUTER_VISION,
+    "stable-diffusion-3-medium": ModelType.COMPUTER_VISION,
+    "sdxl-turbo": ModelType.COMPUTER_VISION,
+    "hassaku-hentai-model-v13-LCM": ModelType.COMPUTER_VISION,
+    "anything-v30-LCM": ModelType.COMPUTER_VISION,
+    "FLUX.1-schnell": ModelType.COMPUTER_VISION,
+    "Phi-3-vision-128k-instruct": ModelType.MULTIMODAL,
+    "nanoLLaVA": ModelType.MULTIMODAL,
+    "llava-v1.6-mistral-7b": ModelType.MULTIMODAL,
+    "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL,
+    "llava-phi-3-mini": ModelType.MULTIMODAL,
+    "llava-llama-3-8b-v1.1": ModelType.MULTIMODAL,
+    "faster-whisper-tiny.en": ModelType.AUDIO,
+    "faster-whisper-tiny": ModelType.AUDIO,
+    "faster-whisper-small.en": ModelType.AUDIO,
+    "faster-whisper-small": ModelType.AUDIO,
+    "faster-whisper-medium.en": ModelType.AUDIO,
+    "faster-whisper-medium": ModelType.AUDIO,
+    "faster-whisper-base.en": ModelType.AUDIO,
+    "faster-whisper-base": ModelType.AUDIO,
+    "faster-whisper-large-v3": ModelType.AUDIO,
+    "whisper-tiny.en": ModelType.AUDIO,
+    "whisper-tiny": ModelType.AUDIO,
+    "whisper-small.en": ModelType.AUDIO,
+    "whisper-small": ModelType.AUDIO,
+    "whisper-base.en": ModelType.AUDIO,
+    "whisper-base": ModelType.AUDIO,
 }
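
Up to this point, PATCH 4/4 has replaced the raw run-type strings in nexa/constants.py with a `ModelType` enum. The nexa/general.py hunks that follow keep the external contract string-based: lookups call `.value` at the boundary and fall back to `ModelType.NLP` for unknown names. A minimal standalone sketch of that flow (abridged table, not the full SDK dict):

```python
from enum import Enum

class ModelType(Enum):
    NLP = "NLP"
    COMPUTER_VISION = "Computer Vision"
    AUDIO = "Audio"
    MULTIMODAL = "Multimodal"

NEXA_OFFICIAL_MODELS_TYPE = {
    "OpenELM-3B": ModelType.NLP,
    "FLUX.1-schnell": ModelType.COMPUTER_VISION,
}

def get_run_type_from_model_path(model_path: str) -> str:
    model_name, _version = model_path.split(":")
    # Unknown models fall back to NLP, matching the new pull_model behavior.
    return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value

assert get_run_type_from_model_path("FLUX.1-schnell:q4_0") == "Computer Vision"
assert get_run_type_from_model_path("mystery-model:q4_0") == "NLP"
```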
diff --git a/nexa/general.py b/nexa/general.py
index 53a704e9..838b3b98 100644
--- a/nexa/general.py
+++ b/nexa/general.py
@@ -15,7 +15,7 @@
     NEXA_TOKEN_PATH,
     NEXA_OFFICIAL_MODELS_TYPE,
 )
-
+from nexa.constants import ModelType
 
 def login():
     """
@@ -119,10 +119,10 @@ def pull_model(model_path):
             return result["local_path"], result["run_type"]
         else:
             print(f"Failed to pull model {model_path}")
-            return None, "UNKNOWN"
+            return None, "NLP"
     except Exception as e:
         logging.error(f"An error occurred while pulling the model: {e}")
-        return None, "UNKNOWN"
+        return None, "NLP"
 
 
 def pull_model_from_hub(model_path):
@@ -198,19 +198,20 @@ def pull_model_from_official(model_path):
     model_type = "gguf"
     run_type = get_run_type_from_model_path(model_path)
+    run_type_str = run_type.value if isinstance(run_type, ModelType) else str(run_type)
     success, location = download_model_from_official(model_path, model_type)
 
     return {
         "success": success,
         "local_path": location,
         "model_type": model_type,
-        "run_type": run_type
+        "run_type": run_type_str
     }
 
 
 def get_run_type_from_model_path(model_path):
     model_name, model_version = model_path.split(":")
-    return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, "UNKNOWN")
+    return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value
 
 
 def get_model_presigned_link(full_path, token):
diff --git a/nexa/gguf/llama/_utils_transformers.py b/nexa/gguf/llama/_utils_transformers.py
index 945c1478..0049e9cc 100644
--- a/nexa/gguf/llama/_utils_transformers.py
+++ b/nexa/gguf/llama/_utils_transformers.py
@@ -17,7 +17,7 @@ class suppress_stdout_stderr(object):
     sys = sys
     os = os
 
-    def __init__(self, disable: bool = True):
+    def __init__(self, disable: bool = False):
         self.disable = disable
 
     # Oddly enough this works better than the contextlib version
diff --git a/nexa/gguf/llama/llama_cpp.py b/nexa/gguf/llama/llama_cpp.py
index 3b502790..f970a739 100644
--- a/nexa/gguf/llama/llama_cpp.py
+++ b/nexa/gguf/llama/llama_cpp.py
@@ -1488,10 +1488,10 @@ def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
 
 # // Returns true if the model is recurrent (like Mamba, RWKV, etc.)
 # LLAMA_API bool llama_model_is_recurrent(const struct llama_model * model);
-@ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
-def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
-    """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
-    ...
+# @ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
+# def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
+#     """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
+#     ...
 
 
 # // Returns 0 on success
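
The nexa/gguf/nexa_inference_image.py hunks below consume the FLUX maps defined earlier: a FLUX model is pulled as four artifacts (base diffusion weights, the T5-XXL text encoder, the VAE, and CLIP-L). The resolution step, extracted as a standalone sketch (`flux_components` is an illustrative helper, not SDK code; each returned identifier is then fetched with `pull_model`, as in the `__init__` hunk below):

```python
# Abridged maps from nexa/constants.py (q4_0 entries only).
NEXA_RUN_MODEL_MAP_FLUX = {"FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0"}
NEXA_RUN_T5XXL_MAP = {"FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0"}
FLUX_VAE_PATH = "FLUX.1-schnell:ae-fp16"
FLUX_CLIP_L_PATH = "FLUX.1-schnell:clip_l-fp16"

def flux_components(model_path: str) -> dict[str, str]:
    """Return the extra artifacts a FLUX model needs besides the base weights."""
    if model_path not in NEXA_RUN_MODEL_MAP_FLUX:
        return {}  # not a FLUX model, nothing extra to pull
    return {
        "t5xxl": NEXA_RUN_T5XXL_MAP[model_path],
        "vae": FLUX_VAE_PATH,
        "clip_l": FLUX_CLIP_L_PATH,
    }

assert flux_components("FLUX.1-schnell:q4_0")["t5xxl"] == "FLUX.1-schnell:t5xxl-q4_0"
```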
diff --git a/nexa/gguf/nexa_inference_image.py b/nexa/gguf/nexa_inference_image.py
index 1deebb03..d57bcbed 100644
--- a/nexa/gguf/nexa_inference_image.py
+++ b/nexa/gguf/nexa_inference_image.py
@@ -8,10 +8,11 @@
 from nexa.constants import (
     DEFAULT_IMG_GEN_PARAMS,
     EXIT_REMINDER,
-    NEXA_RUN_MODEL_MAP,
     NEXA_RUN_MODEL_PRECISION_MAP,
     DEFAULT_IMG_GEN_PARAMS_LCM,
     DEFAULT_IMG_GEN_PARAMS_TURBO,
+    NEXA_RUN_MODEL_MAP_FLUX,
+    NEXA_RUN_T5XXL_MAP,
 )
 from nexa.utils import SpinningCursorAnimation, nexa_prompt
 from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr
@@ -22,10 +23,15 @@
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
+
+# image generation retry attempts
 RETRY_ATTEMPTS = (
     3  # a temporary fix for the issue of segmentation fault for stable-diffusion-cpp
 )
 
+# FLUX vae and clip model paths
+FLUX_VAE_PATH = "FLUX.1-schnell:ae-fp16"
+FLUX_CLIP_L_PATH = "FLUX.1-schnell:clip_l-fp16"
 
 class NexaImageInference:
     """
@@ -55,15 +61,36 @@ def __init__(self, model_path, local_path=None, **kwargs):
         self.model_path = model_path
         self.downloaded_path = local_path
 
-        if self.downloaded_path is None:
-            self.downloaded_path, run_type = pull_model(self.model_path)
+        # FLUX model components
+        self.t5xxl_path = None
+        self.ae_path = None
+        self.clip_l_path = None
+        self.t5xxl_downloaded_path = None
+        self.ae_downloaded_path = None
+        self.clip_l_downloaded_path = None
 
+        # Download base model if not provided
         if self.downloaded_path is None:
-            logging.error(
-                f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.",
-                exc_info=True,
-            )
-            exit(1)
+            self.downloaded_path, _ = pull_model(self.model_path)
+            if self.downloaded_path is None:
+                logging.error(
+                    f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.",
+                    exc_info=True,
+                )
+                exit(1)
+
+        # Check if the model is a FLUX model and download additional components
+        if self.model_path in NEXA_RUN_MODEL_MAP_FLUX:
+            self.t5xxl_path = NEXA_RUN_T5XXL_MAP.get(model_path)
+            self.ae_path = FLUX_VAE_PATH
+            self.clip_l_path = FLUX_CLIP_L_PATH
+
+            if self.t5xxl_path:
+                self.t5xxl_downloaded_path, _ = pull_model(self.t5xxl_path)
+            if self.ae_path:
+                self.ae_downloaded_path, _ = pull_model(self.ae_path)
+            if self.clip_l_path:
+                self.clip_l_downloaded_path, _ = pull_model(self.clip_l_path)
 
         if "lcm-dreamshaper" in self.model_path:
             self.params = DEFAULT_IMG_GEN_PARAMS_LCM
@@ -73,6 +100,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
             self.params = DEFAULT_IMG_GEN_PARAMS
 
         self.params.update(kwargs)
+
         if not kwargs.get("streamlit", False):
             self._load_model(model_path)
             if self.model is None:
diff --git a/nexa/gguf/nexa_inference_text.py b/nexa/gguf/nexa_inference_text.py
index edbd63c9..485680af 100644
--- a/nexa/gguf/nexa_inference_text.py
+++ b/nexa/gguf/nexa_inference_text.py
@@ -9,7 +9,6 @@
     DEFAULT_TEXT_GEN_PARAMS,
     NEXA_RUN_CHAT_TEMPLATE_MAP,
     NEXA_RUN_COMPLETION_TEMPLATE_MAP,
-    NEXA_RUN_MODEL_MAP,
     NEXA_STOP_WORDS_MAP,
 )
 from nexa.gguf.lib_utils import is_gpu_available
@@ -54,7 +53,7 @@ def __init__(self, model_path, local_path=None, stop_words=None, **kwargs):
         self.downloaded_path = local_path
 
         if self.downloaded_path is None:
-            self.downloaded_path, run_type = pull_model(self.model_path)
+            self.downloaded_path, _ = pull_model(self.model_path)
 
         if self.downloaded_path is None:
             logging.error(
diff --git a/nexa/gguf/nexa_inference_voice.py b/nexa/gguf/nexa_inference_voice.py
index d2dcc76b..4abab76b 100644
--- a/nexa/gguf/nexa_inference_voice.py
+++ b/nexa/gguf/nexa_inference_voice.py
@@ -8,7 +8,6 @@
 from nexa.constants import (
     DEFAULT_VOICE_GEN_PARAMS,
     EXIT_REMINDER,
-    NEXA_RUN_MODEL_MAP_VOICE,
 )
 from nexa.general import pull_model
 from nexa.utils import nexa_prompt, SpinningCursorAnimation
@@ -45,7 +44,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
         self.params = DEFAULT_VOICE_GEN_PARAMS
 
         if self.downloaded_path is None:
-            self.downloaded_path, run_type = pull_model(self.model_path)
+            self.downloaded_path, _ = pull_model(self.model_path)
 
         if self.downloaded_path is None:
             logging.error(