From 1489bf481c12b9e5039e8024b7da2dabe941c6e2 Mon Sep 17 00:00:00 2001
From: Davidqian123
Date: Tue, 10 Sep 2024 17:19:26 +0000
Subject: [PATCH 1/4] update onnx installation in README

---
 README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ff9fc8d2..4e29f91e 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,8 @@ Below is our differentiation from other similar tools:
 
 We have released pre-built wheels for various Python versions, platforms, and backends for convenient installation on our [index page](https://nexaai.github.io/nexa-sdk/whl/).
 
+> [!NOTE]
+> If you want to use an ONNX model, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
 
 #### CPU
@@ -117,9 +119,6 @@ For **Windows Git Bash**:
 ```bash
 CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir
 ```
-> [!NOTE]
-> If you want to use ONNX model, just replace `pip install nexaai` with `pip install nexaai[onnx]` in above commands
-
 
 FAQ: Building Issues for llava

From 7ab42db3aa405eea69ba1ae5fe0deeddadf8198c Mon Sep 17 00:00:00 2001
From: Davidqian123
Date: Tue, 10 Sep 2024 19:33:43 +0000
Subject: [PATCH 2/4] update tsinghua mirror

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4e29f91e..602dd85c 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,8 @@
 We have released pre-built wheels for various Python versions, platforms, and backends for convenient installation on our [index page](https://nexaai.github.io/nexa-sdk/whl/).
 
 > [!NOTE]
-> If you want to use an ONNX model, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
+> 1. If you want to use an ONNX model, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
+> 2. For Chinese developers, we recommend using the Tsinghua Open Source Mirror as the extra index URL: replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in the provided commands.
 
 #### CPU

From 5ace4a061434659e3b883d3b068404100e5520fd Mon Sep 17 00:00:00 2001
From: Davidqian123
Date: Tue, 10 Sep 2024 20:28:38 +0000
Subject: [PATCH 3/4] add new models

---
 nexa/constants.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/nexa/constants.py b/nexa/constants.py
index bfe48218..9e9cf558 100644
--- a/nexa/constants.py
+++ b/nexa/constants.py
@@ -44,8 +44,10 @@
     "dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0",
     "phi2": "Phi-2:q4_0",
     "phi3": "Phi-3-mini-128k-instruct:q4_0",
+    "phi3.5": "Phi-3.5-mini-instruct:q4_0",
     "llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0",
     "llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M",
+    "openelm": "OpenELM-3B:q4_K_M",
 }
 
 NEXA_RUN_MODEL_MAP_ONNX = {
@@ -139,10 +141,12 @@
     "sd1-4": "stable-diffusion-v1-4:q4_0",
     "sd1-5": "stable-diffusion-v1-5:q4_0",
     "sd2-1": "stable-diffusion-v2-1:fp16",
+    "sd3": "stable-diffusion-3-medium:q4_0",
     "sdxl-turbo": "sdxl-turbo:q8_0",
     "lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
     "anything-lcm": "anything-v30-LCM:fp16",
     "hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
+    "flux": "FLUX.1-schnell:q4_0",
 }
 
 NEXA_RUN_MODEL_MAP = {
@@ -172,10 +176,12 @@
     "sd1-4": "q4_0",
     "sd1-5": "q4_0",
     "sd2-1": "q4_0",
+    "sd3": "q4_0",
     "lcm-dreamshaper": "f16",
     "sdxl-turbo": "q8_0",
     "anything-lcm": "f16",
     "hassaku-lcm": "f16",
+    "flux": "q4_0",
 }
 
 EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
@@ -236,7 +242,7 @@
     'Mistral-7B-Instruct-v0.2': 'NLP',
     'Phi-3-mini-128k-instruct': 'NLP',
     'Phi-3-mini-4k-instruct': 'NLP',
-    "Phi-3.5-mini-instruct": "NLP",
+    'Phi-3.5-mini-instruct': 'NLP',
     'CodeQwen1.5-7B-Instruct': 'NLP',
     'Qwen2-0.5B-Instruct': 'NLP',
     'Qwen2-1.5B-Instruct': 'NLP',
@@ -270,13 +276,17 @@
     'Llama2-7b-chat-uncensored': 'NLP',
     'Llama3-8B-Lexi-Uncensored': 'NLP',
     'Llama2-7b-function-calling': 'NLP',
+    'OpenELM-1_1B': 'NLP',
+    'OpenELM-3B': 'NLP',
     'lcm-dreamshaper-v7': 'Computer Vision',
     'stable-diffusion-v1-5': 'Computer Vision',
     'stable-diffusion-v1-4': 'Computer Vision',
     'stable-diffusion-v2-1': 'Computer Vision',
+    'stable-diffusion-3-medium': 'Computer Vision',
     'sdxl-turbo': 'Computer Vision',
     'hassaku-hentai-model-v13-LCM': 'Computer Vision',
     'anything-v30-LCM': 'Computer Vision',
+    'FLUX.1-schnell': 'Computer Vision',
     'Phi-3-vision-128k-instruct': 'Multimodal',
     'nanoLLaVA': 'Multimodal',
     'llava-v1.6-mistral-7b': 'Multimodal',
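
PATCH 3/4 only touches the model registry: each CLI short name (`phi3.5`, `openelm`, `sd3`, `flux`) maps to a `model:tag` identifier on the official hub, with a matching default quantization for the new image models. As a hedged illustration of the contract these entries encode (standalone Python, not SDK code; `resolve` is a hypothetical helper, but the `split(":")` mirrors what `pull_model_from_official` does in PATCH 4/4 below):

```python
# Hypothetical helper illustrating the registry contract added in PATCH 3/4.
NEXA_RUN_MODEL_MAP_IMAGE = {
    "sd3": "stable-diffusion-3-medium:q4_0",
    "flux": "FLUX.1-schnell:q4_0",
}

def resolve(short_name: str) -> tuple[str, str]:
    """Expand a CLI short name into (model_name, tag)."""
    tagged = NEXA_RUN_MODEL_MAP_IMAGE[short_name]
    model_name, tag = tagged.split(":")  # same split pull_model_from_official performs
    return model_name, tag

assert resolve("flux") == ("FLUX.1-schnell", "q4_0")
assert resolve("sd3") == ("stable-diffusion-3-medium", "q4_0")
```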
From 2515ebedd3d6246a82e613ab7708f4eda9b1b233 Mon Sep 17 00:00:00 2001
From: Davidqian123
Date: Thu, 12 Sep 2024 04:58:44 +0000
Subject: [PATCH 4/4] add pull flux model logic

---
 nexa/constants.py                      | 206 ++++++++++++++-----------
 nexa/general.py                        |  11 +-
 nexa/gguf/llama/_utils_transformers.py |   2 +-
 nexa/gguf/llama/llama_cpp.py           |   8 +-
 nexa/gguf/nexa_inference_image.py      |  44 +++++-
 nexa/gguf/nexa_inference_text.py       |   3 +-
 nexa/gguf/nexa_inference_voice.py      |   3 +-
 7 files changed, 167 insertions(+), 110 deletions(-)

diff --git a/nexa/constants.py b/nexa/constants.py
index 9e9cf558..ce5a4915 100644
--- a/nexa/constants.py
+++ b/nexa/constants.py
@@ -1,14 +1,19 @@
 import os
 from pathlib import Path
+from enum import Enum
 
+# Paths for caching, model hub, and tokens
 NEXA_CACHE_ROOT = Path(os.getenv("NEXA_CACHE_ROOT") or "~/.cache/nexa").expanduser()
 NEXA_TOKEN_PATH = NEXA_CACHE_ROOT / "token"
 NEXA_MODELS_HUB_DIR = NEXA_CACHE_ROOT / "hub"
 NEXA_MODELS_HUB_OFFICIAL_DIR = NEXA_MODELS_HUB_DIR / "official"
 NEXA_MODEL_LIST_PATH = NEXA_MODELS_HUB_DIR / "model_list.json"
+
+# URLs and buckets
 NEXA_API_URL = "https://model-hub-backend.nexa4ai.com"
 NEXA_OFFICIAL_BUCKET = "https://public-storage.nexa4ai.com/"
 
+# Nexa logo
 NEXA_LOGO = """
 _| _| _|_|_| _| _| _|_| _|_| _|_|_|_| _|_| _| _| _| _| _| _| _| _| _|
 _| _|_| _| _| _| _| _| _| _| _| _| _| _|_|_| _| _| _| _| _| _| _|_|_|_|
 """
-# Maokun TODO: Update the model info and find a good default precision for each model
-PRODUCER_INFO = dict(
-    # producer_name="nexa.ai",  # onnxruntime: Model producer not matched: Expected "pytorch"
-    producer_version="0.0.0",
-    doc_string="Model exported by Nexa.ai",
-)
+# Model producer info
+PRODUCER_INFO = {
+    "producer_version": "0.0.0",
+    "doc_string": "Model exported by Nexa.ai",
+}
+
+class ModelType(Enum):
+    NLP = "NLP"
+    COMPUTER_VISION = "Computer Vision"
+    AUDIO = "Audio"
+    MULTIMODAL = "Multimodal"
 
 NEXA_RUN_MODEL_MAP_TEXT = {
     "octopus-v2": "Octopus-v2:q4_0",
@@ -118,8 +128,6 @@
     "Llama2-7b-function-calling:q8_0": "Llama2-7b-function-calling:q8_0",
 }
 
-
-
 NEXA_RUN_PROJECTOR_MAP = {
     "nanollava": "nanoLLaVA:projector-fp16",
     "nanoLLaVA:fp16": "nanoLLaVA:projector-fp16",
@@ -137,16 +145,34 @@
     "llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16",
 }
 
+NEXA_RUN_T5XXL_MAP = {
+    "flux": "FLUX.1-schnell:t5xxl-q4_0",
+    "FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0",
+    "FLUX.1-schnell:q5_0": "FLUX.1-schnell:t5xxl-q5_0",
+    "FLUX.1-schnell:q5_1": "FLUX.1-schnell:t5xxl-q5_1",
+    "FLUX.1-schnell:q8_0": "FLUX.1-schnell:t5xxl-q8_0",
+    "FLUX.1-schnell:fp16": "FLUX.1-schnell:t5xxl-fp16",
+}
+
 NEXA_RUN_MODEL_MAP_IMAGE = {
     "sd1-4": "stable-diffusion-v1-4:q4_0",
     "sd1-5": "stable-diffusion-v1-5:q4_0",
-    "sd2-1": "stable-diffusion-v2-1:fp16",
+    "sd2-1": "stable-diffusion-v2-1:q4_0",
     "sd3": "stable-diffusion-3-medium:q4_0",
     "sdxl-turbo": "sdxl-turbo:q8_0",
+    "flux": "FLUX.1-schnell:q4_0",
     "lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
     "anything-lcm": "anything-v30-LCM:fp16",
     "hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
-    "flux": "FLUX.1-schnell:q4_0",
+}
+
+NEXA_RUN_MODEL_MAP_FLUX = {
+    "flux": "FLUX.1-schnell:flux1-schnell-q4_0",
+    "FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0",
+    "FLUX.1-schnell:q5_0": "FLUX.1-schnell:flux1-schnell-q5_0",
+    "FLUX.1-schnell:q5_1": "FLUX.1-schnell:flux1-schnell-q5_1",
+    "FLUX.1-schnell:q8_0": "FLUX.1-schnell:flux1-schnell-q8_0",
+    "FLUX.1-schnell:fp16": "FLUX.1-schnell:flux1-schnell-fp16",
 }
 
 NEXA_RUN_MODEL_MAP = {
@@ -155,6 +181,7 @@
     **NEXA_RUN_MODEL_MAP_IMAGE,
     **NEXA_RUN_MODEL_MAP_VOICE,
     **NEXA_RUN_MODEL_MAP_FUNCTION_CALLING,
+    **NEXA_RUN_MODEL_MAP_FLUX,
 }
 
 NEXA_RUN_CHAT_TEMPLATE_MAP = {
@@ -177,17 +204,20 @@
     "sd1-5": "q4_0",
     "sd2-1": "q4_0",
     "sd3": "q4_0",
+    "flux": "q4_0",
     "lcm-dreamshaper": "f16",
     "sdxl-turbo": "q8_0",
     "anything-lcm": "f16",
     "hassaku-lcm": "f16",
-    "flux": "q4_0",
 }
 
 EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
 EXIT_REMINDER = f"Please use Ctrl + d or one of {EXIT_COMMANDS} to exit.\n"
 
-NEXA_STOP_WORDS_MAP = {"octopus-v2": ["<nexa_end>"]}
+NEXA_STOP_WORDS_MAP = {
+    "octopus-v2": ["<nexa_end>"],
+    "octopus-v4": ["<nexa_end>"]
+}
 
 DEFAULT_TEXT_GEN_PARAMS = {
     "temperature": 0.7,
@@ -229,85 +259,85 @@
     "language": None,
     "task": "transcribe",
     "temperature": 0.0,
-    "compute_type": "default"
+    "compute_type": "default",
 }
 
 NEXA_OFFICIAL_MODELS_TYPE = {
-    'gemma-2b': 'NLP',
-    'Llama-2-7b-chat': 'NLP',
-    'Llama-2-7b': 'NLP',
-    'Meta-Llama-3-8B-Instruct': 'NLP',
-    'Meta-Llama-3.1-8B-Instruct': 'NLP',
-    'Mistral-7B-Instruct-v0.3': 'NLP',
-    'Mistral-7B-Instruct-v0.2': 'NLP',
-    'Phi-3-mini-128k-instruct': 'NLP',
-    'Phi-3-mini-4k-instruct': 'NLP',
-    'Phi-3.5-mini-instruct': 'NLP',
-    'CodeQwen1.5-7B-Instruct': 'NLP',
-    'Qwen2-0.5B-Instruct': 'NLP',
-    'Qwen2-1.5B-Instruct': 'NLP',
-    'Qwen2-7B-Instruct': 'NLP',
-    'codegemma-2b': 'NLP',
-    'gemma-1.1-2b-instruct': 'NLP',
-    'gemma-2b-instruct': 'NLP',
-    'gemma-2-9b-instruct': 'NLP',
-    'gemma-1.1-7b-instruct': 'NLP',
-    'gemma-7b-instruct': 'NLP',
-    'gemma-7b': 'NLP',
-    'Qwen2-1.5B': 'NLP',
-    'codegemma-7b': 'NLP',
-    'TinyLlama-1.1B-Chat-v1.0': 'NLP',
-    'CodeLlama-7b-Instruct': 'NLP',
-    'gpt2': 'NLP',
-    'CodeLlama-7b': 'NLP',
-    'CodeLlama-7b-Python': 'NLP',
-    'Qwen1.5-7B-Instruct': 'NLP',
-    'Qwen1.5-7B': 'NLP',
-    'Phi-2': 'NLP',
-    'deepseek-coder-1.3b-instruct': 'NLP',
-    'deepseek-coder-1.3b-base': 'NLP',
-    'deepseek-coder-6.7b-instruct': 'NLP',
-    'dolphin-2.8-mistral-7b': 'NLP',
-    'gemma-2-2b-instruct': 'NLP',
-    'Octopus-v2': 'NLP',
-    'Octopus-v4': 'NLP',
-    'Octo-planner': 'NLP',
-    'deepseek-coder-6.7b-base': 'NLP',
-    'Llama2-7b-chat-uncensored': 'NLP',
-    'Llama3-8B-Lexi-Uncensored': 'NLP',
-    'Llama2-7b-function-calling': 'NLP',
-    'OpenELM-1_1B': 'NLP',
-    'OpenELM-3B': 'NLP',
-    'lcm-dreamshaper-v7': 'Computer Vision',
-    'stable-diffusion-v1-5': 'Computer Vision',
-    'stable-diffusion-v1-4': 'Computer Vision',
-    'stable-diffusion-v2-1': 'Computer Vision',
-    'stable-diffusion-3-medium': 'Computer Vision',
-    'sdxl-turbo': 'Computer Vision',
-    'hassaku-hentai-model-v13-LCM': 'Computer Vision',
-    'anything-v30-LCM': 'Computer Vision',
-    'FLUX.1-schnell': 'Computer Vision',
-    'Phi-3-vision-128k-instruct': 'Multimodal',
-    'nanoLLaVA': 'Multimodal',
-    'llava-v1.6-mistral-7b': 'Multimodal',
-    'llava-v1.6-vicuna-7b': 'Multimodal',
-    'llava-phi-3-mini': 'Multimodal',
-    'llava-llama-3-8b-v1.1': 'Multimodal',
-    'faster-whisper-tiny.en': 'Audio',
-    'faster-whisper-tiny': 'Audio',
-    'faster-whisper-small.en': 'Audio',
-    'faster-whisper-small': 'Audio',
-    'faster-whisper-medium.en': 'Audio',
-    'faster-whisper-medium': 'Audio',
-    'faster-whisper-base.en': 'Audio',
-    'faster-whisper-base': 'Audio',
-    'faster-whisper-large-v3': 'Audio',
-    'whisper-tiny.en': 'Audio',
-    'whisper-tiny': 'Audio',
-    'whisper-small.en': 'Audio',
-    'whisper-small': 'Audio',
-    'whisper-base.en': 'Audio',
-    'whisper-base': 'Audio',
+    "gemma-2b": ModelType.NLP,
+    "Llama-2-7b-chat": ModelType.NLP,
+    "Llama-2-7b": ModelType.NLP,
+    "Meta-Llama-3-8B-Instruct": ModelType.NLP,
+    "Meta-Llama-3.1-8B-Instruct": ModelType.NLP,
+    "Mistral-7B-Instruct-v0.3": ModelType.NLP,
+    "Mistral-7B-Instruct-v0.2": ModelType.NLP,
+    "Phi-3-mini-128k-instruct": ModelType.NLP,
+    "Phi-3-mini-4k-instruct": ModelType.NLP,
+    "Phi-3.5-mini-instruct": ModelType.NLP,
+    "CodeQwen1.5-7B-Instruct": ModelType.NLP,
+    "Qwen2-0.5B-Instruct": ModelType.NLP,
+    "Qwen2-1.5B-Instruct": ModelType.NLP,
+    "Qwen2-7B-Instruct": ModelType.NLP,
+    "codegemma-2b": ModelType.NLP,
+    "gemma-1.1-2b-instruct": ModelType.NLP,
+    "gemma-2b-instruct": ModelType.NLP,
+    "gemma-2-9b-instruct": ModelType.NLP,
+    "gemma-1.1-7b-instruct": ModelType.NLP,
+    "gemma-7b-instruct": ModelType.NLP,
+    "gemma-7b": ModelType.NLP,
+    "Qwen2-1.5B": ModelType.NLP,
+    "codegemma-7b": ModelType.NLP,
+    "TinyLlama-1.1B-Chat-v1.0": ModelType.NLP,
+    "CodeLlama-7b-Instruct": ModelType.NLP,
+    "gpt2": ModelType.NLP,
+    "CodeLlama-7b": ModelType.NLP,
+    "CodeLlama-7b-Python": ModelType.NLP,
+    "Qwen1.5-7B-Instruct": ModelType.NLP,
+    "Qwen1.5-7B": ModelType.NLP,
+    "Phi-2": ModelType.NLP,
+    "deepseek-coder-1.3b-instruct": ModelType.NLP,
+    "deepseek-coder-1.3b-base": ModelType.NLP,
+    "deepseek-coder-6.7b-instruct": ModelType.NLP,
+    "dolphin-2.8-mistral-7b": ModelType.NLP,
+    "gemma-2-2b-instruct": ModelType.NLP,
+    "Octopus-v2": ModelType.NLP,
+    "Octopus-v4": ModelType.NLP,
+    "Octo-planner": ModelType.NLP,
+    "deepseek-coder-6.7b-base": ModelType.NLP,
+    "Llama2-7b-chat-uncensored": ModelType.NLP,
+    "Llama3-8B-Lexi-Uncensored": ModelType.NLP,
+    "Llama2-7b-function-calling": ModelType.NLP,
+    "OpenELM-1_1B": ModelType.NLP,
+    "OpenELM-3B": ModelType.NLP,
+    "lcm-dreamshaper-v7": ModelType.COMPUTER_VISION,
+    "stable-diffusion-v1-5": ModelType.COMPUTER_VISION,
+    "stable-diffusion-v1-4": ModelType.COMPUTER_VISION,
+    "stable-diffusion-v2-1": ModelType.COMPUTER_VISION,
+    "stable-diffusion-3-medium": ModelType.COMPUTER_VISION,
+    "sdxl-turbo": ModelType.COMPUTER_VISION,
+    "hassaku-hentai-model-v13-LCM": ModelType.COMPUTER_VISION,
+    "anything-v30-LCM": ModelType.COMPUTER_VISION,
+    "FLUX.1-schnell": ModelType.COMPUTER_VISION,
+    "Phi-3-vision-128k-instruct": ModelType.MULTIMODAL,
+    "nanoLLaVA": ModelType.MULTIMODAL,
+    "llava-v1.6-mistral-7b": ModelType.MULTIMODAL,
+    "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL,
+    "llava-phi-3-mini": ModelType.MULTIMODAL,
+    "llava-llama-3-8b-v1.1": ModelType.MULTIMODAL,
+    "faster-whisper-tiny.en": ModelType.AUDIO,
+    "faster-whisper-tiny": ModelType.AUDIO,
+    "faster-whisper-small.en": ModelType.AUDIO,
+    "faster-whisper-small": ModelType.AUDIO,
+    "faster-whisper-medium.en": ModelType.AUDIO,
+    "faster-whisper-medium": ModelType.AUDIO,
+    "faster-whisper-base.en": ModelType.AUDIO,
+    "faster-whisper-base": ModelType.AUDIO,
+    "faster-whisper-large-v3": ModelType.AUDIO,
+    "whisper-tiny.en": ModelType.AUDIO,
+    "whisper-tiny": ModelType.AUDIO,
+    "whisper-small.en": ModelType.AUDIO,
+    "whisper-small": ModelType.AUDIO,
+    "whisper-base.en": ModelType.AUDIO,
+    "whisper-base": ModelType.AUDIO,
 }
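
Up to this point, PATCH 4/4 has replaced the raw run-type strings in nexa/constants.py with a `ModelType` enum. The nexa/general.py hunks that follow keep the external contract string-based: lookups call `.value` at the boundary and fall back to `ModelType.NLP` for unknown names. A minimal standalone sketch of that flow (abridged table, not the full SDK dict):

```python
from enum import Enum

class ModelType(Enum):
    NLP = "NLP"
    COMPUTER_VISION = "Computer Vision"
    AUDIO = "Audio"
    MULTIMODAL = "Multimodal"

NEXA_OFFICIAL_MODELS_TYPE = {
    "OpenELM-3B": ModelType.NLP,
    "FLUX.1-schnell": ModelType.COMPUTER_VISION,
}

def get_run_type_from_model_path(model_path: str) -> str:
    model_name, _version = model_path.split(":")
    # Unknown models fall back to NLP, matching the new pull_model behavior.
    return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value

assert get_run_type_from_model_path("FLUX.1-schnell:q4_0") == "Computer Vision"
assert get_run_type_from_model_path("mystery-model:q4_0") == "NLP"
```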
diff --git a/nexa/general.py b/nexa/general.py
index 53a704e9..838b3b98 100644
--- a/nexa/general.py
+++ b/nexa/general.py
@@ -15,7 +15,7 @@
     NEXA_TOKEN_PATH,
     NEXA_OFFICIAL_MODELS_TYPE,
 )
-
+from nexa.constants import ModelType
 
 def login():
     """
@@ -119,10 +119,10 @@ def pull_model(model_path):
             return result["local_path"], result["run_type"]
         else:
             print(f"Failed to pull model {model_path}")
-            return None, "UNKNOWN"
+            return None, "NLP"
     except Exception as e:
         logging.error(f"An error occurred while pulling the model: {e}")
-        return None, "UNKNOWN"
+        return None, "NLP"
 
 
 def pull_model_from_hub(model_path):
@@ -198,19 +198,20 @@ def pull_model_from_official(model_path):
     model_type = "gguf"
     run_type = get_run_type_from_model_path(model_path)
+    run_type_str = run_type.value if isinstance(run_type, ModelType) else str(run_type)
     success, location = download_model_from_official(model_path, model_type)
 
     return {
         "success": success,
         "local_path": location,
         "model_type": model_type,
-        "run_type": run_type
+        "run_type": run_type_str
     }
 
 
 def get_run_type_from_model_path(model_path):
     model_name, model_version = model_path.split(":")
-    return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, "UNKNOWN")
+    return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value
 
 
 def get_model_presigned_link(full_path, token):
diff --git a/nexa/gguf/llama/_utils_transformers.py b/nexa/gguf/llama/_utils_transformers.py
index 945c1478..0049e9cc 100644
--- a/nexa/gguf/llama/_utils_transformers.py
+++ b/nexa/gguf/llama/_utils_transformers.py
@@ -17,7 +17,7 @@ class suppress_stdout_stderr(object):
     sys = sys
     os = os
 
-    def __init__(self, disable: bool = True):
+    def __init__(self, disable: bool = False):
         self.disable = disable
 
     # Oddly enough this works better than the contextlib version
diff --git a/nexa/gguf/llama/llama_cpp.py b/nexa/gguf/llama/llama_cpp.py
index 3b502790..f970a739 100644
--- a/nexa/gguf/llama/llama_cpp.py
+++ b/nexa/gguf/llama/llama_cpp.py
@@ -1488,10 +1488,10 @@ def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
 
 # // Returns true if the model is recurrent (like Mamba, RWKV, etc.)
 # LLAMA_API bool llama_model_is_recurrent(const struct llama_model * model);
-@ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
-def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
-    """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
-    ...
+# @ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
+# def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
+#     """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
+#     ...
 
 
 # // Returns 0 on success
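
The nexa/gguf/nexa_inference_image.py hunks below consume the FLUX maps defined earlier: a FLUX model is pulled as four artifacts (base diffusion weights, the T5-XXL text encoder, the VAE, and CLIP-L). The resolution step, extracted as a standalone sketch (`flux_components` is an illustrative helper, not SDK code; each returned identifier is then fetched with `pull_model`, as in the `__init__` hunk below):

```python
# Abridged maps from nexa/constants.py (q4_0 entries only).
NEXA_RUN_MODEL_MAP_FLUX = {"FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0"}
NEXA_RUN_T5XXL_MAP = {"FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0"}
FLUX_VAE_PATH = "FLUX.1-schnell:ae-fp16"
FLUX_CLIP_L_PATH = "FLUX.1-schnell:clip_l-fp16"

def flux_components(model_path: str) -> dict[str, str]:
    """Return the extra artifacts a FLUX model needs besides the base weights."""
    if model_path not in NEXA_RUN_MODEL_MAP_FLUX:
        return {}  # not a FLUX model, nothing extra to pull
    return {
        "t5xxl": NEXA_RUN_T5XXL_MAP[model_path],
        "vae": FLUX_VAE_PATH,
        "clip_l": FLUX_CLIP_L_PATH,
    }

assert flux_components("FLUX.1-schnell:q4_0")["t5xxl"] == "FLUX.1-schnell:t5xxl-q4_0"
```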
diff --git a/nexa/gguf/nexa_inference_image.py b/nexa/gguf/nexa_inference_image.py
index 1deebb03..d57bcbed 100644
--- a/nexa/gguf/nexa_inference_image.py
+++ b/nexa/gguf/nexa_inference_image.py
@@ -8,10 +8,11 @@
 from nexa.constants import (
     DEFAULT_IMG_GEN_PARAMS,
     EXIT_REMINDER,
-    NEXA_RUN_MODEL_MAP,
     NEXA_RUN_MODEL_PRECISION_MAP,
     DEFAULT_IMG_GEN_PARAMS_LCM,
     DEFAULT_IMG_GEN_PARAMS_TURBO,
+    NEXA_RUN_MODEL_MAP_FLUX,
+    NEXA_RUN_T5XXL_MAP,
 )
 from nexa.utils import SpinningCursorAnimation, nexa_prompt
 from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr
@@ -22,10 +23,15 @@
 logging.basicConfig(
     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
 )
+
+# image generation retry attempts
 RETRY_ATTEMPTS = (
     3  # a temporary fix for the issue of segmentation fault for stable-diffusion-cpp
 )
 
+# FLUX vae and clip model paths
+FLUX_VAE_PATH = "FLUX.1-schnell:ae-fp16"
+FLUX_CLIP_L_PATH = "FLUX.1-schnell:clip_l-fp16"
 
 class NexaImageInference:
     """
@@ -55,15 +61,36 @@ def __init__(self, model_path, local_path=None, **kwargs):
         self.model_path = model_path
         self.downloaded_path = local_path
 
-        if self.downloaded_path is None:
-            self.downloaded_path, run_type = pull_model(self.model_path)
+        # FLUX model components
+        self.t5xxl_path = None
+        self.ae_path = None
+        self.clip_l_path = None
+        self.t5xxl_downloaded_path = None
+        self.ae_downloaded_path = None
+        self.clip_l_downloaded_path = None
 
+        # Download base model if not provided
         if self.downloaded_path is None:
-            logging.error(
-                f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.",
-                exc_info=True,
-            )
-            exit(1)
+            self.downloaded_path, _ = pull_model(self.model_path)
+            if self.downloaded_path is None:
+                logging.error(
+                    f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.",
+                    exc_info=True,
+                )
+                exit(1)
+
+        # Check if the model is a FLUX model and download additional components
+        if self.model_path in NEXA_RUN_MODEL_MAP_FLUX:
+            self.t5xxl_path = NEXA_RUN_T5XXL_MAP.get(model_path)
+            self.ae_path = FLUX_VAE_PATH
+            self.clip_l_path = FLUX_CLIP_L_PATH
+
+            if self.t5xxl_path:
+                self.t5xxl_downloaded_path, _ = pull_model(self.t5xxl_path)
+            if self.ae_path:
+                self.ae_downloaded_path, _ = pull_model(self.ae_path)
+            if self.clip_l_path:
+                self.clip_l_downloaded_path, _ = pull_model(self.clip_l_path)
 
         if "lcm-dreamshaper" in self.model_path:
             self.params = DEFAULT_IMG_GEN_PARAMS_LCM
@@ -73,6 +100,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
             self.params = DEFAULT_IMG_GEN_PARAMS
 
         self.params.update(kwargs)
+
         if not kwargs.get("streamlit", False):
             self._load_model(model_path)
             if self.model is None:
diff --git a/nexa/gguf/nexa_inference_text.py b/nexa/gguf/nexa_inference_text.py
index edbd63c9..485680af 100644
--- a/nexa/gguf/nexa_inference_text.py
+++ b/nexa/gguf/nexa_inference_text.py
@@ -9,7 +9,6 @@
     DEFAULT_TEXT_GEN_PARAMS,
     NEXA_RUN_CHAT_TEMPLATE_MAP,
     NEXA_RUN_COMPLETION_TEMPLATE_MAP,
-    NEXA_RUN_MODEL_MAP,
     NEXA_STOP_WORDS_MAP,
 )
 from nexa.gguf.lib_utils import is_gpu_available
@@ -54,7 +53,7 @@ def __init__(self, model_path, local_path=None, stop_words=None, **kwargs):
         self.downloaded_path = local_path
 
         if self.downloaded_path is None:
-            self.downloaded_path, run_type = pull_model(self.model_path)
+            self.downloaded_path, _ = pull_model(self.model_path)
 
         if self.downloaded_path is None:
             logging.error(
diff --git a/nexa/gguf/nexa_inference_voice.py b/nexa/gguf/nexa_inference_voice.py
index d2dcc76b..4abab76b 100644
--- a/nexa/gguf/nexa_inference_voice.py
+++ b/nexa/gguf/nexa_inference_voice.py
@@ -8,7 +8,6 @@
 from nexa.constants import (
     DEFAULT_VOICE_GEN_PARAMS,
     EXIT_REMINDER,
-    NEXA_RUN_MODEL_MAP_VOICE,
 )
 from nexa.general import pull_model
 from nexa.utils import nexa_prompt, SpinningCursorAnimation
@@ -45,7 +44,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
         self.params = DEFAULT_VOICE_GEN_PARAMS
 
         if self.downloaded_path is None:
-            self.downloaded_path, run_type = pull_model(self.model_path)
+            self.downloaded_path, _ = pull_model(self.model_path)
 
         if self.downloaded_path is None:
             logging.error(