diff --git a/README.md b/README.md
index ff9fc8d2..602dd85c 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,9 @@ Below is our differentiation from other similar tools:
We have released pre-built wheels for various Python versions, platforms, and backends for convenient installation on our [index page](https://nexaai.github.io/nexa-sdk/whl/).
+> [!NOTE]
+> 1. If you want to use an ONNX model, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
+> 2. For developers in China, we recommend using the Tsinghua Open Source Mirror as the extra index URL: replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in the provided commands.
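+> For example, applying both substitutions to the CUDA (cu124) install command gives:
+> ```bash
+> CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install "nexaai[onnx]" --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple --no-cache-dir
+> ```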
#### CPU
@@ -117,9 +120,6 @@ For **Windows Git Bash**:
```bash
CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir
```
-> [!NOTE]
-> If you want to use ONNX model, just replace `pip install nexaai` with `pip install nexaai[onnx]` in above commands
-
FAQ: Building Issues for llava
diff --git a/nexa/constants.py b/nexa/constants.py
index 3f07ec16..ce5a4915 100644
--- a/nexa/constants.py
+++ b/nexa/constants.py
@@ -1,14 +1,19 @@
import os
from pathlib import Path
+from enum import Enum
+# Paths for caching, model hub, and tokens
NEXA_CACHE_ROOT = Path(os.getenv("NEXA_CACHE_ROOT") or "~/.cache/nexa").expanduser()
NEXA_TOKEN_PATH = NEXA_CACHE_ROOT / "token"
NEXA_MODELS_HUB_DIR = NEXA_CACHE_ROOT / "hub"
NEXA_MODELS_HUB_OFFICIAL_DIR = NEXA_MODELS_HUB_DIR / "official"
NEXA_MODEL_LIST_PATH = NEXA_MODELS_HUB_DIR / "model_list.json"
+
+# URLs and buckets
NEXA_API_URL = "https://model-hub-backend.nexa4ai.com"
NEXA_OFFICIAL_BUCKET = "https://public-storage.nexa4ai.com/"
+# Nexa logo
NEXA_LOGO = """
_| _| _|_|_| _| _| _|_| _|_| _|_|_|_|
_|_| _| _| _| _| _| _| _| _| _|
@@ -16,13 +21,18 @@
_| _|_| _| _| _| _| _| _| _| _|
_| _| _|_|_| _| _| _| _| _| _| _|_|_|_|
"""
-# Maokun TODO: Update the model info and find a good default precision for each model
-PRODUCER_INFO = dict(
- # producer_name="nexa.ai", # onnxruntime: Model producer not matched: Expected "pytorch"
- producer_version="0.0.0",
- doc_string="Model exported by Nexa.ai",
-)
+# Model producer info
+PRODUCER_INFO = {
+ "producer_version": "0.0.0",
+ "doc_string": "Model exported by Nexa.ai",
+}
+
+class ModelType(Enum):
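+    """Task categories used to tag official models (see NEXA_OFFICIAL_MODELS_TYPE)."""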
+ NLP = "NLP"
+ COMPUTER_VISION = "Computer Vision"
+ AUDIO = "Audio"
+ MULTIMODAL = "Multimodal"
NEXA_RUN_MODEL_MAP_TEXT = {
"octopus-v2": "Octopus-v2:q4_0",
@@ -44,8 +54,10 @@
"dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0",
"phi2": "Phi-2:q4_0",
"phi3": "Phi-3-mini-128k-instruct:q4_0",
+ "phi3.5": "Phi-3.5-mini-instruct:q4_0",
"llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0",
"llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M",
+ "openelm": "OpenELM-3B:q4_K_M",
}
NEXA_RUN_MODEL_MAP_ONNX = {
@@ -116,8 +128,6 @@
"Llama2-7b-function-calling:q8_0": "Llama2-7b-function-calling:q8_0",
}
-
-
NEXA_RUN_PROJECTOR_MAP = {
"nanollava": "nanoLLaVA:projector-fp16",
"nanoLLaVA:fp16": "nanoLLaVA:projector-fp16",
@@ -135,22 +145,43 @@
"llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16",
}
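+# T5-XXL text-encoder components for FLUX models, keyed by model tag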
+NEXA_RUN_T5XXL_MAP = {
+ "flux": "FLUX.1-schnell:t5xxl-q4_0",
+ "FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0",
+ "FLUX.1-schnell:q5_0": "FLUX.1-schnell:t5xxl-q5_0",
+ "FLUX.1-schnell:q5_1": "FLUX.1-schnell:t5xxl-q5_1",
+ "FLUX.1-schnell:q8_0": "FLUX.1-schnell:t5xxl-q8_0",
+ "FLUX.1-schnell:fp16": "FLUX.1-schnell:t5xxl-fp16",
+}
+
NEXA_RUN_MODEL_MAP_IMAGE = {
"sd1-4": "stable-diffusion-v1-4:q4_0",
"sd1-5": "stable-diffusion-v1-5:q4_0",
- "sd2-1": "stable-diffusion-v2-1:fp16",
+ "sd2-1": "stable-diffusion-v2-1:q4_0",
+ "sd3": "stable-diffusion-3-medium:q4_0",
"sdxl-turbo": "sdxl-turbo:q8_0",
+ "flux": "FLUX.1-schnell:q4_0",
"lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
"anything-lcm": "anything-v30-LCM:fp16",
"hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
}
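+# FLUX diffusion-model weights, keyed by model tag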
+NEXA_RUN_MODEL_MAP_FLUX = {
+ "flux": "FLUX.1-schnell:flux1-schnell-q4_0",
+ "FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0",
+ "FLUX.1-schnell:q5_0": "FLUX.1-schnell:flux1-schnell-q5_0",
+ "FLUX.1-schnell:q5_1": "FLUX.1-schnell:flux1-schnell-q5_1",
+ "FLUX.1-schnell:q8_0": "FLUX.1-schnell:flux1-schnell-q8_0",
+ "FLUX.1-schnell:fp16": "FLUX.1-schnell:flux1-schnell-fp16",
+}
+
NEXA_RUN_MODEL_MAP = {
**NEXA_RUN_MODEL_MAP_TEXT,
**NEXA_RUN_MODEL_MAP_VLM,
**NEXA_RUN_MODEL_MAP_IMAGE,
**NEXA_RUN_MODEL_MAP_VOICE,
**NEXA_RUN_MODEL_MAP_FUNCTION_CALLING,
+ **NEXA_RUN_MODEL_MAP_FLUX,
}
NEXA_RUN_CHAT_TEMPLATE_MAP = {
@@ -172,6 +203,8 @@
"sd1-4": "q4_0",
"sd1-5": "q4_0",
"sd2-1": "q4_0",
+ "sd3": "q4_0",
+ "flux": "q4_0",
"lcm-dreamshaper": "f16",
"sdxl-turbo": "q8_0",
"anything-lcm": "f16",
@@ -181,7 +214,10 @@
EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
EXIT_REMINDER = f"Please use Ctrl + d or one of {EXIT_COMMANDS} to exit.\n"
-NEXA_STOP_WORDS_MAP = {"octopus-v2": [""]}
+NEXA_STOP_WORDS_MAP = {
+ "octopus-v2": [""],
+ "octopus-v4": [""]
+}
DEFAULT_TEXT_GEN_PARAMS = {
"temperature": 0.7,
@@ -223,83 +259,85 @@
"language": None,
"task": "transcribe",
"temperature": 0.0,
- "compute_type": "default"
+ "compute_type": "default",
}
NEXA_OFFICIAL_MODELS_TYPE = {
- 'gemma-2b': 'NLP',
- 'Llama-2-7b-chat': 'NLP',
- 'Llama-2-7b': 'NLP',
- 'Meta-Llama-3-8B-Instruct': 'NLP',
- 'Meta-Llama-3.1-8B-Instruct': 'NLP',
- 'Mistral-7B-Instruct-v0.3': 'NLP',
- 'Mistral-7B-Instruct-v0.2': 'NLP',
- 'Phi-3-mini-128k-instruct': 'NLP',
- 'Phi-3-mini-4k-instruct': 'NLP',
- "Phi-3.5-mini-instruct": "NLP",
- 'CodeQwen1.5-7B-Instruct': 'NLP',
- 'Qwen2-0.5B-Instruct': 'NLP',
- 'Qwen2-1.5B-Instruct': 'NLP',
- 'Qwen2-7B-Instruct': 'NLP',
- 'codegemma-2b': 'NLP',
- 'gemma-1.1-2b-instruct': 'NLP',
- 'gemma-2b-instruct': 'NLP',
- 'gemma-2-9b-instruct': 'NLP',
- 'gemma-1.1-7b-instruct': 'NLP',
- 'gemma-7b-instruct': 'NLP',
- 'gemma-7b': 'NLP',
- 'Qwen2-1.5B': 'NLP',
- 'codegemma-7b': 'NLP',
- 'TinyLlama-1.1B-Chat-v1.0': 'NLP',
- 'CodeLlama-7b-Instruct': 'NLP',
- 'gpt2': 'NLP',
- 'CodeLlama-7b': 'NLP',
- 'CodeLlama-7b-Python': 'NLP',
- 'Qwen1.5-7B-Instruct': 'NLP',
- 'Qwen1.5-7B': 'NLP',
- 'Phi-2': 'NLP',
- 'deepseek-coder-1.3b-instruct': 'NLP',
- 'deepseek-coder-1.3b-base': 'NLP',
- 'deepseek-coder-6.7b-instruct': 'NLP',
- 'dolphin-2.8-mistral-7b': 'NLP',
- 'gemma-2-2b-instruct': 'NLP',
- 'Octopus-v2': 'NLP',
- 'Octopus-v4': 'NLP',
- 'Octo-planner': 'NLP',
- 'deepseek-coder-6.7b-base': 'NLP',
- 'Llama2-7b-chat-uncensored': 'NLP',
- 'Llama3-8B-Lexi-Uncensored': 'NLP',
- 'Llama2-7b-function-calling': 'NLP',
- 'OpenELM-1_1B': 'NLP',
- 'OpenELM-3B': 'NLP',
- 'lcm-dreamshaper-v7': 'Computer Vision',
- 'stable-diffusion-v1-5': 'Computer Vision',
- 'stable-diffusion-v1-4': 'Computer Vision',
- 'stable-diffusion-v2-1': 'Computer Vision',
- 'sdxl-turbo': 'Computer Vision',
- 'hassaku-hentai-model-v13-LCM': 'Computer Vision',
- 'anything-v30-LCM': 'Computer Vision',
- 'Phi-3-vision-128k-instruct': 'Multimodal',
- 'nanoLLaVA': 'Multimodal',
- 'llava-v1.6-mistral-7b': 'Multimodal',
- 'llava-v1.6-vicuna-7b': 'Multimodal',
- 'llava-phi-3-mini': 'Multimodal',
- 'llava-llama-3-8b-v1.1': 'Multimodal',
- 'faster-whisper-tiny.en': 'Audio',
- 'faster-whisper-tiny': 'Audio',
- 'faster-whisper-small.en': 'Audio',
- 'faster-whisper-small': 'Audio',
- 'faster-whisper-medium.en': 'Audio',
- 'faster-whisper-medium': 'Audio',
- 'faster-whisper-base.en': 'Audio',
- 'faster-whisper-base': 'Audio',
- 'faster-whisper-large-v3': 'Audio',
- 'whisper-tiny.en': 'Audio',
- 'whisper-tiny': 'Audio',
- 'whisper-small.en': 'Audio',
- 'whisper-small': 'Audio',
- 'whisper-base.en': 'Audio',
- 'whisper-base': 'Audio',
+ "gemma-2b": ModelType.NLP,
+ "Llama-2-7b-chat": ModelType.NLP,
+ "Llama-2-7b": ModelType.NLP,
+ "Meta-Llama-3-8B-Instruct": ModelType.NLP,
+ "Meta-Llama-3.1-8B-Instruct": ModelType.NLP,
+ "Mistral-7B-Instruct-v0.3": ModelType.NLP,
+ "Mistral-7B-Instruct-v0.2": ModelType.NLP,
+ "Phi-3-mini-128k-instruct": ModelType.NLP,
+ "Phi-3-mini-4k-instruct": ModelType.NLP,
+ "Phi-3.5-mini-instruct": ModelType.NLP,
+ "CodeQwen1.5-7B-Instruct": ModelType.NLP,
+ "Qwen2-0.5B-Instruct": ModelType.NLP,
+ "Qwen2-1.5B-Instruct": ModelType.NLP,
+ "Qwen2-7B-Instruct": ModelType.NLP,
+ "codegemma-2b": ModelType.NLP,
+ "gemma-1.1-2b-instruct": ModelType.NLP,
+ "gemma-2b-instruct": ModelType.NLP,
+ "gemma-2-9b-instruct": ModelType.NLP,
+ "gemma-1.1-7b-instruct": ModelType.NLP,
+ "gemma-7b-instruct": ModelType.NLP,
+ "gemma-7b": ModelType.NLP,
+ "Qwen2-1.5B": ModelType.NLP,
+ "codegemma-7b": ModelType.NLP,
+ "TinyLlama-1.1B-Chat-v1.0": ModelType.NLP,
+ "CodeLlama-7b-Instruct": ModelType.NLP,
+ "gpt2": ModelType.NLP,
+ "CodeLlama-7b": ModelType.NLP,
+ "CodeLlama-7b-Python": ModelType.NLP,
+ "Qwen1.5-7B-Instruct": ModelType.NLP,
+ "Qwen1.5-7B": ModelType.NLP,
+ "Phi-2": ModelType.NLP,
+ "deepseek-coder-1.3b-instruct": ModelType.NLP,
+ "deepseek-coder-1.3b-base": ModelType.NLP,
+ "deepseek-coder-6.7b-instruct": ModelType.NLP,
+ "dolphin-2.8-mistral-7b": ModelType.NLP,
+ "gemma-2-2b-instruct": ModelType.NLP,
+ "Octopus-v2": ModelType.NLP,
+ "Octopus-v4": ModelType.NLP,
+ "Octo-planner": ModelType.NLP,
+ "deepseek-coder-6.7b-base": ModelType.NLP,
+ "Llama2-7b-chat-uncensored": ModelType.NLP,
+ "Llama3-8B-Lexi-Uncensored": ModelType.NLP,
+ "Llama2-7b-function-calling": ModelType.NLP,
+ "OpenELM-1_1B": ModelType.NLP,
+ "OpenELM-3B": ModelType.NLP,
+ "lcm-dreamshaper-v7": ModelType.COMPUTER_VISION,
+ "stable-diffusion-v1-5": ModelType.COMPUTER_VISION,
+ "stable-diffusion-v1-4": ModelType.COMPUTER_VISION,
+ "stable-diffusion-v2-1": ModelType.COMPUTER_VISION,
+ "stable-diffusion-3-medium": ModelType.COMPUTER_VISION,
+ "sdxl-turbo": ModelType.COMPUTER_VISION,
+ "hassaku-hentai-model-v13-LCM": ModelType.COMPUTER_VISION,
+ "anything-v30-LCM": ModelType.COMPUTER_VISION,
+ "FLUX.1-schnell": ModelType.COMPUTER_VISION,
+ "Phi-3-vision-128k-instruct": ModelType.MULTIMODAL,
+ "nanoLLaVA": ModelType.MULTIMODAL,
+ "llava-v1.6-mistral-7b": ModelType.MULTIMODAL,
+ "llava-v1.6-vicuna-7b": ModelType.MULTIMODAL,
+ "llava-phi-3-mini": ModelType.MULTIMODAL,
+ "llava-llama-3-8b-v1.1": ModelType.MULTIMODAL,
+ "faster-whisper-tiny.en": ModelType.AUDIO,
+ "faster-whisper-tiny": ModelType.AUDIO,
+ "faster-whisper-small.en": ModelType.AUDIO,
+ "faster-whisper-small": ModelType.AUDIO,
+ "faster-whisper-medium.en": ModelType.AUDIO,
+ "faster-whisper-medium": ModelType.AUDIO,
+ "faster-whisper-base.en": ModelType.AUDIO,
+ "faster-whisper-base": ModelType.AUDIO,
+ "faster-whisper-large-v3": ModelType.AUDIO,
+ "whisper-tiny.en": ModelType.AUDIO,
+ "whisper-tiny": ModelType.AUDIO,
+ "whisper-small.en": ModelType.AUDIO,
+ "whisper-small": ModelType.AUDIO,
+ "whisper-base.en": ModelType.AUDIO,
+ "whisper-base": ModelType.AUDIO,
}
diff --git a/nexa/general.py b/nexa/general.py
index 53a704e9..838b3b98 100644
--- a/nexa/general.py
+++ b/nexa/general.py
@@ -15,7 +15,7 @@
NEXA_TOKEN_PATH,
NEXA_OFFICIAL_MODELS_TYPE,
)
-
+from nexa.constants import ModelType
def login():
"""
@@ -119,10 +119,10 @@ def pull_model(model_path):
return result["local_path"], result["run_type"]
else:
print(f"Failed to pull model {model_path}")
- return None, "UNKNOWN"
+ return None, "NLP"
except Exception as e:
logging.error(f"An error occurred while pulling the model: {e}")
- return None, "UNKNOWN"
+ return None, "NLP"
def pull_model_from_hub(model_path):
@@ -198,19 +198,20 @@ def pull_model_from_official(model_path):
model_type = "gguf"
run_type = get_run_type_from_model_path(model_path)
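+    # Normalize to a plain string in case a ModelType enum member is returned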
+ run_type_str = run_type.value if isinstance(run_type, ModelType) else str(run_type)
success, location = download_model_from_official(model_path, model_type)
return {
"success": success,
"local_path": location,
"model_type": model_type,
- "run_type": run_type
+ "run_type": run_type_str
}
def get_run_type_from_model_path(model_path):
model_name, model_version = model_path.split(":")
- return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, "UNKNOWN")
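+    # Unknown model names fall back to NLP; .value yields the plain run-type string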
+ return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value
def get_model_presigned_link(full_path, token):
diff --git a/nexa/gguf/llama/_utils_transformers.py b/nexa/gguf/llama/_utils_transformers.py
index 945c1478..0049e9cc 100644
--- a/nexa/gguf/llama/_utils_transformers.py
+++ b/nexa/gguf/llama/_utils_transformers.py
@@ -17,7 +17,7 @@ class suppress_stdout_stderr(object):
sys = sys
os = os
- def __init__(self, disable: bool = True):
+ def __init__(self, disable: bool = False):
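+        # With disable=False by default, stdout/stderr suppression is now active unless explicitly disabled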
self.disable = disable
# Oddly enough this works better than the contextlib version
diff --git a/nexa/gguf/llama/llama_cpp.py b/nexa/gguf/llama/llama_cpp.py
index 3b502790..f970a739 100644
--- a/nexa/gguf/llama/llama_cpp.py
+++ b/nexa/gguf/llama/llama_cpp.py
@@ -1488,10 +1488,10 @@ def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
# // Returns true if the model is recurrent (like Mamba, RWKV, etc.)
# LLAMA_API bool llama_model_is_recurrent(const struct llama_model * model);
-@ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
-def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
- """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
- ...
+# @ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
+# def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
+# """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
+# ...
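+# NOTE: binding commented out; the bundled llama.cpp build may not export llama_model_is_recurrent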
# // Returns 0 on success
diff --git a/nexa/gguf/nexa_inference_image.py b/nexa/gguf/nexa_inference_image.py
index 1deebb03..d57bcbed 100644
--- a/nexa/gguf/nexa_inference_image.py
+++ b/nexa/gguf/nexa_inference_image.py
@@ -8,10 +8,11 @@
from nexa.constants import (
DEFAULT_IMG_GEN_PARAMS,
EXIT_REMINDER,
- NEXA_RUN_MODEL_MAP,
NEXA_RUN_MODEL_PRECISION_MAP,
DEFAULT_IMG_GEN_PARAMS_LCM,
DEFAULT_IMG_GEN_PARAMS_TURBO,
+ NEXA_RUN_MODEL_MAP_FLUX,
+ NEXA_RUN_T5XXL_MAP,
)
from nexa.utils import SpinningCursorAnimation, nexa_prompt
from nexa.gguf.llama._utils_transformers import suppress_stdout_stderr
@@ -22,10 +23,15 @@
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
+
+# Image generation retry attempts
RETRY_ATTEMPTS = (
3 # a temporary fix for the issue of segmentation fault for stable-diffusion-cpp
)
+# FLUX VAE and CLIP model paths
+FLUX_VAE_PATH = "FLUX.1-schnell:ae-fp16"
+FLUX_CLIP_L_PATH = "FLUX.1-schnell:clip_l-fp16"
class NexaImageInference:
"""
@@ -55,15 +61,36 @@ def __init__(self, model_path, local_path=None, **kwargs):
self.model_path = model_path
self.downloaded_path = local_path
- if self.downloaded_path is None:
- self.downloaded_path, run_type = pull_model(self.model_path)
+ # FLUX model components
+ self.t5xxl_path = None
+ self.ae_path = None
+ self.clip_l_path = None
+ self.t5xxl_downloaded_path = None
+ self.ae_downloaded_path = None
+ self.clip_l_downloaded_path = None
+ # Download base model if not provided
if self.downloaded_path is None:
- logging.error(
- f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.",
- exc_info=True,
- )
- exit(1)
+ self.downloaded_path, _ = pull_model(self.model_path)
+ if self.downloaded_path is None:
+ logging.error(
+ f"Model ({model_path}) is not applicable. Please refer to our docs for proper usage.",
+ exc_info=True,
+ )
+ exit(1)
+
+ # Check if the model is a FLUX model and download additional components
+ if self.model_path in NEXA_RUN_MODEL_MAP_FLUX:
+            self.t5xxl_path = NEXA_RUN_T5XXL_MAP.get(self.model_path)
+ self.ae_path = FLUX_VAE_PATH
+ self.clip_l_path = FLUX_CLIP_L_PATH
+
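+            # Download each FLUX component; pull_model returns (None, ...) if a download fails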
+ if self.t5xxl_path:
+ self.t5xxl_downloaded_path, _ = pull_model(self.t5xxl_path)
+ if self.ae_path:
+ self.ae_downloaded_path, _ = pull_model(self.ae_path)
+ if self.clip_l_path:
+ self.clip_l_downloaded_path, _ = pull_model(self.clip_l_path)
if "lcm-dreamshaper" in self.model_path:
self.params = DEFAULT_IMG_GEN_PARAMS_LCM
@@ -73,6 +100,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
self.params = DEFAULT_IMG_GEN_PARAMS
self.params.update(kwargs)
+
if not kwargs.get("streamlit", False):
self._load_model(model_path)
if self.model is None:
diff --git a/nexa/gguf/nexa_inference_text.py b/nexa/gguf/nexa_inference_text.py
index edbd63c9..485680af 100644
--- a/nexa/gguf/nexa_inference_text.py
+++ b/nexa/gguf/nexa_inference_text.py
@@ -9,7 +9,6 @@
DEFAULT_TEXT_GEN_PARAMS,
NEXA_RUN_CHAT_TEMPLATE_MAP,
NEXA_RUN_COMPLETION_TEMPLATE_MAP,
- NEXA_RUN_MODEL_MAP,
NEXA_STOP_WORDS_MAP,
)
from nexa.gguf.lib_utils import is_gpu_available
@@ -54,7 +53,7 @@ def __init__(self, model_path, local_path=None, stop_words=None, **kwargs):
self.downloaded_path = local_path
if self.downloaded_path is None:
- self.downloaded_path, run_type = pull_model(self.model_path)
+ self.downloaded_path, _ = pull_model(self.model_path)
if self.downloaded_path is None:
logging.error(
diff --git a/nexa/gguf/nexa_inference_voice.py b/nexa/gguf/nexa_inference_voice.py
index d2dcc76b..4abab76b 100644
--- a/nexa/gguf/nexa_inference_voice.py
+++ b/nexa/gguf/nexa_inference_voice.py
@@ -8,7 +8,6 @@
from nexa.constants import (
DEFAULT_VOICE_GEN_PARAMS,
EXIT_REMINDER,
- NEXA_RUN_MODEL_MAP_VOICE,
)
from nexa.general import pull_model
from nexa.utils import nexa_prompt, SpinningCursorAnimation
@@ -45,7 +44,7 @@ def __init__(self, model_path, local_path=None, **kwargs):
self.params = DEFAULT_VOICE_GEN_PARAMS
if self.downloaded_path is None:
- self.downloaded_path, run_type = pull_model(self.model_path)
+ self.downloaded_path, _ = pull_model(self.model_path)
if self.downloaded_path is None:
logging.error(