Merge pull request #82 from NexaAI/david/bugfix
David/bugfix
zhiyuan8 authored Sep 12, 2024
2 parents 4e94a91 + 2515ebe commit 2f7fe07
Showing 8 changed files with 174 additions and 109 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -62,6 +62,9 @@ Below is our differentiation from other similar tools:

We have released pre-built wheels for various Python versions, platforms, and backends for convenient installation on our [index page](https://nexaai.github.io/nexa-sdk/whl/).

> [!NOTE]
> 1. To use an <strong>ONNX model</strong>, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
> 2. For developers in China, we recommend the <strong>Tsinghua Open Source Mirror</strong> as the extra index URL: replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in the provided commands.
#### CPU

@@ -117,9 +120,6 @@ For **Windows Git Bash**:
```bash
CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir
```
> [!NOTE]
> If you want to use ONNX model, just replace `pip install nexaai` with `pip install nexaai[onnx]` in above commands

<details>
<summary><strong>FAQ: Building Issues for llava</strong></summary>
206 changes: 122 additions & 84 deletions nexa/constants.py
@@ -1,28 +1,38 @@
import os
from pathlib import Path
from enum import Enum

# Paths for caching, model hub, and tokens
NEXA_CACHE_ROOT = Path(os.getenv("NEXA_CACHE_ROOT") or "~/.cache/nexa").expanduser()
NEXA_TOKEN_PATH = NEXA_CACHE_ROOT / "token"
NEXA_MODELS_HUB_DIR = NEXA_CACHE_ROOT / "hub"
NEXA_MODELS_HUB_OFFICIAL_DIR = NEXA_MODELS_HUB_DIR / "official"
NEXA_MODEL_LIST_PATH = NEXA_MODELS_HUB_DIR / "model_list.json"

# URLs and buckets
NEXA_API_URL = "https://model-hub-backend.nexa4ai.com"
NEXA_OFFICIAL_BUCKET = "https://public-storage.nexa4ai.com/"

# Nexa logo
NEXA_LOGO = """
_| _| _|_|_| _| _| _|_| _|_| _|_|_|_|
_|_| _| _| _| _| _| _| _| _| _|
_|_|_|_| _|_|_| _| _|_|_|_| _|_|_|_| _|
_| _|_| _| _| _| _| _| _| _| _|
_| _| _|_|_| _| _| _| _| _| _| _|_|_|_|
"""
# Maokun TODO: Update the model info and find a good default precision for each model

PRODUCER_INFO = dict(
# producer_name="nexa.ai", # onnxruntime: Model producer not matched: Expected "pytorch"
producer_version="0.0.0",
doc_string="Model exported by Nexa.ai",
)
# Model producer info
PRODUCER_INFO = {
"producer_version": "0.0.0",
"doc_string": "Model exported by Nexa.ai",
}

class ModelType(Enum):
NLP = "NLP"
COMPUTER_VISION = "Computer Vision"
AUDIO = "Audio"
MULTIMODAL = "Multimodal"

NEXA_RUN_MODEL_MAP_TEXT = {
"octopus-v2": "Octopus-v2:q4_0",
@@ -44,8 +54,10 @@
"dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0",
"phi2": "Phi-2:q4_0",
"phi3": "Phi-3-mini-128k-instruct:q4_0",
"phi3.5": "Phi-3.5-mini-instruct:q4_0",
"llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0",
"llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M",
"openelm": "OpenELM-3B:q4_K_M",
}

NEXA_RUN_MODEL_MAP_ONNX = {
@@ -116,8 +128,6 @@
"Llama2-7b-function-calling:q8_0": "Llama2-7b-function-calling:q8_0",
}



NEXA_RUN_PROJECTOR_MAP = {
"nanollava": "nanoLLaVA:projector-fp16",
"nanoLLaVA:fp16": "nanoLLaVA:projector-fp16",
@@ -135,22 +145,43 @@
"llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16",
}

NEXA_RUN_T5XXL_MAP = {
"flux": "FLUX.1-schnell:t5xxl-q4_0",
"FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0",
"FLUX.1-schnell:q5_0": "FLUX.1-schnell:t5xxl-q5_0",
"FLUX.1-schnell:q5_1": "FLUX.1-schnell:t5xxl-q5_1",
"FLUX.1-schnell:q8_0": "FLUX.1-schnell:t5xxl-q8_0",
"FLUX.1-schnell:fp16": "FLUX.1-schnell:t5xxl-fp16",
}

NEXA_RUN_MODEL_MAP_IMAGE = {
"sd1-4": "stable-diffusion-v1-4:q4_0",
"sd1-5": "stable-diffusion-v1-5:q4_0",
"sd2-1": "stable-diffusion-v2-1:fp16",
"sd2-1": "stable-diffusion-v2-1:q4_0",
"sd3": "stable-diffusion-3-medium:q4_0",
"sdxl-turbo": "sdxl-turbo:q8_0",
"flux": "FLUX.1-schnell:q4_0",
"lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
"anything-lcm": "anything-v30-LCM:fp16",
"hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
}

NEXA_RUN_MODEL_MAP_FLUX = {
"flux": "FLUX.1-schnell:flux1-schnell-q4_0",
"FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0",
"FLUX.1-schnell:q5_0": "FLUX.1-schnell:flux1-schnell-q5_0",
"FLUX.1-schnell:q5_1": "FLUX.1-schnell:flux1-schnell-q5_1",
"FLUX.1-schnell:q8_0": "FLUX.1-schnell:flux1-schnell-q8_0",
"FLUX.1-schnell:fp16": "FLUX.1-schnell:flux1-schnell-fp16",
}

NEXA_RUN_MODEL_MAP = {
**NEXA_RUN_MODEL_MAP_TEXT,
**NEXA_RUN_MODEL_MAP_VLM,
**NEXA_RUN_MODEL_MAP_IMAGE,
**NEXA_RUN_MODEL_MAP_VOICE,
**NEXA_RUN_MODEL_MAP_FUNCTION_CALLING,
**NEXA_RUN_MODEL_MAP_FLUX,
}
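
Note that `**` unpacking gives later maps precedence when keys collide: `"flux"` appears in both `NEXA_RUN_MODEL_MAP_IMAGE` and `NEXA_RUN_MODEL_MAP_FLUX`, and the merged `NEXA_RUN_MODEL_MAP` keeps the FLUX entry because that map is unpacked last. A minimal illustration:

```python
# Later **-unpacked dicts win on duplicate keys.
image_map = {"flux": "FLUX.1-schnell:q4_0"}
flux_map = {"flux": "FLUX.1-schnell:flux1-schnell-q4_0"}

merged = {**image_map, **flux_map}
print(merged["flux"])  # -> FLUX.1-schnell:flux1-schnell-q4_0
```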

NEXA_RUN_CHAT_TEMPLATE_MAP = {
@@ -172,6 +203,8 @@
"sd1-4": "q4_0",
"sd1-5": "q4_0",
"sd2-1": "q4_0",
"sd3": "q4_0",
"flux": "q4_0",
"lcm-dreamshaper": "f16",
"sdxl-turbo": "q8_0",
"anything-lcm": "f16",
@@ -181,7 +214,10 @@
EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
EXIT_REMINDER = f"Please use Ctrl + d or one of {EXIT_COMMANDS} to exit.\n"

NEXA_STOP_WORDS_MAP = {"octopus-v2": ["<nexa_end>"]}
NEXA_STOP_WORDS_MAP = {
"octopus-v2": ["<nexa_end>"],
"octopus-v4": ["<nexa_end>"]
}

DEFAULT_TEXT_GEN_PARAMS = {
"temperature": 0.7,
@@ -223,83 +259,85 @@
"language": None,
"task": "transcribe",
"temperature": 0.0,
"compute_type": "default"
"compute_type": "default",
}

NEXA_OFFICIAL_MODELS_TYPE = {
'gemma-2b': 'NLP',
'Llama-2-7b-chat': 'NLP',
'Llama-2-7b': 'NLP',
'Meta-Llama-3-8B-Instruct': 'NLP',
'Meta-Llama-3.1-8B-Instruct': 'NLP',
'Mistral-7B-Instruct-v0.3': 'NLP',
'Mistral-7B-Instruct-v0.2': 'NLP',
'Phi-3-mini-128k-instruct': 'NLP',
'Phi-3-mini-4k-instruct': 'NLP',
"Phi-3.5-mini-instruct": "NLP",
'CodeQwen1.5-7B-Instruct': 'NLP',
'Qwen2-0.5B-Instruct': 'NLP',
'Qwen2-1.5B-Instruct': 'NLP',
'Qwen2-7B-Instruct': 'NLP',
'codegemma-2b': 'NLP',
'gemma-1.1-2b-instruct': 'NLP',
'gemma-2b-instruct': 'NLP',
'gemma-2-9b-instruct': 'NLP',
'gemma-1.1-7b-instruct': 'NLP',
'gemma-7b-instruct': 'NLP',
'gemma-7b': 'NLP',
'Qwen2-1.5B': 'NLP',
'codegemma-7b': 'NLP',
'TinyLlama-1.1B-Chat-v1.0': 'NLP',
'CodeLlama-7b-Instruct': 'NLP',
'gpt2': 'NLP',
'CodeLlama-7b': 'NLP',
'CodeLlama-7b-Python': 'NLP',
'Qwen1.5-7B-Instruct': 'NLP',
'Qwen1.5-7B': 'NLP',
'Phi-2': 'NLP',
'deepseek-coder-1.3b-instruct': 'NLP',
'deepseek-coder-1.3b-base': 'NLP',
'deepseek-coder-6.7b-instruct': 'NLP',
'dolphin-2.8-mistral-7b': 'NLP',
'gemma-2-2b-instruct': 'NLP',
'Octopus-v2': 'NLP',
'Octopus-v4': 'NLP',
'Octo-planner': 'NLP',
'deepseek-coder-6.7b-base': 'NLP',
'Llama2-7b-chat-uncensored': 'NLP',
'Llama3-8B-Lexi-Uncensored': 'NLP',
'Llama2-7b-function-calling': 'NLP',
'OpenELM-1_1B': 'NLP',
'OpenELM-3B': 'NLP',
'lcm-dreamshaper-v7': 'Computer Vision',
'stable-diffusion-v1-5': 'Computer Vision',
'stable-diffusion-v1-4': 'Computer Vision',
'stable-diffusion-v2-1': 'Computer Vision',
'sdxl-turbo': 'Computer Vision',
'hassaku-hentai-model-v13-LCM': 'Computer Vision',
'anything-v30-LCM': 'Computer Vision',
'Phi-3-vision-128k-instruct': 'Multimodal',
'nanoLLaVA': 'Multimodal',
'llava-v1.6-mistral-7b': 'Multimodal',
'llava-v1.6-vicuna-7b': 'Multimodal',
'llava-phi-3-mini': 'Multimodal',
'llava-llama-3-8b-v1.1': 'Multimodal',
'faster-whisper-tiny.en': 'Audio',
'faster-whisper-tiny': 'Audio',
'faster-whisper-small.en': 'Audio',
'faster-whisper-small': 'Audio',
'faster-whisper-medium.en': 'Audio',
'faster-whisper-medium': 'Audio',
'faster-whisper-base.en': 'Audio',
'faster-whisper-base': 'Audio',
'faster-whisper-large-v3': 'Audio',
'whisper-tiny.en': 'Audio',
'whisper-tiny': 'Audio',
'whisper-small.en': 'Audio',
'whisper-small': 'Audio',
'whisper-base.en': 'Audio',
'whisper-base': 'Audio',
"gemma-2b": ModelType.NLP,
"Llama-2-7b-chat": ModelType.NLP,
"Llama-2-7b": ModelType.NLP,
"Meta-Llama-3-8B-Instruct": ModelType.NLP,
"Meta-Llama-3.1-8B-Instruct": ModelType.NLP,
"Mistral-7B-Instruct-v0.3": ModelType.NLP,
"Mistral-7B-Instruct-v0.2": ModelType.NLP,
"Phi-3-mini-128k-instruct": ModelType.NLP,
"Phi-3-mini-4k-instruct": ModelType.NLP,
"Phi-3.5-mini-instruct": ModelType.NLP,
"CodeQwen1.5-7B-Instruct": ModelType.NLP,
"Qwen2-0.5B-Instruct": ModelType.NLP,
"Qwen2-1.5B-Instruct": ModelType.NLP,
"Qwen2-7B-Instruct": ModelType.NLP,
"codegemma-2b": ModelType.NLP,
"gemma-1.1-2b-instruct": ModelType.NLP,
"gemma-2b-instruct": ModelType.NLP,
"gemma-2-9b-instruct": ModelType.NLP,
"gemma-1.1-7b-instruct": ModelType.NLP,
"gemma-7b-instruct": ModelType.NLP,
"gemma-7b": ModelType.NLP,
"Qwen2-1.5B": ModelType.NLP,
"codegemma-7b": ModelType.NLP,
"TinyLlama-1.1B-Chat-v1.0": ModelType.NLP,
"CodeLlama-7b-Instruct": ModelType.NLP,
"gpt2": ModelType.NLP,
"CodeLlama-7b": ModelType.NLP,
"CodeLlama-7b-Python": ModelType.NLP,
"Qwen1.5-7B-Instruct": ModelType.NLP,
"Qwen1.5-7B": ModelType.NLP,
"Phi-2": ModelType.NLP,
"deepseek-coder-1.3b-instruct": ModelType.NLP,
"deepseek-coder-1.3b-base": ModelType.NLP,
"deepseek-coder-6.7b-instruct": ModelType.NLP,
"dolphin-2.8-mistral-7b": ModelType.NLP,
"gemma-2-2b-instruct": ModelType.NLP,
"Octopus-v2": ModelType.NLP,
"Octopus-v4": ModelType.NLP,
"Octo-planner": ModelType.NLP,
"deepseek-coder-6.7b-base": ModelType.NLP,
"Llama2-7b-chat-uncensored": ModelType.NLP,
"Llama3-8B-Lexi-Uncensored": ModelType.NLP,
"Llama2-7b-function-calling": ModelType.NLP,
"OpenELM-1_1B": ModelType.NLP,
"OpenELM-3B": ModelType.NLP,
"lcm-dreamshaper-v7": ModelType.COMPUTER_VISION,
"stable-diffusion-v1-5": ModelType.COMPUTER_VISION,
"stable-diffusion-v1-4": ModelType.COMPUTER_VISION,
"stable-diffusion-v2-1": ModelType.COMPUTER_VISION,
"stable-diffusion-3-medium": ModelType.COMPUTER_VISION,
"sdxl-turbo": ModelType.COMPUTER_VISION,
"hassaku-hentai-model-v13-LCM": ModelType.COMPUTER_VISION,
"anything-v30-LCM": ModelType.COMPUTER_VISION,
"FLUX.1-schnell": ModelType.COMPUTER_VISION,
"Phi-3-vision-128k-instruct": ModelType.MULTIMODAL,
"nanoLLaVA": ModelType.MULTIMODAL,
"llava-v1.6-mistral-7b": ModelType.MULTIMODAL,
"llava-v1.6-vicuna-7b": ModelType.MULTIMODAL,
"llava-phi-3-mini": ModelType.MULTIMODAL,
"llava-llama-3-8b-v1.1": ModelType.MULTIMODAL,
"faster-whisper-tiny.en": ModelType.AUDIO,
"faster-whisper-tiny": ModelType.AUDIO,
"faster-whisper-small.en": ModelType.AUDIO,
"faster-whisper-small": ModelType.AUDIO,
"faster-whisper-medium.en": ModelType.AUDIO,
"faster-whisper-medium": ModelType.AUDIO,
"faster-whisper-base.en": ModelType.AUDIO,
"faster-whisper-base": ModelType.AUDIO,
"faster-whisper-large-v3": ModelType.AUDIO,
"whisper-tiny.en": ModelType.AUDIO,
"whisper-tiny": ModelType.AUDIO,
"whisper-small.en": ModelType.AUDIO,
"whisper-small": ModelType.AUDIO,
"whisper-base.en": ModelType.AUDIO,
"whisper-base": ModelType.AUDIO,
}
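
With this change the map's values are `ModelType` members rather than bare strings, so a lookup stays typed until a caller asks for the display string via `.value`. A minimal sketch of that contract, using a hypothetical two-entry excerpt of the map and the NLP fallback adopted in `general.py` below:

```python
from enum import Enum

class ModelType(Enum):
    NLP = "NLP"
    COMPUTER_VISION = "Computer Vision"

# Hypothetical excerpt of NEXA_OFFICIAL_MODELS_TYPE for illustration.
OFFICIAL_MODELS_TYPE = {
    "Octopus-v2": ModelType.NLP,
    "FLUX.1-schnell": ModelType.COMPUTER_VISION,
}

def run_type_for(model_path: str) -> str:
    # "model:tag" -> "model"; unknown names fall back to NLP.
    model_name = model_path.split(":")[0]
    return OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value

print(run_type_for("FLUX.1-schnell:q4_0"))     # Computer Vision
print(run_type_for("not-a-known-model:q4_0"))  # NLP
```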


11 changes: 6 additions & 5 deletions nexa/general.py
@@ -15,7 +15,7 @@
NEXA_TOKEN_PATH,
NEXA_OFFICIAL_MODELS_TYPE,
)

from nexa.constants import ModelType

def login():
"""
@@ -119,10 +119,10 @@ def pull_model(model_path):
return result["local_path"], result["run_type"]
else:
print(f"Failed to pull model {model_path}")
return None, "UNKNOWN"
return None, "NLP"
except Exception as e:
logging.error(f"An error occurred while pulling the model: {e}")
return None, "UNKNOWN"
return None, "NLP"


def pull_model_from_hub(model_path):
@@ -198,19 +198,20 @@ def pull_model_from_official(model_path):
model_type = "gguf"

run_type = get_run_type_from_model_path(model_path)
run_type_str = run_type.value if isinstance(run_type, ModelType) else str(run_type)
success, location = download_model_from_official(model_path, model_type)

return {
"success": success,
"local_path": location,
"model_type": model_type,
"run_type": run_type
"run_type": run_type_str
}


def get_run_type_from_model_path(model_path):
model_name, model_version = model_path.split(":")
return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, "UNKNOWN")
return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value


def get_model_presigned_link(full_path, token):
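
The new `run_type_str` line acts as a small adapter between the enum-typed map and callers that expect plain strings. A minimal sketch of that normalization, under the assumption that `run_type` may arrive as either form:

```python
from enum import Enum

class ModelType(Enum):
    NLP = "NLP"

def normalize_run_type(run_type) -> str:
    # Accept a ModelType member or a plain string; always return a string.
    return run_type.value if isinstance(run_type, ModelType) else str(run_type)

assert normalize_run_type(ModelType.NLP) == "NLP"
assert normalize_run_type("Audio") == "Audio"
```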
2 changes: 1 addition & 1 deletion nexa/gguf/llama/_utils_transformers.py
@@ -17,7 +17,7 @@ class suppress_stdout_stderr(object):
sys = sys
os = os

def __init__(self, disable: bool = True):
def __init__(self, disable: bool = False):
self.disable = disable

# Oddly enough this works better than the contextlib version
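
Flipping the default from `True` to `False` means the context manager now suppresses output unless a caller explicitly opts out. A hypothetical, simplified sketch of the flag's semantics (the real class also redirects the OS-level file descriptors, not just the Python stream objects):

```python
import os
import sys
from contextlib import contextmanager

@contextmanager
def suppress_stdout_stderr(disable: bool = False):
    if disable:  # disable=True turns suppression off entirely
        yield
        return
    devnull = open(os.devnull, "w")
    saved_out, saved_err = sys.stdout, sys.stderr
    sys.stdout = sys.stderr = devnull
    try:
        yield
    finally:
        sys.stdout, sys.stderr = saved_out, saved_err
        devnull.close()

with suppress_stdout_stderr():
    print("this line is swallowed")
```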
8 changes: 4 additions & 4 deletions nexa/gguf/llama/llama_cpp.py
@@ -1488,10 +1488,10 @@ def llama_model_decoder_start_token(model: llama_model_p, /) -> int:

# // Returns true if the model is recurrent (like Mamba, RWKV, etc.)
# LLAMA_API bool llama_model_is_recurrent(const struct llama_model * model);
@ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
"""Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
...
# @ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
# def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
# """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
# ...


# // Returns 0 on success
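
Commenting out the binding avoids an import-time failure when the bundled llama.cpp binary predates `llama_model_is_recurrent`. An alternative, feature-detecting sketch (library name and fallback behavior are assumptions, not the file's actual pattern):

```python
import ctypes
import ctypes.util

# Bind the symbol only if the loaded library actually exports it.
_path = ctypes.util.find_library("llama")  # library name is an assumption
_lib = ctypes.CDLL(_path) if _path else None

if _lib is not None and hasattr(_lib, "llama_model_is_recurrent"):
    _lib.llama_model_is_recurrent.restype = ctypes.c_bool
else:
    # Gracefully degrade when the symbol (or library) is missing.
    print("llama_model_is_recurrent is unavailable in this build")
```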