Merge pull request #82 from NexaAI/david/bugfix
David/bugfix
zhiyuan8 authored Sep 12, 2024
2 parents 4e94a91 + 2515ebe commit 2f7fe07
Showing 8 changed files with 174 additions and 109 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -62,6 +62,9 @@ Below is our differentiation from other similar tools:

We have released pre-built wheels for various Python versions, platforms, and backends for convenient installation on our [index page](https://nexaai.github.io/nexa-sdk/whl/).

> [!NOTE]
> 1. To use an <strong>ONNX model</strong>, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
> 2. For developers in China, we recommend the <strong>Tsinghua Open Source Mirror</strong> as the extra index URL: replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in the provided commands.
#### CPU

@@ -117,9 +120,6 @@ For **Windows Git Bash**:
```bash
CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir
```
> [!NOTE]
> If you want to use ONNX model, just replace `pip install nexaai` with `pip install nexaai[onnx]` in above commands

<details>
<summary><strong>FAQ: Building Issues for llava</strong></summary>
206 changes: 122 additions & 84 deletions nexa/constants.py
@@ -1,28 +1,38 @@
import os
from pathlib import Path
from enum import Enum

# Paths for caching, model hub, and tokens
NEXA_CACHE_ROOT = Path(os.getenv("NEXA_CACHE_ROOT") or "~/.cache/nexa").expanduser()
NEXA_TOKEN_PATH = NEXA_CACHE_ROOT / "token"
NEXA_MODELS_HUB_DIR = NEXA_CACHE_ROOT / "hub"
NEXA_MODELS_HUB_OFFICIAL_DIR = NEXA_MODELS_HUB_DIR / "official"
NEXA_MODEL_LIST_PATH = NEXA_MODELS_HUB_DIR / "model_list.json"

# URLs and buckets
NEXA_API_URL = "https://model-hub-backend.nexa4ai.com"
NEXA_OFFICIAL_BUCKET = "https://public-storage.nexa4ai.com/"

# Nexa logo
NEXA_LOGO = """
_| _| _|_|_| _| _| _|_| _|_| _|_|_|_|
_|_| _| _| _| _| _| _| _| _| _|
_|_|_|_| _|_|_| _| _|_|_|_| _|_|_|_| _|
_| _|_| _| _| _| _| _| _| _| _|
_| _| _|_|_| _| _| _| _| _| _| _|_|_|_|
"""
# Maokun TODO: Update the model info and find a good default precision for each model

PRODUCER_INFO = dict(
# producer_name="nexa.ai", # onnxruntime: Model producer not matched: Expected "pytorch"
producer_version="0.0.0",
doc_string="Model exported by Nexa.ai",
)
# Model producer info
PRODUCER_INFO = {
"producer_version": "0.0.0",
"doc_string": "Model exported by Nexa.ai",
}

class ModelType(Enum):
NLP = "NLP"
COMPUTER_VISION = "Computer Vision"
AUDIO = "Audio"
MULTIMODAL = "Multimodal"

NEXA_RUN_MODEL_MAP_TEXT = {
"octopus-v2": "Octopus-v2:q4_0",
@@ -44,8 +54,10 @@
"dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0",
"phi2": "Phi-2:q4_0",
"phi3": "Phi-3-mini-128k-instruct:q4_0",
"phi3.5": "Phi-3.5-mini-instruct:q4_0",
"llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0",
"llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M",
"openelm": "OpenELM-3B:q4_K_M",
}

NEXA_RUN_MODEL_MAP_ONNX = {
@@ -116,8 +128,6 @@
"Llama2-7b-function-calling:q8_0": "Llama2-7b-function-calling:q8_0",
}



NEXA_RUN_PROJECTOR_MAP = {
"nanollava": "nanoLLaVA:projector-fp16",
"nanoLLaVA:fp16": "nanoLLaVA:projector-fp16",
@@ -135,22 +145,43 @@
"llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16",
}

NEXA_RUN_T5XXL_MAP = {
"flux": "FLUX.1-schnell:t5xxl-q4_0",
"FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0",
"FLUX.1-schnell:q5_0": "FLUX.1-schnell:t5xxl-q5_0",
"FLUX.1-schnell:q5_1": "FLUX.1-schnell:t5xxl-q5_1",
"FLUX.1-schnell:q8_0": "FLUX.1-schnell:t5xxl-q8_0",
"FLUX.1-schnell:fp16": "FLUX.1-schnell:t5xxl-fp16",
}

NEXA_RUN_MODEL_MAP_IMAGE = {
"sd1-4": "stable-diffusion-v1-4:q4_0",
"sd1-5": "stable-diffusion-v1-5:q4_0",
"sd2-1": "stable-diffusion-v2-1:fp16",
"sd2-1": "stable-diffusion-v2-1:q4_0",
"sd3": "stable-diffusion-3-medium:q4_0",
"sdxl-turbo": "sdxl-turbo:q8_0",
"flux": "FLUX.1-schnell:q4_0",
"lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
"anything-lcm": "anything-v30-LCM:fp16",
"hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
}

NEXA_RUN_MODEL_MAP_FLUX = {
"flux": "FLUX.1-schnell:flux1-schnell-q4_0",
"FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0",
"FLUX.1-schnell:q5_0": "FLUX.1-schnell:flux1-schnell-q5_0",
"FLUX.1-schnell:q5_1": "FLUX.1-schnell:flux1-schnell-q5_1",
"FLUX.1-schnell:q8_0": "FLUX.1-schnell:flux1-schnell-q8_0",
"FLUX.1-schnell:fp16": "FLUX.1-schnell:flux1-schnell-fp16",
}

NEXA_RUN_MODEL_MAP = {
**NEXA_RUN_MODEL_MAP_TEXT,
**NEXA_RUN_MODEL_MAP_VLM,
**NEXA_RUN_MODEL_MAP_IMAGE,
**NEXA_RUN_MODEL_MAP_VOICE,
**NEXA_RUN_MODEL_MAP_FUNCTION_CALLING,
**NEXA_RUN_MODEL_MAP_FLUX,
}
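
Note that `**` unpacking gives later maps precedence when keys collide: `"flux"` appears in both `NEXA_RUN_MODEL_MAP_IMAGE` and `NEXA_RUN_MODEL_MAP_FLUX`, and the merged `NEXA_RUN_MODEL_MAP` keeps the FLUX entry because that map is unpacked last. A minimal illustration:

```python
# Later **-unpacked dicts win on duplicate keys.
image_map = {"flux": "FLUX.1-schnell:q4_0"}
flux_map = {"flux": "FLUX.1-schnell:flux1-schnell-q4_0"}

merged = {**image_map, **flux_map}
print(merged["flux"])  # -> FLUX.1-schnell:flux1-schnell-q4_0
```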

NEXA_RUN_CHAT_TEMPLATE_MAP = {
@@ -172,6 +203,8 @@
"sd1-4": "q4_0",
"sd1-5": "q4_0",
"sd2-1": "q4_0",
"sd3": "q4_0",
"flux": "q4_0",
"lcm-dreamshaper": "f16",
"sdxl-turbo": "q8_0",
"anything-lcm": "f16",
@@ -181,7 +214,10 @@
EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
EXIT_REMINDER = f"Please use Ctrl + d or one of {EXIT_COMMANDS} to exit.\n"

NEXA_STOP_WORDS_MAP = {"octopus-v2": ["<nexa_end>"]}
NEXA_STOP_WORDS_MAP = {
"octopus-v2": ["<nexa_end>"],
"octopus-v4": ["<nexa_end>"]
}

DEFAULT_TEXT_GEN_PARAMS = {
"temperature": 0.7,
@@ -223,83 +259,85 @@
"language": None,
"task": "transcribe",
"temperature": 0.0,
"compute_type": "default"
"compute_type": "default",
}

NEXA_OFFICIAL_MODELS_TYPE = {
'gemma-2b': 'NLP',
'Llama-2-7b-chat': 'NLP',
'Llama-2-7b': 'NLP',
'Meta-Llama-3-8B-Instruct': 'NLP',
'Meta-Llama-3.1-8B-Instruct': 'NLP',
'Mistral-7B-Instruct-v0.3': 'NLP',
'Mistral-7B-Instruct-v0.2': 'NLP',
'Phi-3-mini-128k-instruct': 'NLP',
'Phi-3-mini-4k-instruct': 'NLP',
"Phi-3.5-mini-instruct": "NLP",
'CodeQwen1.5-7B-Instruct': 'NLP',
'Qwen2-0.5B-Instruct': 'NLP',
'Qwen2-1.5B-Instruct': 'NLP',
'Qwen2-7B-Instruct': 'NLP',
'codegemma-2b': 'NLP',
'gemma-1.1-2b-instruct': 'NLP',
'gemma-2b-instruct': 'NLP',
'gemma-2-9b-instruct': 'NLP',
'gemma-1.1-7b-instruct': 'NLP',
'gemma-7b-instruct': 'NLP',
'gemma-7b': 'NLP',
'Qwen2-1.5B': 'NLP',
'codegemma-7b': 'NLP',
'TinyLlama-1.1B-Chat-v1.0': 'NLP',
'CodeLlama-7b-Instruct': 'NLP',
'gpt2': 'NLP',
'CodeLlama-7b': 'NLP',
'CodeLlama-7b-Python': 'NLP',
'Qwen1.5-7B-Instruct': 'NLP',
'Qwen1.5-7B': 'NLP',
'Phi-2': 'NLP',
'deepseek-coder-1.3b-instruct': 'NLP',
'deepseek-coder-1.3b-base': 'NLP',
'deepseek-coder-6.7b-instruct': 'NLP',
'dolphin-2.8-mistral-7b': 'NLP',
'gemma-2-2b-instruct': 'NLP',
'Octopus-v2': 'NLP',
'Octopus-v4': 'NLP',
'Octo-planner': 'NLP',
'deepseek-coder-6.7b-base': 'NLP',
'Llama2-7b-chat-uncensored': 'NLP',
'Llama3-8B-Lexi-Uncensored': 'NLP',
'Llama2-7b-function-calling': 'NLP',
'OpenELM-1_1B': 'NLP',
'OpenELM-3B': 'NLP',
'lcm-dreamshaper-v7': 'Computer Vision',
'stable-diffusion-v1-5': 'Computer Vision',
'stable-diffusion-v1-4': 'Computer Vision',
'stable-diffusion-v2-1': 'Computer Vision',
'sdxl-turbo': 'Computer Vision',
'hassaku-hentai-model-v13-LCM': 'Computer Vision',
'anything-v30-LCM': 'Computer Vision',
'Phi-3-vision-128k-instruct': 'Multimodal',
'nanoLLaVA': 'Multimodal',
'llava-v1.6-mistral-7b': 'Multimodal',
'llava-v1.6-vicuna-7b': 'Multimodal',
'llava-phi-3-mini': 'Multimodal',
'llava-llama-3-8b-v1.1': 'Multimodal',
'faster-whisper-tiny.en': 'Audio',
'faster-whisper-tiny': 'Audio',
'faster-whisper-small.en': 'Audio',
'faster-whisper-small': 'Audio',
'faster-whisper-medium.en': 'Audio',
'faster-whisper-medium': 'Audio',
'faster-whisper-base.en': 'Audio',
'faster-whisper-base': 'Audio',
'faster-whisper-large-v3': 'Audio',
'whisper-tiny.en': 'Audio',
'whisper-tiny': 'Audio',
'whisper-small.en': 'Audio',
'whisper-small': 'Audio',
'whisper-base.en': 'Audio',
'whisper-base': 'Audio',
"gemma-2b": ModelType.NLP,
"Llama-2-7b-chat": ModelType.NLP,
"Llama-2-7b": ModelType.NLP,
"Meta-Llama-3-8B-Instruct": ModelType.NLP,
"Meta-Llama-3.1-8B-Instruct": ModelType.NLP,
"Mistral-7B-Instruct-v0.3": ModelType.NLP,
"Mistral-7B-Instruct-v0.2": ModelType.NLP,
"Phi-3-mini-128k-instruct": ModelType.NLP,
"Phi-3-mini-4k-instruct": ModelType.NLP,
"Phi-3.5-mini-instruct": ModelType.NLP,
"CodeQwen1.5-7B-Instruct": ModelType.NLP,
"Qwen2-0.5B-Instruct": ModelType.NLP,
"Qwen2-1.5B-Instruct": ModelType.NLP,
"Qwen2-7B-Instruct": ModelType.NLP,
"codegemma-2b": ModelType.NLP,
"gemma-1.1-2b-instruct": ModelType.NLP,
"gemma-2b-instruct": ModelType.NLP,
"gemma-2-9b-instruct": ModelType.NLP,
"gemma-1.1-7b-instruct": ModelType.NLP,
"gemma-7b-instruct": ModelType.NLP,
"gemma-7b": ModelType.NLP,
"Qwen2-1.5B": ModelType.NLP,
"codegemma-7b": ModelType.NLP,
"TinyLlama-1.1B-Chat-v1.0": ModelType.NLP,
"CodeLlama-7b-Instruct": ModelType.NLP,
"gpt2": ModelType.NLP,
"CodeLlama-7b": ModelType.NLP,
"CodeLlama-7b-Python": ModelType.NLP,
"Qwen1.5-7B-Instruct": ModelType.NLP,
"Qwen1.5-7B": ModelType.NLP,
"Phi-2": ModelType.NLP,
"deepseek-coder-1.3b-instruct": ModelType.NLP,
"deepseek-coder-1.3b-base": ModelType.NLP,
"deepseek-coder-6.7b-instruct": ModelType.NLP,
"dolphin-2.8-mistral-7b": ModelType.NLP,
"gemma-2-2b-instruct": ModelType.NLP,
"Octopus-v2": ModelType.NLP,
"Octopus-v4": ModelType.NLP,
"Octo-planner": ModelType.NLP,
"deepseek-coder-6.7b-base": ModelType.NLP,
"Llama2-7b-chat-uncensored": ModelType.NLP,
"Llama3-8B-Lexi-Uncensored": ModelType.NLP,
"Llama2-7b-function-calling": ModelType.NLP,
"OpenELM-1_1B": ModelType.NLP,
"OpenELM-3B": ModelType.NLP,
"lcm-dreamshaper-v7": ModelType.COMPUTER_VISION,
"stable-diffusion-v1-5": ModelType.COMPUTER_VISION,
"stable-diffusion-v1-4": ModelType.COMPUTER_VISION,
"stable-diffusion-v2-1": ModelType.COMPUTER_VISION,
"stable-diffusion-3-medium": ModelType.COMPUTER_VISION,
"sdxl-turbo": ModelType.COMPUTER_VISION,
"hassaku-hentai-model-v13-LCM": ModelType.COMPUTER_VISION,
"anything-v30-LCM": ModelType.COMPUTER_VISION,
"FLUX.1-schnell": ModelType.COMPUTER_VISION,
"Phi-3-vision-128k-instruct": ModelType.MULTIMODAL,
"nanoLLaVA": ModelType.MULTIMODAL,
"llava-v1.6-mistral-7b": ModelType.MULTIMODAL,
"llava-v1.6-vicuna-7b": ModelType.MULTIMODAL,
"llava-phi-3-mini": ModelType.MULTIMODAL,
"llava-llama-3-8b-v1.1": ModelType.MULTIMODAL,
"faster-whisper-tiny.en": ModelType.AUDIO,
"faster-whisper-tiny": ModelType.AUDIO,
"faster-whisper-small.en": ModelType.AUDIO,
"faster-whisper-small": ModelType.AUDIO,
"faster-whisper-medium.en": ModelType.AUDIO,
"faster-whisper-medium": ModelType.AUDIO,
"faster-whisper-base.en": ModelType.AUDIO,
"faster-whisper-base": ModelType.AUDIO,
"faster-whisper-large-v3": ModelType.AUDIO,
"whisper-tiny.en": ModelType.AUDIO,
"whisper-tiny": ModelType.AUDIO,
"whisper-small.en": ModelType.AUDIO,
"whisper-small": ModelType.AUDIO,
"whisper-base.en": ModelType.AUDIO,
"whisper-base": ModelType.AUDIO,
}
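
With this change the map's values are `ModelType` members rather than bare strings, so a lookup stays typed until a caller asks for the display string via `.value`. A minimal sketch of that contract, using a hypothetical two-entry excerpt of the map and the NLP fallback adopted in `general.py` below:

```python
from enum import Enum

class ModelType(Enum):
    NLP = "NLP"
    COMPUTER_VISION = "Computer Vision"

# Hypothetical excerpt of NEXA_OFFICIAL_MODELS_TYPE for illustration.
OFFICIAL_MODELS_TYPE = {
    "Octopus-v2": ModelType.NLP,
    "FLUX.1-schnell": ModelType.COMPUTER_VISION,
}

def run_type_for(model_path: str) -> str:
    # "model:tag" -> "model"; unknown names fall back to NLP.
    model_name = model_path.split(":")[0]
    return OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value

print(run_type_for("FLUX.1-schnell:q4_0"))     # Computer Vision
print(run_type_for("not-a-known-model:q4_0"))  # NLP
```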


11 changes: 6 additions & 5 deletions nexa/general.py
@@ -15,7 +15,7 @@
NEXA_TOKEN_PATH,
NEXA_OFFICIAL_MODELS_TYPE,
)

from nexa.constants import ModelType

def login():
"""
@@ -119,10 +119,10 @@ def pull_model(model_path):
return result["local_path"], result["run_type"]
else:
print(f"Failed to pull model {model_path}")
return None, "UNKNOWN"
return None, "NLP"
except Exception as e:
logging.error(f"An error occurred while pulling the model: {e}")
return None, "UNKNOWN"
return None, "NLP"


def pull_model_from_hub(model_path):
@@ -198,19 +198,20 @@ def pull_model_from_official(model_path):
model_type = "gguf"

run_type = get_run_type_from_model_path(model_path)
run_type_str = run_type.value if isinstance(run_type, ModelType) else str(run_type)
success, location = download_model_from_official(model_path, model_type)

return {
"success": success,
"local_path": location,
"model_type": model_type,
"run_type": run_type
"run_type": run_type_str
}


def get_run_type_from_model_path(model_path):
model_name, model_version = model_path.split(":")
return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, "UNKNOWN")
return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value


def get_model_presigned_link(full_path, token):
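
The new `run_type_str` line acts as a small adapter between the enum-typed map and callers that expect plain strings. A minimal sketch of that normalization, under the assumption that `run_type` may arrive as either form:

```python
from enum import Enum

class ModelType(Enum):
    NLP = "NLP"

def normalize_run_type(run_type) -> str:
    # Accept a ModelType member or a plain string; always return a string.
    return run_type.value if isinstance(run_type, ModelType) else str(run_type)

assert normalize_run_type(ModelType.NLP) == "NLP"
assert normalize_run_type("Audio") == "Audio"
```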
2 changes: 1 addition & 1 deletion nexa/gguf/llama/_utils_transformers.py
@@ -17,7 +17,7 @@ class suppress_stdout_stderr(object):
sys = sys
os = os

def __init__(self, disable: bool = True):
def __init__(self, disable: bool = False):
self.disable = disable

# Oddly enough this works better than the contextlib version
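
Flipping the default from `True` to `False` means the context manager now suppresses output unless a caller explicitly opts out. A hypothetical, simplified sketch of the flag's semantics (the real class also redirects the OS-level file descriptors, not just the Python stream objects):

```python
import os
import sys
from contextlib import contextmanager

@contextmanager
def suppress_stdout_stderr(disable: bool = False):
    if disable:  # disable=True turns suppression off entirely
        yield
        return
    devnull = open(os.devnull, "w")
    saved_out, saved_err = sys.stdout, sys.stderr
    sys.stdout = sys.stderr = devnull
    try:
        yield
    finally:
        sys.stdout, sys.stderr = saved_out, saved_err
        devnull.close()

with suppress_stdout_stderr():
    print("this line is swallowed")
```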
8 changes: 4 additions & 4 deletions nexa/gguf/llama/llama_cpp.py
@@ -1488,10 +1488,10 @@ def llama_model_decoder_start_token(model: llama_model_p, /) -> int:

# // Returns true if the model is recurrent (like Mamba, RWKV, etc.)
# LLAMA_API bool llama_model_is_recurrent(const struct llama_model * model);
@ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
"""Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
...
# @ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
# def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
# """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
# ...


# // Returns 0 on success
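
Commenting out the binding avoids an import-time failure when the bundled llama.cpp binary predates `llama_model_is_recurrent`. An alternative, feature-detecting sketch (library name and fallback behavior are assumptions, not the file's actual pattern):

```python
import ctypes
import ctypes.util

# Bind the symbol only if the loaded library actually exports it.
_path = ctypes.util.find_library("llama")  # library name is an assumption
_lib = ctypes.CDLL(_path) if _path else None

if _lib is not None and hasattr(_lib, "llama_model_is_recurrent"):
    _lib.llama_model_is_recurrent.restype = ctypes.c_bool
else:
    # Gracefully degrade when the symbol (or library) is missing.
    print("llama_model_is_recurrent is unavailable in this build")
```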