Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pull flux model logic #82

Merged
merged 5 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ Below is our differentiation from other similar tools:

We have released pre-built wheels for various Python versions, platforms, and backends for convenient installation on our [index page](https://nexaai.github.io/nexa-sdk/whl/).

> [!NOTE]
> 1. If you want to use an <strong>ONNX model</strong>, replace `pip install nexaai` with `pip install "nexaai[onnx]"` in the provided commands.
> 2. For developers in China, we recommend using the <strong>Tsinghua Open Source Mirror</strong> as the extra index URL: replace `--extra-index-url https://pypi.org/simple` with `--extra-index-url https://pypi.tuna.tsinghua.edu.cn/simple` in the provided commands.

#### CPU

Expand Down Expand Up @@ -117,9 +120,6 @@ For **Windows Git Bash**:
```bash
CMAKE_ARGS="-DGGML_CUDA=ON -DSD_CUBLAS=ON" pip install nexaai --prefer-binary --index-url https://nexaai.github.io/nexa-sdk/whl/cu124 --extra-index-url https://pypi.org/simple --no-cache-dir
```
> [!NOTE]
> If you want to use ONNX model, just replace `pip install nexaai` with `pip install nexaai[onnx]` in above commands


<details>
<summary><strong>FAQ: Building Issues for llava</strong></summary>
Expand Down
206 changes: 122 additions & 84 deletions nexa/constants.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,38 @@
import os
from pathlib import Path
from enum import Enum

# Paths for caching, model hub, and tokens
NEXA_CACHE_ROOT = Path(os.getenv("NEXA_CACHE_ROOT") or "~/.cache/nexa").expanduser()
NEXA_TOKEN_PATH = NEXA_CACHE_ROOT / "token"
NEXA_MODELS_HUB_DIR = NEXA_CACHE_ROOT / "hub"
NEXA_MODELS_HUB_OFFICIAL_DIR = NEXA_MODELS_HUB_DIR / "official"
NEXA_MODEL_LIST_PATH = NEXA_MODELS_HUB_DIR / "model_list.json"

# URLs and buckets
NEXA_API_URL = "https://model-hub-backend.nexa4ai.com"
NEXA_OFFICIAL_BUCKET = "https://public-storage.nexa4ai.com/"

# Nexa logo
NEXA_LOGO = """
_| _| _|_|_| _| _| _|_| _|_| _|_|_|_|
_|_| _| _| _| _| _| _| _| _| _|
_|_|_|_| _|_|_| _| _|_|_|_| _|_|_|_| _|
_| _|_| _| _| _| _| _| _| _| _|
_| _| _|_|_| _| _| _| _| _| _| _|_|_|_|
"""
# Maokun TODO: Update the model info and find a good default precision for each model

PRODUCER_INFO = dict(
# producer_name="nexa.ai", # onnxruntime: Model producer not matched: Expected "pytorch"
producer_version="0.0.0",
doc_string="Model exported by Nexa.ai",
)
# Model producer info
PRODUCER_INFO = {
"producer_version": "0.0.0",
"doc_string": "Model exported by Nexa.ai",
}

class ModelType(Enum):
    """Closed set of model categories used to tag official models.

    Each member's ``value`` is the human-readable category label; the
    labels are the same strings that were previously stored directly in
    ``NEXA_OFFICIAL_MODELS_TYPE``.
    """

    NLP = "NLP"
    COMPUTER_VISION = "Computer Vision"
    AUDIO = "Audio"
    MULTIMODAL = "Multimodal"

NEXA_RUN_MODEL_MAP_TEXT = {
"octopus-v2": "Octopus-v2:q4_0",
Expand All @@ -44,8 +54,10 @@
"dolphin-mistral": "dolphin-2.8-mistral-7b:q4_0",
"phi2": "Phi-2:q4_0",
"phi3": "Phi-3-mini-128k-instruct:q4_0",
"phi3.5": "Phi-3.5-mini-instruct:q4_0",
"llama2-uncensored": "Llama2-7b-chat-uncensored:q4_0",
"llama3-uncensored": "Llama3-8B-Lexi-Uncensored:q4_K_M",
"openelm": "OpenELM-3B:q4_K_M",
}

NEXA_RUN_MODEL_MAP_ONNX = {
Expand Down Expand Up @@ -116,8 +128,6 @@
"Llama2-7b-function-calling:q8_0": "Llama2-7b-function-calling:q8_0",
}



NEXA_RUN_PROJECTOR_MAP = {
"nanollava": "nanoLLaVA:projector-fp16",
"nanoLLaVA:fp16": "nanoLLaVA:projector-fp16",
Expand All @@ -135,22 +145,43 @@
"llava-v1.6-vicuna-7b:fp16": "llava-v1.6-vicuna-7b:projector-fp16",
}

# Lookup from a FLUX run alias / `FLUX.1-schnell:<precision>` tag to the
# `FLUX.1-schnell:t5xxl-<precision>` tag carrying the same precision suffix.
# NOTE(review): presumably this names the T5-XXL text-encoder file that
# accompanies the FLUX weights — confirm against the code that consumes it.
NEXA_RUN_T5XXL_MAP = {
"flux": "FLUX.1-schnell:t5xxl-q4_0",  # bare alias defaults to q4_0
"FLUX.1-schnell:q4_0": "FLUX.1-schnell:t5xxl-q4_0",
"FLUX.1-schnell:q5_0": "FLUX.1-schnell:t5xxl-q5_0",
"FLUX.1-schnell:q5_1": "FLUX.1-schnell:t5xxl-q5_1",
"FLUX.1-schnell:q8_0": "FLUX.1-schnell:t5xxl-q8_0",
"FLUX.1-schnell:fp16": "FLUX.1-schnell:t5xxl-fp16",
}

NEXA_RUN_MODEL_MAP_IMAGE = {
"sd1-4": "stable-diffusion-v1-4:q4_0",
"sd1-5": "stable-diffusion-v1-5:q4_0",
"sd2-1": "stable-diffusion-v2-1:fp16",
"sd2-1": "stable-diffusion-v2-1:q4_0",
"sd3": "stable-diffusion-3-medium:q4_0",
"sdxl-turbo": "sdxl-turbo:q8_0",
"flux": "FLUX.1-schnell:q4_0",
"lcm-dreamshaper": "lcm-dreamshaper-v7:fp16",
"anything-lcm": "anything-v30-LCM:fp16",
"hassaku-lcm": "hassaku-hentai-model-v13-LCM:fp16",
}

# Lookup from a FLUX run alias / `FLUX.1-schnell:<precision>` tag to the
# `FLUX.1-schnell:flux1-schnell-<precision>` tag carrying the same
# precision suffix.
NEXA_RUN_MODEL_MAP_FLUX = {
"flux": "FLUX.1-schnell:flux1-schnell-q4_0",  # bare alias defaults to q4_0
"FLUX.1-schnell:q4_0": "FLUX.1-schnell:flux1-schnell-q4_0",
"FLUX.1-schnell:q5_0": "FLUX.1-schnell:flux1-schnell-q5_0",
"FLUX.1-schnell:q5_1": "FLUX.1-schnell:flux1-schnell-q5_1",
"FLUX.1-schnell:q8_0": "FLUX.1-schnell:flux1-schnell-q8_0",
"FLUX.1-schnell:fp16": "FLUX.1-schnell:flux1-schnell-fp16",
}

# Combined alias -> model-tag lookup, built by unpacking the per-category
# maps in the order listed. With dict unpacking, a key that appears in more
# than one source map takes the value from the map unpacked last.
# NEXA_RUN_MODEL_MAP_FLUX comes last, so "flux" (also defined in
# NEXA_RUN_MODEL_MAP_IMAGE) resolves to the FLUX-map value here.
NEXA_RUN_MODEL_MAP = {
**NEXA_RUN_MODEL_MAP_TEXT,
**NEXA_RUN_MODEL_MAP_VLM,
**NEXA_RUN_MODEL_MAP_IMAGE,
**NEXA_RUN_MODEL_MAP_VOICE,
**NEXA_RUN_MODEL_MAP_FUNCTION_CALLING,
**NEXA_RUN_MODEL_MAP_FLUX,  # unpacked last: wins on duplicate keys
}

NEXA_RUN_CHAT_TEMPLATE_MAP = {
Expand All @@ -172,6 +203,8 @@
"sd1-4": "q4_0",
"sd1-5": "q4_0",
"sd2-1": "q4_0",
"sd3": "q4_0",
"flux": "q4_0",
"lcm-dreamshaper": "f16",
"sdxl-turbo": "q8_0",
"anything-lcm": "f16",
Expand All @@ -181,7 +214,10 @@
EXIT_COMMANDS = ["/exit", "/quit", "/bye"]
EXIT_REMINDER = f"Please use Ctrl + d or one of {EXIT_COMMANDS} to exit.\n"

NEXA_STOP_WORDS_MAP = {"octopus-v2": ["<nexa_end>"]}
NEXA_STOP_WORDS_MAP = {
"octopus-v2": ["<nexa_end>"],
"octopus-v4": ["<nexa_end>"]
}

DEFAULT_TEXT_GEN_PARAMS = {
"temperature": 0.7,
Expand Down Expand Up @@ -223,83 +259,85 @@
"language": None,
"task": "transcribe",
"temperature": 0.0,
"compute_type": "default"
"compute_type": "default",
}

NEXA_OFFICIAL_MODELS_TYPE = {
'gemma-2b': 'NLP',
'Llama-2-7b-chat': 'NLP',
'Llama-2-7b': 'NLP',
'Meta-Llama-3-8B-Instruct': 'NLP',
'Meta-Llama-3.1-8B-Instruct': 'NLP',
'Mistral-7B-Instruct-v0.3': 'NLP',
'Mistral-7B-Instruct-v0.2': 'NLP',
'Phi-3-mini-128k-instruct': 'NLP',
'Phi-3-mini-4k-instruct': 'NLP',
"Phi-3.5-mini-instruct": "NLP",
'CodeQwen1.5-7B-Instruct': 'NLP',
'Qwen2-0.5B-Instruct': 'NLP',
'Qwen2-1.5B-Instruct': 'NLP',
'Qwen2-7B-Instruct': 'NLP',
'codegemma-2b': 'NLP',
'gemma-1.1-2b-instruct': 'NLP',
'gemma-2b-instruct': 'NLP',
'gemma-2-9b-instruct': 'NLP',
'gemma-1.1-7b-instruct': 'NLP',
'gemma-7b-instruct': 'NLP',
'gemma-7b': 'NLP',
'Qwen2-1.5B': 'NLP',
'codegemma-7b': 'NLP',
'TinyLlama-1.1B-Chat-v1.0': 'NLP',
'CodeLlama-7b-Instruct': 'NLP',
'gpt2': 'NLP',
'CodeLlama-7b': 'NLP',
'CodeLlama-7b-Python': 'NLP',
'Qwen1.5-7B-Instruct': 'NLP',
'Qwen1.5-7B': 'NLP',
'Phi-2': 'NLP',
'deepseek-coder-1.3b-instruct': 'NLP',
'deepseek-coder-1.3b-base': 'NLP',
'deepseek-coder-6.7b-instruct': 'NLP',
'dolphin-2.8-mistral-7b': 'NLP',
'gemma-2-2b-instruct': 'NLP',
'Octopus-v2': 'NLP',
'Octopus-v4': 'NLP',
'Octo-planner': 'NLP',
'deepseek-coder-6.7b-base': 'NLP',
'Llama2-7b-chat-uncensored': 'NLP',
'Llama3-8B-Lexi-Uncensored': 'NLP',
'Llama2-7b-function-calling': 'NLP',
'OpenELM-1_1B': 'NLP',
'OpenELM-3B': 'NLP',
'lcm-dreamshaper-v7': 'Computer Vision',
'stable-diffusion-v1-5': 'Computer Vision',
'stable-diffusion-v1-4': 'Computer Vision',
'stable-diffusion-v2-1': 'Computer Vision',
'sdxl-turbo': 'Computer Vision',
'hassaku-hentai-model-v13-LCM': 'Computer Vision',
'anything-v30-LCM': 'Computer Vision',
'Phi-3-vision-128k-instruct': 'Multimodal',
'nanoLLaVA': 'Multimodal',
'llava-v1.6-mistral-7b': 'Multimodal',
'llava-v1.6-vicuna-7b': 'Multimodal',
'llava-phi-3-mini': 'Multimodal',
'llava-llama-3-8b-v1.1': 'Multimodal',
'faster-whisper-tiny.en': 'Audio',
'faster-whisper-tiny': 'Audio',
'faster-whisper-small.en': 'Audio',
'faster-whisper-small': 'Audio',
'faster-whisper-medium.en': 'Audio',
'faster-whisper-medium': 'Audio',
'faster-whisper-base.en': 'Audio',
'faster-whisper-base': 'Audio',
'faster-whisper-large-v3': 'Audio',
'whisper-tiny.en': 'Audio',
'whisper-tiny': 'Audio',
'whisper-small.en': 'Audio',
'whisper-small': 'Audio',
'whisper-base.en': 'Audio',
'whisper-base': 'Audio',
"gemma-2b": ModelType.NLP,
"Llama-2-7b-chat": ModelType.NLP,
"Llama-2-7b": ModelType.NLP,
"Meta-Llama-3-8B-Instruct": ModelType.NLP,
"Meta-Llama-3.1-8B-Instruct": ModelType.NLP,
"Mistral-7B-Instruct-v0.3": ModelType.NLP,
"Mistral-7B-Instruct-v0.2": ModelType.NLP,
"Phi-3-mini-128k-instruct": ModelType.NLP,
"Phi-3-mini-4k-instruct": ModelType.NLP,
"Phi-3.5-mini-instruct": ModelType.NLP,
"CodeQwen1.5-7B-Instruct": ModelType.NLP,
"Qwen2-0.5B-Instruct": ModelType.NLP,
"Qwen2-1.5B-Instruct": ModelType.NLP,
"Qwen2-7B-Instruct": ModelType.NLP,
"codegemma-2b": ModelType.NLP,
"gemma-1.1-2b-instruct": ModelType.NLP,
"gemma-2b-instruct": ModelType.NLP,
"gemma-2-9b-instruct": ModelType.NLP,
"gemma-1.1-7b-instruct": ModelType.NLP,
"gemma-7b-instruct": ModelType.NLP,
"gemma-7b": ModelType.NLP,
"Qwen2-1.5B": ModelType.NLP,
"codegemma-7b": ModelType.NLP,
"TinyLlama-1.1B-Chat-v1.0": ModelType.NLP,
"CodeLlama-7b-Instruct": ModelType.NLP,
"gpt2": ModelType.NLP,
"CodeLlama-7b": ModelType.NLP,
"CodeLlama-7b-Python": ModelType.NLP,
"Qwen1.5-7B-Instruct": ModelType.NLP,
"Qwen1.5-7B": ModelType.NLP,
"Phi-2": ModelType.NLP,
"deepseek-coder-1.3b-instruct": ModelType.NLP,
"deepseek-coder-1.3b-base": ModelType.NLP,
"deepseek-coder-6.7b-instruct": ModelType.NLP,
"dolphin-2.8-mistral-7b": ModelType.NLP,
"gemma-2-2b-instruct": ModelType.NLP,
"Octopus-v2": ModelType.NLP,
"Octopus-v4": ModelType.NLP,
"Octo-planner": ModelType.NLP,
"deepseek-coder-6.7b-base": ModelType.NLP,
"Llama2-7b-chat-uncensored": ModelType.NLP,
"Llama3-8B-Lexi-Uncensored": ModelType.NLP,
"Llama2-7b-function-calling": ModelType.NLP,
"OpenELM-1_1B": ModelType.NLP,
"OpenELM-3B": ModelType.NLP,
"lcm-dreamshaper-v7": ModelType.COMPUTER_VISION,
"stable-diffusion-v1-5": ModelType.COMPUTER_VISION,
"stable-diffusion-v1-4": ModelType.COMPUTER_VISION,
"stable-diffusion-v2-1": ModelType.COMPUTER_VISION,
"stable-diffusion-3-medium": ModelType.COMPUTER_VISION,
"sdxl-turbo": ModelType.COMPUTER_VISION,
"hassaku-hentai-model-v13-LCM": ModelType.COMPUTER_VISION,
"anything-v30-LCM": ModelType.COMPUTER_VISION,
"FLUX.1-schnell": ModelType.COMPUTER_VISION,
"Phi-3-vision-128k-instruct": ModelType.MULTIMODAL,
"nanoLLaVA": ModelType.MULTIMODAL,
"llava-v1.6-mistral-7b": ModelType.MULTIMODAL,
"llava-v1.6-vicuna-7b": ModelType.MULTIMODAL,
"llava-phi-3-mini": ModelType.MULTIMODAL,
"llava-llama-3-8b-v1.1": ModelType.MULTIMODAL,
"faster-whisper-tiny.en": ModelType.AUDIO,
"faster-whisper-tiny": ModelType.AUDIO,
"faster-whisper-small.en": ModelType.AUDIO,
"faster-whisper-small": ModelType.AUDIO,
"faster-whisper-medium.en": ModelType.AUDIO,
"faster-whisper-medium": ModelType.AUDIO,
"faster-whisper-base.en": ModelType.AUDIO,
"faster-whisper-base": ModelType.AUDIO,
"faster-whisper-large-v3": ModelType.AUDIO,
"whisper-tiny.en": ModelType.AUDIO,
"whisper-tiny": ModelType.AUDIO,
"whisper-small.en": ModelType.AUDIO,
"whisper-small": ModelType.AUDIO,
"whisper-base.en": ModelType.AUDIO,
"whisper-base": ModelType.AUDIO,
}


11 changes: 6 additions & 5 deletions nexa/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
NEXA_TOKEN_PATH,
NEXA_OFFICIAL_MODELS_TYPE,
)

from nexa.constants import ModelType

def login():
"""
Expand Down Expand Up @@ -119,10 +119,10 @@ def pull_model(model_path):
return result["local_path"], result["run_type"]
else:
print(f"Failed to pull model {model_path}")
return None, "UNKNOWN"
return None, "NLP"
except Exception as e:
logging.error(f"An error occurred while pulling the model: {e}")
return None, "UNKNOWN"
return None, "NLP"


def pull_model_from_hub(model_path):
Expand Down Expand Up @@ -198,19 +198,20 @@ def pull_model_from_official(model_path):
model_type = "gguf"

run_type = get_run_type_from_model_path(model_path)
run_type_str = run_type.value if isinstance(run_type, ModelType) else str(run_type)
success, location = download_model_from_official(model_path, model_type)

return {
"success": success,
"local_path": location,
"model_type": model_type,
"run_type": run_type
"run_type": run_type_str
}


def get_run_type_from_model_path(model_path):
model_name, model_version = model_path.split(":")
return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, "UNKNOWN")
return NEXA_OFFICIAL_MODELS_TYPE.get(model_name, ModelType.NLP).value
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fix conflicts due to adding enum in constants



def get_model_presigned_link(full_path, token):
Expand Down
2 changes: 1 addition & 1 deletion nexa/gguf/llama/_utils_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class suppress_stdout_stderr(object):
sys = sys
os = os

def __init__(self, disable: bool = True):
def __init__(self, disable: bool = False):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suppress logging print

self.disable = disable

# Oddly enough this works better than the contextlib version
Expand Down
8 changes: 4 additions & 4 deletions nexa/gguf/llama/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -1488,10 +1488,10 @@ def llama_model_decoder_start_token(model: llama_model_p, /) -> int:

# // Returns true if the model is recurrent (like Mamba, RWKV, etc.)
# LLAMA_API bool llama_model_is_recurrent(const struct llama_model * model);
@ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
"""Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
...
# @ctypes_function("llama_model_is_recurrent", [llama_model_p_ctypes], ctypes.c_bool)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

temporarily comment this to avoid vlm_inference crash

# def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
# """Returns true if the model is recurrent (like Mamba, RWKV, etc.)"""
# ...


# // Returns 0 on success
Expand Down
Loading
Loading