Merge pull request #253 from mi804/support_modelscope_hub
support modelscope hub
zhiyuan8 authored Nov 19, 2024
2 parents ba36557 + b6f7f8e commit b16fe65
Showing 8 changed files with 222 additions and 71 deletions.
32 changes: 20 additions & 12 deletions CLI.md
@@ -46,11 +46,12 @@ nexa pull MODEL_PATH
usage: nexa pull [-h] model_path
positional arguments:
-model_path Path or identifier for the model in Nexa Model Hub, or Hugging Face repo ID when using -hf flag
+model_path Path or identifier for the model in Nexa Model Hub, Hugging Face repo ID when using -hf flag, or ModelScope model ID when using -ms flag
options:
-h, --help show this help message and exit
-hf, --huggingface Pull model from Hugging Face Hub
+-ms, --modelscope Pull model from ModelScope Hub
-o, --output_path OUTPUT_PATH
Custom output path for the pulled model
```
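For example, pulling a GGUF model straight from ModelScope with the new flag (model ID taken from the README example later in this commit):

```bash
# Pull a GGUF model from ModelScope rather than the Nexa Model Hub
nexa pull -ms Qwen/Qwen2.5-Coder-7B-Instruct-GGUF
```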
@@ -102,7 +103,7 @@ You can run any model shown in `nexa list` command.

```
nexa run MODEL_PATH
-usage: nexa run [-h] [-t TEMPERATURE] [-m MAX_NEW_TOKENS] [-k TOP_K] [-p TOP_P] [-sw [STOP_WORDS ...]] [-pf] [-st] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] model_path
+usage: nexa run [-h] [-t TEMPERATURE] [-m MAX_NEW_TOKENS] [-k TOP_K] [-p TOP_P] [-sw [STOP_WORDS ...]] [-pf] [-st] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] [-ms] model_path
positional arguments:
model_path Path or identifier for the model in Nexa Model Hub
@@ -112,8 +113,9 @@ options:
-pf, --profiling Enable profiling logs for the inference process
-st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf
-lp, --local_path Indicate that the model path provided is the local path
--mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
+-mt, --model_type Indicate the model running type, must be used with -lp or -hf or -ms, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
-hf, --huggingface Load model from Hugging Face Hub
+-ms, --modelscope Load model from ModelScope Hub
Text generation options:
-t, --temperature TEMPERATURE
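The new flag composes with the existing generation options; a minimal sketch (same ModelScope model ID as the README example in this commit):

```bash
# Run a ModelScope-hosted GGUF model with a lower temperature and a 512-token cap
nexa run -ms Qwen/Qwen2.5-Coder-7B-Instruct-GGUF -t 0.7 -m 512
```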
@@ -137,7 +139,7 @@ nexa run llama2

```
nexa run MODEL_PATH
-usage: nexa run [-h] [-i2i] [-ns NUM_INFERENCE_STEPS] [-np NUM_IMAGES_PER_PROMPT] [-H HEIGHT] [-W WIDTH] [-g GUIDANCE_SCALE] [-o OUTPUT] [-s RANDOM_SEED] [-st] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] model_path
+usage: nexa run [-h] [-i2i] [-ns NUM_INFERENCE_STEPS] [-np NUM_IMAGES_PER_PROMPT] [-H HEIGHT] [-W WIDTH] [-g GUIDANCE_SCALE] [-o OUTPUT] [-s RANDOM_SEED] [-st] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] [-ms] model_path
positional arguments:
model_path Path or identifier for the model in Nexa Model Hub
@@ -146,8 +148,9 @@ options:
-h, --help show this help message and exit
-st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf
-lp, --local_path Indicate that the model path provided is the local path
--mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
+-mt, --model_type Indicate the model running type, must be used with -lp or -hf or -ms, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
-hf, --huggingface Load model from Hugging Face Hub
+-ms, --modelscope Load model from ModelScope Hub
Image generation options:
-i2i, --img2img Whether to run image-to-image generation
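Image generation works the same way; a sketch with a placeholder ModelScope model ID (substitute a real image-model ID):

```bash
# Generate a 512x512 image from a ModelScope-hosted model (placeholder ID)
nexa run -ms <ms-model-id> -H 512 -W 512
```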
@@ -182,7 +185,7 @@ nexa run sd1-4

```
nexa run MODEL_PATH
-usage: nexa run [-h] [-t TEMPERATURE] [-m MAX_NEW_TOKENS] [-k TOP_K] [-p TOP_P] [-sw [STOP_WORDS ...]] [-pf] [-st] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] model_path
+usage: nexa run [-h] [-t TEMPERATURE] [-m MAX_NEW_TOKENS] [-k TOP_K] [-p TOP_P] [-sw [STOP_WORDS ...]] [-pf] [-st] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] [-ms] model_path
positional arguments:
model_path Path or identifier for the model in Nexa Model Hub
@@ -192,8 +195,9 @@ options:
-pf, --profiling Enable profiling logs for the inference process
-st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf
-lp, --local_path Indicate that the model path provided is the local path
--mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
+-mt, --model_type Indicate the model running type, must be used with -lp or -hf or -ms, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
-hf, --huggingface Load model from Hugging Face Hub
+-ms, --modelscope Load model from ModelScope Hub
VLM generation options:
-t, --temperature TEMPERATURE
@@ -217,7 +221,7 @@ nexa run nanollava

```
nexa run MODEL_PATH
-usage: nexa run [-h] [-o OUTPUT_DIR] [-b BEAM_SIZE] [-l LANGUAGE] [--task TASK] [-t TEMPERATURE] [-c COMPUTE_TYPE] [-st] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] model_path
+usage: nexa run [-h] [-o OUTPUT_DIR] [-b BEAM_SIZE] [-l LANGUAGE] [--task TASK] [-t TEMPERATURE] [-c COMPUTE_TYPE] [-st] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] [-ms] model_path
positional arguments:
model_path Path or identifier for the model in Nexa Model Hub
@@ -226,8 +230,9 @@ options:
-h, --help show this help message and exit
-st, --streamlit Run the inference in Streamlit UI, can be used with -lp or -hf
-lp, --local_path Indicate that the model path provided is the local path
--mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
+-mt, --model_type Indicate the model running type, must be used with -lp or -hf or -ms, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
-hf, --huggingface Load model from Hugging Face Hub
+-ms, --modelscope Load model from ModelScope Hub
Automatic Speech Recognition options:
-b, --beam_size BEAM_SIZE
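Transcription from a ModelScope-hosted model follows the same pattern; the model ID below is a placeholder, and -b and -l are the beam-size and language options from the usage line above:

```bash
# Transcribe English audio with beam size 5 (placeholder model ID)
nexa run -ms <ms-model-id> -b 5 -l en
```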
@@ -251,7 +256,7 @@ nexa run faster-whisper-tiny

```
nexa embed MODEL_PATH
-usage: nexa embed [-h] [-lp] [-hf] [-n] [-nt] model_path prompt
+usage: nexa embed [-h] [-lp] [-hf] [-ms] [-n] [-nt] model_path prompt
positional arguments:
model_path Path or identifier for the model in Nexa Model Hub
@@ -261,6 +266,7 @@ options:
-h, --help show this help message and exit
-lp, --local_path Indicate that the model path provided is the local path
-hf, --huggingface Load model from Hugging Face Hub
+-ms, --modelscope Load model from ModelScope Hub
-n, --normalize Normalize the embeddings
-nt, --no_truncate Do not truncate the embeddings
```
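For instance, computing a normalized embedding with a ModelScope-hosted embedding model (placeholder ID):

```bash
# -n normalizes the returned embedding vector (placeholder model ID)
nexa embed -ms -n <ms-model-id> "Nexa SDK now supports ModelScope."
```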
@@ -318,6 +324,7 @@ options:
--only_copy Only copy tensors (ignores ftype, allow_requantize, and quantize_output_tensor)
--pure Quantize all tensors to the default type
--keep_split Quantize to the same number of shards
+-ms, --modelscope Load model from ModelScope Hub
```

#### Example
@@ -341,16 +348,17 @@ Start a local server using models on your local computer.

```
nexa server MODEL_PATH
-usage: nexa server [-h] [--host HOST] [--port PORT] [--reload] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] model_path
+usage: nexa server [-h] [--host HOST] [--port PORT] [--reload] [-lp] [-mt {NLP, COMPUTER_VISION, MULTIMODAL, AUDIO}] [-hf] [-ms] model_path
positional arguments:
model_path Path or identifier for the model in S3
options:
-h, --help show this help message and exit
-lp, --local_path Indicate that the model path provided is the local path
--mt, --model_type Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
+-mt, --model_type Indicate the model running type, must be used with -lp or -hf or -ms, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
-hf, --huggingface Load model from Hugging Face Hub
+-ms, --modelscope Load model from ModelScope Hub
--host HOST Host to bind the server to
--port PORT Port to bind the server to
--reload Enable automatic reloading on code changes
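For example, serving a ModelScope-hosted model on an explicit host and port (model ID reused from the README example in this commit):

```bash
# Bind the server to localhost:8000 and load the model from ModelScope
nexa server -ms Qwen/Qwen2.5-Coder-7B-Instruct-GGUF --host 127.0.0.1 --port 8000
```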
15 changes: 11 additions & 4 deletions README.md
@@ -254,19 +254,26 @@ Supported model examples (full list at [Model Hub](https://nexa.ai/models)):
| [all-MiniLM-L12-v2](https://nexa.ai/sentence-transformers/all-MiniLM-L12-v2/gguf-fp16/readme) | Embedding | GGUF | `nexa embed all-MiniLM-L12-v2:fp16` |
| [bark-small](https://nexa.ai/suno/bark-small/gguf-fp16/readme) | Text-to-Speech | GGUF | `nexa run bark-small:fp16` |

-## Run Models from 🤗 HuggingFace
-You can pull, convert (to .gguf), quantize and run [llama.cpp supported](https://github.com/ggerganov/llama.cpp#description) text generation models from HF with Nexa SDK.
+## Run Models from 🤗 HuggingFace or 🤖 ModelScope
+You can pull, convert (to .gguf), quantize and run [llama.cpp supported](https://github.com/ggerganov/llama.cpp#description) text generation models from HF or MS with Nexa SDK.
### Run .gguf File
-Use `nexa run -hf <hf-model-id>` to run models with provided .gguf files:
+Use `nexa run -hf <hf-model-id>` or `nexa run -ms <ms-model-id>` to run models with provided .gguf files:
```bash
nexa run -hf Qwen/Qwen2.5-Coder-7B-Instruct-GGUF
```
+```bash
+nexa run -ms Qwen/Qwen2.5-Coder-7B-Instruct-GGUF
+```
> **Note:** You will be prompted to select a single .gguf file. If your desired quantization version has multiple split files (like fp16-00001-of-00004), please use Nexa's conversion tool (see below) to convert and quantize the model locally.
### Convert .safetensors Files
-Install [Nexa Python package](https://github.com/NexaAI/nexa-sdk?tab=readme-ov-file#install-option-2-python-package), and install Nexa conversion tool with `pip install "nexaai[convert]"`, then convert models with `nexa convert <hf-model-id>`:
+Install [Nexa Python package](https://github.com/NexaAI/nexa-sdk?tab=readme-ov-file#install-option-2-python-package), and install Nexa conversion tool with `pip install "nexaai[convert]"`, then convert models from Hugging Face with `nexa convert <hf-model-id>`:
```bash
nexa convert HuggingFaceTB/SmolLM2-135M-Instruct
```
+Or you can convert models from ModelScope with `nexa convert -ms <ms-model-id>`:
+```bash
+nexa convert -ms Qwen/Qwen2.5-7B-Instruct
+```
> **Note:** Check our [leaderboard](https://nexa.ai/leaderboard) for performance benchmarks of different quantized versions of mainstream language models and [HuggingFace docs](https://huggingface.co/docs/optimum/en/concept_guides/quantization) to learn about quantization options.
📋 You can view downloaded and converted models with `nexa list`
3 changes: 2 additions & 1 deletion SERVER.md
@@ -9,8 +9,9 @@ usage: nexa server [-h] [--host HOST] [--port PORT] [--reload] model_path
### Options:

- `-lp, --local_path`: Indicate that the model path provided is the local path
-- `-mt, --model_type`: Indicate the model running type, must be used with -lp or -hf, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
+- `-mt, --model_type`: Indicate the model running type, must be used with -lp or -hf or -ms, choose from [NLP, COMPUTER_VISION, MULTIMODAL, AUDIO]
- `-hf, --huggingface`: Load model from Hugging Face Hub
+- `-ms, --modelscope`: Load model from ModelScope Hub
- `--host`: Host to bind the server to
- `--port`: Port to bind the server to
- `--reload`: Enable automatic reloading on code changes
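With the server started via `nexa server -ms <ms-model-id> --port 8000`, it can be exercised over HTTP. The route below assumes an OpenAI-style chat-completions endpoint — an assumption of this sketch, so confirm it against the endpoint list documented elsewhere in SERVER.md:

```bash
# Assumed OpenAI-style endpoint; verify the actual route in SERVER.md
curl -X POST http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Hello"}]}'
```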
