FEAT: support VL models for MLX (#2638)
qinxuye authored Dec 12, 2024
1 parent dcf3fa8 commit 6b0bf6f
Showing 8 changed files with 358 additions and 61 deletions.
47 changes: 25 additions & 22 deletions .github/workflows/python.yaml
@@ -117,33 +117,36 @@ jobs:
            sudo rm -rf "/usr/local/share/boost"
            sudo rm -rf "$AGENT_TOOLSDIRECTORY"
          fi
+         pip install -e ".[dev]"
          if [ "$MODULE" == "metal" ]; then
            conda install -c conda-forge "ffmpeg<7"
            pip install mlx-lm
+           pip install mlx-vlm
            pip install mlx-whisper
+           pip install qwen-vl-utils
+         else
+           pip install "llama-cpp-python==0.2.77" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
+           pip install transformers
+           pip install attrdict
+           pip install "timm>=0.9.16"
+           pip install torch torchvision
+           pip install accelerate
+           pip install sentencepiece
+           pip install transformers_stream_generator
+           pip install bitsandbytes
+           pip install "sentence-transformers>=2.3.1"
+           pip install modelscope
+           pip install diffusers
+           pip install protobuf
+           pip install FlagEmbedding
+           pip install "tenacity>=8.2.0,<8.4.0"
+           pip install "jinja2==3.1.2"
+           pip install tensorizer
+           pip install jj-pytorchvideo
+           pip install qwen-vl-utils
+           pip install datamodel_code_generator
+           pip install jsonschema
          fi
-         pip install "llama-cpp-python==0.2.77" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
-         pip install transformers
-         pip install attrdict
-         pip install "timm>=0.9.16"
-         pip install torch torchvision
-         pip install accelerate
-         pip install sentencepiece
-         pip install transformers_stream_generator
-         pip install bitsandbytes
-         pip install "sentence-transformers>=2.3.1"
-         pip install modelscope
-         pip install diffusers
-         pip install protobuf
-         pip install FlagEmbedding
-         pip install "tenacity>=8.2.0,<8.4.0"
-         pip install -e ".[dev]"
-         pip install "jinja2==3.1.2"
-         pip install tensorizer
-         pip install jj-pytorchvideo
-         pip install qwen-vl-utils
-         pip install datamodel_code_generator
-         pip install jsonschema
working-directory: .

- name: Test with pytest
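The metal branch of the workflow now pulls in mlx-vlm and qwen-vl-utils next to mlx-lm and mlx-whisper, while the CUDA/CPU dependencies move into the else branch so they are skipped on Apple silicon. A quick way to sanity-check the metal environment after those installs is a short import loop; this is an illustrative sketch, not part of the workflow itself:

# Hypothetical post-install smoke test for the "metal" CI branch.
# The pip package names above map to these importable module names.
import importlib

for module in ("mlx_lm", "mlx_vlm", "mlx_whisper", "qwen_vl_utils"):
    importlib.import_module(module)
    print(f"{module} imported OK")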
4 changes: 4 additions & 0 deletions setup.cfg
@@ -100,7 +100,9 @@ all =
    outlines>=0.0.34
    sglang>=0.2.7 ; sys_platform=='linux'
    mlx-lm ; sys_platform=='darwin' and platform_machine=='arm64'
+   mlx-vlm ; sys_platform=='darwin' and platform_machine=='arm64'
    mlx-whisper ; sys_platform=='darwin' and platform_machine=='arm64'
+   qwen_vl_utils
    attrdict # For deepseek VL
    timm>=0.9.16 # For deepseek VL
    torchvision # For deepseek VL
@@ -180,7 +182,9 @@ sglang =
    outlines>=0.0.34
mlx =
    mlx-lm
+   mlx-vlm
    mlx-whisper
+   qwen_vl_utils
embedding =
    sentence-transformers>=3.1.0
rerank =
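In both the all and mlx extras, mlx-vlm carries the same environment markers as the existing MLX packages, so it only installs on Apple-silicon macOS, while qwen_vl_utils is unmarked and installs everywhere. The marker condition corresponds to this runtime check, a minimal sketch mirroring setup.cfg:

import platform
import sys

def mlx_supported() -> bool:
    # Same condition as the environment markers in setup.cfg:
    #   sys_platform=='darwin' and platform_machine=='arm64'
    return sys.platform == "darwin" and platform.machine() == "arm64"

print(mlx_supported())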
4 changes: 2 additions & 2 deletions xinference/model/llm/__init__.py
@@ -131,7 +131,7 @@ def register_custom_model():
def _install():
    from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel
    from .lmdeploy.core import LMDeployChatModel, LMDeployModel
-   from .mlx.core import MLXChatModel, MLXModel
+   from .mlx.core import MLXChatModel, MLXModel, MLXVisionModel
    from .sglang.core import SGLANGChatModel, SGLANGModel
    from .transformers.chatglm import ChatglmPytorchChatModel
    from .transformers.cogvlm2 import CogVLM2Model
@@ -172,7 +172,7 @@ def _install():
    )
    SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
    VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
-   MLX_CLASSES.extend([MLXModel, MLXChatModel])
+   MLX_CLASSES.extend([MLXModel, MLXChatModel, MLXVisionModel])
    LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
    TRANSFORMERS_CLASSES.extend(
        [
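MLXVisionModel is registered by appending it to MLX_CLASSES, the list xinference walks when binding a model spec to an engine implementation. The dispatch pattern is roughly the following sketch; the function name and signature are illustrative, not the exact xinference code:

# Illustrative: each registered class exposes a match() predicate, and the
# first class that accepts the (family, spec, quantization) triple wins.
def select_implementation(classes, llm_family, llm_spec, quantization):
    for cls in classes:
        if cls.match(llm_family, llm_spec, quantization):
            return cls
    raise ValueError("no registered class matches this model spec")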
29 changes: 28 additions & 1 deletion xinference/model/llm/llm_family.json
@@ -6925,7 +6925,7 @@
"model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
"model_revision":"800d396518c82960ce6d231adecd07bbc474f0a9"
},
{
"model_format":"awq",
"model_size_in_billions":2,
"quantizations":[
@@ -6934,6 +6934,15 @@
"model_id":"Qwen/Qwen2-VL-2B-Instruct-AWQ",
"model_revision":"ea8c5854c0044e28626719292de0d9b1a671f6fc"
},
+{
+"model_format":"mlx",
+"model_size_in_billions":2,
+"quantizations":[
+"4bit",
+"8bit"
+],
+"model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}"
+},
{
"model_format":"pytorch",
"model_size_in_billions":7,
@@ -6970,6 +6979,15 @@
"model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
"model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
},
+{
+"model_format":"mlx",
+"model_size_in_billions":7,
+"quantizations":[
+"4bit",
+"8bit"
+],
+"model_id":"mlx-community/Qwen2-VL-7B-Instruct-{quantization}"
+},
{
"model_format":"pytorch",
"model_size_in_billions":72,
@@ -6994,6 +7012,15 @@
"Int8"
],
"model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
+},
+{
+"model_format":"mlx",
+"model_size_in_billions":72,
+"quantizations":[
+"4bit",
+"8bit"
+],
+"model_id":"mlx-community/Qwen2-VL-72B-Instruct-{quantization}"
}
],
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
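Each new mlx spec stores a templated model_id whose {quantization} placeholder is filled with the chosen quantization at download time; judging by the syntax, this is str.format-style substitution. For example:

# "4bit" and "8bit" are the quantizations declared in the specs above.
model_id = "mlx-community/Qwen2-VL-7B-Instruct-{quantization}"
print(model_id.format(quantization="4bit"))  # mlx-community/Qwen2-VL-7B-Instruct-4bit
print(model_id.format(quantization="8bit"))  # mlx-community/Qwen2-VL-7B-Instruct-8bit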
31 changes: 31 additions & 0 deletions xinference/model/llm/llm_family_modelscope.json
@@ -4649,6 +4649,16 @@
"model_id":"qwen/Qwen2-VL-7B-Instruct-AWQ",
"model_revision":"master"
},
+{
+"model_format":"mlx",
+"model_size_in_billions":7,
+"quantizations":[
+"8bit"
+],
+"model_hub": "modelscope",
+"model_id":"okwinds/Qwen2-VL-7B-Instruct-MLX-8bit",
+"model_revision":"master"
+},
{
"model_format":"pytorch",
"model_size_in_billions":2,
@@ -4689,6 +4699,16 @@
"model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ",
"model_revision":"master"
},
+{
+"model_format":"mlx",
+"model_size_in_billions":2,
+"quantizations":[
+"8bit"
+],
+"model_hub": "modelscope",
+"model_id":"okwinds/Qwen2-VL-2B-Instruct-MLX-8bit",
+"model_revision":"master"
+},
{
"model_format":"pytorch",
"model_size_in_billions":72,
@@ -4716,6 +4736,17 @@
],
"model_id":"qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
"model_hub": "modelscope"
+},
+{
+"model_format":"mlx",
+"model_size_in_billions":72,
+"quantizations":[
+"4bit",
+"8bit"
+],
+"model_hub": "modelscope",
+"model_id":"okwinds/Qwen2-VL-72B-Instruct-MLX-{quantization}",
+"model_revision":"master"
}
],
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
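The chat_template above is an ordinary Jinja2 template: it wraps each image or video part in <|vision_start|>...<|vision_end|> markers and counts the parts with namespace variables. A minimal render, assuming jinja2 is installed; chat_template is copied verbatim from the spec above:

from jinja2 import Template

chat_template = "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": "Describe this image."},
        ],
    }
]
print(Template(chat_template).render(messages=messages, add_generation_prompt=True))
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# <|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>
# <|im_start|>assistant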

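Taken together, these changes let Apple-silicon users serve Qwen2-VL through the MLX engine end to end. A hedged sketch of client-side usage follows; the model name, engine string, argument set, and image URL are assumptions for illustration, and exact names depend on the installed xinference version:

from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # assumes a locally running xinference server

# Model/engine names here are illustrative; check your version's
# registrations for the exact values.
model_uid = client.launch_model(
    model_name="qwen2-vl-instruct",
    model_engine="MLX",
    model_format="mlx",
    model_size_in_billions=2,
    quantization="4bit",
)

model = client.get_model(model_uid)
response = model.chat(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                {"type": "text", "text": "What is in this image?"},
            ],
        }
    ]
)
print(response["choices"][0]["message"]["content"])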