diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py
index 625e31bb0d368..2ea592aaba9f9 100644
--- a/vllm/model_executor/models/chatglm.py
+++ b/vllm/model_executor/models/chatglm.py
@@ -575,8 +575,7 @@ def forward(
         return hidden_states


-class ChatGLMBaseModel(nn.Module, SupportsLoRA, SupportsPP,
-                       SupportsMultiModal):
+class ChatGLMBaseModel(nn.Module, SupportsLoRA, SupportsPP):

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -695,7 +694,7 @@ class ChatGLM(ChatGLMBaseModel):
     embedding_padding_modules = []


-class ChatGLMV(ChatGLMBaseModel):
+class ChatGLMV(ChatGLMBaseModel, SupportsMultiModal):
     packed_modules_mapping = {
         "query_key_value": ["query_key_value"],
         "dense_h_to_4h": ["dense_h_to_4h"],
diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py
index 3978c176a2144..44ce6eda42943 100644
--- a/vllm/model_executor/models/qwen.py
+++ b/vllm/model_executor/models/qwen.py
@@ -870,7 +870,7 @@ def dummy_data_for_qwen(
     return DummyData(seq_data, mm_data)


-class QWenBaseModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
+class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
@@ -1024,7 +1024,7 @@ class QWenLLM(QWenBaseModel):
     embedding_padding_modules = []


-class QWenVL(QWenBaseModel):
+class QWenVL(QWenBaseModel, SupportsMultiModal):
     packed_modules_mapping = {
         "c_attn": ["c_attn"],
         "gate_up_proj": [
@@ -1062,7 +1062,7 @@ def get_mm_mapping(self) -> MultiModelKeys:
 @MULTIMODAL_REGISTRY.register_max_image_tokens(MAX_QWEN_IMG_TOKENS)
 @INPUT_REGISTRY.register_dummy_data(dummy_data_for_qwen)
 @INPUT_REGISTRY.register_input_processor(input_processor_for_qwen)
-class QWenLMHeadModel(QWenBaseModel, SupportsLoRA):
+class QWenLMHeadModel(QWenBaseModel, SupportsMultiModal, SupportsLoRA):
     """
     QWenLMHeadModel is not only applicable to LLM but also to VL, which is not
     conducive to the current integration logic of LoRA in vLLM. Therefore, it
@@ -1083,7 +1083,7 @@ def __new__(
         config = vllm_config.model_config.hf_config
         # Initialize VL
         if hasattr(config, "visual"):
-            return QWenVL(vllm_config=vllm_config)
+            return QWenVL(vllm_config=vllm_config, prefix=prefix)
         # Initialize LLM
         else:
-            return QWenLLM(vllm_config=vllm_config)
+            return QWenLLM(vllm_config=vllm_config, prefix=prefix)
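
Note (not part of the patch): the change moves the SupportsMultiModal marker off the shared base classes and onto the vision-language subclasses only, so capability checks no longer report the text-only variants as multimodal. A minimal, self-contained sketch of that pattern is below; the toy classes and the supports_multimodal helper here are simplified stand-ins for illustration, not vLLM's exact API.

# Toy stand-ins showing why the marker interface belongs on the VL subclass,
# not on the shared base class.

class SupportsMultiModal:               # stand-in for the interface mixin
    supports_multimodal = True


class QWenBaseModel:                    # shared base: no multimodal marker
    pass


class QWenLLM(QWenBaseModel):           # text-only variant
    pass


class QWenVL(QWenBaseModel, SupportsMultiModal):   # vision-language variant
    pass


def supports_multimodal(model_cls) -> bool:
    """Simplified capability check: does the class carry the marker?"""
    return isinstance(model_cls, type) and issubclass(model_cls, SupportsMultiModal)


# With the marker on the base class, QWenLLM would also be reported as
# multimodal; with the marker on QWenVL only, the checks distinguish them.
assert not supports_multimodal(QWenLLM)
assert supports_multimodal(QWenVL)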