
Commit

[Model] support bitsandbytes quantization with minicpm model (vllm-project#10842)

Signed-off-by: Ubuntu <[email protected]>
zixuanzhang226 authored and weilong.yu committed Dec 13, 2024
1 parent bb771cb commit 3aec6e2
Showing 1 changed file with 10 additions and 0 deletions.

vllm/model_executor/models/minicpm.py
@@ -534,6 +534,16 @@ class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
    }
    embedding_padding_modules = ["lm_head"]

    # BitsAndBytes-specific attributes
    bitsandbytes_stacked_params_mapping = {
        # shard_name, weight_name, index
        "q_proj": ("qkv_proj", 0),
        "k_proj": ("qkv_proj", 1),
        "v_proj": ("qkv_proj", 2),
        "gate_proj": ("gate_up_proj", 0),
        "up_proj": ("gate_up_proj", 1),
    }

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()
        config = vllm_config.model_config.hf_config
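For context (not part of the commit): checkpoints store q_proj, k_proj, v_proj, gate_proj, and up_proj as separate tensors, while vLLM fuses them into the stacked qkv_proj and gate_up_proj parameters. The mapping above tells the bitsandbytes weight loader which fused parameter each checkpoint shard belongs to, and at which index within the fused tensor. Below is a minimal, self-contained sketch of how such a mapping can be applied when renaming checkpoint parameter names; the helper map_param_name is hypothetical, not vLLM's actual loader code.

    from typing import Optional, Tuple

    # Same mapping as in the diff above: shard_name -> (fused_weight_name, shard_index).
    bitsandbytes_stacked_params_mapping = {
        "q_proj": ("qkv_proj", 0),
        "k_proj": ("qkv_proj", 1),
        "v_proj": ("qkv_proj", 2),
        "gate_proj": ("gate_up_proj", 0),
        "up_proj": ("gate_up_proj", 1),
    }

    def map_param_name(checkpoint_name: str) -> Tuple[str, Optional[int]]:
        """Map a per-shard checkpoint parameter name to the fused parameter
        name used by the model, plus the shard's index within the fused
        tensor. Returns (name, None) for parameters that are not stacked."""
        for shard, (fused, index) in bitsandbytes_stacked_params_mapping.items():
            if shard in checkpoint_name:
                return checkpoint_name.replace(shard, fused), index
        return checkpoint_name, None

    # Example: layer 3's key projection maps into slot 1 of the fused qkv_proj.
    name, idx = map_param_name("model.layers.3.self_attn.k_proj.weight")
    assert name == "model.layers.3.self_attn.qkv_proj.weight" and idx == 1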
