[Model] Add support for Xverse Model #3038

Closed
wants to merge 5 commits into from
15 changes: 15 additions & 0 deletions python/mlc_llm/model/model.py
@@ -39,6 +39,7 @@
from .rwkv6 import rwkv6_loader, rwkv6_model, rwkv6_quantization
from .stable_lm import stablelm_loader, stablelm_model, stablelm_quantization
from .starcoder2 import starcoder2_loader, starcoder2_model, starcoder2_quantization
from .xverse import xverse_loader, xverse_model, xverse_quantization

ModelConfig = Any
"""A ModelConfig is an object that represents a model architecture. It is required to have
@@ -532,4 +533,18 @@ class Model:
"ft-quant": deepseek_quantization.ft_quant,
},
),
"xverse": Model(
name="xverse",
model=xverse_model.XverseForCausalLM,
config=xverse_model.XverseConfig,
source={
"huggingface-torch": xverse_loader.huggingface,
"huggingface-safetensor": xverse_loader.huggingface,
},
quantize={
"no-quant": xverse_quantization.no_quant,
"group-quant": xverse_quantization.group_quant,
"ft-quant": xverse_quantization.ft_quant,
},
),
}
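
For context on how this registration is consumed: model conversion and compilation look up an architecture by its key in this dict and then use the `config`, `source`, and `quantize` fields of the matching `Model` record. The lines below are a minimal lookup sketch, assuming the dict edited above is exported as `MODELS` from `mlc_llm.model.model` (an assumption of this note, not confirmed by the excerpt); it is illustrative, not part of this PR.

# Illustrative lookup of the entry added above; assumes the registry dict
# shown in this diff is exported as `MODELS` from mlc_llm.model.model.
from mlc_llm.model.model import MODELS

entry = MODELS["xverse"]
config_cls = entry.config                            # xverse_model.XverseConfig
loader_fn = entry.source["huggingface-safetensor"]   # xverse_loader.huggingface
quantize_fn = entry.quantize["group-quant"]          # xverse_quantization.group_quant
print(entry.name, config_cls, loader_fn, quantize_fn)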
21 changes: 21 additions & 0 deletions python/mlc_llm/model/model_preset.py
@@ -1821,4 +1821,25 @@
"use_cache": True,
"vocab_size": 102400,
},
"xverse": {
"architectures": ["XverseForCausalLM"],
"pad_token_id": 1,
"bos_token_id": 2,
"eos_token_id": 3,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 8192,
"max_tokenizer_truncation": 6144,
"model_type": "xverse",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"rms_norm_eps": 1e-06,
"tie_word_embeddings": False,
"torch_dtype": "bfloat16",
"transformers_version": "4.30.0.dev0",
"use_cache": True,
"vocab_size": 100534,
},
}
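
As a rough illustration of how such a preset could become a config object, the sketch below assumes `XverseConfig` follows the other MLC model configs in exposing a `from_dict` constructor (via `ConfigBase`) and that the preset dict above is exported as `MODEL_PRESETS`; both names are assumptions here, not confirmed by this excerpt.

# Sketch only: turn the "xverse" preset above into a config object.
# Assumes MODEL_PRESETS is the dict defined in model_preset.py and that
# XverseConfig provides a from_dict classmethod like the other configs;
# extra keys such as "architectures" would fall into a catch-all kwargs.
from mlc_llm.model.model_preset import MODEL_PRESETS
from mlc_llm.model.xverse import xverse_model

preset = MODEL_PRESETS["xverse"]
config = xverse_model.XverseConfig.from_dict(preset)

# Quick consistency check on the preset values: 4096 / 32 = 128 per-head dim.
assert preset["hidden_size"] // preset["num_attention_heads"] == 128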
Empty file.
87 changes: 87 additions & 0 deletions python/mlc_llm/model/xverse/xverse_loader.py
@@ -0,0 +1,87 @@
"""
This file specifies how MLC's Xverse parameters map from other formats, for example HuggingFace
PyTorch, HuggingFace safetensors.
"""

import functools

import numpy as np

from mlc_llm.loader import ExternMapping
from mlc_llm.quantization import Quantization

from .xverse_model import XverseConfig, XverseForCausalLM


def huggingface(model_config: XverseConfig, quantization: Quantization) -> ExternMapping:
"""Returns a parameter mapping that maps from the names of MLC LLM parameters to
the names of HuggingFace PyTorch parameters.

Parameters
----------
model_config : XverseConfig
The configuration of the Xverse model.

quantization : Quantization
The quantization configuration.

Returns
-------
param_map : ExternMapping
The parameter mapping from MLC to HuggingFace PyTorch.
"""
model = XverseForCausalLM(model_config)
if quantization is not None:
model.to(quantization.model_dtype)
_, _named_params, _ = model.export_tvm( # type: ignore[misc]
spec=model.get_default_spec(),
allow_extern=True,
)
named_parameters = dict(_named_params)

mapping = ExternMapping()

for i in range(model_config.num_hidden_layers):
# Add QKV in self attention
attn = f"model.layers.{i}.self_attn"
mlc_name = f"{attn}.qkv_proj.weight"
mlc_param = named_parameters[mlc_name]
mapping.add_mapping(
mlc_name,
[
f"{attn}.q_proj.weight",
f"{attn}.k_proj.weight",
f"{attn}.v_proj.weight",
],
functools.partial(
lambda q, k, v, dtype: np.concatenate([q, k, v], axis=0).astype(dtype),
dtype=mlc_param.dtype,
),
)
# Add gates in MLP
mlp = f"model.layers.{i}.mlp"
mlc_name = f"{mlp}.gate_up_proj.weight"
mlc_param = named_parameters[mlc_name]
mapping.add_mapping(
mlc_name,
[
f"{mlp}.gate_proj.weight",
f"{mlp}.up_proj.weight",
],
functools.partial(
lambda gate, up, dtype: np.concatenate([gate, up], axis=0).astype(dtype),
dtype=mlc_param.dtype,
),
)

for mlc_name, mlc_param in named_parameters.items():
if mlc_name not in mapping.param_map:
mapping.add_mapping(
mlc_name,
[mlc_name],
functools.partial(
lambda x, dtype: x.astype(dtype),
dtype=mlc_param.dtype,
),
)
return mapping
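
To make the registered transforms concrete, the standalone numpy sketch below (with made-up small shapes) reproduces what the two lambdas do at conversion time: q/k/v projection weights are stacked along the output dimension into a single `qkv_proj.weight`, gate and up projections are stacked into a single `gate_up_proj.weight`, and the result is cast to the MLC parameter's dtype.

# Standalone illustration of the QKV and gate/up fusion above.
# Shapes are made up (hidden_size=8 rather than 4096) purely for demonstration.
import numpy as np

hidden = 8
q = np.ones((hidden, hidden), dtype="float32")   # q_proj.weight
k = np.ones((hidden, hidden), dtype="float32")   # k_proj.weight
v = np.ones((hidden, hidden), dtype="float32")   # v_proj.weight

# Same as the QKV lambda: stack along axis 0, then cast to the target dtype.
qkv = np.concatenate([q, k, v], axis=0).astype("float16")
print(qkv.shape)  # (24, 8); for the real model, (3 * 4096, 4096)

intermediate = 3 * hidden                                # stand-in for intermediate_size
gate = np.ones((intermediate, hidden), dtype="float32")  # gate_proj.weight
up = np.ones((intermediate, hidden), dtype="float32")    # up_proj.weight

# Same as the MLP lambda: gate stacked on top of up, then cast.
gate_up = np.concatenate([gate, up], axis=0).astype("float16")
print(gate_up.shape)  # (48, 8)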