[Model] Add support for Xverse Model #3038

Closed
wants to merge 5 commits into from
15 changes: 15 additions & 0 deletions python/mlc_llm/model/model.py
@@ -39,6 +39,7 @@
from .rwkv6 import rwkv6_loader, rwkv6_model, rwkv6_quantization
from .stable_lm import stablelm_loader, stablelm_model, stablelm_quantization
from .starcoder2 import starcoder2_loader, starcoder2_model, starcoder2_quantization
from .xverse import xverse_loader, xverse_model, xverse_quantization

ModelConfig = Any
"""A ModelConfig is an object that represents a model architecture. It is required to have
@@ -532,4 +533,18 @@ class Model:
"ft-quant": deepseek_quantization.ft_quant,
},
),
"xverse": Model(
name="xverse",
model=xverse_model.XverseForCausalLM,
config=xverse_model.XverseConfig,
source={
"huggingface-torch": xverse_loader.huggingface,
"huggingface-safetensor": xverse_loader.huggingface,
},
quantize={
"no-quant": xverse_quantization.no_quant,
"group-quant": xverse_quantization.group_quant,
"ft-quant": xverse_quantization.ft_quant,
},
),
}
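
For context on how this registration is consumed: model conversion and compilation look up an architecture by its key in this dict and then use the `config`, `source`, and `quantize` fields of the matching `Model` record. The lines below are a minimal lookup sketch, assuming the dict edited above is exported as `MODELS` from `mlc_llm.model.model` (an assumption of this note, not confirmed by the excerpt); it is illustrative, not part of this PR.

# Illustrative lookup of the entry added above; assumes the registry dict
# shown in this diff is exported as `MODELS` from mlc_llm.model.model.
from mlc_llm.model.model import MODELS

entry = MODELS["xverse"]
config_cls = entry.config                            # xverse_model.XverseConfig
loader_fn = entry.source["huggingface-safetensor"]   # xverse_loader.huggingface
quantize_fn = entry.quantize["group-quant"]          # xverse_quantization.group_quant
print(entry.name, config_cls, loader_fn, quantize_fn)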
21 changes: 21 additions & 0 deletions python/mlc_llm/model/model_preset.py
@@ -1821,4 +1821,25 @@
"use_cache": True,
"vocab_size": 102400,
},
"xverse": {
"architectures": ["XverseForCausalLM"],
"pad_token_id": 1,
"bos_token_id": 2,
"eos_token_id": 3,
"hidden_act": "silu",
"hidden_size": 4096,
"initializer_range": 0.02,
"intermediate_size": 11008,
"max_position_embeddings": 8192,
"max_tokenizer_truncation": 6144,
"model_type": "xverse",
"num_attention_heads": 32,
"num_hidden_layers": 32,
"rms_norm_eps": 1e-06,
"tie_word_embeddings": False,
"torch_dtype": "bfloat16",
"transformers_version": "4.30.0.dev0",
"use_cache": True,
"vocab_size": 100534,
},
}
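
As a rough illustration of how such a preset could become a config object, the sketch below assumes `XverseConfig` follows the other MLC model configs in exposing a `from_dict` constructor (via `ConfigBase`) and that the preset dict above is exported as `MODEL_PRESETS`; both names are assumptions here, not confirmed by this excerpt.

# Sketch only: turn the "xverse" preset above into a config object.
# Assumes MODEL_PRESETS is the dict defined in model_preset.py and that
# XverseConfig provides a from_dict classmethod like the other configs;
# extra keys such as "architectures" would fall into a catch-all kwargs.
from mlc_llm.model.model_preset import MODEL_PRESETS
from mlc_llm.model.xverse import xverse_model

preset = MODEL_PRESETS["xverse"]
config = xverse_model.XverseConfig.from_dict(preset)

# Quick consistency check on the preset values: 4096 / 32 = 128 per-head dim.
assert preset["hidden_size"] // preset["num_attention_heads"] == 128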
Empty file.
87 changes: 87 additions & 0 deletions python/mlc_llm/model/xverse/xverse_loader.py
@@ -0,0 +1,87 @@
"""
This file specifies how MLC's Xverse parameters map from other formats, for example HuggingFace
PyTorch, HuggingFace safetensors.
"""

import functools

import numpy as np

from mlc_llm.loader import ExternMapping
from mlc_llm.quantization import Quantization

from .xverse_model import XverseConfig, XverseForCausalLM


def huggingface(model_config: XverseConfig, quantization: Quantization) -> ExternMapping:
"""Returns a parameter mapping that maps from the names of MLC LLM parameters to
the names of HuggingFace PyTorch parameters.

Parameters
----------
model_config : XverseConfig
The configuration of the Xverse model.

quantization : Quantization
The quantization configuration.

Returns
-------
param_map : ExternMapping
The parameter mapping from MLC to HuggingFace PyTorch.
"""
model = XverseForCausalLM(model_config)
if quantization is not None:
model.to(quantization.model_dtype)
_, _named_params, _ = model.export_tvm( # type: ignore[misc]
spec=model.get_default_spec(),
allow_extern=True,
)
named_parameters = dict(_named_params)

mapping = ExternMapping()

for i in range(model_config.num_hidden_layers):
# Add QKV in self attention
attn = f"model.layers.{i}.self_attn"
mlc_name = f"{attn}.qkv_proj.weight"
mlc_param = named_parameters[mlc_name]
mapping.add_mapping(
mlc_name,
[
f"{attn}.q_proj.weight",
f"{attn}.k_proj.weight",
f"{attn}.v_proj.weight",
],
functools.partial(
lambda q, k, v, dtype: np.concatenate([q, k, v], axis=0).astype(dtype),
dtype=mlc_param.dtype,
),
)
# Add gates in MLP
mlp = f"model.layers.{i}.mlp"
mlc_name = f"{mlp}.gate_up_proj.weight"
mlc_param = named_parameters[mlc_name]
mapping.add_mapping(
mlc_name,
[
f"{mlp}.gate_proj.weight",
f"{mlp}.up_proj.weight",
],
functools.partial(
lambda gate, up, dtype: np.concatenate([gate, up], axis=0).astype(dtype),
dtype=mlc_param.dtype,
),
)

for mlc_name, mlc_param in named_parameters.items():
if mlc_name not in mapping.param_map:
mapping.add_mapping(
mlc_name,
[mlc_name],
functools.partial(
lambda x, dtype: x.astype(dtype),
dtype=mlc_param.dtype,
),
)
return mapping
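
To make the registered transforms concrete, the standalone numpy sketch below (with made-up small shapes) reproduces what the two lambdas do at conversion time: q/k/v projection weights are stacked along the output dimension into a single `qkv_proj.weight`, gate and up projections are stacked into a single `gate_up_proj.weight`, and the result is cast to the MLC parameter's dtype.

# Standalone illustration of the QKV and gate/up fusion above.
# Shapes are made up (hidden_size=8 rather than 4096) purely for demonstration.
import numpy as np

hidden = 8
q = np.ones((hidden, hidden), dtype="float32")   # q_proj.weight
k = np.ones((hidden, hidden), dtype="float32")   # k_proj.weight
v = np.ones((hidden, hidden), dtype="float32")   # v_proj.weight

# Same as the QKV lambda: stack along axis 0, then cast to the target dtype.
qkv = np.concatenate([q, k, v], axis=0).astype("float16")
print(qkv.shape)  # (24, 8); for the real model, (3 * 4096, 4096)

intermediate = 3 * hidden                                # stand-in for intermediate_size
gate = np.ones((intermediate, hidden), dtype="float32")  # gate_proj.weight
up = np.ones((intermediate, hidden), dtype="float32")    # up_proj.weight

# Same as the MLP lambda: gate stacked on top of up, then cast.
gate_up = np.concatenate([gate, up], axis=0).astype("float16")
print(gate_up.shape)  # (48, 8)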