diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py index 1fff72b3490e9..053d838432885 100644 --- a/vllm/model_executor/models/bert.py +++ b/vllm/model_executor/models/bert.py @@ -443,6 +443,8 @@ def pooler( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={"model.": ""}) weights = hf_to_vllm_mapper.apply(weights) + weights = ((name, data) for name, data in weights + if not name.startswith("lm_head.")) self.model.load_weights(weights) def _build_model(self, diff --git a/vllm/model_executor/models/gemma2.py b/vllm/model_executor/models/gemma2.py index c93223c740272..d35fcb012e166 100644 --- a/vllm/model_executor/models/gemma2.py +++ b/vllm/model_executor/models/gemma2.py @@ -504,4 +504,6 @@ def pooler( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={"model.": ""}) weights = hf_to_vllm_mapper.apply(weights) + weights = ((name, data) for name, data in weights + if not name.startswith("lm_head.")) self.model.load_weights(weights) diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index fffb3fe53b94c..fe94bb352961b 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -689,6 +689,8 @@ def pooler( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={"model.": ""}) weights = hf_to_vllm_mapper.apply(weights) + weights = ((name, data) for name, data in weights + if not name.startswith("lm_head.")) self.model.load_weights(weights) def load_kv_cache_scales(self, quantization_param_path: str) -> None: diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index 9f706610a129a..87943e53d861c 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -580,4 +580,6 @@ def pooler( def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]): hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={"model.": ""}) weights = hf_to_vllm_mapper.apply(weights) + weights = ((name, data) for name, data in weights + if not name.startswith("lm_head.")) self.model.load_weights(weights)