lora_utils_worker_manager
Signed-off-by: s.kochetkov <[email protected]>
s.kochetkov committed Jan 3, 2025
1 parent eff2018 commit da2af18
Showing 2 changed files with 24 additions and 7 deletions.
26 changes: 20 additions & 6 deletions vllm/lora/utils.py
@@ -23,7 +23,7 @@
                               LogitsProcessorWithLoRA,
                               MergedColumnParallelLinearWithLoRA,
                               MergedQKVParallelLinearWithLora,
-                              QKVParallelLinearWithLora,
+                              ModulesToSaveWrapper, QKVParallelLinearWithLora,
                               ReplicatedLinearWithLoRA,
                               RowParallelLinearWithLoRA,
                               VocabParallelEmbeddingWithLoRA)
@@ -49,6 +49,7 @@
     MergedQKVParallelLinearWithShardedLora,
     RowParallelLinearWithShardedLoRA,
     LinearScalingRotaryEmbeddingWithLora,
+    ModulesToSaveWrapper,
 }


@@ -94,6 +95,7 @@ def replace_submodule(model: nn.Module, module_name: str,

 def parse_fine_tuned_lora_name(
         name: str,
+        enable_lora_modules_to_save: bool = False,
         weights_mapper: Optional[WeightsMapper] = None
 ) -> Tuple[str, bool, bool]:
     """Parse the name of lora weights.
@@ -106,7 +108,8 @@ def parse_fine_tuned_lora_name
     return:
         Tuple(module_name, is_lora_a):
             module_name: the name of the module, e.g. model.dense1,
-            is_lora_a whether the tensor is lora_a or lora_b.
+            is_lora_a: whether the tensor is lora_a or lora_b; None if this is
+                a fully trained modules_to_save tensor (lm_head or embed_tokens),
             is_bias whether the tensor is lora bias.
     """

@@ -120,11 +123,22 @@
name = "base_model.model." + name

parts = name.split(".")
if parts[-1] == "weight" and (parts[-2] == "lora_A"
or parts[-2] == "lora_B"):
new_name = ".".join(parts[2:-2])
return new_name, parts[-2] == "lora_A", False


if parts[-1] == "weight":
if parts[-2] == "lora_A" or parts[-2] == "lora_B":
return ".".join(parts[2:-2]), parts[-2] == "lora_A", False

if parts[-2] in ModulesToSaveWrapper.implemented_layers:

if not enable_lora_modules_to_save:
error_msg = f"""enable_lora_modules_to_save is False,
but found tensor name {name} in LoRA checkpoint.
Set enable_lora_modules_to_save=True to process
lm_head and embed_tokens as fully trained tensors"""
raise ValueError(error_msg)

return '.'.join(parts[2:-1]), None, False

Check failure on line 141 in vllm/lora/utils.py

GitHub Actions / mypy (3.9, 3.10, 3.11, 3.12)

Incompatible return value type (got "tuple[str, None, bool]", expected "tuple[str, bool, bool]") [return-value]
if parts[-1] == "lora_embedding_A" or parts[-1] == "lora_embedding_B":
new_name = ".".join(parts[2:-1])
return new_name, parts[-1] == "lora_embedding_A", False
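
The mypy failure above follows directly from the new branch: the modules_to_save path returns None for is_lora_a while the signature still promises Tuple[str, bool, bool]. One possible fix, sketched here and not part of this commit, is to widen the annotation to Optional[bool] (WeightsMapper is quoted to keep the sketch self-contained):

from typing import Optional, Tuple

def parse_fine_tuned_lora_name(
        name: str,
        enable_lora_modules_to_save: bool = False,
        weights_mapper: Optional["WeightsMapper"] = None
) -> Tuple[str, Optional[bool], bool]:
    # is_lora_a becomes Optional[bool]: None marks a fully trained
    # modules_to_save tensor rather than a lora_A/lora_B matrix.
    ...

Callers that branch on is_lora_a would then need to handle the None case explicitly.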
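To make the new parsing behavior concrete, here is a small illustrative sketch. The tensor names are typical PEFT checkpoint keys, not taken from this commit, and it assumes lm_head is among ModulesToSaveWrapper.implemented_layers, as the error message suggests:

from vllm.lora.utils import parse_fine_tuned_lora_name

# Ordinary LoRA matrix: the "base_model.model." prefix and the trailing
# "lora_A.weight" are stripped; is_lora_a=True, is_bias=False.
parse_fine_tuned_lora_name(
    "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight")
# -> ("model.layers.0.self_attn.q_proj", True, False)

# Fully trained modules_to_save tensor: is_lora_a is None.
parse_fine_tuned_lora_name("base_model.model.lm_head.weight",
                           enable_lora_modules_to_save=True)
# -> ("lm_head", None, False)

# The same tensor with the flag left at its default raises ValueError.
parse_fine_tuned_lora_name("base_model.model.lm_head.weight")
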
5 changes: 4 additions & 1 deletion vllm/lora/worker_manager.py
@@ -91,8 +91,8 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
                     packed_modules_mapping[module])
             else:
                 expected_lora_modules.append(module)
-
         expected_lora_modules = list(set(expected_lora_modules))
+        expected_modules_to_save: List[str] = model.modules_to_save
         lora_path = get_adapter_absolute_path(lora_request.lora_path)

         # For some models like Qwen2VL, we need to use hf_to_vllm_mapper
@@ -105,9 +105,12 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
             lora = self._lora_model_cls.from_local_checkpoint(
                 lora_path,
                 expected_lora_modules,
+                expected_modules_to_save,
                 max_position_embeddings=self.max_position_embeddings,
                 lora_model_id=lora_request.lora_int_id,
                 device="cpu",
+                enable_lora_modules_to_save=self._adapter_manager.lora_config.
+                enable_lora_modules_to_save,
                 dtype=self.lora_config.lora_dtype,
                 target_embedding_padding=self.vocab_size +
                 self.lora_config.lora_extra_vocab_size,
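
The call above reads enable_lora_modules_to_save from the adapter manager's lora_config, so a matching field is presumably added to LoRAConfig elsewhere in this change set; this commit does not define it. Assuming that field exists, opting in might look like the following sketch (the other arguments follow LoRAConfig's existing fields):

from vllm.config import LoRAConfig

# Hypothetical: enable_lora_modules_to_save is the new flag read by
# worker_manager above. With it on, lm_head/embed_tokens tensors in a
# checkpoint are loaded as fully trained weights instead of raising.
lora_config = LoRAConfig(max_lora_rank=16,
                         max_loras=4,
                         enable_lora_modules_to_save=True)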
