diff --git a/vllm/lora/ops/v1/lora_expand.py b/vllm/lora/ops/v1/lora_expand.py index 9b7e0c955967e..8b143c2eead5b 100644 --- a/vllm/lora/ops/v1/lora_expand.py +++ b/vllm/lora/ops/v1/lora_expand.py @@ -219,6 +219,7 @@ def lora_expand_fake( inputs: torch.Tensor, lora_b_weights: torch.Tensor, output_tensor: torch.Tensor, + token_lora_mapping: torch.Tensor, token_indices_sorted_by_lora_ids: torch.Tensor, num_tokens_per_lora: torch.Tensor, lora_token_start_loc: torch.Tensor, diff --git a/vllm/lora/ops/v1/lora_expand_slice.py b/vllm/lora/ops/v1/lora_expand_slice.py index 3618de5da4c7d..b89d46512a4e7 100644 --- a/vllm/lora/ops/v1/lora_expand_slice.py +++ b/vllm/lora/ops/v1/lora_expand_slice.py @@ -222,11 +222,11 @@ def _lora_expand_slice( ) return - def lora_expand_slice_fake( inputs: torch.Tensor, lora_b_weights: torch.Tensor, output_tensor: torch.Tensor, + token_lora_mapping: torch.Tensor, token_indices_sorted_by_lora_ids: torch.Tensor, num_tokens_per_lora: torch.Tensor, lora_token_start_loc: torch.Tensor, diff --git a/vllm/lora/ops/v1/lora_shrink.py b/vllm/lora/ops/v1/lora_shrink.py index af52ce74bea09..bc3ecf036f9b9 100644 --- a/vllm/lora/ops/v1/lora_shrink.py +++ b/vllm/lora/ops/v1/lora_shrink.py @@ -227,11 +227,11 @@ def _lora_shrink( ) return - def lora_shrink_fake( inputs: torch.Tensor, lora_a_weights: torch.Tensor, output_tensor: torch.Tensor, + token_lora_mapping: torch.Tensor, token_indices_sorted_by_lora_ids: torch.Tensor, num_tokens_per_lora: torch.Tensor, lora_token_start_loc: torch.Tensor,