diff --git a/vllm/attention/backends/torch_sdpa.py b/vllm/attention/backends/torch_sdpa.py index 16e044b618c40..dafa5bb56acda 100644 --- a/vllm/attention/backends/torch_sdpa.py +++ b/vllm/attention/backends/torch_sdpa.py @@ -341,7 +341,11 @@ def build(self, seq_lens: List[int], query_lens: List[int], ) else: block_tables = torch.tensor([]) - seq_lens_tensor = torch.tensor([]) + seq_lens_tensor = torch.tensor( + input_data.seq_lens[:input_data.num_prefills], + dtype=torch.int32, + device="cpu", + ) # For multi-modal models placeholder_index_maps = None