Fill TorchSDPAAttentionMetadata seq_lens_field for prefill (#10799)

Signed-off-by: Max de Bayser <[email protected]>
vllm-project · Dec 2, 2024 · e25810a
1 parent 073a4bd
commit e25810a
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/vllm/attention/backends/torch_sdpa.py b/vllm/attention/backends/torch_sdpa.py
@@ -341,7 +341,11 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             )
         else:
             block_tables = torch.tensor([])
-            seq_lens_tensor = torch.tensor([])
+            seq_lens_tensor = torch.tensor(
+                input_data.seq_lens[:input_data.num_prefills],
+                dtype=torch.int32,
+                device="cpu",
+            )
 
         # For multi-modal models
         placeholder_index_maps = None