From 2f385183f35497e030ef22c9820d83b83bc4f6db Mon Sep 17 00:00:00 2001
From: Kathy Yu <143133934+kathyyu-google@users.noreply.github.com>
Date: Thu, 2 Jan 2025 10:28:09 -0800
Subject: [PATCH] [Bugfix] Free cross attention block table for preempted-for-recompute sequence group. (#10013)

Signed-off-by: Kathy Yu
---
 vllm/core/scheduler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index c3bc6becf0995..b3d396f9cedda 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -1579,6 +1579,7 @@ def _preempt_by_recompute(
             seq.status = SequenceStatus.WAITING
             self.free_seq(seq)
             seq.reset_state_for_recompute()
+        self._free_seq_group_cross_attn_blocks(seq_group)
 
     def _preempt_by_swap(
         self,