From 411f3e772b238d00ec88173b015838753fd810fe Mon Sep 17 00:00:00 2001
From: Kathy Yu
Date: Tue, 5 Nov 2024 00:36:57 +0000
Subject: [PATCH] Free cross attention block table for preempted-for-recompute sequence group.

---
 vllm/core/scheduler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index e56d5cddce424..1893f9f753550 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -1504,6 +1504,7 @@ def _preempt_by_recompute(
             seq.status = SequenceStatus.WAITING
             self.free_seq(seq)
             seq.reset_state_for_recompute()
+        self._free_seq_group_cross_attn_blocks(seq_group)
 
     def _preempt_by_swap(
         self,