From 0f5e9e3b26bf1b37ad7f2627ff4cba25f38a46e9 Mon Sep 17 00:00:00 2001
From: Dahai Tang
Date: Mon, 16 Dec 2024 12:16:32 +0000
Subject: [PATCH] Fix: exclude cache_engine argument for input dump

Signed-off-by: Dahai Tang
---
 vllm/worker/model_runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index d118c26e92263..62fef583470e1 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -1610,7 +1610,8 @@ def prepare_model_input(
                                    virtual_engine=virtual_engine)
 
     @torch.inference_mode()
-    @dump_input_when_exception(exclude_args=[0], exclude_kwargs=["self"])
+    @dump_input_when_exception(exclude_args=[0],
+                               exclude_kwargs=["self", "cache_engine"])
     def execute_model(
         self,
         model_input: ModelInputForGPUWithSamplingMetadata,
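
For context: vLLM's dump_input_when_exception decorator captures the arguments
of a failing execute_model call for debugging, and exclude_args/exclude_kwargs
prune entries that should not be dumped. This patch adds "cache_engine" to the
exclusions, presumably because the cache engine holds GPU-backed KV-cache
state that is large and not cleanly serializable. Below is a minimal,
self-contained sketch of that filtering pattern; it is NOT vLLM's actual
implementation, and the dump format and log message are illustrative
assumptions only.

    import functools
    import traceback
    from typing import Any, Callable, List, Optional


    def dump_input_when_exception(exclude_args: Optional[List[int]] = None,
                                  exclude_kwargs: Optional[List[str]] = None):
        """Sketch: on exception, report the wrapped call's inputs, skipping
        the positional indices in exclude_args and the names in
        exclude_kwargs."""

        def decorator(func: Callable) -> Callable:

            @functools.wraps(func)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                try:
                    return func(*args, **kwargs)
                except Exception:
                    skip_idx = set(exclude_args or [])
                    skip_key = set(exclude_kwargs or [])
                    # Keep only arguments that are safe/useful to dump,
                    # e.g. drop positional index 0 (self) and oversized
                    # keyword arguments such as cache_engine.
                    dumped = {
                        f"arg_{i}": a
                        for i, a in enumerate(args) if i not in skip_idx
                    }
                    dumped.update((k, v) for k, v in kwargs.items()
                                  if k not in skip_key)
                    # Illustrative reporting; the real decorator persists
                    # the filtered inputs for later reproduction.
                    print(f"Exception in {func.__name__}; "
                          f"dumped inputs: {list(dumped)}")
                    traceback.print_exc()
                    raise

            return wrapper

        return decorator

Applied as in the patch, @dump_input_when_exception(exclude_args=[0],
exclude_kwargs=["self", "cache_engine"]) keeps the dump small and
serializable whether the method is invoked positionally (index 0 covers
self) or with keyword arguments (the "self" and "cache_engine" keys are
skipped).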