diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index d118c26e92263..62fef583470e1 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1610,7 +1610,8 @@ def prepare_model_input( virtual_engine=virtual_engine) @torch.inference_mode() - @dump_input_when_exception(exclude_args=[0], exclude_kwargs=["self"]) + @dump_input_when_exception(exclude_args=[0], + exclude_kwargs=["self", "cache_engine"]) def execute_model( self, model_input: ModelInputForGPUWithSamplingMetadata,