Commit

Fix: exclude cache_engine argument for input dump
Signed-off-by: Dahai Tang <[email protected]>
Dahai Tang committed Dec 16, 2024
1 parent c3a8ae8 commit 0f5e9e3
Showing 1 changed file with 2 additions and 1 deletion.

vllm/worker/model_runner.py
@@ -1610,7 +1610,8 @@ def prepare_model_input(
             virtual_engine=virtual_engine)

     @torch.inference_mode()
-    @dump_input_when_exception(exclude_args=[0], exclude_kwargs=["self"])
+    @dump_input_when_exception(exclude_args=[0],
+                               exclude_kwargs=["self", "cache_engine"])
     def execute_model(
         self,
         model_input: ModelInputForGPUWithSamplingMetadata,
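For context, a decorator like `dump_input_when_exception` records a function's inputs when it raises, so a crash report can include the model inputs that triggered it; excluding entries such as `self` or `cache_engine` keeps large, unserializable objects out of the dump. The sketch below is an illustration of the general pattern, not vLLM's actual implementation; the `last_dump` dict and the toy `execute_model` signature are assumptions made for demonstration.

```python
import functools

# Illustration only: holds the most recently dumped inputs for inspection.
last_dump = {}


def dump_input_when_exception(exclude_args=None, exclude_kwargs=None):
    """Sketch of an input-dumping decorator.

    On exception, record the call's positional and keyword arguments,
    skipping excluded positions/names, then re-raise unchanged.
    """
    excluded_positions = set(exclude_args or [])
    excluded_names = set(exclude_kwargs or [])

    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            try:
                return fn(*args, **kwargs)
            except Exception:
                last_dump["args"] = [
                    a for i, a in enumerate(args)
                    if i not in excluded_positions
                ]
                last_dump["kwargs"] = {
                    k: v for k, v in kwargs.items()
                    if k not in excluded_names
                }
                raise
        return wrapper
    return decorator


# Hypothetical stand-in for the real execute_model method.
@dump_input_when_exception(exclude_args=[0],
                           exclude_kwargs=["cache_engine"])
def execute_model(runner, model_input, cache_engine=None):
    raise RuntimeError("boom")


try:
    execute_model("runner-object", {"tokens": [1, 2]}, cache_engine="big")
except RuntimeError:
    pass
# last_dump now contains model_input but neither the runner (position 0)
# nor cache_engine.
```

The commit's change corresponds to adding `"cache_engine"` to `exclude_kwargs`, so a cache engine passed by keyword is no longer serialized into the dump.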