Fix: accept arbitrary keyword arguments (**kwargs) in every model runner's execute_model signature

Signed-off-by: Dahai Tang <[email protected]>
vllm-project · Dec 12, 2024 · 7a6435d
1 parent 6f97634
commit 7a6435d
Show file tree

Hide file tree

Showing 13 changed files with 13 additions and 0 deletions.
diff --git a/vllm/worker/cpu_enc_dec_model_runner.py b/vllm/worker/cpu_enc_dec_model_runner.py
@@ -280,6 +280,7 @@ def execute_model(
         kv_caches: List[torch.Tensor],
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> Optional[List[SamplerOutput]]:
         if num_steps > 1:
             raise ValueError(

diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py
@@ -512,6 +512,7 @@ def execute_model(
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
         previous_hidden_states: Optional[torch.Tensor] = None,
+        **kwargs,
     ) -> Optional[List[SamplerOutput]]:
         if num_steps > 1:
             raise ValueError(

diff --git a/vllm/worker/cpu_pooling_model_runner.py b/vllm/worker/cpu_pooling_model_runner.py
@@ -34,6 +34,7 @@ def execute_model(
         kv_caches: List[torch.Tensor],
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> Optional[Union[List[PoolerOutput], IntermediateTensors]]:
         if num_steps > 1:
             raise ValueError(

diff --git a/vllm/worker/enc_dec_model_runner.py b/vllm/worker/enc_dec_model_runner.py
@@ -154,6 +154,7 @@ def execute_model(
         kv_caches: List[torch.Tensor],
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> Optional[List[PoolerOutput]]:
         if num_steps > 1:
             raise ValueError("num_steps > 1 is not supported in "

diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py
@@ -1893,6 +1893,7 @@ def execute_model(
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
         warmup_mode=False,
+        **kwargs: Any,
     ) -> Optional[Union[List[SamplerOutput], IntermediateTensors]]:
         if num_steps > 1:
             raise ValueError(

diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
@@ -1617,6 +1617,7 @@ def execute_model(
         num_steps: int = 1,
         cache_engine: Optional["CacheEngine"] = None,
         worker_input: Optional["WorkerInput"] = None,
+        **kwargs: Any,
     ) -> Optional[Union[List[SamplerOutput], IntermediateTensors]]:
         if num_steps > 1:
             raise ValueError("num_steps > 1 is not supported in ModelRunner")

diff --git a/vllm/worker/model_runner_base.py b/vllm/worker/model_runner_base.py
@@ -272,6 +272,7 @@ def execute_model(
         kv_caches: Optional[List[torch.Tensor]],
         intermediate_tensors: Optional[IntermediateTensors],
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> Optional[List[SamplerOutput]]:
         """
         Execute the model on the given input.

diff --git a/vllm/worker/multi_step_model_runner.py b/vllm/worker/multi_step_model_runner.py
@@ -461,6 +461,7 @@ def execute_model(
         kv_caches: List[torch.Tensor],
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> Optional[Union[List[SamplerOutput], IntermediateTensors]]:
         """ 
         Execute the model for a single step and update multi-step

diff --git a/vllm/worker/neuron_model_runner.py b/vllm/worker/neuron_model_runner.py
@@ -309,6 +309,7 @@ def execute_model(
         kv_caches: Optional[List[torch.Tensor]] = None,
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> Optional[List[SamplerOutput]]:
         if num_steps > 1:
             raise ValueError(

diff --git a/vllm/worker/openvino_model_runner.py b/vllm/worker/openvino_model_runner.py
@@ -327,6 +327,7 @@ def execute_model(
         self,
         seq_group_metadata_list: List[SequenceGroupMetadata],
         kv_caches: List[Tuple["ov.Tensor", "ov.Tensor"]],
+        **kwargs: Any,
     ) -> Optional[SamplerOutput]:
         (
             input_tokens,

diff --git a/vllm/worker/pooling_model_runner.py b/vllm/worker/pooling_model_runner.py
@@ -49,6 +49,7 @@ def execute_model(
         kv_caches: List[torch.Tensor],
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> Optional[Union[List[PoolerOutput], IntermediateTensors]]:
         if num_steps > 1:
             raise ValueError(

diff --git a/vllm/worker/tpu_model_runner.py b/vllm/worker/tpu_model_runner.py
@@ -587,6 +587,7 @@ def execute_model(
         kv_caches: Optional[List[Any]],
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> List[SamplerOutput]:
         assert intermediate_tensors is None
         if not model_input.is_first_multi_step:

diff --git a/vllm/worker/xpu_model_runner.py b/vllm/worker/xpu_model_runner.py
@@ -557,6 +557,7 @@ def execute_model(
         kv_caches: List[torch.Tensor],
         intermediate_tensors: Optional[IntermediateTensors] = None,
         num_steps: int = 1,
+        **kwargs: Any,
     ) -> Optional[List[SamplerOutput]]:
         if num_steps > 1:
             raise ValueError(