diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py index aa1e4dddf4213..b57742c2ebfdd 100644 --- a/vllm/spec_decode/spec_decode_worker.py +++ b/vllm/spec_decode/spec_decode_worker.py @@ -641,12 +641,6 @@ def _run_non_driver_rank(self) -> bool: # that the hidden states can be propagated to proposer when needed. if data["no_spec"]: self.scorer_worker.execute_model() - # If no spec case we still want to run the proposer model - # but ONLY once to match `not skip_proposer` in - # driver `_run_no_spec` - if not data["disable_all_speculation"]: - self.proposer_worker.execute_model() - return True if not data["disable_all_speculation"]: # Even if num_lookahead_slots is zero, we want to run the