From 902daaab914061299b5d8a7794a10ed304bf289c Mon Sep 17 00:00:00 2001 From: andoorve <37849411+andoorve@users.noreply.github.com> Date: Tue, 12 Nov 2024 19:12:08 +0000 Subject: [PATCH] Revert "Fix for Spec model TP + Chunked Prefill" This reverts commit 6863d1f364eec79deb5dd7bce143e47d81670d87. Signed-off-by: andoorve <37849411+andoorve@users.noreply.github.com> --- vllm/spec_decode/spec_decode_worker.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py index aa1e4dddf4213..b57742c2ebfdd 100644 --- a/vllm/spec_decode/spec_decode_worker.py +++ b/vllm/spec_decode/spec_decode_worker.py @@ -641,12 +641,6 @@ def _run_non_driver_rank(self) -> bool: # that the hidden states can be propagated to proposer when needed. if data["no_spec"]: self.scorer_worker.execute_model() - # If no spec case we still want to run the proposer model - # but ONLY once to match `not skip_proposer` in - # driver `_run_no_spec` - if not data["disable_all_speculation"]: - self.proposer_worker.execute_model() - return True if not data["disable_all_speculation"]: # Even if num_lookahead_slots is zero, we want to run the