Remove debug print statements from TPU model runner input preparation

vllm-project · Nov 17, 2024 · 1af03e0 · 1af03e0
1 parent 63b301a
commit 1af03e0
Showing 1 changed file with 0 additions and 10 deletions.
diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py
@@ -257,10 +257,6 @@ def _prepare_prefill_inputs(
                     context_lens=None,
                 ))
 
-            print(f"PREFILL {token_ids.shape=}")
-            print(f"PREFILL {positions.shape=}")
-            print(f"PREFILL {slot_mapping.shape=}")
-
         return PrefillInputData(
             request_ids=prefill_request_ids,
             prompt_lens=prefill_prompt_lens,
@@ -319,12 +315,6 @@ def _prepare_decode_inputs(self, num_decodes: int) -> DecodeInputData:
         # CONTEXT_LENS [batch_size]
         context_lens = (positions.reshape(-1) + 1)
 
-        print(f"{token_ids.shape=}")
-        print(f"{positions.shape=}")
-        print(f"{slot_mapping.shape=}")
-        print(f"{block_table.shape=}")
-        print(f"{context_lens.shape=}")
-
         # CPU<>TPU sync happens here.
         return DecodeInputData(num_decodes=num_decodes,
                                token_ids=token_ids.to(self.device),