diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 30d73177a77c4..e981514400368 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -610,6 +610,7 @@ class CachedRequestState: mm_positions: List["PlaceholderRange"] sampling_params: SamplingParams generator: Optional[torch.Generator] + block_ids: List[int] num_computed_tokens: int output_token_ids: List[int] @@ -618,6 +619,7 @@ class CachedRequestState: def num_tokens(self) -> int: return len(self.prompt_token_ids) + len(self.output_token_ids) + class InputBatch: def __init__(