From 7a5286cc047112c7cc52bad8da8c17aedc880ef5 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Tue, 24 Dec 2024 17:59:51 +0800
Subject: [PATCH] [Bugfix][Hardware][CPU] Fix CPU `input_positions` creation
 for text-only inputs with mrope (#11434)

Signed-off-by: Isotr0py <2037008807@qq.com>
---
 vllm/worker/cpu_model_runner.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/vllm/worker/cpu_model_runner.py b/vllm/worker/cpu_model_runner.py
index 420aaf8a1b4cd..f1531e0fc0675 100644
--- a/vllm/worker/cpu_model_runner.py
+++ b/vllm/worker/cpu_model_runner.py
@@ -114,8 +114,7 @@ class ModelInputData:
     def __init__(self, use_mrope: bool):
         self.use_mrope = use_mrope
         self.input_tokens: List[int] = []
-        self.input_positions: Optional[
-            List[int]] = [] if not self.use_mrope else None
+        self.input_positions: List[int] = []
         self.token_type_ids: Optional[List[int]] = []
         self.seq_lens: List[int] = []
         self.query_lens: List[int] = []
@@ -130,9 +129,8 @@ def __init__(self, use_mrope: bool):
         self.multi_modal_placeholder_maps: Dict[
             str, MultiModalPlaceholderMap] = defaultdict(
                 MultiModalPlaceholderMap)
-        self.input_mrope_positions: Optional[List[List[int]]] = [
-            [] for _ in range(3)
-        ] if self.use_mrope else None
+        self.input_mrope_positions: List[List[int]] = [[]
+                                                       for _ in range(3)]
 
     def __init__(self,
                  runner: "CPUModelRunner",
@@ -167,7 +165,8 @@ def build(self) -> ModelInputForCPU:
                                     device="cpu")
         input_positions = torch.tensor(
             input_data.input_positions
-            if not input_data.use_mrope else input_data.input_mrope_positions,
+            if not any(input_data.input_mrope_positions) else
+            input_data.input_mrope_positions,
             dtype=torch.long,
             device="cpu")
         token_type_ids = torch.tensor(input_data.token_type_ids,
@@ -236,7 +235,7 @@ def _compute_decode_input_tokens(self, data: ModelInputData,
             block_table = block_table[start_block:]
 
         # For MRotaryEmbedding
-        if data.input_positions is None:
+        if seq_data.mrope_position_delta is not None:
             next_pos = MRotaryEmbedding.get_next_input_positions(
                 seq_data.mrope_position_delta,
                 context_len,
@@ -309,8 +308,7 @@ def _compute_prompt_input_tokens(self, data: ModelInputData,
         data.slot_mapping.extend(slot_mapping)
 
         # The MROPE positions are prepared in _compute_multi_modal_input
-        if data.input_positions is not None:
-            data.input_positions.extend(token_positions)
+        data.input_positions.extend(token_positions)
         if data.token_type_ids is not None:
             data.token_type_ids.extend(token_types if token_types else [])
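
Note (not part of the patch): a minimal standalone sketch of the selection logic the fix relies on in build(). `_SketchInputData` and `build_positions` are hypothetical names invented for illustration; only the conditional expression mirrors the patched code, and `torch` is assumed to be installed.

# Sketch only: after the fix, `input_mrope_positions` is always allocated as three
# empty rows, and the flat `input_positions` list is used whenever no mrope rows
# were populated (the text-only case), so no branch ever sees a None list.
from typing import List

import torch


class _SketchInputData:
    def __init__(self) -> None:
        self.input_positions: List[int] = []
        self.input_mrope_positions: List[List[int]] = [[] for _ in range(3)]


def build_positions(data: _SketchInputData) -> torch.Tensor:
    # Mirrors the fixed build() logic: fall back to the flat positions unless
    # any of the three mrope rows were filled by multi-modal preprocessing.
    return torch.tensor(
        data.input_positions
        if not any(data.input_mrope_positions) else data.input_mrope_positions,
        dtype=torch.long,
        device="cpu")


# Text-only prompt: only the flat positions were extended, so a 1-D tensor results.
text_only = _SketchInputData()
text_only.input_positions.extend(range(4))
assert build_positions(text_only).shape == (4,)

# Multi-modal prompt with mrope: the 3 x N mrope positions are used instead.
mm = _SketchInputData()
for row in mm.input_mrope_positions:
    row.extend(range(4))
assert build_positions(mm).shape == (3, 4)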