diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index 4b3fd5d3cd681..4090e1979fe53 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -596,7 +596,7 @@ def apply(
         result = super().apply(prompt_text, mm_data, hf_processor_mm_kwargs)
 
         mm_items = self._to_mm_items(mm_data)
-        mm_item_counts = mm_items.get_item_counts()
+        mm_item_counts = mm_items.get_all_counts()
         mm_kwargs = result["mm_kwargs"]
 
         # We reimplement the functionality of MLlavaProcessor from
diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py
index 413655ff60fe7..0402c556296ce 100644
--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -392,7 +392,7 @@ def get_replacement_phi3v(item_idx: int):
 
             return [_IMAGE_TOKEN_ID] * num_tokens + [bos_token_id]
 
-        num_images = mm_items.get_item_count("image", strict=False)
+        num_images = mm_items.get_count("image", strict=False)
 
         return [
             PromptReplacement(
diff --git a/vllm/multimodal/parse.py b/vllm/multimodal/parse.py
index 1e190a6999f81..21f3ca8f3487a 100644
--- a/vllm/multimodal/parse.py
+++ b/vllm/multimodal/parse.py
@@ -169,7 +169,7 @@ class MultiModalDataItems(UserDict[str, ModalityDataItems[Any, Any]]):
     corresponds to a list.
     """
 
-    def get_item_count(self, modality: str, *, strict: bool = True) -> int:
+    def get_count(self, modality: str, *, strict: bool = True) -> int:
         """
         Get the number of data items belonging to a modality.
 
@@ -186,7 +186,7 @@ def get_item_count(self, modality: str, *, strict: bool = True) -> int:
 
         return self[modality].get_count()
 
-    def get_item_counts(self) -> Mapping[str, int]:
+    def get_all_counts(self) -> Mapping[str, int]:
         """Get the number of items belonging to each modality."""
         return {m: items.get_count() for m, items in self.items()}
 
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index fe6448d99da98..7c523735a73d7 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -747,7 +747,7 @@ def _apply_hf_processor_missing(
             cached items; instead, we rely on our own prompt replacement
             logic for the full text.
         """
-        mm_missing_counts = mm_missing_data_items.get_item_counts()
+        mm_missing_counts = mm_missing_data_items.get_all_counts()
 
         prompt_ids, _ = self._apply_hf_processor(
             prompt_text=prompt_text,
@@ -844,7 +844,7 @@ def _cached_apply_hf_processor(
             mm_merged_field_items[modality] = merged_modal_items_lst
 
         if self.enable_sanity_checks:
-            mm_missing_counts = mm_missing_data_items.get_item_counts()
+            mm_missing_counts = mm_missing_data_items.get_all_counts()
             assert all(
                 item_count == mm_missing_counts[modality]
                 for modality, item_count in mm_missing_next_idx.items()), dict(
@@ -857,7 +857,7 @@ def _cached_apply_hf_processor(
         )
 
         if self.enable_sanity_checks:
-            mm_item_counts = mm_data_items.get_item_counts()
+            mm_item_counts = mm_data_items.get_all_counts()
 
             for modality, item_count in mm_item_counts.items():
                 for item_idx in range(item_count):
@@ -967,7 +967,7 @@ def apply(
 
         # If HF processor already inserts placeholder tokens,
         # there is no need for us to insert them
-        mm_item_counts = mm_items.get_item_counts()
+        mm_item_counts = mm_items.get_all_counts()
         all_placeholders = self._find_placeholders(prompt_repls, prompt_ids,
                                                    mm_item_counts)
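
For reference, a minimal standalone sketch of the renamed counting API. The classes below are hypothetical stand-ins written only to illustrate the new method names (`get_count(modality, strict=...)` for one modality, `get_all_counts()` for all modalities); they are not the actual vllm `ModalityDataItems`/`MultiModalDataItems` implementations.

```python
# Hypothetical stand-in classes mirroring the renamed MultiModalDataItems API.
from collections import UserDict
from typing import Any, Mapping


class _FakeModalityItems:
    """Stand-in for ModalityDataItems: just wraps a list of items."""

    def __init__(self, items: list[Any]) -> None:
        self._items = items

    def get_count(self) -> int:
        return len(self._items)


class _FakeMultiModalDataItems(UserDict[str, _FakeModalityItems]):

    def get_count(self, modality: str, *, strict: bool = True) -> int:
        # Per-modality counter: raise for a missing modality only when
        # strict=True, otherwise report zero items.
        if modality not in self:
            if strict:
                raise KeyError(f"Modality {modality!r} not found")
            return 0
        return self[modality].get_count()

    def get_all_counts(self) -> Mapping[str, int]:
        # Counter over all modalities, as used for mm_item_counts above.
        return {m: items.get_count() for m, items in self.items()}


items = _FakeMultiModalDataItems({"image": _FakeModalityItems([0, 1, 2])})
assert items.get_count("image") == 3
assert items.get_count("video", strict=False) == 0
assert items.get_all_counts() == {"image": 3}
```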