diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py
index 138cc6a44c11a..e8f1a8f7bd228 100644
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@@ -1,6 +1,6 @@
 from collections import UserDict, defaultdict
-from typing import (Any, Dict, List, Literal, Mapping, Sequence, Tuple,
-                    TypedDict, TypeVar, Union, cast, final)
+from collections.abc import Mapping, Sequence
+from typing import Any, Literal, TypedDict, TypeVar, Union, cast, final
 
 import numpy as np
 import torch
@@ -44,7 +44,7 @@
 """
 # yapf: enable
 
-MultiModalData: TypeAlias = Union[_T, List[_T]]
+MultiModalData: TypeAlias = Union[_T, list[_T]]
 """
 Either a single data item, or a list of data items.
 
@@ -97,13 +97,13 @@ class PlaceholderRange(TypedDict):
     """The length of the placeholder."""
 
 
-NestedTensors = Union[List["NestedTensors"], List[torch.Tensor], torch.Tensor,
-                      Tuple[torch.Tensor, ...]]
+NestedTensors = Union[list["NestedTensors"], list[torch.Tensor], torch.Tensor,
+                      tuple[torch.Tensor, ...]]
 """
 Uses a list instead of a tensor if the dimensions of each element do not match.
 """
 
-BatchedTensorInputs: TypeAlias = Dict[str, NestedTensors]
+BatchedTensorInputs: TypeAlias = dict[str, NestedTensors]
 """
 A dictionary containing nested tensors which have been batched via
 :meth:`MultiModalKwargs.batch`.
@@ -139,7 +139,7 @@ def _try_stack(nested_tensors: NestedTensors) -> NestedTensors:
             # Only tensors (not lists) can be stacked.
             return stacked
 
-        tensors_ = cast(List[torch.Tensor], stacked)
+        tensors_ = cast(list[torch.Tensor], stacked)
         if any(t.shape != tensors_[0].shape for t in tensors_):
             # The tensors have incompatible shapes and can't be stacked.
             return tensors_
@@ -147,7 +147,7 @@ def _try_stack(nested_tensors: NestedTensors) -> NestedTensors:
         return torch.stack(tensors_)
 
     @staticmethod
-    def batch(inputs_list: List["MultiModalKwargs"]) -> BatchedTensorInputs:
+    def batch(inputs_list: list["MultiModalKwargs"]) -> BatchedTensorInputs:
         """
         Batch multiple inputs together into a dictionary.
 
@@ -162,7 +162,7 @@ def batch(inputs_list: List["MultiModalKwargs"]) -> BatchedTensorInputs:
         # We need to consider the case where each item in the batch
         # contains different modalities (i.e. different keys).
-        item_lists: Dict[str, List[NestedTensors]] = defaultdict(list)
+        item_lists = defaultdict[str, list[NestedTensors]](list)
 
         for inputs in inputs_list:
             for k, v in inputs.items():
@@ -207,16 +207,16 @@ class MultiModalInputsV2(TypedDict):
     prompt: str
     """The processed prompt text."""
 
-    prompt_token_ids: List[int]
+    prompt_token_ids: list[int]
     """The processed token IDs which includes placeholder tokens."""
 
-    token_type_ids: NotRequired[List[int]]
+    token_type_ids: NotRequired[list[int]]
     """The token type IDs of the prompt."""
 
     mm_kwargs: MultiModalKwargs
     """Keyword arguments to be directly passed to the model after batching."""
 
-    mm_hashes: NotRequired[List[str]]
+    mm_hashes: NotRequired[list[str]]
     """The hashes of the multi-modal data."""
 
     mm_placeholders: MultiModalPlaceholderDict
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index e604aef554825..2c30ae98a5b38 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -587,7 +587,7 @@ def iter_placeholders(
 
 
 class ProcessorInputs(NamedTuple):
-    """Keyword arguments to :meth:`BaseMultiModalProcessor`"""
+    """Keyword arguments to :meth:`BaseMultiModalProcessor`."""
     prompt_text: str
     mm_data: MultiModalDataDict
     hf_mm_kwargs: Mapping[str, object]
@@ -615,33 +615,14 @@ def maybe_log_cache_stats(self, cache: LRUCache, name: str) -> None:
             logger.debug("ProcessingCache: %s.hit_ratio = %.2f", name,
                          cache_stats.hit_ratio)
 
-    def _iter_bytes_to_hash(
-        self,
-        key: str,
-        obj: object,
-    ) -> Iterable[tuple[bytes, bytes]]:
-        # Recursive cases
-        if isinstance(obj, (list, tuple)):
-            for i, elem in enumerate(obj):
-                yield from self._iter_bytes_to_hash(f"{key}.{i}", elem)
-            return
-        if isinstance(obj, dict):
-            for k, v in obj.items():
-                yield from self._iter_bytes_to_hash(f"{key}.{k}", v)
-            return
-
-        key_bytes = key.encode("utf-8")
-
+    def _hash_item(self, obj: object) -> bytes:
         # Simple cases
         if isinstance(obj, str):
-            yield key_bytes, obj.encode("utf-8")
-            return
+            return obj.encode("utf-8")
         if isinstance(obj, bytes):
-            yield key_bytes, obj
-            return
+            return obj
         if isinstance(obj, Image):
-            yield key_bytes, obj.tobytes()
-            return
+            return obj.tobytes()
 
         # Convertible to NumPy arrays
         if isinstance(obj, torch.Tensor):
@@ -649,14 +630,30 @@ def _iter_bytes_to_hash(
         if isinstance(obj, (int, float)):
             obj = np.array(obj)
         if isinstance(obj, np.ndarray):
-            yield key_bytes, obj.tobytes()
-            return
+            return obj.tobytes()
 
         logger.warning(
             "No serialization method found for %s. "
             "Falling back to pickle.", type(obj))
 
-        yield key_bytes, pickle.dumps(obj)
+        return pickle.dumps(obj)
+
+    def _iter_bytes_to_hash(
+        self,
+        key: str,
+        obj: object,
+    ) -> Iterable[tuple[bytes, bytes]]:
+        # Recursive cases
+        if isinstance(obj, (list, tuple)):
+            for i, elem in enumerate(obj):
+                yield from self._iter_bytes_to_hash(f"{key}.{i}", elem)
+        elif isinstance(obj, dict):
+            for k, v in obj.items():
+                yield from self._iter_bytes_to_hash(f"{key}.{k}", v)
+        else:
+            key_bytes = self._hash_item(key)
+            value_bytes = self._hash_item(obj)
+            yield key_bytes, value_bytes
 
     def _hash_kwargs(self, **kwargs: object) -> str:
         hasher = blake3()
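Reviewer note on the `batch` hunk: it replaces an annotated `defaultdict(list)` with a runtime-parameterized `defaultdict[str, list[NestedTensors]](list)`. Since PEP 585 (Python 3.9), builtin and `collections` generics are subscriptable at runtime, so the subscripted class still constructs an ordinary `defaultdict` while keeping the element type visible to type checkers without a separate annotation. Below is a minimal sketch of the keyed batching pattern under that change; the plain `torch.Tensor` values and the inline stacking fallback are illustrative stand-ins, not vLLM's actual `NestedTensors`/`_try_stack` code.

```python
# Sketch of keyed batching with a runtime-parameterized defaultdict.
# Assumes plain torch tensors; vLLM's real NestedTensors handling is richer.
from collections import defaultdict

import torch

# PEP 585: defaultdict[str, list[torch.Tensor]] is valid at runtime and
# constructs an ordinary defaultdict, so no separate annotation is needed.
item_lists = defaultdict[str, list[torch.Tensor]](list)

# Each item in the batch may contain different modalities (different keys).
inputs_list = [
    {"image": torch.zeros(3, 224, 224)},
    {"image": torch.ones(3, 336, 336), "audio": torch.zeros(16000)},
]

# Group values per key rather than assuming a uniform schema across items.
for inputs in inputs_list:
    for k, v in inputs.items():
        item_lists[k].append(v)

# Stack per key only when shapes agree; otherwise keep the list
# (mirroring the fallback behavior of _try_stack in the diff above).
batched = {
    k: torch.stack(vs) if len({v.shape for v in vs}) == 1 else vs
    for k, vs in item_lists.items()
}

for k, v in batched.items():
    print(k, v.shape if isinstance(v, torch.Tensor) else [t.shape for t in v])
```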
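The `processing.py` hunks split leaf serialization out of the key-path recursion: `_hash_item` turns a single object into bytes, while `_iter_bytes_to_hash` walks nested lists/tuples/dicts and extends a dotted key path at each level. Here is a self-contained sketch of that flow under assumptions: module-level functions instead of methods, no PIL/torch branches, and `hashlib.sha256` standing in for the `blake3` hasher the module actually uses.

```python
# Standalone sketch of the refactored hashing flow (not vLLM's actual code).
import hashlib
import pickle
from collections.abc import Iterable

import numpy as np


def hash_item(obj: object) -> bytes:
    """Serialize one leaf value to bytes (simplified from _hash_item)."""
    if isinstance(obj, str):
        return obj.encode("utf-8")
    if isinstance(obj, bytes):
        return obj
    if isinstance(obj, (int, float)):
        obj = np.array(obj)
    if isinstance(obj, np.ndarray):
        return obj.tobytes()
    return pickle.dumps(obj)  # fallback, as in the diff


def iter_bytes_to_hash(key: str, obj: object) -> Iterable[tuple[bytes, bytes]]:
    """Recurse into containers, extending the dotted key path per level."""
    if isinstance(obj, (list, tuple)):
        for i, elem in enumerate(obj):
            yield from iter_bytes_to_hash(f"{key}.{i}", elem)
    elif isinstance(obj, dict):
        for k, v in obj.items():
            yield from iter_bytes_to_hash(f"{key}.{k}", v)
    else:
        # Leaf: hash both the key path and the value.
        yield hash_item(key), hash_item(obj)


def hash_kwargs(**kwargs: object) -> str:
    # sha256 stands in for blake3 here to avoid the extra dependency.
    hasher = hashlib.sha256()
    for key, obj in kwargs.items():
        for k_bytes, v_bytes in iter_bytes_to_hash(key, obj):
            hasher.update(k_bytes)
            hasher.update(v_bytes)
    return hasher.hexdigest()


# Nested kwargs flatten to key paths such as "sizes.0" and "meta.fps".
print(hash_kwargs(sizes=[336, 336], meta={"fps": 2.0}))
```

Separating the two concerns means a new leaf type only needs another branch in the serializer, while the recursion over containers stays untouched.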