diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py
index 003cc177c57c4..808e82e05cfcf 100644
--- a/tests/multimodal/test_processing.py
+++ b/tests/multimodal/test_processing.py
@@ -690,7 +690,7 @@ def _test_processing_cache_correctness(
     baseline_processor = factories.build_processor(ctx, cache=None)
     cached_processor = factories.build_processor(ctx, cache=cache)

-    dummy_data_builder = baseline_processor.dummy_data_builder
+    dummy_inputs = baseline_processor.dummy_inputs

     rng = np.random.RandomState(0)

@@ -722,7 +722,7 @@ def _test_processing_cache_correctness(
         }
         mm_counts = {k: len(vs) for k, vs in mm_data.items()}

-        prompt = dummy_data_builder.get_dummy_processor_inputs(
+        prompt = dummy_inputs.get_dummy_processor_inputs(
             model_config.max_model_len,
             mm_counts,
         ).prompt_text
diff --git a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py
index e273c4cbf2ea2..d07560c2a9b64 100644
--- a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py
+++ b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py
@@ -2,7 +2,7 @@

 import torch

-from vllm.model_executor.models.llava import (LlavaDummyDataBuilder,
+from vllm.model_executor.models.llava import (LlavaDummyInputsBuilder,
                                               LlavaForConditionalGeneration,
                                               LlavaMultiModalProcessor,
                                               LlavaProcessingInfo)
@@ -12,7 +12,7 @@

 @MULTIMODAL_REGISTRY.register_processor(LlavaMultiModalProcessor,
                                         info=LlavaProcessingInfo,
-                                        dummy_data=LlavaDummyDataBuilder)
+                                        dummy=LlavaDummyInputsBuilder)
 class MyLlava(LlavaForConditionalGeneration):

     def compute_logits(
diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py
index 88cf73d109ee2..ede88d1a867cb 100644
--- a/vllm/model_executor/models/aria.py
+++ b/vllm/model_executor/models/aria.py
@@ -26,7 +26,7 @@
 from vllm.multimodal.parse import MultiModalDataItems
 from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.configs.aria import (AriaMoELMConfig,
                                                   AriaVisionConfig)
@@ -464,7 +464,7 @@ def get_num_image_tokens(self) -> int:
         return max(hf_config.projector_patch_to_query_dict.values())


-class AriaDummyDataBuilder(BaseDummyDataBuilder[AriaProcessingInfo]):
+class AriaDummyInputsBuilder(BaseDummyInputsBuilder[AriaProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -526,7 +526,7 @@ def _get_prompt_replacements(

 @MULTIMODAL_REGISTRY.register_processor(AriaMultiModalProcessor,
                                         info=AriaProcessingInfo,
-                                        dummy_data=AriaDummyDataBuilder)
+                                        dummy=AriaDummyInputsBuilder)
 class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
     """
     Aria model for conditional generation tasks.
diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py
index 5db1af556ce92..cbdcce6e07b9f 100644
--- a/vllm/model_executor/models/blip2.py
+++ b/vllm/model_executor/models/blip2.py
@@ -20,7 +20,7 @@
 from vllm.multimodal.parse import MultiModalDataItems
 from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors

 from .blip import BlipVisionModel
@@ -413,7 +413,7 @@ def get_num_image_tokens(self) -> int:
         return hf_config.num_query_tokens


-class Blip2DummyDataBuilder(BaseDummyDataBuilder[Blip2ProcessingInfo]):
+class Blip2DummyInputsBuilder(BaseDummyInputsBuilder[Blip2ProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -490,7 +490,7 @@ def apply(

 @MULTIMODAL_REGISTRY.register_processor(Blip2MultiModalProcessor,
                                         info=Blip2ProcessingInfo,
-                                        dummy_data=Blip2DummyDataBuilder)
+                                        dummy=Blip2DummyInputsBuilder)
 class Blip2ForConditionalGeneration(nn.Module, SupportsMultiModal,
                                     SupportsPP):

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
diff --git a/vllm/model_executor/models/chameleon.py b/vllm/model_executor/models/chameleon.py
index 29cb489d58a2e..bb01b372d17d5 100644
--- a/vllm/model_executor/models/chameleon.py
+++ b/vllm/model_executor/models/chameleon.py
@@ -33,7 +33,7 @@
 from vllm.multimodal.parse import MultiModalDataItems
 from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
 from vllm.utils import print_warning_once

@@ -68,7 +68,8 @@ def get_num_image_tokens(self) -> int:
         return processor.image_seq_length


-class ChameleonDummyDataBuilder(BaseDummyDataBuilder[ChameleonProcessingInfo]):
+class ChameleonDummyInputsBuilder(
+        BaseDummyInputsBuilder[ChameleonProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -915,7 +916,7 @@ def forward(

 @MULTIMODAL_REGISTRY.register_processor(ChameleonMultiModalProcessor,
                                         info=ChameleonProcessingInfo,
-                                        dummy_data=ChameleonDummyDataBuilder)
+                                        dummy=ChameleonDummyInputsBuilder)
 class ChameleonForConditionalGeneration(nn.Module, SupportsMultiModal,
                                         SupportsPP):
diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py
index 972d47c1633c8..66e68e3f8f541 100644
--- a/vllm/model_executor/models/fuyu.py
+++ b/vllm/model_executor/models/fuyu.py
@@ -37,7 +37,7 @@
                                    MultiModalDataItems)
 from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors

 from .interfaces import SupportsMultiModal, SupportsPP
@@ -117,7 +117,7 @@ def get_image_size_with_most_features(self) -> ImageSize:
                          height=image_processor.size["height"])


-class FuyuDummyDataBuilder(BaseDummyDataBuilder[FuyuProcessingInfo]):
+class FuyuDummyInputsBuilder(BaseDummyInputsBuilder[FuyuProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -244,7 +244,7 @@ def apply(
 @MULTIMODAL_REGISTRY.register_processor(FuyuMultiModalProcessor,
                                         info=FuyuProcessingInfo,
-                                        dummy_data=FuyuDummyDataBuilder)
+                                        dummy=FuyuDummyInputsBuilder)
 class FuyuForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):

     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index a2521c7e8514e..b0b0073280c12 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -29,7 +29,7 @@
 from vllm.multimodal.processing import (BaseProcessingInfo, ProcessingCache,
                                         PromptReplacement)
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors

 from .clip import CLIPVisionModel
@@ -170,7 +170,7 @@ def get_max_image_tokens(self) -> int:
 _I = TypeVar("_I", bound=BaseLlavaProcessingInfo)


-class LlavaDummyDataBuilder(BaseDummyDataBuilder[_I]):
+class LlavaDummyInputsBuilder(BaseDummyInputsBuilder[_I]):

     def get_dummy_processor_inputs(
         self,
@@ -360,7 +360,7 @@ def _build_llava_or_pixtral_hf_info(

 def _build_llava_or_pixtral_hf_processor(
     info: _I,
-    dummy_data_builder: BaseDummyDataBuilder[_I],
+    dummy_inputs: BaseDummyInputsBuilder[_I],
     *,
     cache: Optional[ProcessingCache] = None,
     enable_sanity_checks: bool = True,
@@ -368,7 +368,7 @@ def _build_llava_or_pixtral_hf_processor(
     if isinstance(info, PixtralHFProcessingInfo):
         return PixtralHFMultiModalProcessor(
             info,
-            dummy_data_builder,  # type: ignore
+            dummy_inputs,  # type: ignore
             cache=cache,
             enable_sanity_checks=enable_sanity_checks,
         )
@@ -376,7 +376,7 @@ def _build_llava_or_pixtral_hf_processor(
     if isinstance(info, LlavaProcessingInfo):
         return LlavaMultiModalProcessor(
             info,
-            dummy_data_builder,  # type: ignore
+            dummy_inputs,  # type: ignore
             cache=cache,
             enable_sanity_checks=enable_sanity_checks,
         )
@@ -461,7 +461,7 @@ def init_vision_tower_for_llava(

 @MULTIMODAL_REGISTRY.register_processor(_build_llava_or_pixtral_hf_processor,
                                         info=_build_llava_or_pixtral_hf_info,
-                                        dummy_data=LlavaDummyDataBuilder)
+                                        dummy=LlavaDummyInputsBuilder)
 class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):

     # BitandBytes specific attributes
     bitsandbytes_stacked_params_mapping = {
@@ -793,6 +793,6 @@ def get_replacement_mantis(item_idx: int):
 # `--hf_overrides '{"architectures": ["MantisForConditionalGeneration"]}'`
 @MULTIMODAL_REGISTRY.register_processor(MantisMultiModalProcessor,
                                         info=LlavaProcessingInfo,
-                                        dummy_data=LlavaDummyDataBuilder)
+                                        dummy=LlavaDummyInputsBuilder)
 class MantisForConditionalGeneration(LlavaForConditionalGeneration):
     pass
diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py
index 6af8acd392e5e..c76822f657661 100644
--- a/vllm/model_executor/models/llava_next.py
+++ b/vllm/model_executor/models/llava_next.py
@@ -21,7 +21,7 @@
 from .clip import CLIPVisionModel
 from .interfaces import SupportsMultiModal, SupportsPP
 from .llava import (BaseLlavaMultiModalProcessor, BaseLlavaProcessingInfo,
-                    LlavaDummyDataBuilder, LlavaLikeConfig,
+                    LlavaDummyInputsBuilder, LlavaLikeConfig,
                     LlavaMultiModalProjector, init_vision_tower_for_llava)
 from .siglip import SiglipVisionModel
 from .utils import (AutoWeightsLoader, embed_multimodal, flatten_bn,
@@ -181,7 +181,7 @@ class LlavaNextMultiModalProcessor(
 @MULTIMODAL_REGISTRY.register_processor(LlavaNextMultiModalProcessor,
                                         info=LlavaNextProcessingInfo,
-                                        dummy_data=LlavaDummyDataBuilder)
+                                        dummy=LlavaDummyInputsBuilder)
 class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal,
                                         SupportsPP):
diff --git a/vllm/model_executor/models/llava_next_video.py b/vllm/model_executor/models/llava_next_video.py
index 881d71ed9ab5c..8e60aa8aaa719 100644
--- a/vllm/model_executor/models/llava_next_video.py
+++ b/vllm/model_executor/models/llava_next_video.py
@@ -21,7 +21,7 @@
                                    VideoEmbeddingItems, VideoProcessorItems)
 from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
 from vllm.utils import is_list_of

@@ -134,8 +134,8 @@ def get_max_num_frames(self, seq_len: int) -> int:
         return max(max_total_frames // max(max_videos, 1), 1)


-class LlavaNextVideoDummyDataBuilder(
-        BaseDummyDataBuilder[LlavaNextVideoProcessingInfo]):
+class LlavaNextVideoDummyInputsBuilder(
+        BaseDummyInputsBuilder[LlavaNextVideoProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -269,7 +269,7 @@ def forward(self, image_features: torch.Tensor) -> torch.Tensor:

 @MULTIMODAL_REGISTRY.register_processor(
     LlavaNextVideoMultiModalProcessor,
     info=LlavaNextVideoProcessingInfo,
-    dummy_data=LlavaNextVideoDummyDataBuilder,
+    dummy=LlavaNextVideoDummyInputsBuilder,
 )
 class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal,
                                              SupportsPP):
diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py
index 6622e3a150e64..21f589f4fbf1c 100644
--- a/vllm/model_executor/models/llava_onevision.py
+++ b/vllm/model_executor/models/llava_onevision.py
@@ -28,7 +28,7 @@

 from .clip import CLIPVisionModel
 from .interfaces import SupportsMultiModal, SupportsPP
-from .llava import LlavaDummyDataBuilder, init_vision_tower_for_llava
+from .llava import LlavaDummyInputsBuilder, init_vision_tower_for_llava
 from .llava_next import (BaseLlavaNextMultiModalProcessor, LlavaNextLikeConfig,
                          LlavaNextProcessingInfo)
 from .siglip import SiglipVisionModel
@@ -233,8 +233,8 @@ def get_max_video_tokens(self, seq_len: int) -> int:
     )


-class LlavaOnevisionDummyDataBuilder(
-        LlavaDummyDataBuilder[LlavaOnevisionProcessingInfo]):
+class LlavaOnevisionDummyInputsBuilder(
+        LlavaDummyInputsBuilder[LlavaOnevisionProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -392,10 +392,9 @@ def forward(self, image_features: torch.Tensor) -> torch.Tensor:
         return hidden_states


-@MULTIMODAL_REGISTRY.register_processor(
-    LlavaOnevisionMultiModalProcessor,
-    info=LlavaOnevisionProcessingInfo,
-    dummy_data=LlavaOnevisionDummyDataBuilder)
+@MULTIMODAL_REGISTRY.register_processor(LlavaOnevisionMultiModalProcessor,
+                                        info=LlavaOnevisionProcessingInfo,
+                                        dummy=LlavaOnevisionDummyInputsBuilder)
 class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal,
                                              SupportsPP):
diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py
index 387fccecbf848..dab80c581eccb 100644
--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -39,7 +39,7 @@
                                         BoundPromptReplacement,
                                         PlaceholderInfo, PromptReplacement)
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
 from vllm.utils import is_list_of

@@ -344,7 +344,7 @@ def get_image_size_with_most_features(self) -> ImageSize:
         return ImageSize(height=8000, width=50)


-class Phi3VDummyDataBuilder(BaseDummyDataBuilder[Phi3VProcessingInfo]):
+class Phi3VDummyInputsBuilder(BaseDummyInputsBuilder[Phi3VProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -498,7 +498,7 @@ def apply(

 @MULTIMODAL_REGISTRY.register_processor(Phi3VMultiModalProcessor,
                                         info=Phi3VProcessingInfo,
-                                        dummy_data=Phi3VDummyDataBuilder)
+                                        dummy=Phi3VDummyInputsBuilder)
 class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):

     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_prefix={
diff --git a/vllm/model_executor/models/qwen2_audio.py b/vllm/model_executor/models/qwen2_audio.py
index aea7dc8dd8fea..bc4426eff5866 100644
--- a/vllm/model_executor/models/qwen2_audio.py
+++ b/vllm/model_executor/models/qwen2_audio.py
@@ -42,7 +42,7 @@
                                    MultiModalDataParser)
 from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors

 from .interfaces import SupportsMultiModal, SupportsPP
@@ -115,8 +115,8 @@ def get_mm_max_tokens_per_item(self, seq_len: int) -> Mapping[str, int]:
         return {"audio": max_output_lengths}


-class Qwen2AudioDummyDataBuilder(BaseDummyDataBuilder[Qwen2AudioProcessingInfo]
-                                 ):
+class Qwen2AudioDummyInputsBuilder(
+        BaseDummyInputsBuilder[Qwen2AudioProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -237,7 +237,7 @@ def _always_apply_prompt_replacements(self) -> bool:

 @MULTIMODAL_REGISTRY.register_processor(Qwen2AudioMultiModalProcessor,
                                         info=Qwen2AudioProcessingInfo,
-                                        dummy_data=Qwen2AudioDummyDataBuilder)
+                                        dummy=Qwen2AudioDummyInputsBuilder)
 class Qwen2AudioForConditionalGeneration(nn.Module, SupportsMultiModal,
                                          SupportsPP):
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index d60656d140bf8..ba3b7127a0ee4 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -60,7 +60,7 @@
                                    MultiModalDataItems, MultiModalDataParser)
 from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.platforms import _Backend
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.config import uses_mrope
@@ -883,7 +883,7 @@ def get_max_video_tokens(self, seq_len: int) -> int:
     )


-class Qwen2VLDummyDataBuilder(BaseDummyDataBuilder[Qwen2VLProcessingInfo]):
+class Qwen2VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen2VLProcessingInfo]):

     def get_dummy_processor_inputs(
         self,
@@ -991,7 +991,7 @@ def _get_mm_fields_config(

 @MULTIMODAL_REGISTRY.register_processor(Qwen2VLMultiModalProcessor,
                                         info=Qwen2VLProcessingInfo,
-                                        dummy_data=Qwen2VLDummyDataBuilder)
+                                        dummy=Qwen2VLDummyInputsBuilder)
 class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
                                       SupportsLoRA, SupportsPP):

     packed_modules_mapping = {
diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py
index 1a403941f803f..477ed17905589 100644
--- a/vllm/model_executor/models/ultravox.py
+++ b/vllm/model_executor/models/ultravox.py
@@ -27,7 +27,7 @@
 from vllm.multimodal.parse import MultiModalDataItems, MultiModalDataParser
 from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
 from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.configs.ultravox import UltravoxConfig

@@ -98,7 +98,8 @@ def get_mm_max_tokens_per_item(self, seq_len: int) -> Mapping[str, int]:
         return {"audio": max_audio_tokens}


-class UltravoxDummyDataBuilder(BaseDummyDataBuilder[UltravoxProcessingInfo]):
+class UltravoxDummyInputsBuilder(BaseDummyInputsBuilder[UltravoxProcessingInfo]
+                                 ):

     def get_dummy_processor_inputs(
         self,
@@ -340,7 +341,7 @@ def forward(

 @MULTIMODAL_REGISTRY.register_processor(UltravoxMultiModalProcessor,
                                         info=UltravoxProcessingInfo,
-                                        dummy_data=UltravoxDummyDataBuilder)
+                                        dummy=UltravoxDummyInputsBuilder)
 class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP):

     hf_to_vllm_mapper = WeightsMapper(
diff --git a/vllm/multimodal/processor.py b/vllm/multimodal/processor.py
index aa509eb347f47..08e6b7337e62d 100644
--- a/vllm/multimodal/processor.py
+++ b/vllm/multimodal/processor.py
@@ -17,7 +17,7 @@
                          find_mm_placeholders, find_text_matches,
                          find_token_matches, full_groupby_modality,
                          replace_text_matches, replace_token_matches)
-from .profiling import BaseDummyDataBuilder
+from .profiling import BaseDummyInputsBuilder

 _I = TypeVar("_I", bound=BaseProcessingInfo)

@@ -31,14 +31,14 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):

     def __init__(self,
                  info: _I,
-                 dummy_data_builder: BaseDummyDataBuilder[_I],
+                 dummy_inputs: BaseDummyInputsBuilder[_I],
                  *,
                  cache: Optional[ProcessingCache] = None,
                  enable_sanity_checks: bool = True) -> None:
         super().__init__()

         self.info = info
-        self.dummy_data_builder = dummy_data_builder
+        self.dummy_inputs = dummy_inputs
         self.cache = cache
         self.enable_sanity_checks = enable_sanity_checks

@@ -208,7 +208,7 @@ def _apply_hf_processor_missing(

         # Some HF processors (e.g. Qwen2-VL) expect corresponding
         # multi-modal tokens to be in the prompt text
-        dummy_inputs = self.dummy_data_builder.get_dummy_processor_inputs(
+        dummy_inputs = self.dummy_inputs.get_dummy_processor_inputs(
             self.info.ctx.model_config.max_model_len,
             mm_missing_counts,
         )
diff --git a/vllm/multimodal/profiler.py b/vllm/multimodal/profiler.py
index 8a7d03c8d3c41..ed3c4edcf092d 100644
--- a/vllm/multimodal/profiler.py
+++ b/vllm/multimodal/profiler.py
@@ -8,7 +8,7 @@
 from .inputs import MultiModalInputsV2
 from .processing import BaseProcessingInfo
 from .processor import BaseMultiModalProcessor
-from .profiling import BaseDummyDataBuilder
+from .profiling import BaseDummyInputsBuilder

 logger = init_logger(__name__)

@@ -30,8 +30,8 @@ def processing(self) -> BaseProcessingInfo:
         return self.processor.info

     @property
-    def dummy_data_builder(self) -> BaseDummyDataBuilder[_I]:
-        return self.processor.dummy_data_builder
+    def dummy_inputs(self) -> BaseDummyInputsBuilder[_I]:
+        return self.processor.dummy_inputs

     def _get_mm_limits(self) -> Mapping[str, int]:
         mm_config = self.processing.ctx.get_mm_config()
@@ -59,7 +59,7 @@ def _get_dummy_mm_inputs(
         self,
         seq_len: int,
         mm_counts: Mapping[str, int],
     ) -> MultiModalInputsV2:
-        factory = self.dummy_data_builder
+        factory = self.dummy_inputs
         processor_inputs = factory.get_dummy_processor_inputs(
             seq_len, mm_counts)
diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py
index 6ecf1d4f11061..de68b335cebd8 100644
--- a/vllm/multimodal/profiling.py
+++ b/vllm/multimodal/profiling.py
@@ -22,7 +22,7 @@ class ProcessorInputs:
 _I = TypeVar("_I", bound=BaseProcessingInfo)


-class BaseDummyDataBuilder(ABC, Generic[_I]):
+class BaseDummyInputsBuilder(ABC, Generic[_I]):
     """
     Abstract base class that constructs the dummy data to profile
     multi-modal models.
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index ac1bb8b33bff8..3639384d505f7 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -17,7 +17,7 @@
 from .inputs import MultiModalDataDict, MultiModalKwargs, NestedTensors
 from .processing import ProcessingCache
 from .processor import BaseMultiModalProcessor, BaseProcessingInfo
-from .profiling import BaseDummyDataBuilder
+from .profiling import BaseDummyInputsBuilder
 from .utils import cached_get_tokenizer
 from .video import VideoPlugin

@@ -44,10 +44,12 @@ def __call__(
         ...


-class DummyDataBuilderFactory(Protocol[_I]):
-    """Constructs a :class:`BaseDummyDataBuilder` instance from the context."""
+class DummyInputsBuilderFactory(Protocol[_I]):
+    """
+    Constructs a :class:`BaseDummyInputsBuilder` instance from the context.
+    """

-    def __call__(self, info: _I) -> BaseDummyDataBuilder[_I]:
+    def __call__(self, info: _I) -> BaseDummyInputsBuilder[_I]:
         ...
@@ -57,7 +59,7 @@ class MultiModalProcessorFactory(Protocol[_I]):
     def __call__(
         self,
         info: _I,
-        dummy_data_builder: BaseDummyDataBuilder[_I],
+        dummy_inputs: BaseDummyInputsBuilder[_I],
         *,
         cache: Optional[ProcessingCache] = None,
     ) -> BaseMultiModalProcessor[_I]:
@@ -68,7 +70,7 @@ def __call__(
 class _ProcessorFactories(Generic[_I]):
     info: ProcessingInfoFactory[_I]
     processor: MultiModalProcessorFactory[_I]
-    dummy_data: DummyDataBuilderFactory[_I]
+    dummy: DummyInputsBuilderFactory[_I]

     def build_processor(
         self,
@@ -77,8 +79,8 @@ def build_processor(
         cache: Optional[ProcessingCache] = None,
     ):
         info = self.info(ctx)
-        dummy_data_builder = self.dummy_data(info)
-        return self.processor(info, dummy_data_builder, cache=cache)
+        dummy_inputs_builder = self.dummy(info)
+        return self.processor(info, dummy_inputs_builder, cache=cache)


 class _MultiModalLimits(UserDict["ModelConfig", Dict[str, int]]):
@@ -358,7 +360,7 @@ def register_processor(
         processor: MultiModalProcessorFactory[_I],
         *,
         info: ProcessingInfoFactory[_I],
-        dummy_data: DummyDataBuilderFactory[_I],
+        dummy: DummyInputsBuilderFactory[_I],
     ):
         """
         Register a multi-modal processor to a model class. The processor
@@ -381,7 +383,7 @@ def wrapper(model_cls: N) -> N:

             self._processor_factories[model_cls] = _ProcessorFactories(
                 info=info,
-                dummy_data=dummy_data,
+                dummy=dummy,
                 processor=processor,
             )
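
Note (not part of the patch): after this rename, out-of-tree model plugins pass the dummy-inputs builder to the registry via the `dummy=` keyword instead of `dummy_data=`. Below is a minimal usage sketch modeled on the my_llava.py test plugin updated above; the subclass name MyLlavaVariant is hypothetical.

# Sketch only: mirrors the my_llava.py plugin touched by this patch;
# "MyLlavaVariant" is an assumed example name, not part of the diff.
from vllm.model_executor.models.llava import (LlavaDummyInputsBuilder,
                                              LlavaForConditionalGeneration,
                                              LlavaMultiModalProcessor,
                                              LlavaProcessingInfo)
from vllm.multimodal import MULTIMODAL_REGISTRY


# The registry builds the processing info, then the dummy-inputs builder,
# and finally the processor (see _ProcessorFactories.build_processor above).
@MULTIMODAL_REGISTRY.register_processor(LlavaMultiModalProcessor,
                                        info=LlavaProcessingInfo,
                                        dummy=LlavaDummyInputsBuilder)
class MyLlavaVariant(LlavaForConditionalGeneration):
    pass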