
Rename
Signed-off-by: DarkLight1337 <[email protected]>
DarkLight1337 committed Jan 7, 2025
1 parent 5f940fc commit 62942e3
Showing 18 changed files with 71 additions and 68 deletions.
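In short, this commit renames the multimodal profiling helpers: BaseDummyDataBuilder becomes BaseDummyInputsBuilder, the per-model subclasses follow suit (e.g. LlavaDummyDataBuilder -> LlavaDummyInputsBuilder), and the dummy_data= keyword of MULTIMODAL_REGISTRY.register_processor becomes dummy=. Below is a minimal sketch of the new registration pattern, condensed from the MyLlava test plugin touched by this commit; MyLlava is just the example model used in the test suite.

from vllm.model_executor.models.llava import (LlavaDummyInputsBuilder,
                                               LlavaForConditionalGeneration,
                                               LlavaMultiModalProcessor,
                                               LlavaProcessingInfo)
from vllm.multimodal import MULTIMODAL_REGISTRY


# The keyword is now dummy= (was dummy_data=) and takes the renamed
# *DummyInputsBuilder class.
@MULTIMODAL_REGISTRY.register_processor(LlavaMultiModalProcessor,
                                        info=LlavaProcessingInfo,
                                        dummy=LlavaDummyInputsBuilder)
class MyLlava(LlavaForConditionalGeneration):
    pass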
4 changes: 2 additions & 2 deletions tests/multimodal/test_processing.py
@@ -690,7 +690,7 @@ def _test_processing_cache_correctness(

baseline_processor = factories.build_processor(ctx, cache=None)
cached_processor = factories.build_processor(ctx, cache=cache)
-dummy_data_builder = baseline_processor.dummy_data_builder
+dummy_inputs = baseline_processor.dummy_inputs

rng = np.random.RandomState(0)

@@ -722,7 +722,7 @@ def _test_processing_cache_correctness(
}

mm_counts = {k: len(vs) for k, vs in mm_data.items()}
-prompt = dummy_data_builder.get_dummy_processor_inputs(
+prompt = dummy_inputs.get_dummy_processor_inputs(
model_config.max_model_len,
mm_counts,
).prompt_text
@@ -2,7 +2,7 @@

import torch

-from vllm.model_executor.models.llava import (LlavaDummyDataBuilder,
+from vllm.model_executor.models.llava import (LlavaDummyInputsBuilder,
LlavaForConditionalGeneration,
LlavaMultiModalProcessor,
LlavaProcessingInfo)
@@ -12,7 +12,7 @@

@MULTIMODAL_REGISTRY.register_processor(LlavaMultiModalProcessor,
info=LlavaProcessingInfo,
-dummy_data=LlavaDummyDataBuilder)
+dummy=LlavaDummyInputsBuilder)
class MyLlava(LlavaForConditionalGeneration):

def compute_logits(
6 changes: 3 additions & 3 deletions vllm/model_executor/models/aria.py
@@ -26,7 +26,7 @@
from vllm.multimodal.parse import MultiModalDataItems
from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.configs.aria import (AriaMoELMConfig,
AriaVisionConfig)
@@ -464,7 +464,7 @@ def get_num_image_tokens(self) -> int:
return max(hf_config.projector_patch_to_query_dict.values())


-class AriaDummyDataBuilder(BaseDummyDataBuilder[AriaProcessingInfo]):
+class AriaDummyInputsBuilder(BaseDummyInputsBuilder[AriaProcessingInfo]):

def get_dummy_processor_inputs(
self,
@@ -526,7 +526,7 @@ def _get_prompt_replacements(

@MULTIMODAL_REGISTRY.register_processor(AriaMultiModalProcessor,
info=AriaProcessingInfo,
-dummy_data=AriaDummyDataBuilder)
+dummy=AriaDummyInputsBuilder)
class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
"""
Aria model for conditional generation tasks.
6 changes: 3 additions & 3 deletions vllm/model_executor/models/blip2.py
@@ -20,7 +20,7 @@
from vllm.multimodal.parse import MultiModalDataItems
from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors

from .blip import BlipVisionModel
@@ -413,7 +413,7 @@ def get_num_image_tokens(self) -> int:
return hf_config.num_query_tokens


-class Blip2DummyDataBuilder(BaseDummyDataBuilder[Blip2ProcessingInfo]):
+class Blip2DummyInputsBuilder(BaseDummyInputsBuilder[Blip2ProcessingInfo]):

def get_dummy_processor_inputs(
self,
@@ -490,7 +490,7 @@ def apply(

@MULTIMODAL_REGISTRY.register_processor(Blip2MultiModalProcessor,
info=Blip2ProcessingInfo,
-dummy_data=Blip2DummyDataBuilder)
+dummy=Blip2DummyInputsBuilder)
class Blip2ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):

def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
7 changes: 4 additions & 3 deletions vllm/model_executor/models/chameleon.py
@@ -33,7 +33,7 @@
from vllm.multimodal.parse import MultiModalDataItems
from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors
from vllm.utils import print_warning_once

@@ -68,7 +68,8 @@ def get_num_image_tokens(self) -> int:
return processor.image_seq_length


-class ChameleonDummyDataBuilder(BaseDummyDataBuilder[ChameleonProcessingInfo]):
+class ChameleonDummyInputsBuilder(
+BaseDummyInputsBuilder[ChameleonProcessingInfo]):

def get_dummy_processor_inputs(
self,
@@ -915,7 +916,7 @@ def forward(

@MULTIMODAL_REGISTRY.register_processor(ChameleonMultiModalProcessor,
info=ChameleonProcessingInfo,
-dummy_data=ChameleonDummyDataBuilder)
+dummy=ChameleonDummyInputsBuilder)
class ChameleonForConditionalGeneration(nn.Module, SupportsMultiModal,
SupportsPP):

6 changes: 3 additions & 3 deletions vllm/model_executor/models/fuyu.py
@@ -37,7 +37,7 @@
MultiModalDataItems)
from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors

from .interfaces import SupportsMultiModal, SupportsPP
@@ -117,7 +117,7 @@ def get_image_size_with_most_features(self) -> ImageSize:
height=image_processor.size["height"])


-class FuyuDummyDataBuilder(BaseDummyDataBuilder[FuyuProcessingInfo]):
+class FuyuDummyInputsBuilder(BaseDummyInputsBuilder[FuyuProcessingInfo]):

def get_dummy_processor_inputs(
self,
@@ -244,7 +244,7 @@ def apply(

@MULTIMODAL_REGISTRY.register_processor(FuyuMultiModalProcessor,
info=FuyuProcessingInfo,
-dummy_data=FuyuDummyDataBuilder)
+dummy=FuyuDummyInputsBuilder)
class FuyuForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):

def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
14 changes: 7 additions & 7 deletions vllm/model_executor/models/llava.py
@@ -29,7 +29,7 @@
from vllm.multimodal.processing import (BaseProcessingInfo, ProcessingCache,
PromptReplacement)
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors

from .clip import CLIPVisionModel
@@ -170,7 +170,7 @@ def get_max_image_tokens(self) -> int:
_I = TypeVar("_I", bound=BaseLlavaProcessingInfo)


-class LlavaDummyDataBuilder(BaseDummyDataBuilder[_I]):
+class LlavaDummyInputsBuilder(BaseDummyInputsBuilder[_I]):

def get_dummy_processor_inputs(
self,
@@ -360,23 +360,23 @@ def _build_llava_or_pixtral_hf_info(

def _build_llava_or_pixtral_hf_processor(
info: _I,
-dummy_data_builder: BaseDummyDataBuilder[_I],
+dummy_inputs: BaseDummyInputsBuilder[_I],
*,
cache: Optional[ProcessingCache] = None,
enable_sanity_checks: bool = True,
) -> BaseMultiModalProcessor:
if isinstance(info, PixtralHFProcessingInfo):
return PixtralHFMultiModalProcessor(
info,
-dummy_data_builder, # type: ignore
+dummy_inputs, # type: ignore
cache=cache,
enable_sanity_checks=enable_sanity_checks,
)

if isinstance(info, LlavaProcessingInfo):
return LlavaMultiModalProcessor(
info,
-dummy_data_builder, # type: ignore
+dummy_inputs, # type: ignore
cache=cache,
enable_sanity_checks=enable_sanity_checks,
)
@@ -461,7 +461,7 @@ def init_vision_tower_for_llava(

@MULTIMODAL_REGISTRY.register_processor(_build_llava_or_pixtral_hf_processor,
info=_build_llava_or_pixtral_hf_info,
-dummy_data=LlavaDummyDataBuilder)
+dummy=LlavaDummyInputsBuilder)
class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
# BitandBytes specific attributes
bitsandbytes_stacked_params_mapping = {
@@ -793,6 +793,6 @@ def get_replacement_mantis(item_idx: int):
# `--hf_overrides '{"architectures": ["MantisForConditionalGeneration"]}'`
@MULTIMODAL_REGISTRY.register_processor(MantisMultiModalProcessor,
info=LlavaProcessingInfo,
-dummy_data=LlavaDummyDataBuilder)
+dummy=LlavaDummyInputsBuilder)
class MantisForConditionalGeneration(LlavaForConditionalGeneration):
pass
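For downstream code that defines its own builder, only the base-class name and the registry keyword change; get_dummy_processor_inputs keeps its seq_len/mm_counts signature, as the hunks above show. Below is a hedged sketch of a subclass under the renamed API; the ProcessorInputs field names it uses (prompt_text, mm_data) are assumptions inferred from this diff, which reads .prompt_text on the result of get_dummy_processor_inputs.

from collections.abc import Mapping

from vllm.model_executor.models.llava import LlavaProcessingInfo
from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs


class MyDummyInputsBuilder(BaseDummyInputsBuilder[LlavaProcessingInfo]):
    """Illustrative builder; only the base-class name differs from before."""

    def get_dummy_processor_inputs(
        self,
        seq_len: int,
        mm_counts: Mapping[str, int],
    ) -> ProcessorInputs:
        num_images = mm_counts.get("image", 0)
        # Placeholder prompt for memory profiling; real builders also attach
        # dummy multimodal data (images/audio) sized for the worst case.
        # The field names below are assumed, not taken verbatim from the commit.
        return ProcessorInputs(
            prompt_text="<image>" * num_images,
            mm_data={},
        )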
4 changes: 2 additions & 2 deletions vllm/model_executor/models/llava_next.py
@@ -21,7 +21,7 @@
from .clip import CLIPVisionModel
from .interfaces import SupportsMultiModal, SupportsPP
from .llava import (BaseLlavaMultiModalProcessor, BaseLlavaProcessingInfo,
-LlavaDummyDataBuilder, LlavaLikeConfig,
+LlavaDummyInputsBuilder, LlavaLikeConfig,
LlavaMultiModalProjector, init_vision_tower_for_llava)
from .siglip import SiglipVisionModel
from .utils import (AutoWeightsLoader, embed_multimodal, flatten_bn,
@@ -181,7 +181,7 @@ class LlavaNextMultiModalProcessor(

@MULTIMODAL_REGISTRY.register_processor(LlavaNextMultiModalProcessor,
info=LlavaNextProcessingInfo,
-dummy_data=LlavaDummyDataBuilder)
+dummy=LlavaDummyInputsBuilder)
class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal,
SupportsPP):

8 changes: 4 additions & 4 deletions vllm/model_executor/models/llava_next_video.py
@@ -21,7 +21,7 @@
VideoEmbeddingItems, VideoProcessorItems)
from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors
from vllm.utils import is_list_of

@@ -134,8 +134,8 @@ def get_max_num_frames(self, seq_len: int) -> int:
return max(max_total_frames // max(max_videos, 1), 1)


-class LlavaNextVideoDummyDataBuilder(
-BaseDummyDataBuilder[LlavaNextVideoProcessingInfo]):
+class LlavaNextVideoDummyInputsBuilder(
+BaseDummyInputsBuilder[LlavaNextVideoProcessingInfo]):

def get_dummy_processor_inputs(
self,
@@ -269,7 +269,7 @@ def forward(self, image_features: torch.Tensor) -> torch.Tensor:
@MULTIMODAL_REGISTRY.register_processor(
LlavaNextVideoMultiModalProcessor,
info=LlavaNextVideoProcessingInfo,
-dummy_data=LlavaNextVideoDummyDataBuilder,
+dummy=LlavaNextVideoDummyInputsBuilder,
)
class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal,
SupportsPP):
13 changes: 6 additions & 7 deletions vllm/model_executor/models/llava_onevision.py
@@ -28,7 +28,7 @@

from .clip import CLIPVisionModel
from .interfaces import SupportsMultiModal, SupportsPP
-from .llava import LlavaDummyDataBuilder, init_vision_tower_for_llava
+from .llava import LlavaDummyInputsBuilder, init_vision_tower_for_llava
from .llava_next import (BaseLlavaNextMultiModalProcessor, LlavaNextLikeConfig,
LlavaNextProcessingInfo)
from .siglip import SiglipVisionModel
@@ -233,8 +233,8 @@ def get_max_video_tokens(self, seq_len: int) -> int:
)


-class LlavaOnevisionDummyDataBuilder(
-LlavaDummyDataBuilder[LlavaOnevisionProcessingInfo]):
+class LlavaOnevisionDummyInputsBuilder(
+LlavaDummyInputsBuilder[LlavaOnevisionProcessingInfo]):

def get_dummy_processor_inputs(
self,
@@ -392,10 +392,9 @@ def forward(self, image_features: torch.Tensor) -> torch.Tensor:
return hidden_states


-@MULTIMODAL_REGISTRY.register_processor(
-LlavaOnevisionMultiModalProcessor,
-info=LlavaOnevisionProcessingInfo,
-dummy_data=LlavaOnevisionDummyDataBuilder)
+@MULTIMODAL_REGISTRY.register_processor(LlavaOnevisionMultiModalProcessor,
+info=LlavaOnevisionProcessingInfo,
+dummy=LlavaOnevisionDummyInputsBuilder)
class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal,
SupportsPP):

6 changes: 3 additions & 3 deletions vllm/model_executor/models/phi3v.py
@@ -39,7 +39,7 @@
BoundPromptReplacement,
PlaceholderInfo, PromptReplacement)
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors
from vllm.utils import is_list_of

@@ -344,7 +344,7 @@ def get_image_size_with_most_features(self) -> ImageSize:
return ImageSize(height=8000, width=50)


-class Phi3VDummyDataBuilder(BaseDummyDataBuilder[Phi3VProcessingInfo]):
+class Phi3VDummyInputsBuilder(BaseDummyInputsBuilder[Phi3VProcessingInfo]):

def get_dummy_processor_inputs(
self,
@@ -498,7 +498,7 @@ def apply(

@MULTIMODAL_REGISTRY.register_processor(Phi3VMultiModalProcessor,
info=Phi3VProcessingInfo,
-dummy_data=Phi3VDummyDataBuilder)
+dummy=Phi3VDummyInputsBuilder)
class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
8 changes: 4 additions & 4 deletions vllm/model_executor/models/qwen2_audio.py
@@ -42,7 +42,7 @@
MultiModalDataParser)
from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors

from .interfaces import SupportsMultiModal, SupportsPP
@@ -115,8 +115,8 @@ def get_mm_max_tokens_per_item(self, seq_len: int) -> Mapping[str, int]:
return {"audio": max_output_lengths}


-class Qwen2AudioDummyDataBuilder(BaseDummyDataBuilder[Qwen2AudioProcessingInfo]
-):
+class Qwen2AudioDummyInputsBuilder(
+BaseDummyInputsBuilder[Qwen2AudioProcessingInfo]):

def get_dummy_processor_inputs(
self,
Expand Down Expand Up @@ -237,7 +237,7 @@ def _always_apply_prompt_replacements(self) -> bool:

@MULTIMODAL_REGISTRY.register_processor(Qwen2AudioMultiModalProcessor,
info=Qwen2AudioProcessingInfo,
-dummy_data=Qwen2AudioDummyDataBuilder)
+dummy=Qwen2AudioDummyInputsBuilder)
class Qwen2AudioForConditionalGeneration(nn.Module, SupportsMultiModal,
SupportsPP):

6 changes: 3 additions & 3 deletions vllm/model_executor/models/qwen2_vl.py
@@ -60,7 +60,7 @@
MultiModalDataItems, MultiModalDataParser)
from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.platforms import _Backend
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.config import uses_mrope
@@ -883,7 +883,7 @@ def get_max_video_tokens(self, seq_len: int) -> int:
)


-class Qwen2VLDummyDataBuilder(BaseDummyDataBuilder[Qwen2VLProcessingInfo]):
+class Qwen2VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen2VLProcessingInfo]):

def get_dummy_processor_inputs(
self,
@@ -991,7 +991,7 @@ def _get_mm_fields_config(

@MULTIMODAL_REGISTRY.register_processor(Qwen2VLMultiModalProcessor,
info=Qwen2VLProcessingInfo,
-dummy_data=Qwen2VLDummyDataBuilder)
+dummy=Qwen2VLDummyInputsBuilder)
class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
SupportsLoRA, SupportsPP):
packed_modules_mapping = {
7 changes: 4 additions & 3 deletions vllm/model_executor/models/ultravox.py
@@ -27,7 +27,7 @@
from vllm.multimodal.parse import MultiModalDataItems, MultiModalDataParser
from vllm.multimodal.processing import BaseProcessingInfo, PromptReplacement
from vllm.multimodal.processor import BaseMultiModalProcessor
-from vllm.multimodal.profiling import BaseDummyDataBuilder, ProcessorInputs
+from vllm.multimodal.profiling import BaseDummyInputsBuilder, ProcessorInputs
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.configs.ultravox import UltravoxConfig

@@ -98,7 +98,8 @@ def get_mm_max_tokens_per_item(self, seq_len: int) -> Mapping[str, int]:
return {"audio": max_audio_tokens}


-class UltravoxDummyDataBuilder(BaseDummyDataBuilder[UltravoxProcessingInfo]):
+class UltravoxDummyInputsBuilder(BaseDummyInputsBuilder[UltravoxProcessingInfo]
+):

def get_dummy_processor_inputs(
self,
Expand Down Expand Up @@ -340,7 +341,7 @@ def forward(

@MULTIMODAL_REGISTRY.register_processor(UltravoxMultiModalProcessor,
info=UltravoxProcessingInfo,
-dummy_data=UltravoxDummyDataBuilder)
+dummy=UltravoxDummyInputsBuilder)
class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP):

hf_to_vllm_mapper = WeightsMapper(