Rename Inputs to Items to avoid confusion
Signed-off-by: DarkLight1337 <[email protected]>
DarkLight1337 committed Dec 30, 2024
1 parent 7752760 commit 0197724
Showing 4 changed files with 24 additions and 24 deletions.
4 changes: 2 additions & 2 deletions vllm/model_executor/models/llava.py
@@ -23,7 +23,7 @@
 from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
                                     MultiModalInputsV2, MultiModalKwargs,
                                     NestedTensors)
-from vllm.multimodal.parse import ImageProcessorInput
+from vllm.multimodal.parse import ImageProcessorItems
 from vllm.multimodal.processing import (BaseMultiModalProcessor,
                                         MultiModalDataItems, ProcessorInputs,
                                         PromptReplacement,
@@ -181,7 +181,7 @@ def _get_prompt_replacements(
         assert isinstance(vision_config, PixtralVisionConfig)

         def get_replacement_pixtral(item_idx: int):
-            images = mm_items.get_items("image", ImageProcessorInput)
+            images = mm_items.get_items("image", ImageProcessorItems)
             image_size = images.get_image_size(item_idx)

             (
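
For model code like the hunk above, the rename is call-site only: get_items takes the items class as a type token, and the returned object's API (get_image_size) is unchanged. Below is a minimal runnable sketch of that lookup pattern; the classes are illustrative stand-ins for vLLM's ImageProcessorItems and MultiModalDataItems, not the actual implementations.

# Standalone sketch of the typed lookup that the renamed class feeds into.
from typing import TypeVar

_T = TypeVar("_T")


class ImageProcessorItems:
    def __init__(self, sizes):
        self._sizes = sizes  # list of (width, height) tuples

    def get_image_size(self, item_idx: int):
        return self._sizes[item_idx]


class MultiModalDataItems(dict):
    def get_items(self, modality: str, typ: type[_T]) -> _T:
        items = self[modality]
        # Type-checked lookup: stale references to the old *Input name
        # now fail loudly instead of silently matching.
        if not isinstance(items, typ):
            raise TypeError(f"expected {typ.__name__}, "
                            f"got {type(items).__name__}")
        return items


mm_items = MultiModalDataItems(image=ImageProcessorItems([(336, 336)]))
images = mm_items.get_items("image", ImageProcessorItems)
print(images.get_image_size(0))  # (336, 336)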
4 changes: 2 additions & 2 deletions vllm/model_executor/models/phi3v.py
@@ -35,7 +35,7 @@
 from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
                                     MultiModalInputsV2, MultiModalKwargs,
                                     NestedTensors, PlaceholderRange)
-from vllm.multimodal.parse import ImageProcessorInput
+from vllm.multimodal.parse import ImageProcessorItems
 from vllm.multimodal.processing import (BaseMultiModalProcessor,
                                         MultiModalDataItems, ProcessorInputs,
                                         PromptReplacement,
@@ -382,7 +382,7 @@ def _get_prompt_replacements(
         assert isinstance(bos_token_id, int)

         def get_replacement_phi3v(item_idx: int):
-            images = mm_items.get_items("image", ImageProcessorInput)
+            images = mm_items.get_items("image", ImageProcessorItems)
             image_size = images.get_image_size(item_idx)

             num_tokens = image_processor.calc_num_image_tokens_from_image_size(
12 changes: 6 additions & 6 deletions vllm/model_executor/models/qwen2_vl.py
@@ -719,8 +719,8 @@ def get_max_qwen2_vl_mm_tokens(ctx: InputContext,
                                         data_type_key="video")


-class Qwen2EmbeddingsInput(ModalityDataItems[dict[str, torch.Tensor],
-                                             dict[str, torch.Tensor]]):
+class Qwen2EmbeddingItems(ModalityDataItems[dict[str, torch.Tensor],
+                                            dict[str, torch.Tensor]]):

     def __init__(self, data: dict, modality: str) -> None:
         super().__init__(data)
@@ -757,13 +757,13 @@ def get_passthrough_data(self) -> Mapping[str, object]:
         return self.data


-class Qwen2ImageEmbeddingsInput(Qwen2EmbeddingsInput):
+class Qwen2ImageEmbeddingItems(Qwen2EmbeddingItems):

     def __init__(self, data: dict) -> None:
         super().__init__(data, "image")


-class Qwen2VideoEmbeddingsInput(Qwen2EmbeddingsInput):
+class Qwen2VideoEmbeddingItems(Qwen2EmbeddingItems):

     def __init__(self, data: dict) -> None:
         super().__init__(data, "video")
@@ -776,7 +776,7 @@ def _parse_image_data(
         data: Union[dict[str, torch.Tensor], ModalityData[ImageItem]],
     ) -> ModalityDataItems[Any, Any]:
         if isinstance(data, dict):
-            return Qwen2ImageEmbeddingsInput(data)
+            return Qwen2EmbeddingItems(data, modality="image")

         return super()._parse_image_data(data)
@@ -785,7 +785,7 @@ def _parse_video_data(
         data: Union[dict[str, torch.Tensor], ModalityData[VideoItem]],
     ) -> ModalityDataItems[Any, Any]:
         if isinstance(data, dict):
-            return Qwen2VideoEmbeddingsInput(data)
+            return Qwen2EmbeddingItems(data, modality="video")

         return super()._parse_video_data(data)
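
Note that beyond the rename, the last two hunks change the parser to construct the base Qwen2EmbeddingItems with an explicit modality argument; the renamed per-modality subclasses are kept but the shown call sites no longer use them. A small stand-in sketch (hypothetical class names, not vLLM's) of why the two spellings are equivalent:

# Stand-in sketch: a thin per-modality subclass vs. the base class with
# an explicit modality argument, as in the _parse_*_data hunks above.
class EmbeddingItemsBase:
    def __init__(self, data: dict, modality: str) -> None:
        self.data = data
        self.modality = modality


class ImageEmbeddingItemsAlias(EmbeddingItemsBase):
    def __init__(self, data: dict) -> None:
        super().__init__(data, "image")


data = {"image_embeds": [[0.1, 0.2]]}
via_subclass = ImageEmbeddingItemsAlias(data)
via_base = EmbeddingItemsBase(data, modality="image")
assert via_subclass.modality == via_base.modality == "image"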
28 changes: 14 additions & 14 deletions vllm/multimodal/parse.py
@@ -62,7 +62,7 @@ def get_passthrough_data(self) -> Mapping[str, object]:
         raise NotImplementedError


-class ProcessorBatchInput(ModalityDataItems[Sequence[_T], _T]):
+class ProcessorBatchItems(ModalityDataItems[Sequence[_T], _T]):

     def __init__(self, data: Sequence[_T], modality: str) -> None:
         super().__init__(data)
@@ -85,7 +85,7 @@ def get_passthrough_data(self) -> Mapping[str, object]:
         return {}


-class EmbeddingsInput(ModalityDataItems[NestedTensors, torch.Tensor]):
+class EmbeddingItems(ModalityDataItems[NestedTensors, torch.Tensor]):

     def __init__(self, data: NestedTensors, modality: str) -> None:
         super().__init__(data)
@@ -108,13 +108,13 @@ def get_passthrough_data(self) -> Mapping[str, object]:
         return {f"{self.modality}_embeds": self.data}


-class AudioProcessorInput(ProcessorBatchInput[HfAudioItem]):
+class AudioProcessorItems(ProcessorBatchItems[HfAudioItem]):

     def __init__(self, data: Sequence[HfAudioItem]) -> None:
         super().__init__(data, "audio")


-class AudioEmbeddingsInput(EmbeddingsInput):
+class AudioEmbeddingItems(EmbeddingItems):

     def __init__(self, data: NestedTensors) -> None:
         super().__init__(data, "audio")
@@ -125,7 +125,7 @@ class ImageSize(NamedTuple):
     height: int


-class ImageProcessorInput(ProcessorBatchInput[HfImageItem]):
+class ImageProcessorItems(ProcessorBatchItems[HfImageItem]):

     def __init__(self, data: Sequence[HfImageItem]) -> None:
         super().__init__(data, "image")
@@ -142,19 +142,19 @@ def get_image_size(self, item_idx: int) -> ImageSize:
         assert_never(image)


-class ImageEmbeddingsInput(EmbeddingsInput):
+class ImageEmbeddingItems(EmbeddingItems):

     def __init__(self, data: NestedTensors) -> None:
         super().__init__(data, "image")


-class VideoProcessorInput(ProcessorBatchInput[HfVideoItem]):
+class VideoProcessorItems(ProcessorBatchItems[HfVideoItem]):

     def __init__(self, data: Sequence[HfVideoItem]) -> None:
         super().__init__(data, "video")


-class VideoEmbeddingsInput(EmbeddingsInput):
+class VideoEmbeddingItems(EmbeddingItems):

     def __init__(self, data: NestedTensors) -> None:
         super().__init__(data, "video")
@@ -255,7 +255,7 @@ def _parse_audio_data(
         data: ModalityData[AudioItem],
     ) -> ModalityDataItems[Any, Any]:
         if self._is_embeddings(data):
-            return AudioEmbeddingsInput(data)
+            return AudioEmbeddingItems(data)

         if (is_list_of(data, float)
                 or isinstance(data,
@@ -285,14 +285,14 @@ def _parse_audio_data(

             new_audios.append(new_audio)

-        return AudioProcessorInput(new_audios)
+        return AudioProcessorItems(new_audios)

     def _parse_image_data(
         self,
         data: ModalityData[ImageItem],
     ) -> ModalityDataItems[Any, Any]:
         if self._is_embeddings(data):
-            return ImageEmbeddingsInput(data)
+            return ImageEmbeddingItems(data)

         if (isinstance(data, Image)
                 or isinstance(data,
@@ -303,14 +303,14 @@ def _parse_image_data(
         else:
             data_items = data

-        return ImageProcessorInput(data_items)
+        return ImageProcessorItems(data_items)

     def _parse_video_data(
         self,
         data: ModalityData[VideoItem],
     ) -> ModalityDataItems[Any, Any]:
         if self._is_embeddings(data):
-            return VideoEmbeddingsInput(data)
+            return VideoEmbeddingItems(data)

         if (is_list_of(data, Image)
                 or isinstance(data,
@@ -321,7 +321,7 @@ def _parse_video_data(
         else:
             data_items = data

-        return VideoProcessorInput(data_items)
+        return VideoProcessorItems(data_items)

     def _get_subparsers(self) -> Mapping[str, ModalityDataParser]:
         return {
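
parse.py is where the naming scheme is defined: ProcessorBatchItems holds raw per-modality items destined for the HF processor, EmbeddingItems holds precomputed embeddings that bypass it, and each modality gets a thin subclass of each. The Items suffix keeps these classes distinct from the MultiModal*Inputs*/MultiModalKwargs types imported from vllm.multimodal.inputs in the model files above. Below is a torch-free, simplified sketch of the renamed hierarchy; the payload types are placeholders and the bases here stand in for ModalityDataItems, not vLLM's real code.

# Simplified, torch-free sketch of the renamed hierarchy in parse.py.
from collections.abc import Mapping, Sequence
from typing import Generic, TypeVar

_T = TypeVar("_T")


class ProcessorBatchItems(Generic[_T]):
    """Raw items that still need to go through the HF processor."""

    def __init__(self, data: Sequence[_T], modality: str) -> None:
        self.data = data
        self.modality = modality

    def get_passthrough_data(self) -> Mapping[str, object]:
        return {}  # nothing bypasses the processor


class EmbeddingItems:
    """Precomputed embeddings that skip processing entirely."""

    def __init__(self, data: Sequence[Sequence[float]], modality: str) -> None:
        self.data = data
        self.modality = modality

    def get_passthrough_data(self) -> Mapping[str, object]:
        return {f"{self.modality}_embeds": self.data}


class ImageProcessorItems(ProcessorBatchItems[bytes]):
    def __init__(self, data: Sequence[bytes]) -> None:
        super().__init__(data, "image")


class ImageEmbeddingItems(EmbeddingItems):
    def __init__(self, data: Sequence[Sequence[float]]) -> None:
        super().__init__(data, "image")


print(ImageEmbeddingItems([[0.1, 0.2]]).get_passthrough_data())
# {'image_embeds': [[0.1, 0.2]]}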
