Skip to content

Commit

Permalink
Fix placeholder maps handling on V0
Browse files (browse the repository at this point in the history)
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 committed Nov 27, 2024
1 parent 0194324 commit 09618d0
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 14 deletions.
45 changes: 33 additions & 12 deletions vllm/multimodal/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,26 +326,47 @@ def from_seq_group(
src_ranges = []
dest_ranges = []
"""
if (not seq_group.multi_modal_data
or not seq_group.multi_modal_placeholders):
return seq_group.multi_modal_data, {}
seq_mm_data = seq_group.multi_modal_data
seq_mm_placeholders = seq_group.multi_modal_placeholders

mm_data = {**seq_group.multi_modal_data}
placeholder_maps: Dict[str, MultiModalPlaceholderMap] = defaultdict(
if not seq_mm_data or not seq_mm_placeholders:
return seq_mm_data, {}

# For merged processor, we directly use mm_kwargs as mm_data
if isinstance(seq_mm_data, MultiModalKwargs):
placeholder_maps = dict[str, MultiModalPlaceholderMap]()

for modality, placeholders in seq_mm_placeholders.items():
placeholder_map = MultiModalPlaceholderMap()

if positions:
placeholder_map.append_items_from_seq_group(
positions,
# Dummy, since we don't care about intersecting items
[None] * len(placeholders),
placeholders,
)

placeholder_maps[modality] = placeholder_map

return seq_mm_data, placeholder_maps

mm_data = {**seq_mm_data}
placeholder_maps = defaultdict[str, MultiModalPlaceholderMap](
MultiModalPlaceholderMap)

for (
modality,
placeholders,
) in seq_group.multi_modal_placeholders.items():
for modality, placeholders in seq_mm_placeholders.items():
mm_items = mm_data.pop(modality)
if not isinstance(mm_items, list):
mm_items = [mm_items]

if positions:
intersecting_items = placeholder_maps[
modality].append_items_from_seq_group(
positions, mm_items, placeholders)
intersecting_items = placeholder_maps[modality] \
.append_items_from_seq_group(
positions,
mm_items,
placeholders,
)

if intersecting_items:
mm_data[modality] = intersecting_items
Expand Down
5 changes: 3 additions & 2 deletions vllm/v1/engine/mm_input_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ def process_inputs(
mm_data: MultiModalDataDict,
mm_processor_kwargs: Optional[Dict[str, Any]],
) -> List[MultiModalKwargs]:
if self.mm_registry.has_processor(self.model_config):
return [MultiModalKwargs(mm_data)] # Already processed
# Skip this redundant step if merged processor has been applied
if isinstance(mm_data, MultiModalKwargs):
return [mm_data]

image_inputs = mm_data["image"]
if not isinstance(image_inputs, list):
Expand Down

0 comments on commit 09618d0

Please sign in to comment.