Skip to content

Commit

Permalink
Avoid extra placeholder in phi3v
Browse files Browse the repository at this point in the history
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 committed Dec 24, 2024
1 parent fa54292 commit 64996bb
Showing 1 changed file with 23 additions and 6 deletions.
29 changes: 23 additions & 6 deletions vllm/model_executor/models/phi3v.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@
from vllm.model_executor.models.clip import CLIPVisionModel
from vllm.model_executor.sampling_metadata import SamplingMetadata
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (MultiModalFieldConfig, MultiModalKwargs,
NestedTensors)
from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
MultiModalInputsV2, MultiModalKwargs,
NestedTensors, PlaceholderRange)
from vllm.multimodal.processing import (BaseMultiModalProcessor,
MultiModalDataItems, ProcessorInputs,
PromptReplacement,
Expand Down Expand Up @@ -315,14 +316,11 @@ def get_max_phi3v_image_tokens(

processor = ctx.get_hf_processor(**hf_processor_mm_kwargs)

num_image_tokens = processor.calc_num_image_tokens_from_image_size(
return processor.calc_num_image_tokens_from_image_size(
width=MAX_IMAGE_FEATURE_SIZE_WIDTH,
height=MAX_IMAGE_FEATURE_SIZE_HEIGHT,
)

# Include the separator (bos_token_id)
return num_image_tokens + 1


class Phi3VMultiModalProcessor(BaseMultiModalProcessor):

Expand Down Expand Up @@ -443,6 +441,25 @@ def _get_dummy_mm_inputs(
mm_data=data,
)

def apply(
    self,
    prompt_text: str,
    mm_data: MultiModalDataDict,
    hf_processor_mm_kwargs: Mapping[str, object],
) -> MultiModalInputsV2:
    """Run the base processor, then shorten every placeholder range by one.

    The parent implementation produces the multimodal inputs; each entry
    under ``"mm_placeholders"`` is then rebuilt with the same offset and a
    length reduced by one, so that the trailing ``bos_token_id`` is not
    counted as part of the placeholder.
    """
    outputs = super().apply(prompt_text, mm_data, hf_processor_mm_kwargs)

    # Rebuild the per-modality placeholder lists, dropping the trailing
    # bos_token_id from each range.
    trimmed = {}
    for modality, ranges in outputs["mm_placeholders"].items():
        trimmed[modality] = [
            PlaceholderRange(offset=r["offset"], length=r["length"] - 1)
            for r in ranges
        ]
    outputs["mm_placeholders"] = trimmed

    return outputs


@MULTIMODAL_REGISTRY.register_max_image_tokens(get_max_phi3v_image_tokens)
@MULTIMODAL_REGISTRY.register_processor(Phi3VMultiModalProcessor)
Expand Down

0 comments on commit 64996bb

Please sign in to comment.