From 77527606950b62ace66a600bb1a3e35fa8b0d361 Mon Sep 17 00:00:00 2001
From: DarkLight1337
Date: Mon, 30 Dec 2024 11:15:35 +0000
Subject: [PATCH] Iterate

Signed-off-by: DarkLight1337
---
 vllm/multimodal/parse.py      | 11 ++++++-----
 vllm/multimodal/processing.py |  3 +++
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/vllm/multimodal/parse.py b/vllm/multimodal/parse.py
index d0cfea845c2c0..3b1434607527b 100644
--- a/vllm/multimodal/parse.py
+++ b/vllm/multimodal/parse.py
@@ -323,21 +323,22 @@ def _parse_video_data(
 
         return VideoProcessorInput(data_items)
 
-    def _get_mm_data_parsers(self) -> Mapping[str, ModalityDataParser]:
+    def _get_subparsers(self) -> Mapping[str, ModalityDataParser]:
         return {
             "audio": self._parse_audio_data,
             "image": self._parse_image_data,
             "video": self._parse_video_data,
         }
 
-    def parse_mm_data(self,mm_data: MultiModalDataDict) -> MultiModalDataItems:
-        parsers = self._get_mm_data_parsers()
+    def parse_mm_data(self,
+                      mm_data: MultiModalDataDict) -> MultiModalDataItems:
+        subparsers = self._get_subparsers()
 
         mm_items = MultiModalDataItems()
         for k, v in mm_data.items():
-            if k not in parsers:
+            if k not in subparsers:
                 raise ValueError(f"Unsupported modality: {k}")
 
-            mm_items[k] = parsers[k](v)
+            mm_items[k] = subparsers[k](v)
 
         return mm_items
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index 84b114b7a6939..180489166b407 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -626,6 +626,9 @@ def _get_data_parser(self) -> MultiModalDataParser:
         """
         Construct a data parser to preprocess multi-modal data items
         before passing them to :meth:`_get_hf_mm_data`.
+
+        You can support additional modalities by creating a subclass
+        of :class:`MultiModalDataParser` that has additional subparsers.
+        """
         return MultiModalDataParser()
 
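
Note (not part of the patch): the new docstring on _get_data_parser says additional
modalities can be supported by subclassing MultiModalDataParser with extra subparsers.
The snippet below is a minimal sketch of what that could look like against the API shown
in this diff; the "thermal" modality, ThermalAwareDataParser, and _parse_thermal_data are
invented names for illustration and do not exist in vLLM.

    from collections.abc import Mapping
    from typing import Any

    from vllm.multimodal.parse import ModalityDataParser, MultiModalDataParser


    class ThermalAwareDataParser(MultiModalDataParser):
        """Hypothetical subclass that registers one extra subparser."""

        def _parse_thermal_data(self, data: Any):
            # Convert the raw "thermal" entries into whatever item type the
            # downstream HF processor expects; left abstract in this sketch.
            raise NotImplementedError

        def _get_subparsers(self) -> Mapping[str, ModalityDataParser]:
            return {
                **super()._get_subparsers(),  # keep audio/image/video support
                "thermal": self._parse_thermal_data,
            }

A processor subclass that wanted this behavior would then presumably override
_get_data_parser() (the method whose docstring this patch extends) to return
ThermalAwareDataParser() instead of the base MultiModalDataParser().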