diff --git a/vllm/model_executor/models/whisper.py b/vllm/model_executor/models/whisper.py
index d2318ebc8bfca..e8b8c456aeae7 100644
--- a/vllm/model_executor/models/whisper.py
+++ b/vllm/model_executor/models/whisper.py
@@ -1,6 +1,6 @@
 import math
-from typing import (Iterable, List, Literal, Mapping, Optional, Set, Tuple,
-                    TypedDict, Union)
+from typing import (Iterable, List, Mapping, Optional, Set, Tuple, TypedDict,
+                    Union)
 
 import numpy as np
 import torch
@@ -589,7 +589,6 @@ def dummy_encoder_data_for_whisper(ctx: InputContext, seq_len: int,
 
 
 def input_processor_for_whisper(ctx: InputContext, inputs):
-
     multi_modal_data = inputs["encoder"]["multi_modal_data"]
     if isinstance(multi_modal_data["audio"], list):
         assert len(multi_modal_data["audio"]) == 1
@@ -710,7 +709,7 @@ def _parse_and_validate_audio_input(
         if input_features is not None:
             if not isinstance(input_features, (torch.Tensor, list)):
                 raise ValueError("Incorrect type of audio features. "
-                                f"Got type: {type(input_features)}")
+                                 f"Got type: {type(input_features)}")
             input_features = [feat.to(self.dtype) for feat in input_features]
 
         return WhisperAudioInputs(input_features=input_features)
diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py
index c09b4bfbc34d4..f75b835e8464b 100644
--- a/vllm/multimodal/processing.py
+++ b/vllm/multimodal/processing.py
@@ -62,7 +62,8 @@ def _cached_encode(
     *,
     add_special_tokens: bool = False,
 ) -> list[int]:
-    return encode_tokens(tokenizer, text,
+    return encode_tokens(tokenizer,
+                         text,
                          add_special_tokens=add_special_tokens)
 
 
@@ -746,7 +747,8 @@ def _apply_prompt_replacements(
             mm_item_counts,
         )
 
-        token_ids = encode_tokens(tokenizer, text,
+        token_ids = encode_tokens(tokenizer,
+                                  text,
                                   add_special_tokens=False)
 
         matched_repls = [match.prompt_repl for match in text_matches]
diff --git a/vllm/transformers_utils/tokenizer_group/tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/tokenizer_group.py
index 0b20be2ac071f..6dc2f90561873 100644
--- a/vllm/transformers_utils/tokenizer_group/tokenizer_group.py
+++ b/vllm/transformers_utils/tokenizer_group/tokenizer_group.py
@@ -2,8 +2,7 @@
 
 from vllm.config import TokenizerPoolConfig
 from vllm.lora.request import LoRARequest
-from vllm.transformers_utils.tokenizer import (AnyTokenizer,
-                                               encode_tokens,
+from vllm.transformers_utils.tokenizer import (AnyTokenizer, encode_tokens,
                                                get_lora_tokenizer,
                                                get_lora_tokenizer_async,
                                                get_tokenizer)
@@ -59,7 +58,8 @@ def encode(self,
                lora_request: Optional[LoRARequest] = None,
                add_special_tokens: Optional[bool] = None) -> List[int]:
         tokenizer = self.get_lora_tokenizer(lora_request)
-        ret = encode_tokens(tokenizer, prompt,
+        ret = encode_tokens(tokenizer,
+                            prompt,
                             add_special_tokens=add_special_tokens)
         self._raise_if_input_too_long(ret, lora_request)
         return ret
@@ -71,7 +71,8 @@ async def encode_async(
             lora_request: Optional[LoRARequest] = None,
             add_special_tokens: Optional[bool] = None) -> List[int]:
         tokenizer = await self.get_lora_tokenizer_async(lora_request)
-        ret = encode_tokens(tokenizer, prompt,
+        ret = encode_tokens(tokenizer,
+                            prompt,
                             add_special_tokens=add_special_tokens)
         self._raise_if_input_too_long(ret, lora_request)
         return ret