From f6d5301429a8675bf49dec7450b66c431e9787c5 Mon Sep 17 00:00:00 2001
From: Ruedi Steinmann
Date: Thu, 2 Jan 2025 17:17:58 +0100
Subject: [PATCH] Add split_special_tokens to the Tokenize Endpoint

Signed-off-by: Ruedi Steinmann
---
 vllm/entrypoints/openai/protocol.py             |  8 +++++++-
 vllm/entrypoints/openai/serving_engine.py       | 11 +++++++++--
 vllm/entrypoints/openai/serving_tokenization.py |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 14e41346df775..c228be6204251 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -1251,7 +1251,13 @@ class TokenizeCompletionRequest(OpenAIBaseModel):
         default=True,
         description=(
             "If true (the default), special tokens (e.g. BOS) will be added to "
-            "the prompt."),
+            "the prompt.")
+    )
+    split_special_tokens: bool = Field(
+        default=False,
+        description=(
+            "If set to true, special tokens in the prompt will be split. For example, if <|fim_prefix|> is a special token, "
+            "it would by default be tokenized to [151661]; with this flag set to true, it becomes [27, 91, 69, 318, 37151, 91, 29].")
     )
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 319f869240036..70da9ca5838b5 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -159,12 +159,14 @@ def _normalize_prompt_text_to_input(
         prompt: str,
         truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]],
         add_special_tokens: bool,
+        split_special_tokens: bool = False,
     ) -> TextTokensPrompt:
         if truncate_prompt_tokens is None:
-            encoded = tokenizer(prompt, add_special_tokens=add_special_tokens)
+            encoded = tokenizer(prompt, add_special_tokens=add_special_tokens, split_special_tokens=split_special_tokens)
         else:
             encoded = tokenizer(prompt,
                                 add_special_tokens=add_special_tokens,
+                                split_special_tokens=split_special_tokens,
                                 truncation=True,
                                 max_length=truncate_prompt_tokens)
@@ -298,6 +300,7 @@ def _tokenize_prompt_input_or_inputs(
         input_or_inputs: Union[str, List[str], List[int], List[List[int]]],
         truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
         add_special_tokens: bool = True,
+        split_special_tokens: bool = False,
     ) -> List[TextTokensPrompt]:
         """
         Tokenize/detokenize depending on the input format.
@@ -316,7 +319,9 @@ def _tokenize_prompt_input_or_inputs(
                 tokenizer,
                 prompt=prompt_input["content"],
                 truncate_prompt_tokens=truncate_prompt_tokens,
-                add_special_tokens=add_special_tokens)
+                add_special_tokens=add_special_tokens,
+                split_special_tokens=split_special_tokens,
+            )
             if prompt_input["is_tokens"] is False else
             self._normalize_prompt_tokens_to_input(
                 request,
@@ -333,6 +338,7 @@ async def _preprocess_completion(
         input_or_inputs: Union[str, List[str], List[int], List[List[int]]],
         truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
         add_special_tokens: bool = True,
+        split_special_tokens: bool = False,
     ) -> Tuple[List[TextTokensPrompt], List[TokensPrompt]]:
         request_prompts = await self._tokenize_prompt_input_or_inputs_async(
             request,
@@ -340,6 +346,7 @@ async def _preprocess_completion(
             input_or_inputs,
             truncate_prompt_tokens=truncate_prompt_tokens,
             add_special_tokens=add_special_tokens,
+            split_special_tokens=split_special_tokens,
         )
 
         engine_prompts = [
diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py
index b67ecfb01316f..1989da5a0cc34 100644
--- a/vllm/entrypoints/openai/serving_tokenization.py
+++ b/vllm/entrypoints/openai/serving_tokenization.py
@@ -85,6 +85,7 @@ async def create_tokenize(
                 tokenizer,
                 request.prompt,
                 add_special_tokens=request.add_special_tokens,
+                split_special_tokens=request.split_special_tokens,
             )
         except ValueError as e:
             logger.exception("Error in preprocessing prompt inputs")
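
For reviewers, a minimal usage sketch of the new flag (not part of the patch): it assumes a vLLM OpenAI-compatible server running locally on port 8000 and a model whose tokenizer defines <|fim_prefix|> as a special token; the model name below is hypothetical.

    # Illustrative sketch only: assumes a local vLLM server on port 8000 and a
    # tokenizer that defines <|fim_prefix|> as a special token.
    import requests

    payload = {
        "model": "my-fim-model",                  # hypothetical model name
        "prompt": "<|fim_prefix|>def add(a, b):",
        "add_special_tokens": False,
        # Flag added by this patch: split special tokens into their surface-form
        # pieces instead of mapping each one to a single special-token id.
        "split_special_tokens": True,
    }
    resp = requests.post("http://localhost:8000/tokenize", json=payload)
    # With the flag set, <|fim_prefix|> is expected to appear as
    # [27, 91, 69, 318, 37151, 91, 29, ...] rather than [151661, ...].
    print(resp.json()["tokens"])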