From f6d5301429a8675bf49dec7450b66c431e9787c5 Mon Sep 17 00:00:00 2001
From: Ruedi Steinmann
Date: Thu, 2 Jan 2025 17:17:58 +0100
Subject: [PATCH] Add split_special_tokens to the Tokenize Endpoint

Signed-off-by: Ruedi Steinmann
---
 vllm/entrypoints/openai/protocol.py             |  8 +++++++-
 vllm/entrypoints/openai/serving_engine.py       | 11 +++++++++--
 vllm/entrypoints/openai/serving_tokenization.py |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 14e41346df775..c228be6204251 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -1251,7 +1251,13 @@ class TokenizeCompletionRequest(OpenAIBaseModel):
         default=True,
         description=(
             "If true (the default), special tokens (e.g. BOS) will be added to "
-            "the prompt."),
+            "the prompt.")
+    )
+    split_special_tokens: bool = Field(
+        default=False,
+        description=(
+            "If set to true, special tokens in the prompt will be split. For example, if <|fim_prefix|> is a special token, "
+            "it would by default be tokenized to [151661]; with this flag set to true, it becomes [27, 91, 69, 318, 37151, 91, 29].")
     )
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 319f869240036..70da9ca5838b5 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -159,12 +159,14 @@ def _normalize_prompt_text_to_input(
         prompt: str,
         truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]],
         add_special_tokens: bool,
+        split_special_tokens: bool = False,
     ) -> TextTokensPrompt:
         if truncate_prompt_tokens is None:
-            encoded = tokenizer(prompt, add_special_tokens=add_special_tokens)
+            encoded = tokenizer(prompt, add_special_tokens=add_special_tokens, split_special_tokens=split_special_tokens)
         else:
             encoded = tokenizer(prompt,
                                 add_special_tokens=add_special_tokens,
+                                split_special_tokens=split_special_tokens,
                                 truncation=True,
                                 max_length=truncate_prompt_tokens)
@@ -298,6 +300,7 @@ def _tokenize_prompt_input_or_inputs(
         input_or_inputs: Union[str, List[str], List[int], List[List[int]]],
         truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
         add_special_tokens: bool = True,
+        split_special_tokens: bool = False,
     ) -> List[TextTokensPrompt]:
         """
         Tokenize/detokenize depending on the input format.
@@ -316,7 +319,9 @@ def _tokenize_prompt_input_or_inputs(
                 tokenizer,
                 prompt=prompt_input["content"],
                 truncate_prompt_tokens=truncate_prompt_tokens,
-                add_special_tokens=add_special_tokens)
+                add_special_tokens=add_special_tokens,
+                split_special_tokens=split_special_tokens,
+            )
             if prompt_input["is_tokens"] is False else
             self._normalize_prompt_tokens_to_input(
                 request,
@@ -333,6 +338,7 @@ async def _preprocess_completion(
         input_or_inputs: Union[str, List[str], List[int], List[List[int]]],
         truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None,
         add_special_tokens: bool = True,
+        split_special_tokens: bool = False,
     ) -> Tuple[List[TextTokensPrompt], List[TokensPrompt]]:
         request_prompts = await self._tokenize_prompt_input_or_inputs_async(
             request,
@@ -340,6 +346,7 @@ async def _preprocess_completion(
             input_or_inputs,
             truncate_prompt_tokens=truncate_prompt_tokens,
             add_special_tokens=add_special_tokens,
+            split_special_tokens=split_special_tokens,
         )
 
         engine_prompts = [
diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py
index b67ecfb01316f..1989da5a0cc34 100644
--- a/vllm/entrypoints/openai/serving_tokenization.py
+++ b/vllm/entrypoints/openai/serving_tokenization.py
@@ -85,6 +85,7 @@ async def create_tokenize(
                 tokenizer,
                 request.prompt,
                 add_special_tokens=request.add_special_tokens,
+                split_special_tokens=request.split_special_tokens,
             )
         except ValueError as e:
             logger.exception("Error in preprocessing prompt inputs")
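
For reviewers, a minimal usage sketch of the new flag (not part of the patch): it assumes a vLLM OpenAI-compatible server running locally on port 8000 and a model whose tokenizer defines <|fim_prefix|> as a special token; the model name below is hypothetical.

    # Illustrative sketch only: assumes a local vLLM server on port 8000 and a
    # tokenizer that defines <|fim_prefix|> as a special token.
    import requests

    payload = {
        "model": "my-fim-model",                  # hypothetical model name
        "prompt": "<|fim_prefix|>def add(a, b):",
        "add_special_tokens": False,
        # Flag added by this patch: split special tokens into their surface-form
        # pieces instead of mapping each one to a single special-token id.
        "split_special_tokens": True,
    }
    resp = requests.post("http://localhost:8000/tokenize", json=payload)
    # With the flag set, <|fim_prefix|> is expected to appear as
    # [27, 91, 69, 318, 37151, 91, 29, ...] rather than [151661, ...].
    print(resp.json()["tokens"])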