From e1cdeea0cdbacebf5fd65a8ae1083405f219abc3 Mon Sep 17 00:00:00 2001
From: aria-hacker
Date: Wed, 9 Oct 2024 18:01:39 +0800
Subject: [PATCH] add stop for vllm readme

---
 docs/inference.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/inference.md b/docs/inference.md
index afc0313..319e318 100644
--- a/docs/inference.md
+++ b/docs/inference.md
@@ -103,11 +103,12 @@ _MULTIMODAL_MODELS["AriaForConditionalGeneration"] = (
 def main():
     llm = LLM(
         model="rhymes-ai/Aria",
+        tokenizer="rhymes-ai/Aria",
+        tokenizer_mode="slow",
         dtype="bfloat16",
         limit_mm_per_prompt={"image": 256},
         enforce_eager=True,
         trust_remote_code=True,
-        skip_tokenizer_init=True,
     )
 
     tokenizer = AutoTokenizer.from_pretrained(
@@ -143,7 +144,7 @@ def main():
                 "split_image": True,  # [Optional] whether to split the images, default `False`
             },
         },
-        sampling_params=SamplingParams(max_tokens=200, top_k=1),
+        sampling_params=SamplingParams(max_tokens=200, top_k=1, stop=["<|im_end|>"]),
     )
 
     for o in outputs:
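
For context, a minimal sketch of how the vLLM snippet in docs/inference.md reads after this patch is applied. Only the `LLM(...)` constructor arguments and the `SamplingParams` line are taken from the diff; the prompt construction and `llm.generate(...)` call are elided here, so this is an illustration of the change, not the full documented example. The apparent rationale: stop strings are matched against detokenized output, so vLLM needs a usable tokenizer, which is why `skip_tokenizer_init=True` is replaced with an explicit slow tokenizer.

```python
from vllm import LLM, SamplingParams

# Post-patch: load the tokenizer through vLLM in slow mode instead of
# skipping tokenizer initialization (the pre-patch behavior).
llm = LLM(
    model="rhymes-ai/Aria",
    tokenizer="rhymes-ai/Aria",
    tokenizer_mode="slow",
    dtype="bfloat16",
    limit_mm_per_prompt={"image": 256},
    enforce_eager=True,
    trust_remote_code=True,
)

# Stop decoding at the chat end-of-turn marker so generation does not
# run past the assistant's answer.
sampling_params = SamplingParams(max_tokens=200, top_k=1, stop=["<|im_end|>"])
```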