diff --git a/docs/inference.md b/docs/inference.md
index afc0313..319e318 100644
--- a/docs/inference.md
+++ b/docs/inference.md
@@ -103,11 +103,12 @@ _MULTIMODAL_MODELS["AriaForConditionalGeneration"] = (
 def main():
     llm = LLM(
         model="rhymes-ai/Aria",
+        tokenizer="rhymes-ai/Aria",
+        tokenizer_mode="slow",
         dtype="bfloat16",
         limit_mm_per_prompt={"image": 256},
         enforce_eager=True,
         trust_remote_code=True,
-        skip_tokenizer_init=True,
     )
 
     tokenizer = AutoTokenizer.from_pretrained(
@@ -143,7 +144,7 @@ def main():
                 "split_image": True,  # [Optional] whether to split the images, default `False`
             },
         },
-        sampling_params=SamplingParams(max_tokens=200, top_k=1),
+        sampling_params=SamplingParams(max_tokens=200, top_k=1, stop=["<|im_end|>"]),
     )
 
     for o in outputs: