From e1cdeea0cdbacebf5fd65a8ae1083405f219abc3 Mon Sep 17 00:00:00 2001
From: aria-hacker
Date: Wed, 9 Oct 2024 18:01:39 +0800
Subject: [PATCH] add stop for vllm readme

---
 docs/inference.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/inference.md b/docs/inference.md
index afc0313..319e318 100644
--- a/docs/inference.md
+++ b/docs/inference.md
@@ -103,11 +103,12 @@ _MULTIMODAL_MODELS["AriaForConditionalGeneration"] = (
 def main():
     llm = LLM(
         model="rhymes-ai/Aria",
+        tokenizer="rhymes-ai/Aria",
+        tokenizer_mode="slow",
         dtype="bfloat16",
         limit_mm_per_prompt={"image": 256},
         enforce_eager=True,
         trust_remote_code=True,
-        skip_tokenizer_init=True,
     )
 
     tokenizer = AutoTokenizer.from_pretrained(
@@ -143,7 +144,7 @@ def main():
                 "split_image": True,  # [Optional] whether to split the images, default `False`
             },
         },
-        sampling_params=SamplingParams(max_tokens=200, top_k=1),
+        sampling_params=SamplingParams(max_tokens=200, top_k=1, stop=["<|im_end|>"]),
     )
 
     for o in outputs:
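
For context, a minimal sketch of how the vLLM snippet in docs/inference.md reads after this patch is applied. Only the `LLM(...)` constructor arguments and the `SamplingParams` line are taken from the diff; the prompt construction and `llm.generate(...)` call are elided here, so this is an illustration of the change, not the full documented example. The apparent rationale: stop strings are matched against detokenized output, so vLLM needs a usable tokenizer, which is why `skip_tokenizer_init=True` is replaced with an explicit slow tokenizer.

```python
from vllm import LLM, SamplingParams

# Post-patch: load the tokenizer through vLLM in slow mode instead of
# skipping tokenizer initialization (the pre-patch behavior).
llm = LLM(
    model="rhymes-ai/Aria",
    tokenizer="rhymes-ai/Aria",
    tokenizer_mode="slow",
    dtype="bfloat16",
    limit_mm_per_prompt={"image": 256},
    enforce_eager=True,
    trust_remote_code=True,
)

# Stop decoding at the chat end-of-turn marker so generation does not
# run past the assistant's answer.
sampling_params = SamplingParams(max_tokens=200, top_k=1, stop=["<|im_end|>"])
```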