From ce384bcbfe9615e19ad70dd252de4c8c3bca02f9 Mon Sep 17 00:00:00 2001 From: Xin Yang Date: Mon, 2 Dec 2024 23:37:18 -0800 Subject: [PATCH] Review changes Signed-off-by: Xin Yang --- tests/entrypoints/test_chat_utils.py | 5 ----- tests/test_cache_block_hashing.py | 1 - tests/tokenization/test_tokenizer_group.py | 7 ------- .../tokenizer_group/ray_tokenizer_group.py | 5 ++--- vllm/transformers_utils/tokenizer_group/tokenizer_group.py | 4 ++-- 5 files changed, 4 insertions(+), 18 deletions(-) diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 3e9887d4ac658..996e60bfee592 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -46,7 +46,6 @@ def phi3v_tokenizer(): tokenizer_id=PHI3V_MODEL_ID, enable_lora=False, max_num_seqs=5, - max_loras=0, max_input_length=None, ) @@ -71,7 +70,6 @@ def mllama_tokenizer(): MLLAMA_MODEL_ID, enable_lora=False, max_num_seqs=5, - max_loras=0, max_input_length=None, ) @@ -684,7 +682,6 @@ def get_conversation(is_hf: bool): MLLAMA_MODEL_ID, enable_lora=False, max_num_seqs=5, - max_loras=0, max_input_length=None, ) tokenizer = tokenizer_group.tokenizer @@ -731,7 +728,6 @@ def test_resolve_content_format_hf_defined(model, expected_format): model, enable_lora=False, max_num_seqs=5, - max_loras=0, max_input_length=None, ) tokenizer = tokenizer_group.tokenizer @@ -781,7 +777,6 @@ def test_resolve_content_format_examples(template_path, expected_format): PHI3V_MODEL_ID, enable_lora=False, max_num_seqs=5, - max_loras=0, max_input_length=None, ) dummy_tokenizer = tokenizer_group.tokenizer diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py index aac66a9e9ab9e..e8f8499aa88ca 100644 --- a/tests/test_cache_block_hashing.py +++ b/tests/test_cache_block_hashing.py @@ -49,7 +49,6 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, tokenizer_id="facebook/opt-125m", enable_lora=False, max_num_seqs=max_num_seqs, - max_loras=0, max_input_length=None, ) diff --git a/tests/tokenization/test_tokenizer_group.py b/tests/tokenization/test_tokenizer_group.py index 1e476e3533aac..3faaf326f5422 100644 --- a/tests/tokenization/test_tokenizer_group.py +++ b/tests/tokenization/test_tokenizer_group.py @@ -36,7 +36,6 @@ async def test_tokenizer_group(tokenizer_group_type): tokenizer_id="gpt2", enable_lora=False, max_num_seqs=1, - max_loras=0, max_input_length=None, ) assert reference_tokenizer.encode("prompt") == tokenizer_group.encode( @@ -61,7 +60,6 @@ async def test_tokenizer_group_pool(tokenizer_group_type): tokenizer_id="gpt2", enable_lora=False, max_num_seqs=1, - max_loras=0, max_input_length=None, ) # Send multiple requests to the tokenizer group pool @@ -104,7 +102,6 @@ class EnvVarCheckerRayTokenizerGroupPool(RayTokenizerGroupPool): tokenizer_id="gpt2", enable_lora=False, max_num_seqs=1, - max_loras=0, max_input_length=None) with pytest.raises(AssertionError): tokenizer_pool.ping() @@ -116,7 +113,6 @@ class EnvVarCheckerRayTokenizerGroupPool(RayTokenizerGroupPool): tokenizer_id="gpt2", enable_lora=False, max_num_seqs=1, - max_loras=0, max_input_length=None) tokenizer_pool.ping() @@ -154,7 +150,6 @@ class FailingRayTokenizerGroupPool(RayTokenizerGroupPool): tokenizer_id="gpt2", enable_lora=False, max_num_seqs=1, - max_loras=0, max_input_length=None, fail_at=fail_at) tokenizer_actors = tokenizer_group_pool.tokenizer_actors.copy() @@ -182,7 +177,6 @@ class FailingRayTokenizerGroupPool(RayTokenizerGroupPool): tokenizer_id="gpt2", enable_lora=False, max_num_seqs=1, - max_loras=0, max_input_length=None, fail_at=fail_at) @@ -204,7 +198,6 @@ class FailingRayTokenizerGroupPool(RayTokenizerGroupPool): tokenizer_id="gpt2", enable_lora=False, max_num_seqs=1, - max_loras=0, max_input_length=2, fail_at=fail_at) tokenizer_actors = tokenizer_group_pool.tokenizer_actors.copy() diff --git a/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py index 3e9b56059db69..9a999a0d6067d 100644 --- a/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py +++ b/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py @@ -51,15 +51,14 @@ def from_config(cls, tokenizer_pool_config: Optional[TokenizerPoolConfig], return cls(**init_kwargs) def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int, - max_loras: int, max_input_length: Optional[int], - num_actors: int, ray_actor_options: dict, **tokenizer_config): + max_input_length: Optional[int], num_actors: int, + ray_actor_options: dict, **tokenizer_config): # Store a local copy of the TokenizerGroup for quick access # to underlying HF tokenizers. self._tokenizer_config = { "tokenizer_id": tokenizer_id, "enable_lora": enable_lora, "max_num_seqs": max_num_seqs, - "max_loras": max_loras, "max_input_length": max_input_length, **tokenizer_config } diff --git a/vllm/transformers_utils/tokenizer_group/tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/tokenizer_group.py index 2e1fcf2de16d4..761b07f34d2f9 100644 --- a/vllm/transformers_utils/tokenizer_group/tokenizer_group.py +++ b/vllm/transformers_utils/tokenizer_group/tokenizer_group.py @@ -15,13 +15,13 @@ class TokenizerGroup(BaseTokenizerGroup): """A group of tokenizers that can be used for LoRA adapters.""" def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int, - max_loras: int, max_input_length: Optional[int], - **tokenizer_config): + max_input_length: Optional[int], **tokenizer_config): self.tokenizer_id = tokenizer_id self.tokenizer_config = tokenizer_config self.enable_lora = enable_lora self.max_input_length = max_input_length self.tokenizer = get_tokenizer(self.tokenizer_id, **tokenizer_config) + max_loras = tokenizer_config.get("max_loras", 0) self.lora_tokenizers = LRUCache[AnyTokenizer]( capacity=max(max_loras, max_num_seqs) if enable_lora else 0)