Skip to content

Commit

Permalink
Review changes
Browse files Browse the repository at this point in the history
Signed-off-by: Xin Yang <[email protected]>
  • Loading branch information
xyang16 committed Dec 3, 2024
1 parent 3b88608 commit ce384bc
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 18 deletions.
5 changes: 0 additions & 5 deletions tests/entrypoints/test_chat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def phi3v_tokenizer():
tokenizer_id=PHI3V_MODEL_ID,
enable_lora=False,
max_num_seqs=5,
max_loras=0,
max_input_length=None,
)

Expand All @@ -71,7 +70,6 @@ def mllama_tokenizer():
MLLAMA_MODEL_ID,
enable_lora=False,
max_num_seqs=5,
max_loras=0,
max_input_length=None,
)

Expand Down Expand Up @@ -684,7 +682,6 @@ def get_conversation(is_hf: bool):
MLLAMA_MODEL_ID,
enable_lora=False,
max_num_seqs=5,
max_loras=0,
max_input_length=None,
)
tokenizer = tokenizer_group.tokenizer
Expand Down Expand Up @@ -731,7 +728,6 @@ def test_resolve_content_format_hf_defined(model, expected_format):
model,
enable_lora=False,
max_num_seqs=5,
max_loras=0,
max_input_length=None,
)
tokenizer = tokenizer_group.tokenizer
Expand Down Expand Up @@ -781,7 +777,6 @@ def test_resolve_content_format_examples(template_path, expected_format):
PHI3V_MODEL_ID,
enable_lora=False,
max_num_seqs=5,
max_loras=0,
max_input_length=None,
)
dummy_tokenizer = tokenizer_group.tokenizer
Expand Down
1 change: 0 additions & 1 deletion tests/test_cache_block_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
tokenizer_id="facebook/opt-125m",
enable_lora=False,
max_num_seqs=max_num_seqs,
max_loras=0,
max_input_length=None,
)

Expand Down
7 changes: 0 additions & 7 deletions tests/tokenization/test_tokenizer_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ async def test_tokenizer_group(tokenizer_group_type):
tokenizer_id="gpt2",
enable_lora=False,
max_num_seqs=1,
max_loras=0,
max_input_length=None,
)
assert reference_tokenizer.encode("prompt") == tokenizer_group.encode(
Expand All @@ -61,7 +60,6 @@ async def test_tokenizer_group_pool(tokenizer_group_type):
tokenizer_id="gpt2",
enable_lora=False,
max_num_seqs=1,
max_loras=0,
max_input_length=None,
)
# Send multiple requests to the tokenizer group pool
Expand Down Expand Up @@ -104,7 +102,6 @@ class EnvVarCheckerRayTokenizerGroupPool(RayTokenizerGroupPool):
tokenizer_id="gpt2",
enable_lora=False,
max_num_seqs=1,
max_loras=0,
max_input_length=None)
with pytest.raises(AssertionError):
tokenizer_pool.ping()
Expand All @@ -116,7 +113,6 @@ class EnvVarCheckerRayTokenizerGroupPool(RayTokenizerGroupPool):
tokenizer_id="gpt2",
enable_lora=False,
max_num_seqs=1,
max_loras=0,
max_input_length=None)
tokenizer_pool.ping()

Expand Down Expand Up @@ -154,7 +150,6 @@ class FailingRayTokenizerGroupPool(RayTokenizerGroupPool):
tokenizer_id="gpt2",
enable_lora=False,
max_num_seqs=1,
max_loras=0,
max_input_length=None,
fail_at=fail_at)
tokenizer_actors = tokenizer_group_pool.tokenizer_actors.copy()
Expand Down Expand Up @@ -182,7 +177,6 @@ class FailingRayTokenizerGroupPool(RayTokenizerGroupPool):
tokenizer_id="gpt2",
enable_lora=False,
max_num_seqs=1,
max_loras=0,
max_input_length=None,
fail_at=fail_at)

Expand All @@ -204,7 +198,6 @@ class FailingRayTokenizerGroupPool(RayTokenizerGroupPool):
tokenizer_id="gpt2",
enable_lora=False,
max_num_seqs=1,
max_loras=0,
max_input_length=2,
fail_at=fail_at)
tokenizer_actors = tokenizer_group_pool.tokenizer_actors.copy()
Expand Down
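5 changes: 2 additions & 3 deletions vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
Original file line number Diff line number Diff line change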
Expand Up @@ -51,15 +51,14 @@ def from_config(cls, tokenizer_pool_config: Optional[TokenizerPoolConfig],
return cls(**init_kwargs)

def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int,
max_loras: int, max_input_length: Optional[int],
num_actors: int, ray_actor_options: dict, **tokenizer_config):
max_input_length: Optional[int], num_actors: int,
ray_actor_options: dict, **tokenizer_config):
# Store a local copy of the TokenizerGroup for quick access
# to underlying HF tokenizers.
self._tokenizer_config = {
"tokenizer_id": tokenizer_id,
"enable_lora": enable_lora,
"max_num_seqs": max_num_seqs,
"max_loras": max_loras,
"max_input_length": max_input_length,
**tokenizer_config
}
Expand Down
4 changes: 2 additions & 2 deletions vllm/transformers_utils/tokenizer_group/tokenizer_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ class TokenizerGroup(BaseTokenizerGroup):
"""A group of tokenizers that can be used for LoRA adapters."""

def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int,
max_loras: int, max_input_length: Optional[int],
**tokenizer_config):
max_input_length: Optional[int], **tokenizer_config):
self.tokenizer_id = tokenizer_id
self.tokenizer_config = tokenizer_config
self.enable_lora = enable_lora
self.max_input_length = max_input_length
self.tokenizer = get_tokenizer(self.tokenizer_id, **tokenizer_config)
max_loras = tokenizer_config.get("max_loras", 0)
self.lora_tokenizers = LRUCache[AnyTokenizer](
capacity=max(max_loras, max_num_seqs) if enable_lora else 0)

Expand Down

0 comments on commit ce384bc

Please sign in to comment.