Skip to content

Commit

Permalink
Add suggested max model len
Browse files Browse the repository at this point in the history
Signed-off-by: mgoin <[email protected]>
  • Loading branch information
mgoin committed Jan 3, 2025
1 parent f0c0bbc commit a0a6742
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions vllm/worker/tpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,10 @@ def __init__(
  logger.warning(
  "The max_model_len (%d) is too large. This may degrade the "
  "performance due to the insufficient smem size. Consider "
- "setting --max-model-len to a smaller value.",
- self.model_config.max_model_len)
+ "setting --max-model-len to a smaller value, like %d.",
+ self.model_config.max_model_len,
+ self.model_config.max_model_len /
+ (block_table_size / smem_size))

def load_model(self) -> None:
self.device = self.device_config.device
Expand Down

0 comments on commit a0a6742

Please sign in to comment.