async metrics
robertgshaw2-neuralmagic committed Aug 10, 2024
1 parent 4c5d8e8 commit 3015a4b
Showing 2 changed files with 13 additions and 3 deletions.
2 changes: 1 addition & 1 deletion examples/openai_completion_client.py
@@ -2,7 +2,7 @@
 
 # Modify OpenAI's API key and API base to use vLLM's API server.
 openai_api_key = "EMPTY"
-openai_api_base = "http://localhost:8000/v1"
+openai_api_base = "http://localhost:8001/v1"
 
 client = OpenAI(
     # defaults to os.environ.get("OPENAI_API_KEY")
14 changes: 12 additions & 2 deletions vllm/engine/async_llm_engine.py
@@ -33,6 +33,7 @@
 logger = init_logger(__name__)
 ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S
 
+_running_tasks = set()
 
 class AsyncEngineDeadError(RuntimeError):
     pass
@@ -251,6 +252,10 @@ def has_new_requests(self):
 class _AsyncLLMEngine(LLMEngine):
     """Extension of LLMEngine to add async methods."""
 
+    async def do_log_stats_async(self, scheduler_outputs, model_output):
+        self.do_log_stats(scheduler_outputs, model_output)
+
+
     async def step_async(
         self, virtual_engine: int
     ) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
@@ -289,7 +294,11 @@ async def step_async(
             scheduler_outputs.ignored_seq_groups, seq_group_metadata_list)
 
         # Log stats.
-        self.do_log_stats(scheduler_outputs, output)
+        log_task = asyncio.create_task(self.do_log_stats_async(
+            scheduler_outputs, output))
+        _running_tasks.add(log_task)
+        log_task.add_done_callback(_running_tasks.discard)
+        # self.do_log_stats(scheduler_outputs, output)
 
         # Tracing
         self.do_tracing(scheduler_outputs)
@@ -1068,7 +1077,8 @@ async def do_log_stats(
             await self.engine.do_log_stats.remote(  # type: ignore
                 scheduler_outputs, model_output)
         else:
-            self.engine.do_log_stats()
+            self.engine.do_log_stats(scheduler_outputs,
+                                     model_output)
 
     async def check_health(self) -> None:
         """Raises an error if engine is unhealthy."""
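The core of this change is the standard asyncio fire-and-forget pattern: the event loop keeps only weak references to tasks, so a task created with asyncio.create_task and not stored anywhere can be garbage-collected before it finishes. Keeping each task in a module-level set and discarding it from a done callback preserves a strong reference for the task's lifetime. Below is a minimal standalone sketch of the same pattern; the names are illustrative, not vLLM's API. Note that the logging body stays synchronous, so it still runs on the event loop thread, it is just deferred off the step's critical path.

import asyncio

# Strong references to in-flight logging tasks; without these, the event
# loop's weak references would allow a pending task to be garbage-collected.
_running_tasks: set = set()


def log_stats(step: int) -> None:
    # Stand-in for a synchronous metrics logger (illustrative only).
    print(f"step {step}: stats logged")


async def log_stats_async(step: int) -> None:
    # Thin coroutine wrapper: the body is still synchronous and runs on the
    # event loop thread, but only after the engine step has returned.
    log_stats(step)


async def engine_step(step: int) -> None:
    # Schedule logging without awaiting it, so this step returns immediately.
    task = asyncio.create_task(log_stats_async(step))
    _running_tasks.add(task)                        # keep the task alive
    task.add_done_callback(_running_tasks.discard)  # drop it once it finishes


async def main() -> None:
    for i in range(3):
        await engine_step(i)
    # Let any still-pending logging tasks finish before the loop shuts down.
    await asyncio.gather(*_running_tasks)


if __name__ == "__main__":
    asyncio.run(main())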
