From ae29a2e2c7580019e82592f97ed9d7997fc2ec0f Mon Sep 17 00:00:00 2001
From: Tomer Asida
Date: Tue, 3 Dec 2024 10:39:25 +0200
Subject: [PATCH] fix - histogram_prefill_time_request and histogram_decode_time_request now record prefill / decode respectively

Signed-off-by: Tomer Asida
---
 vllm/engine/metrics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py
index 4869557ba9b44..a5ae21c3966a7 100644
--- a/vllm/engine/metrics.py
+++ b/vllm/engine/metrics.py
@@ -599,9 +599,9 @@ def _log_prometheus(self, stats: Stats) -> None:
                             stats.time_queue_requests)
         self._log_histogram(self.metrics.histogram_inference_time_request,
                             stats.time_inference_requests)
-        self._log_histogram(self.metrics.histogram_decode_time_request,
-                            stats.time_prefill_requests)
         self._log_histogram(self.metrics.histogram_prefill_time_request,
+                            stats.time_prefill_requests)
+        self._log_histogram(self.metrics.histogram_decode_time_request,
                             stats.time_decode_requests)
         self._log_histogram(self.metrics.histogram_time_in_queue_request,
                             stats.time_in_queue_requests)