From fa42cdb5678bac03446e65ca8abb6d8c74d254ea Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Mon, 2 Dec 2024 21:24:45 +0000 Subject: [PATCH 1/5] first commit --- jetstream/core/metrics/prometheus.py | 9 ++++++++- jetstream/core/server_lib.py | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/jetstream/core/metrics/prometheus.py b/jetstream/core/metrics/prometheus.py index dc8a00e9..34b77531 100644 --- a/jetstream/core/metrics/prometheus.py +++ b/jetstream/core/metrics/prometheus.py @@ -75,7 +75,11 @@ def __new__(cls): documentation="The percentage of decode slots currently being used", labelnames=["id", "idx"], ) - + _model_load_time = Gauge( + name="jetstream_model_load_time", + documentation="Total time taken to load the model", + labelnames=["id"], + ) _server_startup_latency = Gauge( name="jetstream_server_startup_latency", documentation="Total time taken to start the Jetstream server", @@ -232,6 +236,9 @@ def get_slots_used_percentage_metric(self, idx: int): def get_server_startup_latency_metric(self): return self._server_startup_latency.labels(id=self._id) + def get_model_load_time_metric(self): + return self._model_load_time + def get_time_to_first_token(self): return self._time_to_first_token.labels(id=self._id) diff --git a/jetstream/core/server_lib.py b/jetstream/core/server_lib.py index b323286a..53fdc6b4 100644 --- a/jetstream/core/server_lib.py +++ b/jetstream/core/server_lib.py @@ -113,10 +113,13 @@ def create_driver( An orchestrator driver. """ engines = config_lib.get_engines(config, devices=devices) + model_load_start_time = time.time() prefill_params = [pe.load_params() for pe in engines.prefill_engines] generate_params = [ge.load_params() for ge in engines.generate_engines] shared_params = [ie.load_params() for ie in engines.interleaved_engines] logging.info("Loaded all weights.") + if metrics_collector: + metrics_collector.get_model_load_time_metric().set(time.time() - model_load_start_time) interleaved_mode = ( len(config.prefill_slices) + len(config.generate_slices) == 0 ) From d86b69befcb3897c471d5a2e8112b0605811e2d9 Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Mon, 2 Dec 2024 21:33:21 +0000 Subject: [PATCH 2/5] fmt --- jetstream/core/server_lib.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/jetstream/core/server_lib.py b/jetstream/core/server_lib.py index 53fdc6b4..40a1492d 100644 --- a/jetstream/core/server_lib.py +++ b/jetstream/core/server_lib.py @@ -119,7 +119,9 @@ def create_driver( shared_params = [ie.load_params() for ie in engines.interleaved_engines] logging.info("Loaded all weights.") if metrics_collector: - metrics_collector.get_model_load_time_metric().set(time.time() - model_load_start_time) + metrics_collector.get_model_load_time_metric().set( + time.time() - model_load_start_time + ) interleaved_mode = ( len(config.prefill_slices) + len(config.generate_slices) == 0 ) From 7746665ef9917e65965f5064e268141e4f838fd4 Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Mon, 2 Dec 2024 21:39:12 +0000 Subject: [PATCH 3/5] fmt --- jetstream/core/server_lib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jetstream/core/server_lib.py b/jetstream/core/server_lib.py index 40a1492d..92cb8bee 100644 --- a/jetstream/core/server_lib.py +++ b/jetstream/core/server_lib.py @@ -120,7 +120,7 @@ def create_driver( logging.info("Loaded all weights.") if metrics_collector: metrics_collector.get_model_load_time_metric().set( - time.time() - model_load_start_time + time.time() - model_load_start_time ) interleaved_mode = ( len(config.prefill_slices) + len(config.generate_slices) == 0 From 62c5fdfe642fae83c5a5cd70dcbe8037a32bc64e Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Mon, 2 Dec 2024 21:46:40 +0000 Subject: [PATCH 4/5] missing labels --- jetstream/core/metrics/prometheus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jetstream/core/metrics/prometheus.py b/jetstream/core/metrics/prometheus.py index 34b77531..45bf947a 100644 --- a/jetstream/core/metrics/prometheus.py +++ b/jetstream/core/metrics/prometheus.py @@ -237,7 +237,7 @@ def get_server_startup_latency_metric(self): return self._server_startup_latency.labels(id=self._id) def get_model_load_time_metric(self): - return self._model_load_time + return self._model_load_time.labels(id=self.id) def get_time_to_first_token(self): return self._time_to_first_token.labels(id=self._id) From 203a6fd7d7942665591765fe6758c349f7e9380a Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Mon, 2 Dec 2024 21:50:10 +0000 Subject: [PATCH 5/5] typo --- jetstream/core/metrics/prometheus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jetstream/core/metrics/prometheus.py b/jetstream/core/metrics/prometheus.py index 45bf947a..9e37dca7 100644 --- a/jetstream/core/metrics/prometheus.py +++ b/jetstream/core/metrics/prometheus.py @@ -237,7 +237,7 @@ def get_server_startup_latency_metric(self): return self._server_startup_latency.labels(id=self._id) def get_model_load_time_metric(self): - return self._model_load_time.labels(id=self.id) + return self._model_load_time.labels(id=self._id) def get_time_to_first_token(self): return self._time_to_first_token.labels(id=self._id)