diff --git a/packages/service-library/src/servicelib/instrumentation.py b/packages/service-library/src/servicelib/instrumentation.py index d1fa57f66e4..002e1942853 100644 --- a/packages/service-library/src/servicelib/instrumentation.py +++ b/packages/service-library/src/servicelib/instrumentation.py @@ -1,2 +1,13 @@ +from dataclasses import dataclass + +from prometheus_client import CollectorRegistry + + +@dataclass(slots=True, kw_only=True) +class MetricsBase: + subsystem: str + registry: CollectorRegistry + + def get_metrics_namespace(application_name: str) -> str: return application_name.replace("-", "_") diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_models.py b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_models.py index 56f504d5eef..525ebf99551 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_models.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_models.py @@ -2,6 +2,7 @@ from typing import Final from prometheus_client import CollectorRegistry, Counter, Histogram +from servicelib.instrumentation import MetricsBase from ...models import BufferPoolManager, Cluster from ._constants import ( @@ -13,12 +14,6 @@ from ._utils import TrackedGauge, create_gauge -@dataclass(slots=True, kw_only=True) -class MetricsBase: - subsystem: str - registry: CollectorRegistry - - @dataclass(slots=True, kw_only=True) class ClusterMetrics(MetricsBase): # pylint: disable=too-many-instance-attributes active_nodes: TrackedGauge = field(init=False) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_models.py b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_models.py index 5a8f692a124..577186fa2f9 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_models.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/instrumentation/_models.py @@ -3,12 +3,11 @@ from prometheus_client import CollectorRegistry, Histogram from pydantic import ByteSize, parse_obj_as -from servicelib.instrumentation import get_metrics_namespace +from servicelib.instrumentation import MetricsBase, get_metrics_namespace from ..._meta import PROJECT_NAME -_NAMESPACE_METRICS: Final[str] = get_metrics_namespace(PROJECT_NAME) -_SUBSYSTEM_NAME: Final[str] = "dynamic_services" +_METRICS_NAMESPACE: Final[str] = get_metrics_namespace(PROJECT_NAME) _INSTRUMENTATION_LABELS: Final[tuple[str, ...]] = ( "user_id", "wallet_id", @@ -31,7 +30,7 @@ ) -_BUCKETS_RATE_BPS: Final[tuple[float, ...]] = tuple( +_RATE_BPS_BUCKETS: Final[tuple[float, ...]] = tuple( parse_obj_as(ByteSize, f"{m}MiB") for m in ( 1, @@ -50,8 +49,7 @@ @dataclass(slots=True, kw_only=True) -class DynamiSidecarMetrics: - +class DynamiSidecarMetrics(MetricsBase): start_time_duration: Histogram = field(init=False) stop_time_duration: Histogram = field(init=False) pull_user_services_images_duration: Histogram = field(init=False) @@ -69,69 +67,77 @@ class DynamiSidecarMetrics: def __post_init__(self) -> None: self.start_time_duration = Histogram( "start_time_duration_seconds", - "time to start dynamic-sidecar", + "time to start dynamic service (from start request in dv-2 till service containers are in running state (healthy))", labelnames=_INSTRUMENTATION_LABELS, - namespace=_NAMESPACE_METRICS, + namespace=_METRICS_NAMESPACE, buckets=_BUCKETS_TIME_S, - subsystem=_SUBSYSTEM_NAME, + subsystem=self.subsystem, + registry=self.registry, ) self.stop_time_duration = Histogram( "stop_time_duration_seconds", - "time to stop dynamic-sidecar", + "time to stop dynamic service (from stop request in dv-2 till all allocated resources (services + dynamic-sidecar) are removed)", labelnames=_INSTRUMENTATION_LABELS, - namespace=_NAMESPACE_METRICS, + namespace=_METRICS_NAMESPACE, buckets=_BUCKETS_TIME_S, - subsystem=_SUBSYSTEM_NAME, + subsystem=self.subsystem, + registry=self.registry, ) self.pull_user_services_images_duration = Histogram( "pull_user_services_images_duration_seconds", "time to pull docker images", labelnames=_INSTRUMENTATION_LABELS, - namespace=_NAMESPACE_METRICS, - buckets=_BUCKETS_RATE_BPS, - subsystem=_SUBSYSTEM_NAME, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, ) self.output_ports_pull_rate = Histogram( "output_ports_pull_rate_bps", "rate at which output ports were pulled", labelnames=_INSTRUMENTATION_LABELS, - namespace=_NAMESPACE_METRICS, - buckets=_BUCKETS_RATE_BPS, - subsystem=_SUBSYSTEM_NAME, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, ) self.input_ports_pull_rate = Histogram( "input_ports_pull_rate_bps", "rate at which input ports were pulled", labelnames=_INSTRUMENTATION_LABELS, - namespace=_NAMESPACE_METRICS, - buckets=_BUCKETS_RATE_BPS, - subsystem=_SUBSYSTEM_NAME, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, ) self.pull_service_state_rate = Histogram( "pull_service_state_rate_bps", "rate at which service states were recovered", labelnames=_INSTRUMENTATION_LABELS, - namespace=_NAMESPACE_METRICS, - buckets=_BUCKETS_RATE_BPS, - subsystem=_SUBSYSTEM_NAME, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, ) self.push_service_state_rate = Histogram( "push_service_state_rate_bps", "rate at which service states were saved", labelnames=_INSTRUMENTATION_LABELS, - namespace=_NAMESPACE_METRICS, - buckets=_BUCKETS_RATE_BPS, - subsystem=_SUBSYSTEM_NAME, + namespace=_METRICS_NAMESPACE, + buckets=_RATE_BPS_BUCKETS, + subsystem=self.subsystem, + registry=self.registry, ) @dataclass(slots=True, kw_only=True) class DirectorV2Instrumentation: registry: CollectorRegistry - dynamic_sidecar_metrics: DynamiSidecarMetrics = field(init=False) def __post_init__(self) -> None: - self.dynamic_sidecar_metrics = DynamiSidecarMetrics() + self.dynamic_sidecar_metrics = DynamiSidecarMetrics( + subsystem="dynamic_services", registry=self.registry + )