From 0c27024c7f5da1ee41f917433613b9534011970d Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 8 Nov 2024 11:41:18 +0100 Subject: [PATCH 1/5] added tracing to agent service --- .env-devel | 1 + services/agent/src/simcore_service_agent/core/application.py | 4 ++++ services/agent/src/simcore_service_agent/core/settings.py | 5 +++++ services/agent/tests/conftest.py | 1 + services/docker-compose.yml | 4 ++++ 5 files changed, 15 insertions(+) diff --git a/.env-devel b/.env-devel index 8f979751926..52fb6e84bfd 100644 --- a/.env-devel +++ b/.env-devel @@ -17,6 +17,7 @@ AGENT_VOLUMES_CLEANUP_S3_ENDPOINT=http://172.17.0.1:9001 AGENT_VOLUMES_CLEANUP_S3_PROVIDER=MINIO AGENT_VOLUMES_CLEANUP_S3_REGION=us-east-1 AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY=12345678 +AGENT_TRACING={} API_SERVER_DEV_FEATURES_ENABLED=0 API_SERVER_LOGLEVEL=INFO diff --git a/services/agent/src/simcore_service_agent/core/application.py b/services/agent/src/simcore_service_agent/core/application.py index 41c80b07d61..84bc71e24c5 100644 --- a/services/agent/src/simcore_service_agent/core/application.py +++ b/services/agent/src/simcore_service_agent/core/application.py @@ -5,6 +5,7 @@ get_common_oas_options, override_fastapi_openapi_method, ) +from servicelib.fastapi.tracing import setup_tracing from servicelib.logging_utils import config_all_loggers from .._meta import ( @@ -59,6 +60,9 @@ def create_app() -> FastAPI: setup_rest_api(app) setup_rpc_api_routes(app) + if settings.AGENT_TRACING: + setup_tracing(app, settings.AGENT_TRACING, APP_NAME) + async def _on_startup() -> None: print(APP_STARTED_BANNER_MSG, flush=True) # noqa: T201 diff --git a/services/agent/src/simcore_service_agent/core/settings.py b/services/agent/src/simcore_service_agent/core/settings.py index 756bf2cac28..f11350968f4 100644 --- a/services/agent/src/simcore_service_agent/core/settings.py +++ b/services/agent/src/simcore_service_agent/core/settings.py @@ -6,6 +6,7 @@ from settings_library.base import BaseCustomSettings from settings_library.r_clone import S3Provider from settings_library.rabbit import RabbitSettings +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings @@ -77,6 +78,10 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): auto_default_from_env=True, description="settings for service/rabbitmq" ) + AGENT_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) + @validator("LOGLEVEL") @classmethod def valid_log_level(cls, value) -> LogLevel: diff --git a/services/agent/tests/conftest.py b/services/agent/tests/conftest.py index 4632ca84102..5fe2cad817e 100644 --- a/services/agent/tests/conftest.py +++ b/services/agent/tests/conftest.py @@ -58,6 +58,7 @@ def mock_environment( "RABBIT_SECURE": "false", "RABBIT_USER": "test", "AGENT_DOCKER_NODE_ID": docker_node_id, + "AGENT_TRACING": "null", }, ) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 35dd3782609..2f039977889 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -1053,6 +1053,10 @@ services: RABBIT_USER: ${RABBIT_USER} RABBIT_SECURE: ${RABBIT_SECURE} + AGENT_TRACING: ${AGENT_TRACING} + TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT} + TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT} + dask-sidecar: image: ${DOCKER_REGISTRY:-itisfoundation}/dask-sidecar:${DOCKER_IMAGE_TAG:-latest} init: true From 735526f30a4762a8767b4cf233de0c4fb6c2b208 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 8 Nov 2024 11:41:40 +0100 Subject: [PATCH 2/5] added tracing to dynamic-sidecar --- .../modules/dynamic_sidecar/docker_service_specs/sidecar.py | 2 ++ .../src/simcore_service_dynamic_sidecar/core/application.py | 4 ++++ .../src/simcore_service_dynamic_sidecar/core/settings.py | 5 +++++ services/dynamic-sidecar/tests/conftest.py | 1 + 4 files changed, 12 insertions(+) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py index b788e455cf3..ea040a2014e 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py @@ -1,3 +1,4 @@ +import json import logging from copy import deepcopy from typing import Any, NamedTuple @@ -175,6 +176,7 @@ def _get_environment_variables( "S3_SECRET_KEY": r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, "SC_BOOT_MODE": f"{app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_SC_BOOT_MODE}", "SSL_CERT_FILE": app_settings.DIRECTOR_V2_SELF_SIGNED_SSL_FILENAME, + "DYNAMIC_SIDECAR_TRACING": json.dumps(app_settings.DIRECTOR_V2_TRACING), # For background info on this special env-var above, see # - https://stackoverflow.com/questions/31448854/how-to-force-requests-use-the-certificates-on-my-ubuntu-system#comment78596389_37447847 "SIMCORE_HOST_NAME": scheduler_data.service_name, diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py index ce5f48a8b21..59547f40119 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py @@ -9,6 +9,7 @@ get_common_oas_options, override_fastapi_openapi_method, ) +from servicelib.fastapi.tracing import setup_tracing from servicelib.logging_utils import config_all_loggers from simcore_sdk.node_ports_common.exceptions import NodeNotFound @@ -190,6 +191,9 @@ def create_app(): if application_settings.are_prometheus_metrics_enabled: setup_prometheus_metrics(app) + if application_settings.DYNAMIC_SIDECAR_TRACING: + setup_tracing(app, application_settings.DYNAMIC_SIDECAR_TRACING, PROJECT_NAME) + # ERROR HANDLERS ------------ app.add_exception_handler(NodeNotFound, node_not_found_error_handler) app.add_exception_handler(BaseDynamicSidecarError, http_error_handler) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py index 214d51ad11b..024465913bd 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py @@ -23,6 +23,7 @@ from settings_library.resource_usage_tracker import ( DEFAULT_RESOURCE_USAGE_HEARTBEAT_INTERVAL, ) +from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings @@ -167,6 +168,10 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings): SYSTEM_MONITOR_SETTINGS: SystemMonitorSettings = Field(auto_default_from_env=True) + DYNAMIC_SIDECAR_TRACING: TracingSettings | None = Field( + auto_default_from_env=True, description="settings for opentelemetry tracing" + ) + @property def are_prometheus_metrics_enabled(self) -> bool: return self.DY_SIDECAR_CALLBACKS_MAPPING.metrics is not None diff --git a/services/dynamic-sidecar/tests/conftest.py b/services/dynamic-sidecar/tests/conftest.py index 8b4760b26dd..a9ec557c6dc 100644 --- a/services/dynamic-sidecar/tests/conftest.py +++ b/services/dynamic-sidecar/tests/conftest.py @@ -199,6 +199,7 @@ def base_mock_envs( "REGISTRY_SSL": "false", } ), + "DYNAMIC_SIDECAR_TRACING": "null", } From 7ec0b786bf98cf74351dca772e1ee456d43b8741 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 8 Nov 2024 12:54:51 +0100 Subject: [PATCH 3/5] properly exposed dynamic-sidecar --- .../dynamic_sidecar/docker_service_specs/sidecar.py | 7 +++++-- services/docker-compose-ops.yml | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py index ea040a2014e..44e2ff575e7 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py @@ -1,4 +1,3 @@ -import json import logging from copy import deepcopy from typing import Any, NamedTuple @@ -176,7 +175,11 @@ def _get_environment_variables( "S3_SECRET_KEY": r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, "SC_BOOT_MODE": f"{app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_SC_BOOT_MODE}", "SSL_CERT_FILE": app_settings.DIRECTOR_V2_SELF_SIGNED_SSL_FILENAME, - "DYNAMIC_SIDECAR_TRACING": json.dumps(app_settings.DIRECTOR_V2_TRACING), + "DYNAMIC_SIDECAR_TRACING": ( + app_settings.DIRECTOR_V2_TRACING.json() + if app_settings.DIRECTOR_V2_TRACING + else "null" + ), # For background info on this special env-var above, see # - https://stackoverflow.com/questions/31448854/how-to-force-requests-use-the-certificates-on-my-ubuntu-system#comment78596389_37447847 "SIMCORE_HOST_NAME": scheduler_data.service_name, diff --git a/services/docker-compose-ops.yml b/services/docker-compose-ops.yml index 9beacf76c34..c80befe2316 100644 --- a/services/docker-compose-ops.yml +++ b/services/docker-compose-ops.yml @@ -111,6 +111,7 @@ services: - "4318:4318" # OTLP HTTP receiver networks: - simcore_default + - interactive_services_subnet environment: TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE: ${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE} TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE} From 0538f1e5cef26580a7f735a5a02f5f0e995fd82c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 8 Nov 2024 13:31:41 +0100 Subject: [PATCH 4/5] added missing --- .../test_modules_dynamic_sidecar_docker_service_specs_sidecar.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py index 4a73b3e7210..f4870a140c4 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py @@ -37,6 +37,7 @@ "DY_SIDECAR_USER_SERVICES_HAVE_INTERNET_ACCESS", "DYNAMIC_SIDECAR_COMPOSE_NAMESPACE", "DYNAMIC_SIDECAR_LOG_LEVEL", + "DYNAMIC_SIDECAR_TRACING", "NODE_PORTS_400_REQUEST_TIMEOUT_ATTEMPTS", "POSTGRES_DB", "POSTGRES_ENDPOINT", From 1a1a13b1fd3362467c3cf78e8bc38aa12c06699f Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 13 Nov 2024 07:56:32 +0100 Subject: [PATCH 5/5] fixed broken test --- .../test_modules_dynamic_sidecar_docker_service_specs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py b/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py index a05e4cd84da..ab835039262 100644 --- a/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py +++ b/services/director-v2/tests/unit/with_dbs/test_modules_dynamic_sidecar_docker_service_specs.py @@ -243,6 +243,7 @@ def expected_dynamic_sidecar_spec( "FORWARD_ENV_DISPLAY": ":0", "NODE_PORTS_400_REQUEST_TIMEOUT_ATTEMPTS": "3", "DYNAMIC_SIDECAR_LOG_LEVEL": "DEBUG", + "DYNAMIC_SIDECAR_TRACING": "null", "DY_DEPLOYMENT_REGISTRY_SETTINGS": ( '{"REGISTRY_AUTH": false, "REGISTRY_PATH": null, ' '"REGISTRY_URL": "foo.bar.com", "REGISTRY_USER": '