Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

🎨 Adding tracing to agent and dynamic-sidecar (🏗️ DEVOPS) #6691

Merged
1 change: 1 addition & 0 deletions .env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ AGENT_VOLUMES_CLEANUP_S3_ENDPOINT=http://172.17.0.1:9001
AGENT_VOLUMES_CLEANUP_S3_PROVIDER=MINIO
AGENT_VOLUMES_CLEANUP_S3_REGION=us-east-1
AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY=12345678
AGENT_TRACING={}

API_SERVER_DEV_FEATURES_ENABLED=0
API_SERVER_LOGLEVEL=INFO
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
get_common_oas_options,
override_fastapi_openapi_method,
)
from servicelib.fastapi.tracing import setup_tracing
from servicelib.logging_utils import config_all_loggers

from .._meta import (
Expand Down Expand Up @@ -59,6 +60,9 @@ def create_app() -> FastAPI:
setup_rest_api(app)
setup_rpc_api_routes(app)

if settings.AGENT_TRACING:
setup_tracing(app, settings.AGENT_TRACING, APP_NAME)

async def _on_startup() -> None:
print(APP_STARTED_BANNER_MSG, flush=True) # noqa: T201

Expand Down
5 changes: 5 additions & 0 deletions services/agent/src/simcore_service_agent/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from settings_library.base import BaseCustomSettings
from settings_library.r_clone import S3Provider
from settings_library.rabbit import RabbitSettings
from settings_library.tracing import TracingSettings
from settings_library.utils_logging import MixinLoggingSettings


Expand Down Expand Up @@ -77,6 +78,10 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
auto_default_from_env=True, description="settings for service/rabbitmq"
)

AGENT_TRACING: TracingSettings | None = Field(
auto_default_from_env=True, description="settings for opentelemetry tracing"
)

@validator("LOGLEVEL")
@classmethod
def valid_log_level(cls, value) -> LogLevel:
Expand Down
1 change: 1 addition & 0 deletions services/agent/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def mock_environment(
"RABBIT_SECURE": "false",
"RABBIT_USER": "test",
"AGENT_DOCKER_NODE_ID": docker_node_id,
"AGENT_TRACING": "null",
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ def _get_environment_variables(
"S3_SECRET_KEY": r_clone_settings.R_CLONE_S3.S3_SECRET_KEY,
"SC_BOOT_MODE": f"{app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_SC_BOOT_MODE}",
"SSL_CERT_FILE": app_settings.DIRECTOR_V2_SELF_SIGNED_SSL_FILENAME,
"DYNAMIC_SIDECAR_TRACING": (
app_settings.DIRECTOR_V2_TRACING.json()
if app_settings.DIRECTOR_V2_TRACING
else "null"
),
# For background info on this special env-var above, see
# - https://stackoverflow.com/questions/31448854/how-to-force-requests-use-the-certificates-on-my-ubuntu-system#comment78596389_37447847
"SIMCORE_HOST_NAME": scheduler_data.service_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"DY_SIDECAR_USER_SERVICES_HAVE_INTERNET_ACCESS",
"DYNAMIC_SIDECAR_COMPOSE_NAMESPACE",
"DYNAMIC_SIDECAR_LOG_LEVEL",
"DYNAMIC_SIDECAR_TRACING",
"NODE_PORTS_400_REQUEST_TIMEOUT_ATTEMPTS",
"POSTGRES_DB",
"POSTGRES_ENDPOINT",
Expand Down
1 change: 1 addition & 0 deletions services/docker-compose-ops.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ services:
- "4318:4318" # OTLP HTTP receiver
networks:
- simcore_default
- interactive_services_subnet
[Review thread: YuryHrytsuk marked this conversation as resolved.]
environment:
TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE: ${TRACING_OPENTELEMETRY_COLLECTOR_BATCH_SIZE}
TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE: ${TRACING_OPENTELEMETRY_COLLECTOR_SAMPLING_PERCENTAGE}
Expand Down
4 changes: 4 additions & 0 deletions services/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,10 @@ services:
RABBIT_USER: ${RABBIT_USER}
RABBIT_SECURE: ${RABBIT_SECURE}

AGENT_TRACING: ${AGENT_TRACING}
TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT: ${TRACING_OPENTELEMETRY_COLLECTOR_ENDPOINT}
TRACING_OPENTELEMETRY_COLLECTOR_PORT: ${TRACING_OPENTELEMETRY_COLLECTOR_PORT}

dask-sidecar:
image: ${DOCKER_REGISTRY:-itisfoundation}/dask-sidecar:${DOCKER_IMAGE_TAG:-latest}
init: true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
get_common_oas_options,
override_fastapi_openapi_method,
)
from servicelib.fastapi.tracing import setup_tracing
from servicelib.logging_utils import config_all_loggers
from simcore_sdk.node_ports_common.exceptions import NodeNotFound

Expand Down Expand Up @@ -190,6 +191,9 @@ def create_app():
if application_settings.are_prometheus_metrics_enabled:
setup_prometheus_metrics(app)

if application_settings.DYNAMIC_SIDECAR_TRACING:
setup_tracing(app, application_settings.DYNAMIC_SIDECAR_TRACING, PROJECT_NAME)

# ERROR HANDLERS ------------
app.add_exception_handler(NodeNotFound, node_not_found_error_handler)
app.add_exception_handler(BaseDynamicSidecarError, http_error_handler)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from settings_library.resource_usage_tracker import (
DEFAULT_RESOURCE_USAGE_HEARTBEAT_INTERVAL,
)
from settings_library.tracing import TracingSettings
from settings_library.utils_logging import MixinLoggingSettings


Expand Down Expand Up @@ -167,6 +168,10 @@ class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):

SYSTEM_MONITOR_SETTINGS: SystemMonitorSettings = Field(auto_default_from_env=True)

DYNAMIC_SIDECAR_TRACING: TracingSettings | None = Field(
auto_default_from_env=True, description="settings for opentelemetry tracing"
)

@property
def are_prometheus_metrics_enabled(self) -> bool:
return self.DY_SIDECAR_CALLBACKS_MAPPING.metrics is not None
Expand Down
1 change: 1 addition & 0 deletions services/dynamic-sidecar/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ def base_mock_envs(
"REGISTRY_SSL": "false",
}
),
"DYNAMIC_SIDECAR_TRACING": "null",
}


Expand Down
Loading