diff --git a/.env-devel b/.env-devel index b13d55b97a9..cc6609460da 100644 --- a/.env-devel +++ b/.env-devel @@ -17,12 +17,12 @@ AGENT_VOLUMES_CLEANUP_S3_ENDPOINT=http://172.17.0.1:9001 AGENT_VOLUMES_CLEANUP_S3_PROVIDER=MINIO AGENT_VOLUMES_CLEANUP_S3_REGION=us-east-1 AGENT_VOLUMES_CLEANUP_S3_SECRET_KEY=12345678 -AGENT_TRACING={} +AGENT_TRACING=null API_SERVER_DEV_FEATURES_ENABLED=0 API_SERVER_LOGLEVEL=INFO API_SERVER_PROFILING=1 -API_SERVER_TRACING={} +API_SERVER_TRACING=null TRAEFIK_API_SERVER_INFLIGHTREQ_AMOUNT=25 AUTOSCALING_DASK=null @@ -35,7 +35,7 @@ AUTOSCALING_LOGLEVEL=INFO AUTOSCALING_NODES_MONITORING=null AUTOSCALING_POLL_INTERVAL="00:00:10" AUTOSCALING_SSM_ACCESS=null -AUTOSCALING_TRACING={} +AUTOSCALING_TRACING=null AWS_S3_CLI_S3=null @@ -47,7 +47,7 @@ CATALOG_PORT=8000 CATALOG_PROFILING=1 CATALOG_SERVICES_DEFAULT_RESOURCES='{"CPU": {"limit": 0.1, "reservation": 0.1}, "RAM": {"limit": 2147483648, "reservation": 2147483648}}' CATALOG_SERVICES_DEFAULT_SPECIFICATIONS='{}' -CATALOG_TRACING={} +CATALOG_TRACING=null CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH='{"type":"tls","tls_ca_file":"/home/scu/.dask/dask-crt.pem","tls_client_cert":"/home/scu/.dask/dask-crt.pem","tls_client_key":"/home/scu/.dask/dask-key.pem"}' CLUSTERS_KEEPER_COMPUTATIONAL_BACKEND_DOCKER_IMAGE_TAG=master-github-latest @@ -61,7 +61,7 @@ CLUSTERS_KEEPER_MAX_MISSED_HEARTBEATS_BEFORE_CLUSTER_TERMINATION=5 CLUSTERS_KEEPER_PRIMARY_EC2_INSTANCES=null CLUSTERS_KEEPER_TASK_INTERVAL=00:00:30 CLUSTERS_KEEPER_WORKERS_EC2_INSTANCES=null -CLUSTERS_KEEPER_TRACING={} +CLUSTERS_KEEPER_TRACING=null DASK_SCHEDULER_HOST=dask-scheduler DASK_SCHEDULER_PORT=8786 @@ -81,7 +81,7 @@ DIRECTOR_PUBLISHED_HOST_NAME="127.0.0.1:9081" DIRECTOR_REGISTRY_CACHING_TTL=00:15:00 DIRECTOR_REGISTRY_CACHING=True DIRECTOR_SERVICES_CUSTOM_CONSTRAINTS=null -DIRECTOR_TRACING={} +DIRECTOR_TRACING=null EFS_USER_ID=8006 EFS_USER_NAME=efs @@ -90,11 +90,11 @@ EFS_GROUP_NAME=efs-group 
EFS_DNS_NAME=fs-xxx.efs.us-east-1.amazonaws.com EFS_MOUNTED_PATH=/tmp/efs EFS_PROJECT_SPECIFIC_DATA_DIRECTORY=project-specific-data -EFS_GUARDIAN_TRACING={} +EFS_GUARDIAN_TRACING=null EFS_DEFAULT_USER_SERVICE_SIZE_BYTES=10000 # DATCORE_ADAPTER -DATCORE_ADAPTER_TRACING={} +DATCORE_ADAPTER_TRACING=null # DIRECTOR_V2 ---- COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH='{"type":"tls","tls_ca_file":"/home/scu/.dask/dask-crt.pem","tls_client_cert":"/home/scu/.dask/dask-crt.pem","tls_client_key":"/home/scu/.dask/dask-key.pem"}' @@ -121,14 +121,14 @@ DYNAMIC_SIDECAR_LOG_LEVEL=DEBUG DYNAMIC_SIDECAR_PROMETHEUS_MONITORING_NETWORKS=[] DYNAMIC_SIDECAR_PROMETHEUS_SERVICE_LABELS={} DYNAMIC_SIDECAR_API_SAVE_RESTORE_STATE_TIMEOUT=01:00:00 -DIRECTOR_V2_TRACING={} +DIRECTOR_V2_TRACING=null # DYNAMIC_SCHEDULER ---- DYNAMIC_SCHEDULER_LOGLEVEL=DEBUG DYNAMIC_SCHEDULER_PROFILING=1 DYNAMIC_SCHEDULER_USE_INTERNAL_SCHEDULER=0 DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT=01:00:00 -DYNAMIC_SCHEDULER_TRACING={} +DYNAMIC_SCHEDULER_TRACING=null DYNAMIC_SCHEDULER_UI_STORAGE_SECRET=adminadmin FUNCTION_SERVICES_AUTHORS='{"UN": {"name": "Unknown", "email": "unknown@osparc.io", "affiliation": "unknown"}}' @@ -143,7 +143,7 @@ INVITATIONS_PORT=8000 INVITATIONS_SECRET_KEY='REPLACE_ME_with_result__Fernet_generate_key=' INVITATIONS_SWAGGER_API_DOC_ENABLED=1 INVITATIONS_USERNAME=admin -INVITATIONS_TRACING={} +INVITATIONS_TRACING=null LOG_FORMAT_LOCAL_DEV_ENABLED=1 LOG_FILTER_MAPPING='{}' @@ -168,7 +168,7 @@ PAYMENTS_STRIPE_API_SECRET='REPLACE_ME_with_api_secret' PAYMENTS_STRIPE_URL=https://api.stripe.com PAYMENTS_SWAGGER_API_DOC_ENABLED=1 PAYMENTS_USERNAME=admin -PAYMENTS_TRACING={} +PAYMENTS_TRACING=null POSTGRES_DB=simcoredb POSTGRES_ENDPOINT=postgres:5432 @@ -209,7 +209,7 @@ RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_CHECK_ENABLED=1 RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_COUNTER_FAIL=6 RESOURCE_USAGE_TRACKER_MISSED_HEARTBEAT_INTERVAL_SEC=300 RESOURCE_USAGE_TRACKER_S3=null -RESOURCE_USAGE_TRACKER_TRACING={} 
+RESOURCE_USAGE_TRACKER_TRACING=null # NOTE: 172.17.0.1 is the docker0 interface, which redirect from inside a container onto the host network interface. R_CLONE_OPTION_BUFFER_SIZE=16M @@ -243,7 +243,7 @@ STORAGE_HOST=storage STORAGE_LOGLEVEL=INFO STORAGE_PORT=8080 STORAGE_PROFILING=1 -STORAGE_TRACING={} +STORAGE_TRACING=null # STORAGE ---- SWARM_STACK_NAME=master-simcore @@ -389,6 +389,6 @@ WEBSERVER_SOCKETIO=1 WEBSERVER_STATICWEB={} WEBSERVER_STUDIES_DISPATCHER={} WEBSERVER_TAGS=1 -WEBSERVER_TRACING={} +WEBSERVER_TRACING=null WEBSERVER_USERS={} WEBSERVER_VERSION_CONTROL=1 diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index 831b6d3415a..50765b74cdc 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -2664,11 +2664,14 @@ jobs: system-api-specs: needs: [changes] - if: ${{ needs.changes.outputs.anything-py == 'true' && github.event_name == 'push' && github.event.pull_request != null }} + if: ${{ needs.changes.outputs.anything-py == 'true' || github.event_name == 'push' }} timeout-minutes: 10 name: "[sys] check api-specs are up to date" runs-on: ubuntu-latest steps: + - name: Ensure job passes if not PR # ensure pass so downstream jobs which depend on this will run (dockerhub deployment) + if: ${{ github.event.pull_request == null }} + run: echo "::notice Passing job because not in PR"; exit 0 - name: setup python environment uses: actions/setup-python@v5 with: @@ -2690,11 +2693,14 @@ jobs: system-backwards-compatibility: needs: [changes, system-api-specs] - if: ${{ needs.changes.outputs.anything-py == 'true' && github.event_name == 'push' && github.event.pull_request != null }} + if: ${{ needs.changes.outputs.anything-py == 'true' || github.event_name == 'push' }} timeout-minutes: 10 name: "[sys] api-server backwards compatibility" runs-on: ubuntu-latest steps: + - name: Ensure job passes if not PR # ensure pass so downstream jobs which depend on this will run (dockerhub 
deployment) + if: ${{ github.event.pull_request == null }} + run: echo "::notice Passing job because not in PR"; exit 0 - name: setup python environment uses: actions/setup-python@v5 with: diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/api/rest/routes.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/api/rest/routes.py index 213715904c9..bf9cd5aae9b 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/api/rest/routes.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/api/rest/routes.py @@ -1,16 +1,26 @@ +import logging + from fastapi import APIRouter, FastAPI +from servicelib.logging_utils import log_context from ..._meta import API_VTAG from . import _health, _meta, _resource_tracker +_logger = logging.getLogger(__name__) + def setup_api_routes(app: FastAPI): """ Composes resources/sub-resources routers """ - app.include_router(_health.router) + with log_context( + _logger, + logging.INFO, + msg="RUT setup_api_routes", + ): + app.include_router(_health.router) - api_router = APIRouter(prefix=f"/{API_VTAG}") - api_router.include_router(_meta.router, tags=["meta"]) - api_router.include_router(_resource_tracker.router) - app.include_router(api_router) + api_router = APIRouter(prefix=f"/{API_VTAG}") + api_router.include_router(_meta.router, tags=["meta"]) + api_router.include_router(_resource_tracker.router) + app.include_router(api_router) diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/api/rpc/routes.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/api/rpc/routes.py index c15175e2564..ff2e1cdb0bb 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/api/rpc/routes.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/api/rpc/routes.py @@ -1,12 +1,18 @@ +import logging + from fastapi import FastAPI 
from models_library.api_schemas_resource_usage_tracker import ( RESOURCE_USAGE_TRACKER_RPC_NAMESPACE, ) +from servicelib.logging_utils import log_context from servicelib.rabbitmq import RPCRouter from ...services.modules.rabbitmq import get_rabbitmq_rpc_server from . import _resource_tracker +_logger = logging.getLogger(__name__) + + ROUTERS: list[RPCRouter] = [ _resource_tracker.router, ] @@ -14,10 +20,15 @@ def setup_rpc_api_routes(app: FastAPI) -> None: async def startup() -> None: - rpc_server = get_rabbitmq_rpc_server(app) - for router in ROUTERS: - await rpc_server.register_router( - router, RESOURCE_USAGE_TRACKER_RPC_NAMESPACE, app - ) + with log_context( + _logger, + logging.INFO, + msg="RUT startup RPC API Routes", + ): + rpc_server = get_rabbitmq_rpc_server(app) + for router in ROUTERS: + await rpc_server.register_router( + router, RESOURCE_USAGE_TRACKER_RPC_NAMESPACE, app + ) app.add_event_handler("startup", startup) diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/db/__init__.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/db/__init__.py index 42062cb30ba..1ccd94f436e 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/db/__init__.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/db/__init__.py @@ -1,13 +1,28 @@ +import logging + from fastapi import FastAPI from servicelib.fastapi.db_asyncpg_engine import close_db_connection, connect_to_db +from servicelib.logging_utils import log_context + +_logger = logging.getLogger(__name__) def setup(app: FastAPI): async def on_startup() -> None: - await connect_to_db(app, app.state.settings.RESOURCE_USAGE_TRACKER_POSTGRES) + with log_context( + _logger, + logging.INFO, + msg="RUT startup DB", + ): + await connect_to_db(app, app.state.settings.RESOURCE_USAGE_TRACKER_POSTGRES) async def on_shutdown() -> None: - 
await close_db_connection(app) + with log_context( + _logger, + logging.INFO, + msg="RUT shutdown DB", + ): + await close_db_connection(app) app.add_event_handler("startup", on_startup) app.add_event_handler("shutdown", on_shutdown) diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/rabbitmq.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/rabbitmq.py index 57fb01bdcbf..1c827fcf060 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/rabbitmq.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/rabbitmq.py @@ -3,6 +3,7 @@ from fastapi import FastAPI from fastapi.requests import Request +from servicelib.logging_utils import log_context from servicelib.rabbitmq import ( RabbitMQClient, RabbitMQRPCClient, @@ -12,32 +13,42 @@ from ...exceptions.errors import ConfigurationError -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) def setup(app: FastAPI) -> None: async def on_startup() -> None: - app.state.rabbitmq_client = None - settings: RabbitSettings | None = ( - app.state.settings.RESOURCE_USAGE_TRACKER_RABBITMQ - ) - if not settings: - raise ConfigurationError( - msg="Rabbit MQ client is de-activated in the settings" + with log_context( + _logger, + logging.INFO, + msg="RUT startup Rabbitmq", + ): + app.state.rabbitmq_client = None + settings: RabbitSettings | None = ( + app.state.settings.RESOURCE_USAGE_TRACKER_RABBITMQ + ) + if not settings: + raise ConfigurationError( + msg="Rabbit MQ client is de-activated in the settings" + ) + await wait_till_rabbitmq_responsive(settings.dsn) + app.state.rabbitmq_client = RabbitMQClient( + client_name="resource-usage-tracker", settings=settings + ) + app.state.rabbitmq_rpc_server = await RabbitMQRPCClient.create( + client_name="resource_usage_tracker_rpc_server", settings=settings ) - await 
wait_till_rabbitmq_responsive(settings.dsn) - app.state.rabbitmq_client = RabbitMQClient( - client_name="resource-usage-tracker", settings=settings - ) - app.state.rabbitmq_rpc_server = await RabbitMQRPCClient.create( - client_name="resource_usage_tracker_rpc_server", settings=settings - ) async def on_shutdown() -> None: - if app.state.rabbitmq_client: - await app.state.rabbitmq_client.close() - if app.state.rabbitmq_rpc_server: - await app.state.rabbitmq_rpc_server.close() + with log_context( + _logger, + logging.INFO, + msg="RUT shutdown Rabbitmq", + ): + if app.state.rabbitmq_client: + await app.state.rabbitmq_client.close() + if app.state.rabbitmq_rpc_server: + await app.state.rabbitmq_rpc_server.close() app.add_event_handler("startup", on_startup) app.add_event_handler("shutdown", on_shutdown) diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/redis.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/redis.py index 0aece119077..be7724a5667 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/redis.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/redis.py @@ -2,28 +2,39 @@ from typing import cast from fastapi import FastAPI +from servicelib.logging_utils import log_context from servicelib.redis import RedisClientSDK from settings_library.redis import RedisDatabase, RedisSettings from ..._meta import APP_NAME -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) def setup(app: FastAPI) -> None: async def on_startup() -> None: - app.state.redis_client_sdk = None - settings: RedisSettings = app.state.settings.RESOURCE_USAGE_TRACKER_REDIS - redis_locks_dsn = settings.build_redis_dsn(RedisDatabase.LOCKS) - app.state.redis_client_sdk = client = RedisClientSDK( - redis_locks_dsn, client_name=APP_NAME - ) - await client.setup() + with 
log_context( + _logger, + logging.INFO, + msg="RUT startup Redis", + ): + app.state.redis_client_sdk = None + settings: RedisSettings = app.state.settings.RESOURCE_USAGE_TRACKER_REDIS + redis_locks_dsn = settings.build_redis_dsn(RedisDatabase.LOCKS) + app.state.redis_client_sdk = client = RedisClientSDK( + redis_locks_dsn, client_name=APP_NAME + ) + await client.setup() async def on_shutdown() -> None: - redis_client_sdk: None | RedisClientSDK = app.state.redis_client_sdk - if redis_client_sdk: - await redis_client_sdk.shutdown() + with log_context( + _logger, + logging.INFO, + msg="RUT shutdown Redis", + ): + redis_client_sdk: None | RedisClientSDK = app.state.redis_client_sdk + if redis_client_sdk: + await redis_client_sdk.shutdown() app.add_event_handler("startup", on_startup) app.add_event_handler("shutdown", on_shutdown) diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/s3.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/s3.py index f5104674c4f..54770c70fc1 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/s3.py +++ b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/modules/s3.py @@ -5,6 +5,7 @@ from fastapi import FastAPI from models_library.api_schemas_storage import S3BucketName from pydantic import TypeAdapter +from servicelib.logging_utils import log_context from settings_library.s3 import S3Settings from tenacity import ( AsyncRetrying, @@ -20,33 +21,43 @@ def setup(app: FastAPI) -> None: async def on_startup() -> None: - app.state.s3_client = None - settings: S3Settings | None = app.state.settings.RESOURCE_USAGE_TRACKER_S3 + with log_context( + _logger, + logging.INFO, + msg="RUT startup S3", + ): + app.state.s3_client = None + settings: S3Settings | None = app.state.settings.RESOURCE_USAGE_TRACKER_S3 - if not settings: - _logger.warning("S3 client is de-activated 
in the settings") - return + if not settings: + _logger.warning("S3 client is de-activated in the settings") + return - app.state.s3_client = client = await SimcoreS3API.create(settings) + app.state.s3_client = client = await SimcoreS3API.create(settings) - async for attempt in AsyncRetrying( - reraise=True, - stop=stop_after_delay(120), - wait=wait_random_exponential(max=30), - before_sleep=before_sleep_log(_logger, logging.WARNING), - ): - with attempt: - connected = await client.http_check_bucket_connected( - bucket=TypeAdapter(S3BucketName).validate_python( - settings.S3_BUCKET_NAME + async for attempt in AsyncRetrying( + reraise=True, + stop=stop_after_delay(120), + wait=wait_random_exponential(max=30), + before_sleep=before_sleep_log(_logger, logging.WARNING), + ): + with attempt: + connected = await client.http_check_bucket_connected( + bucket=TypeAdapter(S3BucketName).validate_python( + settings.S3_BUCKET_NAME + ) ) - ) - if not connected: - raise S3NotConnectedError # pragma: no cover + if not connected: + raise S3NotConnectedError # pragma: no cover async def on_shutdown() -> None: - if app.state.s3_client: - await cast(SimcoreS3API, app.state.s3_client).close() + with log_context( + _logger, + logging.INFO, + msg="RUT shutdown S3", + ): + if app.state.s3_client: + await cast(SimcoreS3API, app.state.s3_client).close() app.add_event_handler("startup", on_startup) app.add_event_handler("shutdown", on_shutdown) diff --git a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/process_message_running_service_setup.py b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/process_message_running_service_setup.py index c393626e469..cb4bc919503 100644 --- a/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/process_message_running_service_setup.py +++ 
b/services/resource-usage-tracker/src/simcore_service_resource_usage_tracker/services/process_message_running_service_setup.py @@ -32,7 +32,9 @@ async def _subscribe_to_rabbitmq(app) -> str: def on_app_startup(app: FastAPI) -> Callable[[], Awaitable[None]]: async def _startup() -> None: with log_context( - _logger, logging.INFO, msg="setup resource tracker" + _logger, + logging.INFO, + msg="RUT setup process_message_running_service module.", ), log_catch(_logger, reraise=False): app_settings: ApplicationSettings = app.state.settings app.state.resource_tracker_rabbitmq_consumer = None