diff --git a/api/specs/web-server/_projects_nodes.py b/api/specs/web-server/_projects_nodes.py index 392a90baf0b..50c2ba73a1a 100644 --- a/api/specs/web-server/_projects_nodes.py +++ b/api/specs/web-server/_projects_nodes.py @@ -21,9 +21,9 @@ ServiceResourcesDict, ) from models_library.generics import Envelope +from models_library.groups import GroupID from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID -from models_library.users import GroupID from simcore_service_webserver._meta import API_VTAG from simcore_service_webserver.projects._crud_handlers import ProjectPathParams from simcore_service_webserver.projects._nodes_handlers import ( diff --git a/api/specs/web-server/_wallets.py b/api/specs/web-server/_wallets.py index 06ff8d7fc10..c4e490ec711 100644 --- a/api/specs/web-server/_wallets.py +++ b/api/specs/web-server/_wallets.py @@ -27,8 +27,8 @@ WalletPaymentInitiated, ) from models_library.generics import Envelope +from models_library.groups import GroupID from models_library.rest_pagination import Page, PageQueryParameters -from models_library.users import GroupID from models_library.wallets import WalletID from simcore_service_webserver._meta import API_VTAG from simcore_service_webserver.wallets._groups_api import WalletGroupGet diff --git a/packages/aws-library/src/aws_library/ec2/_client.py b/packages/aws-library/src/aws_library/ec2/_client.py index a40cf794304..276423415a5 100644 --- a/packages/aws-library/src/aws_library/ec2/_client.py +++ b/packages/aws-library/src/aws_library/ec2/_client.py @@ -181,7 +181,8 @@ async def launch_instances( ) instance_ids = [i["InstanceId"] for i in instances["Instances"]] _logger.info( - "New instances launched: %s, waiting for them to start now...", + "%s New instances launched: %s, waiting for them to start now...", + len(instance_ids), instance_ids, ) diff --git a/packages/common-library/src/common_library/groups_enums.py 
b/packages/common-library/src/common_library/groups_enums.py new file mode 100644 index 00000000000..215edf335f1 --- /dev/null +++ b/packages/common-library/src/common_library/groups_enums.py @@ -0,0 +1,13 @@ +import enum + + +class GroupType(enum.Enum): + """ + standard: standard group, e.g. any group that is not a primary group or special group such as the everyone group + primary: primary group, e.g. the primary group is the user own defined group that typically only contain the user (same as in linux) + everyone: the only group for all users + """ + + STANDARD = "standard" + PRIMARY = "primary" + EVERYONE = "everyone" diff --git a/packages/models-library/src/models_library/api_schemas_catalog/service_access_rights.py b/packages/models-library/src/models_library/api_schemas_catalog/service_access_rights.py index c56edcd7cf9..b4aa1173adc 100644 --- a/packages/models-library/src/models_library/api_schemas_catalog/service_access_rights.py +++ b/packages/models-library/src/models_library/api_schemas_catalog/service_access_rights.py @@ -1,7 +1,7 @@ from pydantic import BaseModel +from ..groups import GroupID from ..services import ServiceKey, ServiceVersion -from ..users import GroupID class ServiceAccessRightsGet(BaseModel): diff --git a/packages/models-library/src/models_library/api_schemas_catalog/services.py b/packages/models-library/src/models_library/api_schemas_catalog/services.py index 8090edf0ebd..c2551c43cb2 100644 --- a/packages/models-library/src/models_library/api_schemas_catalog/services.py +++ b/packages/models-library/src/models_library/api_schemas_catalog/services.py @@ -6,6 +6,7 @@ from ..boot_options import BootOptions from ..emails import LowerCaseEmailStr +from ..groups import GroupID from ..services_access import ServiceAccessRights, ServiceGroupAccessRightsV2 from ..services_authoring import Author from ..services_enums import ServiceType @@ -18,7 +19,6 @@ ) from ..services_resources import ServiceResourcesDict from ..services_types import 
ServiceKey, ServiceVersion -from ..users import GroupID from ..utils.change_case import snake_to_camel diff --git a/packages/models-library/src/models_library/api_schemas_webserver/folders.py b/packages/models-library/src/models_library/api_schemas_webserver/folders.py index 092a5cb94fe..dd464718571 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/folders.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/folders.py @@ -3,8 +3,8 @@ from models_library.basic_types import IDStr from models_library.folders import FolderID +from models_library.groups import GroupID from models_library.projects_access import AccessRights -from models_library.users import GroupID from models_library.utils.common_validators import null_or_none_str_to_none_validator from pydantic import ConfigDict, PositiveInt, field_validator diff --git a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py index 4a88532848a..adf0766442e 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/folders_v2.py @@ -6,7 +6,7 @@ from ..access_rights import AccessRights from ..basic_types import IDStr from ..folders import FolderID -from ..users import GroupID +from ..groups import GroupID from ..utils.common_validators import null_or_none_str_to_none_validator from ..workspaces import WorkspaceID from ._base import InputSchema, OutputSchema diff --git a/packages/models-library/src/models_library/api_schemas_webserver/socketio.py b/packages/models-library/src/models_library/api_schemas_webserver/socketio.py index 05bd342a4c3..6e3f987198a 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/socketio.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/socketio.py @@ -1,5 +1,6 @@ from ..basic_types import 
IDStr -from ..users import GroupID, UserID +from ..groups import GroupID +from ..users import UserID class SocketIORoomStr(IDStr): diff --git a/packages/models-library/src/models_library/api_schemas_webserver/wallets.py b/packages/models-library/src/models_library/api_schemas_webserver/wallets.py index a4f33ab3cad..a69297ef408 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/wallets.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/wallets.py @@ -5,7 +5,7 @@ from pydantic import ConfigDict, Field, HttpUrl, ValidationInfo, field_validator from ..basic_types import AmountDecimal, IDStr, NonNegativeDecimal -from ..users import GroupID +from ..groups import GroupID from ..wallets import WalletID, WalletStatus from ._base import InputSchema, OutputSchema diff --git a/packages/models-library/src/models_library/api_schemas_webserver/workspaces.py b/packages/models-library/src/models_library/api_schemas_webserver/workspaces.py index 73fb684d3aa..de3b0640b98 100644 --- a/packages/models-library/src/models_library/api_schemas_webserver/workspaces.py +++ b/packages/models-library/src/models_library/api_schemas_webserver/workspaces.py @@ -2,7 +2,7 @@ from typing import NamedTuple from models_library.basic_types import IDStr -from models_library.users import GroupID +from models_library.groups import GroupID from models_library.workspaces import WorkspaceID from pydantic import ConfigDict, PositiveInt diff --git a/packages/models-library/src/models_library/clusters.py b/packages/models-library/src/models_library/clusters.py index 783f82df016..e18f3681b4d 100644 --- a/packages/models-library/src/models_library/clusters.py +++ b/packages/models-library/src/models_library/clusters.py @@ -5,7 +5,7 @@ from pydantic import AnyUrl, BaseModel, ConfigDict, Field, HttpUrl, field_validator from pydantic.types import NonNegativeInt -from .users import GroupID +from .groups import GroupID from .utils.common_validators import 
create_enums_pre_validator from .utils.enums import StrAutoEnum diff --git a/packages/models-library/src/models_library/folders.py b/packages/models-library/src/models_library/folders.py index 094ea25be92..55431173111 100644 --- a/packages/models-library/src/models_library/folders.py +++ b/packages/models-library/src/models_library/folders.py @@ -12,7 +12,8 @@ ) from .access_rights import AccessRights -from .users import GroupID, UserID +from .groups import GroupID +from .users import UserID from .utils.enums import StrAutoEnum from .workspaces import WorkspaceID diff --git a/packages/models-library/src/models_library/groups.py b/packages/models-library/src/models_library/groups.py index 368f01523ea..797453922f9 100644 --- a/packages/models-library/src/models_library/groups.py +++ b/packages/models-library/src/models_library/groups.py @@ -1,38 +1,27 @@ -import enum from typing import Annotated, Final, NamedTuple, TypeAlias from common_library.basic_types import DEFAULT_FACTORY +from common_library.groups_enums import GroupType as GroupType from pydantic import BaseModel, ConfigDict, EmailStr, Field, field_validator from pydantic.types import PositiveInt from typing_extensions import TypedDict from .basic_types import IDStr -from .users import GroupID, UserID +from .users import UserID from .utils.common_validators import create_enums_pre_validator EVERYONE_GROUP_ID: Final[int] = 1 +GroupID: TypeAlias = PositiveInt -__all__: tuple[str, ...] = ("GroupID",) - - -class GroupTypeInModel(str, enum.Enum): - """ - standard: standard group, e.g. any group that is not a primary group or special group such as the everyone group - primary: primary group, e.g. the primary group is the user own defined group that typically only contain the user (same as in linux) - everyone: the only group for all users - """ - - STANDARD = "standard" - PRIMARY = "primary" - EVERYONE = "everyone" +__all__: tuple[str, ...] 
= ("GroupType",) class Group(BaseModel): gid: PositiveInt name: str description: str - group_type: Annotated[GroupTypeInModel, Field(alias="type")] + group_type: Annotated[GroupType, Field(alias="type")] thumbnail: str | None inclusion_rules: Annotated[ @@ -43,7 +32,7 @@ class Group(BaseModel): ] = DEFAULT_FACTORY _from_equivalent_enums = field_validator("group_type", mode="before")( - create_enums_pre_validator(GroupTypeInModel) + create_enums_pre_validator(GroupType) ) model_config = ConfigDict(populate_by_name=True) diff --git a/packages/models-library/src/models_library/services_access.py b/packages/models-library/src/models_library/services_access.py index 84dbd7d17a0..248e8f41e85 100644 --- a/packages/models-library/src/models_library/services_access.py +++ b/packages/models-library/src/models_library/services_access.py @@ -4,7 +4,7 @@ from pydantic import BaseModel, ConfigDict, Field -from .users import GroupID +from .groups import GroupID from .utils.change_case import snake_to_camel diff --git a/packages/models-library/src/models_library/users.py b/packages/models-library/src/models_library/users.py index 26bdf3e1798..af532978320 100644 --- a/packages/models-library/src/models_library/users.py +++ b/packages/models-library/src/models_library/users.py @@ -5,7 +5,6 @@ UserID: TypeAlias = PositiveInt UserNameID: TypeAlias = IDStr -GroupID: TypeAlias = PositiveInt FirstNameStr: TypeAlias = Annotated[ diff --git a/packages/models-library/src/models_library/workspaces.py b/packages/models-library/src/models_library/workspaces.py index 6c34efbf790..db4ff387404 100644 --- a/packages/models-library/src/models_library/workspaces.py +++ b/packages/models-library/src/models_library/workspaces.py @@ -12,7 +12,8 @@ ) from .access_rights import AccessRights -from .users import GroupID, UserID +from .groups import GroupID +from .users import UserID from .utils.enums import StrAutoEnum WorkspaceID: TypeAlias = PositiveInt diff --git 
a/packages/models-library/tests/test_api_schemas_webserver_socketio.py b/packages/models-library/tests/test_api_schemas_webserver_socketio.py index e5dfdbf7eff..a78ebea2432 100644 --- a/packages/models-library/tests/test_api_schemas_webserver_socketio.py +++ b/packages/models-library/tests/test_api_schemas_webserver_socketio.py @@ -3,7 +3,8 @@ import pytest from faker import Faker from models_library.api_schemas_webserver.socketio import SocketIORoomStr -from models_library.users import GroupID, UserID +from models_library.groups import GroupID +from models_library.users import UserID @pytest.fixture diff --git a/packages/notifications-library/tests/with_db/conftest.py b/packages/notifications-library/tests/with_db/conftest.py index 750f3cc24a4..9dda5da676d 100644 --- a/packages/notifications-library/tests/with_db/conftest.py +++ b/packages/notifications-library/tests/with_db/conftest.py @@ -11,8 +11,9 @@ import pytest import sqlalchemy as sa from models_library.basic_types import IDStr +from models_library.groups import GroupID from models_library.products import ProductName -from models_library.users import GroupID, UserID +from models_library.users import UserID from notifications_library._templates import get_default_named_templates from pydantic import validate_call from simcore_postgres_database.models.jinja2_templates import jinja2_templates diff --git a/packages/postgres-database/requirements/prod.txt b/packages/postgres-database/requirements/prod.txt index c4567926c6d..ba22361fcc3 100644 --- a/packages/postgres-database/requirements/prod.txt +++ b/packages/postgres-database/requirements/prod.txt @@ -8,4 +8,5 @@ --requirement _base.txt --requirement _migration.txt +simcore-common-library @ ../common-library/ simcore-postgres-database @ . 
diff --git a/packages/postgres-database/src/simcore_postgres_database/models/groups.py b/packages/postgres-database/src/simcore_postgres_database/models/groups.py index a70e9fa8db4..940e1a78769 100644 --- a/packages/postgres-database/src/simcore_postgres_database/models/groups.py +++ b/packages/postgres-database/src/simcore_postgres_database/models/groups.py @@ -4,27 +4,16 @@ - Groups have a ID, name and a list of users that belong to the group """ -import enum import sqlalchemy as sa +from common_library.groups_enums import GroupType from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.sql import func from ._common import RefActions from .base import metadata - -class GroupType(enum.Enum): - """ - standard: standard group, e.g. any group that is not a primary group or special group such as the everyone group - primary: primary group, e.g. the primary group is the user own defined group that typically only contain the user (same as in linux) - everyone: the only group for all users - """ - - STANDARD = "standard" - PRIMARY = "primary" - EVERYONE = "everyone" - +__all__: tuple[str, ...] 
= ("GroupType",) groups = sa.Table( "groups", diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/autoscaling.py b/packages/pytest-simcore/src/pytest_simcore/helpers/autoscaling.py new file mode 100644 index 00000000000..2d6c278d92c --- /dev/null +++ b/packages/pytest-simcore/src/pytest_simcore/helpers/autoscaling.py @@ -0,0 +1,73 @@ +from collections.abc import Callable + +import arrow +from aws_library.ec2 import EC2InstanceData +from models_library.generated_models.docker_rest_api import ( + Availability, + Node, + NodeState, +) +from pytest_mock import MockType +from simcore_service_autoscaling.models import AssociatedInstance, Cluster +from simcore_service_autoscaling.utils.utils_docker import ( + _OSPARC_NODE_TERMINATION_PROCESS_LABEL_KEY, + _OSPARC_SERVICE_READY_LABEL_KEY, + _OSPARC_SERVICES_READY_DATETIME_LABEL_KEY, +) + + +def assert_cluster_state( + spied_cluster_analysis: MockType, *, expected_calls: int, expected_num_machines: int +) -> Cluster: + assert spied_cluster_analysis.call_count == expected_calls + + assert isinstance(spied_cluster_analysis.spy_return, Cluster) + assert ( + spied_cluster_analysis.spy_return.total_number_of_machines() + == expected_num_machines + ) + print("current cluster state:", spied_cluster_analysis.spy_return) + cluster = spied_cluster_analysis.spy_return + spied_cluster_analysis.reset_mock() + return cluster + + +def create_fake_association( + create_fake_node: Callable[..., Node], + drained_machine_id: str | None, + terminating_machine_id: str | None, +): + fake_node_to_instance_map = {} + + async def _fake_node_creator( + _nodes: list[Node], ec2_instances: list[EC2InstanceData] + ) -> tuple[list[AssociatedInstance], list[EC2InstanceData]]: + def _create_fake_node_with_labels(instance: EC2InstanceData) -> Node: + if instance not in fake_node_to_instance_map: + fake_node = create_fake_node() + assert fake_node.spec + fake_node.spec.availability = Availability.active + assert fake_node.status + 
fake_node.status.state = NodeState.ready + assert fake_node.spec.labels + fake_node.spec.labels |= { + _OSPARC_SERVICES_READY_DATETIME_LABEL_KEY: arrow.utcnow().isoformat(), + _OSPARC_SERVICE_READY_LABEL_KEY: ( + "true" if instance.id != drained_machine_id else "false" + ), + } + if instance.id == terminating_machine_id: + fake_node.spec.labels |= { + _OSPARC_NODE_TERMINATION_PROCESS_LABEL_KEY: arrow.utcnow().isoformat() + } + fake_node_to_instance_map[instance] = fake_node + return fake_node_to_instance_map[instance] + + associated_instances = [ + AssociatedInstance(node=_create_fake_node_with_labels(i), ec2_instance=i) + for i in ec2_instances + ] + + return associated_instances, [] + + return _fake_node_creator diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/webserver_workspaces.py b/packages/pytest-simcore/src/pytest_simcore/helpers/webserver_workspaces.py index 3d1f33ab029..1dbe5ebeb42 100644 --- a/packages/pytest-simcore/src/pytest_simcore/helpers/webserver_workspaces.py +++ b/packages/pytest-simcore/src/pytest_simcore/helpers/webserver_workspaces.py @@ -1,6 +1,6 @@ import sqlalchemy as sa from aiohttp import web -from models_library.users import GroupID +from models_library.groups import GroupID from models_library.workspaces import WorkspaceID from simcore_postgres_database.models.workspaces_access_rights import ( workspaces_access_rights, diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/auth/api_keys.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/auth/api_keys.py index e70889e3de1..2609de81c5e 100644 --- a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/auth/api_keys.py +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/auth/api_keys.py @@ -26,7 +26,7 @@ async def create_api_key( product_name=product_name, api_key=api_key, ) - assert isinstance(result, ApiKeyGet) + assert isinstance(result, ApiKeyGet) # nosec return 
result @@ -45,7 +45,7 @@ async def get_api_key( product_name=product_name, api_key_id=api_key_id, ) - assert isinstance(result, ApiKeyGet) + assert isinstance(result, ApiKeyGet) # nosec return result @@ -63,4 +63,4 @@ async def delete_api_key( product_name=product_name, api_key_id=api_key_id, ) - assert result is None + assert result is None # nosec diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/licenses/__init__.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/licenses/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/licenses/licensed_items.py b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/licenses/licensed_items.py new file mode 100644 index 00000000000..e212854bae5 --- /dev/null +++ b/packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/licenses/licensed_items.py @@ -0,0 +1,104 @@ +import logging + +from models_library.api_schemas_webserver import WEBSERVER_RPC_NAMESPACE +from models_library.api_schemas_webserver.licensed_items import ( + LicensedItemGet, + LicensedItemGetPage, +) +from models_library.licensed_items import LicensedItemID +from models_library.products import ProductName +from models_library.rabbitmq_basic_types import RPCMethodName +from models_library.resource_tracker import ServiceRunId +from models_library.users import UserID +from models_library.wallets import WalletID +from pydantic import TypeAdapter +from servicelib.logging_utils import log_decorator +from servicelib.rabbitmq import RabbitMQRPCClient + +_logger = logging.getLogger(__name__) + + +@log_decorator(_logger, level=logging.DEBUG) +async def get_licensed_items( + rabbitmq_rpc_client: RabbitMQRPCClient, + *, + product_name: str, + offset: int, + limit: int, +) -> LicensedItemGetPage: + result: LicensedItemGetPage = await rabbitmq_rpc_client.request( + 
WEBSERVER_RPC_NAMESPACE, + TypeAdapter(RPCMethodName).validate_python("get_licensed_items"), + product_name=product_name, + offset=offset, + limit=limit, + ) + assert isinstance(result, LicensedItemGetPage) + return result + + +@log_decorator(_logger, level=logging.DEBUG) +async def get_licensed_items_for_wallet( + rabbitmq_rpc_client: RabbitMQRPCClient, + *, + user_id: UserID, + product_name: ProductName, + wallet_id: WalletID, +) -> LicensedItemGet: + result: LicensedItemGet = await rabbitmq_rpc_client.request( + WEBSERVER_RPC_NAMESPACE, + TypeAdapter(RPCMethodName).validate_python("get_licensed_items_for_wallet"), + user_id=user_id, + product_name=product_name, + wallet_id=wallet_id, + ) + assert isinstance(result, LicensedItemGet) # nosec + return result + + +@log_decorator(_logger, level=logging.DEBUG) +async def checkout_licensed_item_for_wallet( + rabbitmq_rpc_client: RabbitMQRPCClient, + *, + user_id: UserID, + product_name: ProductName, + wallet_id: WalletID, + licensed_item_id: LicensedItemID, + num_of_seats: int, + service_run_id: ServiceRunId, +) -> None: + result = await rabbitmq_rpc_client.request( + WEBSERVER_RPC_NAMESPACE, + TypeAdapter(RPCMethodName).validate_python("checkout_licensed_item_for_wallet"), + user_id=user_id, + product_name=product_name, + wallet_id=wallet_id, + licensed_item_id=licensed_item_id, + num_of_seats=num_of_seats, + service_run_id=service_run_id, + ) + assert result is None # nosec + + +@log_decorator(_logger, level=logging.DEBUG) +async def release_licensed_item_for_wallet( + rabbitmq_rpc_client: RabbitMQRPCClient, + *, + user_id: UserID, + product_name: ProductName, + wallet_id: WalletID, + licensed_item_id: LicensedItemID, + num_of_seats: int, + service_run_id: ServiceRunId, +) -> None: + result = await rabbitmq_rpc_client.request( + WEBSERVER_RPC_NAMESPACE, + TypeAdapter(RPCMethodName).validate_python("release_licensed_item_for_wallet"), + user_id=user_id, + product_name=product_name, + wallet_id=wallet_id, + 
licensed_item_id=licensed_item_id, + num_of_seats=num_of_seats, + service_run_id=service_run_id, + ) + assert result is None # nosec diff --git a/scripts/maintenance/computational-clusters/autoscaled_monitor/core.py b/scripts/maintenance/computational-clusters/autoscaled_monitor/core.py index c0c4ba7bed6..540b4581ab6 100755 --- a/scripts/maintenance/computational-clusters/autoscaled_monitor/core.py +++ b/scripts/maintenance/computational-clusters/autoscaled_monitor/core.py @@ -138,7 +138,6 @@ def _print_dynamic_instances( f"{utils.color_encode_with_state(instance.name, instance.ec2_instance)}", f"ID: {instance.ec2_instance.instance_id}", f"AMI: {instance.ec2_instance.image_id}", - f"AMI name: {instance.ec2_instance.image.name}", f"Type: {instance.ec2_instance.instance_type}", f"Up: {utils.timedelta_formatting(time_now - instance.ec2_instance.launch_time, color_code=True)}", f"ExtIP: {instance.ec2_instance.public_ip_address}", @@ -183,7 +182,6 @@ def _print_computational_clusters( f"Name: {cluster.primary.name}", f"ID: {cluster.primary.ec2_instance.id}", f"AMI: {cluster.primary.ec2_instance.image_id}", - f"AMI name: {cluster.primary.ec2_instance.image.name}", f"Type: {cluster.primary.ec2_instance.instance_type}", f"Up: {utils.timedelta_formatting(time_now - cluster.primary.ec2_instance.launch_time, color_code=True)}", f"ExtIP: {cluster.primary.ec2_instance.public_ip_address}", @@ -229,7 +227,6 @@ def _print_computational_clusters( f"Name: {worker.name}", f"ID: {worker.ec2_instance.id}", f"AMI: {worker.ec2_instance.image_id}", - f"AMI name: {worker.ec2_instance.image.name}", f"Type: {worker.ec2_instance.instance_type}", f"Up: {utils.timedelta_formatting(time_now - worker.ec2_instance.launch_time, color_code=True)}", f"ExtIP: {worker.ec2_instance.public_ip_address}", diff --git a/services/api-server/src/simcore_service_api_server/models/schemas/model_adapter.py b/services/api-server/src/simcore_service_api_server/models/schemas/model_adapter.py index 
9cc8b768d45..3b88cd82ef7 100644 --- a/services/api-server/src/simcore_service_api_server/models/schemas/model_adapter.py +++ b/services/api-server/src/simcore_service_api_server/models/schemas/model_adapter.py @@ -17,13 +17,13 @@ WalletGetWithAvailableCredits as _WalletGetWithAvailableCredits, ) from models_library.basic_types import IDStr, NonNegativeDecimal +from models_library.groups import GroupID from models_library.resource_tracker import ( PricingPlanClassification, PricingPlanId, PricingUnitId, UnitExtraInfo, ) -from models_library.users import GroupID from models_library.wallets import WalletID, WalletStatus from pydantic import ( BaseModel, diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py index 8d5ff16dd9a..e2212195aed 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py @@ -16,7 +16,6 @@ Resources, ) from aws_library.ec2._errors import EC2TooManyInstancesError -from aws_library.ec2._models import AWSTagValue from fastapi import FastAPI from models_library.generated_models.docker_rest_api import Node, NodeState from servicelib.logging_utils import log_catch, log_context @@ -265,9 +264,7 @@ async def _make_pending_buffer_ec2s_join_cluster( await ec2_client.set_instances_tags( buffer_ec2_ready_for_command, tags={ - DOCKER_JOIN_COMMAND_EC2_TAG_KEY: AWSTagValue( - ssm_command.command_id - ), + DOCKER_JOIN_COMMAND_EC2_TAG_KEY: ssm_command.command_id, }, ) return cluster @@ -359,10 +356,10 @@ async def _activate_and_notify( app: FastAPI, auto_scaling_mode: BaseAutoscaling, drained_node: AssociatedInstance, -) -> None: +) -> AssociatedInstance: app_settings = get_application_settings(app) docker_client = get_docker_client(app) - await asyncio.gather( + updated_node, *_ = await asyncio.gather( 
utils_docker.set_node_osparc_ready( app_settings, docker_client, drained_node.node, ready=True ), @@ -376,6 +373,7 @@ async def _activate_and_notify( app, drained_node.assigned_tasks, progress=1.0 ), ) + return dataclasses.replace(drained_node, node=updated_node) async def _activate_drained_nodes( @@ -389,14 +387,19 @@ async def _activate_drained_nodes( if node.assigned_tasks ] - # activate these nodes now - await asyncio.gather( - *( - _activate_and_notify(app, auto_scaling_mode, node) - for node in nodes_to_activate + if not nodes_to_activate: + return cluster + + with log_context( + _logger, logging.INFO, f"activate {len(nodes_to_activate)} drained nodes" + ): + activated_nodes = await asyncio.gather( + *( + _activate_and_notify(app, auto_scaling_mode, node) + for node in nodes_to_activate + ) ) - ) - new_active_node_ids = {node.ec2_instance.id for node in nodes_to_activate} + new_active_node_ids = {node.ec2_instance.id for node in activated_nodes} remaining_drained_nodes = [ node for node in cluster.drained_nodes @@ -409,7 +412,7 @@ async def _activate_drained_nodes( ] return dataclasses.replace( cluster, - active_nodes=cluster.active_nodes + nodes_to_activate, + active_nodes=cluster.active_nodes + activated_nodes, drained_nodes=remaining_drained_nodes, buffer_drained_nodes=remaining_reserved_drained_nodes, ) @@ -424,12 +427,17 @@ async def _start_buffer_instances( if not instances_to_start: return cluster # change the buffer machine to an active one - await get_ec2_client(app).set_instances_tags( - instances_to_start, - tags=get_activated_buffer_ec2_tags(app, auto_scaling_mode), - ) + with log_context( + _logger, logging.INFO, f"start {len(instances_to_start)} buffer machines" + ): + await get_ec2_client(app).set_instances_tags( + instances_to_start, + tags=get_activated_buffer_ec2_tags(app, auto_scaling_mode), + ) - started_instances = await get_ec2_client(app).start_instances(instances_to_start) + started_instances = await 
get_ec2_client(app).start_instances( + instances_to_start + ) started_instance_ids = [i.id for i in started_instances] return dataclasses.replace( @@ -541,7 +549,8 @@ async def _assign_tasks_to_current_cluster( if unassigned_tasks: _logger.info( - "the current cluster should cope with %s tasks, %s are unnassigned/queued tasks and will need new EC2s", + "the current cluster should cope with %s tasks, %s are unnassigned/queued " + "tasks and need to wait or get new EC2s", len(tasks) - len(unassigned_tasks), len(unassigned_tasks), ) @@ -617,9 +626,10 @@ async def _find_needed_instances( _logger.exception("Unexpected error:") _logger.info( - "found following needed instances: %s", + "found following %s needed instances: %s", + len(needed_new_instance_types_for_tasks), [ - f"{i.instance_type.name=}:{i.instance_type.resources} with {len(i.assigned_tasks)} tasks" + f"{i.instance_type.name}:{i.instance_type.resources} takes {len(i.assigned_tasks)} task{'s' if len(i.assigned_tasks)>1 else ''}" for i in needed_new_instance_types_for_tasks ], ) @@ -811,39 +821,6 @@ async def _launch_instances( return new_pending_instances -async def _scale_up_cluster( - app: FastAPI, - cluster: Cluster, - unassigned_tasks: list, - auto_scaling_mode: BaseAutoscaling, - allowed_instance_types: list[EC2InstanceType], -) -> Cluster: - app_settings: ApplicationSettings = app.state.settings - assert app_settings.AUTOSCALING_EC2_ACCESS # nosec - assert app_settings.AUTOSCALING_EC2_INSTANCES # nosec - - # let's start these - if needed_ec2_instances := await _find_needed_instances( - app, unassigned_tasks, allowed_instance_types, cluster, auto_scaling_mode - ): - await auto_scaling_mode.log_message_from_tasks( - app, - unassigned_tasks, - "service is pending due to missing resources, scaling up cluster now...", - level=logging.INFO, - ) - new_pending_instances = await _launch_instances( - app, needed_ec2_instances, unassigned_tasks, auto_scaling_mode - ) - cluster.pending_ec2s.extend( - 
[NonAssociatedInstance(ec2_instance=i) for i in new_pending_instances] - ) - # NOTE: to check the logs of UserData in EC2 instance - # run: tail -f -n 1000 /var/log/cloud-init-output.log in the instance - - return cluster - - async def _find_drainable_nodes( app: FastAPI, cluster: Cluster ) -> list[AssociatedInstance]: @@ -899,23 +876,25 @@ async def _deactivate_empty_nodes(app: FastAPI, cluster: Cluster) -> Cluster: if not active_empty_instances: return cluster - # drain this empty nodes - updated_nodes: list[Node] = await asyncio.gather( - *( - utils_docker.set_node_osparc_ready( - app_settings, - docker_client, - node.node, - ready=False, + with log_context( + _logger, logging.INFO, f"drain {len(active_empty_instances)} empty nodes" + ): + updated_nodes = await asyncio.gather( + *( + utils_docker.set_node_osparc_ready( + app_settings, + docker_client, + node.node, + ready=False, + ) + for node in active_empty_instances ) - for node in active_empty_instances - ) - ) - if updated_nodes: - _logger.info( - "following nodes were set to drain: '%s'", - f"{[node.description.hostname for node in updated_nodes if node.description]}", ) + if updated_nodes: + _logger.info( + "following nodes were set to drain: '%s'", + f"{[node.description.hostname for node in updated_nodes if node.description]}", + ) newly_drained_instances = [ AssociatedInstance(node=node, ec2_instance=instance.ec2_instance) for instance, node in zip(active_empty_instances, updated_nodes, strict=True) @@ -945,7 +924,7 @@ async def _find_terminateable_instances( for instance in cluster.drained_nodes: node_last_updated = utils_docker.get_node_last_readyness_update(instance.node) elapsed_time_since_drained = ( - datetime.datetime.now(datetime.timezone.utc) - node_last_updated + datetime.datetime.now(datetime.UTC) - node_last_updated ) _logger.debug("%s", f"{node_last_updated=}, {elapsed_time_since_drained=}") if ( @@ -985,6 +964,9 @@ async def _try_scale_down_cluster(app: FastAPI, cluster: Cluster) -> 
Cluster: get_docker_client(app), instance.node ) new_terminating_instances.append(instance) + new_terminating_instance_ids = [ + i.ec2_instance.id for i in new_terminating_instances + ] # instances that are in the termination process and already waited long enough are terminated. now = arrow.utcnow().datetime @@ -1016,12 +998,18 @@ async def _try_scale_down_cluster(app: FastAPI, cluster: Cluster) -> Cluster: still_drained_nodes = [ i for i in cluster.drained_nodes + if i.ec2_instance.id + not in (new_terminating_instance_ids + terminated_instance_ids) + ] + still_terminating_nodes = [ + i + for i in cluster.terminating_nodes if i.ec2_instance.id not in terminated_instance_ids ] return dataclasses.replace( cluster, drained_nodes=still_drained_nodes, - terminating_nodes=cluster.terminating_nodes + new_terminating_instances, + terminating_nodes=still_terminating_nodes + new_terminating_instances, terminated_instances=cluster.terminated_instances + [ NonAssociatedInstance(ec2_instance=i.ec2_instance) @@ -1043,7 +1031,7 @@ async def _notify_based_on_machine_type( app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_START_TIME ) launch_time_to_tasks: dict[datetime.datetime, list] = collections.defaultdict(list) - now = datetime.datetime.now(datetime.timezone.utc) + now = datetime.datetime.now(datetime.UTC) for instance in instances: launch_time_to_tasks[ instance.ec2_instance.launch_time @@ -1089,7 +1077,7 @@ async def _drain_retired_nodes( app_settings = get_application_settings(app) docker_client = get_docker_client(app) # drain this empty nodes - updated_nodes: list[Node] = await asyncio.gather( + updated_nodes = await asyncio.gather( *( utils_docker.set_node_osparc_ready( app_settings, @@ -1116,64 +1104,103 @@ async def _drain_retired_nodes( ) -async def _autoscale_cluster( +async def _scale_down_unused_cluster_instances( app: FastAPI, cluster: Cluster, auto_scaling_mode: BaseAutoscaling, - allowed_instance_types: list[EC2InstanceType], ) -> Cluster: - # 1. 
check if we have pending tasks and resolve them by activating some drained nodes - unrunnable_tasks = await auto_scaling_mode.list_unrunnable_tasks(app) - _logger.info("found %s unrunnable tasks", len(unrunnable_tasks)) - # NOTE: this function predicts how dask will assign a task to a machine - queued_or_missing_instance_tasks, cluster = await _assign_tasks_to_current_cluster( - app, unrunnable_tasks, cluster, auto_scaling_mode - ) - # 2. try to activate drained nodes to cover some of the tasks - cluster = await _activate_drained_nodes(app, cluster, auto_scaling_mode) + await auto_scaling_mode.try_retire_nodes(app) + cluster = await _deactivate_empty_nodes(app, cluster) + return await _try_scale_down_cluster(app, cluster) - # 3. start buffer instances to cover the remaining tasks - cluster = await _start_buffer_instances(app, cluster, auto_scaling_mode) - # 4. let's check if there are still pending tasks or if the reserve was used +async def _scale_up_cluster( + app: FastAPI, + cluster: Cluster, + auto_scaling_mode: BaseAutoscaling, + allowed_instance_types: list[EC2InstanceType], + unassigned_tasks: list, +) -> Cluster: app_settings = get_application_settings(app) assert app_settings.AUTOSCALING_EC2_INSTANCES # nosec - if queued_or_missing_instance_tasks or ( + if not unassigned_tasks and ( len(cluster.buffer_drained_nodes) - < app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER + >= app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER ): - if ( - cluster.total_number_of_machines() - < app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES - ): - _logger.info( - "%s unrunnable tasks could not be assigned, slowly trying to scale up...", - len(queued_or_missing_instance_tasks), - ) - cluster = await _scale_up_cluster( - app, - cluster, - queued_or_missing_instance_tasks, - auto_scaling_mode, - allowed_instance_types, - ) + return cluster - elif ( - len(queued_or_missing_instance_tasks) == len(unrunnable_tasks) == 0 - and 
cluster.can_scale_down() + if ( + cluster.total_number_of_machines() + >= app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES ): _logger.info( - "there is %s waiting task, slowly and gracefully scaling down...", - len(queued_or_missing_instance_tasks), + "cluster already hit the maximum allowed amount of instances (%s), not scaling up. " + "%s tasks will wait until instances are free.", + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + len(unassigned_tasks), ) - # NOTE: we only scale down in case we did not just scale up. The swarm needs some time to adjust - await auto_scaling_mode.try_retire_nodes(app) - cluster = await _deactivate_empty_nodes(app, cluster) - cluster = await _try_scale_down_cluster(app, cluster) + return cluster + + # now we scale up + assert app_settings.AUTOSCALING_EC2_ACCESS # nosec + + # let's start these + if needed_ec2_instances := await _find_needed_instances( + app, unassigned_tasks, allowed_instance_types, cluster, auto_scaling_mode + ): + await auto_scaling_mode.log_message_from_tasks( + app, + unassigned_tasks, + "service is pending due to missing resources, scaling up cluster now...", + level=logging.INFO, + ) + new_pending_instances = await _launch_instances( + app, needed_ec2_instances, unassigned_tasks, auto_scaling_mode + ) + cluster.pending_ec2s.extend( + [NonAssociatedInstance(ec2_instance=i) for i in new_pending_instances] + ) + # NOTE: to check the logs of UserData in EC2 instance + # run: tail -f -n 1000 /var/log/cloud-init-output.log in the instance return cluster +async def _autoscale_cluster( + app: FastAPI, + cluster: Cluster, + auto_scaling_mode: BaseAutoscaling, + allowed_instance_types: list[EC2InstanceType], +) -> Cluster: + # 1. 
check if we have pending tasks + unnasigned_pending_tasks = await auto_scaling_mode.list_unrunnable_tasks(app) + _logger.info( + "found %s pending task%s", + len(unnasigned_pending_tasks), + "s" if len(unnasigned_pending_tasks) > 1 else "", + ) + # NOTE: this function predicts how the backend will assign tasks + still_pending_tasks, cluster = await _assign_tasks_to_current_cluster( + app, unnasigned_pending_tasks, cluster, auto_scaling_mode + ) + + # 2. activate available drained nodes to cover some of the tasks + cluster = await _activate_drained_nodes(app, cluster, auto_scaling_mode) + + # 3. start buffer instances to cover the remaining tasks + cluster = await _start_buffer_instances(app, cluster, auto_scaling_mode) + + # 4. scale down unused instances + cluster = await _scale_down_unused_cluster_instances( + app, cluster, auto_scaling_mode + ) + + # 5. scale up if necessary + return await _scale_up_cluster( + app, cluster, auto_scaling_mode, allowed_instance_types, still_pending_tasks + ) + + async def _notify_autoscaling_status( app: FastAPI, cluster: Cluster, auto_scaling_mode: BaseAutoscaling ) -> None: diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py index a632afe956e..6a133e565cb 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_computational.py @@ -69,6 +69,7 @@ async def list_unrunnable_tasks(app: FastAPI) -> list[DaskTask]: _scheduler_url(app), _scheduler_auth(app) ) # NOTE: any worker "processing" more than 1 task means that the other tasks are queued! + # NOTE: that is not necessarily true, in cases where 1 worker takes multiple tasks?? 
(osparc.io) processing_tasks_by_worker = await dask.list_processing_tasks_per_worker( _scheduler_url(app), _scheduler_auth(app) ) @@ -76,7 +77,7 @@ async def list_unrunnable_tasks(app: FastAPI) -> list[DaskTask]: for tasks in processing_tasks_by_worker.values(): queued_tasks += tasks[1:] _logger.debug( - "found %s unrunnable tasks and %s potentially queued tasks", + "found %s pending tasks and %s potentially queued tasks", len(unrunnable_tasks), len(queued_tasks), ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py index 4758c91a12f..65caa0f40b1 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py @@ -82,19 +82,26 @@ ).validate_python("io.simcore.osparc-node-termination-started") +def _get_node_creation_date(node: Node) -> datetime.datetime: + assert node.created_at # nosec + return arrow.get(node.created_at).datetime + + async def get_monitored_nodes( docker_client: AutoscalingDocker, node_labels: list[DockerLabelKey] ) -> list[Node]: node_label_filters = [f"{label}=true" for label in node_labels] + [ f"{label}" for label in _OSPARC_SERVICE_READY_LABEL_KEYS ] - return TypeAdapter(list[Node]).validate_python( + list_of_nodes = TypeAdapter(list[Node]).validate_python( await docker_client.nodes.list(filters={"node.label": node_label_filters}) ) + list_of_nodes.sort(key=_get_node_creation_date) + return list_of_nodes async def get_worker_nodes(docker_client: AutoscalingDocker) -> list[Node]: - return TypeAdapter(list[Node]).validate_python( + list_of_nodes = TypeAdapter(list[Node]).validate_python( await docker_client.nodes.list( filters={ "role": ["worker"], @@ -104,6 +111,8 @@ async def get_worker_nodes(docker_client: AutoscalingDocker) -> list[Node]: } ) ) + list_of_nodes.sort(key=_get_node_creation_date) + return list_of_nodes async def 
remove_nodes( diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py index eccee9967b5..4a48f2776b6 100644 --- a/services/autoscaling/tests/unit/conftest.py +++ b/services/autoscaling/tests/unit/conftest.py @@ -11,7 +11,7 @@ from collections.abc import AsyncIterator, Awaitable, Callable, Iterator from copy import deepcopy from pathlib import Path -from typing import Any, Final, cast, get_args +from typing import Any, Final, TypeAlias, cast, get_args from unittest import mock import aiodocker @@ -46,6 +46,7 @@ TaskSpec, ) from pydantic import ByteSize, PositiveInt, TypeAdapter +from pytest_mock import MockType from pytest_mock.plugin import MockerFixture from pytest_simcore.helpers.host import get_localhost_ip from pytest_simcore.helpers.logging_tools import log_context @@ -69,6 +70,7 @@ Cluster, DaskTaskResources, ) +from simcore_service_autoscaling.modules import auto_scaling_core from simcore_service_autoscaling.modules.docker import AutoscalingDocker from simcore_service_autoscaling.modules.ec2 import SimcoreEC2API from simcore_service_autoscaling.utils.utils_docker import ( @@ -176,7 +178,11 @@ def app_with_docker_join_drained( @pytest.fixture(scope="session") def fake_ssm_settings() -> SSMSettings: assert "json_schema_extra" in SSMSettings.model_config - return SSMSettings(**SSMSettings.model_config["json_schema_extra"]["examples"][0]) + assert isinstance(SSMSettings.model_config["json_schema_extra"], dict) + assert isinstance(SSMSettings.model_config["json_schema_extra"]["examples"], list) + return SSMSettings.model_validate( + SSMSettings.model_config["json_schema_extra"]["examples"][0] + ) @pytest.fixture @@ -220,6 +226,11 @@ def app_environment( delenvs_from_dict(monkeypatch, mock_env_devel_environment, raising=False) return setenvs_from_dict(monkeypatch, {**external_envfile_dict}) + assert "json_schema_extra" in EC2InstanceBootSpecific.model_config + assert 
isinstance(EC2InstanceBootSpecific.model_config["json_schema_extra"], dict) + assert isinstance( + EC2InstanceBootSpecific.model_config["json_schema_extra"]["examples"], list + ) envs = setenvs_from_dict( monkeypatch, { @@ -263,6 +274,11 @@ def mocked_ec2_instances_envs( aws_allowed_ec2_instance_type_names: list[InstanceTypeType], aws_instance_profile: str, ) -> EnvVarsDict: + assert "json_schema_extra" in EC2InstanceBootSpecific.model_config + assert isinstance(EC2InstanceBootSpecific.model_config["json_schema_extra"], dict) + assert isinstance( + EC2InstanceBootSpecific.model_config["json_schema_extra"]["examples"], list + ) envs = setenvs_from_dict( monkeypatch, { @@ -271,10 +287,13 @@ def mocked_ec2_instances_envs( "EC2_INSTANCES_SUBNET_ID": aws_subnet_id, "EC2_INSTANCES_ALLOWED_TYPES": json.dumps( { - ec2_type_name: random.choice( # noqa: S311 - EC2InstanceBootSpecific.model_config["json_schema_extra"][ - "examples" - ] + ec2_type_name: cast( + dict, + random.choice( # noqa: S311 + EC2InstanceBootSpecific.model_config["json_schema_extra"][ + "examples" + ] + ), ) | {"ami_id": aws_ami_id} for ec2_type_name in aws_allowed_ec2_instance_type_names @@ -491,22 +510,23 @@ def create_fake_node(faker: Faker) -> Callable[..., DockerNode]: def _creator(**node_overrides) -> DockerNode: default_config = { "ID": faker.uuid4(), - "Version": ObjectVersion(Index=faker.pyint()), - "CreatedAt": datetime.datetime.now(tz=datetime.timezone.utc).isoformat(), - "UpdatedAt": datetime.datetime.now(tz=datetime.timezone.utc).isoformat(), + "Version": ObjectVersion(index=faker.pyint()), + "CreatedAt": datetime.datetime.now(tz=datetime.UTC).isoformat(), + "UpdatedAt": datetime.datetime.now(tz=datetime.UTC).isoformat(), "Description": NodeDescription( - Hostname=faker.pystr(), - Resources=ResourceObject( - NanoCPUs=int(9 * 1e9), MemoryBytes=256 * 1024 * 1024 * 1024 + hostname=faker.pystr(), + resources=ResourceObject( + nano_cp_us=int(9 * 1e9), + 
memory_bytes=TypeAdapter(ByteSize).validate_python("256GiB"), ), ), "Spec": NodeSpec( - Name=None, - Labels=faker.pydict(allowed_types=(str,)), - Role=None, - Availability=Availability.drain, + name=None, + labels=faker.pydict(allowed_types=(str,)), + role=None, + availability=Availability.drain, ), - "Status": NodeStatus(State=NodeState.unknown, Message=None, Addr=None), + "Status": NodeStatus(state=NodeState.unknown, message=None, addr=None), } default_config.update(**node_overrides) return DockerNode(**default_config) @@ -529,7 +549,7 @@ def task_template() -> dict[str, Any]: _GIGA_NANO_CPU = 10**9 -NUM_CPUS = PositiveInt +NUM_CPUS: TypeAlias = PositiveInt @pytest.fixture @@ -704,6 +724,7 @@ async def _assert_wait_for_service_state( after=after_log(ctx.logger, logging.DEBUG), ) async def _() -> None: + assert service.id services = await async_docker_client.services.list( filters={"id": service.id} ) @@ -761,7 +782,9 @@ def aws_allowed_ec2_instance_type_names_env( @pytest.fixture def host_cpu_count() -> int: - return psutil.cpu_count() + cpus = psutil.cpu_count() + assert cpus is not None + return cpus @pytest.fixture @@ -853,9 +876,7 @@ async def _fake_set_node_availability( returned_node.spec.availability = ( Availability.active if available else Availability.drain ) - returned_node.updated_at = datetime.datetime.now( - tz=datetime.timezone.utc - ).isoformat() + returned_node.updated_at = datetime.datetime.now(tz=datetime.UTC).isoformat() return returned_node return mocker.patch( @@ -890,7 +911,7 @@ async def fake_tag_node( @pytest.fixture -def patch_ec2_client_launch_instancess_min_number_of_instances( +def patch_ec2_client_launch_instances_min_number_of_instances( mocker: MockerFixture, ) -> mock.Mock: """the moto library always returns min number of instances instead of max number of instances which makes @@ -954,7 +975,7 @@ def _creator( return AssociatedInstance( node=node, ec2_instance=fake_ec2_instance_data( - 
launch_time=datetime.datetime.now(datetime.timezone.utc) + launch_time=datetime.datetime.now(datetime.UTC) - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION - datetime.timedelta( days=faker.pyint(min_value=0, max_value=100), @@ -1002,3 +1023,22 @@ def with_short_ec2_instances_max_start_time( "EC2_INSTANCES_MAX_START_TIME": f"{short_ec2_instance_max_start_time}", }, ) + + +@pytest.fixture +async def spied_cluster_analysis(mocker: MockerFixture) -> MockType: + return mocker.spy(auto_scaling_core, "_analyze_current_cluster") + + +@pytest.fixture +async def mocked_associate_ec2_instances_with_nodes(mocker: MockerFixture) -> mock.Mock: + async def _( + nodes: list[DockerNode], ec2_instances: list[EC2InstanceData] + ) -> tuple[list[AssociatedInstance], list[EC2InstanceData]]: + return [], ec2_instances + + return mocker.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.associate_ec2_instances_with_nodes", + autospec=True, + side_effect=_, + ) diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py index f9e0e4c416d..6e7a0d7c828 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py +++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py @@ -11,10 +11,10 @@ import datetime import logging from collections import defaultdict -from collections.abc import Callable, Iterator +from collections.abc import Awaitable, Callable, Iterator from copy import deepcopy from dataclasses import dataclass -from typing import Any +from typing import Any, Final, cast from unittest import mock import arrow @@ -32,7 +32,11 @@ from models_library.generated_models.docker_rest_api import NodeState, NodeStatus from models_library.rabbitmq_messages import RabbitAutoscalingStatusMessage from pydantic import ByteSize, TypeAdapter -from pytest_mock import MockerFixture +from pytest_mock import 
MockerFixture, MockType +from pytest_simcore.helpers.autoscaling import ( + assert_cluster_state, + create_fake_association, +) from pytest_simcore.helpers.aws_ec2 import assert_autoscaled_computational_ec2_instances from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict from simcore_service_autoscaling.core.settings import ApplicationSettings @@ -43,7 +47,6 @@ ) from simcore_service_autoscaling.modules.dask import DaskTaskResources from simcore_service_autoscaling.modules.docker import get_docker_client -from simcore_service_autoscaling.modules.ec2 import SimcoreEC2API from simcore_service_autoscaling.utils.utils_docker import ( _OSPARC_NODE_EMPTY_DATETIME_LABEL_KEY, _OSPARC_NODE_TERMINATION_PROCESS_LABEL_KEY, @@ -182,6 +185,126 @@ def ec2_instance_custom_tags( } +@pytest.fixture +def create_dask_task_resources() -> ( + Callable[[InstanceTypeType | None, Resources], DaskTaskResources] +): + def _do( + ec2_instance_type: InstanceTypeType | None, task_resource: Resources + ) -> DaskTaskResources: + resources = _dask_task_resources_from_resources(task_resource) + if ec2_instance_type is not None: + resources[create_ec2_resource_constraint_key(ec2_instance_type)] = 1 + return resources + + return _do + + +@pytest.fixture +def mock_dask_get_worker_has_results_in_memory(mocker: MockerFixture) -> mock.Mock: + return mocker.patch( + "simcore_service_autoscaling.modules.dask.get_worker_still_has_results_in_memory", + return_value=0, + autospec=True, + ) + + +@pytest.fixture +def mock_dask_get_worker_used_resources(mocker: MockerFixture) -> mock.Mock: + return mocker.patch( + "simcore_service_autoscaling.modules.dask.get_worker_used_resources", + return_value=Resources.create_as_empty(), + autospec=True, + ) + + +@pytest.fixture +def mock_dask_is_worker_connected(mocker: MockerFixture) -> mock.Mock: + return mocker.patch( + "simcore_service_autoscaling.modules.dask.is_worker_connected", + return_value=True, + autospec=True, + ) + + +async def 
_create_task_with_resources( + ec2_client: EC2Client, + dask_task_imposed_ec2_type: InstanceTypeType | None, + task_resources: Resources | None, + create_dask_task_resources: Callable[ + [InstanceTypeType | None, Resources], DaskTaskResources + ], + create_dask_task: Callable[[DaskTaskResources], distributed.Future], +) -> distributed.Future: + if dask_task_imposed_ec2_type and not task_resources: + instance_types = await ec2_client.describe_instance_types( + InstanceTypes=[dask_task_imposed_ec2_type] + ) + assert instance_types + assert "InstanceTypes" in instance_types + assert instance_types["InstanceTypes"] + assert "MemoryInfo" in instance_types["InstanceTypes"][0] + assert "SizeInMiB" in instance_types["InstanceTypes"][0]["MemoryInfo"] + task_resources = Resources( + cpus=1, + ram=TypeAdapter(ByteSize).validate_python( + f"{instance_types['InstanceTypes'][0]['MemoryInfo']['SizeInMiB']}MiB", + ), + ) + + assert task_resources + dask_task_resources = create_dask_task_resources( + dask_task_imposed_ec2_type, task_resources + ) + dask_future = create_dask_task(dask_task_resources) + assert dask_future + return dask_future + + +@dataclass(kw_only=True) +class _ScaleUpParams: + imposed_instance_type: InstanceTypeType | None + task_resources: Resources | None + num_tasks: int + expected_instance_type: InstanceTypeType + expected_num_instances: int + + +_RESOURCE_TO_DASK_RESOURCE_MAP: Final[dict[str, str]] = {"CPUS": "CPU", "RAM": "RAM"} + + +def _dask_task_resources_from_resources(resources: Resources) -> DaskTaskResources: + return { + _RESOURCE_TO_DASK_RESOURCE_MAP[res_key.upper()]: res_value + for res_key, res_value in resources.model_dump().items() + } + + +@pytest.fixture +async def create_tasks_batch( + ec2_client: EC2Client, + create_dask_task: Callable[[DaskTaskResources], distributed.Future], + create_dask_task_resources: Callable[ + [InstanceTypeType | None, Resources], DaskTaskResources + ], +) -> Callable[[_ScaleUpParams], 
Awaitable[list[distributed.Future]]]: + async def _(scale_up_params: _ScaleUpParams) -> list[distributed.Future]: + return await asyncio.gather( + *( + _create_task_with_resources( + ec2_client, + scale_up_params.imposed_instance_type, + scale_up_params.task_resources, + create_dask_task_resources, + create_dask_task, + ) + for _ in range(scale_up_params.num_tasks) + ) + ) + + return _ + + async def test_cluster_scaling_with_no_tasks_does_nothing( minimal_configuration: None, app_settings: ApplicationSettings, @@ -259,103 +382,52 @@ async def test_cluster_scaling_with_task_with_too_much_resources_starts_nothing( ) -@pytest.fixture -def create_dask_task_resources() -> Callable[..., DaskTaskResources]: - def _do( - ec2_instance_type: InstanceTypeType | None, ram: ByteSize - ) -> DaskTaskResources: - resources = DaskTaskResources( - { - "RAM": int(ram), - } - ) - if ec2_instance_type is not None: - resources[create_ec2_resource_constraint_key(ec2_instance_type)] = 1 - return resources - - return _do - - -@pytest.fixture -def mock_dask_get_worker_has_results_in_memory(mocker: MockerFixture) -> mock.Mock: - return mocker.patch( - "simcore_service_autoscaling.modules.dask.get_worker_still_has_results_in_memory", - return_value=0, - autospec=True, - ) - - -@pytest.fixture -def mock_dask_get_worker_used_resources(mocker: MockerFixture) -> mock.Mock: - return mocker.patch( - "simcore_service_autoscaling.modules.dask.get_worker_used_resources", - return_value=Resources.create_as_empty(), - autospec=True, - ) - - -@pytest.fixture -def mock_dask_is_worker_connected(mocker: MockerFixture) -> mock.Mock: - return mocker.patch( - "simcore_service_autoscaling.modules.dask.is_worker_connected", - return_value=True, - autospec=True, - ) - - -async def _create_task_with_resources( - ec2_client: EC2Client, - dask_task_imposed_ec2_type: InstanceTypeType | None, - dask_ram: ByteSize | None, - create_dask_task_resources: Callable[..., DaskTaskResources], - create_dask_task: 
Callable[[DaskTaskResources], distributed.Future], -) -> distributed.Future: - if dask_task_imposed_ec2_type and not dask_ram: - instance_types = await ec2_client.describe_instance_types( - InstanceTypes=[dask_task_imposed_ec2_type] - ) - assert instance_types - assert "InstanceTypes" in instance_types - assert instance_types["InstanceTypes"] - assert "MemoryInfo" in instance_types["InstanceTypes"][0] - assert "SizeInMiB" in instance_types["InstanceTypes"][0]["MemoryInfo"] - dask_ram = TypeAdapter(ByteSize).validate_python( - f"{instance_types['InstanceTypes'][0]['MemoryInfo']['SizeInMiB']}MiB", - ) - dask_task_resources = create_dask_task_resources( - dask_task_imposed_ec2_type, dask_ram - ) - dask_future = create_dask_task(dask_task_resources) - assert dask_future - return dask_future - - -@pytest.mark.acceptance_test() +@pytest.mark.acceptance_test @pytest.mark.parametrize( - "dask_task_imposed_ec2_type, dask_ram, expected_ec2_type", + "scale_up_params", [ pytest.param( - None, - TypeAdapter(ByteSize).validate_python("128Gib"), - "r5n.4xlarge", + _ScaleUpParams( + imposed_instance_type=None, + task_resources=Resources( + cpus=1, ram=TypeAdapter(ByteSize).validate_python("128Gib") + ), + num_tasks=1, + expected_instance_type="r5n.4xlarge", + expected_num_instances=1, + ), id="No explicit instance defined", ), pytest.param( - "g4dn.2xlarge", - None, - "g4dn.2xlarge", + _ScaleUpParams( + imposed_instance_type="g4dn.2xlarge", + task_resources=None, + num_tasks=1, + expected_instance_type="g4dn.2xlarge", + expected_num_instances=1, + ), id="Explicitely ask for g4dn.2xlarge and use all the resources", ), pytest.param( - "r5n.8xlarge", - TypeAdapter(ByteSize).validate_python("116Gib"), - "r5n.8xlarge", + _ScaleUpParams( + imposed_instance_type="r5n.8xlarge", + task_resources=Resources( + cpus=1, ram=TypeAdapter(ByteSize).validate_python("116Gib") + ), + num_tasks=1, + expected_instance_type="r5n.8xlarge", + expected_num_instances=1, + ), id="Explicitely ask for 
r5n.8xlarge and set the resources", ), pytest.param( - "r5n.8xlarge", - None, - "r5n.8xlarge", + _ScaleUpParams( + imposed_instance_type="r5n.8xlarge", + task_resources=None, + num_tasks=1, + expected_instance_type="r5n.8xlarge", + expected_num_instances=1, + ), id="Explicitely ask for r5n.8xlarge and use all the resources", ), ], @@ -364,7 +436,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 minimal_configuration: None, app_settings: ApplicationSettings, initialized_app: FastAPI, - create_dask_task: Callable[[DaskTaskResources], distributed.Future], + create_tasks_batch: Callable[[_ScaleUpParams], Awaitable[list[distributed.Future]]], ec2_client: EC2Client, mock_docker_tag_node: mock.Mock, fake_node: DockerNode, @@ -377,26 +449,17 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 mock_dask_is_worker_connected: mock.Mock, mocker: MockerFixture, dask_spec_local_cluster: distributed.SpecCluster, - create_dask_task_resources: Callable[..., DaskTaskResources], - dask_task_imposed_ec2_type: InstanceTypeType | None, - dask_ram: ByteSize | None, - expected_ec2_type: InstanceTypeType, with_drain_nodes_labelled: bool, ec2_instance_custom_tags: dict[str, str], + scale_up_params: _ScaleUpParams, ): # we have nothing running now all_instances = await ec2_client.describe_instances() assert not all_instances["Reservations"] # create a task that needs more power - dask_future = await _create_task_with_resources( - ec2_client, - dask_task_imposed_ec2_type, - dask_ram, - create_dask_task_resources, - create_dask_task, - ) - + dask_futures = await create_tasks_batch(scale_up_params) + assert dask_futures # this should trigger a scaling up as we have no nodes await auto_scale_cluster( app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() @@ -406,8 +469,8 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - 
expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -443,8 +506,8 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 instances = await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -456,9 +519,9 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 mock_docker_find_node_with_name_returns_fake_node.assert_called_once() mock_docker_find_node_with_name_returns_fake_node.reset_mock() expected_docker_node_tags = { - DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY: expected_ec2_type + DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY: scale_up_params.expected_instance_type } - assert mock_docker_tag_node.call_count == 2 + assert mock_docker_tag_node.call_count == 3 assert fake_node.spec assert fake_node.spec.labels fake_attached_node = deepcopy(fake_node) @@ -525,7 +588,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 # now we have 1 monitored node that needs to be mocked fake_attached_node.spec.labels[_OSPARC_SERVICE_READY_LABEL_KEY] = "true" fake_attached_node.status = NodeStatus( - State=NodeState.ready, Message=None, Addr=None + state=NodeState.ready, message=None, addr=None ) fake_attached_node.spec.availability = Availability.active assert fake_attached_node.description @@ -557,14 +620,15 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 assert mock_dask_get_worker_used_resources.call_count == 2 * num_useless_calls 
mock_dask_get_worker_used_resources.reset_mock() mock_docker_find_node_with_name_returns_fake_node.assert_not_called() - mock_docker_tag_node.assert_not_called() + assert mock_docker_tag_node.call_count == num_useless_calls + mock_docker_tag_node.reset_mock() mock_docker_set_node_availability.assert_not_called() # check the number of instances did not change and is still running await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -577,7 +641,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 # # 4. now scaling down, as we deleted all the tasks # - del dask_future + del dask_futures await auto_scale_cluster(app=initialized_app, auto_scaling_mode=auto_scaling_mode) mock_dask_is_worker_connected.assert_called_once() mock_dask_is_worker_connected.reset_mock() @@ -647,8 +711,8 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -658,7 +722,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 fake_attached_node.spec.labels[_OSPARC_SERVICE_READY_LABEL_KEY] = "false" fake_attached_node.spec.labels[ _OSPARC_SERVICES_READY_DATETIME_LABEL_KEY - ] = datetime.datetime.now(tz=datetime.timezone.utc).isoformat() + ] = datetime.datetime.now(tz=datetime.UTC).isoformat() # the node will be not be terminated before the 
timeout triggers assert app_settings.AUTOSCALING_EC2_INSTANCES @@ -676,15 +740,15 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) # now changing the last update timepoint will trigger the node removal and shutdown the ec2 instance fake_attached_node.spec.labels[_OSPARC_SERVICES_READY_DATETIME_LABEL_KEY] = ( - datetime.datetime.now(tz=datetime.timezone.utc) + datetime.datetime.now(tz=datetime.UTC) - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION - datetime.timedelta(seconds=1) ).isoformat() @@ -694,8 +758,8 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -726,8 +790,8 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="terminated", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -741,7 +805,9 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_allowed( app_settings: 
ApplicationSettings, initialized_app: FastAPI, create_dask_task: Callable[[DaskTaskResources], distributed.Future], - create_dask_task_resources: Callable[..., DaskTaskResources], + create_dask_task_resources: Callable[ + [InstanceTypeType | None, Resources], DaskTaskResources + ], ec2_client: EC2Client, faker: Faker, caplog: pytest.LogCaptureFixture, @@ -752,7 +818,8 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_allowed( # create a task that needs more power dask_task_resources = create_dask_task_resources( - faker.pystr(), TypeAdapter(ByteSize).validate_python("128GiB") + cast(InstanceTypeType, faker.pystr()), + Resources(cpus=1, ram=TypeAdapter(ByteSize).validate_python("128GiB")), ) dask_future = create_dask_task(dask_task_resources) assert dask_future @@ -777,7 +844,9 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_fitting_reso app_settings: ApplicationSettings, initialized_app: FastAPI, create_dask_task: Callable[[DaskTaskResources], distributed.Future], - create_dask_task_resources: Callable[..., DaskTaskResources], + create_dask_task_resources: Callable[ + [InstanceTypeType | None, Resources], DaskTaskResources + ], ec2_client: EC2Client, faker: Faker, caplog: pytest.LogCaptureFixture, @@ -788,7 +857,8 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_fitting_reso # create a task that needs more power dask_task_resources = create_dask_task_resources( - "t2.xlarge", TypeAdapter(ByteSize).validate_python("128GiB") + "t2.xlarge", + Resources(cpus=1, ram=TypeAdapter(ByteSize).validate_python("128GiB")), ) dask_future = create_dask_task(dask_task_resources) assert dask_future @@ -808,47 +878,12 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_fitting_reso assert "Unexpected error:" in error_messages[0] -@dataclass(frozen=True) -class _ScaleUpParams: - task_resources: Resources - num_tasks: int - expected_instance_type: str - expected_num_instances: int - - -def 
_dask_task_resources_from_resources(resources: Resources) -> DaskTaskResources: - return { - res_key.upper(): res_value - for res_key, res_value in resources.model_dump().items() - } - - -@pytest.fixture -def patch_ec2_client_launch_instancess_min_number_of_instances( - mocker: MockerFixture, -) -> mock.Mock: - """the moto library always returns min number of instances instead of max number of instances which makes - it difficult to test scaling to multiple of machines. this should help""" - original_fct = SimcoreEC2API.launch_instances - - async def _change_parameters(*args, **kwargs) -> list[EC2InstanceData]: - new_kwargs = kwargs | {"min_number_of_instances": kwargs["number_of_instances"]} - print(f"patching launch_instances with: {new_kwargs}") - return await original_fct(*args, **new_kwargs) - - return mocker.patch.object( - SimcoreEC2API, - "launch_instances", - autospec=True, - side_effect=_change_parameters, - ) - - @pytest.mark.parametrize( "scale_up_params", [ pytest.param( _ScaleUpParams( + imposed_instance_type=None, task_resources=Resources( cpus=5, ram=TypeAdapter(ByteSize).validate_python("36Gib") ), @@ -861,11 +896,11 @@ async def _change_parameters(*args, **kwargs) -> list[EC2InstanceData]: ], ) async def test_cluster_scaling_up_starts_multiple_instances( - patch_ec2_client_launch_instancess_min_number_of_instances: mock.Mock, + patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock, minimal_configuration: None, app_settings: ApplicationSettings, initialized_app: FastAPI, - create_dask_task: Callable[[DaskTaskResources], distributed.Future], + create_tasks_batch: Callable[[_ScaleUpParams], Awaitable[list[distributed.Future]]], ec2_client: EC2Client, mock_docker_tag_node: mock.Mock, scale_up_params: _ScaleUpParams, @@ -880,16 +915,7 @@ async def test_cluster_scaling_up_starts_multiple_instances( assert not all_instances["Reservations"] # create several tasks that needs more power - dask_futures = await asyncio.gather( - *( - 
asyncio.get_event_loop().run_in_executor( - None, - create_dask_task, - _dask_task_resources_from_resources(scale_up_params.task_resources), - ) - for _ in range(scale_up_params.num_tasks) - ) - ) + dask_futures = await create_tasks_batch(scale_up_params) assert dask_futures # run the code @@ -902,7 +928,7 @@ async def test_cluster_scaling_up_starts_multiple_instances( ec2_client, expected_num_reservations=1, expected_num_instances=scale_up_params.expected_num_instances, - expected_instance_type="g3.4xlarge", + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -922,15 +948,29 @@ async def test_cluster_scaling_up_starts_multiple_instances( mock_rabbitmq_post_message.reset_mock() +@pytest.mark.parametrize( + "scale_up_params", + [ + pytest.param( + _ScaleUpParams( + imposed_instance_type="r5n.8xlarge", + task_resources=None, + num_tasks=1, + expected_instance_type="r5n.8xlarge", + expected_num_instances=1, + ), + id="Impose r5n.8xlarge without resources", + ), + ], +) async def test_cluster_scaling_up_more_than_allowed_max_starts_max_instances_and_not_more( - patch_ec2_client_launch_instancess_min_number_of_instances: mock.Mock, + patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock, minimal_configuration: None, app_settings: ApplicationSettings, initialized_app: FastAPI, - create_dask_task: Callable[[DaskTaskResources], distributed.Future], + create_tasks_batch: Callable[[_ScaleUpParams], Awaitable[list[distributed.Future]]], ec2_client: EC2Client, dask_spec_local_cluster: distributed.SpecCluster, - create_dask_task_resources: Callable[..., DaskTaskResources], mock_docker_tag_node: mock.Mock, mock_rabbitmq_post_message: mock.Mock, mock_docker_find_node_with_name_returns_fake_node: mock.Mock, @@ -939,29 +979,23 @@ async def test_cluster_scaling_up_more_than_allowed_max_starts_max_instances_and mock_dask_get_worker_has_results_in_memory: 
mock.Mock, mock_dask_get_worker_used_resources: mock.Mock, ec2_instance_custom_tags: dict[str, str], + scale_up_params: _ScaleUpParams, ): - ec2_instance_type = "r5n.8xlarge" - # we have nothing running now all_instances = await ec2_client.describe_instances() assert not all_instances["Reservations"] assert app_settings.AUTOSCALING_EC2_INSTANCES assert app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES > 0 - num_tasks = 3 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + # override the number of tasks + scale_up_params.num_tasks = ( + 3 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + ) + scale_up_params.expected_num_instances = ( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + ) # create the tasks - task_futures = await asyncio.gather( - *( - _create_task_with_resources( - ec2_client, - ec2_instance_type, - None, - create_dask_task_resources, - create_dask_task, - ) - for _ in range(num_tasks) - ) - ) + task_futures = await create_tasks_batch(scale_up_params) assert all(task_futures) # this should trigger a scaling up as we have no nodes @@ -971,8 +1005,8 @@ async def test_cluster_scaling_up_more_than_allowed_max_starts_max_instances_and await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, - expected_instance_type=ec2_instance_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -990,7 +1024,7 @@ async def test_cluster_scaling_up_more_than_allowed_max_starts_max_instances_and initialized_app, dask_spec_local_cluster.scheduler_address, instances_running=0, - instances_pending=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + 
instances_pending=scale_up_params.expected_num_instances, ) mock_rabbitmq_post_message.reset_mock() @@ -1003,22 +1037,24 @@ async def test_cluster_scaling_up_more_than_allowed_max_starts_max_instances_and await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, - expected_instance_type=ec2_instance_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) async def test_cluster_scaling_up_more_than_allowed_with_multiple_types_max_starts_max_instances_and_not_more( - patch_ec2_client_launch_instancess_min_number_of_instances: mock.Mock, + patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock, minimal_configuration: None, app_settings: ApplicationSettings, initialized_app: FastAPI, create_dask_task: Callable[[DaskTaskResources], distributed.Future], ec2_client: EC2Client, dask_spec_local_cluster: distributed.SpecCluster, - create_dask_task_resources: Callable[..., DaskTaskResources], + create_dask_task_resources: Callable[ + [InstanceTypeType | None, Resources], DaskTaskResources + ], mock_docker_tag_node: mock.Mock, mock_rabbitmq_post_message: mock.Mock, mock_docker_find_node_with_name_returns_fake_node: mock.Mock, @@ -1106,12 +1142,18 @@ async def test_cluster_scaling_up_more_than_allowed_with_multiple_types_max_star @pytest.mark.parametrize( - "dask_task_imposed_ec2_type, dask_ram, expected_ec2_type", + "scale_up_params", [ pytest.param( - None, - TypeAdapter(ByteSize).validate_python("128Gib"), - "r5n.4xlarge", + _ScaleUpParams( + imposed_instance_type=None, + task_resources=Resources( + cpus=1, ram=TypeAdapter(ByteSize).validate_python("128Gib") + ), + num_tasks=1, + expected_instance_type="r5n.4xlarge", + expected_num_instances=1, + ), id="No 
explicit instance defined", ), ], @@ -1121,18 +1163,15 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( minimal_configuration: None, app_settings: ApplicationSettings, initialized_app: FastAPI, - create_dask_task: Callable[[DaskTaskResources], distributed.Future], + create_tasks_batch: Callable[[_ScaleUpParams], Awaitable[list[distributed.Future]]], ec2_client: EC2Client, - dask_task_imposed_ec2_type: InstanceTypeType | None, - dask_ram: ByteSize | None, - create_dask_task_resources: Callable[..., DaskTaskResources], dask_spec_local_cluster: distributed.SpecCluster, - expected_ec2_type: InstanceTypeType, mock_find_node_with_name_returns_none: mock.Mock, mock_docker_tag_node: mock.Mock, mock_rabbitmq_post_message: mock.Mock, short_ec2_instance_max_start_time: datetime.timedelta, ec2_instance_custom_tags: dict[str, str], + scale_up_params: _ScaleUpParams, ): assert app_settings.AUTOSCALING_EC2_INSTANCES assert ( @@ -1143,14 +1182,9 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( all_instances = await ec2_client.describe_instances() assert not all_instances["Reservations"] # create a task that needs more power - dask_future = await _create_task_with_resources( - ec2_client, - dask_task_imposed_ec2_type, - dask_ram, - create_dask_task_resources, - create_dask_task, - ) - assert dask_future + dask_futures = await create_tasks_batch(scale_up_params) + assert dask_futures + # this should trigger a scaling up as we have no nodes await auto_scale_cluster( app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() @@ -1160,8 +1194,8 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( instances = await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + 
expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -1203,8 +1237,8 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( instances = await assert_autoscaled_computational_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), ) @@ -1216,7 +1250,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( initialized_app, dask_spec_local_cluster.scheduler_address, instances_running=0, - instances_pending=1, + instances_pending=scale_up_params.expected_num_instances, ) mock_rabbitmq_post_message.reset_mock() assert instances @@ -1247,7 +1281,10 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( all_instances = await ec2_client.describe_instances() assert len(all_instances["Reservations"]) == 2 assert "Instances" in all_instances["Reservations"][0] - assert len(all_instances["Reservations"][0]["Instances"]) == 1 + assert ( + len(all_instances["Reservations"][0]["Instances"]) + == scale_up_params.expected_num_instances + ) assert "State" in all_instances["Reservations"][0]["Instances"][0] assert "Name" in all_instances["Reservations"][0]["Instances"][0]["State"] assert ( @@ -1256,9 +1293,304 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( ) assert "Instances" in all_instances["Reservations"][1] - assert len(all_instances["Reservations"][1]["Instances"]) == 1 + assert ( + len(all_instances["Reservations"][1]["Instances"]) + == scale_up_params.expected_num_instances + ) assert "State" in all_instances["Reservations"][1]["Instances"][0] assert "Name" 
in all_instances["Reservations"][1]["Instances"][0]["State"] assert ( all_instances["Reservations"][1]["Instances"][0]["State"]["Name"] == "running" ) + + +@pytest.mark.parametrize( + "with_docker_join_drained", ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"], indirect=True +) +@pytest.mark.parametrize( + "with_drain_nodes_labelled", + ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"], + indirect=True, +) +@pytest.mark.parametrize( + "scale_up_params1, scale_up_params2", + [ + pytest.param( + _ScaleUpParams( + imposed_instance_type="g3.4xlarge", # 1 GPU, 16 CPUs, 122GiB + task_resources=Resources( + cpus=16, ram=TypeAdapter(ByteSize).validate_python("30Gib") + ), + num_tasks=12, + expected_instance_type="g3.4xlarge", # 1 GPU, 16 CPUs, 122GiB + expected_num_instances=10, + ), + _ScaleUpParams( + imposed_instance_type="g4dn.8xlarge", # 32CPUs, 128GiB + task_resources=Resources( + cpus=32, ram=TypeAdapter(ByteSize).validate_python("20480MB") + ), + num_tasks=7, + expected_instance_type="g4dn.8xlarge", # 32CPUs, 128GiB + expected_num_instances=7, + ), + id="A batch of services requiring g3.4xlarge and a batch requiring g4dn.8xlarge", + ), + ], +) +async def test_cluster_adapts_machines_on_the_fly( + patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock, + minimal_configuration: None, + ec2_client: EC2Client, + initialized_app: FastAPI, + app_settings: ApplicationSettings, + create_tasks_batch: Callable[[_ScaleUpParams], Awaitable[list[distributed.Future]]], + ec2_instance_custom_tags: dict[str, str], + scale_up_params1: _ScaleUpParams, + scale_up_params2: _ScaleUpParams, + mocked_associate_ec2_instances_with_nodes: mock.Mock, + mock_docker_set_node_availability: mock.Mock, + mock_dask_is_worker_connected: mock.Mock, + create_fake_node: Callable[..., DockerNode], + mock_docker_tag_node: mock.Mock, + spied_cluster_analysis: MockType, + mocker: MockerFixture, +): + # pre-requisites + assert app_settings.AUTOSCALING_EC2_INSTANCES + assert 
app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES > 0 + assert ( + scale_up_params1.num_tasks + >= app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + ), "this test requires to run a first batch of more services than the maximum number of instances allowed" + # we have nothing running now + all_instances = await ec2_client.describe_instances() + assert not all_instances["Reservations"] + + # + # 1. create the first batch of services requiring the initial machines + first_batch_tasks = await create_tasks_batch(scale_up_params1) + assert first_batch_tasks + + # it will only scale once and do nothing else + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + await assert_autoscaled_computational_ec2_instances( + ec2_client, + expected_num_reservations=1, + expected_num_instances=scale_up_params1.expected_num_instances, + expected_instance_type=scale_up_params1.expected_instance_type, + expected_instance_state="running", + expected_additional_tag_keys=list(ec2_instance_custom_tags), + ) + + assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=0, + ) + mocked_associate_ec2_instances_with_nodes.assert_called_once_with([], []) + mocked_associate_ec2_instances_with_nodes.reset_mock() + mocked_associate_ec2_instances_with_nodes.side_effect = create_fake_association( + create_fake_node, None, None + ) + mock_docker_tag_node.assert_not_called() + mock_dask_is_worker_connected.assert_not_called() + + # + # 2. 
now the machines are associated + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + mocked_associate_ec2_instances_with_nodes.assert_called_once() + mock_docker_tag_node.assert_called() + assert ( + mock_docker_tag_node.call_count + == app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + ) + assert analyzed_cluster.active_nodes + + # + # 3. now we start the second batch of services requiring a different type of machines + second_batch_tasks = await create_tasks_batch(scale_up_params2) + assert second_batch_tasks + + # scaling will do nothing since we have hit the maximum number of machines + for _ in range(3): + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + await assert_autoscaled_computational_ec2_instances( + ec2_client, + expected_num_reservations=1, + expected_num_instances=scale_up_params1.expected_num_instances, + expected_instance_type=scale_up_params1.expected_instance_type, + expected_instance_state="running", + expected_additional_tag_keys=list(ec2_instance_custom_tags), + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=3, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert not analyzed_cluster.drained_nodes + + # + # 4.now we simulate that some of the services in the 1st batch have completed and that we are 1 below the max + # a machine should switch off and another type should be started (just pop the future out of scope) + for _ in range( + scale_up_params1.num_tasks + - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + + 1 + ): + first_batch_tasks.pop() + + # first call to auto_scale_cluster will mark 1 
node as empty + with mock.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.set_node_found_empty", + autospec=True, + ) as mock_docker_set_node_found_empty: + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert not analyzed_cluster.drained_nodes + # the last machine is found empty + mock_docker_set_node_found_empty.assert_called_with( + mock.ANY, + analyzed_cluster.active_nodes[-1].node, + empty=True, + ) + + # now we mock the get_node_found_empty so the next call will actually drain the machine + with mock.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_empty_since", + autospec=True, + return_value=arrow.utcnow().datetime + - 1.5 + * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_DRAINING, + ) as mocked_get_node_empty_since: + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + mocked_get_node_empty_since.assert_called_once() + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert not analyzed_cluster.drained_nodes + # now scaling again should find the drained machine + drained_machine_instance_id = analyzed_cluster.active_nodes[-1].ec2_instance.id + mocked_associate_ec2_instances_with_nodes.side_effect = create_fake_association( + create_fake_node, drained_machine_instance_id, None + ) + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + 
expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert analyzed_cluster.drained_nodes + + # this will initiate termination now + with mock.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_last_readyness_update", + autospec=True, + return_value=arrow.utcnow().datetime + - 1.5 + * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION, + ): + mock_docker_tag_node.reset_mock() + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + mock_docker_tag_node.assert_called_with( + mock.ANY, + analyzed_cluster.drained_nodes[-1].node, + tags=mock.ANY, + available=False, + ) + + # scaling again should find the terminating machine + mocked_associate_ec2_instances_with_nodes.side_effect = create_fake_association( + create_fake_node, drained_machine_instance_id, drained_machine_instance_id + ) + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert not analyzed_cluster.drained_nodes + assert analyzed_cluster.terminating_nodes + + # now this will terminate it and straight away start a new machine type + with mock.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_termination_started_since", + autospec=True, + return_value=arrow.utcnow().datetime + - 1.5 + * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION, + ): + mocked_docker_remove_node = mocker.patch( + 
"simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.remove_nodes", + return_value=None, + autospec=True, + ) + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=ComputationalAutoscaling() + ) + mocked_docker_remove_node.assert_called_once() + + # now let's check what we have + all_instances = await ec2_client.describe_instances() + assert len(all_instances["Reservations"]) == 2, "there should be 2 Reservations" + reservation1 = all_instances["Reservations"][0] + assert "Instances" in reservation1 + assert len(reservation1["Instances"]) == ( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + ), f"expected {app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES} EC2 instances, found {len(reservation1['Instances'])}" + for instance in reservation1["Instances"]: + assert "InstanceType" in instance + assert instance["InstanceType"] == scale_up_params1.expected_instance_type + assert "InstanceId" in instance + assert "State" in instance + assert "Name" in instance["State"] + if instance["InstanceId"] == drained_machine_instance_id: + assert instance["State"]["Name"] == "terminated" + else: + assert instance["State"]["Name"] == "running" + + reservation2 = all_instances["Reservations"][1] + assert "Instances" in reservation2 + assert ( + len(reservation2["Instances"]) == 1 + ), f"expected 1 EC2 instances, found {len(reservation2['Instances'])}" + for instance in reservation2["Instances"]: + assert "InstanceType" in instance + assert instance["InstanceType"] == scale_up_params2.expected_instance_type diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py index 461baee21fa..ccdb2461c04 100644 --- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py +++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py @@ -9,6 +9,7 @@ import asyncio import datetime import logging +import 
random from collections.abc import AsyncIterator, Awaitable, Callable, Iterator, Sequence from copy import deepcopy from dataclasses import dataclass @@ -38,12 +39,15 @@ from pydantic import ByteSize, TypeAdapter from pytest_mock import MockType from pytest_mock.plugin import MockerFixture +from pytest_simcore.helpers.autoscaling import ( + assert_cluster_state, + create_fake_association, +) from pytest_simcore.helpers.aws_ec2 import assert_autoscaled_dynamic_ec2_instances from pytest_simcore.helpers.logging_tools import log_context from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict from simcore_service_autoscaling.core.settings import ApplicationSettings from simcore_service_autoscaling.models import AssociatedInstance, Cluster -from simcore_service_autoscaling.modules import auto_scaling_core from simcore_service_autoscaling.modules.auto_scaling_core import ( _activate_drained_nodes, _find_terminateable_instances, @@ -217,24 +221,6 @@ def _assert_rabbit_autoscaling_message_sent( assert mock_rabbitmq_post_message.call_args == mock.call(app, expected_message) -async def test_cluster_scaling_with_no_services_does_nothing( - minimal_configuration: None, - app_settings: ApplicationSettings, - initialized_app: FastAPI, - mock_launch_instances: mock.Mock, - mock_terminate_instances: mock.Mock, - mock_rabbitmq_post_message: mock.Mock, -): - await auto_scale_cluster( - app=initialized_app, auto_scaling_mode=DynamicAutoscaling() - ) - mock_launch_instances.assert_not_called() - mock_terminate_instances.assert_not_called() - _assert_rabbit_autoscaling_message_sent( - mock_rabbitmq_post_message, app_settings, initialized_app - ) - - @pytest.fixture def instance_type_filters( ec2_instance_custom_tags: dict[str, str], @@ -254,13 +240,72 @@ def instance_type_filters( ] +@dataclass(frozen=True) +class _ScaleUpParams: + imposed_instance_type: InstanceTypeType | None + service_resources: Resources + num_services: int + expected_instance_type: InstanceTypeType + 
expected_num_instances: int + + @pytest.fixture -async def spied_cluster_analysis(mocker: MockerFixture) -> MockType: - return mocker.spy(auto_scaling_core, "_analyze_current_cluster") +async def create_services_batch( + create_service: Callable[ + [dict[str, Any], dict[DockerLabelKey, str], str, list[str]], Awaitable[Service] + ], + task_template: dict[str, Any], + create_task_reservations: Callable[[int, int], dict[str, Any]], + service_monitored_labels: dict[DockerLabelKey, str], + osparc_docker_label_keys: StandardSimcoreDockerLabels, +) -> Callable[[_ScaleUpParams], Awaitable[list[Service]]]: + async def _(scale_up_params: _ScaleUpParams) -> list[Service]: + return await asyncio.gather( + *( + create_service( + task_template + | create_task_reservations( + int(scale_up_params.service_resources.cpus), + scale_up_params.service_resources.ram, + ), + service_monitored_labels + | osparc_docker_label_keys.to_simcore_runtime_docker_labels(), + "pending", + ( + [ + f"node.labels.{DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY}=={scale_up_params.imposed_instance_type}" + ] + if scale_up_params.imposed_instance_type + else [] + ), + ) + for _ in range(scale_up_params.num_services) + ) + ) + + return _ + + +async def test_cluster_scaling_with_no_services_does_nothing( + minimal_configuration: None, + app_settings: ApplicationSettings, + initialized_app: FastAPI, + mock_launch_instances: mock.Mock, + mock_terminate_instances: mock.Mock, + mock_rabbitmq_post_message: mock.Mock, +): + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + mock_launch_instances.assert_not_called() + mock_terminate_instances.assert_not_called() + _assert_rabbit_autoscaling_message_sent( + mock_rabbitmq_post_message, app_settings, initialized_app + ) async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expected_machines( - patch_ec2_client_launch_instancess_min_number_of_instances: mock.Mock, + 
patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock, minimal_configuration: None, mock_machines_buffer: int, app_settings: ApplicationSettings, @@ -362,28 +407,34 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect ) +@pytest.mark.parametrize( + "scale_up_params", + [ + pytest.param( + _ScaleUpParams( + imposed_instance_type=None, + service_resources=Resources( + cpus=4, ram=TypeAdapter(ByteSize).validate_python("128000Gib") + ), + num_services=1, + expected_instance_type="r5n.4xlarge", + expected_num_instances=1, + ), + id="No explicit instance defined", + ), + ], +) async def test_cluster_scaling_with_service_asking_for_too_much_resources_starts_nothing( minimal_configuration: None, - service_monitored_labels: dict[DockerLabelKey, str], app_settings: ApplicationSettings, initialized_app: FastAPI, - create_service: Callable[ - [dict[str, Any], dict[DockerLabelKey, str], str], Awaitable[Service] - ], - task_template: dict[str, Any], - create_task_reservations: Callable[[int, int], dict[str, Any]], + create_services_batch: Callable[[_ScaleUpParams], Awaitable[list[Service]]], mock_launch_instances: mock.Mock, mock_terminate_instances: mock.Mock, mock_rabbitmq_post_message: mock.Mock, + scale_up_params: _ScaleUpParams, ): - task_template_with_too_many_resource = task_template | create_task_reservations( - 1000, 0 - ) - await create_service( - task_template_with_too_many_resource, - service_monitored_labels, - "pending", - ) + await create_services_batch(scale_up_params) await auto_scale_cluster( app=initialized_app, auto_scaling_mode=DynamicAutoscaling() @@ -395,38 +446,11 @@ async def test_cluster_scaling_with_service_asking_for_too_much_resources_starts ) -@dataclass(frozen=True) -class _ScaleUpParams: - imposed_instance_type: str | None - service_resources: Resources - num_services: int - expected_instance_type: InstanceTypeType - expected_num_instances: int - - -def _assert_cluster_state( - spied_cluster_analysis: 
MockType, *, expected_calls: int, expected_num_machines: int -) -> None: - assert spied_cluster_analysis.call_count > 0 - - assert isinstance(spied_cluster_analysis.spy_return, Cluster) - assert ( - spied_cluster_analysis.spy_return.total_number_of_machines() - == expected_num_machines - ) - - async def _test_cluster_scaling_up_and_down( # noqa: PLR0915 *, - service_monitored_labels: dict[DockerLabelKey, str], - osparc_docker_label_keys: StandardSimcoreDockerLabels, app_settings: ApplicationSettings, initialized_app: FastAPI, - create_service: Callable[ - [dict[str, Any], dict[DockerLabelKey, str], str, list[str]], Awaitable[Service] - ], - task_template: dict[str, Any], - create_task_reservations: Callable[[int, int], dict[str, Any]], + create_services_batch: Callable[[_ScaleUpParams], Awaitable[list[Service]]], ec2_client: EC2Client, mock_docker_tag_node: mock.Mock, fake_node: Node, @@ -452,34 +476,13 @@ async def _test_cluster_scaling_up_and_down( # noqa: PLR0915 ), "This test is not made to work with more than 1 expected instance. 
so please adapt if needed" # create the service(s) - created_docker_services = await asyncio.gather( - *( - create_service( - task_template - | create_task_reservations( - int(scale_up_params.service_resources.cpus), - scale_up_params.service_resources.ram, - ), - service_monitored_labels - | osparc_docker_label_keys.to_simcore_runtime_docker_labels(), - "pending", - ( - [ - f"node.labels.{DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY}=={scale_up_params.imposed_instance_type}" - ] - if scale_up_params.imposed_instance_type - else [] - ), - ) - for _ in range(scale_up_params.num_services) - ) - ) + created_docker_services = await create_services_batch(scale_up_params) # this should trigger a scaling up as we have no nodes await auto_scale_cluster( app=initialized_app, auto_scaling_mode=DynamicAutoscaling() ) - _assert_cluster_state( + assert_cluster_state( spied_cluster_analysis, expected_calls=1, expected_num_machines=0 ) @@ -528,7 +531,7 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: await auto_scale_cluster( app=initialized_app, auto_scaling_mode=DynamicAutoscaling() ) - _assert_cluster_state( + assert_cluster_state( spied_cluster_analysis, expected_calls=1, expected_num_machines=1 ) @@ -556,7 +559,7 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: mock_find_node_with_name_returns_fake_node.assert_called_once() mock_find_node_with_name_returns_fake_node.reset_mock() - assert mock_docker_tag_node.call_count == 2 + assert mock_docker_tag_node.call_count == 3 assert fake_node.spec assert fake_node.spec.labels # check attach call @@ -572,9 +575,10 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: available=with_drain_nodes_labelled, ) # update our fake node + fake_attached_node.spec.labels[_OSPARC_SERVICE_READY_LABEL_KEY] = "true" fake_attached_node.spec.labels[ _OSPARC_SERVICES_READY_DATETIME_LABEL_KEY - ] = mock_docker_tag_node.call_args_list[0][1]["tags"][ + ] = 
mock_docker_tag_node.call_args_list[2][1]["tags"][ _OSPARC_SERVICES_READY_DATETIME_LABEL_KEY ] # check the activate time is later than attach time @@ -587,13 +591,15 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: _OSPARC_SERVICES_READY_DATETIME_LABEL_KEY ] ) + fake_attached_node.spec.availability = Availability.active mock_compute_node_used_resources.assert_called_once_with( get_docker_client(initialized_app), fake_attached_node, ) mock_compute_node_used_resources.reset_mock() # check activate call - assert mock_docker_tag_node.call_args_list[1] == mock.call( + + assert mock_docker_tag_node.call_args_list[2] == mock.call( get_docker_client(initialized_app), fake_attached_node, tags=fake_node.spec.labels @@ -653,7 +659,7 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: # now we have 1 monitored node that needs to be mocked fake_attached_node.spec.labels[_OSPARC_SERVICE_READY_LABEL_KEY] = "true" fake_attached_node.status = NodeStatus( - State=NodeState.ready, Message=None, Addr=None + state=NodeState.ready, message=None, addr=None ) fake_attached_node.spec.availability = Availability.active fake_attached_node.description.hostname = internal_dns_name @@ -676,7 +682,8 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: assert mock_compute_node_used_resources.call_count == num_useless_calls * 2 mock_compute_node_used_resources.reset_mock() mock_find_node_with_name_returns_fake_node.assert_not_called() - mock_docker_tag_node.assert_not_called() + assert mock_docker_tag_node.call_count == num_useless_calls + mock_docker_tag_node.reset_mock() mock_docker_set_node_availability.assert_not_called() # check the number of instances did not change and is still running instances = await assert_autoscaled_dynamic_ec2_instances( @@ -899,7 +906,7 @@ async def _assert_wait_for_ec2_instances_terminated() -> None: await _assert_wait_for_ec2_instances_terminated() -@pytest.mark.acceptance_test() 
+@pytest.mark.acceptance_test @pytest.mark.parametrize( "scale_up_params", [ @@ -943,15 +950,9 @@ async def _assert_wait_for_ec2_instances_terminated() -> None: ) async def test_cluster_scaling_up_and_down( minimal_configuration: None, - service_monitored_labels: dict[DockerLabelKey, str], - osparc_docker_label_keys: StandardSimcoreDockerLabels, app_settings: ApplicationSettings, initialized_app: FastAPI, - create_service: Callable[ - [dict[str, Any], dict[DockerLabelKey, str], str, list[str]], Awaitable[Service] - ], - task_template: dict[str, Any], - create_task_reservations: Callable[[int, int], dict[str, Any]], + create_services_batch: Callable[[_ScaleUpParams], Awaitable[list[Service]]], ec2_client: EC2Client, mock_docker_tag_node: mock.Mock, fake_node: Node, @@ -968,13 +969,9 @@ async def test_cluster_scaling_up_and_down( spied_cluster_analysis: MockType, ): await _test_cluster_scaling_up_and_down( - service_monitored_labels=service_monitored_labels, - osparc_docker_label_keys=osparc_docker_label_keys, app_settings=app_settings, initialized_app=initialized_app, - create_service=create_service, - task_template=task_template, - create_task_reservations=create_task_reservations, + create_services_batch=create_services_batch, ec2_client=ec2_client, mock_docker_tag_node=mock_docker_tag_node, fake_node=fake_node, @@ -1021,15 +1018,9 @@ async def test_cluster_scaling_up_and_down_against_aws( disable_buffers_pool_background_task: None, mocked_redis_server: None, external_envfile_dict: EnvVarsDict, - service_monitored_labels: dict[DockerLabelKey, str], - osparc_docker_label_keys: StandardSimcoreDockerLabels, app_settings: ApplicationSettings, initialized_app: FastAPI, - create_service: Callable[ - [dict[str, Any], dict[DockerLabelKey, str], str, list[str]], Awaitable[Service] - ], - task_template: dict[str, Any], - create_task_reservations: Callable[[int, int], dict[str, Any]], + create_services_batch: Callable[[_ScaleUpParams], Awaitable[list[Service]]], ec2_client: 
EC2Client, mock_docker_tag_node: mock.Mock, fake_node: Node, @@ -1054,13 +1045,9 @@ async def test_cluster_scaling_up_and_down_against_aws( f" The passed external ENV allows for {list(external_ec2_instances_allowed_types)}" ) await _test_cluster_scaling_up_and_down( - service_monitored_labels=service_monitored_labels, - osparc_docker_label_keys=osparc_docker_label_keys, app_settings=app_settings, initialized_app=initialized_app, - create_service=create_service, - task_template=task_template, - create_task_reservations=create_task_reservations, + create_services_batch=create_services_batch, ec2_client=ec2_client, mock_docker_tag_node=mock_docker_tag_node, fake_node=fake_node, @@ -1109,17 +1096,11 @@ async def test_cluster_scaling_up_and_down_against_aws( ], ) async def test_cluster_scaling_up_starts_multiple_instances( - patch_ec2_client_launch_instancess_min_number_of_instances: mock.Mock, + patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock, minimal_configuration: None, - service_monitored_labels: dict[DockerLabelKey, str], - osparc_docker_label_keys: StandardSimcoreDockerLabels, app_settings: ApplicationSettings, initialized_app: FastAPI, - create_service: Callable[ - [dict[str, Any], dict[DockerLabelKey, str], str, list[str]], Awaitable[Service] - ], - task_template: dict[str, Any], - create_task_reservations: Callable[[int, int], dict[str, Any]], + create_services_batch: Callable[[_ScaleUpParams], Awaitable[list[Service]]], ec2_client: EC2Client, mock_docker_tag_node: mock.Mock, scale_up_params: _ScaleUpParams, @@ -1134,28 +1115,7 @@ async def test_cluster_scaling_up_starts_multiple_instances( assert not all_instances["Reservations"] # create several tasks that needs more power - await asyncio.gather( - *( - create_service( - task_template - | create_task_reservations( - int(scale_up_params.service_resources.cpus), - scale_up_params.service_resources.ram, - ), - service_monitored_labels - | 
osparc_docker_label_keys.to_simcore_runtime_docker_labels(), - "pending", - ( - [ - f"node.labels.{DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY}=={scale_up_params.imposed_instance_type}" - ] - if scale_up_params.imposed_instance_type - else [] - ), - ) - for _ in range(scale_up_params.num_services) - ) - ) + await create_services_batch(scale_up_params) # run the code await auto_scale_cluster( @@ -1188,12 +1148,316 @@ async def test_cluster_scaling_up_starts_multiple_instances( @pytest.mark.parametrize( - "docker_service_imposed_ec2_type, docker_service_ram, expected_ec2_type", + "with_docker_join_drained", ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"], indirect=True +) +@pytest.mark.parametrize( + "with_drain_nodes_labelled", + ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"], + indirect=True, +) +@pytest.mark.parametrize( + "scale_up_params1, scale_up_params2", + [ + pytest.param( + _ScaleUpParams( + imposed_instance_type="g3.4xlarge", # 1 GPU, 16 CPUs, 122GiB + service_resources=Resources( + cpus=16, ram=TypeAdapter(ByteSize).validate_python("30Gib") + ), + num_services=12, + expected_instance_type="g3.4xlarge", # 1 GPU, 16 CPUs, 122GiB + expected_num_instances=10, + ), + _ScaleUpParams( + imposed_instance_type="g4dn.8xlarge", # 32CPUs, 128GiB + service_resources=Resources( + cpus=32, ram=TypeAdapter(ByteSize).validate_python("20480MB") + ), + num_services=7, + expected_instance_type="g4dn.8xlarge", # 32CPUs, 128GiB + expected_num_instances=7, + ), + id="A batch of services requiring g3.4xlarge and a batch requiring g4dn.8xlarge", + ), + ], +) +async def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 + patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock, + minimal_configuration: None, + ec2_client: EC2Client, + initialized_app: FastAPI, + app_settings: ApplicationSettings, + create_services_batch: Callable[[_ScaleUpParams], Awaitable[list[Service]]], + ec2_instance_custom_tags: dict[str, str], + instance_type_filters: 
Sequence[FilterTypeDef], + async_docker_client: aiodocker.Docker, + scale_up_params1: _ScaleUpParams, + scale_up_params2: _ScaleUpParams, + mocked_associate_ec2_instances_with_nodes: mock.Mock, + create_fake_node: Callable[..., Node], + mock_docker_tag_node: mock.Mock, + mock_compute_node_used_resources: mock.Mock, + spied_cluster_analysis: MockType, + mocker: MockerFixture, +): + # pre-requisites + assert app_settings.AUTOSCALING_EC2_INSTANCES + assert app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES > 0 + assert ( + scale_up_params1.num_services + >= app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + ), "this test requires to run a first batch of more services than the maximum number of instances allowed" + # we have nothing running now + all_instances = await ec2_client.describe_instances() + assert not all_instances["Reservations"] + + # + # 1. create the first batch of services requiring the initial machines + first_batch_services = await create_services_batch(scale_up_params1) + + # it will only scale once and do nothing else + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + await assert_autoscaled_dynamic_ec2_instances( + ec2_client, + expected_num_reservations=1, + expected_num_instances=scale_up_params1.expected_num_instances, + expected_instance_type=scale_up_params1.expected_instance_type, + expected_instance_state="running", + expected_additional_tag_keys=list(ec2_instance_custom_tags), + instance_filters=instance_type_filters, + ) + assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=0, + ) + mocked_associate_ec2_instances_with_nodes.assert_called_once_with([], []) + mocked_associate_ec2_instances_with_nodes.reset_mock() + mocked_associate_ec2_instances_with_nodes.side_effect = create_fake_association( + create_fake_node, None, None + ) + + # + # 2. 
now the machines are associated + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + mocked_associate_ec2_instances_with_nodes.assert_called_once() + mock_docker_tag_node.assert_called() + assert ( + mock_docker_tag_node.call_count + == app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + ) + assert analyzed_cluster.active_nodes + + # + # 3. now we start the second batch of services requiring a different type of machines + await create_services_batch(scale_up_params2) + + # scaling will do nothing since we have hit the maximum number of machines + for _ in range(3): + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + await assert_autoscaled_dynamic_ec2_instances( + ec2_client, + expected_num_reservations=1, + expected_num_instances=scale_up_params1.expected_num_instances, + expected_instance_type=scale_up_params1.expected_instance_type, + expected_instance_state="running", + expected_additional_tag_keys=list(ec2_instance_custom_tags), + instance_filters=instance_type_filters, + ) + + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=3, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert not analyzed_cluster.drained_nodes + + # + # 4.now we simulate that some of the services in the 1st batch have completed and that we are 1 below the max + # a machine should switch off and another type should be started + completed_services_to_stop = random.sample( + first_batch_services, + scale_up_params1.num_services + - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + + 1, + ) + await asyncio.gather( + *( + async_docker_client.services.delete(s.id) + 
for s in completed_services_to_stop + if s.id + ) + ) + + # first call to auto_scale_cluster will mark 1 node as empty + with mock.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.set_node_found_empty", + autospec=True, + ) as mock_docker_set_node_found_empty: + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert not analyzed_cluster.drained_nodes + # the last machine is found empty + mock_docker_set_node_found_empty.assert_called_with( + mock.ANY, + analyzed_cluster.active_nodes[-1].node, + empty=True, + ) + + # now we mock the get_node_found_empty so the next call will actually drain the machine + with mock.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_empty_since", + autospec=True, + return_value=arrow.utcnow().datetime + - 1.5 + * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_DRAINING, + ) as mocked_get_node_empty_since: + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + mocked_get_node_empty_since.assert_called_once() + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert not analyzed_cluster.drained_nodes + # now scaling again should find the drained machine + drained_machine_instance_id = analyzed_cluster.active_nodes[-1].ec2_instance.id + mocked_associate_ec2_instances_with_nodes.side_effect = create_fake_association( + create_fake_node, drained_machine_instance_id, None + ) + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + analyzed_cluster 
= assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert analyzed_cluster.drained_nodes + + # this will initiate termination now + with mock.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_last_readyness_update", + autospec=True, + return_value=arrow.utcnow().datetime + - 1.5 + * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION, + ): + mock_docker_tag_node.reset_mock() + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + mock_docker_tag_node.assert_called_with( + mock.ANY, + analyzed_cluster.drained_nodes[-1].node, + tags=mock.ANY, + available=False, + ) + + # scaling again should find the terminating machine + mocked_associate_ec2_instances_with_nodes.side_effect = create_fake_association( + create_fake_node, drained_machine_instance_id, drained_machine_instance_id + ) + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + analyzed_cluster = assert_cluster_state( + spied_cluster_analysis, + expected_calls=1, + expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES, + ) + assert analyzed_cluster.active_nodes + assert not analyzed_cluster.drained_nodes + assert analyzed_cluster.terminating_nodes + + # now this will terminate it and straight away start a new machine type + with mock.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.get_node_termination_started_since", + autospec=True, + return_value=arrow.utcnow().datetime + - 1.5 + * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_TIME_BEFORE_TERMINATION, + ): + 
mocked_docker_remove_node = mocker.patch( + "simcore_service_autoscaling.modules.auto_scaling_core.utils_docker.remove_nodes", + return_value=None, + autospec=True, + ) + await auto_scale_cluster( + app=initialized_app, auto_scaling_mode=DynamicAutoscaling() + ) + mocked_docker_remove_node.assert_called_once() + + # now let's check what we have + all_instances = await ec2_client.describe_instances() + assert len(all_instances["Reservations"]) == 2, "there should be 2 Reservations" + reservation1 = all_instances["Reservations"][0] + assert "Instances" in reservation1 + assert len(reservation1["Instances"]) == ( + app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES + ), f"expected {app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES} EC2 instances, found {len(reservation1['Instances'])}" + for instance in reservation1["Instances"]: + assert "InstanceType" in instance + assert instance["InstanceType"] == scale_up_params1.expected_instance_type + assert "InstanceId" in instance + assert "State" in instance + assert "Name" in instance["State"] + if instance["InstanceId"] == drained_machine_instance_id: + assert instance["State"]["Name"] == "terminated" + else: + assert instance["State"]["Name"] == "running" + + reservation2 = all_instances["Reservations"][1] + assert "Instances" in reservation2 + assert ( + len(reservation2["Instances"]) == 1 + ), f"expected 1 EC2 instances, found {len(reservation2['Instances'])}" + for instance in reservation2["Instances"]: + assert "InstanceType" in instance + assert instance["InstanceType"] == scale_up_params2.expected_instance_type + + +@pytest.mark.parametrize( + "scale_up_params", [ pytest.param( - None, - TypeAdapter(ByteSize).validate_python("128Gib"), - "r5n.4xlarge", + _ScaleUpParams( + imposed_instance_type=None, + service_resources=Resources( + cpus=4, ram=TypeAdapter(ByteSize).validate_python("128Gib") + ), + num_services=1, + expected_instance_type="r5n.4xlarge", + expected_num_instances=1, + 
), id="No explicit instance defined", ), ], @@ -1201,24 +1465,17 @@ async def test_cluster_scaling_up_starts_multiple_instances( async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( with_short_ec2_instances_max_start_time: EnvVarsDict, minimal_configuration: None, - service_monitored_labels: dict[DockerLabelKey, str], app_settings: ApplicationSettings, initialized_app: FastAPI, - create_service: Callable[ - [dict[str, Any], dict[DockerLabelKey, str], str, list[str]], Awaitable[Service] - ], - task_template: dict[str, Any], - create_task_reservations: Callable[[int, int], dict[str, Any]], + create_services_batch: Callable[[_ScaleUpParams], Awaitable[list[Service]]], ec2_client: EC2Client, - docker_service_imposed_ec2_type: InstanceTypeType | None, - docker_service_ram: ByteSize, - expected_ec2_type: InstanceTypeType, mock_find_node_with_name_returns_none: mock.Mock, mock_docker_tag_node: mock.Mock, mock_rabbitmq_post_message: mock.Mock, short_ec2_instance_max_start_time: datetime.timedelta, ec2_instance_custom_tags: dict[str, str], instance_type_filters: Sequence[FilterTypeDef], + scale_up_params: _ScaleUpParams, ): assert app_settings.AUTOSCALING_EC2_INSTANCES assert ( @@ -1228,19 +1485,8 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( # we have nothing running now all_instances = await ec2_client.describe_instances() assert not all_instances["Reservations"] - # create a service - await create_service( - task_template | create_task_reservations(4, docker_service_ram), - service_monitored_labels, - "pending", - ( - [ - f"node.labels.{DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY}=={ docker_service_imposed_ec2_type}" - ] - if docker_service_imposed_ec2_type - else [] - ), - ) + await create_services_batch(scale_up_params) + # this should trigger a scaling up as we have no nodes await auto_scale_cluster( app=initialized_app, auto_scaling_mode=DynamicAutoscaling() @@ -1250,8 +1496,8 @@ async def 
test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( instances = await assert_autoscaled_dynamic_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), instance_filters=instance_type_filters, @@ -1266,7 +1512,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( app_settings, initialized_app, instances_running=0, - instances_pending=1, + instances_pending=scale_up_params.expected_num_instances, ) mock_rabbitmq_post_message.reset_mock() @@ -1293,8 +1539,8 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( instances = await assert_autoscaled_dynamic_ec2_instances( ec2_client, expected_num_reservations=1, - expected_num_instances=1, - expected_instance_type=expected_ec2_type, + expected_num_instances=scale_up_params.expected_num_instances, + expected_instance_type=scale_up_params.expected_instance_type, expected_instance_state="running", expected_additional_tag_keys=list(ec2_instance_custom_tags), instance_filters=instance_type_filters, @@ -1306,7 +1552,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( app_settings, initialized_app, instances_running=0, - instances_pending=1, + instances_pending=scale_up_params.expected_num_instances, ) mock_rabbitmq_post_message.reset_mock() assert instances @@ -1337,7 +1583,10 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( all_instances = await ec2_client.describe_instances() assert len(all_instances["Reservations"]) == 2 assert "Instances" in all_instances["Reservations"][0] - assert len(all_instances["Reservations"][0]["Instances"]) == 1 + assert ( + 
len(all_instances["Reservations"][0]["Instances"]) + == scale_up_params.expected_num_instances + ) assert "State" in all_instances["Reservations"][0]["Instances"][0] assert "Name" in all_instances["Reservations"][0]["Instances"][0]["State"] assert ( @@ -1346,7 +1595,10 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( ) assert "Instances" in all_instances["Reservations"][1] - assert len(all_instances["Reservations"][1]["Instances"]) == 1 + assert ( + len(all_instances["Reservations"][1]["Instances"]) + == scale_up_params.expected_num_instances + ) assert "State" in all_instances["Reservations"][1]["Instances"][0] assert "Name" in all_instances["Reservations"][1]["Instances"][0]["State"] assert ( @@ -1517,7 +1769,17 @@ async def test__activate_drained_nodes_with_drained_node( updated_cluster = await _activate_drained_nodes( initialized_app, cluster_with_drained_nodes, DynamicAutoscaling() ) - assert updated_cluster.active_nodes == cluster_with_drained_nodes.drained_nodes + # they are the same nodes, but the availability might have changed here + assert updated_cluster.active_nodes != cluster_with_drained_nodes.drained_nodes + assert ( + updated_cluster.active_nodes[0].assigned_tasks + == cluster_with_drained_nodes.drained_nodes[0].assigned_tasks + ) + assert ( + updated_cluster.active_nodes[0].ec2_instance + == cluster_with_drained_nodes.drained_nodes[0].ec2_instance + ) + assert drained_host_node.spec mock_docker_tag_node.assert_called_once_with( mock.ANY, diff --git a/services/autoscaling/tests/unit/test_utils_docker.py b/services/autoscaling/tests/unit/test_utils_docker.py index 3f9677112bb..90f214ee530 100644 --- a/services/autoscaling/tests/unit/test_utils_docker.py +++ b/services/autoscaling/tests/unit/test_utils_docker.py @@ -169,6 +169,25 @@ async def test_get_monitored_nodes_with_valid_label( ) +async def test_get_monitored_nodes_are_sorted_according_to_creation_date( + mocker: MockerFixture, + autoscaling_docker: 
AutoscalingDocker, + create_fake_node: Callable[..., Node], + faker: Faker, +): + fake_nodes = [ + create_fake_node(CreatedAt=faker.date_time(tzinfo=datetime.UTC).isoformat()) + for _ in range(10) + ] + mocked_aiodocker = mocker.patch.object(autoscaling_docker, "nodes", autospec=True) + mocked_aiodocker.list.return_value = fake_nodes + monitored_nodes = await get_monitored_nodes(autoscaling_docker, node_labels=[]) + assert len(monitored_nodes) == len(fake_nodes) + sorted_fake_nodes = sorted(fake_nodes, key=lambda node: arrow.get(node.created_at)) + assert monitored_nodes == sorted_fake_nodes + assert monitored_nodes[0].created_at < monitored_nodes[1].created_at + + async def test_worker_nodes( autoscaling_docker: AutoscalingDocker, host_node: Node, diff --git a/services/catalog/src/simcore_service_catalog/db/repositories/services.py b/services/catalog/src/simcore_service_catalog/db/repositories/services.py index e848fb9b164..7cb1b72e333 100644 --- a/services/catalog/src/simcore_service_catalog/db/repositories/services.py +++ b/services/catalog/src/simcore_service_catalog/db/repositories/services.py @@ -10,10 +10,10 @@ from models_library.api_schemas_catalog.services_specifications import ( ServiceSpecifications, ) -from models_library.groups import GroupAtDB, GroupTypeInModel +from models_library.groups import GroupAtDB, GroupID, GroupType from models_library.products import ProductName from models_library.services import ServiceKey, ServiceVersion -from models_library.users import GroupID, UserID +from models_library.users import UserID from psycopg2.errors import ForeignKeyViolation from pydantic import PositiveInt, TypeAdapter, ValidationError from simcore_postgres_database.utils_services import create_select_latest_services_query @@ -597,16 +597,16 @@ async def get_service_specifications( continue # filter by group type group = gid_to_group_map[row.gid] - if (group.group_type == GroupTypeInModel.STANDARD) and _is_newer( + if (group.group_type == 
GroupType.STANDARD) and _is_newer( teams_specs.get(db_service_spec.gid), db_service_spec, ): teams_specs[db_service_spec.gid] = db_service_spec - elif (group.group_type == GroupTypeInModel.EVERYONE) and _is_newer( + elif (group.group_type == GroupType.EVERYONE) and _is_newer( everyone_specs, db_service_spec ): everyone_specs = db_service_spec - elif (group.group_type == GroupTypeInModel.PRIMARY) and _is_newer( + elif (group.group_type == GroupType.PRIMARY) and _is_newer( primary_specs, db_service_spec ): primary_specs = db_service_spec diff --git a/services/catalog/src/simcore_service_catalog/models/services_specifications.py b/services/catalog/src/simcore_service_catalog/models/services_specifications.py index d53e56a8c56..fc03805537f 100644 --- a/services/catalog/src/simcore_service_catalog/models/services_specifications.py +++ b/services/catalog/src/simcore_service_catalog/models/services_specifications.py @@ -1,8 +1,8 @@ from models_library.api_schemas_catalog.services_specifications import ( ServiceSpecifications, ) +from models_library.groups import GroupID from models_library.services import ServiceKey, ServiceVersion -from models_library.users import GroupID from pydantic import ConfigDict diff --git a/services/migration/Dockerfile b/services/migration/Dockerfile index fe262597d07..33b55d7d5ce 100644 --- a/services/migration/Dockerfile +++ b/services/migration/Dockerfile @@ -62,7 +62,7 @@ WORKDIR /build/packages/postgres-database # install only base 3rd party dependencies RUN \ - --mount=type=bind,source=packages/postgres-database,target=/build/packages/postgres-database,rw \ + --mount=type=bind,source=packages,target=/build/packages,rw \ --mount=type=cache,target=/root/.cache/uv \ uv pip install \ --requirement requirements/prod.txt \ diff --git a/services/payments/src/simcore_service_payments/db/payment_users_repo.py b/services/payments/src/simcore_service_payments/db/payment_users_repo.py index 6a2c53c7be0..ec643ee8bca 100644 --- 
a/services/payments/src/simcore_service_payments/db/payment_users_repo.py +++ b/services/payments/src/simcore_service_payments/db/payment_users_repo.py @@ -1,6 +1,7 @@ import sqlalchemy as sa from models_library.api_schemas_webserver.wallets import PaymentID -from models_library.users import GroupID, UserID +from models_library.groups import GroupID +from models_library.users import UserID from simcore_postgres_database.models.payments_transactions import payments_transactions from simcore_postgres_database.models.products import products from simcore_postgres_database.models.users import users diff --git a/services/payments/tests/conftest.py b/services/payments/tests/conftest.py index 220e1edc48a..39608fe4e70 100644 --- a/services/payments/tests/conftest.py +++ b/services/payments/tests/conftest.py @@ -9,7 +9,7 @@ import pytest import simcore_service_payments from faker import Faker -from models_library.users import GroupID +from models_library.groups import GroupID from pydantic import TypeAdapter from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict from pytest_simcore.helpers.typing_env import EnvVarsDict diff --git a/services/payments/tests/unit/test_db_payments_users_repo.py b/services/payments/tests/unit/test_db_payments_users_repo.py index 51d5f540c6b..4cff0108033 100644 --- a/services/payments/tests/unit/test_db_payments_users_repo.py +++ b/services/payments/tests/unit/test_db_payments_users_repo.py @@ -10,7 +10,8 @@ import pytest from fastapi import FastAPI -from models_library.users import GroupID, UserID +from models_library.groups import GroupID +from models_library.users import UserID from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict from pytest_simcore.helpers.postgres_tools import insert_and_get_row_lifespan from pytest_simcore.helpers.typing_env import EnvVarsDict diff --git a/services/payments/tests/unit/test_services_notifier.py b/services/payments/tests/unit/test_services_notifier.py index 
ee55afa9be3..faeed872a5c 100644 --- a/services/payments/tests/unit/test_services_notifier.py +++ b/services/payments/tests/unit/test_services_notifier.py @@ -18,7 +18,8 @@ ) from models_library.api_schemas_webserver.socketio import SocketIORoomStr from models_library.api_schemas_webserver.wallets import PaymentTransaction -from models_library.users import GroupID, UserID +from models_library.groups import GroupID +from models_library.users import UserID from pydantic import TypeAdapter from pytest_mock import MockerFixture from pytest_simcore.helpers.faker_factories import random_payment_transaction diff --git a/services/static-webserver/client/source/class/osparc/data/model/User.js b/services/static-webserver/client/source/class/osparc/data/model/User.js index f0c8a5dabb1..fbdc80c6adf 100644 --- a/services/static-webserver/client/source/class/osparc/data/model/User.js +++ b/services/static-webserver/client/source/class/osparc/data/model/User.js @@ -39,7 +39,7 @@ qx.Class.define("osparc.data.model.User", { if (userData["login"]) { description += userData["login"]; } - const thumbnail = osparc.utils.Avatar.emailToThumbnail(userData["login"]); + const thumbnail = osparc.utils.Avatar.emailToThumbnail(userData["login"], userData["userName"]); this.set({ userId: parseInt(userData["id"]), groupId: parseInt(userData["gid"]), diff --git a/services/static-webserver/client/source/class/osparc/desktop/account/MyAccount.js b/services/static-webserver/client/source/class/osparc/desktop/account/MyAccount.js index 2380a9745f3..40a3e5b5918 100644 --- a/services/static-webserver/client/source/class/osparc/desktop/account/MyAccount.js +++ b/services/static-webserver/client/source/class/osparc/desktop/account/MyAccount.js @@ -42,11 +42,11 @@ qx.Class.define("osparc.desktop.account.MyAccount", { }); const authData = osparc.auth.Data.getInstance(); - + const username = authData.getUsername(); const email = authData.getEmail(); const avatarSize = 80; const img = new 
qx.ui.basic.Image().set({ - source: osparc.utils.Avatar.getUrl(email, avatarSize), + source: osparc.utils.Avatar.emailToThumbnail(email, username, avatarSize), maxWidth: avatarSize, maxHeight: avatarSize, scale: true, diff --git a/services/static-webserver/client/source/class/osparc/info/CommentAdd.js b/services/static-webserver/client/source/class/osparc/info/CommentAdd.js index b8f36a839e6..53b26c23bad 100644 --- a/services/static-webserver/client/source/class/osparc/info/CommentAdd.js +++ b/services/static-webserver/client/source/class/osparc/info/CommentAdd.js @@ -68,9 +68,11 @@ qx.Class.define("osparc.info.CommentAdd", { maxHeight: 32, decorator: "rounded", }); - const myEmail = osparc.auth.Data.getInstance().getEmail(); + const authData = osparc.auth.Data.getInstance(); + const myUsername = authData.getUsername(); + const myEmail = authData.getEmail(); control.set({ - source: osparc.utils.Avatar.getUrl(myEmail, 32) + source: osparc.utils.Avatar.emailToThumbnail(myEmail, myUsername, 32) }); const layout = this.getChildControl("add-comment-layout"); layout.add(control, { diff --git a/services/static-webserver/client/source/class/osparc/info/CommentUI.js b/services/static-webserver/client/source/class/osparc/info/CommentUI.js index ea6df760a08..94450f804b7 100644 --- a/services/static-webserver/client/source/class/osparc/info/CommentUI.js +++ b/services/static-webserver/client/source/class/osparc/info/CommentUI.js @@ -104,7 +104,7 @@ qx.Class.define("osparc.info.CommentUI", { __buildLayout: function() { const thumbnail = this.getChildControl("thumbnail"); - thumbnail.setSource(osparc.utils.Avatar.getUrl("", 32)); + thumbnail.setSource(osparc.utils.Avatar.emailToThumbnail("", "", 32)); const userName = this.getChildControl("user-name"); userName.setValue("Unknown"); diff --git a/services/static-webserver/client/source/class/osparc/navigation/UserMenuButton.js b/services/static-webserver/client/source/class/osparc/navigation/UserMenuButton.js index 
e53fc4e7e1c..e1726533215 100644 --- a/services/static-webserver/client/source/class/osparc/navigation/UserMenuButton.js +++ b/services/static-webserver/client/source/class/osparc/navigation/UserMenuButton.js @@ -51,6 +51,7 @@ qx.Class.define("osparc.navigation.UserMenuButton", { const preferencesSettings = osparc.Preferences.getInstance(); preferencesSettings.addListener("changeCreditsWarningThreshold", () => this.__updateHaloColor()); + const myUsername = authData.getUsername() || "Username"; const myEmail = authData.getEmail() || "bizzy@itis.ethz.ch"; const icon = this.getChildControl("icon"); authData.bind("role", this, "icon", { @@ -64,7 +65,7 @@ qx.Class.define("osparc.navigation.UserMenuButton", { icon.getContentElement().setStyles({ "margin-left": "-4px" }); - return osparc.utils.Avatar.getUrl(myEmail, 32); + return osparc.utils.Avatar.emailToThumbnail(myEmail, myUsername, 32); } }); }, diff --git a/services/static-webserver/client/source/class/osparc/store/Groups.js b/services/static-webserver/client/source/class/osparc/store/Groups.js index 21e2d4b2a29..e954de7aba6 100644 --- a/services/static-webserver/client/source/class/osparc/store/Groups.js +++ b/services/static-webserver/client/source/class/osparc/store/Groups.js @@ -96,7 +96,7 @@ qx.Class.define("osparc.store.Groups", { groupMe.set({ label: myAuthData.getUsername(), description: `${myAuthData.getFirstName()} ${myAuthData.getLastName()} - ${myAuthData.getEmail()}`, - thumbnail: osparc.utils.Avatar.emailToThumbnail(myAuthData.getEmail()), + thumbnail: osparc.utils.Avatar.emailToThumbnail(myAuthData.getEmail(), myAuthData.getUsername()), }) return orgs; }); diff --git a/services/static-webserver/client/source/class/osparc/store/Store.js b/services/static-webserver/client/source/class/osparc/store/Store.js index ea05e789754..e9146a1402f 100644 --- a/services/static-webserver/client/source/class/osparc/store/Store.js +++ b/services/static-webserver/client/source/class/osparc/store/Store.js @@ -222,7 
+222,7 @@ qx.Class.define("osparc.store.Store", { check: "Array", init: [] }, - market: { + licensedItems: { check: "Array", init: [] }, @@ -618,7 +618,7 @@ qx.Class.define("osparc.store.Store", { __getOrgClassifiers: function(orgId, useCache = false) { const params = { url: { - "gid": orgId + "gid": parseInt(orgId) } }; return osparc.data.Resources.get("classifiers", params, useCache); @@ -640,7 +640,7 @@ qx.Class.define("osparc.store.Store", { } const classifierPromises = []; orgs.forEach(org => { - classifierPromises.push(this.__getOrgClassifiers(org["gid"], !reload)); + classifierPromises.push(this.__getOrgClassifiers(org.getGroupId(), !reload)); }); Promise.all(classifierPromises) .then(orgsClassifiersMD => { diff --git a/services/static-webserver/client/source/class/osparc/utils/Avatar.js b/services/static-webserver/client/source/class/osparc/utils/Avatar.js index a2d40081bcb..c108a661355 100644 --- a/services/static-webserver/client/source/class/osparc/utils/Avatar.js +++ b/services/static-webserver/client/source/class/osparc/utils/Avatar.js @@ -26,7 +26,7 @@ * Here is a little example of how to use the widget. * *
- * let image = osparc.utils.Avatar.getUrl(userEmail); + * let image = osparc.utils.Avatar.emailToThumbnail(userEmail); **/ @@ -34,13 +34,14 @@ qx.Class.define("osparc.utils.Avatar", { type: "static", statics: { - emailToThumbnail: function(email) { - return this.getUrl(email, 32) + emailToThumbnail: function(email, username, size = 32) { + return this.__getUrl(email, username, size); }, - getUrl: function(email = "", size = 100, defIcon = "identicon", rating = "g") { + __getUrl: function(email, username, size = 100) { + email = email || ""; // MD5 (Message-Digest Algorithm) by WebToolkit - let MD5 = function(s) { + const MD5 = function(s) { function L(k, d) { return (k << d) | (k >>> (32 - d)); } @@ -257,8 +258,9 @@ qx.Class.define("osparc.utils.Avatar", { return i.toLowerCase(); }; - return "https://secure.gravatar.com/avatar/" + MD5(email) + "?s=" + size + "&d=" + defIcon + "&r=" + rating; - } - + const emailHash = MD5(email); + const defaultImageUrl = `https://ui-avatars.com/api/${username}/${size}`; + return `https://www.gravatar.com/avatar/${emailHash}?d=${defaultImageUrl}&s=${size}&r=g`; + }, } }); diff --git a/services/storage/src/simcore_service_storage/db_access_layer.py b/services/storage/src/simcore_service_storage/db_access_layer.py index b77504088f1..27f9dfb9214 100644 --- a/services/storage/src/simcore_service_storage/db_access_layer.py +++ b/services/storage/src/simcore_service_storage/db_access_layer.py @@ -42,9 +42,10 @@ import sqlalchemy as sa from aiopg.sa.connection import SAConnection from aiopg.sa.result import ResultProxy, RowProxy +from models_library.groups import GroupID from models_library.projects import ProjectID from models_library.projects_nodes_io import StorageFileID -from models_library.users import GroupID, UserID +from models_library.users import UserID from simcore_postgres_database.models.project_to_groups import project_to_groups from simcore_postgres_database.models.projects import projects from 
simcore_postgres_database.models.workspaces_access_rights import ( diff --git a/services/web/server/src/simcore_service_webserver/folders/_folders_db.py b/services/web/server/src/simcore_service_webserver/folders/_folders_db.py index dee552377fa..6c78855995e 100644 --- a/services/web/server/src/simcore_service_webserver/folders/_folders_db.py +++ b/services/web/server/src/simcore_service_webserver/folders/_folders_db.py @@ -18,10 +18,11 @@ FolderScope, UserFolderAccessRightsDB, ) +from models_library.groups import GroupID from models_library.products import ProductName from models_library.projects import ProjectID from models_library.rest_ordering import OrderBy, OrderDirection -from models_library.users import GroupID, UserID +from models_library.users import UserID from models_library.workspaces import WorkspaceID, WorkspaceQuery, WorkspaceScope from pydantic import NonNegativeInt from simcore_postgres_database.models.folders_v2 import folders_v2 diff --git a/services/web/server/src/simcore_service_webserver/garbage_collector/_core_utils.py b/services/web/server/src/simcore_service_webserver/garbage_collector/_core_utils.py index 53750a3c27d..a2108766786 100644 --- a/services/web/server/src/simcore_service_webserver/garbage_collector/_core_utils.py +++ b/services/web/server/src/simcore_service_webserver/garbage_collector/_core_utils.py @@ -2,9 +2,9 @@ import asyncpg.exceptions from aiohttp import web -from models_library.groups import Group, GroupTypeInModel +from models_library.groups import Group, GroupID, GroupType from models_library.projects import ProjectID -from models_library.users import GroupID, UserID +from models_library.users import UserID from simcore_postgres_database.errors import DatabaseError from ..groups.api import get_group_from_gid @@ -86,9 +86,9 @@ async def get_new_project_owner_gid( if access_rights[other_gid]["write"] is not True: continue - if group.group_type == GroupTypeInModel.STANDARD: + if group.group_type == GroupType.STANDARD: 
standard_groups[other_gid] = access_rights[other_gid] - elif group.group_type == GroupTypeInModel.PRIMARY: + elif group.group_type == GroupType.PRIMARY: primary_groups[other_gid] = access_rights[other_gid] _logger.debug( diff --git a/services/web/server/src/simcore_service_webserver/groups/_common/schemas.py b/services/web/server/src/simcore_service_webserver/groups/_common/schemas.py index 872193aaffe..18ab7cba5ff 100644 --- a/services/web/server/src/simcore_service_webserver/groups/_common/schemas.py +++ b/services/web/server/src/simcore_service_webserver/groups/_common/schemas.py @@ -1,7 +1,8 @@ from typing import Literal +from models_library.groups import GroupID from models_library.rest_base import RequestParameters, StrictRequestParameters -from models_library.users import GroupID, UserID +from models_library.users import UserID from pydantic import Field from ..._constants import RQ_PRODUCT_KEY, RQT_USERID_KEY diff --git a/services/web/server/src/simcore_service_webserver/groups/_groups_api.py b/services/web/server/src/simcore_service_webserver/groups/_groups_api.py index 18491ec60bf..465b57c8f80 100644 --- a/services/web/server/src/simcore_service_webserver/groups/_groups_api.py +++ b/services/web/server/src/simcore_service_webserver/groups/_groups_api.py @@ -4,13 +4,14 @@ from models_library.groups import ( AccessRightsDict, Group, + GroupID, GroupMember, GroupsByTypeTuple, StandardGroupCreate, StandardGroupUpdate, ) from models_library.products import ProductName -from models_library.users import GroupID, UserID +from models_library.users import UserID from pydantic import EmailStr from ..users.api import get_user diff --git a/services/web/server/src/simcore_service_webserver/groups/_groups_db.py b/services/web/server/src/simcore_service_webserver/groups/_groups_db.py index 570375f3646..aedc78676d3 100644 --- a/services/web/server/src/simcore_service_webserver/groups/_groups_db.py +++ 
b/services/web/server/src/simcore_service_webserver/groups/_groups_db.py @@ -3,19 +3,20 @@ import sqlalchemy as sa from aiohttp import web +from common_library.groups_enums import GroupType from models_library.basic_types import IDStr from models_library.groups import ( AccessRightsDict, Group, + GroupID, GroupInfoTuple, GroupMember, GroupsByTypeTuple, StandardGroupCreate, StandardGroupUpdate, ) -from models_library.users import GroupID, UserID +from models_library.users import UserID from simcore_postgres_database.errors import UniqueViolation -from simcore_postgres_database.models.groups import GroupType from simcore_postgres_database.utils_products import execute_get_or_create_product_group from simcore_postgres_database.utils_repos import ( pass_or_acquire_connection, @@ -653,19 +654,18 @@ async def add_new_user_in_group( ) _check_group_permissions(group, user_id, group_id, "write") - query = sa.select(sa.func.count()) - if new_user_id: + query = sa.select(users.c.id) + if new_user_id is not None: query = query.where(users.c.id == new_user_id) - elif new_user_name: + elif new_user_name is not None: query = query.where(users.c.name == new_user_name) else: - msg = "Either user name or id but none provided" + msg = "Expected either user-name or user-ID but none was provided" raise ValueError(msg) # now check the new user exists - users_count = await conn.scalar(query) - if not users_count: - assert new_user_id is not None # nosec + new_user_id = await conn.scalar(query) + if not new_user_id: raise UserInGroupNotFoundError(uid=new_user_id, gid=group_id) # add the new user to the group now diff --git a/services/web/server/src/simcore_service_webserver/licenses/_rpc.py b/services/web/server/src/simcore_service_webserver/licenses/_rpc.py new file mode 100644 index 00000000000..fede0759b0d --- /dev/null +++ b/services/web/server/src/simcore_service_webserver/licenses/_rpc.py @@ -0,0 +1,80 @@ +from aiohttp import web +from models_library.api_schemas_webserver import 
WEBSERVER_RPC_NAMESPACE +from models_library.api_schemas_webserver.licensed_items import LicensedItemGetPage +from models_library.basic_types import IDStr +from models_library.licensed_items import LicensedItemID +from models_library.products import ProductName +from models_library.resource_tracker import ServiceRunId +from models_library.rest_ordering import OrderBy +from models_library.users import UserID +from models_library.wallets import WalletID +from servicelib.rabbitmq import RPCRouter + +from ..rabbitmq import get_rabbitmq_rpc_server +from . import _licensed_items_api + +router = RPCRouter() + + +@router.expose() +async def get_licensed_items( + app: web.Application, + *, + product_name: ProductName, + offset: int, + limit: int, +) -> LicensedItemGetPage: + licensed_item_get_page: LicensedItemGetPage = ( + await _licensed_items_api.list_licensed_items( + app=app, + product_name=product_name, + offset=offset, + limit=limit, + order_by=OrderBy(field=IDStr("name")), + ) + ) + return licensed_item_get_page + + +@router.expose(reraise_if_error_type=(NotImplementedError,)) +async def get_licensed_items_for_wallet( + app: web.Application, + *, + user_id: UserID, + product_name: ProductName, + wallet_id: WalletID, +) -> None: + raise NotImplementedError + + +@router.expose(reraise_if_error_type=(NotImplementedError,)) +async def checkout_licensed_item_for_wallet( + app: web.Application, + *, + user_id: UserID, + product_name: ProductName, + wallet_id: WalletID, + licensed_item_id: LicensedItemID, + num_of_seats: int, + service_run_id: ServiceRunId, +) -> None: + raise NotImplementedError + + +@router.expose(reraise_if_error_type=(NotImplementedError,)) +async def release_licensed_item_for_wallet( + app: web.Application, + *, + user_id: UserID, + product_name: ProductName, + wallet_id: WalletID, + licensed_item_id: LicensedItemID, + num_of_seats: int, + service_run_id: ServiceRunId, +) -> None: + raise NotImplementedError + + +async def register_rpc_routes_on_startup(app: 
web.Application): + rpc_server = get_rabbitmq_rpc_server(app) + await rpc_server.register_router(router, WEBSERVER_RPC_NAMESPACE, app) diff --git a/services/web/server/src/simcore_service_webserver/licenses/plugin.py b/services/web/server/src/simcore_service_webserver/licenses/plugin.py index 6c2ea7ce0d9..137c7b2d1dc 100644 --- a/services/web/server/src/simcore_service_webserver/licenses/plugin.py +++ b/services/web/server/src/simcore_service_webserver/licenses/plugin.py @@ -7,7 +7,8 @@ from servicelib.aiohttp.application_keys import APP_SETTINGS_KEY from servicelib.aiohttp.application_setup import ModuleCategory, app_module_setup -from . import _licensed_items_handlers, _licensed_items_purchases_handlers +from ..rabbitmq import setup_rabbitmq +from . import _licensed_items_handlers, _licensed_items_purchases_handlers, _rpc _logger = logging.getLogger(__name__) @@ -25,3 +26,7 @@ def setup_licenses(app: web.Application): # routes app.router.add_routes(_licensed_items_handlers.routes) app.router.add_routes(_licensed_items_purchases_handlers.routes) + + setup_rabbitmq(app) + if app[APP_SETTINGS_KEY].WEBSERVER_RABBITMQ: + app.on_startup.append(_rpc.register_rpc_routes_on_startup) diff --git a/services/web/server/src/simcore_service_webserver/notifications/_rabbitmq_exclusive_queue_consumers.py b/services/web/server/src/simcore_service_webserver/notifications/_rabbitmq_exclusive_queue_consumers.py index 048d0162fe3..1ba51262d84 100644 --- a/services/web/server/src/simcore_service_webserver/notifications/_rabbitmq_exclusive_queue_consumers.py +++ b/services/web/server/src/simcore_service_webserver/notifications/_rabbitmq_exclusive_queue_consumers.py @@ -3,6 +3,7 @@ from typing import Final from aiohttp import web +from models_library.groups import GroupID from models_library.rabbitmq_messages import ( EventRabbitMessage, LoggerRabbitMessage, @@ -12,7 +13,6 @@ WalletCreditsMessage, ) from models_library.socketio import SocketMessageDict -from models_library.users import 
GroupID from pydantic import TypeAdapter from servicelib.logging_utils import log_catch, log_context from servicelib.rabbitmq import RabbitMQClient diff --git a/services/web/server/src/simcore_service_webserver/projects/_groups_api.py b/services/web/server/src/simcore_service_webserver/projects/_groups_api.py index b32a6d15fa1..355b25481f6 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_groups_api.py +++ b/services/web/server/src/simcore_service_webserver/projects/_groups_api.py @@ -2,9 +2,10 @@ from datetime import datetime from aiohttp import web +from models_library.groups import GroupID from models_library.products import ProductName from models_library.projects import ProjectID -from models_library.users import GroupID, UserID +from models_library.users import UserID from pydantic import BaseModel from ..users import api as users_api diff --git a/services/web/server/src/simcore_service_webserver/projects/_groups_db.py b/services/web/server/src/simcore_service_webserver/projects/_groups_db.py index 4355f0c9d92..86d9c83d781 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_groups_db.py +++ b/services/web/server/src/simcore_service_webserver/projects/_groups_db.py @@ -8,8 +8,8 @@ from datetime import datetime from aiohttp import web +from models_library.groups import GroupID from models_library.projects import ProjectID -from models_library.users import GroupID from pydantic import BaseModel, ConfigDict, TypeAdapter from simcore_postgres_database.models.project_to_groups import project_to_groups from simcore_postgres_database.utils_repos import transaction_context diff --git a/services/web/server/src/simcore_service_webserver/projects/_groups_handlers.py b/services/web/server/src/simcore_service_webserver/projects/_groups_handlers.py index a747798100e..bf612944d4b 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_groups_handlers.py +++ 
b/services/web/server/src/simcore_service_webserver/projects/_groups_handlers.py @@ -6,9 +6,9 @@ import logging from aiohttp import web +from models_library.groups import GroupID from models_library.projects import ProjectID -from models_library.users import GroupID -from pydantic import ConfigDict, BaseModel +from pydantic import BaseModel, ConfigDict from servicelib.aiohttp import status from servicelib.aiohttp.requests_validation import ( parse_request_body_as, diff --git a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py index d5978f794d2..6670ed64442 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py +++ b/services/web/server/src/simcore_service_webserver/projects/_nodes_handlers.py @@ -25,12 +25,11 @@ NodePatch, NodeRetrieve, ) -from models_library.groups import EVERYONE_GROUP_ID, Group, GroupTypeInModel +from models_library.groups import EVERYONE_GROUP_ID, Group, GroupID, GroupType from models_library.projects import Project, ProjectID from models_library.projects_nodes_io import NodeID, NodeIDStr from models_library.services import ServiceKeyVersion from models_library.services_resources import ServiceResourcesDict -from models_library.users import GroupID from models_library.utils.fastapi_encoders import jsonable_encoder from pydantic import BaseModel, Field from servicelib.aiohttp import status @@ -567,7 +566,7 @@ async def get_project_services_access_for_gid( raise GroupNotFoundError(gid=query_params.for_gid) # Update groups to compare based on the type of sharing group - if _sharing_with_group.group_type == GroupTypeInModel.PRIMARY: + if _sharing_with_group.group_type == GroupType.PRIMARY: _user_id = await get_user_id_from_gid( app=request.app, primary_gid=query_params.for_gid ) @@ -576,7 +575,7 @@ async def get_project_services_access_for_gid( ) groups_to_compare.update(set(user_groups_ids)) 
groups_to_compare.add(query_params.for_gid) - elif _sharing_with_group.group_type == GroupTypeInModel.STANDARD: + elif _sharing_with_group.group_type == GroupType.STANDARD: groups_to_compare = {query_params.for_gid} # Initialize a list for inaccessible services diff --git a/services/web/server/src/simcore_service_webserver/projects/projects_api.py b/services/web/server/src/simcore_service_webserver/projects/projects_api.py index c0d3c8af835..472855677b4 100644 --- a/services/web/server/src/simcore_service_webserver/projects/projects_api.py +++ b/services/web/server/src/simcore_service_webserver/projects/projects_api.py @@ -35,6 +35,7 @@ from models_library.api_schemas_webserver.projects_nodes import NodePatch from models_library.basic_types import KeyIDStr from models_library.errors import ErrorDict +from models_library.groups import GroupID from models_library.products import ProductName from models_library.projects import Project, ProjectID, ProjectIDStr from models_library.projects_access import Owner @@ -59,7 +60,7 @@ ServiceResourcesDictHelpers, ) from models_library.socketio import SocketMessageDict -from models_library.users import GroupID, UserID +from models_library.users import UserID from models_library.utils.fastapi_encoders import jsonable_encoder from models_library.wallets import ZERO_CREDITS, WalletID, WalletInfo from models_library.workspaces import UserWorkspaceAccessRightsDB diff --git a/services/web/server/src/simcore_service_webserver/socketio/messages.py b/services/web/server/src/simcore_service_webserver/socketio/messages.py index 388dd8e6a5b..081cab05377 100644 --- a/services/web/server/src/simcore_service_webserver/socketio/messages.py +++ b/services/web/server/src/simcore_service_webserver/socketio/messages.py @@ -7,8 +7,9 @@ from aiohttp.web import Application from models_library.api_schemas_webserver.socketio import SocketIORoomStr +from models_library.groups import GroupID from models_library.socketio import SocketMessageDict -from 
models_library.users import GroupID, UserID +from models_library.users import UserID from models_library.utils.fastapi_encoders import jsonable_encoder from servicelib.logging_utils import log_catch from socketio import AsyncServer # type: ignore[import-untyped] diff --git a/services/web/server/src/simcore_service_webserver/tags/schemas.py b/services/web/server/src/simcore_service_webserver/tags/schemas.py index e2d9e2104cd..34ccce7248a 100644 --- a/services/web/server/src/simcore_service_webserver/tags/schemas.py +++ b/services/web/server/src/simcore_service_webserver/tags/schemas.py @@ -3,8 +3,9 @@ from typing import Annotated from models_library.api_schemas_webserver._base import InputSchema, OutputSchema +from models_library.groups import GroupID from models_library.rest_base import RequestParameters, StrictRequestParameters -from models_library.users import GroupID, UserID +from models_library.users import UserID from pydantic import Field, PositiveInt, StringConstraints from servicelib.request_keys import RQT_USERID_KEY from simcore_postgres_database.utils_tags import TagDict diff --git a/services/web/server/src/simcore_service_webserver/users/_db.py b/services/web/server/src/simcore_service_webserver/users/_db.py index 2071034d2e6..f80c4596423 100644 --- a/services/web/server/src/simcore_service_webserver/users/_db.py +++ b/services/web/server/src/simcore_service_webserver/users/_db.py @@ -5,7 +5,8 @@ from aiopg.sa.connection import SAConnection from aiopg.sa.engine import Engine from aiopg.sa.result import ResultProxy, RowProxy -from models_library.users import GroupID, UserBillingDetails, UserID +from models_library.groups import GroupID +from models_library.users import UserBillingDetails, UserID from simcore_postgres_database.models.groups import groups, user_to_groups from simcore_postgres_database.models.products import products from simcore_postgres_database.models.users import UserStatus, users diff --git 
a/services/web/server/src/simcore_service_webserver/users/api.py b/services/web/server/src/simcore_service_webserver/users/api.py index 623d4f44396..1c1d217a28e 100644 --- a/services/web/server/src/simcore_service_webserver/users/api.py +++ b/services/web/server/src/simcore_service_webserver/users/api.py @@ -20,8 +20,9 @@ MyProfilePrivacyGet, ) from models_library.basic_types import IDStr +from models_library.groups import GroupID from models_library.products import ProductName -from models_library.users import GroupID, UserID +from models_library.users import UserID from pydantic import EmailStr, TypeAdapter, ValidationError from simcore_postgres_database.models.groups import GroupType, groups, user_to_groups from simcore_postgres_database.models.users import UserRole, users diff --git a/services/web/server/src/simcore_service_webserver/wallets/_db.py b/services/web/server/src/simcore_service_webserver/wallets/_db.py index 413b68ff84f..98ec51a658c 100644 --- a/services/web/server/src/simcore_service_webserver/wallets/_db.py +++ b/services/web/server/src/simcore_service_webserver/wallets/_db.py @@ -6,8 +6,9 @@ import logging from aiohttp import web +from models_library.groups import GroupID from models_library.products import ProductName -from models_library.users import GroupID, UserID +from models_library.users import UserID from models_library.wallets import UserWalletDB, WalletDB, WalletID, WalletStatus from simcore_postgres_database.models.groups import user_to_groups from simcore_postgres_database.models.wallet_to_groups import wallet_to_groups diff --git a/services/web/server/src/simcore_service_webserver/wallets/_groups_api.py b/services/web/server/src/simcore_service_webserver/wallets/_groups_api.py index bdace14a9de..5a3dcc0a339 100644 --- a/services/web/server/src/simcore_service_webserver/wallets/_groups_api.py +++ b/services/web/server/src/simcore_service_webserver/wallets/_groups_api.py @@ -2,8 +2,9 @@ from datetime import datetime from aiohttp import 
web +from models_library.groups import GroupID from models_library.products import ProductName -from models_library.users import GroupID, UserID +from models_library.users import UserID from models_library.wallets import UserWalletDB, WalletID from pydantic import BaseModel, ConfigDict @@ -23,10 +24,8 @@ class WalletGroupGet(BaseModel): delete: bool created: datetime modified: datetime - - model_config = ConfigDict( - from_attributes=True - ) + + model_config = ConfigDict(from_attributes=True) async def create_wallet_group( diff --git a/services/web/server/src/simcore_service_webserver/wallets/_groups_db.py b/services/web/server/src/simcore_service_webserver/wallets/_groups_db.py index 949978a470f..8c2148e05ce 100644 --- a/services/web/server/src/simcore_service_webserver/wallets/_groups_db.py +++ b/services/web/server/src/simcore_service_webserver/wallets/_groups_db.py @@ -7,7 +7,7 @@ from datetime import datetime from aiohttp import web -from models_library.users import GroupID +from models_library.groups import GroupID from models_library.wallets import WalletID from pydantic import BaseModel, TypeAdapter from simcore_postgres_database.models.wallet_to_groups import wallet_to_groups diff --git a/services/web/server/src/simcore_service_webserver/wallets/_groups_handlers.py b/services/web/server/src/simcore_service_webserver/wallets/_groups_handlers.py index ac71f39af41..4ad171090ed 100644 --- a/services/web/server/src/simcore_service_webserver/wallets/_groups_handlers.py +++ b/services/web/server/src/simcore_service_webserver/wallets/_groups_handlers.py @@ -6,7 +6,7 @@ import logging from aiohttp import web -from models_library.users import GroupID +from models_library.groups import GroupID from models_library.wallets import WalletID from pydantic import BaseModel, ConfigDict from servicelib.aiohttp import status diff --git a/services/web/server/src/simcore_service_webserver/workspaces/_groups_api.py 
b/services/web/server/src/simcore_service_webserver/workspaces/_groups_api.py index cca4da82e4e..2ca935c8967 100644 --- a/services/web/server/src/simcore_service_webserver/workspaces/_groups_api.py +++ b/services/web/server/src/simcore_service_webserver/workspaces/_groups_api.py @@ -2,8 +2,9 @@ from datetime import datetime from aiohttp import web +from models_library.groups import GroupID from models_library.products import ProductName -from models_library.users import GroupID, UserID +from models_library.users import UserID from models_library.workspaces import UserWorkspaceAccessRightsDB, WorkspaceID from pydantic import BaseModel, ConfigDict @@ -24,10 +25,8 @@ class WorkspaceGroupGet(BaseModel): delete: bool created: datetime modified: datetime - - model_config = ConfigDict( - from_attributes=True - ) + + model_config = ConfigDict(from_attributes=True) async def create_workspace_group( diff --git a/services/web/server/src/simcore_service_webserver/workspaces/_groups_db.py b/services/web/server/src/simcore_service_webserver/workspaces/_groups_db.py index b5b969f0db4..d14127d5b37 100644 --- a/services/web/server/src/simcore_service_webserver/workspaces/_groups_db.py +++ b/services/web/server/src/simcore_service_webserver/workspaces/_groups_db.py @@ -8,7 +8,7 @@ from datetime import datetime from aiohttp import web -from models_library.users import GroupID +from models_library.groups import GroupID from models_library.workspaces import WorkspaceID from pydantic import BaseModel, ConfigDict from simcore_postgres_database.models.workspaces_access_rights import ( diff --git a/services/web/server/src/simcore_service_webserver/workspaces/_models.py b/services/web/server/src/simcore_service_webserver/workspaces/_models.py index af35fe4b63f..d2f22a3c878 100644 --- a/services/web/server/src/simcore_service_webserver/workspaces/_models.py +++ b/services/web/server/src/simcore_service_webserver/workspaces/_models.py @@ -2,6 +2,7 @@ from typing import Annotated from 
models_library.basic_types import IDStr +from models_library.groups import GroupID from models_library.rest_base import RequestParameters, StrictRequestParameters from models_library.rest_filters import Filters, FiltersQueryParameters from models_library.rest_ordering import ( @@ -11,7 +12,7 @@ ) from models_library.rest_pagination import PageQueryParameters from models_library.trash import RemoveQueryParams -from models_library.users import GroupID, UserID +from models_library.users import UserID from models_library.utils.common_validators import empty_str_to_none_pre_validator from models_library.workspaces import WorkspaceID from pydantic import BaseModel, BeforeValidator, ConfigDict, Field diff --git a/services/web/server/src/simcore_service_webserver/workspaces/_workspaces_db.py b/services/web/server/src/simcore_service_webserver/workspaces/_workspaces_db.py index 3835e82f9e0..5264a112419 100644 --- a/services/web/server/src/simcore_service_webserver/workspaces/_workspaces_db.py +++ b/services/web/server/src/simcore_service_webserver/workspaces/_workspaces_db.py @@ -8,9 +8,10 @@ from typing import cast from aiohttp import web +from models_library.groups import GroupID from models_library.products import ProductName from models_library.rest_ordering import OrderBy, OrderDirection -from models_library.users import GroupID, UserID +from models_library.users import UserID from models_library.workspaces import ( UserWorkspaceAccessRightsDB, WorkspaceDB, diff --git a/services/web/server/tests/unit/isolated/test_groups_models.py b/services/web/server/tests/unit/isolated/test_groups_models.py index 9813ca6009c..2e5201422e9 100644 --- a/services/web/server/tests/unit/isolated/test_groups_models.py +++ b/services/web/server/tests/unit/isolated/test_groups_models.py @@ -1,6 +1,4 @@ -import models_library.groups import pytest -import simcore_postgres_database.models.groups from faker import Faker from models_library.api_schemas_webserver._base import OutputSchema from 
models_library.api_schemas_webserver.groups import ( @@ -14,23 +12,13 @@ AccessRightsDict, Group, GroupMember, - GroupTypeInModel, + GroupType, StandardGroupCreate, StandardGroupUpdate, ) -from models_library.utils.enums import enum_to_dict from pydantic import ValidationError -def test_models_library_and_postgress_database_enums_are_equivalent(): - # For the moment these two libraries they do not have a common library to share these - # basic types so we test here that they are in sync - - assert enum_to_dict( - simcore_postgres_database.models.groups.GroupType - ) == enum_to_dict(models_library.groups.GroupTypeInModel) - - def test_sanitize_legacy_data(): users_group_1 = GroupGet.model_validate( { @@ -66,7 +54,7 @@ def test_output_schemas_from_models(faker: Faker): gid=1, name=faker.word(), description=faker.sentence(), - group_type=GroupTypeInModel.STANDARD, + group_type=GroupType.STANDARD, thumbnail=None, ) output_schema = GroupGet.from_model( diff --git a/services/web/server/tests/unit/isolated/test_projects__db_utils.py b/services/web/server/tests/unit/isolated/test_projects__db_utils.py index cee237fda90..c8c4da57eda 100644 --- a/services/web/server/tests/unit/isolated/test_projects__db_utils.py +++ b/services/web/server/tests/unit/isolated/test_projects__db_utils.py @@ -11,9 +11,9 @@ import pytest from faker import Faker +from models_library.groups import GroupID from models_library.projects_nodes import Node from models_library.services import ServiceKey -from models_library.users import GroupID from models_library.utils.fastapi_encoders import jsonable_encoder from simcore_service_webserver.projects._db_utils import ( DB_EXCLUSIVE_COLUMNS, diff --git a/services/web/server/tests/unit/with_dbs/01/groups/test_groups_handlers_users.py b/services/web/server/tests/unit/with_dbs/01/groups/test_groups_handlers_users.py index 97ebd6e2b51..f018e6fab00 100644 --- a/services/web/server/tests/unit/with_dbs/01/groups/test_groups_handlers_users.py +++ 
b/services/web/server/tests/unit/with_dbs/01/groups/test_groups_handlers_users.py @@ -6,12 +6,14 @@ from collections.abc import AsyncIterator from contextlib import AsyncExitStack +from typing import AsyncIterable import pytest from aiohttp.test_utils import TestClient from faker import Faker from models_library.api_schemas_webserver.groups import GroupGet, GroupUserGet from models_library.groups import AccessRightsDict, Group, StandardGroupCreate +from pydantic import TypeAdapter from pytest_simcore.helpers.assert_checks import assert_status from pytest_simcore.helpers.webserver_login import LoggedUser, NewUser, UserInfoDict from pytest_simcore.helpers.webserver_parametrizations import ( @@ -19,6 +21,7 @@ standard_role_response, ) from servicelib.aiohttp import status +from servicelib.status_codes_utils import is_2xx_success from simcore_postgres_database.models.users import UserRole from simcore_service_webserver._meta import API_VTAG from simcore_service_webserver.groups._groups_api import ( @@ -514,3 +517,96 @@ async def test_adding_user_to_group_with_upper_case_email( assert not data assert not error + + +@pytest.fixture +async def other_user( + client: TestClient, logged_user: UserInfoDict, is_private_user: bool +) -> AsyncIterable[UserInfoDict]: + # new user different from logged_user + async with NewUser( + { + "name": f"other_than_{logged_user['name']}", + "role": "USER", + "privacy_hide_email": is_private_user, + }, + client.app, + ) as user: + yield user + + +@pytest.mark.acceptance_test( + "https://github.com/ITISFoundation/osparc-simcore/pull/6917" +) +@pytest.mark.parametrize("user_role", [UserRole.USER]) +@pytest.mark.parametrize("is_private_user", [True, False]) +@pytest.mark.parametrize("add_user_by", ["user_email", "user_id", "user_name"]) +async def test_create_organization_and_add_users( + client: TestClient, + user_role: UserRole, + logged_user: UserInfoDict, + other_user: UserInfoDict, + is_private_user: bool, + add_user_by: str, +): + assert 
client.app + assert logged_user["id"] != 0 + assert logged_user["role"] == user_role.value + + # CREATE GROUP + url = client.app.router["create_group"].url_for() + resp = await client.post( + f"{url}", + json={ + "label": "Amies sans-frontiers", + "description": "A desperate attempt to make some friends", + }, + ) + data, error = await assert_status(resp, status.HTTP_201_CREATED) + + assert not error + group = GroupGet.model_validate(data) + + # i have another user + user_id = other_user["id"] + user_name = other_user["name"] + user_email = other_user["email"] + + assert user_id != logged_user["id"] + assert user_name != logged_user["name"] + assert user_email != logged_user["email"] + + # ADD new user to GROUP + url = client.app.router["add_group_user"].url_for(gid=f"{group.gid}") + + expected_status = status.HTTP_204_NO_CONTENT + match add_user_by: + case "user_email": + param = {"email": user_email} + if is_private_user: + expected_status = status.HTTP_404_NOT_FOUND + case "user_id": + param = {"uid": user_id} + case "user_name": + param = {"userName": user_name} + case _: + pytest.fail(reason=f"parameter {add_user_by} was not accounted for") + + response = await client.post(f"{url}", json=param) + await assert_status(response, expected_status) + + # LIST USERS in GROUP + url = client.app.router["get_all_group_users"].url_for(gid=f"{group.gid}") + response = await client.get(f"{url}") + data, _ = await assert_status(response, status.HTTP_200_OK) + + group_members = TypeAdapter(list[GroupUserGet]).validate_python(data) + if is_2xx_success(expected_status): + assert user_id in [ + u.id for u in group_members + ], "failed to add other-user to the group!" 
+ + # DELETE GROUP + url = client.app.router["delete_group"].url_for(gid=f"{group.gid}") + resp = await client.delete(f"{url}") + await assert_status(resp, status.HTTP_204_NO_CONTENT) diff --git a/services/web/server/tests/unit/with_dbs/04/licenses/test_licenses_rpc.py b/services/web/server/tests/unit/with_dbs/04/licenses/test_licenses_rpc.py new file mode 100644 index 00000000000..e3ab4f4cb3d --- /dev/null +++ b/services/web/server/tests/unit/with_dbs/04/licenses/test_licenses_rpc.py @@ -0,0 +1,127 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=unused-variable + + +from collections.abc import Awaitable, Callable + +import pytest +from aiohttp.test_utils import TestClient +from models_library.licensed_items import LicensedResourceType +from models_library.products import ProductName +from pytest_mock import MockerFixture +from pytest_simcore.helpers.monkeypatch_envs import setenvs_from_dict +from pytest_simcore.helpers.typing_env import EnvVarsDict +from pytest_simcore.helpers.webserver_login import UserInfoDict +from servicelib.rabbitmq import RabbitMQRPCClient +from servicelib.rabbitmq.rpc_interfaces.webserver.licenses.licensed_items import ( + checkout_licensed_item_for_wallet, + get_licensed_items, + get_licensed_items_for_wallet, + release_licensed_item_for_wallet, +) +from settings_library.rabbit import RabbitSettings +from simcore_postgres_database.models.users import UserRole +from simcore_service_webserver.application_settings import ApplicationSettings +from simcore_service_webserver.licenses import _licensed_items_db + +pytest_simcore_core_services_selection = [ + "rabbit", +] + + +@pytest.fixture +def app_environment( + rabbit_service: RabbitSettings, + app_environment: EnvVarsDict, + monkeypatch: pytest.MonkeyPatch, +): + new_envs = setenvs_from_dict( + monkeypatch, + { + **app_environment, + "RABBIT_HOST": rabbit_service.RABBIT_HOST, + "RABBIT_PORT": f"{rabbit_service.RABBIT_PORT}", + "RABBIT_USER": 
rabbit_service.RABBIT_USER, + "RABBIT_SECURE": f"{rabbit_service.RABBIT_SECURE}", + "RABBIT_PASSWORD": rabbit_service.RABBIT_PASSWORD.get_secret_value(), + }, + ) + + settings = ApplicationSettings.create_from_envs() + assert settings.WEBSERVER_RABBITMQ + + return new_envs + + +@pytest.fixture +def user_role() -> UserRole: + return UserRole.USER + + +@pytest.fixture +async def rpc_client( + rabbitmq_rpc_client: Callable[[str], Awaitable[RabbitMQRPCClient]], + mocker: MockerFixture, +) -> RabbitMQRPCClient: + return await rabbitmq_rpc_client("client") + + +async def test_api_keys_workflow( + client: TestClient, + rpc_client: RabbitMQRPCClient, + osparc_product_name: ProductName, + logged_user: UserInfoDict, + pricing_plan_id: int, +): + assert client.app + + result = await get_licensed_items( + rpc_client, product_name=osparc_product_name, offset=0, limit=20 + ) + assert len(result.items) == 0 + assert result.total == 0 + + await _licensed_items_db.create( + client.app, + product_name=osparc_product_name, + name="Model A", + licensed_resource_type=LicensedResourceType.VIP_MODEL, + pricing_plan_id=pricing_plan_id, + ) + + result = await get_licensed_items( + rpc_client, product_name=osparc_product_name, offset=0, limit=20 + ) + assert len(result.items) == 1 + assert result.total == 1 + + with pytest.raises(NotImplementedError): + await get_licensed_items_for_wallet( + rpc_client, + user_id=logged_user["id"], + product_name=osparc_product_name, + wallet_id=1, + ) + + with pytest.raises(NotImplementedError): + await checkout_licensed_item_for_wallet( + rpc_client, + user_id=logged_user["id"], + product_name=osparc_product_name, + wallet_id=1, + licensed_item_id="c5139a2e-4e1f-4ebe-9bfd-d17f195111ee", + num_of_seats=1, + service_run_id="run_1", + ) + + with pytest.raises(NotImplementedError): + await release_licensed_item_for_wallet( + rpc_client, + user_id=logged_user["id"], + product_name=osparc_product_name, + wallet_id=1, + 
licensed_item_id="c5139a2e-4e1f-4ebe-9bfd-d17f195111ee", + num_of_seats=1, + service_run_id="run_1", + )