From 770183e237c2e72c2ae42b13297ae0b24d798fcd Mon Sep 17 00:00:00 2001 From: Pedro Guimaraes Date: Tue, 15 Oct 2024 18:27:51 +0200 Subject: [PATCH 1/4] Extend wait_until to also wait on cos-agent --- tests/integration/plugins/test_plugins.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/integration/plugins/test_plugins.py b/tests/integration/plugins/test_plugins.py index c91b191d1..1fb0f1523 100644 --- a/tests/integration/plugins/test_plugins.py +++ b/tests/integration/plugins/test_plugins.py @@ -82,7 +82,7 @@ async def _set_config(ops_test: OpsTest, deploy_type: str, conf: dict[str, str]) await ops_test.model.applications[APP_NAME].set_config(conf) -async def _wait_for_units(ops_test: OpsTest, deployment_type: str) -> None: +async def _wait_for_units(ops_test: OpsTest, deployment_type: str, extra_apps=[]) -> None: """Wait for all units to be active. This wait will behavior accordingly to small/large. @@ -90,10 +90,9 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str) -> None: if deployment_type == "small_deployment": await wait_until( ops_test, - apps=[APP_NAME], + apps=[APP_NAME] + extra_apps, apps_statuses=["active"], units_statuses=["active"], - wait_for_exact_units={APP_NAME: 3}, timeout=1800, idle_period=IDLE_PERIOD, ) @@ -105,15 +104,10 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str) -> None: MAIN_ORCHESTRATOR_NAME, FAILOVER_ORCHESTRATOR_NAME, APP_NAME, - ], + ] + + extra_apps, apps_statuses=["active"], units_statuses=["active"], - wait_for_exact_units={ - TLS_CERTIFICATES_APP_NAME: 1, - MAIN_ORCHESTRATOR_NAME: 1, - FAILOVER_ORCHESTRATOR_NAME: 2, - APP_NAME: 1, - }, timeout=1800, idle_period=IDLE_PERIOD, ) @@ -173,7 +167,7 @@ async def test_prometheus_exporter_enabled_by_default(ops_test, deploy_type: str async def test_small_deployments_prometheus_exporter_cos_relation(ops_test, deploy_type: str): await ops_test.model.deploy(COS_APP_NAME, channel="edge"), await ops_test.model.integrate(APP_NAME, COS_APP_NAME) - await _wait_for_units(ops_test, deploy_type) + await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME]) # Check that the correct settings were successfully communicated to grafana-agent cos_leader_id = await get_leader_unit_id(ops_test, COS_APP_NAME) @@ -263,7 +257,7 @@ async def test_large_deployment_prometheus_exporter_cos_relation(ops_test, deplo await ops_test.model.integrate(MAIN_ORCHESTRATOR_NAME, COS_APP_NAME) await ops_test.model.integrate(APP_NAME, COS_APP_NAME) - await _wait_for_units(ops_test, deploy_type) + await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME]) leader_id = await get_leader_unit_id(ops_test, APP_NAME) leader_name = f"{APP_NAME}/{leader_id}" @@ -315,7 +309,7 @@ async def test_prometheus_monitor_user_password_change(ops_test, deploy_type: st result1 = await run_action( ops_test, leader_id, "set-password", {"username": "monitor"}, app=app ) - await _wait_for_units(ops_test, deploy_type) + await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME]) new_password = result1.response.get("monitor-password") # Now, we compare the change in the action above with the opensearch's nodes. From 0ea4add0e71ec43e95a28e9a560a9d339e3f6406 Mon Sep 17 00:00:00 2001 From: Pedro Guimaraes Date: Tue, 15 Oct 2024 23:43:19 +0200 Subject: [PATCH 2/4] monitor COS --- tests/integration/plugins/test_plugins.py | 33 +++++++++++++++++------ 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/tests/integration/plugins/test_plugins.py b/tests/integration/plugins/test_plugins.py index 1fb0f1523..a65180e0c 100644 --- a/tests/integration/plugins/test_plugins.py +++ b/tests/integration/plugins/test_plugins.py @@ -82,7 +82,7 @@ async def _set_config(ops_test: OpsTest, deploy_type: str, conf: dict[str, str]) await ops_test.model.applications[APP_NAME].set_config(conf) -async def _wait_for_units(ops_test: OpsTest, deployment_type: str, extra_apps=[]) -> None: +async def _wait_for_units(ops_test: OpsTest, deployment_type: str, wait_for_cos=False) -> None: """Wait for all units to be active. This wait will behavior accordingly to small/large. @@ -90,13 +90,22 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str, extra_apps=[] if deployment_type == "small_deployment": await wait_until( ops_test, - apps=[APP_NAME] + extra_apps, + apps=[APP_NAME], apps_statuses=["active"], units_statuses=["active"], timeout=1800, idle_period=IDLE_PERIOD, ) - return + if wait_for_cos: + await wait_until( + ops_test, + apps=[COS_APP_NAME], + apps_statuses=["active", "blocked"], + units_statuses=["active", "blocked"], + timeout=1800, + idle_period=IDLE_PERIOD, + ) + return await wait_until( ops_test, apps=[ @@ -104,13 +113,21 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str, extra_apps=[] MAIN_ORCHESTRATOR_NAME, FAILOVER_ORCHESTRATOR_NAME, APP_NAME, - ] - + extra_apps, + ], apps_statuses=["active"], units_statuses=["active"], timeout=1800, idle_period=IDLE_PERIOD, ) + if wait_for_cos: + await wait_until( + ops_test, + apps=[COS_APP_NAME], + apps_statuses=["active", "blocked"], + units_statuses=["active", "blocked"], + timeout=1800, + idle_period=IDLE_PERIOD, + ) @pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS) @@ -167,7 +184,7 @@ async def test_prometheus_exporter_enabled_by_default(ops_test, deploy_type: str async def test_small_deployments_prometheus_exporter_cos_relation(ops_test, deploy_type: str): await ops_test.model.deploy(COS_APP_NAME, channel="edge"), await ops_test.model.integrate(APP_NAME, COS_APP_NAME) - await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME]) + await _wait_for_units(ops_test, deploy_type, wait_for_cos=True) # Check that the correct settings were successfully communicated to grafana-agent cos_leader_id = await get_leader_unit_id(ops_test, COS_APP_NAME) @@ -257,7 +274,7 @@ async def test_large_deployment_prometheus_exporter_cos_relation(ops_test, deplo await ops_test.model.integrate(MAIN_ORCHESTRATOR_NAME, COS_APP_NAME) await ops_test.model.integrate(APP_NAME, COS_APP_NAME) - await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME]) + await _wait_for_units(ops_test, deploy_type, wait_for_cos=True) leader_id = await get_leader_unit_id(ops_test, APP_NAME) leader_name = f"{APP_NAME}/{leader_id}" @@ -309,7 +326,7 @@ async def test_prometheus_monitor_user_password_change(ops_test, deploy_type: st result1 = await run_action( ops_test, leader_id, "set-password", {"username": "monitor"}, app=app ) - await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME]) + await _wait_for_units(ops_test, deploy_type, wait_for_cos=True) new_password = result1.response.get("monitor-password") # Now, we compare the change in the action above with the opensearch's nodes. From 2b248c4e173c4ba42ba6aa069f5254dd0c49cd96 Mon Sep 17 00:00:00 2001 From: Pedro Guimaraes Date: Wed, 16 Oct 2024 08:05:07 +0200 Subject: [PATCH 3/4] Move the cos state --- tests/integration/plugins/test_plugins.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/plugins/test_plugins.py b/tests/integration/plugins/test_plugins.py index a65180e0c..c6adab879 100644 --- a/tests/integration/plugins/test_plugins.py +++ b/tests/integration/plugins/test_plugins.py @@ -100,8 +100,7 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str, wait_for_cos= await wait_until( ops_test, apps=[COS_APP_NAME], - apps_statuses=["active", "blocked"], - units_statuses=["active", "blocked"], + units_statuses=["blocked"], timeout=1800, idle_period=IDLE_PERIOD, ) @@ -123,8 +122,7 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str, wait_for_cos= await wait_until( ops_test, apps=[COS_APP_NAME], - apps_statuses=["active", "blocked"], - units_statuses=["active", "blocked"], + units_statuses=["blocked"], timeout=1800, idle_period=IDLE_PERIOD, ) From 6a3372dfa9d9c8c9b72f8cb12a92d2f2ae81bf25 Mon Sep 17 00:00:00 2001 From: Pedro Guimaraes Date: Thu, 17 Oct 2024 10:04:53 +0200 Subject: [PATCH 4/4] Add subordinate support to wait_until --- tests/integration/helpers_deployments.py | 66 +++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/tests/integration/helpers_deployments.py b/tests/integration/helpers_deployments.py index 14c14811b..58ac39db9 100644 --- a/tests/integration/helpers_deployments.py +++ b/tests/integration/helpers_deployments.py @@ -164,6 +164,60 @@ async def get_application_units(ops_test: OpsTest, app: str) -> List[Unit]: return units +async def get_application_subordinate_units( + ops_test: OpsTest, principal_app: str, app: str +) -> List[Unit]: + """Get fully detailed units of an application.""" + # Juju incorrectly reports the IP addresses after the network is restored this is reported as a + # bug here: https://github.com/juju/python-libjuju/issues/738. Once this bug is resolved use of + # `get_unit_ip` should be replaced with `.public_address` + raw_app = get_raw_application(ops_test, app) + units = [] + for principal_unit in get_raw_application(ops_test, principal_app)["units"].values(): + u_name, unit = None, None + for u_name, unit in principal_unit["subordinates"].items(): + if app in u_name: + break + else: + raise ValueError(f"Subordinate unit for {app} not found in {principal_app}") + + unit_id = int(u_name.split("/")[-1]) + + if not unit.get("public-address"): + # unit not ready yet... + continue + + app_id = f"{ops_test.model.uuid}/{app}" + app_short_id = md5(app_id.encode()).hexdigest()[:3] + unit = Unit( + id=unit_id, + short_name=u_name.replace("/", "-"), + name=f"{u_name.replace('/', '-')}.{app_short_id}", + ip=unit["public-address"], + hostname=await get_unit_hostname(ops_test, unit_id, app), + is_leader=unit.get("leader", False), + machine_id=-1, + workload_status=Status( + value=unit["workload-status"]["current"], + since=unit["workload-status"]["since"], + message=unit["workload-status"].get("message"), + ), + agent_status=Status( + value=unit["juju-status"]["current"], + since=unit["juju-status"]["since"], + ), + app_status=Status( + value=raw_app["application-status"]["current"], + since=raw_app["application-status"]["since"], + message=raw_app["application-status"].get("message"), + ), + ) + + units.append(unit) + + return units + + def _is_every_condition_on_app_met( ops_test: OpsTest, app: str, @@ -245,8 +299,18 @@ async def _is_every_condition_met( ) -> bool: """Evaluate if all the deployment status conditions are met.""" for app in apps: + app_dict = get_raw_application(ops_test, app) expected_units = wait_for_exact_units[app] - units = await get_application_units(ops_test, app) + if "subordinate-to" in app_dict: + logger.debug(f"Subordinate app: {app}") + # In this case, we must search for the principal app + units = await get_application_subordinate_units( + ops_test, app_dict["subordinate-to"][0], app + ) + else: + logger.debug(f"This is a principal app: {app}") + units = await get_application_units(ops_test, app) + if -1 < expected_units != len(units): logger.info(f"{app} -- expected units: {expected_units} -- current: {len(units)}") return False