From 770183e237c2e72c2ae42b13297ae0b24d798fcd Mon Sep 17 00:00:00 2001
From: Pedro Guimaraes <pedro.guimaraes@canonical.com>
Date: Tue, 15 Oct 2024 18:27:51 +0200
Subject: [PATCH 1/4] Extend wait_until to also wait on cos-agent

---
 tests/integration/plugins/test_plugins.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/tests/integration/plugins/test_plugins.py b/tests/integration/plugins/test_plugins.py
index c91b191d1..1fb0f1523 100644
--- a/tests/integration/plugins/test_plugins.py
+++ b/tests/integration/plugins/test_plugins.py
@@ -82,7 +82,7 @@ async def _set_config(ops_test: OpsTest, deploy_type: str, conf: dict[str, str])
     await ops_test.model.applications[APP_NAME].set_config(conf)
 
 
-async def _wait_for_units(ops_test: OpsTest, deployment_type: str) -> None:
+async def _wait_for_units(ops_test: OpsTest, deployment_type: str, extra_apps=[]) -> None:
     """Wait for all units to be active.
 
     This wait will behavior accordingly to small/large.
@@ -90,10 +90,9 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str) -> None:
     if deployment_type == "small_deployment":
         await wait_until(
             ops_test,
-            apps=[APP_NAME],
+            apps=[APP_NAME] + extra_apps,
             apps_statuses=["active"],
             units_statuses=["active"],
-            wait_for_exact_units={APP_NAME: 3},
             timeout=1800,
             idle_period=IDLE_PERIOD,
         )
@@ -105,15 +104,10 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str) -> None:
             MAIN_ORCHESTRATOR_NAME,
             FAILOVER_ORCHESTRATOR_NAME,
             APP_NAME,
-        ],
+        ]
+        + extra_apps,
         apps_statuses=["active"],
         units_statuses=["active"],
-        wait_for_exact_units={
-            TLS_CERTIFICATES_APP_NAME: 1,
-            MAIN_ORCHESTRATOR_NAME: 1,
-            FAILOVER_ORCHESTRATOR_NAME: 2,
-            APP_NAME: 1,
-        },
         timeout=1800,
         idle_period=IDLE_PERIOD,
     )
@@ -173,7 +167,7 @@ async def test_prometheus_exporter_enabled_by_default(ops_test, deploy_type: str
 async def test_small_deployments_prometheus_exporter_cos_relation(ops_test, deploy_type: str):
     await ops_test.model.deploy(COS_APP_NAME, channel="edge"),
     await ops_test.model.integrate(APP_NAME, COS_APP_NAME)
-    await _wait_for_units(ops_test, deploy_type)
+    await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME])
 
     # Check that the correct settings were successfully communicated to grafana-agent
     cos_leader_id = await get_leader_unit_id(ops_test, COS_APP_NAME)
@@ -263,7 +257,7 @@ async def test_large_deployment_prometheus_exporter_cos_relation(ops_test, deplo
     await ops_test.model.integrate(MAIN_ORCHESTRATOR_NAME, COS_APP_NAME)
     await ops_test.model.integrate(APP_NAME, COS_APP_NAME)
 
-    await _wait_for_units(ops_test, deploy_type)
+    await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME])
 
     leader_id = await get_leader_unit_id(ops_test, APP_NAME)
     leader_name = f"{APP_NAME}/{leader_id}"
@@ -315,7 +309,7 @@ async def test_prometheus_monitor_user_password_change(ops_test, deploy_type: st
     result1 = await run_action(
         ops_test, leader_id, "set-password", {"username": "monitor"}, app=app
     )
-    await _wait_for_units(ops_test, deploy_type)
+    await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME])
 
     new_password = result1.response.get("monitor-password")
     # Now, we compare the change in the action above with the opensearch's nodes.

From 0ea4add0e71ec43e95a28e9a560a9d339e3f6406 Mon Sep 17 00:00:00 2001
From: Pedro Guimaraes <pedro.guimaraes@canonical.com>
Date: Tue, 15 Oct 2024 23:43:19 +0200
Subject: [PATCH 2/4] monitor COS

---
 tests/integration/plugins/test_plugins.py | 33 +++++++++++++++++------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/tests/integration/plugins/test_plugins.py b/tests/integration/plugins/test_plugins.py
index 1fb0f1523..a65180e0c 100644
--- a/tests/integration/plugins/test_plugins.py
+++ b/tests/integration/plugins/test_plugins.py
@@ -82,7 +82,7 @@ async def _set_config(ops_test: OpsTest, deploy_type: str, conf: dict[str, str])
     await ops_test.model.applications[APP_NAME].set_config(conf)
 
 
-async def _wait_for_units(ops_test: OpsTest, deployment_type: str, extra_apps=[]) -> None:
+async def _wait_for_units(ops_test: OpsTest, deployment_type: str, wait_for_cos=False) -> None:
     """Wait for all units to be active.
 
     This wait will behavior accordingly to small/large.
@@ -90,13 +90,22 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str, extra_apps=[]
     if deployment_type == "small_deployment":
         await wait_until(
             ops_test,
-            apps=[APP_NAME] + extra_apps,
+            apps=[APP_NAME],
             apps_statuses=["active"],
             units_statuses=["active"],
             timeout=1800,
             idle_period=IDLE_PERIOD,
         )
-        return
+        if wait_for_cos:
+            await wait_until(
+                ops_test,
+                apps=[COS_APP_NAME],
+                apps_statuses=["active", "blocked"],
+                units_statuses=["active", "blocked"],
+                timeout=1800,
+                idle_period=IDLE_PERIOD,
+            )
+            return
     await wait_until(
         ops_test,
         apps=[
@@ -104,13 +113,21 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str, extra_apps=[]
             MAIN_ORCHESTRATOR_NAME,
             FAILOVER_ORCHESTRATOR_NAME,
             APP_NAME,
-        ]
-        + extra_apps,
+        ],
         apps_statuses=["active"],
         units_statuses=["active"],
         timeout=1800,
         idle_period=IDLE_PERIOD,
     )
+    if wait_for_cos:
+        await wait_until(
+            ops_test,
+            apps=[COS_APP_NAME],
+            apps_statuses=["active", "blocked"],
+            units_statuses=["active", "blocked"],
+            timeout=1800,
+            idle_period=IDLE_PERIOD,
+        )
 
 
 @pytest.mark.parametrize("deploy_type", SMALL_DEPLOYMENTS)
@@ -167,7 +184,7 @@ async def test_prometheus_exporter_enabled_by_default(ops_test, deploy_type: str
 async def test_small_deployments_prometheus_exporter_cos_relation(ops_test, deploy_type: str):
     await ops_test.model.deploy(COS_APP_NAME, channel="edge"),
     await ops_test.model.integrate(APP_NAME, COS_APP_NAME)
-    await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME])
+    await _wait_for_units(ops_test, deploy_type, wait_for_cos=True)
 
     # Check that the correct settings were successfully communicated to grafana-agent
     cos_leader_id = await get_leader_unit_id(ops_test, COS_APP_NAME)
@@ -257,7 +274,7 @@ async def test_large_deployment_prometheus_exporter_cos_relation(ops_test, deplo
     await ops_test.model.integrate(MAIN_ORCHESTRATOR_NAME, COS_APP_NAME)
     await ops_test.model.integrate(APP_NAME, COS_APP_NAME)
 
-    await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME])
+    await _wait_for_units(ops_test, deploy_type, wait_for_cos=True)
 
     leader_id = await get_leader_unit_id(ops_test, APP_NAME)
     leader_name = f"{APP_NAME}/{leader_id}"
@@ -309,7 +326,7 @@ async def test_prometheus_monitor_user_password_change(ops_test, deploy_type: st
     result1 = await run_action(
         ops_test, leader_id, "set-password", {"username": "monitor"}, app=app
     )
-    await _wait_for_units(ops_test, deploy_type, extra_apps=[COS_APP_NAME])
+    await _wait_for_units(ops_test, deploy_type, wait_for_cos=True)
 
     new_password = result1.response.get("monitor-password")
     # Now, we compare the change in the action above with the opensearch's nodes.

From 2b248c4e173c4ba42ba6aa069f5254dd0c49cd96 Mon Sep 17 00:00:00 2001
From: Pedro Guimaraes <pedro.guimaraes@canonical.com>
Date: Wed, 16 Oct 2024 08:05:07 +0200
Subject: [PATCH 3/4] Move the cos state

---
 tests/integration/plugins/test_plugins.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/integration/plugins/test_plugins.py b/tests/integration/plugins/test_plugins.py
index a65180e0c..c6adab879 100644
--- a/tests/integration/plugins/test_plugins.py
+++ b/tests/integration/plugins/test_plugins.py
@@ -100,8 +100,7 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str, wait_for_cos=
             await wait_until(
                 ops_test,
                 apps=[COS_APP_NAME],
-                apps_statuses=["active", "blocked"],
-                units_statuses=["active", "blocked"],
+                units_statuses=["blocked"],
                 timeout=1800,
                 idle_period=IDLE_PERIOD,
             )
@@ -123,8 +122,7 @@ async def _wait_for_units(ops_test: OpsTest, deployment_type: str, wait_for_cos=
         await wait_until(
             ops_test,
             apps=[COS_APP_NAME],
-            apps_statuses=["active", "blocked"],
-            units_statuses=["active", "blocked"],
+            units_statuses=["blocked"],
             timeout=1800,
             idle_period=IDLE_PERIOD,
         )

From 6a3372dfa9d9c8c9b72f8cb12a92d2f2ae81bf25 Mon Sep 17 00:00:00 2001
From: Pedro Guimaraes <pedro.guimaraes@canonical.com>
Date: Thu, 17 Oct 2024 10:04:53 +0200
Subject: [PATCH 4/4] Add subordinate support to wait_until

---
 tests/integration/helpers_deployments.py | 66 +++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/tests/integration/helpers_deployments.py b/tests/integration/helpers_deployments.py
index 14c14811b..58ac39db9 100644
--- a/tests/integration/helpers_deployments.py
+++ b/tests/integration/helpers_deployments.py
@@ -164,6 +164,60 @@ async def get_application_units(ops_test: OpsTest, app: str) -> List[Unit]:
     return units
 
 
+async def get_application_subordinate_units(
+    ops_test: OpsTest, principal_app: str, app: str
+) -> List[Unit]:
+    """Get fully detailed units of an application."""
+    # Juju incorrectly reports the IP addresses after the network is restored this is reported as a
+    # bug here: https://github.com/juju/python-libjuju/issues/738. Once this bug is resolved use of
+    # `get_unit_ip` should be replaced with `.public_address`
+    raw_app = get_raw_application(ops_test, app)
+    units = []
+    for principal_unit in get_raw_application(ops_test, principal_app)["units"].values():
+        u_name, unit = None, None
+        for u_name, unit in principal_unit["subordinates"].items():
+            if app in u_name:
+                break
+        else:
+            raise ValueError(f"Subordinate unit for {app} not found in {principal_app}")
+
+        unit_id = int(u_name.split("/")[-1])
+
+        if not unit.get("public-address"):
+            # unit not ready yet...
+            continue
+
+        app_id = f"{ops_test.model.uuid}/{app}"
+        app_short_id = md5(app_id.encode()).hexdigest()[:3]
+        unit = Unit(
+            id=unit_id,
+            short_name=u_name.replace("/", "-"),
+            name=f"{u_name.replace('/', '-')}.{app_short_id}",
+            ip=unit["public-address"],
+            hostname=await get_unit_hostname(ops_test, unit_id, app),
+            is_leader=unit.get("leader", False),
+            machine_id=-1,
+            workload_status=Status(
+                value=unit["workload-status"]["current"],
+                since=unit["workload-status"]["since"],
+                message=unit["workload-status"].get("message"),
+            ),
+            agent_status=Status(
+                value=unit["juju-status"]["current"],
+                since=unit["juju-status"]["since"],
+            ),
+            app_status=Status(
+                value=raw_app["application-status"]["current"],
+                since=raw_app["application-status"]["since"],
+                message=raw_app["application-status"].get("message"),
+            ),
+        )
+
+        units.append(unit)
+
+    return units
+
+
 def _is_every_condition_on_app_met(
     ops_test: OpsTest,
     app: str,
@@ -245,8 +299,18 @@ async def _is_every_condition_met(
 ) -> bool:
     """Evaluate if all the deployment status conditions are met."""
     for app in apps:
+        app_dict = get_raw_application(ops_test, app)
         expected_units = wait_for_exact_units[app]
-        units = await get_application_units(ops_test, app)
+        if "subordinate-to" in app_dict:
+            logger.debug(f"Subordinate app: {app}")
+            # In this case, we must search for the principal app
+            units = await get_application_subordinate_units(
+                ops_test, app_dict["subordinate-to"][0], app
+            )
+        else:
+            logger.debug(f"This is a principal app: {app}")
+            units = await get_application_units(ops_test, app)
+
         if -1 < expected_units != len(units):
             logger.info(f"{app} -- expected units: {expected_units} -- current: {len(units)}")
             return False