From 1e7e816c8e95415f46d5a953f46d9574f46a5200 Mon Sep 17 00:00:00 2001 From: Matthias Frei Date: Thu, 28 Sep 2023 14:30:47 +0200 Subject: [PATCH] acceptance: de-flake cert_renewal and hidden_paths Use the existing await-connectivity script to determine when a test can safely start to run, instead of sleeping for arbitrary number of seconds. --- acceptance/cert_renewal/test.py | 3 ++- acceptance/common/base.py | 20 ++++++++++++++++++++ acceptance/common/topogen.bzl | 2 ++ acceptance/hidden_paths/test.py | 6 ++++-- tools/BUILD.bazel | 6 ++++++ tools/await-connectivity | 3 ++- 6 files changed, 36 insertions(+), 4 deletions(-) diff --git a/acceptance/cert_renewal/test.py b/acceptance/cert_renewal/test.py index 0b53b2883a..799fa42258 100755 --- a/acceptance/cert_renewal/test.py +++ b/acceptance/cert_renewal/test.py @@ -53,6 +53,7 @@ class Test(base.TestTopogen): """ def _run(self): + self.await_connectivity() isd_ases = scion.ASList.load("%s/gen/as_list.yml" % self.artifacts).all @@ -84,7 +85,7 @@ def _run(self): logger.info("==> Restart containers") self.setup_start() - time.sleep(5) + self.await_connectivity() logger.info("==> Check connectivity") end2end.run_fg() diff --git a/acceptance/common/base.py b/acceptance/common/base.py index a40d46dab0..fd8dd7ee76 100644 --- a/acceptance/common/base.py +++ b/acceptance/common/base.py @@ -213,6 +213,26 @@ def teardown(self): if re.search(r"Exit\s+[1-9]\d*", ps): raise Exception("Failed services.\n" + ps) + def await_connectivity(self, quiet_seconds=None, timeout_seconds=None): + """ + Wait for the beaconing process in a local topology to establish full connectivity, i.e. at + least one path between any two ASes. + Runs the tool/await-connectivity script. + + Returns success when full connectivity is established or an error (exception) at + timeout (default 20s). + + Remains quiet for a configurable time (default 10s). After that, + it reports the missing segments at 1s interval. + """ + cmd = self.get_executable("await-connectivity") + if quiet_seconds is not None: + cmd = cmd["-q", str(quiet_seconds)] + if timeout_seconds is not None: + cmd = cmd["-t", str(timeout_seconds)] + with local.cwd(self.artifacts): + cmd.run_fg() + def execute_tester(self, isd_as: ISD_AS, cmd: str, *args: str) -> str: """Executes a command in the designated "tester" container for the specified ISD-AS. diff --git a/acceptance/common/topogen.bzl b/acceptance/common/topogen.bzl index 675cee50ad..cc18515f01 100644 --- a/acceptance/common/topogen.bzl +++ b/acceptance/common/topogen.bzl @@ -45,6 +45,7 @@ def topogen_test( common_args = [ "--executable=scion-pki:$(location //scion-pki/cmd/scion-pki)", "--executable=topogen:$(location //tools:topogen)", + "--executable=await-connectivity:$(location //tools:await_connectivity)", "--topo=$(location %s)" % topo, ] if gateway: @@ -54,6 +55,7 @@ def topogen_test( "//scion-pki/cmd/scion-pki", "//tools:topogen", "//tools:docker_ip", + "//tools:await_connectivity", topo, ] loaders = container_loaders(tester, gateway) diff --git a/acceptance/hidden_paths/test.py b/acceptance/hidden_paths/test.py index 3a9f1af7f9..44cdfff4d5 100755 --- a/acceptance/hidden_paths/test.py +++ b/acceptance/hidden_paths/test.py @@ -105,9 +105,9 @@ def setup_start(self): ("0.0.0.0", self.http_server_port), http.server.SimpleHTTPRequestHandler) server_thread = threading.Thread(target=configuration_server, args=[server]) server_thread.start() + self._server = server super().setup_start() - time.sleep(4) # Give applications time to download configurations self._testers = { "2": "tester_1-ff00_0_2", @@ -121,9 +121,11 @@ def setup_start(self): "4": "1-ff00:0:4", "5": "1-ff00:0:5", } - server.shutdown() def _run(self): + self.await_connectivity() + self._server.shutdown() # by now configuration must have been downloaded everywhere + # Group 3 self._showpaths_bidirectional("2", "3", 0) self._showpaths_bidirectional("2", "5", 0) diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index b116c1f782..2871a5b8c9 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -7,6 +7,12 @@ sh_binary( visibility = ["//visibility:public"], ) +sh_binary( + name = "await_connectivity", + srcs = ["await-connectivity"], + visibility = ["//visibility:public"], +) + py_binary( name = "gomocks", srcs = ["gomocks.py"], diff --git a/tools/await-connectivity b/tools/await-connectivity index a4e2ccc56d..63ee037e86 100755 --- a/tools/await-connectivity +++ b/tools/await-connectivity @@ -13,7 +13,7 @@ # # Usage: await-connectivity -q QUIET -t TIMEOUT -set -euo pipefail +set -Eeuo pipefail QUIET=10 TIMEOUT=20 @@ -83,6 +83,7 @@ main() { parse_opts "$@" # poor bash-man's yaml parser + stat gen/as_list.yml > /dev/null # ensure file exists; command substitutions below don't fail because local (!?) local cores=$(sed -n '1,/Non-core/{s/^- //p}' gen/as_list.yml) local noncores=$(sed -n '/Non-core/,${s/^- //p}' gen/as_list.yml)