From 6ffb29cc153ff59f3c928cbfa1aaa5c869133169 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Fri, 22 Sep 2023 12:09:04 +0200 Subject: [PATCH 01/24] test environment: bundled jaeger and prometheus as a single docker composition. The new name is "monitoring". This affects the usage of tools/dc and scion.sh in the following way: * monitoring is composed of both jaeger and prometheus. * scion.sh no longer accepts the commands "start_jaeger" and "stop_jaeger". * scion.sh accepts two new commands: "start_monitoring" and "stop_monitoring" that do what the name implies. * prom and jaeger are no longer valid services for tools/dc * tools/dc accepts a new service: "monitoring" * jaeger is no longer started automatically by "scion.sh run" or "scion.sh start". * jaeger is no longer stopped automatically by "scion.sh stop". * monitoring is **not** started or stopped automatically by "scion.sh start/stop". * monitoring can safely stay up across restarts of scion. * monitoring *is* stopped automatically by "scion.sh topology". 
--- Makefile | 4 ++ scion.sh | 54 +++++++++----- tools/dc | 12 ++-- tools/topology/config.py | 23 +++--- tools/topology/go.py | 2 +- tools/topology/jaeger.py | 70 ------------------- .../topology/{prometheus.py => monitoring.py} | 58 +++++++++++---- tools/topology/sig.py | 2 +- 8 files changed, 96 insertions(+), 129 deletions(-) delete mode 100644 tools/topology/jaeger.py rename tools/topology/{prometheus.py => monitoring.py} (75%) diff --git a/Makefile b/Makefile index 43fe60d304..2cb2419407 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,10 @@ clean: bazel clean rm -f bin/* +scrub: + bazel clean --expunge + rm -f bin/* + bazel: rm -f bin/* bazel build //:scion //:scion-ci diff --git a/scion.sh b/scion.sh index c437e32125..dd9c270d20 100755 --- a/scion.sh +++ b/scion.sh @@ -11,7 +11,8 @@ cmd_bazel_remote() { cmd_topo_clean() { set -e - cmd_stop || true + stop_scion || true + cmd_stop_monitoring || true rm -rf traces/* mkdir -p logs traces gen gen-cache gen-certs find gen gen-cache gen-certs -mindepth 1 -maxdepth 1 -exec rm -r {} + @@ -32,8 +33,7 @@ cmd_topodot() { ./tools/topodot.py "$@" } -cmd_run() { - run_jaeger +run_scion() { echo "Running the network..." if is_docker_be; then docker-compose -f gen/scion-dc.yml -p scion up -d @@ -44,6 +44,13 @@ cmd_run() { fi } +cmd_run() { + run_scion + echo "Note that jaeger is not included anymore." + echo "To run jaeger and prometheus, use run_monitoring." + +} + cmd_sciond-addr() { jq -r 'to_entries | map(select(.key | match("'"$1"'";"i"))) | @@ -51,20 +58,20 @@ cmd_sciond-addr() { "[\(.value)]:30255"' gen/sciond_addresses.json } -run_jaeger() { - if [ ! -f "gen/jaeger-dc.yml" ]; then +cmd_run_monitoring() { + if [ ! -f "gen/monitoring-dc.yml" ]; then return fi - echo "Running jaeger..." - ./tools/quiet ./tools/dc jaeger up -d + echo "Running monitoring..." + ./tools/quiet ./tools/dc monitoring up -d } -stop_jaeger() { - if [ ! -f "gen/jaeger-dc.yml" ]; then +cmd_stop_monitoring() { + if [ ! 
-f "gen/monitoring-dc.yml" ]; then return fi - echo "Stopping jaeger..." - ./tools/quiet ./tools/dc jaeger down -v + echo "Stopping monitoring..." + ./tools/quiet ./tools/dc monitoring down -v } cmd_mstart() { @@ -99,7 +106,7 @@ run_teardown() { fi } -cmd_stop() { +stop_scion() { echo "Terminating this run of the SCION infrastructure" if is_docker_be; then ./tools/quiet ./tools/dc down @@ -107,7 +114,12 @@ cmd_stop() { ./tools/quiet tools/supervisor.sh shutdown run_teardown fi - stop_jaeger +} + +cmd_stop() { + stop_scion + echo "Note that jaeger is not included anymore." + echo "To stop jaeger and prometheus, use stop_monitoring." } cmd_mstop() { @@ -236,15 +248,19 @@ cmd_help() { $PROGRAM run Run network. $PROGRAM mstart PROCESS - Start multiple processes + Start multiple processes. $PROGRAM stop Terminate this run of the SCION infrastructure. $PROGRAM mstop PROCESS - Stop multiple processes + Stop multiple processes. + $PROGRAM run_monitoring + Run the monitoring infrastructure. + $PROGRAM stop_monitoring + Terminate this run of the monitoring infrastructure. $PROGRAM status Show all non-running tasks. $PROGRAM mstatus PROCESS - Show status of provided processes + Show status of provided processes. $PROGRAM sciond-addr ISD-AS Return the address for the scion daemon for the matching ISD-AS by consulting gen/sciond_addresses.json. @@ -255,9 +271,9 @@ cmd_help() { $PROGRAM help Show this text. $PROGRAM traces [folder] - Serve jaeger traces from the specified folder (default: traces/) + Serve jaeger traces from the specified folder (default: traces/). $PROGRAM stop_traces - Stop the jaeger container started during the traces command + Stop the jaeger container started during the traces command. $PROGRAM bazel_remote Starts the bazel remote. 
_EOF @@ -269,7 +285,7 @@ COMMAND="$1" shift case "$COMMAND" in - help|run|mstart|mstatus|mstop|stop|status|topology|sciond-addr|traces|stop_traces|topo_clean|topodot|bazel_remote) + help|run|run_monitoring|mstart|mstatus|mstop|stop|stop_monitoring|status|topology|sciond-addr|traces|stop_traces|topo_clean|topodot|bazel_remote) "cmd_$COMMAND" "$@" ;; start) cmd_run "$@" ;; *) cmd_help; exit 1 ;; diff --git a/tools/dc b/tools/dc index 7901082a31..fd9ad5a58c 100755 --- a/tools/dc +++ b/tools/dc @@ -37,7 +37,7 @@ cmd_help() { - [SERVICE]: As scion service glob, e.g. cs1*. - [GROUP]: - scion: For all scion services. - - prom: For the prometheus service. + - monitoring: For the prometheus service. - [COMMAND]: A docker-compose command like 'up -d' or 'down' - [IA]: An IA number like 1-ff00:0:110 - [LOG_DIR]: A folder. @@ -70,12 +70,8 @@ cmd_scion() { dc "scion" "$@" } -cmd_jaeger() { - dc "jaeger" "$@" -} - -cmd_prom() { - dc "prom" "$@" +cmd_monitoring() { + dc "monitoring" "$@" } # Runs docker compose for the given project @@ -135,7 +131,7 @@ COMMAND="$1" shift case "$COMMAND" in - start|stop|down|run|scion|jaeger|prom|collect_logs) + start|stop|down|run|scion|monitoring|collect_logs) "cmd_$COMMAND" "$@" ;; exec_tester) "exec_tester" "$@" ;; diff --git a/tools/topology/config.py b/tools/topology/config.py index 5fcabb91c9..aa70bfbb2e 100644 --- a/tools/topology/config.py +++ b/tools/topology/config.py @@ -38,14 +38,13 @@ from topology.common import ArgsBase from topology.docker import DockerGenArgs, DockerGenerator from topology.go import GoGenArgs, GoGenerator -from topology.jaeger import JaegerGenArgs, JaegerGenerator from topology.net import ( NetworkDescription, IPNetwork, SubnetGenerator, DEFAULT_NETWORK, ) -from topology.prometheus import PrometheusGenArgs, PrometheusGenerator +from topology.monitoring import MonitoringGenArgs, MonitoringGenerator from topology.supervisor import SupervisorGenArgs, SupervisorGenerator from topology.topo import TopoGenArgs, 
TopoGenerator @@ -113,8 +112,7 @@ def _generate_with_topo(self, topo_dicts): self._generate_docker(topo_dicts) else: self._generate_supervisor(topo_dicts) - self._generate_jaeger(topo_dicts) - self._generate_prom_conf(topo_dicts) + self._generate_monitoring_conf(topo_dicts) self._generate_certs_trcs(topo_dicts) def _generate_certs_trcs(self, topo_dicts): @@ -135,11 +133,6 @@ def _generate_go(self, topo_dicts): def _go_args(self, topo_dicts): return GoGenArgs(self.args, self.topo_config, topo_dicts, self.networks) - def _generate_jaeger(self, topo_dicts): - args = JaegerGenArgs(self.args, topo_dicts) - jaeger_gen = JaegerGenerator(args) - jaeger_gen.generate() - def _generate_topology(self): topo_gen = TopoGenerator(self._topo_args()) return topo_gen.generate() @@ -164,13 +157,13 @@ def _generate_docker(self, topo_dicts): def _docker_args(self, topo_dicts): return DockerGenArgs(self.args, topo_dicts, self.all_networks) - def _generate_prom_conf(self, topo_dicts): - args = self._prometheus_args(topo_dicts) - prom_gen = PrometheusGenerator(args) - prom_gen.generate() + def _generate_monitoring_conf(self, topo_dicts): + args = self._monitoring_args(topo_dicts) + mon_gen = MonitoringGenerator(args) + mon_gen.generate() - def _prometheus_args(self, topo_dicts): - return PrometheusGenArgs(self.args, topo_dicts, self.networks) + def _monitoring_args(self, topo_dicts): + return MonitoringGenArgs(self.args, topo_dicts, self.networks) def _write_ca_files(self, topo_dicts, ca_files): isds = set() diff --git a/tools/topology/go.py b/tools/topology/go.py index 36c82537e2..d8f06f6058 100644 --- a/tools/topology/go.py +++ b/tools/topology/go.py @@ -38,7 +38,7 @@ from topology.net import socket_address_str, NetworkDescription, IPNetwork -from topology.prometheus import ( +from topology.monitoring import ( CS_PROM_PORT, DEFAULT_BR_PROM_PORT, SCIOND_PROM_PORT, diff --git a/tools/topology/jaeger.py b/tools/topology/jaeger.py deleted file mode 100644 index 7355694c55..0000000000 --- 
a/tools/topology/jaeger.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2019 Anapaya Systems -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import yaml - -from topology.util import write_file -from topology.common import ( - ArgsTopoDicts, -) - -JAEGER_DC = 'jaeger-dc.yml' - - -class JaegerGenArgs(ArgsTopoDicts): - pass - - -class JaegerGenerator(object): - - def __init__(self, args): - self.args = args - output_base = os.environ.get('SCION_OUTPUT_BASE', os.getcwd()) - self.local_jaeger_dir = os.path.join('traces') - self.docker_jaeger_dir = os.path.join(output_base, self.local_jaeger_dir) - - def generate(self): - dc_conf = self._generate_dc() - os.makedirs(os.path.join(self.local_jaeger_dir, 'data'), exist_ok=True) - os.makedirs(os.path.join(self.local_jaeger_dir, 'key'), exist_ok=True) - write_file(os.path.join(self.args.output_dir, JAEGER_DC), - yaml.dump(dc_conf, default_flow_style=False)) - - def _generate_dc(self): - name = 'jaeger' - entry = { - 'version': '2', - 'services': { - 'jaeger': { - 'image': 'jaegertracing/all-in-one:1.22.0', - 'container_name': name, - 'user': '%s:%s' % (str(os.getuid()), str(os.getgid())), - 'ports': [ - '6831:6831/udp', - '16686:16686' - ], - 'environment': [ - 'SPAN_STORAGE_TYPE=badger', - 'BADGER_EPHEMERAL=false', - 'BADGER_DIRECTORY_VALUE=/badger/data', - 'BADGER_DIRECTORY_KEY=/badger/key', - ], - 'volumes': [ - '%s:/badger:rw' % self.docker_jaeger_dir, - ], - } - } - } - return entry diff --git 
a/tools/topology/prometheus.py b/tools/topology/monitoring.py similarity index 75% rename from tools/topology/prometheus.py rename to tools/topology/monitoring.py index 9a896a64fc..631499a914 100644 --- a/tools/topology/prometheus.py +++ b/tools/topology/monitoring.py @@ -1,5 +1,7 @@ # Copyright 2014 ETH Zurich # Copyright 2018 ETH Zurich, Anapaya Systems +# Copyright 2019 Anapaya Systems +# Copyright 2023 SCION Association # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,9 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -:mod:`prometheus` --- SCION topology prometheus generator -============================================= +:mod:`monitoring` --- SCION topology monitoring generator +========================================================= """ + # Stdlib import os from collections import defaultdict @@ -43,17 +46,15 @@ SIG_PROM_PORT = 30456 DISP_PROM_PORT = 30441 DEFAULT_BR_PROM_PORT = 30442 - -PROM_DC_FILE = "prom-dc.yml" +MONITORING_DC_FILE = "monitoring-dc.yml" -class PrometheusGenArgs(ArgsTopoDicts): +class MonitoringGenArgs(ArgsTopoDicts): def __init__(self, args, topo_dicts, networks: Mapping[IPNetwork, NetworkDescription]): super().__init__(args, topo_dicts) self.networks = networks - -class PrometheusGenerator(object): +class MonitoringGenerator(object): PROM_DIR = "prometheus" TARGET_FILES = { "BorderRouters": "br.yml", @@ -70,10 +71,12 @@ class PrometheusGenerator(object): def __init__(self, args): """ - :param PrometheusGenArgs args: Contains the passed command line arguments and topo dicts. + :param MonitoringGenArgs args: Contains the passed command line arguments and topo dicts. 
""" self.args = args self.output_base = os.environ.get('SCION_OUTPUT_BASE', os.getcwd()) + self.local_jaeger_dir = os.path.join('traces') + self.docker_jaeger_dir = os.path.join(self.output_base, self.local_jaeger_dir) def generate(self): config_dict = {} @@ -99,7 +102,12 @@ def generate(self): self._write_dc_file() self._write_disp_file() + # For yeager + os.makedirs(os.path.join(self.local_jaeger_dir, 'data'), exist_ok=True) + os.makedirs(os.path.join(self.local_jaeger_dir, 'key'), exist_ok=True) + def _write_config_files(self, config_dict): + # For Prometheus targets_paths = defaultdict(list) for topo_id, ele_dict in config_dict.items(): base = topo_id.base_dir(self.args.output_dir) @@ -143,26 +151,46 @@ def _write_disp_file(self): if self.args.docker: return targets_path = os.path.join(self.args.output_dir, "dispatcher", - PrometheusGenerator.PROM_DIR, "disp.yml") + self.PROM_DIR, "disp.yml") target_config = [{'targets': [prom_addr_dispatcher(False, None, None, DISP_PROM_PORT, None)]}] write_file(targets_path, yaml.dump(target_config, default_flow_style=False)) def _write_dc_file(self): - name = 'prometheus' - prom_dc = { + # Merged yeager and prometheus files. 
+ # FIXME: should be a single container + name = 'monitoring' + monitoring_dc = { 'version': DOCKER_COMPOSE_CONFIG_VERSION, 'services': { - name: { + 'prometheus': { 'image': 'prom/prometheus:v2.6.0', - 'container_name': name, + 'container_name': name+'prometheus', 'network_mode': 'host', 'volumes': [ self.output_base + '/gen:/prom-config:ro' ], 'command': ['--config.file', '/prom-config/prometheus.yml'], + }, + 'jaeger': { + 'image': 'jaegertracing/all-in-one:1.22.0', + 'container_name': name+'yeager', + 'user': '%s:%s' % (str(os.getuid()), str(os.getgid())), + 'ports': [ + '6831:6831/udp', + '16686:16686' + ], + 'environment': [ + 'SPAN_STORAGE_TYPE=badger', + 'BADGER_EPHEMERAL=false', + 'BADGER_DIRECTORY_VALUE=/badger/data', + 'BADGER_DIRECTORY_KEY=/badger/key', + ], + 'volumes': [ + '%s:/badger:rw' % self.docker_jaeger_dir, + ], } } } - write_file(os.path.join(self.args.output_dir, PROM_DC_FILE), - yaml.dump(prom_dc, default_flow_style=False)) + write_file(os.path.join(self.args.output_dir, MONITORING_DC_FILE), + yaml.dump(monitoring_dc, default_flow_style=False)) diff --git a/tools/topology/sig.py b/tools/topology/sig.py index eb7855c0ae..9ddd2121ef 100644 --- a/tools/topology/sig.py +++ b/tools/topology/sig.py @@ -29,7 +29,7 @@ translate_features, ) from topology.net import socket_address_str -from topology.prometheus import SIG_PROM_PORT +from topology.monitoring import SIG_PROM_PORT class SIGGenArgs(ArgsBase): From 12bfb166c1440da30530ee3beab86a9445974878 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Fri, 22 Sep 2023 12:55:03 +0200 Subject: [PATCH 02/24] test environment: Document monitoring services. ...plus other minor improvements. 
--- doc/dev/run.rst | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/doc/dev/run.rst b/doc/dev/run.rst index 77563c6fc7..d473cff9ef 100644 --- a/doc/dev/run.rst +++ b/doc/dev/run.rst @@ -16,6 +16,10 @@ The scripts support two different process orchestrators as "backends": - `supervisor `_. This is the default and a bit more light-weight. Packets are sent over the loopback interface. - `docker-compose `_. Runs individual processes in separate containers connected with docker network bridges. Only this mode supports running a "SCION-IP gateway". +Before attempting to use the docker-compose mode, be sure to build the necessary docker images with: +.. code-block:: bash + + make docker-images .. TODO - Describe configuration directory (referencing manuals) @@ -84,6 +88,10 @@ Various helper files are also generated for the benefit of scripts and tooling o for example, ``gen/sciond_addresses.json`` is a simple mapping from AS number to the address of the corresponding :doc:`scion daemon ` instance. +If :option:`scion.sh topology -d` command is used, additional configuration files are created to +enable running the SCION services in docker containers (see `docker`_). Otherwise, a configuration +file is created to enable running the SCION services as plain processes (see `supervisor`_) + supervisor ---------- The ``gen/supervisord.conf`` configuration defines the programs that make up the local topology. @@ -180,6 +188,14 @@ The basic usage is ``./scion.sh ``. The main subcommands a Terminate this run of the local SCION topology. +.. option:: run_monitoring + + Start the monitoring services (jaeger and prometheus). + +.. option:: stop_monitoring + + Stop the monitoring services. + .. option:: sciond-addr Return the address for the scion daemon for the matching ISD-AS by consulting @@ -190,3 +206,15 @@ The basic usage is ``./scion.sh ``. The main subcommands a .. 
option:: help Describe all available subcommands + +end2end_integration +=================== + +:program:bin/end2end_integration is a basic functional test. + +The basic usage is ``./end2end_integration ``. + +.. option:: -d + + Assume the SCION services are dockerized. Must be consistent with the last + invocation of ``scion.sh topology``. From bd36719d71e4531eabf3c28693ebda42ca3f572a Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Mon, 25 Sep 2023 12:27:28 +0200 Subject: [PATCH 03/24] test environment: Small cleanup. use "start", not "run". Keeping "run" as shim. The opposite of "stop" is "start". So the standard start commands are now all named "m?start[_something]". The run command is an alias to "start", not the other way around. --- scion.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scion.sh b/scion.sh index dd9c270d20..8af426e24b 100755 --- a/scion.sh +++ b/scion.sh @@ -33,7 +33,7 @@ cmd_topodot() { ./tools/topodot.py "$@" } -run_scion() { +start_scion() { echo "Running the network..." if is_docker_be; then docker-compose -f gen/scion-dc.yml -p scion up -d @@ -44,8 +44,8 @@ run_scion() { fi } -cmd_run() { - run_scion +cmd_start() { + start_scion echo "Note that jaeger is not included anymore." echo "To run jaeger and prometheus, use run_monitoring." @@ -58,7 +58,7 @@ cmd_sciond-addr() { "[\(.value)]:30255"' gen/sciond_addresses.json } -cmd_run_monitoring() { +cmd_start_monitoring() { if [ ! -f "gen/monitoring-dc.yml" ]; then return fi @@ -253,7 +253,7 @@ cmd_help() { Terminate this run of the SCION infrastructure. $PROGRAM mstop PROCESS Stop multiple processes. - $PROGRAM run_monitoring + $PROGRAM start_monitoring Run the monitoring infrastructure. $PROGRAM stop_monitoring Terminate this run of the monitoring infrastructure. 
@@ -285,8 +285,8 @@ COMMAND="$1" shift case "$COMMAND" in - help|run|run_monitoring|mstart|mstatus|mstop|stop|stop_monitoring|status|topology|sciond-addr|traces|stop_traces|topo_clean|topodot|bazel_remote) + help|start|start_monitoring|mstart|mstatus|mstop|stop|stop_monitoring|status|topology|sciond-addr|traces|stop_traces|topo_clean|topodot|bazel_remote) "cmd_$COMMAND" "$@" ;; - start) cmd_run "$@" ;; + run) cmd_start "$@" ;; *) cmd_help; exit 1 ;; esac From 59a0b8b189c9e71c471eebccc9626d4f0d2cc8a9 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Tue, 26 Sep 2023 18:01:55 +0200 Subject: [PATCH 04/24] build: assorted cleanups * Severed our dependency on docker-compose V1 (including any shim script in /usr/bin): updated installation instructions and other docs accordingly. * Gave a $HOME dir to integration tests. It's not actually used, but docker-compose v2 fails (in a very cryptic way) without it. * Deflaked some integration tests (timing issues). Those were playing with my head when I was chasing other issues. * Clarified the instructions for running tests wrt to supervisor mode vs docker mode (e.g. the e2e integration tests must be run with -d in the docker mode). 
--- .buildkite/hooks/pre-command | 4 ++-- acceptance/cert_renewal/test.py | 5 +++++ acceptance/common/README.md | 2 +- acceptance/common/base.py | 2 +- acceptance/common/docker.py | 3 ++- acceptance/common/raw.bzl | 1 + acceptance/common/topogen.bzl | 1 + acceptance/hidden_paths/test.py | 4 ++-- acceptance/sig_short_exp_time/test | 14 ++++++------- acceptance/topo_cs_reload/reload_test.go | 21 +++++++++++--------- acceptance/topo_daemon_reload/reload_test.go | 21 +++++++++++--------- doc/dev/run.rst | 16 +++++++-------- doc/dev/setup.rst | 7 ++++--- scion.sh | 8 ++++---- tools/await-connectivity | 2 +- tools/dc | 4 ++-- tools/integration/cmd.go | 2 +- tools/integration/docker.go | 5 +++-- tools/topogen.py | 2 +- tools/topology/monitoring.py | 1 + 20 files changed, 71 insertions(+), 54 deletions(-) diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 0650d36442..ada7f07ae8 100755 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -43,7 +43,7 @@ echo "~~~ Starting bazel remote cache proxy" # Start bazel remote cache proxy for S3 # Note that S3 keys are injected by buildkite, see # https://buildkite.com/docs/pipelines/secrets#storing-secrets-with-the-elastic-ci-stack-for-aws -docker-compose -f .buildkite/hooks/bazel-remote.yml -p bazel_remote up -d +docker compose --compatibility -f .buildkite/hooks/bazel-remote.yml -p bazel_remote up -d echo "~~~ Starting go module proxy" -docker-compose -f .buildkite/hooks/go-module-proxy.yml -p athens up -d +docker compose --compatibility -f .buildkite/hooks/go-module-proxy.yml -p athens up -d diff --git a/acceptance/cert_renewal/test.py b/acceptance/cert_renewal/test.py index 0b53b2883a..6fb8030970 100755 --- a/acceptance/cert_renewal/test.py +++ b/acceptance/cert_renewal/test.py @@ -58,6 +58,9 @@ def _run(self): self.artifacts).all cs_configs = self._cs_configs() + # Services in their containers need time to start. 
+ time.sleep(5) + logger.info("==> Start renewal process") for isd_as in isd_ases: logging.info("===> Start renewal: %s" % isd_as) @@ -84,6 +87,8 @@ def _run(self): logger.info("==> Restart containers") self.setup_start() + + # Services in their containers need time to start. time.sleep(5) logger.info("==> Check connectivity") diff --git a/acceptance/common/README.md b/acceptance/common/README.md index 0ca1fc3080..c2759d81b7 100644 --- a/acceptance/common/README.md +++ b/acceptance/common/README.md @@ -56,7 +56,7 @@ Tests can use: - `self.artifacts`: the specified directory for test outputs, created during setup. - `self.get_executable()`: returns an executable specified using the `--executable` switch. -- `self.dc`: a wrapper for `docker-compose`, instantiated during `TestTopogen.setup`. +- `self.dc`: a wrapper for `docker compose`, instantiated during `TestTopogen.setup`. The `base.main` function is the main entry point to run the tests and must be invoked in `__main__`. diff --git a/acceptance/common/base.py b/acceptance/common/base.py index a40d46dab0..e229ba3dd3 100644 --- a/acceptance/common/base.py +++ b/acceptance/common/base.py @@ -203,7 +203,7 @@ def setup_start(self): raise Exception("Failed services.\n" + ps) def teardown(self): - # Avoid running docker-compose teardown if setup_prepare failed + # Avoid running docker compose teardown if setup_prepare failed if self._setup_prepare_failed: return out_dir = self.artifacts / "logs" diff --git a/acceptance/common/docker.py b/acceptance/common/docker.py index b77a7f8b4b..e4a1b7014b 100644 --- a/acceptance/common/docker.py +++ b/acceptance/common/docker.py @@ -51,7 +51,8 @@ def __call__(self, *args, **kwargs) -> str: # Note: not using plumbum here due to complications with encodings in the captured output try: res = subprocess.run( - ["docker-compose", "-f", self.compose_file, "-p", self.project, *args], + ["docker", "compose", "--compatibility", + "-f", self.compose_file, "-p", self.project, *args], 
check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8") except subprocess.CalledProcessError as e: raise _CalledProcessErrorWithOutput(e) from None diff --git a/acceptance/common/raw.bzl b/acceptance/common/raw.bzl index 5d8734d7ec..ba16e2b23b 100644 --- a/acceptance/common/raw.bzl +++ b/acceptance/common/raw.bzl @@ -63,5 +63,6 @@ def raw_test( "PYTHONUNBUFFERED": "1", # Ensure that unicode output can be printed to the log/console "PYTHONIOENCODING": "utf-8", + "HOME": "/tmp", }, ) diff --git a/acceptance/common/topogen.bzl b/acceptance/common/topogen.bzl index 675cee50ad..23b7b61d95 100644 --- a/acceptance/common/topogen.bzl +++ b/acceptance/common/topogen.bzl @@ -103,6 +103,7 @@ def topogen_test( "PYTHONUNBUFFERED": "1", # Ensure that unicode output can be printed to the log/console "PYTHONIOENCODING": "utf-8", + "HOME": "/tmp", }, ) diff --git a/acceptance/hidden_paths/test.py b/acceptance/hidden_paths/test.py index 3a9f1af7f9..9a5c01add4 100755 --- a/acceptance/hidden_paths/test.py +++ b/acceptance/hidden_paths/test.py @@ -107,7 +107,7 @@ def setup_start(self): server_thread.start() super().setup_start() - time.sleep(4) # Give applications time to download configurations + time.sleep(6) # Give applications time to download configurations self._testers = { "2": "tester_1-ff00_0_2", @@ -144,7 +144,7 @@ def _showpaths_bidirectional(self, source: str, destination: str, retcode: int): def _showpaths_run(self, source_as: str, destination_as: str, retcode: int): print(cmd.docker("exec", "-t", self._testers[source_as], "scion", "sp", self._ases[destination_as], - "--timeout", "2s", + "--timeout", "3s", retcode=retcode)) diff --git a/acceptance/sig_short_exp_time/test b/acceptance/sig_short_exp_time/test index 1908cde8dc..55bd704850 100755 --- a/acceptance/sig_short_exp_time/test +++ b/acceptance/sig_short_exp_time/test @@ -55,24 +55,24 @@ run_test() {(set -e docker image load -i acceptance/sig_short_exp_time/sig1.tar docker image load -i 
acceptance/sig_short_exp_time/sig2.tar - docker-compose -f acceptance/sig_short_exp_time/docker-compose.yml up -d dispatcher1 dispatcher2 sig1 sig2 patha pathb + docker compose -f acceptance/sig_short_exp_time/docker-compose.yml up -d dispatcher1 dispatcher2 sig1 sig2 patha pathb # Set up forward route on network stack 1 and 2 through the sig tunnel # device. The route is a property of the network stack, and persists after # the container that added it has exited. # # If the route configuration fails, the test is not stopped. - docker-compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name route1 --rm tester1 ip route add 242.254.200.2/32 dev sig || true - docker-compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name route2 --rm tester2 ip route add 242.254.100.2/32 dev sig || true + docker compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name route1 --rm tester1 ip route add 242.254.200.2/32 dev sig || true + docker compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name route2 --rm tester2 ip route add 242.254.100.2/32 dev sig || true echo "Start background ping, ping every 0.2 seconds" - docker-compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name tester1 -d tester1 ping -i 0.2 242.254.200.2 + docker compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name tester1 -d tester1 ping -i 0.2 242.254.200.2 echo "Waiting 10 seconds for path A to expire..." 
sleep 10 echo "Path A expired, simulating it by shutting down path A proxy" # Traffic should have switched beforehand to path b, and no pings should be lost - docker-compose -f acceptance/sig_short_exp_time/docker-compose.yml stop patha + docker compose -f acceptance/sig_short_exp_time/docker-compose.yml stop patha sleep 1 docker kill -s SIGINT tester1 @@ -104,9 +104,9 @@ OUTPUT_DIR=$TEST_UNDECLARED_OUTPUTS_DIR mkdir -p $OUTPUT_DIR/logs for CNTR in sig1 sig2 dispatcher1 dispatcher2; do - docker-compose -f acceptance/sig_short_exp_time/docker-compose.yml logs "$CNTR" > "$OUTPUT_DIR/logs/$CNTR.log" + docker compose -f acceptance/sig_short_exp_time/docker-compose.yml logs "$CNTR" > "$OUTPUT_DIR/logs/$CNTR.log" done -docker-compose -f acceptance/sig_short_exp_time/docker-compose.yml down -v +docker compose -f acceptance/sig_short_exp_time/docker-compose.yml down -v exit $RC diff --git a/acceptance/topo_cs_reload/reload_test.go b/acceptance/topo_cs_reload/reload_test.go index 790f4a0120..53186c6428 100644 --- a/acceptance/topo_cs_reload/reload_test.go +++ b/acceptance/topo_cs_reload/reload_test.go @@ -108,16 +108,19 @@ func setupTest(t *testing.T) testState { s.mustExec(t, "docker", "image", "load", "-i", "dispatcher.tar") s.mustExec(t, "docker", "image", "load", "-i", "control.tar") // now start the docker containers - s.mustExec(t, "docker-compose", "-f", "docker-compose.yml", "up", "-d") + s.mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", + "up", "-d") // wait a bit to make sure the containers are ready. 
time.Sleep(time.Second / 2) t.Log("Test setup done") - s.mustExec(t, "docker-compose", "-f", "docker-compose.yml", "ps") + s.mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", + "ps") return s } func (s testState) teardownTest(t *testing.T) { - defer s.mustExec(t, "docker-compose", "-f", "docker-compose.yml", "down", "-v") + defer s.mustExec(t, "docker", "compose", "--compatibility", + "-f", "docker-compose.yml", "down", "-v") outdir, exists := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR") require.True(t, exists, "TEST_UNDECLARED_OUTPUTS_DIR must be defined") @@ -127,8 +130,8 @@ func (s testState) teardownTest(t *testing.T) { "topo_cs_reload_dispatcher": "disp.log", "topo_cs_reload_control_srv": "control.log", } { - cmd := exec.Command("docker-compose", "-f", "docker-compose.yml", "logs", "--no-color", - service) + cmd := exec.Command("docker", "compose", "--compatibility", + "-f", "docker-compose.yml", "logs", "--no-color", service) logFileName := fmt.Sprintf("%s/logs/%s", outdir, file) logFile, err := os.Create(logFileName) if err != nil { @@ -146,10 +149,10 @@ func (s testState) teardownTest(t *testing.T) { func (s testState) loadTopo(t *testing.T, name string) { t.Helper() - s.mustExec(t, "docker-compose", "-f", "docker-compose.yml", "exec", "-T", - "topo_cs_reload_control_srv", "mv", name, "/topology.json") - s.mustExec(t, "docker-compose", "-f", "docker-compose.yml", "kill", "-s", "SIGHUP", - "topo_cs_reload_control_srv") + s.mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", + "exec", "-T", "topo_cs_reload_control_srv", "mv", name, "/topology.json") + s.mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", + "kill", "-s", "SIGHUP", "topo_cs_reload_control_srv") } func (s testState) mustExec(t *testing.T, name string, arg ...string) { diff --git a/acceptance/topo_daemon_reload/reload_test.go b/acceptance/topo_daemon_reload/reload_test.go index 243cb133aa..d63831b5e9 100644 --- 
a/acceptance/topo_daemon_reload/reload_test.go +++ b/acceptance/topo_daemon_reload/reload_test.go @@ -71,16 +71,18 @@ func setupTest(t *testing.T) { mustExec(t, "docker", "image", "load", "-i", "dispatcher.tar") mustExec(t, "docker", "image", "load", "-i", "daemon.tar") // now start the docker containers - mustExec(t, "docker-compose", "-f", "docker-compose.yml", "up", - "-d", "topo_daemon_reload_dispatcher", "topo_daemon_reload_daemon") + mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", + "up", "-d", "topo_daemon_reload_dispatcher", "topo_daemon_reload_daemon") // wait a bit to make sure the containers are ready. time.Sleep(time.Second / 2) t.Log("Test setup done") - mustExec(t, "docker-compose", "-f", "docker-compose.yml", "ps") + mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", + "ps") } func teardownTest(t *testing.T) { - defer mustExec(t, "docker-compose", "-f", "docker-compose.yml", "down", "-v") + defer mustExec(t, "docker", "compose", "--compatibility", + "-f", "docker-compose.yml", "down", "-v") outdir, exists := os.LookupEnv("TEST_UNDECLARED_OUTPUTS_DIR") require.True(t, exists, "TEST_UNDECLARED_OUTPUTS_DIR must be defined") @@ -90,7 +92,8 @@ func teardownTest(t *testing.T) { "topo_daemon_reload_dispatcher": "disp.log", "topo_daemon_reload_daemon": "daemon.log", } { - cmd := exec.Command("docker-compose", "-f", "docker-compose.yml", "logs", "--no-color", + cmd := exec.Command("docker", "compose", "--compatibility", + "-f", "docker-compose.yml", "logs", "--no-color", service) logFileName := fmt.Sprintf("%s/logs/%s", outdir, file) logFile, err := os.Create(logFileName) @@ -108,10 +111,10 @@ func teardownTest(t *testing.T) { func loadTopo(t *testing.T, name string) { t.Helper() - mustExec(t, "docker-compose", "-f", "docker-compose.yml", "exec", "-T", - "topo_daemon_reload_daemon", "mv", name, "/topology.json") - mustExec(t, "docker-compose", "-f", "docker-compose.yml", "kill", "-s", "SIGHUP", - 
"topo_daemon_reload_daemon") + mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", + "exec", "-T", "topo_daemon_reload_daemon", "mv", name, "/topology.json") + mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", + "kill", "-s", "SIGHUP", "topo_daemon_reload_daemon") } func mustExec(t *testing.T, name string, arg ...string) { diff --git a/doc/dev/run.rst b/doc/dev/run.rst index d473cff9ef..30fa218a74 100644 --- a/doc/dev/run.rst +++ b/doc/dev/run.rst @@ -14,9 +14,9 @@ Running SCION in this developement setup, is also called running a **local topol The scripts support two different process orchestrators as "backends": - `supervisor `_. This is the default and a bit more light-weight. Packets are sent over the loopback interface. -- `docker-compose `_. Runs individual processes in separate containers connected with docker network bridges. Only this mode supports running a "SCION-IP gateway". +- `docker compose `_. Runs individual processes in separate containers connected with docker network bridges. Only this mode supports running a "SCION-IP gateway". -Before attempting to use the docker-compose mode, be sure to build the necessary docker images with: +Before attempting to use the docker compose mode, be sure to build the necessary docker images with: .. code-block:: bash make docker-images @@ -35,7 +35,7 @@ Quickstart * Build, using ``make`` #. Generate the control-plane PKI keys and certificates, configuration files, and process - orchestrator (supervisor or docker-compose) configuration. + orchestrator (supervisor or docker compose) configuration. .. code-block:: bash @@ -119,14 +119,14 @@ For example:: docker ------ -The main docker-compose file is ``gen/scion-dc.yml``. +The main docker compose file is ``gen/scion-dc.yml``. Each SCION service or router runs in a separate container, and the network access of the individual containers is configured to mimick real-world connectivity. 
There are "tester" containers configured in each AS to mimick end hosts in a SCION AS. These tester containers can be used to run commands accessing the SCION network. -As a shorthand for the somewhat unwieldy ``docker-compose`` invocation, the :file-ref:`tools/dc` +As a shorthand for the somewhat unwieldy ``docker compose`` invocation, the :file-ref:`tools/dc` script can be used. For example:: # show paths from 1-ff00:0:112 to 1-ff00:0:110 @@ -148,7 +148,7 @@ scion.sh .. Note:: The SCION tools and services need to be built **before** running these commands, using - ``make`` or ``make docker-images`` (when using the docker-compose configuration). + ``make`` or ``make docker-images`` (when using the docker compose configuration). The basic usage is ``./scion.sh ``. The main subcommands are: @@ -157,7 +157,7 @@ The basic usage is ``./scion.sh ``. The main subcommands a .. option:: topology Generate the control-plane PKI keys and certificates, configuration files, - and process orchestrator (supervisor or docker-compose) configuration + and process orchestrator (supervisor or docker compose) configuration for a given network topopology defined in a :file-ref:`*.topo configuration file `. @@ -169,7 +169,7 @@ The basic usage is ``./scion.sh ``. The main subcommands a .. option:: -d, --docker - Create a docker-compose configuration (instead of default supervisord). + Create a docker compose configuration (instead of default supervisord). .. option:: --sig diff --git a/doc/dev/setup.rst b/doc/dev/setup.rst index 53040cec4b..9bfdce79f9 100644 --- a/doc/dev/setup.rst +++ b/doc/dev/setup.rst @@ -23,9 +23,10 @@ Prerequisites ``sudo usermod -a -G docker ${LOGNAME}``, where ``${LOGNAME}`` is replaced with your user name. Log out and log back in so that your membership of the ``docker`` group is seen by the shell session. - Optionally install ``docker-compose``. 
This is needed if you want to run the - ``docker-compose`` based test topology setup instead of the default setup based on ``supervisord``. - Please follow the instructions for `docker-compose `_. + Optionally install the ``Docker Compose Plugin``. This is needed if you want to run the + ``docker compose`` based test topology setup instead of the default setup based on ``supervisord``. + Please follow the instructions for + `Install Compose Plugin `_. Bazel ----- diff --git a/scion.sh b/scion.sh index 8af426e24b..4bca512626 100755 --- a/scion.sh +++ b/scion.sh @@ -6,7 +6,7 @@ cmd_bazel_remote() { mkdir -p "$HOME/.cache/bazel/remote" uid=$(id -u) gid=$(id -g) - USER_ID="$uid" GROUP_ID="$gid" docker-compose -f bazel-remote.yml -p bazel_remote up -d + USER_ID="$uid" GROUP_ID="$gid" docker compose --compatibility -f bazel-remote.yml -p bazel_remote up -d } cmd_topo_clean() { @@ -36,7 +36,7 @@ cmd_topodot() { start_scion() { echo "Running the network..." if is_docker_be; then - docker-compose -f gen/scion-dc.yml -p scion up -d + docker compose --compatibility -f gen/scion-dc.yml -p scion up -d return 0 else run_setup @@ -75,7 +75,7 @@ cmd_stop_monitoring() { } cmd_mstart() { - # Run with docker-compose or supervisor + # Run with docker compose or supervisor if is_docker_be; then services="$(glob_docker "$@")" [ -z "$services" ] && { echo "ERROR: No process matched for $@!"; exit 255; } @@ -237,7 +237,7 @@ cmd_help() { Two options for process control systems are supported to run the SCION services. - supervisord (default) - - docker-compose + - docker compose This can be selected when initially creating the configuration with the topology subcommand. 
diff --git a/tools/await-connectivity b/tools/await-connectivity index a4e2ccc56d..a81bc0440e 100755 --- a/tools/await-connectivity +++ b/tools/await-connectivity @@ -1,7 +1,7 @@ #!/bin/bash # This script waits for the beaconing process in a local topology (running -# either with supervisor or docker-compose) to establish full connectivity. +# either with supervisor or docker compose) to establish full connectivity. # Uses the control service's segments/ API to determine whether segments have # been registered. # diff --git a/tools/dc b/tools/dc index fd9ad5a58c..cb4228b1d8 100755 --- a/tools/dc +++ b/tools/dc @@ -38,7 +38,7 @@ cmd_help() { - [GROUP]: - scion: For all scion services. - monitoring: For the prometheus service. - - [COMMAND]: A docker-compose command like 'up -d' or 'down' + - [COMMAND]: A docker compose command like 'up -d' or 'down' - [IA]: An IA number like 1-ff00:0:110 - [LOG_DIR]: A folder. _EOF @@ -79,7 +79,7 @@ dc() { local project="$1" local dc_file="gen/$1-dc.yml" shift - COMPOSE_FILE="$dc_file" docker-compose -p "$project" --ansi never "$@" + COMPOSE_FILE="$dc_file" docker compose --compatibility -p "$project" --ansi never "$@" } cmd_collect_logs() { diff --git a/tools/integration/cmd.go b/tools/integration/cmd.go index 5b666d781f..e93b920991 100644 --- a/tools/integration/cmd.go +++ b/tools/integration/cmd.go @@ -75,7 +75,7 @@ func Run(ctx context.Context, cfg RunConfig) error { if cfg.Tester != "" { args := append([]string{}, dockerArgs...) args = append(args, cfg.Tester, "sh", "-c", joinCmds(cfg.Commands)) - cmd = exec.CommandContext(ctx, "docker-compose", args...) + cmd = exec.CommandContext(ctx, "docker", args...) 
log.Debug("Running docker command", "cmd", cmd) } else { cmd = exec.CommandContext(ctx, "sh", "-c", joinCmds(cfg.Commands)) diff --git a/tools/integration/docker.go b/tools/integration/docker.go index e154ab264b..055c391830 100644 --- a/tools/integration/docker.go +++ b/tools/integration/docker.go @@ -27,7 +27,7 @@ import ( ) const ( - dockerCmd = "docker-compose" + dockerCmd = "docker" ) var ( @@ -38,7 +38,8 @@ var ( var dockerArgs []string func initDockerArgs() { - dockerArgs = []string{"-f", GenFile("scion-dc.yml"), "-p", "scion", "exec", "-T", "-e", + dockerArgs = []string{"compose", "--compatibility", + "-f", GenFile("scion-dc.yml"), "-p", "scion", "exec", "-T", "-e", fmt.Sprintf("%s=1", GoIntegrationEnv)} } diff --git a/tools/topogen.py b/tools/topogen.py index 209dc75e08..eb57e9e8ab 100755 --- a/tools/topogen.py +++ b/tools/topogen.py @@ -35,7 +35,7 @@ def add_arguments(parser): parser.add_argument('-c', '--topo-config', default=DEFAULT_TOPOLOGY_FILE, help='Path policy file') parser.add_argument('-d', '--docker', action='store_true', - help='Create a docker-compose configuration') + help='Create a docker compose configuration') parser.add_argument('-n', '--network', help='Network to create subnets in (E.g. "127.0.0.0/8"') parser.add_argument('-o', '--output-dir', default=GEN_PATH, diff --git a/tools/topology/monitoring.py b/tools/topology/monitoring.py index 631499a914..476519f367 100644 --- a/tools/topology/monitoring.py +++ b/tools/topology/monitoring.py @@ -54,6 +54,7 @@ def __init__(self, args, topo_dicts, networks: Mapping[IPNetwork, NetworkDescrip super().__init__(args, topo_dicts) self.networks = networks + class MonitoringGenerator(object): PROM_DIR = "prometheus" TARGET_FILES = { From b73d9db3623a1dced33adb5c04c8e2690ece73dd Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Wed, 27 Sep 2023 11:29:24 +0200 Subject: [PATCH 05/24] build: Please the code format checker. 
--- acceptance/topo_daemon_reload/reload_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/topo_daemon_reload/reload_test.go b/acceptance/topo_daemon_reload/reload_test.go index d63831b5e9..ee9b9f44bf 100644 --- a/acceptance/topo_daemon_reload/reload_test.go +++ b/acceptance/topo_daemon_reload/reload_test.go @@ -114,7 +114,7 @@ func loadTopo(t *testing.T, name string) { mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", "exec", "-T", "topo_daemon_reload_daemon", "mv", name, "/topology.json") mustExec(t, "docker", "compose", "--compatibility", "-f", "docker-compose.yml", - "kill", "-s", "SIGHUP", "topo_daemon_reload_daemon") + "kill", "-s", "SIGHUP", "topo_daemon_reload_daemon") } func mustExec(t *testing.T, name string, arg ...string) { From d159aefa50c61bdaee55e871cdf155b50dc8851d Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Wed, 27 Sep 2023 15:47:51 +0200 Subject: [PATCH 06/24] build: made scion.sh mstatus resistant to docker compose cosmetic changes. The script was parsing the output of docker-compose. This was already incompatible with the latest docker-compose-v1 (possibly not affecting the buildkite environment) not mentioning docker compose v2. I preserved the original mstatus behavior, although not all of the intent is very obvious: why output something only about what is *not* running?
--- scion.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scion.sh b/scion.sh index 4bca512626..cf9e83c8f9 100755 --- a/scion.sh +++ b/scion.sh @@ -140,10 +140,16 @@ cmd_mstatus() { if is_docker_be; then services="$(glob_docker "$@")" [ -z "$services" ] && { echo "ERROR: No process matched for $@!"; exit 255; } - out=$(./tools/dc scion ps $services | tail -n +3) - rscount=$(echo "$out" | grep '\' | wc -l) # Number of running services + rscount=$(./tools/dc scion ps --status=running --format "{{.Name}}" $services | wc -l) tscount=$(echo "$services" | wc -w) # Number of all globed services - echo "$out" | grep -v '\' + ./tools/dc scion ps -a \ + --status=paused \ + --status=restarting \ + --status=removing \ + --status=dead \ + --status=created \ + --status=exited \ + --format "{{.Name}} {{.State}}" $services [ $rscount -eq $tscount ] else if [ $# -ne 0 ]; then From bd3bb9baf645da7d486c66b084ff08ecb419b9a3 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Wed, 27 Sep 2023 16:51:19 +0200 Subject: [PATCH 07/24] test: fix the revocation test. That test was broken when using docker; possibly when using supervisor too: * It was failing to pass the "-d" arg when invoking end2end_integration while using docker. $DOCKER_ARGS got lost through prior refactorings. * It stopped the border routers but never started them again. * The $SLEEP variable, also lost to refactorings was still being used (cosmetic only). --- tools/integration/revocation_test.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/integration/revocation_test.sh b/tools/integration/revocation_test.sh index 9215c9ed6a..5ef7b5318d 100755 --- a/tools/integration/revocation_test.sh +++ b/tools/integration/revocation_test.sh @@ -27,14 +27,25 @@ done # Bring down routers. echo "Revocation test" -echo "Stopping routers and waiting for ${SLEEP}s." +echo "Stopping routers and waiting for 4s." ./scion.sh mstop $REV_BRS if [ $? 
-ne 0 ]; then echo "Failed stopping routers." exit 1 fi sleep 4 + +# The below seemed do be missing. +# Was there a time when end2end_integration would start the +# scion service containers? +./scion.sh mstart $REV_BRS +sleep 2 + +if [ -f gen/scion-dc.yml ]; then + DOCKER_ARGS="-d" +fi + # Do another round of e2e test with retries echo "Testing connectivity between all the hosts (with retries)." -bin/end2end_integration -log.console info -attempts 15 -subset 1-ff00:0:131#2-ff00:0:222 $DOCKER_ARGS +bin/end2end_integration $DOCKER_ARGS -log.console info -attempts 15 -subset 1-ff00:0:131#2-ff00:0:222 exit $? From c831b8c268452dbef127a109e747a502dc8967f2 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Thu, 28 Sep 2023 12:05:31 +0200 Subject: [PATCH 08/24] build: Implemented some reviewers suggestions. * scion.sh commands all kebab-case. untabified. * removed wrong fix to revocation test. --- doc/dev/run.rst | 4 +- doc/dev/setup.rst | 2 +- scion.sh | 132 +++++++++++++-------------- tools/dc | 2 +- tools/integration/revocation_test.sh | 6 -- tools/topology/monitoring.py | 1 - 6 files changed, 70 insertions(+), 77 deletions(-) diff --git a/doc/dev/run.rst b/doc/dev/run.rst index 30fa218a74..43bf6c42d4 100644 --- a/doc/dev/run.rst +++ b/doc/dev/run.rst @@ -188,11 +188,11 @@ The basic usage is ``./scion.sh ``. The main subcommands a Terminate this run of the local SCION topology. -.. option:: run_monitoring +.. option:: start-monitoring Start the monitoring services (jaeger and prometheus). -.. option:: stop_monitoring +.. option:: stop-monitoring Stop the monitoring services. diff --git a/doc/dev/setup.rst b/doc/dev/setup.rst index 9bfdce79f9..dadeb2d690 100644 --- a/doc/dev/setup.rst +++ b/doc/dev/setup.rst @@ -64,7 +64,7 @@ Bazel build artifacts from bazel. Bazel-remote can manage the disk space and does not infinitely grow like the Bazel built-in disk-cache. To start bazel-remote run:: - ./scion.sh bazel_remote + ./scion.sh bazel-remote #. 
Build SCION services and tools. diff --git a/scion.sh b/scion.sh index cf9e83c8f9..41c775d0ce 100755 --- a/scion.sh +++ b/scion.sh @@ -2,17 +2,17 @@ # BEGIN subcommand functions -cmd_bazel_remote() { +cmd_bazel-remote() { mkdir -p "$HOME/.cache/bazel/remote" uid=$(id -u) gid=$(id -g) USER_ID="$uid" GROUP_ID="$gid" docker compose --compatibility -f bazel-remote.yml -p bazel_remote up -d } -cmd_topo_clean() { +cmd_topo-clean() { set -e stop_scion || true - cmd_stop_monitoring || true + cmd_stop-monitoring || true rm -rf traces/* mkdir -p logs traces gen gen-cache gen-certs find gen gen-cache gen-certs -mindepth 1 -maxdepth 1 -exec rm -r {} + @@ -20,7 +20,7 @@ cmd_topo_clean() { cmd_topology() { set -e - cmd_topo_clean + cmd_topo-clean echo "Create topology, configuration, and execution files." tools/topogen.py "$@" @@ -58,7 +58,7 @@ cmd_sciond-addr() { "[\(.value)]:30255"' gen/sciond_addresses.json } -cmd_start_monitoring() { +cmd_start-monitoring() { if [ ! -f "gen/monitoring-dc.yml" ]; then return fi @@ -66,7 +66,7 @@ cmd_start_monitoring() { ./tools/quiet ./tools/dc monitoring up -d } -cmd_stop_monitoring() { +cmd_stop-monitoring() { if [ ! -f "gen/monitoring-dc.yml" ]; then return fi @@ -119,7 +119,7 @@ stop_scion() { cmd_stop() { stop_scion echo "Note that jaeger is not included anymore." - echo "To stop jaeger and prometheus, use stop_monitoring." + echo "To stop jaeger and prometheus, use stop-monitoring." 
} cmd_mstop() { @@ -142,14 +142,14 @@ cmd_mstatus() { [ -z "$services" ] && { echo "ERROR: No process matched for $@!"; exit 255; } rscount=$(./tools/dc scion ps --status=running --format "{{.Name}}" $services | wc -l) tscount=$(echo "$services" | wc -w) # Number of all globed services - ./tools/dc scion ps -a \ - --status=paused \ - --status=restarting \ - --status=removing \ - --status=dead \ - --status=created \ - --status=exited \ - --format "{{.Name}} {{.State}}" $services + ./tools/dc scion ps -a \ + --status=paused \ + --status=restarting \ + --status=removing \ + --status=dead \ + --status=created \ + --status=exited \ + --format "{{.Name}} {{.State}}" $services [ $rscount -eq $tscount ] else if [ $# -ne 0 ]; then @@ -210,12 +210,12 @@ traces_name() { echo "$name" } -cmd_traces() { +cmd_start-traces() { set -e local trace_dir=${1:-"$(readlink -e .)/traces"} local port=16687 local name=$(traces_name) - cmd_stop_traces + cmd_stop-traces docker run -d --name "$name" \ -u "$(id -u):$(id -g)" \ -e SPAN_STORAGE_TYPE=badger \ @@ -229,60 +229,60 @@ cmd_traces() { x-www-browser "http://localhost:$port" } -cmd_stop_traces() { +cmd_stop-traces() { local name=$(traces_name) docker stop "$name" || true docker rm "$name" || true } cmd_help() { - cat <<-_EOF - SCION - - $PROGRAM runs a SCION network locally for development and testing purposes. - Two options for process control systems are supported to run the SCION - services. - - supervisord (default) - - docker compose - This can be selected when initially creating the configuration with the - topology subcommand. - - Usage: - $PROGRAM topology [-d] [-c TOPOFILE] - Create topology, configuration, and execution files. - All arguments or options are passed to tools/topogen.py - $PROGRAM run - Run network. - $PROGRAM mstart PROCESS - Start multiple processes. - $PROGRAM stop - Terminate this run of the SCION infrastructure. - $PROGRAM mstop PROCESS - Stop multiple processes. 
- $PROGRAM start_monitoring - Run the monitoring infrastructure. - $PROGRAM stop_monitoring - Terminate this run of the monitoring infrastructure. - $PROGRAM status - Show all non-running tasks. - $PROGRAM mstatus PROCESS - Show status of provided processes. - $PROGRAM sciond-addr ISD-AS - Return the address for the scion daemon for the matching ISD-AS by - consulting gen/sciond_addresses.json. - The ISD-AS parameter can be a substring of the full ISD-AS (e.g. last - three digits), as long as there is a unique match. - $PROGRAM topodot [-s|--show] TOPOFILE - Draw a graphviz graph of a *.topo topology configuration file. - $PROGRAM help - Show this text. - $PROGRAM traces [folder] - Serve jaeger traces from the specified folder (default: traces/). - $PROGRAM stop_traces - Stop the jaeger container started during the traces command. - $PROGRAM bazel_remote - Starts the bazel remote. - _EOF + cat <<-_EOF + SCION + + $PROGRAM runs a SCION network locally for development and testing purposes. + Two options for process control systems are supported to run the SCION + services. + - supervisord (default) + - docker compose + This can be selected when initially creating the configuration with the + topology subcommand. + + Usage: + $PROGRAM topology [-d] [-c TOPOFILE] + Create topology, configuration, and execution files. + All arguments or options are passed to tools/topogen.py + $PROGRAM run + Run network. + $PROGRAM mstart PROCESS + Start multiple processes. + $PROGRAM stop + Terminate this run of the SCION infrastructure. + $PROGRAM mstop PROCESS + Stop multiple processes. + $PROGRAM start-monitoring + Run the monitoring infrastructure. + $PROGRAM stop-monitoring + Terminate this run of the monitoring infrastructure. + $PROGRAM status + Show all non-running tasks. + $PROGRAM mstatus PROCESS + Show status of provided processes. + $PROGRAM sciond-addr ISD-AS + Return the address for the scion daemon for the matching ISD-AS by + consulting gen/sciond_addresses.json. 
+ The ISD-AS parameter can be a substring of the full ISD-AS (e.g. last + three digits), as long as there is a unique match. + $PROGRAM topodot [-s|--show] TOPOFILE + Draw a graphviz graph of a *.topo topology configuration file. + $PROGRAM help + Show this text. + $PROGRAM start-traces [folder] + Serve jaeger traces from the specified folder (default: traces/). + $PROGRAM stop-traces + Stop the jaeger container started during the start-traces command. + $PROGRAM bazel-remote + Starts the bazel remote. + _EOF } # END subcommand functions @@ -291,7 +291,7 @@ COMMAND="$1" shift case "$COMMAND" in - help|start|start_monitoring|mstart|mstatus|mstop|stop|stop_monitoring|status|topology|sciond-addr|traces|stop_traces|topo_clean|topodot|bazel_remote) + help|start|start-monitoring|mstart|mstatus|mstop|stop|stop-monitoring|status|topology|sciond-addr|start-traces|stop-traces|topo-clean|topodot|bazel-remote) "cmd_$COMMAND" "$@" ;; run) cmd_start "$@" ;; *) cmd_help; exit 1 ;; diff --git a/tools/dc b/tools/dc index cb4228b1d8..938405e65a 100755 --- a/tools/dc +++ b/tools/dc @@ -37,7 +37,7 @@ cmd_help() { - [SERVICE]: As scion service glob, e.g. cs1*. - [GROUP]: - scion: For all scion services. - - monitoring: For the prometheus service. + - monitoring: For the monitoring service (i.e. prometheus and jaeger). - [COMMAND]: A docker compose command like 'up -d' or 'down' - [IA]: An IA number like 1-ff00:0:110 - [LOG_DIR]: A folder. _EOF diff --git a/tools/integration/revocation_test.sh b/tools/integration/revocation_test.sh index 5ef7b5318d..ac78309274 100755 --- a/tools/integration/revocation_test.sh +++ b/tools/integration/revocation_test.sh @@ -35,12 +35,6 @@ if [ $? -ne 0 ]; then fi sleep 4
-./scion.sh mstart $REV_BRS -sleep 2 - if [ -f gen/scion-dc.yml ]; then DOCKER_ARGS="-d" fi diff --git a/tools/topology/monitoring.py b/tools/topology/monitoring.py index 476519f367..60ac56007e 100644 --- a/tools/topology/monitoring.py +++ b/tools/topology/monitoring.py @@ -159,7 +159,6 @@ def _write_disp_file(self): def _write_dc_file(self): # Merged yeager and prometheus files. - # FIXME: should be a single container name = 'monitoring' monitoring_dc = { 'version': DOCKER_COMPOSE_CONFIG_VERSION, From 2598e457b2c18e19ba1c5daf63bad75fb24242de Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Thu, 28 Sep 2023 13:37:56 +0200 Subject: [PATCH 09/24] build: indulge bash's quirk. Tabify the end marker for a tabified here-doc. --- scion.sh | 94 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/scion.sh b/scion.sh index 41c775d0ce..3baa1b30c7 100755 --- a/scion.sh +++ b/scion.sh @@ -236,53 +236,53 @@ cmd_stop-traces() { } cmd_help() { - cat <<-_EOF - SCION - - $PROGRAM runs a SCION network locally for development and testing purposes. - Two options for process control systems are supported to run the SCION - services. - - supervisord (default) - - docker compose - This can be selected when initially creating the configuration with the - topology subcommand. - - Usage: - $PROGRAM topology [-d] [-c TOPOFILE] - Create topology, configuration, and execution files. - All arguments or options are passed to tools/topogen.py - $PROGRAM run - Run network. - $PROGRAM mstart PROCESS - Start multiple processes. - $PROGRAM stop - Terminate this run of the SCION infrastructure. - $PROGRAM mstop PROCESS - Stop multiple processes. - $PROGRAM start-monitoring - Run the monitoring infrastructure. - $PROGRAM stop-monitoring - Terminate this run of the monitoring infrastructure. - $PROGRAM status - Show all non-running tasks. - $PROGRAM mstatus PROCESS - Show status of provided processes. 
- $PROGRAM sciond-addr ISD-AS - Return the address for the scion daemon for the matching ISD-AS by - consulting gen/sciond_addresses.json. - The ISD-AS parameter can be a substring of the full ISD-AS (e.g. last - three digits), as long as there is a unique match. - $PROGRAM topodot [-s|--show] TOPOFILE - Draw a graphviz graph of a *.topo topology configuration file. - $PROGRAM help - Show this text. - $PROGRAM start-traces [folder] - Serve jaeger traces from the specified folder (default: traces/). - $PROGRAM stop-traces - Stop the jaeger container started during the start-traces command. - $PROGRAM bazel-remote - Starts the bazel remote. - _EOF + cat <<-_EOF + SCION + + $PROGRAM runs a SCION network locally for development and testing purposes. + Two options for process control systems are supported to run the SCION + services. + - supervisord (default) + - docker compose + This can be selected when initially creating the configuration with the + topology subcommand. + + Usage: + $PROGRAM topology [-d] [-c TOPOFILE] + Create topology, configuration, and execution files. + All arguments or options are passed to tools/topogen.py + $PROGRAM run + Run network. + $PROGRAM mstart PROCESS + Start multiple processes. + $PROGRAM stop + Terminate this run of the SCION infrastructure. + $PROGRAM mstop PROCESS + Stop multiple processes. + $PROGRAM start-monitoring + Run the monitoring infrastructure. + $PROGRAM stop-monitoring + Terminate this run of the monitoring infrastructure. + $PROGRAM status + Show all non-running tasks. + $PROGRAM mstatus PROCESS + Show status of provided processes. + $PROGRAM sciond-addr ISD-AS + Return the address for the scion daemon for the matching ISD-AS by + consulting gen/sciond_addresses.json. + The ISD-AS parameter can be a substring of the full ISD-AS (e.g. last + three digits), as long as there is a unique match. + $PROGRAM topodot [-s|--show] TOPOFILE + Draw a graphviz graph of a *.topo topology configuration file. 
+ $PROGRAM help + Show this text. + $PROGRAM start-traces [folder] + Serve jaeger traces from the specified folder (default: traces/). + $PROGRAM stop-traces + Stop the jaeger container started during the start-traces command. + $PROGRAM bazel-remote + Starts the bazel remote. + _EOF } # END subcommand functions From 6543b5c85efc13150c35b32504a7aa7915b06c0e Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Thu, 28 Sep 2023 13:44:59 +0200 Subject: [PATCH 10/24] build: More tab wrangling. Tabified here docs are rendered with *all* leading tabs removed. Adjust accordingly to keep the output legible. --- scion.sh | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/scion.sh b/scion.sh index 3baa1b30c7..8aaf55ba8d 100755 --- a/scion.sh +++ b/scion.sh @@ -249,37 +249,37 @@ cmd_help() { Usage: $PROGRAM topology [-d] [-c TOPOFILE] - Create topology, configuration, and execution files. - All arguments or options are passed to tools/topogen.py + Create topology, configuration, and execution files. + All arguments or options are passed to tools/topogen.py $PROGRAM run - Run network. + Run network. $PROGRAM mstart PROCESS - Start multiple processes. + Start multiple processes. $PROGRAM stop - Terminate this run of the SCION infrastructure. + Terminate this run of the SCION infrastructure. $PROGRAM mstop PROCESS - Stop multiple processes. + Stop multiple processes. $PROGRAM start-monitoring - Run the monitoring infrastructure. + Run the monitoring infrastructure. $PROGRAM stop-monitoring - Terminate this run of the monitoring infrastructure. + Terminate this run of the monitoring infrastructure. $PROGRAM status - Show all non-running tasks. + Show all non-running tasks. $PROGRAM mstatus PROCESS - Show status of provided processes. + Show status of provided processes. $PROGRAM sciond-addr ISD-AS - Return the address for the scion daemon for the matching ISD-AS by - consulting gen/sciond_addresses.json. 
- The ISD-AS parameter can be a substring of the full ISD-AS (e.g. last - three digits), as long as there is a unique match. + Return the address for the scion daemon for the matching ISD-AS by + consulting gen/sciond_addresses.json. + The ISD-AS parameter can be a substring of the full ISD-AS (e.g. last + three digits), as long as there is a unique match. $PROGRAM topodot [-s|--show] TOPOFILE - Draw a graphviz graph of a *.topo topology configuration file. + Draw a graphviz graph of a *.topo topology configuration file. $PROGRAM help - Show this text. + Show this text. $PROGRAM start-traces [folder] - Serve jaeger traces from the specified folder (default: traces/). + Serve jaeger traces from the specified folder (default: traces/). $PROGRAM stop-traces - Stop the jaeger container started during the start-traces command. + Stop the jaeger container started during the start-traces command. $PROGRAM bazel-remote Starts the bazel remote. _EOF From 6dfff54e0d5f1553da0b24233a823342b5602741 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Thu, 28 Sep 2023 18:13:38 +0200 Subject: [PATCH 11/24] build: undo deflaking of hidden_paths and cert_renewal tests. Matthias' is about to commit a better deflaking. --- acceptance/cert_renewal/test.py | 6 ------ acceptance/hidden_paths/test.py | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/acceptance/cert_renewal/test.py b/acceptance/cert_renewal/test.py index 6fb8030970..fa0fc569c4 100755 --- a/acceptance/cert_renewal/test.py +++ b/acceptance/cert_renewal/test.py @@ -58,9 +58,6 @@ def _run(self): self.artifacts).all cs_configs = self._cs_configs() - # Services in their containers need time to start. - time.sleep(5) - logger.info("==> Start renewal process") for isd_as in isd_ases: logging.info("===> Start renewal: %s" % isd_as) @@ -88,9 +85,6 @@ def _run(self): logger.info("==> Restart containers") self.setup_start() - # Services in their containers need time to start. 
- time.sleep(5) - logger.info("==> Check connectivity") end2end.run_fg() diff --git a/acceptance/hidden_paths/test.py b/acceptance/hidden_paths/test.py index 9a5c01add4..3a9f1af7f9 100755 --- a/acceptance/hidden_paths/test.py +++ b/acceptance/hidden_paths/test.py @@ -107,7 +107,7 @@ def setup_start(self): server_thread.start() super().setup_start() - time.sleep(6) # Give applications time to download configurations + time.sleep(4) # Give applications time to download configurations self._testers = { "2": "tester_1-ff00_0_2", @@ -144,7 +144,7 @@ def _showpaths_bidirectional(self, source: str, destination: str, retcode: int): def _showpaths_run(self, source_as: str, destination_as: str, retcode: int): print(cmd.docker("exec", "-t", self._testers[source_as], "scion", "sp", self._ases[destination_as], - "--timeout", "3s", + "--timeout", "2s", retcode=retcode)) From 31b03ec7dc1b8e9f43a597c5f3aeea60489699c1 Mon Sep 17 00:00:00 2001 From: rohrerj <26304001+rohrerj@users.noreply.github.com> Date: Thu, 28 Sep 2023 15:00:07 +0200 Subject: [PATCH 12/24] router: slow-path in async packet processing (#4383) Add a low-priority slow path for special case packet handling, in particular SCMP errors and traceroutes, for the the asynchronous packet processing pipeline in the router added in #4351. This is implementation part three of three described in the design document (#4339, doc/dev/design/BorderRouter.rst). Closes #4334 --- doc/manuals/router.rst | 4 +- router/dataplane.go | 586 ++++++++++++++++++++---------- router/dataplane_internal_test.go | 144 +++++++- router/dataplane_test.go | 90 +---- router/export_test.go | 2 +- 5 files changed, 545 insertions(+), 281 deletions(-) diff --git a/doc/manuals/router.rst b/doc/manuals/router.rst index cd5f97b578..6427ab6165 100644 --- a/doc/manuals/router.rst +++ b/doc/manuals/router.rst @@ -160,8 +160,8 @@ considers the following options. .. 
option:: router.num_slow_processors = (Default: 1) - Number of goroutines started for the slow-path processing. This feature will be implemented soon. Currently - this setting has no effect. + Number of goroutines started for the slow-path processing which includes all SCMP traffic and traceroutes. + A minimum of 1 slow-path processor is required. .. option:: router.batch_size = (Default: 256) diff --git a/router/dataplane.go b/router/dataplane.go index 649d16b42b..57b533da4a 100644 --- a/router/dataplane.go +++ b/router/dataplane.go @@ -118,25 +118,31 @@ type DataPlane struct { } var ( - alreadySet = serrors.New("already set") - invalidSrcIA = serrors.New("invalid source ISD-AS") - invalidDstIA = serrors.New("invalid destination ISD-AS") - invalidSrcAddrForTransit = serrors.New("invalid source address for transit pkt") - cannotRoute = serrors.New("cannot route, dropping pkt") - emptyValue = serrors.New("empty value") - malformedPath = serrors.New("malformed path content") - modifyExisting = serrors.New("modifying a running dataplane is not allowed") - noSVCBackend = serrors.New("cannot find internal IP for the SVC") - unsupportedPathType = serrors.New("unsupported path type") - unsupportedPathTypeNextHeader = serrors.New("unsupported combination") - noBFDSessionFound = serrors.New("no BFD sessions was found") - noBFDSessionConfigured = serrors.New("no BFD sessions have been configured") - errBFDDisabled = serrors.New("BFD is disabled") - errPeeringEmptySeg0 = serrors.New("zero-length segment[0] in peering path") - errPeeringEmptySeg1 = serrors.New("zero-length segment[1] in peering path") - errPeeringNonemptySeg2 = serrors.New("non-zero-length segment[2] in peering path") - errShortPacket = serrors.New("Packet is too short") - errBFDSessionDown = serrors.New("bfd session down") + alreadySet = errors.New("already set") + invalidSrcIA = errors.New("invalid source ISD-AS") + invalidDstIA = errors.New("invalid destination ISD-AS") + invalidSrcAddrForTransit = 
errors.New("invalid source address for transit pkt") + cannotRoute = errors.New("cannot route, dropping pkt") + emptyValue = errors.New("empty value") + malformedPath = errors.New("malformed path content") + modifyExisting = errors.New("modifying a running dataplane is not allowed") + noSVCBackend = errors.New("cannot find internal IP for the SVC") + unsupportedPathType = errors.New("unsupported path type") + unsupportedPathTypeNextHeader = errors.New("unsupported combination") + noBFDSessionFound = errors.New("no BFD sessions was found") + noBFDSessionConfigured = errors.New("no BFD sessions have been configured") + errBFDDisabled = errors.New("BFD is disabled") + errPeeringEmptySeg0 = errors.New("zero-length segment[0] in peering path") + errPeeringEmptySeg1 = errors.New("zero-length segment[1] in peering path") + errPeeringNonemptySeg2 = errors.New("non-zero-length segment[2] in peering path") + errShortPacket = errors.New("Packet is too short") + errBFDSessionDown = errors.New("bfd session down") + expiredHop = errors.New("expired hop") + ingressInterfaceInvalid = errors.New("ingress interface invalid") + macVerificationFailed = errors.New("MAC verification failed") + badPacketSize = errors.New("bad packet size") + slowPathRequired = errors.New("slow-path required") + // zeroBuffer will be used to reset the Authenticator option in the // scionPacketProcessor.OptAuth zeroBuffer = make([]byte, 16) @@ -152,15 +158,6 @@ type drkeyProvider interface { ) (drkey.ASHostKey, error) } -type scmpError struct { - TypeCode slayers.SCMPTypeCode - Cause error -} - -func (e scmpError) Error() string { - return serrors.New("scmp", "typecode", e.TypeCode, "cause", e.Cause).Error() -} - // SetIA sets the local IA for the dataplane. 
func (d *DataPlane) SetIA(ia addr.IA) error { d.mtx.Lock() @@ -511,7 +508,7 @@ func (d *DataPlane) Run(ctx context.Context, cfg *RunConfig) error { cfg.BatchSize) d.initPacketPool(cfg, processorQueueSize) - procQs, fwQs := initQueues(cfg, d.interfaces, processorQueueSize) + procQs, fwQs, slowQs := initQueues(cfg, d.interfaces, processorQueueSize) for ifID, conn := range d.interfaces { go func(ifID uint16, conn BatchConn) { @@ -526,7 +523,13 @@ func (d *DataPlane) Run(ctx context.Context, cfg *RunConfig) error { for i := 0; i < cfg.NumProcessors; i++ { go func(i int) { defer log.HandlePanic() - d.runProcessor(i, procQs[i], fwQs) + d.runProcessor(i, procQs[i], fwQs, slowQs[i%cfg.NumSlowPathProcessors]) + }(i) + } + for i := 0; i < cfg.NumSlowPathProcessors; i++ { + go func(i int) { + defer log.HandlePanic() + d.runSlowPathProcessor(i, slowQs[i], fwQs) }(i) } @@ -548,7 +551,7 @@ func (d *DataPlane) Run(ctx context.Context, cfg *RunConfig) error { // current dataplane settings and allocates all the buffers func (d *DataPlane) initPacketPool(cfg *RunConfig, processorQueueSize int) { poolSize := len(d.interfaces)*cfg.BatchSize + - cfg.NumProcessors*(processorQueueSize+1) + + (cfg.NumProcessors+cfg.NumSlowPathProcessors)*(processorQueueSize+1) + len(d.interfaces)*(2*cfg.BatchSize) log.Debug("Initialize packet pool of size", "poolSize", poolSize) @@ -560,17 +563,22 @@ func (d *DataPlane) initPacketPool(cfg *RunConfig, processorQueueSize int) { // initializes the processing routines and forwarders queues func initQueues(cfg *RunConfig, interfaces map[uint16]BatchConn, - processorQueueSize int) ([]chan packet, map[uint16]chan packet) { + processorQueueSize int) ([]chan packet, map[uint16]chan packet, + []chan slowPacket) { procQs := make([]chan packet, cfg.NumProcessors) for i := 0; i < cfg.NumProcessors; i++ { procQs[i] = make(chan packet, processorQueueSize) } + slowQs := make([]chan slowPacket, cfg.NumSlowPathProcessors) + for i := 0; i < cfg.NumSlowPathProcessors; i++ { 
+ slowQs[i] = make(chan slowPacket, processorQueueSize) + } fwQs := make(map[uint16]chan packet) for ifID := range interfaces { fwQs[ifID] = make(chan packet, cfg.BatchSize) } - return procQs, fwQs + return procQs, fwQs, slowQs } type packet struct { @@ -585,6 +593,11 @@ type packet struct { rawPacket []byte } +type slowPacket struct { + packet + slowPathRequest slowPathRequest +} + func (d *DataPlane) runReceiver(ifID uint16, conn BatchConn, cfg *RunConfig, procQs []chan packet) { @@ -675,11 +688,10 @@ func (d *DataPlane) returnPacketToPool(pkt []byte) { } func (d *DataPlane) runProcessor(id int, q <-chan packet, - fwQs map[uint16]chan packet) { + fwQs map[uint16]chan packet, slowQ chan<- slowPacket) { log.Debug("Initialize processor with", "id", id) processor := newPacketProcessor(d) - var scmpErr scmpError for d.running { p, ok := <-q if !ok { @@ -691,10 +703,14 @@ func (d *DataPlane) runProcessor(id int, q <-chan packet, egress := result.EgressID switch { case err == nil: - case errors.As(err, &scmpErr): - if !scmpErr.TypeCode.InfoMsg() { - log.Debug("SCMP", "err", scmpErr) + case errors.Is(err, slowPathRequired): + select { + case slowQ <- slowPacket{p, result.SlowPathRequest}: + default: + metrics.DroppedPacketsBusySlowPath.Inc() + d.returnPacketToPool(p.rawPacket) } + continue default: log.Debug("Error processing packet", "err", err) metrics.DroppedPacketsInvalid.Inc() @@ -725,6 +741,150 @@ func (d *DataPlane) runProcessor(id int, q <-chan packet, } } +func (d *DataPlane) runSlowPathProcessor(id int, q <-chan slowPacket, + fwQs map[uint16]chan packet) { + + log.Debug("Initialize slow-path processor with", "id", id) + processor := newSlowPathProcessor(d) + for d.running { + p, ok := <-q + if !ok { + continue + } + metrics := d.forwardingMetrics[p.packet.ingress] + res, err := processor.processPacket(p) + if err != nil { + log.Debug("Error processing packet", "err", err) + metrics.DroppedPacketsInvalid.Inc() + d.returnPacketToPool(p.packet.rawPacket) + 
continue + } + p.packet.dstAddr = res.OutAddr + p.packet.rawPacket = res.OutPkt + + fwCh, ok := fwQs[res.EgressID] + if !ok { + log.Debug("Error determining forwarder. Egress is invalid", "egress", res.EgressID) + d.returnPacketToPool(p.packet.rawPacket) + continue + } + select { + case fwCh <- p.packet: + default: + d.returnPacketToPool(p.packet.rawPacket) + } + } +} + +func newSlowPathProcessor(d *DataPlane) *slowPathPacketProcessor { + p := &slowPathPacketProcessor{ + d: d, + buffer: gopacket.NewSerializeBuffer(), + macInputBuffer: make([]byte, spao.MACBufferSize), + drkeyProvider: &drkeyutil.FakeProvider{ + EpochDuration: drkeyutil.LoadEpochDuration(), + AcceptanceWindow: drkeyutil.LoadAcceptanceWindow(), + }, + optAuth: slayers.PacketAuthOption{EndToEndOption: new(slayers.EndToEndOption)}, + validAuthBuf: make([]byte, 16), + } + p.scionLayer.RecyclePaths() + return p +} + +type slowPathPacketProcessor struct { + d *DataPlane + ingressID uint16 + rawPkt []byte + srcAddr *net.UDPAddr + buffer gopacket.SerializeBuffer + + scionLayer slayers.SCION + hbhLayer slayers.HopByHopExtnSkipper + e2eLayer slayers.EndToEndExtnSkipper + lastLayer gopacket.DecodingLayer + path *scion.Raw + + // macInputBuffer avoid allocating memory during processing. + macInputBuffer []byte + + // optAuth is a reusable Packet Authenticator Option + optAuth slayers.PacketAuthOption + // validAuthBuf is a reusable buffer for the authentication tag + // to be used in the hasValidAuth() method. 
+ validAuthBuf []byte + + // DRKey key derivation for SCMP authentication + drkeyProvider drkeyProvider +} + +func (p *slowPathPacketProcessor) reset() { + if err := p.buffer.Clear(); err != nil { + log.Debug("Error while clearing buffer", "err", err) + } + p.path = nil + p.hbhLayer = slayers.HopByHopExtnSkipper{} + p.e2eLayer = slayers.EndToEndExtnSkipper{} +} + +func (p *slowPathPacketProcessor) processPacket(pkt slowPacket) (processResult, error) { + var err error + p.reset() + p.ingressID = pkt.ingress + p.srcAddr = pkt.srcAddr + p.rawPkt = pkt.rawPacket + + p.lastLayer, err = decodeLayers(pkt.rawPacket, &p.scionLayer, &p.hbhLayer, &p.e2eLayer) + if err != nil { + return processResult{}, err + } + pathType := p.scionLayer.PathType + switch pathType { + case scion.PathType: + var ok bool + p.path, ok = p.scionLayer.Path.(*scion.Raw) + if !ok { + return processResult{}, malformedPath + } + case epic.PathType: + epicPath, ok := p.scionLayer.Path.(*epic.Path) + if !ok { + return processResult{}, malformedPath + } + p.path = epicPath.ScionPath + if p.path == nil { + return processResult{}, malformedPath + } + default: + //unsupported path type + return processResult{}, serrors.New("Path type not supported for slow-path", + "type", pathType) + } + switch pkt.slowPathRequest.typ { + case slowPathSCMP: //SCMP + s := pkt.slowPathRequest + var layer gopacket.SerializableLayer + switch s.scmpType { + case slayers.SCMPTypeParameterProblem: + layer = &slayers.SCMPParameterProblem{Pointer: s.pointer} + case slayers.SCMPTypeDestinationUnreachable: + layer = &slayers.SCMPDestinationUnreachable{} + case slayers.SCMPTypeExternalInterfaceDown: + layer = &slayers.SCMPExternalInterfaceDown{IA: s.ia, + IfID: uint64(s.interfaceId)} + case slayers.SCMPTypeInternalConnectivityDown: + layer = &slayers.SCMPInternalConnectivityDown{IA: s.ia, + Ingress: uint64(s.ingressId), Egress: uint64(s.egressId)} + } + return p.packSCMP(s.scmpType, s.code, layer, s.cause) + + case slowPathRouterAlert: 
//Traceroute + return p.handleSCMPTraceRouteRequest(pkt.slowPathRequest.interfaceId) + default: + panic("Unsupported slow-path type") + } +} + func (d *DataPlane) runForwarder(ifID uint16, conn BatchConn, cfg *RunConfig, c <-chan packet) { @@ -818,29 +978,18 @@ func (d *DataPlane) initMetrics() { } type processResult struct { - EgressID uint16 - OutAddr *net.UDPAddr - OutPkt []byte + EgressID uint16 + OutAddr *net.UDPAddr + OutPkt []byte + SlowPathRequest slowPathRequest } func newPacketProcessor(d *DataPlane) *scionPacketProcessor { p := &scionPacketProcessor{ - d: d, - buffer: gopacket.NewSerializeBuffer(), - mac: d.macFactory(), - macBuffers: macBuffers{ - scionInput: make([]byte, path.MACBufferSize), - epicInput: make([]byte, libepic.MACBufferSize), - drkeyInput: make([]byte, spao.MACBufferSize), - }, - // TODO(JordiSubira): Replace this with a useful implementation. - - drkeyProvider: &drkeyutil.FakeProvider{ - EpochDuration: drkeyutil.LoadEpochDuration(), - AcceptanceWindow: drkeyutil.LoadAcceptanceWindow(), - }, - optAuth: slayers.PacketAuthOption{EndToEndOption: new(slayers.EndToEndOption)}, - validAuthBuf: make([]byte, 16), + d: d, + buffer: gopacket.NewSerializeBuffer(), + mac: d.macFactory(), + macInputBuffer: make([]byte, max(path.MACBufferSize, libepic.MACBufferSize)), } p.scionLayer.RecyclePaths() return p @@ -1005,7 +1154,7 @@ func (p *scionPacketProcessor) processEPIC() (processResult, error) { HVF = epicPath.LHVF } err = libepic.VerifyHVF(p.cachedMac, epicPath.PktID, - &p.scionLayer, firstInfo.Timestamp, HVF, p.macBuffers.epicInput) + &p.scionLayer, firstInfo.Timestamp, HVF, p.macInputBuffer[:libepic.MACBufferSize]) if err != nil { // TODO(mawyss): Send back SCMP packet return processResult{}, err @@ -1054,29 +1203,37 @@ type scionPacketProcessor struct { // cachedMac contains the full 16 bytes of the MAC. Will be set during processing. // For a hop performing an Xover, it is the MAC corresponding to the down segment. 
cachedMac []byte - // macBuffers avoid allocating memory during processing. - macBuffers macBuffers + // macInputBuffer avoid allocating memory during processing. + macInputBuffer []byte // bfdLayer is reusable buffer for parsing BFD messages bfdLayer layers.BFD - // optAuth is a reusable Packet Authenticator Option - optAuth slayers.PacketAuthOption - // validAuthBuf is a reusable buffer for the authentication tag - // to be used in the hasValidAuth() method. - validAuthBuf []byte - - // DRKey key derivation for SCMP authentication - drkeyProvider drkeyProvider } -// macBuffers are preallocated buffers for the in- and outputs of MAC functions. -type macBuffers struct { - scionInput []byte - epicInput []byte - drkeyInput []byte +type slowPathType int + +const ( + slowPathSCMP slowPathType = iota + slowPathRouterAlert +) + +type slowPathRequest struct { + typ slowPathType + scmpType slayers.SCMPType + code slayers.SCMPCode + cause error + + // The parameters. Only those used for that particular mode and + // type will be valid. 
+ + pointer uint16 + ia addr.IA + interfaceId uint16 + ingressId uint16 + egressId uint16 } -func (p *scionPacketProcessor) packSCMP( +func (p *slowPathPacketProcessor) packSCMP( typ slayers.SCMPType, code slayers.SCMPCode, scmpP gopacket.SerializableLayer, @@ -1119,30 +1276,35 @@ func (p *scionPacketProcessor) parsePath() (processResult, error) { return processResult{}, nil } -func (p *scionPacketProcessor) determinePeer() (processResult, error) { - if !p.infoField.Peer { - return processResult{}, nil +func determinePeer(pathMeta scion.MetaHdr, inf path.InfoField) (bool, error) { + if !inf.Peer { + return false, nil } - if p.path.PathMeta.SegLen[0] == 0 { - return processResult{}, errPeeringEmptySeg0 + if pathMeta.SegLen[0] == 0 { + return false, errPeeringEmptySeg0 } - if p.path.PathMeta.SegLen[1] == 0 { - return processResult{}, errPeeringEmptySeg1 + if pathMeta.SegLen[1] == 0 { + return false, errPeeringEmptySeg1 } - if p.path.PathMeta.SegLen[2] != 0 { - return processResult{}, errPeeringNonemptySeg2 + if pathMeta.SegLen[2] != 0 { + return false, errPeeringNonemptySeg2 } // The peer hop fields are the last hop field on the first path // segment (at SegLen[0] - 1) and the first hop field of the second // path segment (at SegLen[0]). The below check applies only // because we already know this is a well-formed peering path. 
- currHF := p.path.PathMeta.CurrHF - segLen := p.path.PathMeta.SegLen[0] - p.peering = currHF == segLen-1 || currHF == segLen - return processResult{}, nil + currHF := pathMeta.CurrHF + segLen := pathMeta.SegLen[0] + return currHF == segLen-1 || currHF == segLen, nil +} + +func (p *scionPacketProcessor) determinePeer() (processResult, error) { + peer, err := determinePeer(p.path.PathMeta, p.infoField) + p.peering = peer + return processResult{}, err } func (p *scionPacketProcessor) validateHopExpiry() (processResult, error) { @@ -1152,13 +1314,15 @@ func (p *scionPacketProcessor) validateHopExpiry() (processResult, error) { if !expired { return processResult{}, nil } - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - slayers.SCMPCodePathExpired, - &slayers.SCMPParameterProblem{Pointer: p.currentHopPointer()}, - serrors.New("expired hop", "cons_dir", p.infoField.ConsDir, "if_id", p.ingressID, - "curr_inf", p.path.PathMeta.CurrINF, "curr_hf", p.path.PathMeta.CurrHF), - ) + log.Debug("SCMP: expired hop", "cons_dir", p.infoField.ConsDir, "if_id", p.ingressID, + "curr_inf", p.path.PathMeta.CurrINF, "curr_hf", p.path.PathMeta.CurrHF) + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: slayers.SCMPCodePathExpired, + pointer: p.currentHopPointer(), + cause: expiredHop, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } func (p *scionPacketProcessor) validateIngressID() (processResult, error) { @@ -1169,13 +1333,15 @@ func (p *scionPacketProcessor) validateIngressID() (processResult, error) { errCode = slayers.SCMPCodeUnknownHopFieldEgress } if p.ingressID != 0 && p.ingressID != pktIngressID { - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - errCode, - &slayers.SCMPParameterProblem{Pointer: p.currentHopPointer()}, - serrors.New("ingress interface invalid", - "pkt_ingress", pktIngressID, "router_ingress", p.ingressID), - ) + log.Debug("SCMP: ingress interface invalid", "pkt_ingress", + 
pktIngressID, "router_ingress", p.ingressID) + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: errCode, + pointer: p.currentHopPointer(), + cause: ingressInterfaceInvalid, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } return processResult{}, nil } @@ -1208,22 +1374,26 @@ func (p *scionPacketProcessor) validateSrcDstIA() (processResult, error) { // invalidSrcIA is a helper to return an SCMP error for an invalid SrcIA. func (p *scionPacketProcessor) invalidSrcIA() (processResult, error) { - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - slayers.SCMPCodeInvalidSourceAddress, - &slayers.SCMPParameterProblem{Pointer: uint16(slayers.CmnHdrLen + addr.IABytes)}, - invalidSrcIA, - ) + log.Debug("SCMP: invalid source IA") + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: slayers.SCMPCodeInvalidSourceAddress, + pointer: uint16(slayers.CmnHdrLen + addr.IABytes), + cause: invalidSrcIA, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } // invalidDstIA is a helper to return an SCMP error for an invalid DstIA. 
func (p *scionPacketProcessor) invalidDstIA() (processResult, error) { - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - slayers.SCMPCodeInvalidDestinationAddress, - &slayers.SCMPParameterProblem{Pointer: uint16(slayers.CmnHdrLen)}, - invalidDstIA, - ) + log.Debug("SCMP: invalid destination IA") + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: slayers.SCMPCodeInvalidDestinationAddress, + pointer: uint16(slayers.CmnHdrLen), + cause: invalidDstIA, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } // validateTransitUnderlaySrc checks that the source address of transit packets @@ -1254,12 +1424,14 @@ func (p *scionPacketProcessor) validateEgressID() (processResult, error) { if !p.infoField.ConsDir { errCode = slayers.SCMPCodeUnknownHopFieldIngress } - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - errCode, - &slayers.SCMPParameterProblem{Pointer: p.currentHopPointer()}, - cannotRoute, - ) + log.Debug("SCMP: cannot route") + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: errCode, + pointer: p.currentHopPointer(), + cause: cannotRoute, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } ingress, egress := p.d.linkTypes[p.ingressID], p.d.linkTypes[pktEgressID] @@ -1282,12 +1454,15 @@ func (p *scionPacketProcessor) validateEgressID() (processResult, error) { case ingress == topology.Peer && egress == topology.Child: return processResult{}, nil default: // malicious - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - slayers.SCMPCodeInvalidPath, // XXX(matzf) new code InvalidHop? 
- &slayers.SCMPParameterProblem{Pointer: p.currentHopPointer()}, - serrors.WithCtx(cannotRoute, "ingress_id", p.ingressID, "ingress_type", ingress, - "egress_id", pktEgressID, "egress_type", egress)) + log.Debug("SCMP: cannot route", "ingress_id", p.ingressID, + "ingress_type", ingress, "egress_id", pktEgressID, "egress_type", egress) + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: slayers.SCMPCodeInvalidPath, // XXX(matzf) new code InvalidHop?, + pointer: p.currentHopPointer(), + cause: cannotRoute, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } } // Check that the interface pair is valid on a segment switch. @@ -1303,12 +1478,15 @@ func (p *scionPacketProcessor) validateEgressID() (processResult, error) { case ingress == topology.Child && egress == topology.Child: return processResult{}, nil default: - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - slayers.SCMPCodeInvalidSegmentChange, - &slayers.SCMPParameterProblem{Pointer: p.currentInfoPointer()}, - serrors.WithCtx(cannotRoute, "ingress_id", p.ingressID, "ingress_type", ingress, - "egress_id", pktEgressID, "egress_type", egress)) + log.Debug("SCMP: cannot route", "ingress_id", p.ingressID, "ingress_type", ingress, + "egress_id", pktEgressID, "egress_type", egress) + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: slayers.SCMPCodeInvalidSegmentChange, + pointer: p.currentInfoPointer(), + cause: cannotRoute, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } } @@ -1336,21 +1514,21 @@ func (p *scionPacketProcessor) currentHopPointer() uint16 { } func (p *scionPacketProcessor) verifyCurrentMAC() (processResult, error) { - fullMac := path.FullMAC(p.mac, p.infoField, p.hopField, p.macBuffers.scionInput) + fullMac := path.FullMAC(p.mac, p.infoField, p.hopField, p.macInputBuffer[:path.MACBufferSize]) if subtle.ConstantTimeCompare(p.hopField.Mac[:path.MacLen], 
fullMac[:path.MacLen]) == 0 { - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - slayers.SCMPCodeInvalidHopFieldMAC, - &slayers.SCMPParameterProblem{Pointer: p.currentHopPointer()}, - serrors.New("MAC verification failed", - "expected", fmt.Sprintf("%x", fullMac[:path.MacLen]), - "actual", fmt.Sprintf("%x", p.hopField.Mac[:path.MacLen]), - "cons_dir", p.infoField.ConsDir, - "if_id", p.ingressID, - "curr_inf", p.path.PathMeta.CurrINF, - "curr_hf", p.path.PathMeta.CurrHF, - "seg_id", p.infoField.SegID), - ) + log.Debug("SCMP: MAC verification failed", "expected", fmt.Sprintf( + "%x", fullMac[:path.MacLen]), + "actual", fmt.Sprintf("%x", p.hopField.Mac[:path.MacLen]), + "cons_dir", p.infoField.ConsDir, + "if_id", p.ingressID, "curr_inf", p.path.PathMeta.CurrINF, + "curr_hf", p.path.PathMeta.CurrHF, "seg_id", p.infoField.SegID) + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: slayers.SCMPCodeInvalidHopFieldMAC, + pointer: p.currentHopPointer(), + cause: macVerificationFailed, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } // Add the full MAC to the SCION packet processor, // such that EPIC does not need to recalculate it. 
@@ -1363,11 +1541,13 @@ func (p *scionPacketProcessor) resolveInbound() (*net.UDPAddr, processResult, er a, err := p.d.resolveLocalDst(p.scionLayer) switch { case errors.Is(err, noSVCBackend): - r, err := p.packSCMP( - slayers.SCMPTypeDestinationUnreachable, - slayers.SCMPCodeNoRoute, - &slayers.SCMPDestinationUnreachable{}, err) - return nil, r, err + log.Debug("SCMP: no SVC backend") + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeDestinationUnreachable, + code: slayers.SCMPCodeNoRoute, + cause: err, + } + return nil, processResult{SlowPathRequest: slowPathRequest}, slowPathRequired default: return a, processResult{}, nil } @@ -1442,20 +1622,28 @@ func (p *scionPacketProcessor) validateEgressUp() (processResult, error) { egressID := p.egressInterface() if v, ok := p.d.bfdSessions[egressID]; ok { if !v.IsUp() { - typ := slayers.SCMPTypeExternalInterfaceDown - var scmpP gopacket.SerializableLayer = &slayers.SCMPExternalInterfaceDown{ - IA: p.d.localIA, - IfID: uint64(egressID), - } + var s slowPathRequest + log.Debug("SCMP: bfd session down") if _, external := p.d.external[egressID]; !external { - typ = slayers.SCMPTypeInternalConnectivityDown - scmpP = &slayers.SCMPInternalConnectivityDown{ - IA: p.d.localIA, - Ingress: uint64(p.ingressID), - Egress: uint64(egressID), + s = slowPathRequest{ + scmpType: slayers.SCMPTypeInternalConnectivityDown, + code: 0, + ia: p.d.localIA, + ingressId: p.ingressID, + egressId: egressID, + cause: errBFDSessionDown, + } + } else { + s = slowPathRequest{ + scmpType: slayers.SCMPTypeExternalInterfaceDown, + code: 0, + ia: p.d.localIA, + interfaceId: egressID, + cause: errBFDSessionDown, } } - return p.packSCMP(typ, 0, scmpP, errBFDSessionDown) + return processResult{SlowPathRequest: s}, slowPathRequired + } } return processResult{}, nil @@ -1473,7 +1661,11 @@ func (p *scionPacketProcessor) handleIngressRouterAlert() (processResult, error) if err := p.path.SetHopField(p.hopField, int(p.path.PathMeta.CurrHF)); err != 
nil { return processResult{}, serrors.WrapStr("update hop field", err) } - return p.handleSCMPTraceRouteRequest(p.ingressID) + slowPathRequest := slowPathRequest{ + typ: slowPathRouterAlert, + interfaceId: p.ingressID, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } func (p *scionPacketProcessor) ingressRouterAlertFlag() *bool { @@ -1496,7 +1688,11 @@ func (p *scionPacketProcessor) handleEgressRouterAlert() (processResult, error) if err := p.path.SetHopField(p.hopField, int(p.path.PathMeta.CurrHF)); err != nil { return processResult{}, serrors.WrapStr("update hop field", err) } - return p.handleSCMPTraceRouteRequest(egressID) + slowPathRequest := slowPathRequest{ + typ: slowPathRouterAlert, + interfaceId: egressID, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } func (p *scionPacketProcessor) egressRouterAlertFlag() *bool { @@ -1506,7 +1702,7 @@ func (p *scionPacketProcessor) egressRouterAlertFlag() *bool { return &p.hopField.EgressRouterAlert } -func (p *scionPacketProcessor) handleSCMPTraceRouteRequest( +func (p *slowPathPacketProcessor) handleSCMPTraceRouteRequest( interfaceID uint16) (processResult, error) { if p.lastLayer.NextLayerType() != slayers.LayerTypeSCMP { @@ -1542,13 +1738,15 @@ func (p *scionPacketProcessor) validatePktLen() (processResult, error) { if int(p.scionLayer.PayloadLen) == len(p.scionLayer.Payload) { return processResult{}, nil } - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - slayers.SCMPCodeInvalidPacketSize, - &slayers.SCMPParameterProblem{Pointer: 0}, - serrors.New("bad packet size", - "header", p.scionLayer.PayloadLen, "actual", len(p.scionLayer.Payload)), - ) + log.Debug("SCMP: bad packet size", "header", p.scionLayer.PayloadLen, + "actual", len(p.scionLayer.Payload)) + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: slayers.SCMPCodeInvalidPacketSize, + pointer: 0, + cause: badPacketSize, + } + return 
processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } func (p *scionPacketProcessor) process() (processResult, error) { @@ -1636,12 +1834,14 @@ func (p *scionPacketProcessor) process() (processResult, error) { if !p.infoField.ConsDir { errCode = slayers.SCMPCodeUnknownHopFieldIngress } - return p.packSCMP( - slayers.SCMPTypeParameterProblem, - errCode, - &slayers.SCMPParameterProblem{Pointer: p.currentHopPointer()}, - cannotRoute, - ) + log.Debug("SCMP: cannot route") + slowPathRequest := slowPathRequest{ + scmpType: slayers.SCMPTypeParameterProblem, + code: errCode, + pointer: p.currentHopPointer(), + cause: cannotRoute, + } + return processResult{SlowPathRequest: slowPathRequest}, slowPathRequired } func (p *scionPacketProcessor) processOHP() (processResult, error) { @@ -1677,7 +1877,7 @@ func (p *scionPacketProcessor) processOHP() (processResult, error) { "type", "ohp", "egress", ohp.FirstHop.ConsEgress, "neighborIA", neighborIA, "dstIA", s.DstIA) } - mac := path.MAC(p.mac, ohp.Info, ohp.FirstHop, p.macBuffers.scionInput) + mac := path.MAC(p.mac, ohp.Info, ohp.FirstHop, p.macInputBuffer[:path.MACBufferSize]) if subtle.ConstantTimeCompare(ohp.FirstHop.Mac[:], mac[:]) == 0 { // TODO parameter problem -> invalid MAC return processResult{}, serrors.New("MAC", "expected", fmt.Sprintf("%x", mac), @@ -1712,7 +1912,8 @@ func (p *scionPacketProcessor) processOHP() (processResult, error) { // XXX(roosd): Here we leak the buffer into the SCION packet header. // This is okay because we do not operate on the buffer or the packet // for the rest of processing. 
- ohp.SecondHop.Mac = path.MAC(p.mac, ohp.Info, ohp.SecondHop, p.macBuffers.scionInput) + ohp.SecondHop.Mac = path.MAC(p.mac, ohp.Info, ohp.SecondHop, + p.macInputBuffer[:path.MACBufferSize]) if err := updateSCIONLayer(p.rawPkt, s, p.buffer); err != nil { return processResult{}, err @@ -1861,7 +2062,7 @@ func (b *bfdSend) Send(bfd *layers.BFD) error { return err } -func (p *scionPacketProcessor) prepareSCMP( +func (p *slowPathPacketProcessor) prepareSCMP( typ slayers.SCMPType, code slayers.SCMPCode, scmpP gopacket.SerializableLayer, @@ -1901,8 +2102,13 @@ func (p *scionPacketProcessor) prepareSCMP( } revPath := revPathTmp.(*scion.Decoded) + peering, err := determinePeer(revPath.PathMeta, revPath.InfoFields[revPath.PathMeta.CurrINF]) + if err != nil { + return nil, serrors.Wrap(cannotRoute, err, "details", "peering cannot be determined") + } + // Revert potential path segment switches that were done during processing. - if revPath.IsXover() && !p.peering { + if revPath.IsXover() && !peering { // An effective cross-over is a change of segment other than at // a peering hop. 
if err := revPath.IncPath(); err != nil { @@ -1914,7 +2120,7 @@ func (p *scionPacketProcessor) prepareSCMP( _, external := p.d.external[p.ingressID] if external { infoField := &revPath.InfoFields[revPath.PathMeta.CurrINF] - if infoField.ConsDir && !p.peering { + if infoField.ConsDir && !peering { hopField := revPath.HopFields[revPath.PathMeta.CurrHF] infoField.UpdateSegID(hopField.Mac) } @@ -2017,7 +2223,7 @@ func (p *scionPacketProcessor) prepareSCMP( PldType: slayers.L4SCMP, Pld: p.buffer.Bytes(), }, - p.macBuffers.drkeyInput, + p.macInputBuffer, p.optAuth.Authenticator(), ) if err != nil { @@ -2033,10 +2239,11 @@ func (p *scionPacketProcessor) prepareSCMP( return nil, serrors.Wrap(cannotRoute, err, "details", "serializing SCION header") } - return p.buffer.Bytes(), scmpError{TypeCode: scmpH.TypeCode, Cause: cause} + log.Debug("scmp", "typecode", scmpH.TypeCode, "cause", cause) + return p.buffer.Bytes(), nil } -func (p *scionPacketProcessor) resetSPAOMetadata(key drkey.ASHostKey, now time.Time) error { +func (p *slowPathPacketProcessor) resetSPAOMetadata(key drkey.ASHostKey, now time.Time) error { // For creating SCMP responses we use sender side. 
dir := slayers.PacketAuthSenderSide drkeyType := slayers.PacketAuthASHost @@ -2059,7 +2266,7 @@ func (p *scionPacketProcessor) resetSPAOMetadata(key drkey.ASHostKey, now time.T }) } -func (p *scionPacketProcessor) hasValidAuth(t time.Time) bool { +func (p *slowPathPacketProcessor) hasValidAuth(t time.Time) bool { // Check if e2eLayer was parsed for this packet if !p.lastLayer.CanDecode().Contains(slayers.LayerTypeEndToEndExtn) { return false @@ -2107,13 +2314,12 @@ func (p *scionPacketProcessor) hasValidAuth(t time.Time) bool { PldType: slayers.L4SCMP, Pld: p.lastLayer.LayerPayload(), }, - p.macBuffers.drkeyInput, + p.macInputBuffer, p.validAuthBuf, ) if err != nil { return false } - // compare incoming authField with computed authentication tag return subtle.ConstantTimeCompare(authOption.Authenticator(), p.validAuthBuf) != 0 } @@ -2164,6 +2370,7 @@ type forwardingMetrics struct { DroppedPacketsInvalid prometheus.Counter DroppedPacketsBusyProcessor prometheus.Counter DroppedPacketsBusyForwarder prometheus.Counter + DroppedPacketsBusySlowPath prometheus.Counter ProcessedPackets prometheus.Counter } @@ -2181,6 +2388,8 @@ func initForwardingMetrics(metrics *Metrics, labels prometheus.Labels) forwardin c.DroppedPacketsBusyProcessor = metrics.DroppedPacketsTotal.With(labels) labels["reason"] = "busy_forwarder" c.DroppedPacketsBusyForwarder = metrics.DroppedPacketsTotal.With(labels) + labels["reason"] = "busy_slow_path" + c.DroppedPacketsBusySlowPath = metrics.DroppedPacketsTotal.With(labels) c.InputBytesTotal.Add(0) c.InputPacketsTotal.Add(0) @@ -2189,6 +2398,7 @@ func initForwardingMetrics(metrics *Metrics, labels prometheus.Labels) forwardin c.DroppedPacketsInvalid.Add(0) c.DroppedPacketsBusyProcessor.Add(0) c.DroppedPacketsBusyForwarder.Add(0) + c.DroppedPacketsBusySlowPath.Add(0) c.ProcessedPackets.Add(0) return c } diff --git a/router/dataplane_internal_test.go b/router/dataplane_internal_test.go index 60291aaa4c..6e92a8219d 100644 --- 
a/router/dataplane_internal_test.go +++ b/router/dataplane_internal_test.go @@ -34,6 +34,7 @@ import ( "github.com/scionproto/scion/pkg/private/serrors" "github.com/scionproto/scion/pkg/private/util" "github.com/scionproto/scion/pkg/private/xtest" + "github.com/scionproto/scion/pkg/scrypto" "github.com/scionproto/scion/pkg/slayers" "github.com/scionproto/scion/pkg/slayers/path" "github.com/scionproto/scion/pkg/slayers/path/scion" @@ -41,6 +42,8 @@ import ( "github.com/scionproto/scion/router/mock_router" ) +var testKey = []byte("testkey_xxxxxxxx") + // TestReceiver sets up a mocked batchConn, starts the receiver that reads from // this batchConn and forwards it to the processing routines channels. We verify // by directly reading from the processing routine channels that we received @@ -80,7 +83,7 @@ func TestReceiver(t *testing.T) { BatchSize: 64, } dp.initPacketPool(runConfig, 64) - procCh, _ := initQueues(runConfig, dp.interfaces, 64) + procCh, _, _ := initQueues(runConfig, dp.interfaces, 64) initialPoolSize := len(dp.packetPool) dp.running = true dp.initMetrics() @@ -174,7 +177,7 @@ func TestForwarder(t *testing.T) { BatchSize: 64, } dp.initPacketPool(runConfig, 64) - _, fwCh := initQueues(runConfig, dp.interfaces, 64) + _, fwCh, _ := initQueues(runConfig, dp.interfaces, 64) initialPoolSize := len(dp.packetPool) dp.running = true dp.initMetrics() @@ -414,6 +417,140 @@ func TestComputeProcIdErrorCases(t *testing.T) { } } +func TestSlowPathProcessing(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + payload := []byte("actualpayloadbytes") + testCases := map[string]struct { + mockMsg func() []byte + prepareDP func(*gomock.Controller) *DataPlane + expectedSlowPathRequest slowPathRequest + srcInterface uint16 + expectedLayerType gopacket.LayerType + }{ + "svc nobackend": { + prepareDP: func(ctrl *gomock.Controller) *DataPlane { + return NewDP(nil, nil, mock_router.NewMockBatchConn(ctrl), nil, + map[addr.SVC][]*net.UDPAddr{}, + 
xtest.MustParseIA("1-ff00:0:110"), nil, testKey) + }, + mockMsg: func() []byte { + spkt := prepBaseMsg(t, payload, 0) + _ = spkt.SetDstAddr(addr.MustParseHost("CS")) + spkt.DstIA = xtest.MustParseIA("1-ff00:0:110") + ret := toMsg(t, spkt) + return ret + }, + srcInterface: 1, + expectedSlowPathRequest: slowPathRequest{ + typ: slowPathSCMP, + scmpType: slayers.SCMPTypeDestinationUnreachable, + code: slayers.SCMPCodeNoRoute, + cause: noSVCBackend, + }, + expectedLayerType: slayers.LayerTypeSCMPDestinationUnreachable, + }, + "svc invalid": { + prepareDP: func(ctrl *gomock.Controller) *DataPlane { + return NewDP(nil, nil, mock_router.NewMockBatchConn(ctrl), nil, + map[addr.SVC][]*net.UDPAddr{}, + xtest.MustParseIA("1-ff00:0:110"), nil, testKey) + }, + mockMsg: func() []byte { + spkt := prepBaseMsg(t, payload, 0) + _ = spkt.SetDstAddr(addr.MustParseHost("CS")) + spkt.DstIA = xtest.MustParseIA("1-ff00:0:110") + ret := toMsg(t, spkt) + return ret + }, + srcInterface: 1, + expectedSlowPathRequest: slowPathRequest{ + typ: slowPathSCMP, + scmpType: slayers.SCMPTypeDestinationUnreachable, + code: slayers.SCMPCodeNoRoute, + cause: noSVCBackend, + }, + expectedLayerType: slayers.LayerTypeSCMPDestinationUnreachable, + }, + "invalid dest": { + prepareDP: func(ctrl *gomock.Controller) *DataPlane { + return NewDP(nil, nil, mock_router.NewMockBatchConn(ctrl), nil, + map[addr.SVC][]*net.UDPAddr{}, + xtest.MustParseIA("1-ff00:0:110"), nil, testKey) + }, + mockMsg: func() []byte { + spkt := prepBaseMsg(t, payload, 0) + spkt.DstIA = xtest.MustParseIA("1-ff00:0:f1") + ret := toMsg(t, spkt) + return ret + }, + srcInterface: 1, + expectedSlowPathRequest: slowPathRequest{ + typ: slowPathSCMP, + scmpType: slayers.SCMPTypeParameterProblem, + code: slayers.SCMPCodeInvalidDestinationAddress, + cause: invalidDstIA, + pointer: 0xc, + }, + expectedLayerType: slayers.LayerTypeSCMPParameterProblem, + }, + } + for name, tc := range testCases { + name, tc := name, tc + t.Run(name, func(t *testing.T) { 
+ t.Parallel() + dp := tc.prepareDP(ctrl) + rawPacket := tc.mockMsg() + var srcAddr *net.UDPAddr + + processor := newPacketProcessor(dp) + result, err := processor.processPkt(rawPacket, srcAddr, tc.srcInterface) + assert.ErrorIs(t, err, SlowPathRequired) + + assert.Equal(t, tc.expectedSlowPathRequest, result.SlowPathRequest) + slowPathProcessor := newSlowPathProcessor(dp) + result, err = slowPathProcessor.processPacket(slowPacket{ + packet: packet{ + srcAddr: srcAddr, + dstAddr: result.OutAddr, + ingress: tc.srcInterface, + rawPacket: rawPacket, + }, + slowPathRequest: result.SlowPathRequest, + }) + assert.NoError(t, err) + + // here we parse the result.OutPkt to verify that it contains the correct SCMP + // header and typecodes. + packet := gopacket.NewPacket(result.OutPkt, slayers.LayerTypeSCION, gopacket.Default) + scmp := packet.Layer(slayers.LayerTypeSCMP).(*slayers.SCMP) + expectedTypeCode := slayers.CreateSCMPTypeCode(tc.expectedSlowPathRequest.scmpType, + tc.expectedSlowPathRequest.code) + assert.Equal(t, expectedTypeCode, scmp.TypeCode) + assert.NotNil(t, packet.Layer(tc.expectedLayerType)) + }) + } +} + +func toMsg(t *testing.T, spkt *slayers.SCION) []byte { + t.Helper() + buffer := gopacket.NewSerializeBuffer() + payload := []byte("actualpayloadbytes") + err := gopacket.SerializeLayers(buffer, gopacket.SerializeOptions{FixLengths: true}, + spkt, gopacket.Payload(payload)) + require.NoError(t, err) + raw := buffer.Bytes() + ret := make([]byte, bufSize) + copy(ret, raw) + return ret[:len(raw)] +} + +func computeMAC(t *testing.T, key []byte, info path.InfoField, hf path.HopField) [path.MacLen]byte { + mac, err := scrypto.InitMac(key) + require.NoError(t, err) + return path.MAC(mac, info, hf, nil) +} + func serializedBaseMsg(t *testing.T, payload []byte, flowId uint32) []byte { s := prepBaseMsg(t, payload, flowId) buffer := gopacket.NewSerializeBuffer() @@ -440,7 +577,7 @@ func prepBaseMsg(t *testing.T, payload []byte, flowId uint32) *slayers.SCION { dpath 
:= &scion.Decoded{ Base: scion.Base{ PathMeta: scion.MetaHdr{ - CurrHF: 1, + CurrHF: 2, SegLen: [3]uint8{3, 0, 0}, }, NumINF: 1, @@ -456,6 +593,7 @@ func prepBaseMsg(t *testing.T, payload []byte, flowId uint32) *slayers.SCION { {ConsIngress: 1, ConsEgress: 0}, }, } + dpath.HopFields[2].Mac = computeMAC(t, testKey, dpath.InfoFields[0], dpath.HopFields[2]) spkt.Path = dpath return spkt } diff --git a/router/dataplane_test.go b/router/dataplane_test.go index c83bc2701a..57f3b20ddf 100644 --- a/router/dataplane_test.go +++ b/router/dataplane_test.go @@ -539,8 +539,9 @@ func TestDataPlaneRun(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() runConfig := &router.RunConfig{ - NumProcessors: 8, - BatchSize: 256, + NumProcessors: 8, + BatchSize: 256, + NumSlowPathProcessors: 1, } ch := make(chan struct{}) dp := tc.prepareDP(ctrl, ch) @@ -1146,54 +1147,6 @@ func TestProcessPkt(t *testing.T) { egressInterface: 0, assertFunc: assert.NoError, }, - "svc nobackend": { - prepareDP: func(ctrl *gomock.Controller) *router.DataPlane { - return router.NewDP(nil, nil, mock_router.NewMockBatchConn(ctrl), nil, - map[addr.SVC][]*net.UDPAddr{}, - xtest.MustParseIA("1-ff00:0:110"), nil, key) - }, - mockMsg: func(afterProcessing bool) *ipv4.Message { - spkt, dpath := prepBaseMsg(now) - _ = spkt.SetDstAddr(addr.MustParseHost("CS")) - spkt.DstIA = xtest.MustParseIA("1-ff00:0:110") - dpath.HopFields = []path.HopField{ - {ConsIngress: 41, ConsEgress: 40}, - {ConsIngress: 31, ConsEgress: 30}, - {ConsIngress: 1, ConsEgress: 0}, - } - dpath.Base.PathMeta.CurrHF = 2 - dpath.HopFields[2].Mac = computeMAC(t, key, dpath.InfoFields[0], dpath.HopFields[2]) - ret := toMsg(t, spkt, dpath) - return ret - }, - srcInterface: 1, - egressInterface: 0, - assertFunc: assertIsSCMPError(slayers.SCMPTypeDestinationUnreachable, 0), - }, - "svc invalid": { - prepareDP: func(ctrl *gomock.Controller) *router.DataPlane { - return router.NewDP(nil, nil, mock_router.NewMockBatchConn(ctrl), nil, - 
map[addr.SVC][]*net.UDPAddr{}, - xtest.MustParseIA("1-ff00:0:110"), nil, key) - }, - mockMsg: func(afterProcessing bool) *ipv4.Message { - spkt, dpath := prepBaseMsg(now) - _ = spkt.SetDstAddr(addr.MustParseHost("CS")) - spkt.DstIA = xtest.MustParseIA("1-ff00:0:110") - dpath.HopFields = []path.HopField{ - {ConsIngress: 41, ConsEgress: 40}, - {ConsIngress: 31, ConsEgress: 30}, - {ConsIngress: 1, ConsEgress: 0}, - } - dpath.Base.PathMeta.CurrHF = 2 - dpath.HopFields[2].Mac = computeMAC(t, key, dpath.InfoFields[0], dpath.HopFields[2]) - ret := toMsg(t, spkt, dpath) - return ret - }, - srcInterface: 1, - egressInterface: 0, - assertFunc: assertIsSCMPError(slayers.SCMPTypeDestinationUnreachable, 0), - }, "onehop inbound": { prepareDP: func(ctrl *gomock.Controller) *router.DataPlane { return router.NewDP( @@ -1401,32 +1354,6 @@ func TestProcessPkt(t *testing.T) { egressInterface: 2, assertFunc: assert.NoError, }, - "invalid dest": { - prepareDP: func(ctrl *gomock.Controller) *router.DataPlane { - return router.NewDP(nil, nil, mock_router.NewMockBatchConn(ctrl), nil, - map[addr.SVC][]*net.UDPAddr{}, - xtest.MustParseIA("1-ff00:0:110"), nil, key) - }, - mockMsg: func(afterProcessing bool) *ipv4.Message { - spkt, dpath := prepBaseMsg(now) - spkt.DstIA = xtest.MustParseIA("1-ff00:0:f1") - dpath.HopFields = []path.HopField{ - {ConsIngress: 41, ConsEgress: 40}, - {ConsIngress: 31, ConsEgress: 404}, - {ConsIngress: 1, ConsEgress: 0}, - } - dpath.Base.PathMeta.CurrHF = 2 - dpath.HopFields[1].Mac = computeMAC(t, key, dpath.InfoFields[0], dpath.HopFields[1]) - ret := toMsg(t, spkt, dpath) - return ret - }, - srcInterface: 1, - egressInterface: 0, - assertFunc: assertIsSCMPError( - slayers.SCMPTypeParameterProblem, - slayers.SCMPCodeInvalidDestinationAddress, - ), - }, "epic inbound": { prepareDP: func(ctrl *gomock.Controller) *router.DataPlane { return router.NewDP(nil, nil, mock_router.NewMockBatchConn(ctrl), nil, @@ -1655,14 +1582,3 @@ func bfd() control.BFD { 
RequiredMinRxInterval: 25 * time.Millisecond, } } - -// assertIsSCMPError returns an ErrorAssertionFunc asserting that error is an -// scmpError with the specified type/code. -func assertIsSCMPError(typ slayers.SCMPType, code slayers.SCMPCode) assert.ErrorAssertionFunc { - return func(t assert.TestingT, err error, args ...interface{}) bool { - target := router.SCMPError{} - return assert.ErrorAs(t, err, &target, args) && - assert.Equal(t, typ, target.TypeCode.Type(), args) && - assert.Equal(t, code, target.TypeCode.Code(), args) - } -} diff --git a/router/export_test.go b/router/export_test.go index 32d2d14b80..a82d01e5a0 100644 --- a/router/export_test.go +++ b/router/export_test.go @@ -37,7 +37,7 @@ type ProcessResult struct { processResult } -type SCMPError = scmpError +var SlowPathRequired error = slowPathRequired func NewDP( external map[uint16]BatchConn, From 54a9360559171d6a679f9758677d8c25d6017749 Mon Sep 17 00:00:00 2001 From: Matthias Frei Date: Thu, 28 Sep 2023 18:13:09 +0200 Subject: [PATCH 13/24] acceptance: de-flake cert_renewal and hidden_paths (#4399) These tests have become even more flaky with the updated CI runners; it seems that this is triggered due to using the significantly quicker docker-compose v2. Use the existing await-connectivity script to determine when a test can safely start to run, instead of sleeping for arbitrary number of seconds. 
--- acceptance/cert_renewal/test.py | 2 ++ acceptance/common/base.py | 20 ++++++++++++++++++++ acceptance/common/topogen.bzl | 2 ++ acceptance/hidden_paths/test.py | 7 ++++--- tools/BUILD.bazel | 6 ++++++ tools/await-connectivity | 3 ++- 6 files changed, 36 insertions(+), 4 deletions(-) diff --git a/acceptance/cert_renewal/test.py b/acceptance/cert_renewal/test.py index fa0fc569c4..799fa42258 100755 --- a/acceptance/cert_renewal/test.py +++ b/acceptance/cert_renewal/test.py @@ -53,6 +53,7 @@ class Test(base.TestTopogen): """ def _run(self): + self.await_connectivity() isd_ases = scion.ASList.load("%s/gen/as_list.yml" % self.artifacts).all @@ -84,6 +85,7 @@ def _run(self): logger.info("==> Restart containers") self.setup_start() + self.await_connectivity() logger.info("==> Check connectivity") end2end.run_fg() diff --git a/acceptance/common/base.py b/acceptance/common/base.py index e229ba3dd3..204c35f9ce 100644 --- a/acceptance/common/base.py +++ b/acceptance/common/base.py @@ -213,6 +213,26 @@ def teardown(self): if re.search(r"Exit\s+[1-9]\d*", ps): raise Exception("Failed services.\n" + ps) + def await_connectivity(self, quiet_seconds=None, timeout_seconds=None): + """ + Wait for the beaconing process in a local topology to establish full connectivity, i.e. at + least one path between any two ASes. + Runs the tool/await-connectivity script. + + Returns success when full connectivity is established or an error (exception) at + timeout (default 20s). + + Remains quiet for a configurable time (default 10s). After that, + it reports the missing segments at 1s interval. + """ + cmd = self.get_executable("await-connectivity") + cmd.cwd = self.artifacts + if quiet_seconds is not None: + cmd = cmd["-q", str(quiet_seconds)] + if timeout_seconds is not None: + cmd = cmd["-t", str(timeout_seconds)] + cmd.run_fg() + def execute_tester(self, isd_as: ISD_AS, cmd: str, *args: str) -> str: """Executes a command in the designated "tester" container for the specified ISD-AS. 
diff --git a/acceptance/common/topogen.bzl b/acceptance/common/topogen.bzl index 23b7b61d95..b92967e269 100644 --- a/acceptance/common/topogen.bzl +++ b/acceptance/common/topogen.bzl @@ -45,6 +45,7 @@ def topogen_test( common_args = [ "--executable=scion-pki:$(location //scion-pki/cmd/scion-pki)", "--executable=topogen:$(location //tools:topogen)", + "--executable=await-connectivity:$(location //tools:await_connectivity)", "--topo=$(location %s)" % topo, ] if gateway: @@ -54,6 +55,7 @@ def topogen_test( "//scion-pki/cmd/scion-pki", "//tools:topogen", "//tools:docker_ip", + "//tools:await_connectivity", topo, ] loaders = container_loaders(tester, gateway) diff --git a/acceptance/hidden_paths/test.py b/acceptance/hidden_paths/test.py index 3a9f1af7f9..cf0d3ae438 100755 --- a/acceptance/hidden_paths/test.py +++ b/acceptance/hidden_paths/test.py @@ -3,7 +3,6 @@ # Copyright 2020 Anapaya Systems import http.server -import time import threading from plumbum import cmd @@ -105,9 +104,9 @@ def setup_start(self): ("0.0.0.0", self.http_server_port), http.server.SimpleHTTPRequestHandler) server_thread = threading.Thread(target=configuration_server, args=[server]) server_thread.start() + self._server = server super().setup_start() - time.sleep(4) # Give applications time to download configurations self._testers = { "2": "tester_1-ff00_0_2", @@ -121,9 +120,11 @@ def setup_start(self): "4": "1-ff00:0:4", "5": "1-ff00:0:5", } - server.shutdown() def _run(self): + self.await_connectivity() + self._server.shutdown() # by now configuration must have been downloaded everywhere + # Group 3 self._showpaths_bidirectional("2", "3", 0) self._showpaths_bidirectional("2", "5", 0) diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index b116c1f782..2871a5b8c9 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -7,6 +7,12 @@ sh_binary( visibility = ["//visibility:public"], ) +sh_binary( + name = "await_connectivity", + srcs = ["await-connectivity"], + visibility = 
["//visibility:public"], +) + py_binary( name = "gomocks", srcs = ["gomocks.py"], diff --git a/tools/await-connectivity b/tools/await-connectivity index a81bc0440e..4f0d1f2aee 100755 --- a/tools/await-connectivity +++ b/tools/await-connectivity @@ -13,7 +13,7 @@ # # Usage: await-connectivity -q QUIET -t TIMEOUT -set -euo pipefail +set -Eeuo pipefail QUIET=10 TIMEOUT=20 @@ -83,6 +83,7 @@ main() { parse_opts "$@" # poor bash-man's yaml parser + stat gen/as_list.yml > /dev/null # ensure file exists; command substitutions below don't fail because local (!?) local cores=$(sed -n '1,/Non-core/{s/^- //p}' gen/as_list.yml) local noncores=$(sed -n '/Non-core/,${s/^- //p}' gen/as_list.yml) From 222c5f5c7b652eefbc0e2085c4d718e74c5bf208 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Thu, 28 Sep 2023 18:25:31 +0200 Subject: [PATCH 14/24] build: assorted cleanups * Severed our dependency on docker-compose V1 (including any shim script in /usr/bin): updated installation instructions and other docs accordingly. * Gave a $HOME dir to integration tests. It's not actually used, but docker-compose v2 fails (in a very cryptic way) without it. * Deflaked some integration tests (timing issues). Those were playing with my head when I was chasing other issues. * Clarified the instructions for running tests wrt to supervisor mode vs docker mode (e.g. the e2e integration tests must be run with -d in the docker mode). 
--- acceptance/hidden_paths/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acceptance/hidden_paths/test.py b/acceptance/hidden_paths/test.py index cf0d3ae438..47e399b10e 100755 --- a/acceptance/hidden_paths/test.py +++ b/acceptance/hidden_paths/test.py @@ -145,7 +145,7 @@ def _showpaths_bidirectional(self, source: str, destination: str, retcode: int): def _showpaths_run(self, source_as: str, destination_as: str, retcode: int): print(cmd.docker("exec", "-t", self._testers[source_as], "scion", "sp", self._ases[destination_as], - "--timeout", "2s", + "--timeout", "3s", retcode=retcode)) From 86db75515cc1a77e61d9b5981b746b14934ac37c Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Thu, 28 Sep 2023 18:13:38 +0200 Subject: [PATCH 15/24] build: undo deflaking of hidden_paths and cert_renewal tests. Matthias' is about to commit a better deflaking. --- acceptance/cert_renewal/test.py | 1 - acceptance/hidden_paths/test.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/acceptance/cert_renewal/test.py b/acceptance/cert_renewal/test.py index 799fa42258..db72b427e5 100755 --- a/acceptance/cert_renewal/test.py +++ b/acceptance/cert_renewal/test.py @@ -85,7 +85,6 @@ def _run(self): logger.info("==> Restart containers") self.setup_start() - self.await_connectivity() logger.info("==> Check connectivity") end2end.run_fg() diff --git a/acceptance/hidden_paths/test.py b/acceptance/hidden_paths/test.py index 47e399b10e..cf0d3ae438 100755 --- a/acceptance/hidden_paths/test.py +++ b/acceptance/hidden_paths/test.py @@ -145,7 +145,7 @@ def _showpaths_bidirectional(self, source: str, destination: str, retcode: int): def _showpaths_run(self, source_as: str, destination_as: str, retcode: int): print(cmd.docker("exec", "-t", self._testers[source_as], "scion", "sp", self._ases[destination_as], - "--timeout", "3s", + "--timeout", "2s", retcode=retcode)) From 927085b78fe54d895323b982f57edaa8687212da Mon Sep 17 00:00:00 2001 From: 
Jean-Christophe Hugly Date: Thu, 28 Sep 2023 18:46:26 +0200 Subject: [PATCH 16/24] build: fix comment. --- tools/dc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/dc b/tools/dc index 938405e65a..a2912882aa 100755 --- a/tools/dc +++ b/tools/dc @@ -37,7 +37,7 @@ cmd_help() { - [SERVICE]: As scion service glob, e.g. cs1*. - [GROUP]: - scion: For all scion services. - - monitoring: For the monitoring service (i.e. prometheus seand yaeger. + - monitoring: For the monitoring service (i.e. prometheus and yaeger). - [COMMAND]: A docker compose command like 'up -d' or 'down' - [IA]: An IA number like 1-ff00:0:110 - [LOG_DIR]: A folder. From 65bbe58a6cf29d7f4d936c931bfbdc436f9b9809 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Fri, 29 Sep 2023 12:41:29 +0200 Subject: [PATCH 17/24] build: address reviewers comments. * More scion.sh cleanup * Doc fixes. --- doc/dev/run.rst | 17 ++++++------- scion.sh | 59 ++++++++----------------------------------- tools/jaeger_ui.sh | 63 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 58 deletions(-) create mode 100755 tools/jaeger_ui.sh diff --git a/doc/dev/run.rst b/doc/dev/run.rst index 43bf6c42d4..2a917f05cd 100644 --- a/doc/dev/run.rst +++ b/doc/dev/run.rst @@ -15,11 +15,7 @@ The scripts support two different process orchestrators as "backends": - `supervisor `_. This is the default and a bit more light-weight. Packets are sent over the loopback interface. - `docker compose `_. Runs individual processes in separate containers connected with docker network bridges. Only this mode supports running a "SCION-IP gateway". - -Before attempting to use the docker compose mode, be sure to build the necessary docker images with: -.. code-block:: bash - - make docker-images + .. hint:: Before attempting to use the docker compose mode, be sure to build the necessary docker images with `make docker-images` .. 
TODO - Describe configuration directory (referencing manuals) @@ -88,10 +84,12 @@ Various helper files are also generated for the benefit of scripts and tooling o for example, ``gen/sciond_addresses.json`` is a simple mapping from AS number to the address of the corresponding :doc:`scion daemon ` instance. -If :option:`scion.sh topology -d` command is used, additional configuration files are created to -enable running the SCION services in docker containers (see `docker`_). Otherwise, a configuration -file is created to enable running the SCION services as plain processes (see `supervisor`_) +If :option:`scion.sh topology -d` command is used, configuration files are created to +enable running the SCION services in docker containers (see :ref:`docker-section`_). Otherwise, +a configuration file is created to enable running the SCION services as plain processes +(see :ref:`supervisor-section`_) +.. _supervisor-section: supervisor ---------- The ``gen/supervisord.conf`` configuration defines the programs that make up the local topology. @@ -117,6 +115,7 @@ For example:: # and now ping this host from inside AS 1-ff00:0:110, with interactive path prompt bin/scion ping --sciond $(./scion.sh sciond-addr 110) 1-ff00:0:111,127.0.0.1 --interactive +.. _docker-section: docker ------ The main docker compose file is ``gen/scion-dc.yml``. @@ -195,7 +194,7 @@ The basic usage is ``./scion.sh ``. The main subcommands a .. option:: stop-monitoring Stop the monitoring services. - + .. option:: sciond-addr Return the address for the scion daemon for the matching ISD-AS by consulting diff --git a/scion.sh b/scion.sh index 8aaf55ba8d..4a93ade2cc 100755 --- a/scion.sh +++ b/scion.sh @@ -46,8 +46,8 @@ start_scion() { cmd_start() { start_scion - echo "Note that jaeger is not included anymore." - echo "To run jaeger and prometheus, use run_monitoring." + echo "Note that jaeger is no longer started automatically." + echo "To run jaeger and prometheus, use $PROGRAM start-monitoring." 
} @@ -63,6 +63,8 @@ cmd_start-monitoring() { return fi echo "Running monitoring..." + echo "Jaeger UI: http://localhost:16686" + echo "Prometheus UI: http://localhost:9090" ./tools/quiet ./tools/dc monitoring up -d } @@ -118,8 +120,8 @@ stop_scion() { cmd_stop() { stop_scion - echo "Note that jaeger is not included anymore." - echo "To stop jaeger and prometheus, use stop-monitoring." + echo "Note that jaeger is no longer stopped automatically." + echo "To stop jaeger and prometheus, use $PROGRAM stop-monitoring." } cmd_mstop() { @@ -142,22 +144,15 @@ cmd_mstatus() { [ -z "$services" ] && { echo "ERROR: No process matched for $@!"; exit 255; } rscount=$(./tools/dc scion ps --status=running --format "{{.Name}}" $services | wc -l) tscount=$(echo "$services" | wc -w) # Number of all globed services - ./tools/dc scion ps -a \ - --status=paused \ - --status=restarting \ - --status=removing \ - --status=dead \ - --status=created \ - --status=exited \ - --format "{{.Name}} {{.State}}" $services + ./tools/dc scion ps -a --format "table {{.Name}}\t{{upper .State}}\tuptime {{.RunningFor}}" $services | sed "s/ ago//" | tail -n+2 [ $rscount -eq $tscount ] else if [ $# -ne 0 ]; then services="$(glob_supervisor "$@")" [ -z "$services" ] && { echo "ERROR: No process matched for $@!"; exit 255; } - tools/supervisor.sh status "$services" | grep -v RUNNING + tools/supervisor.sh status "$services" else - tools/supervisor.sh status | grep -v RUNNING + tools/supervisor.sh status fi [ $? 
-eq 1 ] fi @@ -205,36 +200,6 @@ is_docker_be() { [ -f gen/scion-dc.yml ] } -traces_name() { - local name=jaeger_read_badger_traces - echo "$name" -} - -cmd_start-traces() { - set -e - local trace_dir=${1:-"$(readlink -e .)/traces"} - local port=16687 - local name=$(traces_name) - cmd_stop-traces - docker run -d --name "$name" \ - -u "$(id -u):$(id -g)" \ - -e SPAN_STORAGE_TYPE=badger \ - -e BADGER_EPHEMERAL=false \ - -e BADGER_DIRECTORY_VALUE=/badger/data \ - -e BADGER_DIRECTORY_KEY=/badger/key \ - -v "$trace_dir:/badger" \ - -p "$port":16686 \ - jaegertracing/all-in-one:1.22.0 - sleep 3 - x-www-browser "http://localhost:$port" -} - -cmd_stop-traces() { - local name=$(traces_name) - docker stop "$name" || true - docker rm "$name" || true -} - cmd_help() { cat <<-_EOF SCION @@ -276,10 +241,6 @@ cmd_help() { Draw a graphviz graph of a *.topo topology configuration file. $PROGRAM help Show this text. - $PROGRAM start-traces [folder] - Serve jaeger traces from the specified folder (default: traces/). - $PROGRAM stop-traces - Stop the jaeger container started during the start-traces command. $PROGRAM bazel-remote Starts the bazel remote. 
_EOF @@ -291,7 +252,7 @@ COMMAND="$1" shift case "$COMMAND" in - help|start|start-monitoring|mstart|mstatus|mstop|stop|stop-monitoring|status|topology|sciond-addr|start-traces|stop-traces|topo-clean|topodot|bazel-remote) + help|start|start-monitoring|mstart|mstatus|mstop|stop|stop-monitoring|status|topology|sciond-addr|topo-clean|topodot|bazel-remote) "cmd_$COMMAND" "$@" ;; run) cmd_start "$@" ;; *) cmd_help; exit 1 ;; diff --git a/tools/jaeger_ui.sh b/tools/jaeger_ui.sh new file mode 100755 index 0000000000..3fe3a2b925 --- /dev/null +++ b/tools/jaeger_ui.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# BEGIN subcommand functions + +traces_name() { + local name=jaeger_read_badger_traces + echo "$name" +} + +cmd_start() { + set -e + local trace_dir=${1:-"$(readlink -e .)/traces"} + local port=16687 + local name=$(traces_name) + cmd_stop + docker run -d --name "$name" \ + -u "$(id -u):$(id -g)" \ + -e SPAN_STORAGE_TYPE=badger \ + -e BADGER_EPHEMERAL=false \ + -e BADGER_DIRECTORY_VALUE=/badger/data \ + -e BADGER_DIRECTORY_KEY=/badger/key \ + -v "$trace_dir:/badger" \ + -p "$port":16686 \ + jaegertracing/all-in-one:1.22.0 + sleep 3 + echo "Jaeger UI: http://localhost:$port" +} + +cmd_stop() { + local name=$(traces_name) + docker stop "$name" || true + docker rm "$name" || true +} + +cmd_help() { + cat <<-_EOF + $PROGRAM is a helper script to start and stop a stand-alone jaeger UI. + It does not initiate any jaeger trace collection and shows whatever + traces happen to be in the given directory; not necessarily scion traces. + + If you mean to initiate SCION jaeger traces (and look at them), use + scion.sh start-traces instead. + + Usage: + $PROGRAM start + Serve jaeger traces from the default folder: traces/. + $PROGRAM start [folder] + Serve jaeger traces from the specified folder. + $PROGRAM stop + Stop the jaeger container started with start command. 
+ _EOF +} +# END subcommand functions + +PROGRAM="${0##*/}" +COMMAND="$1" +shift + +case "$COMMAND" in + help|start|stop) + "cmd_$COMMAND" "$@" ;; + *) cmd_help; exit 1 ;; +esac From 08b97540d214402fd054cec1ad04c8ed44222918 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Fri, 29 Sep 2023 13:27:49 +0200 Subject: [PATCH 18/24] build: add --compatibility arg to docker compose. Had missed that file. --- acceptance/sig_short_exp_time/test | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/acceptance/sig_short_exp_time/test b/acceptance/sig_short_exp_time/test index 55bd704850..10bc25eb2a 100755 --- a/acceptance/sig_short_exp_time/test +++ b/acceptance/sig_short_exp_time/test @@ -55,24 +55,24 @@ run_test() {(set -e docker image load -i acceptance/sig_short_exp_time/sig1.tar docker image load -i acceptance/sig_short_exp_time/sig2.tar - docker compose -f acceptance/sig_short_exp_time/docker-compose.yml up -d dispatcher1 dispatcher2 sig1 sig2 patha pathb + docker compose --compatibility -f acceptance/sig_short_exp_time/docker-compose.yml up -d dispatcher1 dispatcher2 sig1 sig2 patha pathb # Set up forward route on network stack 1 and 2 through the sig tunnel # device. The route is a property of the network stack, and persists after # the container that added it has exited. # # If the route configuration fails, the test is not stopped. 
- docker compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name route1 --rm tester1 ip route add 242.254.200.2/32 dev sig || true - docker compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name route2 --rm tester2 ip route add 242.254.100.2/32 dev sig || true + docker compose --compatibility -f acceptance/sig_short_exp_time/docker-compose.yml run --name route1 --rm tester1 ip route add 242.254.200.2/32 dev sig || true + docker compose --compatibility -f acceptance/sig_short_exp_time/docker-compose.yml run --name route2 --rm tester2 ip route add 242.254.100.2/32 dev sig || true echo "Start background ping, ping every 0.2 seconds" - docker compose -f acceptance/sig_short_exp_time/docker-compose.yml run --name tester1 -d tester1 ping -i 0.2 242.254.200.2 + docker compose --compatibility -f acceptance/sig_short_exp_time/docker-compose.yml run --name tester1 -d tester1 ping -i 0.2 242.254.200.2 echo "Waiting 10 seconds for path A to expire..." sleep 10 echo "Path A expired, simulating it by shutting down path A proxy" # Traffic should have switched beforehand to path b, and no pings should be lost - docker compose -f acceptance/sig_short_exp_time/docker-compose.yml stop patha + docker compose --compatibility -f acceptance/sig_short_exp_time/docker-compose.yml stop patha sleep 1 docker kill -s SIGINT tester1 @@ -104,9 +104,9 @@ OUTPUT_DIR=$TEST_UNDECLARED_OUTPUTS_DIR mkdir -p $OUTPUT_DIR/logs for CNTR in sig1 sig2 dispatcher1 dispatcher2; do - docker compose -f acceptance/sig_short_exp_time/docker-compose.yml logs "$CNTR" > "$OUTPUT_DIR/logs/$CNTR.log" + docker compose --compatibility -f acceptance/sig_short_exp_time/docker-compose.yml logs "$CNTR" > "$OUTPUT_DIR/logs/$CNTR.log" done -docker compose -f acceptance/sig_short_exp_time/docker-compose.yml down -v +docker compose --compatibility -f acceptance/sig_short_exp_time/docker-compose.yml down -v exit $RC From 0bfc34efe6eb71a8f7f7ac9c6bc59459093e9d24 Mon Sep 17 00:00:00 2001 From: 
Jean-Christophe Hugly Date: Fri, 29 Sep 2023 18:27:13 +0200 Subject: [PATCH 19/24] build: small simplification. --- tools/jaeger_ui.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/jaeger_ui.sh b/tools/jaeger_ui.sh index 3fe3a2b925..071d4c2463 100755 --- a/tools/jaeger_ui.sh +++ b/tools/jaeger_ui.sh @@ -3,8 +3,7 @@ # BEGIN subcommand functions traces_name() { - local name=jaeger_read_badger_traces - echo "$name" + echo "jaeger_read_badger_traces" } cmd_start() { From f72b7405088383a8a3cdc40a63539004e8b576af Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Mon, 2 Oct 2023 12:13:36 +0200 Subject: [PATCH 20/24] build: fix internal refs in doc. --- doc/dev/run.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/dev/run.rst b/doc/dev/run.rst index 2a917f05cd..97522de8bb 100644 --- a/doc/dev/run.rst +++ b/doc/dev/run.rst @@ -85,11 +85,12 @@ for example, ``gen/sciond_addresses.json`` is a simple mapping from AS number to corresponding :doc:`scion daemon ` instance. If :option:`scion.sh topology -d` command is used, configuration files are created to -enable running the SCION services in docker containers (see :ref:`docker-section`_). Otherwise, +enable running the SCION services in docker containers (see :ref:`docker-section`). Otherwise, a configuration file is created to enable running the SCION services as plain processes -(see :ref:`supervisor-section`_) +(see :ref:`supervisor-section`) .. _supervisor-section: + supervisor ---------- The ``gen/supervisord.conf`` configuration defines the programs that make up the local topology. @@ -116,6 +117,7 @@ For example:: bin/scion ping --sciond $(./scion.sh sciond-addr 110) 1-ff00:0:111,127.0.0.1 --interactive .. _docker-section: + docker ------ The main docker compose file is ``gen/scion-dc.yml``. 
From 3f62376f169a40562150f0d8356d8a45394d3ed8 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Mon, 2 Oct 2023 13:03:11 +0200 Subject: [PATCH 21/24] build: fix mstatus bug in supervisord mode. Also simplified a bit. No need to special case an empty argument list or compute the result differently in supervisor vs docker cases. --- scion.sh | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scion.sh b/scion.sh index 4a93ade2cc..cf851d4f64 100755 --- a/scion.sh +++ b/scion.sh @@ -139,24 +139,23 @@ cmd_status() { } cmd_mstatus() { + rscount=0 + tscount=0 if is_docker_be; then services="$(glob_docker "$@")" [ -z "$services" ] && { echo "ERROR: No process matched for $@!"; exit 255; } rscount=$(./tools/dc scion ps --status=running --format "{{.Name}}" $services | wc -l) tscount=$(echo "$services" | wc -w) # Number of all globed services ./tools/dc scion ps -a --format "table {{.Name}}\t{{upper .State}}\tuptime {{.RunningFor}}" $services | sed "s/ ago//" | tail -n+2 - [ $rscount -eq $tscount ] else - if [ $# -ne 0 ]; then - services="$(glob_supervisor "$@")" - [ -z "$services" ] && { echo "ERROR: No process matched for $@!"; exit 255; } - tools/supervisor.sh status "$services" - else - tools/supervisor.sh status - fi - [ $? -eq 1 ] + services="$(glob_supervisor "$@")" + [ -z "$services" ] && { echo "ERROR: No process matched for $@!"; exit 255; } + rscount=$(./tools/supervisor.sh status "$services" | grep RUNNIN | wc -l) + tscount=$(echo "$services" | wc -w) # Number of all globed services + tools/supervisor.sh status "$services" fi # If all tasks are running, then return 0. Else return 1. + [ $rscount -eq $tscount ] return } From 6c7963277cdb9fdf9289e5781dea1c73b0acbab0 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Mon, 2 Oct 2023 14:07:38 +0200 Subject: [PATCH 22/24] build: fix formating. 
--- doc/dev/run.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/dev/run.rst b/doc/dev/run.rst index 97522de8bb..0de92eb3cc 100644 --- a/doc/dev/run.rst +++ b/doc/dev/run.rst @@ -15,7 +15,8 @@ The scripts support two different process orchestrators as "backends": - `supervisor `_. This is the default and a bit more light-weight. Packets are sent over the loopback interface. - `docker compose `_. Runs individual processes in separate containers connected with docker network bridges. Only this mode supports running a "SCION-IP gateway". - .. hint:: Before attempting to use the docker compose mode, be sure to build the necessary docker images with `make docker-images` + + .. hint:: Before attempting to use the docker compose mode, be sure to build the necessary docker images with ``make docker-images`` .. TODO - Describe configuration directory (referencing manuals) From dc4b7865f693ba29326e267297d52fc7fd983ed8 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Tue, 3 Oct 2023 13:02:14 +0200 Subject: [PATCH 23/24] build: Provide a proper home dir to python tests that run "docker compose". This is due to a bug in some version of docker-compose v2 that causes an unused .cache directory to be created in $HOME. Due to a bug in Bazel, tests don't have a HOME variable (it should be set to $TEST_TMPDIR but isn't set at all). So we must provide one ourselves. My previous work around of using /tmp had the potential of creating collisions in the future. This method is cleaner. It uses an output location internal to the test. This is the simplest I could manage. Tools are supposed to be enablers, but Bazel prefers to be a disabler it seems. 
--- BUILD.bazel | 1 + acceptance/common/raw.bzl | 8 +++++++- acceptance/common/topogen.bzl | 8 +++++++- acceptance/router_multi/BUILD.bazel | 2 ++ acceptance/trc_update/BUILD.bazel | 1 + 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index be76b0e6c1..267914fd4b 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -245,6 +245,7 @@ updatesrc_update_all( write_source_files( name = "write_all_source_files", additional_update_targets = [ + "//doc/_build/_static/command:write_files", "//doc/command:write_files", ], ) diff --git a/acceptance/common/raw.bzl b/acceptance/common/raw.bzl index ba16e2b23b..2f024ca8d9 100644 --- a/acceptance/common/raw.bzl +++ b/acceptance/common/raw.bzl @@ -1,6 +1,11 @@ load("//tools/lint:py.bzl", "py_binary", "py_library", "py_test") load("@com_github_scionproto_scion_python_deps//:requirements.bzl", "requirement") +# Bug in bazel: HOME isn't set to TEST_TMPDIR. +# Bug in docker-compose v2.21 a writable HOME is required (eventhough not used). +# Poor design in Bazel, there's no sane way to obtain the path to some +# location that's not a litteral dependency. +# So, HOME must be provided by the invoker. def raw_test( name, src, @@ -8,6 +13,7 @@ def raw_test( deps = [], data = [], tags = [], + homedir = "", local = False): py_library( name = "%s_lib" % name, @@ -63,6 +69,6 @@ def raw_test( "PYTHONUNBUFFERED": "1", # Ensure that unicode output can be printed to the log/console "PYTHONIOENCODING": "utf-8", - "HOME": "/tmp", + "HOME": homedir, }, ) diff --git a/acceptance/common/topogen.bzl b/acceptance/common/topogen.bzl index b92967e269..08d90df42f 100644 --- a/acceptance/common/topogen.bzl +++ b/acceptance/common/topogen.bzl @@ -1,6 +1,11 @@ load("//tools/lint:py.bzl", "py_binary", "py_library", "py_test") load("@com_github_scionproto_scion_python_deps//:requirements.bzl", "requirement") +# Bug in bazel: HOME isn't set to TEST_TMPDIR. 
+# Bug in docker-compose v2.21: a writable HOME is required (even though not used). +# Poor design in Bazel, there's no sane way to obtain the path to some +# location that's not a literal dependency. +# So, HOME must be provided by the invoker. def topogen_test( name, src, @@ -10,6 +15,7 @@ def topogen_test( args = [], deps = [], data = [], + homedir = "", tester = "//docker:tester"): """Creates a test based on a topology file. @@ -105,7 +111,7 @@ def topogen_test( "PYTHONUNBUFFERED": "1", # Ensure that unicode output can be printed to the log/console "PYTHONIOENCODING": "utf-8", - "HOME": "/tmp", + "HOME": homedir, }, ) diff --git a/acceptance/router_multi/BUILD.bazel b/acceptance/router_multi/BUILD.bazel index 1809014062..762c404da2 100644 --- a/acceptance/router_multi/BUILD.bazel +++ b/acceptance/router_multi/BUILD.bazel @@ -30,6 +30,7 @@ raw_test( "--bfd", ], data = data, + homedir = "$(rootpath :router)", # This test uses sudo and accesses /var/run/netns. local = True, ) @@ -39,6 +40,7 @@ raw_test( src = "test.py", args = args, data = data, + homedir = "$(rootpath :router)", # This test uses sudo and accesses /var/run/netns. local = True, ) diff --git a/acceptance/trc_update/BUILD.bazel b/acceptance/trc_update/BUILD.bazel index 4f91062faf..6e8125faf7 100644 --- a/acceptance/trc_update/BUILD.bazel +++ b/acceptance/trc_update/BUILD.bazel @@ -7,5 +7,6 @@ topogen_test( "--executable=end2end_integration:$(location //tools/end2end_integration)", ], data = ["//tools/end2end_integration"], + homedir = "$(rootpath //tools/end2end_integration)", topo = "//topology:tiny4.topo", ) From 2a8b43ccce14a0cbdf3d94551caab8bd3e231464 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hugly Date: Thu, 5 Oct 2023 11:32:38 +0200 Subject: [PATCH 24/24] build: delete the install_docker script. This gets outdated too fast. Better to refer the user to docker install instructions. 
--- tools/install_docker | 48 -------------------------------------------- 1 file changed, 48 deletions(-) delete mode 100755 tools/install_docker diff --git a/tools/install_docker b/tools/install_docker deleted file mode 100755 index 384219a5d8..0000000000 --- a/tools/install_docker +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -set -e - -[ $(id -u) -eq 0 ] || { echo "Error: this script should be run as root" && exit 1; } - -# Install prereqs -DEBIAN_FRONTEND=noninteractive apt-get install -y apt-transport-https ca-certificates curl lsb-release software-properties-common - -chksum() { - echo "${1:?} ${2:?}" | sha256sum --status -c - -} - -if [ "$(lsb_release -is)" == "LinuxMint" ]; then - release=$(grep UBUNTU_CODENAME /etc/os-release | cut -f2 -d=) -fi -release=${release:-$(lsb_release -cs)} - -# Add docker apt repo. -if ! grep -Rq "https://download.docker.com/linux/ubuntu" /etc/apt/sources.list.d/; then - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - - add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $release stable" - apt-get update -fi - -# Install docker-ce -DEBIAN_FRONTEND=noninteractive apt-get install -y docker-ce docker-ce-cli containerd.io - -# Install docker-compose v2.5.1 -case "$(uname -m)" in - x86_64) - src=https://github.com/docker/compose/releases/download/v2.5.1/docker-compose-linux-x86_64 - sum=d99de1ea7616f2a4c7914d37674f0650660a5e832be555805a71c0fc377233c9 - file=/usr/libexec/docker/cli-plugins/docker-compose - ;; - *) - echo "ERROR: unsupported architecture '$(uname -m)'" - exit 1 -esac -# If the file doesn't exist, or the checksum fails, (re)download it. -if [ ! -e "$file" ] || ! chksum $sum $file; then - curl -sSL "$src" -o "$file" - chksum $sum $file || { echo "Error: $file doesn't match the expected checksum. ($sum)"; exit 1; } -fi -chmod +x "$file" - -# Install docker-compose switch. 
-curl -fL https://raw.githubusercontent.com/docker/compose-switch/master/install_on_linux.sh | sh