testing: an improved router benchmark that only runs one router (#4444)

This is an alternative to router_benchmark and is meant to eventually replace it. Its main feature is that it only runs a single router. As a result, its measured performance is far less dependent on the number of cores available on the test system. It is heavily inspired by the braccept test, in that: * it relies on the test harness to create an isolated network and veth interfaces to connect the test driver to the router. * it manufactures packets that are fed directly into the router's interfaces and it optionally captures packets for verification in the same manner. * it assumes a custom topology and cannot be usefully run on any other topology. It differs from braccept in that: * It does far less verification of the forwarded packets (it's braccept's job to do that). * It is considerably simplified, mainly because of the lighter verifications. * It relies on a simple address-assignment scheme so code maintainers don't have to keep tens of addresses in mind. * It does not assume that the router runs in a container network; it could also talk to a real router if given the names and MAC addresses of the relevant interfaces. The test harness (or the user) is responsible for supplying the interface names and MAC addresses to the test driver. * It does not require the test harness (or the user) to have any understanding of the topology (only to configure the router with it). The specifics of the captive network to be configured are supplied by the test driver. * The test harness doesn't need the "pause" container anymore. The similarity between this and braccept means that, in the future, we could re-converge them. Notably, the benefits of the simple addressing scheme would make the braccept test cases easier to maintain or expand. Collateral: the Go version required was bumped to 1.21. Fixes #4442
scionproto · Dec 7, 2023 · d9c6c00 · d9c6c00
1 parent b4e65a6
commit d9c6c00
Show file tree

Hide file tree

Showing 21 changed files with 1,763 additions and 4 deletions.
diff --git a/acceptance/common/docker.py b/acceptance/common/docker.py
@@ -197,7 +197,7 @@ def assert_no_networks(writer=None):
             writer.write("Docker networking assertions are OFF\n")
         return
 
-    allowed_nets = ['bridge', 'host', 'none']
+    allowed_nets = ['bridge', 'host', 'none', 'benchmark']
     unexpected_nets = []
     for net in _get_networks():
         if net.name not in allowed_nets:

diff --git a/acceptance/router_benchmark/test.py b/acceptance/router_benchmark/test.py
@@ -141,7 +141,7 @@ def _run(self):
             "-name", "router_benchmark",
             "-cmd", "./bin/end2endblast",
             "-attempts", 1500000,
-            "-timeout", "120s",  # Timeout is for all attempts together
+            "-timeout", "180s",  # Timeout is for all attempts together
             "-parallelism", 100,
             "-subset", "noncore#core#remoteISD"
         ].run_tee()

diff --git a/acceptance/router_newbenchmark/BUILD.bazel b/acceptance/router_newbenchmark/BUILD.bazel
@@ -0,0 +1,28 @@
+load("//acceptance/common:raw.bzl", "raw_test")
+
+exports_files([
+    "conf",
+    "test.py",
+])
+
+args = [
+    "--executable",
+    "brload:$(location //acceptance/router_newbenchmark/brload:brload)",
+    "--container-loader=posix-router:latest#$(location //docker:posix_router)",
+]
+
+data = [
+    ":conf",
+    "//docker:posix_router",
+    "//acceptance/router_newbenchmark/brload:brload",
+]
+
+raw_test(
+    name = "test",
+    src = "test.py",
+    args = args,
+    data = data,
+    homedir = "$(rootpath //docker:posix_router)",
+    # This test uses sudo and accesses /var/run/netns.
+    local = True,
+)
diff --git a/acceptance/router_newbenchmark/brload/BUILD.bazel b/acceptance/router_newbenchmark/brload/BUILD.bazel
@@ -0,0 +1,27 @@
+load("//tools/lint:go.bzl", "go_library")
+load("//:scion.bzl", "scion_go_binary")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "github.com/scionproto/scion/acceptance/router_newbenchmark/brload",
+    visibility = ["//visibility:private"],
+    deps = [
+        "//acceptance/router_newbenchmark/cases:go_default_library",
+        "//pkg/log:go_default_library",
+        "//pkg/private/serrors:go_default_library",
+        "//pkg/scrypto:go_default_library",
+        "//pkg/slayers:go_default_library",
+        "//private/keyconf:go_default_library",
+        "@com_github_google_gopacket//:go_default_library",
+        "@com_github_google_gopacket//afpacket:go_default_library",
+        "@com_github_google_gopacket//layers:go_default_library",
+        "@com_github_spf13_cobra//:go_default_library",
+    ],
+)
+
+scion_go_binary(
+    name = "brload",
+    embed = [":go_default_library"],
+    visibility = ["//visibility:public"],
+)
diff --git a/acceptance/router_newbenchmark/brload/main.go b/acceptance/router_newbenchmark/brload/main.go
@@ -0,0 +1,316 @@
+// Copyright 2023 SCION Association
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"hash"
+	"net"
+	"os"
+	"path/filepath"
+	"reflect"
+	"strings"
+	"time"
+
+	"github.com/google/gopacket"
+	"github.com/google/gopacket/afpacket"
+	"github.com/google/gopacket/layers"
+	"github.com/spf13/cobra"
+
+	"github.com/scionproto/scion/acceptance/router_newbenchmark/cases"
+	"github.com/scionproto/scion/pkg/log"
+	"github.com/scionproto/scion/pkg/private/serrors"
+	"github.com/scionproto/scion/pkg/scrypto"
+	"github.com/scionproto/scion/pkg/slayers"
+	"github.com/scionproto/scion/private/keyconf"
+)
+
+// Case is the signature shared by all the traffic generators registered in allCases.
+// As used by run, a Case receives the payload string, the hop-field MAC hash and the
+// number of distinct packets (streams) wanted. It returns the name of the device to
+// write packets to, the name of the device to read forwarded packets from, and the
+// raw packets themselves.
+type Case func(payload string, mac hash.Hash, numDistinct int) (string, string, [][]byte)
+
+// caseChoice is a flag value that only accepts the names of known cases, that is,
+// the keys of allCases.
+type caseChoice string
+
+// String returns the currently selected case name.
+func (c *caseChoice) String() string {
+	name := *c
+	return string(name)
+}
+
+// Set stores v as the selected case, or returns an error if v is not a key of allCases.
+func (c *caseChoice) Set(v string) error {
+	if _, known := allCases[v]; !known {
+		return errors.New("No such case")
+	}
+	*c = caseChoice(v)
+	return nil
+}
+
+// Type returns the type description of this flag value.
+func (c *caseChoice) Type() string {
+	return "string enum"
+}
+
+// Allowed returns a help string listing the accepted case names. The names come from
+// the keys of allCases, in no particular order.
+func (c *caseChoice) Allowed() string {
+	names := reflect.ValueOf(allCases).MapKeys()
+	return fmt.Sprintf("One of: %v", names)
+}
+
+var (
+	// allCases maps each name accepted by the "case" flag to the function that
+	// implements that case.
+	allCases = map[string]Case{
+		"in":          cases.In,
+		"out":         cases.Out,
+		"in_transit":  cases.InTransit,
+		"out_transit": cases.OutTransit,
+		"br_transit":  cases.BrTransit,
+	}
+	// The variables below receive the values of the "run" sub-command flags (see main):
+	// log.console, artifacts, num-packets, num-streams, case and interface, respectively.
+	logConsole string
+	dir        string
+	numPackets int
+	numStreams int
+	caseToRun  caseChoice
+	interfaces []string
+)
+
+// main assembles the brload command line, made of the "show-interfaces" and "run"
+// sub-commands, and executes it. Each sub-command exits the process with the status
+// returned by its implementation; a command-line processing error exits with 1.
+func main() {
+	root := &cobra.Command{
+		Use:   "brload",
+		Short: "Generates traffic into a specific router of a specific topology",
+	}
+	root.CompletionOptions.HiddenDefaultCmd = true
+
+	showCmd := &cobra.Command{
+		Use:   "show-interfaces",
+		Short: "Provides a terse list of the interfaces that this test requires",
+		Run: func(cmd *cobra.Command, args []string) {
+			status := showInterfaces(cmd)
+			os.Exit(status)
+		},
+	}
+
+	runCmd := &cobra.Command{
+		Use:   "run",
+		Short: "Executes the test",
+		Run: func(cmd *cobra.Command, args []string) {
+			status := run(cmd)
+			os.Exit(status)
+		},
+	}
+
+	// All the flags belong to the "run" sub-command.
+	flags := runCmd.Flags()
+	flags.IntVar(&numPackets, "num-packets", 10, "Number of packets to send")
+	flags.IntVar(&numStreams, "num-streams", 4,
+		"Number of independent streams (flowID) to use")
+	flags.StringVar(&logConsole, "log.console", "error",
+		"Console logging level: debug|info|error|etc.")
+	flags.StringVar(&dir, "artifacts", "", "Artifacts directory")
+	flags.Var(&caseToRun, "case", "Case to run. "+caseToRun.Allowed())
+	flags.StringArrayVar(&interfaces, "interface", []string{},
+		`label=host_interface,mac,peer_mac where:
+    host_interface: use this to exchange traffic with interface <label>
+    mac: the mac address of interface <label>
+    peer_mac: the mac address of <host_interface>`)
+	for _, required := range []string{"case", "interface"} {
+		runCmd.MarkFlagRequired(required)
+	}
+
+	root.AddCommand(showCmd, runCmd)
+
+	if err := root.Execute(); err != nil {
+		os.Exit(1)
+	}
+	os.Exit(0)
+}
+
+// showInterfaces implements the "show-interfaces" sub-command: it prints the result of
+// cases.ListInterfaces() on standard output. The cmd argument is not used.
+// It always returns 0, which the caller uses as the process exit status.
+func showInterfaces(cmd *cobra.Command) int {
+	fmt.Println(cases.ListInterfaces())
+	return 0
+}
+
+// run implements the "run" sub-command. It sets up logging, loads the hop-field key
+// from the artifacts directory (the TEST_ARTIFACTS_DIR environment variable, when set,
+// takes precedence over the "artifacts" flag), opens the packet devices, and has the
+// selected case build its packets. It then writes numPackets packets to the case's
+// input device, cycling through the first numStreams packets, while a listener waits
+// for one forwarded packet with the expected payload on the case's output device.
+//
+// The "metricsBegin: ... metricsEnd: ..." line printed on standard output brackets the
+// sending loop with Unix timestamps (seconds).
+//
+// The returned value is meant to be the process exit status: 0 on success, 1 on any
+// failure, including the case where no valid forwarded packet is seen within one
+// second after the last packet was sent.
+func run(cmd *cobra.Command) int {
+	logCfg := log.Config{Console: log.ConsoleConfig{Level: logConsole}}
+	if err := log.Setup(logCfg); err != nil {
+		fmt.Fprintf(os.Stderr, "%s\n", err)
+		return 1
+	}
+	defer log.HandlePanic()
+
+	// Packets are selected with i%numStreams below: a zero value would cause a division
+	// by zero and a negative one an out-of-range index.
+	if numStreams <= 0 {
+		log.Error("Invalid number of streams", "num-streams", numStreams)
+		return 1
+	}
+
+	caseFunc := allCases[string(caseToRun)] // key already checked.
+
+	artifactsDir := dir
+	if v := os.Getenv("TEST_ARTIFACTS_DIR"); v != "" {
+		artifactsDir = v
+	}
+
+	if artifactsDir == "" {
+		log.Error("Artifacts directory not configured")
+		return 1
+	}
+
+	hfMAC, err := loadKey(artifactsDir)
+	if err != nil {
+		log.Error("Loading keys failed", "err", err)
+		return 1
+	}
+
+	cases.InitInterfaces(interfaces)
+	handles, err := openDevices()
+	if err != nil {
+		log.Error("Loading devices failed", "err", err)
+		return 1
+	}
+
+	registerScionPorts()
+
+	log.Info("BRLoad acceptance tests:")
+
+	payloadString := "actualpayloadbytes"
+	caseDevIn, caseDevOut, rawPkts := caseFunc(payloadString, hfMAC, numStreams)
+
+	// The sending loop indexes rawPkts up to numStreams-1; fail cleanly rather than panic
+	// if the case did not produce that many packets.
+	if len(rawPkts) < numStreams {
+		log.Error("Case produced fewer packets than streams", "case", string(caseToRun),
+			"packets", len(rawPkts), "streams", numStreams)
+		return 1
+	}
+
+	writePktTo, ok := handles[caseDevIn]
+	if !ok {
+		log.Error("device not found", "device", caseDevIn)
+		return 1
+	}
+
+	readPktFrom, ok := handles[caseDevOut]
+	if !ok {
+		log.Error("device not found", "device", caseDevOut)
+		return 1
+	}
+
+	// Try and pick-up one packet and check the payload. If that works, we're content
+	// that this test works.
+	packetSource := gopacket.NewPacketSource(readPktFrom, layers.LinkTypeEthernet)
+	packetChan := packetSource.Packets()
+
+	// packetChan belongs to the packet source, which keeps sending on it; closing it from
+	// here could make the sender panic. Instead, packets are relayed through a channel
+	// that we own and that the relay (its only sender) closes when told to stop.
+	relayChan := make(chan gopacket.Packet)
+	stopRelay := make(chan struct{})
+	go func() {
+		defer log.HandlePanic()
+		defer close(relayChan)
+		for {
+			select {
+			case <-stopRelay:
+				return
+			case pkt, ok := <-packetChan:
+				if !ok {
+					return
+				}
+				select {
+				case relayChan <- pkt:
+				case <-stopRelay:
+					return
+				}
+			}
+		}
+	}()
+
+	listenerChan := make(chan int)
+
+	go func() {
+		defer log.HandlePanic()
+		defer close(listenerChan)
+		listenerChan <- receivePackets(relayChan, payloadString)
+	}()
+
+	// We started everything that could be started. So the best window for perf metrics
+	// opens somewhere around now.
+	metricsBegin := time.Now().Unix()
+	for i := 0; i < numPackets; i++ {
+		if err := writePktTo.WritePacketData(rawPkts[i%numStreams]); err != nil {
+			log.Error("writing input packet", "case", string(caseToRun), "error", err)
+			return 1
+		}
+	}
+	metricsEnd := time.Now().Unix()
+	// The test harness looks for this output.
+	fmt.Printf("metricsBegin: %d metricsEnd: %d\n", metricsBegin, metricsEnd)
+
+	// Get the results from the packet listener.
+	// Give it one second as in very short tests (<1M pkts) we get here before the first packet.
+	outcome := 0
+	timeout := time.After(1 * time.Second)
+	for outcome == 0 {
+		select {
+		case outcome = <-listenerChan:
+			if outcome == 0 {
+				log.Error("Listener never saw a valid packet being forwarded")
+				return 1
+			}
+		case <-timeout:
+			// If our listener is still stuck there, unstick it: stopping the relay closes
+			// the channel that the listener reads from. The timeout fires only once, so
+			// the next iteration is guaranteed an outcome from the listener.
+			close(stopRelay)
+		}
+	}
+
+	fmt.Printf("Listener results: %d\n", outcome)
+	return 0
+}
+
+// receivePackets reads packets from packetChan until it finds one whose payload layer
+// equals the given payload, or until packetChan is closed. Packets that fail to decode,
+// have no payload layer, or carry a different payload are skipped.
+// It returns the number of matching packets consumed. Currently we are content with a
+// single correct packet, so the result is 1 if a match was seen and 0 if the channel
+// was closed first.
+func receivePackets(packetChan chan gopacket.Packet, payload string) int {
+	matched := 0
+
+	for pkt := range packetChan {
+		if decodeErr := pkt.ErrorLayer(); decodeErr != nil {
+			log.Error("error decoding packet", "err", decodeErr)
+			continue
+		}
+		payloadLayer := pkt.Layer(gopacket.LayerTypePayload)
+		if payloadLayer == nil {
+			log.Error("error fetching packet payload: no PayLoad")
+			continue
+		}
+		if string(payloadLayer.LayerContents()) != payload {
+			continue
+		}
+		// To count all the matching packets instead, just remove the "return" below; the
+		// count is then returned once packetChan is closed.
+		matched++
+		return matched
+	}
+
+	// The channel was closed: no more packets.
+	log.Info("No more Packets")
+	return matched
+}
+
+// openDevices inventories the available network interfaces, picks the ones that a case
+// may inject traffic into (those named "veth_..._host"), and opens an AF Packet handle
+// on each of them. It returns the handles keyed by network interface name.
+//
+// If any handle cannot be created, the handles opened so far are closed and an error
+// naming the offending device is returned.
+func openDevices() (map[string]*afpacket.TPacket, error) {
+	devs, err := net.Interfaces()
+	if err != nil {
+		return nil, serrors.WrapStr("listing network interfaces", err)
+	}
+
+	handles := make(map[string]*afpacket.TPacket)
+
+	for _, dev := range devs {
+		if !strings.HasPrefix(dev.Name, "veth_") || !strings.HasSuffix(dev.Name, "_host") {
+			continue
+		}
+		handle, err := afpacket.NewTPacket(afpacket.OptInterface(dev.Name))
+		if err != nil {
+			// Do not leak the handles that were successfully opened before this one.
+			for _, opened := range handles {
+				opened.Close()
+			}
+			return nil, serrors.WrapStr("creating TPacket", err, "device", dev.Name)
+		}
+		handles[dev.Name] = handle
+	}
+
+	return handles, nil
+}
+
+// loadKey loads the master keys stored under <artifactsDir>/conf/keys and returns a
+// hop-field MAC hash built from Key0. These are the keys that the router under test
+// uses to sign hop fields. Errors from loading the keys or from building the MAC
+// factory are returned unchanged.
+func loadKey(artifactsDir string) (hash.Hash, error) {
+	master, err := keyconf.LoadMaster(filepath.Join(artifactsDir, "conf", "keys"))
+	if err != nil {
+		return nil, err
+	}
+	newMAC, err := scrypto.HFMacFactory(master.Key0)
+	if err != nil {
+		return nil, err
+	}
+	return newMAC(), nil
+}
+
+// registerScionPorts tells gopacket to decode the payload of UDP packets on the ports
+// below as SCION. In other words, it maps these ports to SCION as the next layer:
+// 30041-30042, 30000-30009 and 50000-50009.
+func registerScionPorts() {
+	// Each entry is a half-open range: [first, end).
+	portRanges := [][2]int{
+		{30041, 30043},
+		{30000, 30010},
+		{50000, 50010},
+	}
+	for _, r := range portRanges {
+		for port := r[0]; port < r[1]; port++ {
+			layers.RegisterUDPPortLayerType(layers.UDPPort(port), slayers.LayerTypeSCION)
+		}
+	}
+}