diff --git a/acceptance/router_benchmark/test.py b/acceptance/router_benchmark/test.py index 866798afd6..14d25e92d0 100755 --- a/acceptance/router_benchmark/test.py +++ b/acceptance/router_benchmark/test.py @@ -137,11 +137,11 @@ def _run(self): loadtest = self.get_executable("end2end_integration") retCode, stdOut, stdErr = loadtest[ "-d", - "-traces=false", "-outDir", self.artifacts, "-name", "router_benchmark", "-cmd", "./bin/end2endblast", "-attempts", 1500000, + "-timeout", "90s", # Timeout is for all attempts together "-parallelism", 100, "-subset", "noncore#core#remoteISD" ].run_tee() @@ -207,11 +207,11 @@ def _run(self): loadtest = self.get_executable("end2end_integration") retCode, stdOut, stdErr = loadtest[ "-d", - "-traces=false", "-outDir", self.artifacts, "-name", "router_benchmark", "-cmd", "./bin/end2endblast", "-attempts", 1500000, + "-timeout", "90s", # Timeout is for all attempts together "-parallelism", 100, "-subset", "noncore#noncore#remoteAS" ].run_tee() diff --git a/doc/manuals/router.rst b/doc/manuals/router.rst index 08dbf72db8..7310f5d272 100644 --- a/doc/manuals/router.rst +++ b/doc/manuals/router.rst @@ -120,8 +120,6 @@ Environment Variables :Type: :ref:`duration ` :Default: ``5m`` -.. _gomaxprocs: - .. envvar:: GOMAXPROCS Specified by the GO runtime. The Go runtime starts a number kernel threads such that the number @@ -211,7 +209,7 @@ considers the following options. experimentaly. The number of kernel threads that go creates depends on the number of usable cores, which is - controlled by the environment variable ``GOMAXPROCS``. See :ref:`GOMAXPROCS `. + controlled by the environment variable ``GOMAXPROCS``. See :env:`GOMAXPROCS`. .. option:: router.num_slow_processors = (Default: 1) diff --git a/tools/end2end/main.go b/tools/end2end/main.go index cb5b385459..067f245b6a 100644 --- a/tools/end2end/main.go +++ b/tools/end2end/main.go @@ -1,5 +1,6 @@ // Copyright 2018 ETH Zurich // Copyright 2019 ETH Zurich, Anapaya Systems +// Copyright 2023 SCION Association // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -101,14 +102,12 @@ func realMain() int { } validateFlags() - if traces { - closeTracer, err := integration.InitTracer("end2end-" + integration.Mode) - if err != nil { - log.Error("Tracer initialization failed", "err", err) - return 1 - } - defer closeTracer() + closeTracer, err := integration.InitTracer("end2end-" + integration.Mode) + if err != nil { + log.Error("Tracer initialization failed", "err", err) + return 1 } + defer closeTracer() if integration.Mode == integration.ModeServer { server{}.run() @@ -137,8 +136,7 @@ func validateFlags() { integration.LogFatal("Invalid timeout provided", "timeout", timeout) } } - log.Info("Flags", "traces", traces, "timeout", timeout, "epic", epic, - "remote", remote) + log.Info("Flags", "timeout", timeout, "epic", epic, "remote", remote) } type server struct { @@ -202,28 +200,23 @@ func (s server) handlePing(conn snet.PacketConn) error { ) } + spanCtx, err := opentracing.GlobalTracer().Extract( + opentracing.Binary, + bytes.NewReader(pld.Trace), + ) + if err != nil { + return serrors.WrapStr("extracting trace information", err) + } + span, _ := opentracing.StartSpanFromContext( + context.Background(), + "handle_ping", + ext.RPCServerOption(spanCtx), + ) + defer span.Finish() withTag := func(err error) error { + tracing.Error(span, err) return err } - if traces { - spanCtx, err := opentracing.GlobalTracer().Extract( - opentracing.Binary, - bytes.NewReader(pld.Trace), - ) - if err != nil { - return serrors.WrapStr("extracting trace information", err) - } - span, _ := opentracing.StartSpanFromContext( - context.Background(), - "handle_ping", - ext.RPCServerOption(spanCtx), - ) - defer span.Finish() - withTag = func(err error) error { - tracing.Error(span, err) - return err - } - } if pld.Message != ping || !pld.Server.Equal(integration.Local.IA) { return withTag(serrors.New("unexpected data in payload", @@ -309,16 +302,12 @@ func (c *client) run() int { func (c *client) attemptRequest(n int) bool { timeoutCtx, cancel := context.WithTimeout(context.Background(), timeout.Duration) defer cancel() - ctx := timeoutCtx - span, _ := tracing.NilCtx() - if traces { - span, ctx = tracing.CtxWith(timeoutCtx, "attempt") - span.SetTag("attempt", n) - span.SetTag("src", integration.Local.IA) - span.SetTag("dst", remote.IA) - defer span.Finish() - } + span, ctx := tracing.CtxWith(timeoutCtx, "attempt") + span.SetTag("attempt", n) + span.SetTag("src", integration.Local.IA) + span.SetTag("dst", remote.IA) + defer span.Finish() logger := log.FromCtx(ctx) @@ -328,25 +317,20 @@ func (c *client) attemptRequest(n int) bool { return false } + span, ctx = tracing.StartSpanFromCtx(ctx, "attempt.ping") + defer span.Finish() withTag := func(err error) error { + tracing.Error(span, err) return err } - if traces { - span, ctx = tracing.StartSpanFromCtx(ctx, "attempt.ping") - defer span.Finish() - withTag = func(err error) error { - tracing.Error(span, err) - return err - } - } // Send ping - if err := c.ping(ctx, n, path, true); err != nil { + if err := c.ping(ctx, n, path); err != nil { logger.Error("Could not send packet", "err", withTag(err)) return false } // Receive pong - if err := c.pong(ctx, true); err != nil { + if err := c.pong(ctx); err != nil { tracing.Error(span, err) logger.Error("Error receiving pong", "err", withTag(err)) if path != nil { @@ -357,7 +341,7 @@ func (c *client) attemptRequest(n int) bool { return true } -func (c *client) ping(ctx context.Context, n int, path snet.Path, logIfOk bool) error { +func (c *client) ping(ctx context.Context, n int, path snet.Path) error { rawPing, err := json.Marshal(Ping{ Server: remote.IA, Message: ping, @@ -402,9 +386,9 @@ func (c *client) ping(ctx context.Context, n int, path snet.Path, logIfOk bool) }, }, } - if logIfOk { - log.Info("sending ping", "attempt", n, "path", path) - } + + log.Info("sending ping", "attempt", n, "path", path) + if err := c.conn.WriteTo(pkt, remote.NextHop); err != nil { return err } @@ -466,7 +450,7 @@ func (c *client) getRemote(ctx context.Context, n int) (snet.Path, error) { return path, nil } -func (c *client) pong(ctx context.Context, logIfOk bool) error { +func (c *client) pong(ctx context.Context) error { if err := c.conn.SetReadDeadline(getDeadline(ctx)); err != nil { return serrors.WrapStr("setting read deadline", err) } @@ -494,9 +478,9 @@ func (c *client) pong(ctx context.Context, logIfOk bool) error { if pld.Client != expected.Client || pld.Server != expected.Server || pld.Message != pong { return serrors.New("unexpected contents received", "data", pld, "expected", expected) } - if logIfOk { - log.Info("Received pong", "server", p.Source) - } + + log.Info("Received pong", "server", p.Source) + return nil } diff --git a/tools/end2end_integration/main.go b/tools/end2end_integration/main.go index 2e02007a09..f2caa9c062 100644 --- a/tools/end2end_integration/main.go +++ b/tools/end2end_integration/main.go @@ -43,7 +43,6 @@ var ( cmd string features string epic bool - traces bool ) func getCmd() (string, bool) { @@ -76,12 +75,10 @@ func realMain() int { "-local", integration.SrcAddrPattern + ":0", "-remote", integration.DstAddrPattern + ":" + integration.ServerPortReplace, fmt.Sprintf("-epic=%t", epic), - fmt.Sprintf("-traces=%t", traces), } serverArgs := []string{ "-mode", "server", "-local", integration.DstAddrPattern + ":0", - fmt.Sprintf("-traces=%t", traces), } if len(features) != 0 { clientArgs = append(clientArgs, "--features", features) @@ -121,7 +118,6 @@ func addFlags() { flag.StringVar(&features, "features", "", fmt.Sprintf("enable development features (%v)", feature.String(&feature.Default{}, "|"))) flag.BoolVar(&epic, "epic", false, "Enable EPIC.") - flag.BoolVar(&traces, "traces", true, "Enable Jaeger traces.") } // runTests runs the end2end tests for all pairs. In case of an error the @@ -298,7 +294,6 @@ func clientTemplate(progressSock string) integration.Cmd { "-timeout", timeout.String(), "-local", integration.SrcAddrPattern + ":0", "-remote", integration.DstAddrPattern + ":" + integration.ServerPortReplace, - fmt.Sprintf("-traces=%t", traces), fmt.Sprintf("-epic=%t", epic), }, } diff --git a/tools/end2endblast/BUILD.bazel b/tools/end2endblast/BUILD.bazel index c880a30652..e89d3c45cc 100644 --- a/tools/end2endblast/BUILD.bazel +++ b/tools/end2endblast/BUILD.bazel @@ -18,11 +18,8 @@ go_library( "//pkg/snet/path:go_default_library", "//pkg/sock/reliable:go_default_library", "//private/topology:go_default_library", - "//private/tracing:go_default_library", "//tools/integration:go_default_library", "//tools/integration/integrationlib:go_default_library", - "@com_github_opentracing_opentracing_go//:go_default_library", - "@com_github_opentracing_opentracing_go//ext:go_default_library", ], ) diff --git a/tools/end2endblast/main.go b/tools/end2endblast/main.go index f8900bdd4c..74d04ef854 100644 --- a/tools/end2endblast/main.go +++ b/tools/end2endblast/main.go @@ -21,7 +21,6 @@ package main import ( - "bytes" "context" "encoding/json" "errors" @@ -32,9 +31,6 @@ import ( "os" "time" - "github.com/opentracing/opentracing-go" - "github.com/opentracing/opentracing-go/ext" - "github.com/scionproto/scion/pkg/addr" "github.com/scionproto/scion/pkg/daemon" "github.com/scionproto/scion/pkg/log" @@ -46,7 +42,6 @@ import ( snetpath "github.com/scionproto/scion/pkg/snet/path" "github.com/scionproto/scion/pkg/sock/reliable" "github.com/scionproto/scion/private/topology" - "github.com/scionproto/scion/private/tracing" libint "github.com/scionproto/scion/tools/integration" integration "github.com/scionproto/scion/tools/integration/integrationlib" ) @@ -59,23 +54,20 @@ const ( type Ping struct { Server addr.IA `json:"server"` Message string `json:"message"` - Trace []byte `json:"trace"` } type Pong struct { Client addr.IA `json:"client"` Server addr.IA `json:"server"` Message string `json:"message"` - Trace []byte `json:"trace"` } var ( remote snet.UDPAddr - timeout = &util.DurWrap{Duration: 10 * time.Second} + timeout = &util.DurWrap{Duration: 90 * time.Second} scionPacketConnMetrics = metrics.NewSCIONPacketConnMetrics() scmpErrorsCounter = scionPacketConnMetrics.SCMPErrors epic bool - traces bool ) func main() { @@ -93,15 +85,6 @@ func realMain() int { } validateFlags() - if traces { - closeTracer, err := integration.InitTracer("end2endblast-" + integration.Mode) - if err != nil { - log.Error("Tracer initialization failed", "err", err) - return 1 - } - defer closeTracer() - } - if integration.Mode == integration.ModeServer { (&server{}).run() return 0 @@ -112,9 +95,8 @@ func realMain() int { func addFlags() { flag.Var(&remote, "remote", "(Mandatory for clients) address to connect to") - flag.Var(timeout, "timeout", "The timeout for each attempt") + flag.Var(timeout, "timeout", "The timeout for completing the whole test") flag.BoolVar(&epic, "epic", false, "Enable EPIC") - flag.BoolVar(&traces, "traces", true, "Enable Jaeger traces") } func validateFlags() { @@ -129,8 +111,7 @@ func validateFlags() { integration.LogFatal("Invalid timeout provided", "timeout", timeout) } } - log.Info("Flags", "traces", traces, "timeout", timeout, "epic", epic, - "remote", remote) + log.Info("Flags", "timeout", timeout, "epic", epic, "remote", remote) } type server struct { @@ -197,35 +178,12 @@ func (s *server) handlePing(conn snet.PacketConn) error { ) } - withTag := func(err error) error { - return err - } - if traces { - spanCtx, err := opentracing.GlobalTracer().Extract( - opentracing.Binary, - bytes.NewReader(pld.Trace), - ) - if err != nil { - return serrors.WrapStr("extracting trace information", err) - } - span, _ := opentracing.StartSpanFromContext( - context.Background(), - "handle_ping", - ext.RPCServerOption(spanCtx), - ) - defer span.Finish() - withTag = func(err error) error { - tracing.Error(span, err) - return err - } - } - if pld.Message != ping || !pld.Server.Equal(integration.Local.IA) { - return withTag(serrors.New("unexpected data in payload", + return serrors.New("unexpected data in payload", "source", p.Source, "destination", p.Destination, "data", pld, - )) + ) } // In this game, we respond to 1/256 (~0.4%) of the pings. Just enough @@ -239,10 +197,9 @@ func (s *server) handlePing(conn snet.PacketConn) error { Client: p.Source.IA, Server: integration.Local.IA, Message: pong, - Trace: pld.Trace, }) if err != nil { - return withTag(serrors.WrapStr("packing pong", err)) + return serrors.WrapStr("packing pong", err) } p.Destination, p.Source = p.Source, p.Destination @@ -264,18 +221,17 @@ func (s *server) handlePing(conn snet.PacketConn) error { p.Path = replyPath // Send pong if err := conn.WriteTo(&p, &ov); err != nil { - return withTag(serrors.WrapStr("sending reply", err)) + return serrors.WrapStr("sending reply", err) } log.Info("Sent pong to", "client", p.Destination) return nil } type client struct { - conn snet.PacketConn - port uint16 - sdConn daemon.Connector - path snet.Path - errorPaths map[snet.PathFingerprint]struct{} + conn snet.PacketConn + port uint16 + sdConn daemon.Connector + path snet.Path } func (c *client) run() int { @@ -302,114 +258,73 @@ func (c *client) run() int { fmt.Sprintf("%v,[%v]:%d", integration.Local.IA, integration.Local.Host.IP, c.port)) c.sdConn = integration.SDConn() defer c.sdConn.Close() - c.errorPaths = make(map[snet.PathFingerprint]struct{}) - pong_out := make(chan int) + + // Drain pongs in the background + pongOut := make(chan int) go func() { defer log.HandlePanic() - totalFailed := 1 + + // The timeout extends over the entire test. When we don't need to drain any more + // we just cancel it. + ctx, cancel := context.WithTimeout(context.Background(), timeout.Duration) + defer cancel() + // Drain pongs as long as we get them. We assume that failure means // there are no more pongs. We want ro receive at least one pong. The // rest doesn't matter. + allFailed := 1 integration.RepeatUntilFail("End2EndBlast", func(n int) bool { - failed := c.drainPong(n) - if !failed { - totalFailed = 0 + + if err := c.pong(ctx); err != nil { + // We should receive at least one, but this runs until pings stop + // coming, so there will always be one failure in the end. + return true // Stop. } - return failed + allFailed = 0 + return false // Keep consuming pongs }) - pong_out <- totalFailed + pongOut <- allFailed }() - // We return a "number of failures". So 0 means everything is fine. - ping_result := integration.RepeatUntilFail("End2EndBlast", c.blindPing) - - // Stop drainPongs, so we're not stuck here for up to 10s. - c.conn.Close() - - pong_result := <-pong_out - if pong_result != 0 { - log.Info("Never got a single pong") - } - return ping_result + pong_result -} - -// blindping sends one ping packet and expect no response. -// Returns true (which means finished) *if sending failed* -func (c *client) blindPing(n int) bool { - timeoutCtx, cancel := context.WithTimeout(context.Background(), timeout.Duration) + // Same here, the timeout context lives on for the rest of the test (so we don't keep + // creating and discarding contexts). + ctx, cancel := context.WithTimeout(context.Background(), timeout.Duration) defer cancel() - ctx := timeoutCtx - span, _ := tracing.NilCtx() - if traces { - span, ctx = tracing.CtxWith(timeoutCtx, "blindping") - span.SetTag("blindping", n) - span.SetTag("src", integration.Local.IA) - span.SetTag("dst", remote.IA) - defer span.Finish() + // Get a path, then use it for all the repeats + p, err := c.getRemote(ctx) + if err != nil { + integration.LogFatal("Could not get remote", "err", err) + return 1 } - logger := log.FromCtx(ctx) + c.path = p // struct fields cannot be assigned with := - if c.path == nil { - p, err := c.getRemote(ctx, n) - if err != nil { - logger.Error("Could not get remote", "err", err) + // We return a "number of failures". So 0 means everything is fine. + pingResult := integration.RepeatUntilFail("End2EndBlast", func(n int) bool { + // Send ping + if err := c.ping(ctx, n, c.path); err != nil { + logger := log.FromCtx(ctx) + logger.Error("Could not send packet", "err", err) return true } - c.path = p // struct fields cannot be assigned with := - } - // Default closure if traces are off. - withTag := func(err error) error { - return err - } + return false // Don't stop. Do it again! + }) - if traces { - span, ctx = tracing.StartSpanFromCtx(ctx, "attempt.ping") - defer span.Finish() - withTag = func(err error) error { - tracing.Error(span, err) - return err - } - } + // Stop drainPongs, so we're not stuck here for up to 10s. + c.conn.Close() - // Send ping - if err := c.ping(ctx, n, c.path, false); err != nil { - logger.Error("Could not send packet", "err", withTag(err)) - return true + pongResult := <-pongOut + if pongResult != 0 { + log.Info("Never got a single pong") } - - return false // Don't stop. Do it again! -} - -// drainPong consumes any pong message that might be received. -func (c *client) drainPong(n int) bool { - timeoutCtx, cancel := context.WithTimeout(context.Background(), timeout.Duration) - defer cancel() - ctx := timeoutCtx - span, _ := tracing.NilCtx() - - if traces { - span, ctx = tracing.CtxWith(timeoutCtx, "drainpong") - span.SetTag("drainpong", n) - span.SetTag("dst", integration.Local.IA) - span.SetTag("src", remote.IA) - defer span.Finish() - } - if err := c.pong(ctx, false); err != nil { - // We should receive at least one within 10s, but this is called until pings stop - // coming, so there will always be one failure in the end. Therefore... do not report an - // error here. - return true // Stop. Responses don't arrive anymore (or never did). - } - return false // Don't stop; keep consuming pongs + return pingResult + pongResult } -func (c *client) ping(ctx context.Context, n int, path snet.Path, logIfOk bool) error { +func (c *client) ping(ctx context.Context, n int, path snet.Path) error { rawPing, err := json.Marshal(Ping{ Server: remote.IA, Message: ping, - Trace: tracing.IDFromCtx(ctx), }) if err != nil { return serrors.WrapStr("packing ping", err) @@ -450,51 +365,28 @@ func (c *client) ping(ctx context.Context, n int, path snet.Path, logIfOk bool) }, }, } - if logIfOk { - log.Info("sending ping", "attempt", n, "path", path) - } if err := c.conn.WriteTo(pkt, remote.NextHop); err != nil { return err } return nil } -func (c *client) getRemote(ctx context.Context, n int) (snet.Path, error) { +func (c *client) getRemote(ctx context.Context) (snet.Path, error) { if remote.IA.Equal(integration.Local.IA) { remote.Path = snetpath.Empty{} return nil, nil } - span, ctx := tracing.StartSpanFromCtx(ctx, "attempt.get_remote") - defer span.Finish() - withTag := func(err error) error { - tracing.Error(span, err) - return err - } - paths, err := c.sdConn.Paths(ctx, remote.IA, integration.Local.IA, - daemon.PathReqFlags{Refresh: n != 0}) + daemon.PathReqFlags{Refresh: false}) if err != nil { - return nil, withTag(serrors.WrapStr("requesting paths", err)) - } - // If all paths had an error, let's try them again. - if len(paths) <= len(c.errorPaths) { - c.errorPaths = make(map[snet.PathFingerprint]struct{}) - } - // Select first path that didn't error before. - var path snet.Path - for _, p := range paths { - if _, ok := c.errorPaths[snet.Fingerprint(p)]; ok { - continue - } - path = p - break + return nil, serrors.WrapStr("requesting paths", err) } - if path == nil { - return nil, withTag(serrors.New("no path found", - "candidates", len(paths), - "errors", len(c.errorPaths), - )) + // Select first path + if len(paths) == 0 { + return nil, serrors.New("no path found") } + path := paths[0] + // Extract forwarding path from the SCION Daemon response. // If the epic flag is set, try to use the EPIC path type header. if epic { @@ -514,7 +406,7 @@ func (c *client) getRemote(ctx context.Context, n int) (snet.Path, error) { return path, nil } -func (c *client) pong(ctx context.Context, logIfOk bool) error { +func (c *client) pong(ctx context.Context) error { if err := c.conn.SetReadDeadline(getDeadline(ctx)); err != nil { return serrors.WrapStr("setting read deadline", err) } @@ -542,9 +434,6 @@ func (c *client) pong(ctx context.Context, logIfOk bool) error { if pld.Client != expected.Client || pld.Server != expected.Server || pld.Message != pong { return serrors.New("unexpected contents received", "data", pld, "expected", expected) } - if logIfOk { - log.Info("Received pong", "server", p.Source) - } return nil }