From c13960707b9835ec11e5f70520ef95cef7113a54 Mon Sep 17 00:00:00 2001 From: rosstimothy <39066650+rosstimothy@users.noreply.github.com> Date: Fri, 15 Nov 2024 13:59:19 -0500 Subject: [PATCH] Attempt to deflake TestDynamicClientReuse (#49081) The test was only waiting for a subset of services to be ready, proceeding with the test, and then closing the process. This caused a few problems that contributed to the flakiness. First, not calling `process.Wait` resulted in some services still being active and writing to the data directory while the testing framework was cleaning up the temp directory. Second, adding the Wait alone would cause deadlocks because the test did not wait for all services to be initialized and ready before shutting down. In addition to making both of the changes above, the test was also modified to reduce the number of services being launched to slightly speed up the test. Closes #46958. --- lib/service/service_test.go | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/lib/service/service_test.go b/lib/service/service_test.go index a735e325d1e9a..a0a01065f47a6 100644 --- a/lib/service/service_test.go +++ b/lib/service/service_test.go @@ -59,6 +59,7 @@ import ( "github.com/gravitational/teleport/lib/integrations/externalauditstorage" "github.com/gravitational/teleport/lib/limiter" "github.com/gravitational/teleport/lib/modules" + "github.com/gravitational/teleport/lib/multiplexer" "github.com/gravitational/teleport/lib/reversetunnelclient" "github.com/gravitational/teleport/lib/service/servicecfg" "github.com/gravitational/teleport/lib/services" @@ -184,15 +185,18 @@ func TestDynamicClientReuse(t *testing.T) { cfg := servicecfg.MakeDefaultConfig() cfg.Clock = fakeClock - var err error cfg.DataDir = t.TempDir() - cfg.DiagnosticAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"} cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}) cfg.Auth.Enabled = true 
cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"} cfg.Auth.SessionRecordingConfig.SetMode(types.RecordOff) + cfg.Auth.NoAudit = true cfg.Proxy.Enabled = true + cfg.Proxy.DisableDatabaseProxy = true cfg.Proxy.DisableWebInterface = true + cfg.Proxy.DisableReverseTunnel = true + cfg.Proxy.IdP.SAMLIdP.Enabled = false + cfg.Proxy.PROXYProtocolMode = multiplexer.PROXYProtocolOff cfg.Proxy.WebAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "localhost:0"} cfg.SSH.Enabled = false cfg.CircuitBreakerConfig = breaker.NoopBreakerConfig() @@ -201,7 +205,18 @@ func TestDynamicClientReuse(t *testing.T) { require.NoError(t, err) require.NoError(t, process.Start()) - t.Cleanup(func() { require.NoError(t, process.Close()) }) + + ctx, cancel := context.WithTimeout(process.ExitContext(), 30*time.Second) + defer cancel() + for _, eventName := range []string{AuthTLSReady, ProxySSHReady, ProxyWebServerReady, InstanceReady} { + _, err := process.WaitForEvent(ctx, eventName) + require.NoError(t, err) + } + + t.Cleanup(func() { + require.NoError(t, process.Close()) + require.NoError(t, process.Wait()) + }) // wait for instance connector iconn, err := process.WaitForConnector(InstanceIdentityEvent, process.log) @@ -229,17 +244,19 @@ func TestDynamicClientReuse(t *testing.T) { // initial static set of system roles that got applied to the instance cert. require.NotSame(t, iconn.Client, nconn.Client) - nconn.Close() + require.NoError(t, nconn.Close()) // node connector closure should not affect proxy client _, err = pconn.Client.Ping(context.Background()) require.NoError(t, err) - pconn.Close() + require.NoError(t, pconn.Close()) // proxy connector closure should not affect instance client _, err = iconn.Client.Ping(context.Background()) require.NoError(t, err) + + require.NoError(t, iconn.Close()) } func TestMonitor(t *testing.T) {