From a1039717393b21e848681577c00ad53e7647ad78 Mon Sep 17 00:00:00 2001
From: hugoShaka <hugo.hervieux@goteleport.com>
Date: Thu, 31 Oct 2024 15:18:44 -0400
Subject: [PATCH 1/5] implement autoupdate_agent_rollout reconciler

---
 lib/autoupdate/rolloutcontroller/client.go    |  45 ++
 .../rolloutcontroller/client_test.go          | 188 ++++++
 .../rolloutcontroller/reconciler.go           | 222 +++++++
 .../rolloutcontroller/reconciler_test.go      | 565 ++++++++++++++++++
 4 files changed, 1020 insertions(+)
 create mode 100644 lib/autoupdate/rolloutcontroller/client.go
 create mode 100644 lib/autoupdate/rolloutcontroller/client_test.go
 create mode 100644 lib/autoupdate/rolloutcontroller/reconciler.go
 create mode 100644 lib/autoupdate/rolloutcontroller/reconciler_test.go

diff --git a/lib/autoupdate/rolloutcontroller/client.go b/lib/autoupdate/rolloutcontroller/client.go
new file mode 100644
index 0000000000000..c54d41ad75ccd
--- /dev/null
+++ b/lib/autoupdate/rolloutcontroller/client.go
@@ -0,0 +1,45 @@
+/*
+ * Teleport
+ * Copyright (C) 2024  Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package rolloutcontroller
+
+import (
+	"context"
+	autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+)
+
+// Client is the subset of the Teleport client RPCs the controller needs.
+type Client interface {
+	// GetAutoUpdateConfig gets the AutoUpdateConfig singleton resource.
+	GetAutoUpdateConfig(ctx context.Context) (*autoupdatepb.AutoUpdateConfig, error)
+
+	// GetAutoUpdateVersion gets the AutoUpdateVersion singleton resource.
+	GetAutoUpdateVersion(ctx context.Context) (*autoupdatepb.AutoUpdateVersion, error)
+
+	// GetAutoUpdateAgentRollout gets the AutoUpdateAgentRollout singleton resource.
+	GetAutoUpdateAgentRollout(ctx context.Context) (*autoupdatepb.AutoUpdateAgentRollout, error)
+
+	// CreateAutoUpdateAgentRollout creates the AutoUpdateAgentRollout singleton resource.
+	CreateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdatepb.AutoUpdateAgentRollout) (*autoupdatepb.AutoUpdateAgentRollout, error)
+
+	// UpdateAutoUpdateAgentRollout updates the AutoUpdateAgentRollout singleton resource.
+	UpdateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdatepb.AutoUpdateAgentRollout) (*autoupdatepb.AutoUpdateAgentRollout, error)
+
+	// DeleteAutoUpdateAgentRollout deletes the AutoUpdateAgentRollout singleton resource.
+	DeleteAutoUpdateAgentRollout(ctx context.Context) error
+}
diff --git a/lib/autoupdate/rolloutcontroller/client_test.go b/lib/autoupdate/rolloutcontroller/client_test.go
new file mode 100644
index 0000000000000..3baa12153c47a
--- /dev/null
+++ b/lib/autoupdate/rolloutcontroller/client_test.go
@@ -0,0 +1,188 @@
+/*
+ * Teleport
+ * Copyright (C) 2024  Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package rolloutcontroller
+
+import (
+	"context"
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	"github.com/gravitational/teleport/api/utils"
+	"github.com/stretchr/testify/require"
+	"google.golang.org/protobuf/protoadapt"
+	"testing"
+)
+
+// mockClient is a mock implementation if the Client interface for testing purposes.
+// This is used to precisely check which calls are made by the Reconciler during tests.
+// Use newMockClient to create one from stubs. Once the test is over, you must call
+// mockClient.checkIfEmpty to validate all expected calls were made.
+type mockClient struct {
+	getAutoUpdateConfig          *getHandler[*autoupdate.AutoUpdateConfig]
+	getAutoUpdateVersion         *getHandler[*autoupdate.AutoUpdateVersion]
+	getAutoUpdateAgentRollout    *getHandler[*autoupdate.AutoUpdateAgentRollout]
+	createAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
+	updateAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
+	deleteAutoUpdateAgentRollout *deleteHandler
+}
+
+func (m mockClient) GetAutoUpdateConfig(ctx context.Context) (*autoupdate.AutoUpdateConfig, error) {
+	return m.getAutoUpdateConfig.handle(ctx)
+}
+
+func (m mockClient) GetAutoUpdateVersion(ctx context.Context) (*autoupdate.AutoUpdateVersion, error) {
+	return m.getAutoUpdateVersion.handle(ctx)
+}
+
+func (m mockClient) GetAutoUpdateAgentRollout(ctx context.Context) (*autoupdate.AutoUpdateAgentRollout, error) {
+	return m.getAutoUpdateAgentRollout.handle(ctx)
+}
+
+func (m mockClient) CreateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdate.AutoUpdateAgentRollout) (*autoupdate.AutoUpdateAgentRollout, error) {
+	return m.createAutoUpdateAgentRollout.handle(ctx, rollout)
+}
+
+func (m mockClient) UpdateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdate.AutoUpdateAgentRollout) (*autoupdate.AutoUpdateAgentRollout, error) {
+	return m.updateAutoUpdateAgentRollout.handle(ctx, rollout)
+}
+
+func (m mockClient) DeleteAutoUpdateAgentRollout(ctx context.Context) error {
+	return m.deleteAutoUpdateAgentRollout.handle(ctx)
+}
+
+func (m mockClient) checkIfEmpty(t *testing.T) {
+	require.True(t, m.getAutoUpdateConfig.isEmpty(), "Get autoupdate_config mock not empty")
+	require.True(t, m.getAutoUpdateVersion.isEmpty(), "Get autoupdate_version mock not empty")
+	require.True(t, m.getAutoUpdateAgentRollout.isEmpty(), "Get autoupdate_agent_rollout mock not empty")
+	require.True(t, m.createAutoUpdateAgentRollout.isEmpty(), "Create autoupdate_agent_rollout mock not empty")
+	require.True(t, m.updateAutoUpdateAgentRollout.isEmpty(), "Update autoupdate_agent_rollout mock not empty")
+	require.True(t, m.deleteAutoUpdateAgentRollout.isEmpty(), "Delete autoupdate_agent_rollout mock not empty")
+}
+
+func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient {
+	// Fail early if there's a mismatch
+	require.Equal(t, len(stubs.createRolloutAnswers), len(stubs.createRolloutExpects), "invalid stubs, create validations and answers slices are not the same length")
+	require.Equal(t, len(stubs.updateRolloutAnswers), len(stubs.updateRolloutExpects), "invalid stubs, update validations and answers slices are not the same length")
+
+	return &mockClient{
+		getAutoUpdateConfig:          &getHandler[*autoupdate.AutoUpdateConfig]{t, stubs.configAnswers},
+		getAutoUpdateVersion:         &getHandler[*autoupdate.AutoUpdateVersion]{t, stubs.versionAnswers},
+		getAutoUpdateAgentRollout:    &getHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.rolloutAnswers},
+		createAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.createRolloutExpects, stubs.createRolloutAnswers},
+		updateAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.updateRolloutExpects, stubs.updateRolloutAnswers},
+		deleteAutoUpdateAgentRollout: &deleteHandler{t, stubs.deleteRolloutAnswers},
+	}
+}
+
+type mockClientStubs struct {
+	configAnswers        []callAnswer[*autoupdate.AutoUpdateConfig]
+	versionAnswers       []callAnswer[*autoupdate.AutoUpdateVersion]
+	rolloutAnswers       []callAnswer[*autoupdate.AutoUpdateAgentRollout]
+	createRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout]
+	createRolloutExpects []require.ValueAssertionFunc
+	updateRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout]
+	updateRolloutExpects []require.ValueAssertionFunc
+	deleteRolloutAnswers []error
+}
+
+type callAnswer[T any] struct {
+	result T
+	err    error
+}
+
+// getHandler is used in a mock client to answer get resource requests during tests.
+// It takes a list of answers and errors and will return them when invoked.
+// If there are no stubs left it fails the test.
+type getHandler[T protoadapt.MessageV1] struct {
+	t       *testing.T
+	answers []callAnswer[T]
+}
+
+func (h *getHandler[T]) handle(_ context.Context) (T, error) {
+	if len(h.answers) == 0 {
+		require.Fail(h.t, "no answers left")
+	}
+
+	entry := h.answers[0]
+	h.answers = h.answers[1:]
+
+	// We need to deep copy because the reconciler might do updates in place.
+	// We don't want the original resource to be edited as this would mess with other tests.
+	return utils.CloneProtoMsg(entry.result), entry.err
+}
+
+// isEmpty returns true only if all stubs were consumed
+func (h *getHandler[T]) isEmpty() bool {
+	return len(h.answers) == 0
+}
+
+// createUpdateHandler is used in a mock client to answer create or update resource requests during tests (any request whose arity is 2).
+// It first validates the input suing the passed validation function, then it returns the predefined answer and error.
+// If there are no stubs left it fails the test.
+type createUpdateHandler[T protoadapt.MessageV1] struct {
+	t       *testing.T
+	expect  []require.ValueAssertionFunc
+	answers []callAnswer[T]
+}
+
+func (h *createUpdateHandler[T]) handle(_ context.Context, object T) (T, error) {
+	if len(h.expect) == 0 {
+		require.Fail(h.t, "not expecting more calls")
+	}
+	h.expect[0](h.t, object)
+	h.expect = h.expect[1:]
+
+	if len(h.answers) == 0 {
+		require.Fail(h.t, "no answers left")
+	}
+
+	entry := h.answers[0]
+	h.answers = h.answers[1:]
+
+	// We need to deep copy because the reconciler might do updates in place.
+	// We don't want the original resource to be edited as this would mess with other tests.
+	return utils.CloneProtoMsg(entry.result), entry.err
+}
+
+// isEmpty returns true only if all stubs were consumed
+func (h *createUpdateHandler[T]) isEmpty() bool {
+	return len(h.answers) == 0 && len(h.expect) == 0
+}
+
+// deleteHandler is used in a mock client to answer delete resource requests during tests.
+// It takes a list of errors and returns them when invoked.
+// If there are no stubs left it fails the test.
+type deleteHandler struct {
+	t       *testing.T
+	answers []error
+}
+
+func (h *deleteHandler) handle(_ context.Context) error {
+	if len(h.answers) == 0 {
+		require.Fail(h.t, "no answers left")
+	}
+
+	entry := h.answers[0]
+	h.answers = h.answers[1:]
+
+	return entry
+}
+
+// isEmpty returns true only if all stubs were consumed
+func (h *deleteHandler) isEmpty() bool {
+	return len(h.answers) == 0
+}
diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rolloutcontroller/reconciler.go
new file mode 100644
index 0000000000000..0e1b8c75631db
--- /dev/null
+++ b/lib/autoupdate/rolloutcontroller/reconciler.go
@@ -0,0 +1,222 @@
+/*
+ * Teleport
+ * Copyright (C) 2024  Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package rolloutcontroller
+
+import (
+	"context"
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	update "github.com/gravitational/teleport/api/types/autoupdate"
+	"github.com/gravitational/trace"
+	"log/slog"
+	"sync"
+	"time"
+)
+
+const (
+	reconciliationTimeout = 30 * time.Second
+	defaultConfigMode     = update.AgentsUpdateModeEnabled
+	defaultStrategy       = update.AgentsStrategyHaltOnError
+)
+
+// Reconciler reconciles the AutoUpdateAgentRollout singleton based on the content of the AutoUpdateVersion and
+// AutoUpdateConfig singletons. This reconciler is not based on the services.GenericReconciler because:
+// - we reconcile 2 resources with one
+// - both input and output are singletons, we don't need the multi resource logic nor stream/paginated APIs
+type Reconciler struct {
+	clt Client
+	log *slog.Logger
+
+	// lock ensures we only run one reconciliation at a time
+	lock sync.Mutex
+}
+
+// Reconcile the AutoUpdateAgentRollout singleton. The reconciliation can fail because of a conflict (multiple auths
+// are racing), in this case we retry the reconciliation immediately.
+func (r Reconciler) Reconcile(ctx context.Context) error {
+	ctx, cancel := context.WithTimeout(ctx, reconciliationTimeout)
+	defer cancel()
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+			err := r.tryReconcile(ctx)
+			switch {
+			case err == nil:
+				return nil
+			case trace.IsCompareFailed(err), trace.IsNotFound(err):
+				// The resource changed since we last saw it
+				// We must have raced against another auth
+				// Let's retry the reconciliation
+				// TODO: add a backoff to avoid a tight or infinite loop?
+				r.log.DebugContext(ctx, "retrying reconciliation", "error", err)
+			default:
+				// error is non-nil and non-retryable
+				return trace.Wrap(err, "failed to reconcile rollout")
+			}
+		}
+	}
+}
+
+// tryReconcile tries to reconcile the AutoUpdateAgentRollout singleton.
+// This function should be nilpotent if the AutoUpdateAgentRollout is already up-to-date.
+// The creation/update/deletion can fail with a trace.CompareFailedError or trace.NotFoundError
+// if the resource change while we were computing it.
+// The caller must handle those error and retry the reconciliation.
+func (r Reconciler) tryReconcile(ctx context.Context) error {
+	// get autoupdate_config
+	config, err := r.clt.GetAutoUpdateConfig(ctx)
+	if err != nil && !trace.IsNotFound(err) {
+		return trace.Wrap(err, "getting autoupdate_config")
+	}
+
+	// get autoupdate_version
+	version, err := r.clt.GetAutoUpdateVersion(ctx)
+	if err != nil && !trace.IsNotFound(err) {
+		return trace.Wrap(err, "getting autoupdate_version")
+	}
+
+	// get autoupdate_agent_rollout
+	rolloutExists := true
+	existingRollout, err := r.clt.GetAutoUpdateAgentRollout(ctx)
+	if err != nil && !trace.IsNotFound(err) {
+		return trace.Wrap(err, "getting autoupdate_agent_rollout")
+	}
+	if trace.IsNotFound(err) {
+		// rollout doesn't exist yet, we'll need to call Create instead of Update.
+		rolloutExists = false
+	}
+
+	// if autoupdate_version does not exist or does not contain spec.agents, we should not configure a rollout
+	if version.GetSpec().GetAgents() == nil {
+		if !rolloutExists {
+			// the rollout doesn't exist, nothing to do
+			return nil
+		}
+		// the rollout exists, we must delete it
+		return r.clt.DeleteAutoUpdateAgentRollout(ctx)
+	}
+
+	// compute what the spec should look like
+	newSpec, err := r.buildRolloutSpec(config.GetSpec().GetAgents(), version.GetSpec().GetAgents())
+	if err != nil {
+		return trace.Wrap(err, "mutating rollout")
+	}
+
+	// if there are no existing rollout, we create a new one
+	if !rolloutExists {
+		rollout, err := update.NewAutoUpdateAgentRollout(newSpec)
+		if err != nil {
+			return trace.Wrap(err, "validating new rollout")
+		}
+		_, err = r.clt.CreateAutoUpdateAgentRollout(ctx, rollout)
+		return trace.Wrap(err, "creating rollout")
+	}
+
+	// there was an existing rollout, we must figure if something changed
+	specChanged := existingRollout.GetSpec().GetStartVersion() != newSpec.GetStartVersion() ||
+		existingRollout.GetSpec().GetTargetVersion() != newSpec.GetTargetVersion() ||
+		existingRollout.GetSpec().GetAutoupdateMode() != newSpec.GetAutoupdateMode() ||
+		existingRollout.GetSpec().GetStrategy() != newSpec.GetStrategy() ||
+		existingRollout.GetSpec().GetSchedule() != newSpec.GetSchedule()
+
+	// TODO: reconcile the status here when we'll add group support.
+	// Even if the spec does not change, we might still have to update the status:
+	// - sync groups with the ones from the user config
+	// - progress the rollout across groups
+
+	// if nothing changed, no need to update the resource
+	if !specChanged {
+		r.log.DebugContext(ctx, "rollout unchanged")
+		return nil
+	}
+
+	// something changed, we replace the old spec with the new one, validate and update the resource
+	// we don't create a new resource to keep the revision ID and
+	existingRollout.Spec = newSpec
+	err = update.ValidateAutoUpdateAgentRollout(existingRollout)
+	if err != nil {
+		return trace.Wrap(err, "validating mutated rollout")
+	}
+	_, err = r.clt.UpdateAutoUpdateAgentRollout(ctx, existingRollout)
+	return trace.Wrap(err, "updating rollout")
+}
+
+func (r Reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) {
+	// reconcile mode
+	mode, err := getMode(config.GetMode(), version.GetMode())
+	if err != nil {
+		return nil, trace.Wrap(err, "computing agent update mode")
+	}
+
+	strategy := config.GetStrategy()
+	if strategy == "" {
+		strategy = defaultStrategy
+	}
+
+	return &autoupdate.AutoUpdateAgentRolloutSpec{
+		StartVersion:   version.GetStartVersion(),
+		TargetVersion:  version.GetTargetVersion(),
+		Schedule:       version.GetSchedule(),
+		AutoupdateMode: mode,
+		Strategy:       strategy,
+	}, nil
+
+}
+
+// agentModeCode maps agents mode to integers.
+// When config and version modes don't match, the lowest integer takes precedence.
+var (
+	agentModeCode = map[string]int{
+		update.AgentsUpdateModeDisabled:  0,
+		update.AgentsUpdateModeSuspended: 1,
+		update.AgentsUpdateModeEnabled:   2,
+	}
+	codeToAgentMode = map[int]string{
+		0: update.AgentsUpdateModeDisabled,
+		1: update.AgentsUpdateModeSuspended,
+		2: update.AgentsUpdateModeEnabled,
+	}
+)
+
+// getMode merges the agent modes coming from the version and config resources into a single mode.
+// "disabled" takes precedence over "suspended", which takes precedence over "enabled".
+func getMode(configMode, versionMode string) (string, error) {
+	if configMode == "" {
+		configMode = defaultConfigMode
+	}
+	if versionMode == "" {
+		return "", trace.BadParameter("version mode empty")
+	}
+
+	configCode, ok := agentModeCode[configMode]
+	if !ok {
+		return "", trace.BadParameter("unsupported agent config mode: %v", configMode)
+	}
+	versionCode, ok := agentModeCode[versionMode]
+	if !ok {
+		return "", trace.BadParameter("unsupported agent version mode: %v", versionMode)
+	}
+
+	// The lowest code takes precedence
+	if configCode <= versionCode {
+		return codeToAgentMode[configCode], nil
+	}
+	return codeToAgentMode[versionCode], nil
+}
diff --git a/lib/autoupdate/rolloutcontroller/reconciler_test.go b/lib/autoupdate/rolloutcontroller/reconciler_test.go
new file mode 100644
index 0000000000000..5baf13d903e5e
--- /dev/null
+++ b/lib/autoupdate/rolloutcontroller/reconciler_test.go
@@ -0,0 +1,565 @@
+/*
+ * Teleport
+ * Copyright (C) 2024  Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package rolloutcontroller
+
+import (
+	"context"
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/uuid"
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	update "github.com/gravitational/teleport/api/types/autoupdate"
+	apiutils "github.com/gravitational/teleport/api/utils"
+	"github.com/gravitational/teleport/lib/backend"
+	"github.com/gravitational/teleport/lib/utils"
+	"github.com/gravitational/trace"
+	"github.com/stretchr/testify/require"
+	"google.golang.org/protobuf/testing/protocmp"
+	"testing"
+)
+
+// rolloutEquals returns a require.ValueAssertionFunc that checks the rollout is identical.
+// The comparison does not take into account the proto internal state.
+func rolloutEquals(expected *autoupdate.AutoUpdateAgentRollout) require.ValueAssertionFunc {
+	return func(t require.TestingT, i interface{}, _ ...interface{}) {
+		require.IsType(t, &autoupdate.AutoUpdateAgentRollout{}, i)
+		actual := i.(*autoupdate.AutoUpdateAgentRollout)
+		require.Empty(t, cmp.Diff(expected, actual, protocmp.Transform()))
+	}
+}
+
+// cancelContext wraps a require.ValueAssertionFunc so that the given context is canceled before checking the assertion.
+// This is used to test how the reconciler behaves when its context is canceled.
+func cancelContext(assertionFunc require.ValueAssertionFunc, cancel func()) require.ValueAssertionFunc {
+	return func(t require.TestingT, i interface{}, i2 ...interface{}) {
+		cancel()
+		assertionFunc(t, i, i2...)
+	}
+}
+
+// withRevisionID creates a deep copy of an agent rollout and sets the revisionID in its metadata.
+// This is used to test the conditional update retry logic.
+func withRevisionID(original *autoupdate.AutoUpdateAgentRollout, revision string) *autoupdate.AutoUpdateAgentRollout {
+	revisioned := apiutils.CloneProtoMsg(original)
+	revisioned.Metadata.Revision = revision
+	return revisioned
+}
+
+func TestGetMode(t *testing.T) {
+	t.Parallel()
+	tests := []struct {
+		name        string
+		configMode  string
+		versionMode string
+		expected    string
+		checkErr    require.ErrorAssertionFunc
+	}{
+		{
+			name:        "config and version equal",
+			configMode:  update.AgentsUpdateModeEnabled,
+			versionMode: update.AgentsUpdateModeEnabled,
+			expected:    update.AgentsUpdateModeEnabled,
+			checkErr:    require.NoError,
+		},
+		{
+			name:        "config suspends, version enables",
+			configMode:  update.AgentsUpdateModeSuspended,
+			versionMode: update.AgentsUpdateModeEnabled,
+			expected:    update.AgentsUpdateModeSuspended,
+			checkErr:    require.NoError,
+		},
+		{
+			name:        "config enables, version suspends",
+			configMode:  update.AgentsUpdateModeEnabled,
+			versionMode: update.AgentsUpdateModeSuspended,
+			expected:    update.AgentsUpdateModeSuspended,
+			checkErr:    require.NoError,
+		},
+		{
+			name:        "config suspends, version disables",
+			configMode:  update.AgentsUpdateModeSuspended,
+			versionMode: update.AgentsUpdateModeDisabled,
+			expected:    update.AgentsUpdateModeDisabled,
+			checkErr:    require.NoError,
+		},
+		{
+			name:        "version enables, no config",
+			configMode:  "",
+			versionMode: update.AgentsUpdateModeEnabled,
+			expected:    update.AgentsUpdateModeEnabled,
+			checkErr:    require.NoError,
+		},
+		{
+			name:        "config enables, no version",
+			configMode:  update.AgentsUpdateModeEnabled,
+			versionMode: "",
+			expected:    "",
+			checkErr:    require.Error,
+		},
+		{
+			name:        "unknown mode",
+			configMode:  "this in not a mode",
+			versionMode: update.AgentsUpdateModeEnabled,
+			expected:    "",
+			checkErr:    require.Error,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := getMode(tt.configMode, tt.versionMode)
+			tt.checkErr(t, err)
+			require.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestTryReconcile(t *testing.T) {
+	t.Parallel()
+	log := utils.NewSlogLoggerForTests()
+	ctx := context.Background()
+	// Test setup: creating fixtures
+	configOK, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
+		Tools: &autoupdate.AutoUpdateConfigSpecTools{
+			Mode: update.ToolsUpdateModeEnabled,
+		},
+		Agents: &autoupdate.AutoUpdateConfigSpecAgents{
+			Mode:     update.AgentsUpdateModeEnabled,
+			Strategy: update.AgentsStrategyHaltOnError,
+		},
+	})
+	require.NoError(t, err)
+
+	configNoAgent, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
+		Tools: &autoupdate.AutoUpdateConfigSpecTools{
+			Mode: update.ToolsUpdateModeEnabled,
+		},
+	})
+	require.NoError(t, err)
+
+	versionOK, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{
+		Tools: &autoupdate.AutoUpdateVersionSpecTools{
+			TargetVersion: "1.2.3",
+		},
+		Agents: &autoupdate.AutoUpdateVersionSpecAgents{
+			StartVersion:  "1.2.3",
+			TargetVersion: "1.2.4",
+			Schedule:      update.AgentsScheduleImmediate,
+			Mode:          update.AgentsUpdateModeEnabled,
+		},
+	})
+	require.NoError(t, err)
+
+	versionNoAgent, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{
+		Tools: &autoupdate.AutoUpdateVersionSpecTools{
+			TargetVersion: "1.2.3",
+		},
+	})
+	require.NoError(t, err)
+
+	upToDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
+		StartVersion:   "1.2.3",
+		TargetVersion:  "1.2.4",
+		Schedule:       update.AgentsScheduleImmediate,
+		AutoupdateMode: update.AgentsUpdateModeEnabled,
+		Strategy:       update.AgentsStrategyHaltOnError,
+	})
+	require.NoError(t, err)
+
+	outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
+		StartVersion:   "1.2.2",
+		TargetVersion:  "1.2.3",
+		Schedule:       update.AgentsScheduleImmediate,
+		AutoupdateMode: update.AgentsUpdateModeEnabled,
+		Strategy:       update.AgentsStrategyHaltOnError,
+	})
+	require.NoError(t, err)
+
+	tests := []struct {
+		name            string
+		config          *autoupdate.AutoUpdateConfig
+		version         *autoupdate.AutoUpdateVersion
+		existingRollout *autoupdate.AutoUpdateAgentRollout
+		createExpect    *autoupdate.AutoUpdateAgentRollout
+		updateExpect    *autoupdate.AutoUpdateAgentRollout
+		deleteExpect    bool
+	}{
+		{
+			name: "config and version exist, no existing rollout",
+			// rollout should be created
+			config:       configOK,
+			version:      versionOK,
+			createExpect: upToDateRollout,
+		},
+		{
+			name: "version exist, no existing rollout nor config",
+			// rollout should be created
+			version:      versionOK,
+			createExpect: upToDateRollout,
+		},
+		{
+			name: "version exist, no existing rollout, config exist but doesn't contain agent section",
+			// rollout should be created
+			config:       configNoAgent,
+			version:      versionOK,
+			createExpect: upToDateRollout,
+		},
+		{
+			name: "config exist, no existing rollout nor version",
+			// rollout should not be created as there is no version
+			config: configOK,
+		},
+		{
+			name: "config exist, no existing rollout, version exist but doesn't contain agent section",
+			// rollout should not be created as there is no version
+			config:  configOK,
+			version: versionNoAgent,
+		},
+		{
+			name: "no existing rollout, config, nor version",
+			// rollout should not be created as there is no version
+		},
+		{
+			name: "existing out-of-date rollout, config and version exist",
+			// rollout should be updated
+			config:          configOK,
+			version:         versionOK,
+			existingRollout: outOfDateRollout,
+			updateExpect:    upToDateRollout,
+		},
+		{
+			name: "existing up-to-date rollout, config and version exist",
+			// rollout should not be updated as its spec is already good
+			config:          configOK,
+			version:         versionOK,
+			existingRollout: upToDateRollout,
+		},
+		{
+			name: "existing rollout and config but no version",
+			// rollout should be deleted as there is no version
+			config:          configOK,
+			existingRollout: upToDateRollout,
+			deleteExpect:    true,
+		},
+		{
+			name: "existing rollout but no config nor version",
+			// rollout should be deleted as there is no version
+			existingRollout: upToDateRollout,
+			deleteExpect:    true,
+		},
+	}
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			// Test setup: creating a fake client answering fixtures
+			var stubs mockClientStubs
+
+			if tt.config != nil {
+				stubs.configAnswers = []callAnswer[*autoupdate.AutoUpdateConfig]{{tt.config, nil}}
+			} else {
+				stubs.configAnswers = []callAnswer[*autoupdate.AutoUpdateConfig]{{nil, trace.NotFound("no config")}}
+			}
+
+			if tt.version != nil {
+				stubs.versionAnswers = []callAnswer[*autoupdate.AutoUpdateVersion]{{tt.version, nil}}
+			} else {
+				stubs.versionAnswers = []callAnswer[*autoupdate.AutoUpdateVersion]{{nil, trace.NotFound("no version")}}
+			}
+
+			if tt.existingRollout != nil {
+				stubs.rolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.existingRollout, nil}}
+			} else {
+				stubs.rolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.NotFound("no rollout")}}
+			}
+
+			if tt.createExpect != nil {
+				stubs.createRolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.createExpect, nil}}
+				stubs.createRolloutExpects = []require.ValueAssertionFunc{rolloutEquals(tt.createExpect)}
+			}
+
+			if tt.updateExpect != nil {
+				stubs.updateRolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.updateExpect, nil}}
+				stubs.updateRolloutExpects = []require.ValueAssertionFunc{rolloutEquals(tt.updateExpect)}
+			}
+
+			if tt.deleteExpect {
+				stubs.deleteRolloutAnswers = []error{nil}
+			}
+
+			client := newMockClient(t, stubs)
+
+			// Test execution: Running the reconciliation
+
+			reconciler := Reconciler{
+				clt: client,
+				log: log,
+			}
+
+			require.NoError(t, reconciler.tryReconcile(ctx))
+			// Test validation: Checking that the mock client is now empty
+
+			client.checkIfEmpty(t)
+		})
+	}
+}
+
+func TestReconciler_Reconcile(t *testing.T) {
+	log := utils.NewSlogLoggerForTests()
+	ctx := context.Background()
+	// Test setup: creating fixtures
+	config, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
+		Tools: &autoupdate.AutoUpdateConfigSpecTools{
+			Mode: update.ToolsUpdateModeEnabled,
+		},
+		Agents: &autoupdate.AutoUpdateConfigSpecAgents{
+			Mode:     update.AgentsUpdateModeEnabled,
+			Strategy: update.AgentsStrategyHaltOnError,
+		},
+	})
+	require.NoError(t, err)
+	version, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{
+		Tools: &autoupdate.AutoUpdateVersionSpecTools{
+			TargetVersion: "1.2.3",
+		},
+		Agents: &autoupdate.AutoUpdateVersionSpecAgents{
+			StartVersion:  "1.2.3",
+			TargetVersion: "1.2.4",
+			Schedule:      update.AgentsScheduleImmediate,
+			Mode:          update.AgentsUpdateModeEnabled,
+		},
+	})
+	require.NoError(t, err)
+	upToDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
+		StartVersion:   "1.2.3",
+		TargetVersion:  "1.2.4",
+		Schedule:       update.AgentsScheduleImmediate,
+		AutoupdateMode: update.AgentsUpdateModeEnabled,
+		Strategy:       update.AgentsStrategyHaltOnError,
+	})
+	require.NoError(t, err)
+
+	outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
+		StartVersion:   "1.2.2",
+		TargetVersion:  "1.2.3",
+		Schedule:       update.AgentsScheduleImmediate,
+		AutoupdateMode: update.AgentsUpdateModeEnabled,
+		Strategy:       update.AgentsStrategyHaltOnError,
+	})
+	require.NoError(t, err)
+
+	// Those tests are not written in table format because the fixture setup it too complex and this would harm
+	// readability.
+	t.Run("reconciliation has nothing to do, should exit", func(t *testing.T) {
+		// Test setup: build mock client
+		stubs := mockClientStubs{
+			configAnswers:  []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}},
+			versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}},
+			rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{upToDateRollout, nil}},
+		}
+
+		client := newMockClient(t, stubs)
+		reconciler := Reconciler{
+			clt: client,
+			log: log,
+		}
+
+		// Test execution: run the reconciliation loop
+		require.NoError(t, reconciler.Reconcile(ctx))
+
+		// Test validation: check that all the expected calls were received
+		client.checkIfEmpty(t)
+	})
+
+	t.Run("reconciliation succeeds on first try, should exit", func(t *testing.T) {
+		stubs := mockClientStubs{
+			configAnswers:        []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}},
+			versionAnswers:       []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}},
+			rolloutAnswers:       []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}},
+			updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)},
+			updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{upToDateRollout, nil}},
+		}
+
+		client := newMockClient(t, stubs)
+		reconciler := Reconciler{
+			clt: client,
+			log: log,
+		}
+
+		// Test execution: run the reconciliation loop
+		require.NoError(t, reconciler.Reconcile(ctx))
+
+		// Test validation: check that all the expected calls were received
+		client.checkIfEmpty(t)
+	})
+
+	t.Run("reconciliation faces conflict on first try, should retry and see that there's nothing left to do", func(t *testing.T) {
+		stubs := mockClientStubs{
+			// because of the retry, we expect 2 GETs on every resource
+			configAnswers:  []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}},
+			versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}},
+			rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}, {upToDateRollout, nil}},
+			// Single update expected, because there's nothing to do after the retry
+			updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)},
+			updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.Wrap(backend.ErrIncorrectRevision)}},
+		}
+
+		client := newMockClient(t, stubs)
+		reconciler := Reconciler{
+			clt: client,
+			log: log,
+		}
+
+		// Test execution: run the reconciliation loop
+		require.NoError(t, reconciler.Reconcile(ctx))
+
+		// Test validation: check that all the expected calls were received
+		client.checkIfEmpty(t)
+	})
+
+	t.Run("reconciliation faces conflict on first try, should retry and update a second time", func(t *testing.T) {
+		rev1, err := uuid.NewUUID()
+		require.NoError(t, err)
+		rev2, err := uuid.NewUUID()
+		require.NoError(t, err)
+		rev3, err := uuid.NewUUID()
+		require.NoError(t, err)
+
+		stubs := mockClientStubs{
+			// because of the retry, we expect 2 GETs on every resource
+			configAnswers:  []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}},
+			versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}},
+			rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+				{withRevisionID(outOfDateRollout, rev1.String()), nil},
+				{withRevisionID(outOfDateRollout, rev2.String()), nil}},
+			// Two updates expected, one with the old revision, then a second one with the new
+			updateRolloutExpects: []require.ValueAssertionFunc{
+				rolloutEquals(withRevisionID(upToDateRollout, rev1.String())),
+				rolloutEquals(withRevisionID(upToDateRollout, rev2.String())),
+			},
+			// We mimic a race and reject the first update because of the outdated revision
+			updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+				{nil, trace.Wrap(backend.ErrIncorrectRevision)},
+				{withRevisionID(upToDateRollout, rev3.String()), nil},
+			},
+		}
+
+		client := newMockClient(t, stubs)
+		reconciler := Reconciler{
+			clt: client,
+			log: log,
+		}
+
+		// Test execution: run the reconciliation loop
+		require.NoError(t, reconciler.Reconcile(ctx))
+
+		// Test validation: check that all the expected calls were received
+		client.checkIfEmpty(t)
+	})
+
+	t.Run("reconciliation faces missing rollout on first try, should retry and create the rollout", func(t *testing.T) {
+		stubs := mockClientStubs{
+			// because of the retry, we expect 2 GETs on every resource
+			configAnswers:  []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}},
+			versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}},
+			rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+				{outOfDateRollout, nil},
+				{nil, trace.NotFound("no rollout")}},
+			// One update expected on the first try, the second try should create
+			updateRolloutExpects: []require.ValueAssertionFunc{
+				rolloutEquals(upToDateRollout),
+			},
+			// We mimic the fact the rollout got deleted in the meantime
+			updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+				{nil, trace.NotFound("no rollout")},
+			},
+			// One create expected on the second try
+			createRolloutExpects: []require.ValueAssertionFunc{
+				rolloutEquals(upToDateRollout),
+			},
+			createRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+				{upToDateRollout, nil},
+			},
+		}
+
+		client := newMockClient(t, stubs)
+		reconciler := Reconciler{
+			clt: client,
+			log: log,
+		}
+
+		// Test execution: run the reconciliation loop
+		require.NoError(t, reconciler.Reconcile(ctx))
+
+		// Test validation: check that all the expected calls were received
+		client.checkIfEmpty(t)
+	})
+
+	t.Run("reconciliation meets a hard unexpected failure on first try, should exit in error", func(t *testing.T) {
+		stubs := mockClientStubs{
+			configAnswers:        []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}},
+			versionAnswers:       []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}},
+			rolloutAnswers:       []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}},
+			updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)},
+			updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+				{nil, trace.ConnectionProblem(trace.Errorf("io/timeout"), "the DB fell on the floor")},
+			},
+		}
+
+		client := newMockClient(t, stubs)
+		reconciler := Reconciler{
+			clt: client,
+			log: log,
+		}
+
+		// Test execution: run the reconciliation loop
+		require.ErrorContains(t, reconciler.Reconcile(ctx), "the DB fell on the floor")
+
+		// Test validation: check that all the expected calls were received
+		client.checkIfEmpty(t)
+	})
+
+	t.Run("reconciliation faces conflict on first try, should retry but context is expired so it bails out", func(t *testing.T) {
+		cancelableCtx, cancel := context.WithCancel(ctx)
+		// just in case
+		t.Cleanup(cancel)
+
+		stubs := mockClientStubs{
+			// we expect a single GET because the context expires before the second retry
+			configAnswers:  []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}},
+			versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}},
+			rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}},
+			// Single update expected, because there's nothing to do after the retry.
+			// We wrap the update validation function into a context canceler, so the context is done after the first update
+			updateRolloutExpects: []require.ValueAssertionFunc{cancelContext(rolloutEquals(upToDateRollout), cancel)},
+			// return a retryable error
+			updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.Wrap(backend.ErrIncorrectRevision)}},
+		}
+
+		client := newMockClient(t, stubs)
+		reconciler := Reconciler{
+			clt: client,
+			log: log,
+		}
+
+		// Test execution: run the reconciliation loop
+		require.ErrorContains(t, reconciler.Reconcile(cancelableCtx), "canceled")
+
+		// Test validation: check that all the expected calls were received
+		client.checkIfEmpty(t)
+	})
+}

From 330e7b56d885a62f08de5126fcae841a7d281443 Mon Sep 17 00:00:00 2001
From: hugoShaka <hugo.hervieux@goteleport.com>
Date: Mon, 4 Nov 2024 15:45:51 -0500
Subject: [PATCH 2/5] address edoardo's feedback

---
 .../rolloutcontroller/client_test.go          | 13 +++++------
 .../rolloutcontroller/reconciler.go           | 23 ++++++++++++-------
 .../rolloutcontroller/reconciler_test.go      | 16 ++++++-------
 3 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/lib/autoupdate/rolloutcontroller/client_test.go b/lib/autoupdate/rolloutcontroller/client_test.go
index 3baa12153c47a..43a6f8b180fa3 100644
--- a/lib/autoupdate/rolloutcontroller/client_test.go
+++ b/lib/autoupdate/rolloutcontroller/client_test.go
@@ -21,9 +21,8 @@ package rolloutcontroller
 import (
 	"context"
 	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
-	"github.com/gravitational/teleport/api/utils"
 	"github.com/stretchr/testify/require"
-	"google.golang.org/protobuf/protoadapt"
+	"google.golang.org/protobuf/proto"
 	"testing"
 )
 
@@ -107,7 +106,7 @@ type callAnswer[T any] struct {
 // getHandler is used in a mock client to answer get resource requests during tests.
 // It takes a list of answers and errors and will return them when invoked.
 // If there are no stubs left it fails the test.
-type getHandler[T protoadapt.MessageV1] struct {
+type getHandler[T proto.Message] struct {
 	t       *testing.T
 	answers []callAnswer[T]
 }
@@ -122,7 +121,7 @@ func (h *getHandler[T]) handle(_ context.Context) (T, error) {
 
 	// We need to deep copy because the reconciler might do updates in place.
 	// We don't want the original resource to be edited as this would mess with other tests.
-	return utils.CloneProtoMsg(entry.result), entry.err
+	return proto.Clone(entry.result).(T), entry.err
 }
 
 // isEmpty returns true only if all stubs were consumed
@@ -131,9 +130,9 @@ func (h *getHandler[T]) isEmpty() bool {
 }
 
 // createUpdateHandler is used in a mock client to answer create or update resource requests during tests (any request whose arity is 2).
-// It first validates the input suing the passed validation function, then it returns the predefined answer and error.
+// It first validates the input using the provided validation function, then it returns the predefined answer and error.
 // If there are no stubs left it fails the test.
-type createUpdateHandler[T protoadapt.MessageV1] struct {
+type createUpdateHandler[T proto.Message] struct {
 	t       *testing.T
 	expect  []require.ValueAssertionFunc
 	answers []callAnswer[T]
@@ -155,7 +154,7 @@ func (h *createUpdateHandler[T]) handle(_ context.Context, object T) (T, error)
 
 	// We need to deep copy because the reconciler might do updates in place.
 	// We don't want the original resource to be edited as this would mess with other tests.
-	return utils.CloneProtoMsg(entry.result), entry.err
+	return proto.Clone(entry.result).(T), entry.err
 }
 
 // isEmpty returns true only if all stubs were consumed
diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rolloutcontroller/reconciler.go
index 0e1b8c75631db..83822d6c8cced 100644
--- a/lib/autoupdate/rolloutcontroller/reconciler.go
+++ b/lib/autoupdate/rolloutcontroller/reconciler.go
@@ -32,6 +32,7 @@ const (
 	reconciliationTimeout = 30 * time.Second
 	defaultConfigMode     = update.AgentsUpdateModeEnabled
 	defaultStrategy       = update.AgentsStrategyHaltOnError
+	maxConflictRetry      = 3
 )
 
 // Reconciler reconciles the AutoUpdateAgentRollout singleton based on the content of the AutoUpdateVersion and
@@ -42,21 +43,27 @@ type Reconciler struct {
 	clt Client
 	log *slog.Logger
 
-	// lock ensures we only run one reconciliation at a time
-	lock sync.Mutex
+	// mutex ensures we only run one reconciliation at a time
+	mutex sync.Mutex
 }
 
 // Reconcile the AutoUpdateAgentRollout singleton. The reconciliation can fail because of a conflict (multiple auths
 // are racing), in this case we retry the reconciliation immediately.
-func (r Reconciler) Reconcile(ctx context.Context) error {
+func (r *Reconciler) Reconcile(ctx context.Context) error {
+	r.mutex.Lock()
+	defer r.mutex.Unlock()
+
 	ctx, cancel := context.WithTimeout(ctx, reconciliationTimeout)
 	defer cancel()
-	for {
+	tries := 0
+	var err error
+	for tries < maxConflictRetry {
+		tries++
 		select {
 		case <-ctx.Done():
 			return ctx.Err()
 		default:
-			err := r.tryReconcile(ctx)
+			err = r.tryReconcile(ctx)
 			switch {
 			case err == nil:
 				return nil
@@ -64,7 +71,6 @@ func (r Reconciler) Reconcile(ctx context.Context) error {
 				// The resource changed since we last saw it
 				// We must have raced against another auth
 				// Let's retry the reconciliation
-				// TODO: add a backoff to avoid a tight or infinite loop?
 				r.log.DebugContext(ctx, "retrying reconciliation", "error", err)
 			default:
 				// error is non-nil and non-retryable
@@ -72,6 +78,7 @@ func (r Reconciler) Reconcile(ctx context.Context) error {
 			}
 		}
 	}
+	return trace.CompareFailed("compare failed, tried %d times, last error: %w", tries, err)
 }
 
 // tryReconcile tries to reconcile the AutoUpdateAgentRollout singleton.
@@ -79,7 +86,7 @@ func (r Reconciler) Reconcile(ctx context.Context) error {
 // The creation/update/deletion can fail with a trace.CompareFailedError or trace.NotFoundError
 // if the resource change while we were computing it.
 // The caller must handle those error and retry the reconciliation.
-func (r Reconciler) tryReconcile(ctx context.Context) error {
+func (r *Reconciler) tryReconcile(ctx context.Context) error {
 	// get autoupdate_config
 	config, err := r.clt.GetAutoUpdateConfig(ctx)
 	if err != nil && !trace.IsNotFound(err) {
@@ -158,7 +165,7 @@ func (r Reconciler) tryReconcile(ctx context.Context) error {
 	return trace.Wrap(err, "updating rollout")
 }
 
-func (r Reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) {
+func (r *Reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) {
 	// reconcile mode
 	mode, err := getMode(config.GetMode(), version.GetMode())
 	if err != nil {
diff --git a/lib/autoupdate/rolloutcontroller/reconciler_test.go b/lib/autoupdate/rolloutcontroller/reconciler_test.go
index 5baf13d903e5e..de17fd9831583 100644
--- a/lib/autoupdate/rolloutcontroller/reconciler_test.go
+++ b/lib/autoupdate/rolloutcontroller/reconciler_test.go
@@ -305,7 +305,7 @@ func TestTryReconcile(t *testing.T) {
 
 			// Test execution: Running the reconciliation
 
-			reconciler := Reconciler{
+			reconciler := &Reconciler{
 				clt: client,
 				log: log,
 			}
@@ -373,7 +373,7 @@ func TestReconciler_Reconcile(t *testing.T) {
 		}
 
 		client := newMockClient(t, stubs)
-		reconciler := Reconciler{
+		reconciler := &Reconciler{
 			clt: client,
 			log: log,
 		}
@@ -395,7 +395,7 @@ func TestReconciler_Reconcile(t *testing.T) {
 		}
 
 		client := newMockClient(t, stubs)
-		reconciler := Reconciler{
+		reconciler := &Reconciler{
 			clt: client,
 			log: log,
 		}
@@ -419,7 +419,7 @@ func TestReconciler_Reconcile(t *testing.T) {
 		}
 
 		client := newMockClient(t, stubs)
-		reconciler := Reconciler{
+		reconciler := &Reconciler{
 			clt: client,
 			log: log,
 		}
@@ -459,7 +459,7 @@ func TestReconciler_Reconcile(t *testing.T) {
 		}
 
 		client := newMockClient(t, stubs)
-		reconciler := Reconciler{
+		reconciler := &Reconciler{
 			clt: client,
 			log: log,
 		}
@@ -497,7 +497,7 @@ func TestReconciler_Reconcile(t *testing.T) {
 		}
 
 		client := newMockClient(t, stubs)
-		reconciler := Reconciler{
+		reconciler := &Reconciler{
 			clt: client,
 			log: log,
 		}
@@ -521,7 +521,7 @@ func TestReconciler_Reconcile(t *testing.T) {
 		}
 
 		client := newMockClient(t, stubs)
-		reconciler := Reconciler{
+		reconciler := &Reconciler{
 			clt: client,
 			log: log,
 		}
@@ -551,7 +551,7 @@ func TestReconciler_Reconcile(t *testing.T) {
 		}
 
 		client := newMockClient(t, stubs)
-		reconciler := Reconciler{
+		reconciler := &Reconciler{
 			clt: client,
 			log: log,
 		}

From bf79776c58517eccf3eefb7f1d40e4476e0a72ca Mon Sep 17 00:00:00 2001
From: hugoShaka <hugo.hervieux@goteleport.com>
Date: Mon, 4 Nov 2024 16:20:53 -0500
Subject: [PATCH 3/5] address edoardo's feedback pt.2

---
 lib/autoupdate/rolloutcontroller/reconciler.go | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rolloutcontroller/reconciler.go
index 83822d6c8cced..bf0abd8f5ed60 100644
--- a/lib/autoupdate/rolloutcontroller/reconciler.go
+++ b/lib/autoupdate/rolloutcontroller/reconciler.go
@@ -88,15 +88,19 @@ func (r *Reconciler) Reconcile(ctx context.Context) error {
 // The caller must handle those error and retry the reconciliation.
 func (r *Reconciler) tryReconcile(ctx context.Context) error {
 	// get autoupdate_config
-	config, err := r.clt.GetAutoUpdateConfig(ctx)
-	if err != nil && !trace.IsNotFound(err) {
+	var config *autoupdate.AutoUpdateConfig
+	if c, err := r.clt.GetAutoUpdateConfig(ctx); err == nil {
+		config = c
+	} else if !trace.IsNotFound(err) {
 		return trace.Wrap(err, "getting autoupdate_config")
 	}
 
 	// get autoupdate_version
-	version, err := r.clt.GetAutoUpdateVersion(ctx)
-	if err != nil && !trace.IsNotFound(err) {
-		return trace.Wrap(err, "getting autoupdate_version")
+	var version *autoupdate.AutoUpdateVersion
+	if v, err := r.clt.GetAutoUpdateVersion(ctx); err == nil {
+		version = v
+	} else if !trace.IsNotFound(err) {
+		return trace.Wrap(err, "getting autoupdate version")
 	}
 
 	// get autoupdate_agent_rollout

From 291f0eadc263188562829f94e19caccd2ae5b7b7 Mon Sep 17 00:00:00 2001
From: hugoShaka <hugo.hervieux@goteleport.com>
Date: Mon, 4 Nov 2024 16:58:45 -0500
Subject: [PATCH 4/5] fixup! address edoardo's feedback

---
 lib/autoupdate/rolloutcontroller/reconciler.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rolloutcontroller/reconciler.go
index bf0abd8f5ed60..de1f20396bedf 100644
--- a/lib/autoupdate/rolloutcontroller/reconciler.go
+++ b/lib/autoupdate/rolloutcontroller/reconciler.go
@@ -78,7 +78,7 @@ func (r *Reconciler) Reconcile(ctx context.Context) error {
 			}
 		}
 	}
-	return trace.CompareFailed("compare failed, tried %d times, last error: %w", tries, err)
+	return trace.CompareFailed("compare failed, tried %d times, last error: %s", tries, err)
 }
 
 // tryReconcile tries to reconcile the AutoUpdateAgentRollout singleton.

From 3bcb41e599dbafd4302f70411b92a611e689072f Mon Sep 17 00:00:00 2001
From: hugoShaka <hugo.hervieux@goteleport.com>
Date: Wed, 13 Nov 2024 17:25:08 -0500
Subject: [PATCH 5/5] lint

---
 lib/autoupdate/rolloutcontroller/client.go          |  1 +
 lib/autoupdate/rolloutcontroller/client_test.go     |  6 ++++--
 lib/autoupdate/rolloutcontroller/reconciler.go      |  8 +++++---
 lib/autoupdate/rolloutcontroller/reconciler_test.go | 10 ++++++----
 4 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/lib/autoupdate/rolloutcontroller/client.go b/lib/autoupdate/rolloutcontroller/client.go
index c54d41ad75ccd..4dead0f9dee19 100644
--- a/lib/autoupdate/rolloutcontroller/client.go
+++ b/lib/autoupdate/rolloutcontroller/client.go
@@ -20,6 +20,7 @@ package rolloutcontroller
 
 import (
 	"context"
+
 	autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
 )
 
diff --git a/lib/autoupdate/rolloutcontroller/client_test.go b/lib/autoupdate/rolloutcontroller/client_test.go
index 43a6f8b180fa3..ba204ffb77db3 100644
--- a/lib/autoupdate/rolloutcontroller/client_test.go
+++ b/lib/autoupdate/rolloutcontroller/client_test.go
@@ -20,10 +20,12 @@ package rolloutcontroller
 
 import (
 	"context"
-	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	"testing"
+
 	"github.com/stretchr/testify/require"
 	"google.golang.org/protobuf/proto"
-	"testing"
+
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
 )
 
 // mockClient is a mock implementation if the Client interface for testing purposes.
diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rolloutcontroller/reconciler.go
index de1f20396bedf..78989c4ec4a6b 100644
--- a/lib/autoupdate/rolloutcontroller/reconciler.go
+++ b/lib/autoupdate/rolloutcontroller/reconciler.go
@@ -20,12 +20,14 @@ package rolloutcontroller
 
 import (
 	"context"
-	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
-	update "github.com/gravitational/teleport/api/types/autoupdate"
-	"github.com/gravitational/trace"
 	"log/slog"
 	"sync"
 	"time"
+
+	"github.com/gravitational/trace"
+
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	update "github.com/gravitational/teleport/api/types/autoupdate"
 )
 
 const (
diff --git a/lib/autoupdate/rolloutcontroller/reconciler_test.go b/lib/autoupdate/rolloutcontroller/reconciler_test.go
index de17fd9831583..340451d8da46d 100644
--- a/lib/autoupdate/rolloutcontroller/reconciler_test.go
+++ b/lib/autoupdate/rolloutcontroller/reconciler_test.go
@@ -20,17 +20,19 @@ package rolloutcontroller
 
 import (
 	"context"
+	"testing"
+
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/uuid"
+	"github.com/gravitational/trace"
+	"github.com/stretchr/testify/require"
+	"google.golang.org/protobuf/testing/protocmp"
+
 	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
 	update "github.com/gravitational/teleport/api/types/autoupdate"
 	apiutils "github.com/gravitational/teleport/api/utils"
 	"github.com/gravitational/teleport/lib/backend"
 	"github.com/gravitational/teleport/lib/utils"
-	"github.com/gravitational/trace"
-	"github.com/stretchr/testify/require"
-	"google.golang.org/protobuf/testing/protocmp"
-	"testing"
 )
 
 // rolloutEquals returns a require.ValueAssertionFunc that checks the rollout is identical.