diff --git a/lib/autoupdate/rolloutcontroller/client.go b/lib/autoupdate/rolloutcontroller/client.go
new file mode 100644
index 0000000000000..4dead0f9dee19
--- /dev/null
+++ b/lib/autoupdate/rolloutcontroller/client.go
@@ -0,0 +1,46 @@
+/*
+ * Teleport
+ * Copyright (C) 2024 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package rolloutcontroller
+
+import (
+ "context"
+
+ autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+)
+
+// Client is the subset of the Teleport client RPCs the controller needs.
+type Client interface {
+ // GetAutoUpdateConfig gets the AutoUpdateConfig singleton resource.
+ GetAutoUpdateConfig(ctx context.Context) (*autoupdatepb.AutoUpdateConfig, error)
+
+ // GetAutoUpdateVersion gets the AutoUpdateVersion singleton resource.
+ GetAutoUpdateVersion(ctx context.Context) (*autoupdatepb.AutoUpdateVersion, error)
+
+ // GetAutoUpdateAgentRollout gets the AutoUpdateAgentRollout singleton resource.
+ GetAutoUpdateAgentRollout(ctx context.Context) (*autoupdatepb.AutoUpdateAgentRollout, error)
+
+ // CreateAutoUpdateAgentRollout creates the AutoUpdateAgentRollout singleton resource.
+ CreateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdatepb.AutoUpdateAgentRollout) (*autoupdatepb.AutoUpdateAgentRollout, error)
+
+ // UpdateAutoUpdateAgentRollout updates the AutoUpdateAgentRollout singleton resource.
+ UpdateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdatepb.AutoUpdateAgentRollout) (*autoupdatepb.AutoUpdateAgentRollout, error)
+
+ // DeleteAutoUpdateAgentRollout deletes the AutoUpdateAgentRollout singleton resource.
+ DeleteAutoUpdateAgentRollout(ctx context.Context) error
+}
diff --git a/lib/autoupdate/rolloutcontroller/client_test.go b/lib/autoupdate/rolloutcontroller/client_test.go
new file mode 100644
index 0000000000000..ba204ffb77db3
--- /dev/null
+++ b/lib/autoupdate/rolloutcontroller/client_test.go
@@ -0,0 +1,189 @@
+/*
+ * Teleport
+ * Copyright (C) 2024 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package rolloutcontroller
+
+import (
+ "context"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "google.golang.org/protobuf/proto"
+
+ "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+)
+
+// mockClient is a mock implementation if the Client interface for testing purposes.
+// This is used to precisely check which calls are made by the Reconciler during tests.
+// Use newMockClient to create one from stubs. Once the test is over, you must call
+// mockClient.checkIfEmpty to validate all expected calls were made.
+type mockClient struct {
+ getAutoUpdateConfig *getHandler[*autoupdate.AutoUpdateConfig]
+ getAutoUpdateVersion *getHandler[*autoupdate.AutoUpdateVersion]
+ getAutoUpdateAgentRollout *getHandler[*autoupdate.AutoUpdateAgentRollout]
+ createAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
+ updateAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
+ deleteAutoUpdateAgentRollout *deleteHandler
+}
+
+func (m mockClient) GetAutoUpdateConfig(ctx context.Context) (*autoupdate.AutoUpdateConfig, error) {
+ return m.getAutoUpdateConfig.handle(ctx)
+}
+
+func (m mockClient) GetAutoUpdateVersion(ctx context.Context) (*autoupdate.AutoUpdateVersion, error) {
+ return m.getAutoUpdateVersion.handle(ctx)
+}
+
+func (m mockClient) GetAutoUpdateAgentRollout(ctx context.Context) (*autoupdate.AutoUpdateAgentRollout, error) {
+ return m.getAutoUpdateAgentRollout.handle(ctx)
+}
+
+func (m mockClient) CreateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdate.AutoUpdateAgentRollout) (*autoupdate.AutoUpdateAgentRollout, error) {
+ return m.createAutoUpdateAgentRollout.handle(ctx, rollout)
+}
+
+func (m mockClient) UpdateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdate.AutoUpdateAgentRollout) (*autoupdate.AutoUpdateAgentRollout, error) {
+ return m.updateAutoUpdateAgentRollout.handle(ctx, rollout)
+}
+
+func (m mockClient) DeleteAutoUpdateAgentRollout(ctx context.Context) error {
+ return m.deleteAutoUpdateAgentRollout.handle(ctx)
+}
+
+func (m mockClient) checkIfEmpty(t *testing.T) {
+ require.True(t, m.getAutoUpdateConfig.isEmpty(), "Get autoupdate_config mock not empty")
+ require.True(t, m.getAutoUpdateVersion.isEmpty(), "Get autoupdate_version mock not empty")
+ require.True(t, m.getAutoUpdateAgentRollout.isEmpty(), "Get autoupdate_agent_rollout mock not empty")
+ require.True(t, m.createAutoUpdateAgentRollout.isEmpty(), "Create autoupdate_agent_rollout mock not empty")
+ require.True(t, m.updateAutoUpdateAgentRollout.isEmpty(), "Update autoupdate_agent_rollout mock not empty")
+ require.True(t, m.deleteAutoUpdateAgentRollout.isEmpty(), "Delete autoupdate_agent_rollout mock not empty")
+}
+
+func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient {
+ // Fail early if there's a mismatch
+ require.Equal(t, len(stubs.createRolloutAnswers), len(stubs.createRolloutExpects), "invalid stubs, create validations and answers slices are not the same length")
+ require.Equal(t, len(stubs.updateRolloutAnswers), len(stubs.updateRolloutExpects), "invalid stubs, update validations and answers slices are not the same length")
+
+ return &mockClient{
+ getAutoUpdateConfig: &getHandler[*autoupdate.AutoUpdateConfig]{t, stubs.configAnswers},
+ getAutoUpdateVersion: &getHandler[*autoupdate.AutoUpdateVersion]{t, stubs.versionAnswers},
+ getAutoUpdateAgentRollout: &getHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.rolloutAnswers},
+ createAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.createRolloutExpects, stubs.createRolloutAnswers},
+ updateAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.updateRolloutExpects, stubs.updateRolloutAnswers},
+ deleteAutoUpdateAgentRollout: &deleteHandler{t, stubs.deleteRolloutAnswers},
+ }
+}
+
+type mockClientStubs struct {
+ configAnswers []callAnswer[*autoupdate.AutoUpdateConfig]
+ versionAnswers []callAnswer[*autoupdate.AutoUpdateVersion]
+ rolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout]
+ createRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout]
+ createRolloutExpects []require.ValueAssertionFunc
+ updateRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout]
+ updateRolloutExpects []require.ValueAssertionFunc
+ deleteRolloutAnswers []error
+}
+
+type callAnswer[T any] struct {
+ result T
+ err error
+}
+
+// getHandler is used in a mock client to answer get resource requests during tests.
+// It takes a list of answers and errors and will return them when invoked.
+// If there are no stubs left it fails the test.
+type getHandler[T proto.Message] struct {
+ t *testing.T
+ answers []callAnswer[T]
+}
+
+func (h *getHandler[T]) handle(_ context.Context) (T, error) {
+ if len(h.answers) == 0 {
+ require.Fail(h.t, "no answers left")
+ }
+
+ entry := h.answers[0]
+ h.answers = h.answers[1:]
+
+ // We need to deep copy because the reconciler might do updates in place.
+ // We don't want the original resource to be edited as this would mess with other tests.
+ return proto.Clone(entry.result).(T), entry.err
+}
+
+// isEmpty returns true only if all stubs were consumed
+func (h *getHandler[T]) isEmpty() bool {
+ return len(h.answers) == 0
+}
+
+// createUpdateHandler is used in a mock client to answer create or update resource requests during tests (any request whose arity is 2).
+// It first validates the input using the provided validation function, then it returns the predefined answer and error.
+// If there are no stubs left it fails the test.
+type createUpdateHandler[T proto.Message] struct {
+ t *testing.T
+ expect []require.ValueAssertionFunc
+ answers []callAnswer[T]
+}
+
+func (h *createUpdateHandler[T]) handle(_ context.Context, object T) (T, error) {
+ if len(h.expect) == 0 {
+ require.Fail(h.t, "not expecting more calls")
+ }
+ h.expect[0](h.t, object)
+ h.expect = h.expect[1:]
+
+ if len(h.answers) == 0 {
+ require.Fail(h.t, "no answers left")
+ }
+
+ entry := h.answers[0]
+ h.answers = h.answers[1:]
+
+ // We need to deep copy because the reconciler might do updates in place.
+ // We don't want the original resource to be edited as this would mess with other tests.
+ return proto.Clone(entry.result).(T), entry.err
+}
+
+// isEmpty returns true only if all stubs were consumed
+func (h *createUpdateHandler[T]) isEmpty() bool {
+ return len(h.answers) == 0 && len(h.expect) == 0
+}
+
+// deleteHandler is used in a mock client to answer delete resource requests during tests.
+// It takes a list of errors and returns them when invoked.
+// If there are no stubs left it fails the test.
+type deleteHandler struct {
+ t *testing.T
+ answers []error
+}
+
+func (h *deleteHandler) handle(_ context.Context) error {
+ if len(h.answers) == 0 {
+ require.Fail(h.t, "no answers left")
+ }
+
+ entry := h.answers[0]
+ h.answers = h.answers[1:]
+
+ return entry
+}
+
+// isEmpty returns true only if all stubs were consumed
+func (h *deleteHandler) isEmpty() bool {
+ return len(h.answers) == 0
+}
diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rolloutcontroller/reconciler.go
new file mode 100644
index 0000000000000..78989c4ec4a6b
--- /dev/null
+++ b/lib/autoupdate/rolloutcontroller/reconciler.go
@@ -0,0 +1,235 @@
+/*
+ * Teleport
+ * Copyright (C) 2024 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package rolloutcontroller
+
+import (
+ "context"
+ "log/slog"
+ "sync"
+ "time"
+
+ "github.com/gravitational/trace"
+
+ "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+ update "github.com/gravitational/teleport/api/types/autoupdate"
+)
+
+const (
+ reconciliationTimeout = 30 * time.Second
+ defaultConfigMode = update.AgentsUpdateModeEnabled
+ defaultStrategy = update.AgentsStrategyHaltOnError
+ maxConflictRetry = 3
+)
+
+// Reconciler reconciles the AutoUpdateAgentRollout singleton based on the content of the AutoUpdateVersion and
+// AutoUpdateConfig singletons. This reconciler is not based on the services.GenericReconciler because:
+// - we reconcile 2 resources with one
+// - both input and output are singletons, we don't need the multi resource logic nor stream/paginated APIs
+type Reconciler struct {
+ clt Client
+ log *slog.Logger
+
+ // mutex ensures we only run one reconciliation at a time
+ mutex sync.Mutex
+}
+
+// Reconcile the AutoUpdateAgentRollout singleton. The reconciliation can fail because of a conflict (multiple auths
+// are racing), in this case we retry the reconciliation immediately.
+func (r *Reconciler) Reconcile(ctx context.Context) error {
+ r.mutex.Lock()
+ defer r.mutex.Unlock()
+
+ ctx, cancel := context.WithTimeout(ctx, reconciliationTimeout)
+ defer cancel()
+ tries := 0
+ var err error
+ for tries < maxConflictRetry {
+ tries++
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ default:
+ err = r.tryReconcile(ctx)
+ switch {
+ case err == nil:
+ return nil
+ case trace.IsCompareFailed(err), trace.IsNotFound(err):
+ // The resource changed since we last saw it
+ // We must have raced against another auth
+ // Let's retry the reconciliation
+ r.log.DebugContext(ctx, "retrying reconciliation", "error", err)
+ default:
+ // error is non-nil and non-retryable
+ return trace.Wrap(err, "failed to reconcile rollout")
+ }
+ }
+ }
+ return trace.CompareFailed("compare failed, tried %d times, last error: %s", tries, err)
+}
+
+// tryReconcile tries to reconcile the AutoUpdateAgentRollout singleton.
+// This function should be nilpotent if the AutoUpdateAgentRollout is already up-to-date.
+// The creation/update/deletion can fail with a trace.CompareFailedError or trace.NotFoundError
+// if the resource change while we were computing it.
+// The caller must handle those error and retry the reconciliation.
+func (r *Reconciler) tryReconcile(ctx context.Context) error {
+ // get autoupdate_config
+ var config *autoupdate.AutoUpdateConfig
+ if c, err := r.clt.GetAutoUpdateConfig(ctx); err == nil {
+ config = c
+ } else if !trace.IsNotFound(err) {
+ return trace.Wrap(err, "getting autoupdate_config")
+ }
+
+ // get autoupdate_version
+ var version *autoupdate.AutoUpdateVersion
+ if v, err := r.clt.GetAutoUpdateVersion(ctx); err == nil {
+ version = v
+ } else if !trace.IsNotFound(err) {
+ return trace.Wrap(err, "getting autoupdate version")
+ }
+
+ // get autoupdate_agent_rollout
+ rolloutExists := true
+ existingRollout, err := r.clt.GetAutoUpdateAgentRollout(ctx)
+ if err != nil && !trace.IsNotFound(err) {
+ return trace.Wrap(err, "getting autoupdate_agent_rollout")
+ }
+ if trace.IsNotFound(err) {
+ // rollout doesn't exist yet, we'll need to call Create instead of Update.
+ rolloutExists = false
+ }
+
+ // if autoupdate_version does not exist or does not contain spec.agents, we should not configure a rollout
+ if version.GetSpec().GetAgents() == nil {
+ if !rolloutExists {
+ // the rollout doesn't exist, nothing to do
+ return nil
+ }
+ // the rollout exists, we must delete it
+ return r.clt.DeleteAutoUpdateAgentRollout(ctx)
+ }
+
+ // compute what the spec should look like
+ newSpec, err := r.buildRolloutSpec(config.GetSpec().GetAgents(), version.GetSpec().GetAgents())
+ if err != nil {
+ return trace.Wrap(err, "mutating rollout")
+ }
+
+ // if there are no existing rollout, we create a new one
+ if !rolloutExists {
+ rollout, err := update.NewAutoUpdateAgentRollout(newSpec)
+ if err != nil {
+ return trace.Wrap(err, "validating new rollout")
+ }
+ _, err = r.clt.CreateAutoUpdateAgentRollout(ctx, rollout)
+ return trace.Wrap(err, "creating rollout")
+ }
+
+ // there was an existing rollout, we must figure if something changed
+ specChanged := existingRollout.GetSpec().GetStartVersion() != newSpec.GetStartVersion() ||
+ existingRollout.GetSpec().GetTargetVersion() != newSpec.GetTargetVersion() ||
+ existingRollout.GetSpec().GetAutoupdateMode() != newSpec.GetAutoupdateMode() ||
+ existingRollout.GetSpec().GetStrategy() != newSpec.GetStrategy() ||
+ existingRollout.GetSpec().GetSchedule() != newSpec.GetSchedule()
+
+ // TODO: reconcile the status here when we'll add group support.
+ // Even if the spec does not change, we might still have to update the status:
+ // - sync groups with the ones from the user config
+ // - progress the rollout across groups
+
+ // if nothing changed, no need to update the resource
+ if !specChanged {
+ r.log.DebugContext(ctx, "rollout unchanged")
+ return nil
+ }
+
+ // something changed, we replace the old spec with the new one, validate and update the resource
+ // we don't create a new resource to keep the revision ID and
+ existingRollout.Spec = newSpec
+ err = update.ValidateAutoUpdateAgentRollout(existingRollout)
+ if err != nil {
+ return trace.Wrap(err, "validating mutated rollout")
+ }
+ _, err = r.clt.UpdateAutoUpdateAgentRollout(ctx, existingRollout)
+ return trace.Wrap(err, "updating rollout")
+}
+
+func (r *Reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) {
+ // reconcile mode
+ mode, err := getMode(config.GetMode(), version.GetMode())
+ if err != nil {
+ return nil, trace.Wrap(err, "computing agent update mode")
+ }
+
+ strategy := config.GetStrategy()
+ if strategy == "" {
+ strategy = defaultStrategy
+ }
+
+ return &autoupdate.AutoUpdateAgentRolloutSpec{
+ StartVersion: version.GetStartVersion(),
+ TargetVersion: version.GetTargetVersion(),
+ Schedule: version.GetSchedule(),
+ AutoupdateMode: mode,
+ Strategy: strategy,
+ }, nil
+
+}
+
+// agentModeCode maps agents mode to integers.
+// When config and version modes don't match, the lowest integer takes precedence.
+var (
+ agentModeCode = map[string]int{
+ update.AgentsUpdateModeDisabled: 0,
+ update.AgentsUpdateModeSuspended: 1,
+ update.AgentsUpdateModeEnabled: 2,
+ }
+ codeToAgentMode = map[int]string{
+ 0: update.AgentsUpdateModeDisabled,
+ 1: update.AgentsUpdateModeSuspended,
+ 2: update.AgentsUpdateModeEnabled,
+ }
+)
+
+// getMode merges the agent modes coming from the version and config resources into a single mode.
+// "disabled" takes precedence over "suspended", which takes precedence over "enabled".
+func getMode(configMode, versionMode string) (string, error) {
+ if configMode == "" {
+ configMode = defaultConfigMode
+ }
+ if versionMode == "" {
+ return "", trace.BadParameter("version mode empty")
+ }
+
+ configCode, ok := agentModeCode[configMode]
+ if !ok {
+ return "", trace.BadParameter("unsupported agent config mode: %v", configMode)
+ }
+ versionCode, ok := agentModeCode[versionMode]
+ if !ok {
+ return "", trace.BadParameter("unsupported agent version mode: %v", versionMode)
+ }
+
+ // The lowest code takes precedence
+ if configCode <= versionCode {
+ return codeToAgentMode[configCode], nil
+ }
+ return codeToAgentMode[versionCode], nil
+}
diff --git a/lib/autoupdate/rolloutcontroller/reconciler_test.go b/lib/autoupdate/rolloutcontroller/reconciler_test.go
new file mode 100644
index 0000000000000..340451d8da46d
--- /dev/null
+++ b/lib/autoupdate/rolloutcontroller/reconciler_test.go
@@ -0,0 +1,567 @@
+/*
+ * Teleport
+ * Copyright (C) 2024 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package rolloutcontroller
+
+import (
+ "context"
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/uuid"
+ "github.com/gravitational/trace"
+ "github.com/stretchr/testify/require"
+ "google.golang.org/protobuf/testing/protocmp"
+
+ "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+ update "github.com/gravitational/teleport/api/types/autoupdate"
+ apiutils "github.com/gravitational/teleport/api/utils"
+ "github.com/gravitational/teleport/lib/backend"
+ "github.com/gravitational/teleport/lib/utils"
+)
+
+// rolloutEquals returns a require.ValueAssertionFunc that checks the rollout is identical.
+// The comparison does not take into account the proto internal state.
+func rolloutEquals(expected *autoupdate.AutoUpdateAgentRollout) require.ValueAssertionFunc {
+ return func(t require.TestingT, i interface{}, _ ...interface{}) {
+ require.IsType(t, &autoupdate.AutoUpdateAgentRollout{}, i)
+ actual := i.(*autoupdate.AutoUpdateAgentRollout)
+ require.Empty(t, cmp.Diff(expected, actual, protocmp.Transform()))
+ }
+}
+
+// cancelContext wraps a require.ValueAssertionFunc so that the given context is canceled before checking the assertion.
+// This is used to test how the reconciler behaves when its context is canceled.
+func cancelContext(assertionFunc require.ValueAssertionFunc, cancel func()) require.ValueAssertionFunc {
+ return func(t require.TestingT, i interface{}, i2 ...interface{}) {
+ cancel()
+ assertionFunc(t, i, i2...)
+ }
+}
+
+// withRevisionID creates a deep copy of an agent rollout and sets the revisionID in its metadata.
+// This is used to test the conditional update retry logic.
+func withRevisionID(original *autoupdate.AutoUpdateAgentRollout, revision string) *autoupdate.AutoUpdateAgentRollout {
+ revisioned := apiutils.CloneProtoMsg(original)
+ revisioned.Metadata.Revision = revision
+ return revisioned
+}
+
+func TestGetMode(t *testing.T) {
+ t.Parallel()
+ tests := []struct {
+ name string
+ configMode string
+ versionMode string
+ expected string
+ checkErr require.ErrorAssertionFunc
+ }{
+ {
+ name: "config and version equal",
+ configMode: update.AgentsUpdateModeEnabled,
+ versionMode: update.AgentsUpdateModeEnabled,
+ expected: update.AgentsUpdateModeEnabled,
+ checkErr: require.NoError,
+ },
+ {
+ name: "config suspends, version enables",
+ configMode: update.AgentsUpdateModeSuspended,
+ versionMode: update.AgentsUpdateModeEnabled,
+ expected: update.AgentsUpdateModeSuspended,
+ checkErr: require.NoError,
+ },
+ {
+ name: "config enables, version suspends",
+ configMode: update.AgentsUpdateModeEnabled,
+ versionMode: update.AgentsUpdateModeSuspended,
+ expected: update.AgentsUpdateModeSuspended,
+ checkErr: require.NoError,
+ },
+ {
+ name: "config suspends, version disables",
+ configMode: update.AgentsUpdateModeSuspended,
+ versionMode: update.AgentsUpdateModeDisabled,
+ expected: update.AgentsUpdateModeDisabled,
+ checkErr: require.NoError,
+ },
+ {
+ name: "version enables, no config",
+ configMode: "",
+ versionMode: update.AgentsUpdateModeEnabled,
+ expected: update.AgentsUpdateModeEnabled,
+ checkErr: require.NoError,
+ },
+ {
+ name: "config enables, no version",
+ configMode: update.AgentsUpdateModeEnabled,
+ versionMode: "",
+ expected: "",
+ checkErr: require.Error,
+ },
+ {
+ name: "unknown mode",
+ configMode: "this in not a mode",
+ versionMode: update.AgentsUpdateModeEnabled,
+ expected: "",
+ checkErr: require.Error,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ result, err := getMode(tt.configMode, tt.versionMode)
+ tt.checkErr(t, err)
+ require.Equal(t, tt.expected, result)
+ })
+ }
+}
+
+func TestTryReconcile(t *testing.T) {
+ t.Parallel()
+ log := utils.NewSlogLoggerForTests()
+ ctx := context.Background()
+ // Test setup: creating fixtures
+ configOK, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
+ Tools: &autoupdate.AutoUpdateConfigSpecTools{
+ Mode: update.ToolsUpdateModeEnabled,
+ },
+ Agents: &autoupdate.AutoUpdateConfigSpecAgents{
+ Mode: update.AgentsUpdateModeEnabled,
+ Strategy: update.AgentsStrategyHaltOnError,
+ },
+ })
+ require.NoError(t, err)
+
+ configNoAgent, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
+ Tools: &autoupdate.AutoUpdateConfigSpecTools{
+ Mode: update.ToolsUpdateModeEnabled,
+ },
+ })
+ require.NoError(t, err)
+
+ versionOK, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{
+ Tools: &autoupdate.AutoUpdateVersionSpecTools{
+ TargetVersion: "1.2.3",
+ },
+ Agents: &autoupdate.AutoUpdateVersionSpecAgents{
+ StartVersion: "1.2.3",
+ TargetVersion: "1.2.4",
+ Schedule: update.AgentsScheduleImmediate,
+ Mode: update.AgentsUpdateModeEnabled,
+ },
+ })
+ require.NoError(t, err)
+
+ versionNoAgent, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{
+ Tools: &autoupdate.AutoUpdateVersionSpecTools{
+ TargetVersion: "1.2.3",
+ },
+ })
+ require.NoError(t, err)
+
+ upToDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
+ StartVersion: "1.2.3",
+ TargetVersion: "1.2.4",
+ Schedule: update.AgentsScheduleImmediate,
+ AutoupdateMode: update.AgentsUpdateModeEnabled,
+ Strategy: update.AgentsStrategyHaltOnError,
+ })
+ require.NoError(t, err)
+
+ outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
+ StartVersion: "1.2.2",
+ TargetVersion: "1.2.3",
+ Schedule: update.AgentsScheduleImmediate,
+ AutoupdateMode: update.AgentsUpdateModeEnabled,
+ Strategy: update.AgentsStrategyHaltOnError,
+ })
+ require.NoError(t, err)
+
+ tests := []struct {
+ name string
+ config *autoupdate.AutoUpdateConfig
+ version *autoupdate.AutoUpdateVersion
+ existingRollout *autoupdate.AutoUpdateAgentRollout
+ createExpect *autoupdate.AutoUpdateAgentRollout
+ updateExpect *autoupdate.AutoUpdateAgentRollout
+ deleteExpect bool
+ }{
+ {
+ name: "config and version exist, no existing rollout",
+ // rollout should be created
+ config: configOK,
+ version: versionOK,
+ createExpect: upToDateRollout,
+ },
+ {
+ name: "version exist, no existing rollout nor config",
+ // rollout should be created
+ version: versionOK,
+ createExpect: upToDateRollout,
+ },
+ {
+ name: "version exist, no existing rollout, config exist but doesn't contain agent section",
+ // rollout should be created
+ config: configNoAgent,
+ version: versionOK,
+ createExpect: upToDateRollout,
+ },
+ {
+ name: "config exist, no existing rollout nor version",
+ // rollout should not be created as there is no version
+ config: configOK,
+ },
+ {
+ name: "config exist, no existing rollout, version exist but doesn't contain agent section",
+ // rollout should not be created as there is no version
+ config: configOK,
+ version: versionNoAgent,
+ },
+ {
+ name: "no existing rollout, config, nor version",
+ // rollout should not be created as there is no version
+ },
+ {
+ name: "existing out-of-date rollout, config and version exist",
+ // rollout should be updated
+ config: configOK,
+ version: versionOK,
+ existingRollout: outOfDateRollout,
+ updateExpect: upToDateRollout,
+ },
+ {
+ name: "existing up-to-date rollout, config and version exist",
+ // rollout should not be updated as its spec is already good
+ config: configOK,
+ version: versionOK,
+ existingRollout: upToDateRollout,
+ },
+ {
+ name: "existing rollout and config but no version",
+ // rollout should be deleted as there is no version
+ config: configOK,
+ existingRollout: upToDateRollout,
+ deleteExpect: true,
+ },
+ {
+ name: "existing rollout but no config nor version",
+ // rollout should be deleted as there is no version
+ existingRollout: upToDateRollout,
+ deleteExpect: true,
+ },
+ }
+ for _, tt := range tests {
+ tt := tt
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+ // Test setup: creating a fake client answering fixtures
+ var stubs mockClientStubs
+
+ if tt.config != nil {
+ stubs.configAnswers = []callAnswer[*autoupdate.AutoUpdateConfig]{{tt.config, nil}}
+ } else {
+ stubs.configAnswers = []callAnswer[*autoupdate.AutoUpdateConfig]{{nil, trace.NotFound("no config")}}
+ }
+
+ if tt.version != nil {
+ stubs.versionAnswers = []callAnswer[*autoupdate.AutoUpdateVersion]{{tt.version, nil}}
+ } else {
+ stubs.versionAnswers = []callAnswer[*autoupdate.AutoUpdateVersion]{{nil, trace.NotFound("no version")}}
+ }
+
+ if tt.existingRollout != nil {
+ stubs.rolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.existingRollout, nil}}
+ } else {
+ stubs.rolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.NotFound("no rollout")}}
+ }
+
+ if tt.createExpect != nil {
+ stubs.createRolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.createExpect, nil}}
+ stubs.createRolloutExpects = []require.ValueAssertionFunc{rolloutEquals(tt.createExpect)}
+ }
+
+ if tt.updateExpect != nil {
+ stubs.updateRolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.updateExpect, nil}}
+ stubs.updateRolloutExpects = []require.ValueAssertionFunc{rolloutEquals(tt.updateExpect)}
+ }
+
+ if tt.deleteExpect {
+ stubs.deleteRolloutAnswers = []error{nil}
+ }
+
+ client := newMockClient(t, stubs)
+
+ // Test execution: Running the reconciliation
+
+ reconciler := &Reconciler{
+ clt: client,
+ log: log,
+ }
+
+ require.NoError(t, reconciler.tryReconcile(ctx))
+ // Test validation: Checking that the mock client is now empty
+
+ client.checkIfEmpty(t)
+ })
+ }
+}
+
+func TestReconciler_Reconcile(t *testing.T) {
+ log := utils.NewSlogLoggerForTests()
+ ctx := context.Background()
+ // Test setup: creating fixtures
+ config, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
+ Tools: &autoupdate.AutoUpdateConfigSpecTools{
+ Mode: update.ToolsUpdateModeEnabled,
+ },
+ Agents: &autoupdate.AutoUpdateConfigSpecAgents{
+ Mode: update.AgentsUpdateModeEnabled,
+ Strategy: update.AgentsStrategyHaltOnError,
+ },
+ })
+ require.NoError(t, err)
+ version, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{
+ Tools: &autoupdate.AutoUpdateVersionSpecTools{
+ TargetVersion: "1.2.3",
+ },
+ Agents: &autoupdate.AutoUpdateVersionSpecAgents{
+ StartVersion: "1.2.3",
+ TargetVersion: "1.2.4",
+ Schedule: update.AgentsScheduleImmediate,
+ Mode: update.AgentsUpdateModeEnabled,
+ },
+ })
+ require.NoError(t, err)
+ upToDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
+ StartVersion: "1.2.3",
+ TargetVersion: "1.2.4",
+ Schedule: update.AgentsScheduleImmediate,
+ AutoupdateMode: update.AgentsUpdateModeEnabled,
+ Strategy: update.AgentsStrategyHaltOnError,
+ })
+ require.NoError(t, err)
+
+ outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
+ StartVersion: "1.2.2",
+ TargetVersion: "1.2.3",
+ Schedule: update.AgentsScheduleImmediate,
+ AutoupdateMode: update.AgentsUpdateModeEnabled,
+ Strategy: update.AgentsStrategyHaltOnError,
+ })
+ require.NoError(t, err)
+
+ // Those tests are not written in table format because the fixture setup it too complex and this would harm
+ // readability.
+ t.Run("reconciliation has nothing to do, should exit", func(t *testing.T) {
+ // Test setup: build mock client
+ stubs := mockClientStubs{
+ configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}},
+ versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}},
+ rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{upToDateRollout, nil}},
+ }
+
+ client := newMockClient(t, stubs)
+ reconciler := &Reconciler{
+ clt: client,
+ log: log,
+ }
+
+ // Test execution: run the reconciliation loop
+ require.NoError(t, reconciler.Reconcile(ctx))
+
+ // Test validation: check that all the expected calls were received
+ client.checkIfEmpty(t)
+ })
+
+ t.Run("reconciliation succeeds on first try, should exit", func(t *testing.T) {
+ stubs := mockClientStubs{
+ configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}},
+ versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}},
+ rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}},
+ updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)},
+ updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{upToDateRollout, nil}},
+ }
+
+ client := newMockClient(t, stubs)
+ reconciler := &Reconciler{
+ clt: client,
+ log: log,
+ }
+
+ // Test execution: run the reconciliation loop
+ require.NoError(t, reconciler.Reconcile(ctx))
+
+ // Test validation: check that all the expected calls were received
+ client.checkIfEmpty(t)
+ })
+
+ t.Run("reconciliation faces conflict on first try, should retry and see that there's nothing left to do", func(t *testing.T) {
+ stubs := mockClientStubs{
+ // because of the retry, we expect 2 GETs on every resource
+ configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}},
+ versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}},
+ rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}, {upToDateRollout, nil}},
+ // Single update expected, because there's nothing to do after the retry
+ updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)},
+ updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.Wrap(backend.ErrIncorrectRevision)}},
+ }
+
+ client := newMockClient(t, stubs)
+ reconciler := &Reconciler{
+ clt: client,
+ log: log,
+ }
+
+ // Test execution: run the reconciliation loop
+ require.NoError(t, reconciler.Reconcile(ctx))
+
+ // Test validation: check that all the expected calls were received
+ client.checkIfEmpty(t)
+ })
+
+ t.Run("reconciliation faces conflict on first try, should retry and update a second time", func(t *testing.T) {
+ rev1, err := uuid.NewUUID()
+ require.NoError(t, err)
+ rev2, err := uuid.NewUUID()
+ require.NoError(t, err)
+ rev3, err := uuid.NewUUID()
+ require.NoError(t, err)
+
+ stubs := mockClientStubs{
+ // because of the retry, we expect 2 GETs on every resource
+ configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}},
+ versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}},
+ rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+ {withRevisionID(outOfDateRollout, rev1.String()), nil},
+ {withRevisionID(outOfDateRollout, rev2.String()), nil}},
+ // Two updates expected, one with the old revision, then a second one with the new
+ updateRolloutExpects: []require.ValueAssertionFunc{
+ rolloutEquals(withRevisionID(upToDateRollout, rev1.String())),
+ rolloutEquals(withRevisionID(upToDateRollout, rev2.String())),
+ },
+ // We mimic a race and reject the first update because of the outdated revision
+ updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+ {nil, trace.Wrap(backend.ErrIncorrectRevision)},
+ {withRevisionID(upToDateRollout, rev3.String()), nil},
+ },
+ }
+
+ client := newMockClient(t, stubs)
+ reconciler := &Reconciler{
+ clt: client,
+ log: log,
+ }
+
+ // Test execution: run the reconciliation loop
+ require.NoError(t, reconciler.Reconcile(ctx))
+
+ // Test validation: check that all the expected calls were received
+ client.checkIfEmpty(t)
+ })
+
+ t.Run("reconciliation faces missing rollout on first try, should retry and create the rollout", func(t *testing.T) {
+ stubs := mockClientStubs{
+ // because of the retry, we expect 2 GETs on every resource
+ configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}},
+ versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}},
+ rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+ {outOfDateRollout, nil},
+ {nil, trace.NotFound("no rollout")}},
+ // One update expected on the first try, the second try should create
+ updateRolloutExpects: []require.ValueAssertionFunc{
+ rolloutEquals(upToDateRollout),
+ },
+ // We mimic the fact the rollout got deleted in the meantime
+ updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+ {nil, trace.NotFound("no rollout")},
+ },
+ // One create expected on the second try
+ createRolloutExpects: []require.ValueAssertionFunc{
+ rolloutEquals(upToDateRollout),
+ },
+ createRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+ {upToDateRollout, nil},
+ },
+ }
+
+ client := newMockClient(t, stubs)
+ reconciler := &Reconciler{
+ clt: client,
+ log: log,
+ }
+
+ // Test execution: run the reconciliation loop
+ require.NoError(t, reconciler.Reconcile(ctx))
+
+ // Test validation: check that all the expected calls were received
+ client.checkIfEmpty(t)
+ })
+
+ t.Run("reconciliation meets a hard unexpected failure on first try, should exit in error", func(t *testing.T) {
+ stubs := mockClientStubs{
+ configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}},
+ versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}},
+ rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}},
+ updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)},
+ updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{
+ {nil, trace.ConnectionProblem(trace.Errorf("io/timeout"), "the DB fell on the floor")},
+ },
+ }
+
+ client := newMockClient(t, stubs)
+ reconciler := &Reconciler{
+ clt: client,
+ log: log,
+ }
+
+ // Test execution: run the reconciliation loop
+ require.ErrorContains(t, reconciler.Reconcile(ctx), "the DB fell on the floor")
+
+ // Test validation: check that all the expected calls were received
+ client.checkIfEmpty(t)
+ })
+
+ t.Run("reconciliation faces conflict on first try, should retry but context is expired so it bails out", func(t *testing.T) {
+ cancelableCtx, cancel := context.WithCancel(ctx)
+ // just in case
+ t.Cleanup(cancel)
+
+ stubs := mockClientStubs{
+ // we expect a single GET because the context expires before the second retry
+ configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}},
+ versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}},
+ rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}},
+ // Single update expected, because there's nothing to do after the retry.
+ // We wrap the update validation function into a context canceler, so the context is done after the first update
+ updateRolloutExpects: []require.ValueAssertionFunc{cancelContext(rolloutEquals(upToDateRollout), cancel)},
+ // return a retryable error
+ updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.Wrap(backend.ErrIncorrectRevision)}},
+ }
+
+ client := newMockClient(t, stubs)
+ reconciler := &Reconciler{
+ clt: client,
+ log: log,
+ }
+
+ // Test execution: run the reconciliation loop
+ require.ErrorContains(t, reconciler.Reconcile(cancelableCtx), "canceled")
+
+ // Test validation: check that all the expected calls were received
+ client.checkIfEmpty(t)
+ })
+}