diff --git a/lib/autoupdate/rolloutcontroller/client.go b/lib/autoupdate/rolloutcontroller/client.go new file mode 100644 index 0000000000000..4dead0f9dee19 --- /dev/null +++ b/lib/autoupdate/rolloutcontroller/client.go @@ -0,0 +1,46 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rolloutcontroller + +import ( + "context" + + autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" +) + +// Client is the subset of the Teleport client RPCs the controller needs. +type Client interface { + // GetAutoUpdateConfig gets the AutoUpdateConfig singleton resource. + GetAutoUpdateConfig(ctx context.Context) (*autoupdatepb.AutoUpdateConfig, error) + + // GetAutoUpdateVersion gets the AutoUpdateVersion singleton resource. + GetAutoUpdateVersion(ctx context.Context) (*autoupdatepb.AutoUpdateVersion, error) + + // GetAutoUpdateAgentRollout gets the AutoUpdateAgentRollout singleton resource. + GetAutoUpdateAgentRollout(ctx context.Context) (*autoupdatepb.AutoUpdateAgentRollout, error) + + // CreateAutoUpdateAgentRollout creates the AutoUpdateAgentRollout singleton resource. + CreateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdatepb.AutoUpdateAgentRollout) (*autoupdatepb.AutoUpdateAgentRollout, error) + + // UpdateAutoUpdateAgentRollout updates the AutoUpdateAgentRollout singleton resource. + UpdateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdatepb.AutoUpdateAgentRollout) (*autoupdatepb.AutoUpdateAgentRollout, error) + + // DeleteAutoUpdateAgentRollout deletes the AutoUpdateAgentRollout singleton resource. + DeleteAutoUpdateAgentRollout(ctx context.Context) error +} diff --git a/lib/autoupdate/rolloutcontroller/client_test.go b/lib/autoupdate/rolloutcontroller/client_test.go new file mode 100644 index 0000000000000..ba204ffb77db3 --- /dev/null +++ b/lib/autoupdate/rolloutcontroller/client_test.go @@ -0,0 +1,189 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rolloutcontroller + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" + + "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" +) + +// mockClient is a mock implementation if the Client interface for testing purposes. +// This is used to precisely check which calls are made by the Reconciler during tests. +// Use newMockClient to create one from stubs. Once the test is over, you must call +// mockClient.checkIfEmpty to validate all expected calls were made. +type mockClient struct { + getAutoUpdateConfig *getHandler[*autoupdate.AutoUpdateConfig] + getAutoUpdateVersion *getHandler[*autoupdate.AutoUpdateVersion] + getAutoUpdateAgentRollout *getHandler[*autoupdate.AutoUpdateAgentRollout] + createAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout] + updateAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout] + deleteAutoUpdateAgentRollout *deleteHandler +} + +func (m mockClient) GetAutoUpdateConfig(ctx context.Context) (*autoupdate.AutoUpdateConfig, error) { + return m.getAutoUpdateConfig.handle(ctx) +} + +func (m mockClient) GetAutoUpdateVersion(ctx context.Context) (*autoupdate.AutoUpdateVersion, error) { + return m.getAutoUpdateVersion.handle(ctx) +} + +func (m mockClient) GetAutoUpdateAgentRollout(ctx context.Context) (*autoupdate.AutoUpdateAgentRollout, error) { + return m.getAutoUpdateAgentRollout.handle(ctx) +} + +func (m mockClient) CreateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdate.AutoUpdateAgentRollout) (*autoupdate.AutoUpdateAgentRollout, error) { + return m.createAutoUpdateAgentRollout.handle(ctx, rollout) +} + +func (m mockClient) UpdateAutoUpdateAgentRollout(ctx context.Context, rollout *autoupdate.AutoUpdateAgentRollout) (*autoupdate.AutoUpdateAgentRollout, error) { + return m.updateAutoUpdateAgentRollout.handle(ctx, rollout) +} + +func (m mockClient) DeleteAutoUpdateAgentRollout(ctx context.Context) error { + return m.deleteAutoUpdateAgentRollout.handle(ctx) +} + +func (m mockClient) checkIfEmpty(t *testing.T) { + require.True(t, m.getAutoUpdateConfig.isEmpty(), "Get autoupdate_config mock not empty") + require.True(t, m.getAutoUpdateVersion.isEmpty(), "Get autoupdate_version mock not empty") + require.True(t, m.getAutoUpdateAgentRollout.isEmpty(), "Get autoupdate_agent_rollout mock not empty") + require.True(t, m.createAutoUpdateAgentRollout.isEmpty(), "Create autoupdate_agent_rollout mock not empty") + require.True(t, m.updateAutoUpdateAgentRollout.isEmpty(), "Update autoupdate_agent_rollout mock not empty") + require.True(t, m.deleteAutoUpdateAgentRollout.isEmpty(), "Delete autoupdate_agent_rollout mock not empty") +} + +func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient { + // Fail early if there's a mismatch + require.Equal(t, len(stubs.createRolloutAnswers), len(stubs.createRolloutExpects), "invalid stubs, create validations and answers slices are not the same length") + require.Equal(t, len(stubs.updateRolloutAnswers), len(stubs.updateRolloutExpects), "invalid stubs, update validations and answers slices are not the same length") + + return &mockClient{ + getAutoUpdateConfig: &getHandler[*autoupdate.AutoUpdateConfig]{t, stubs.configAnswers}, + getAutoUpdateVersion: &getHandler[*autoupdate.AutoUpdateVersion]{t, stubs.versionAnswers}, + getAutoUpdateAgentRollout: &getHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.rolloutAnswers}, + createAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.createRolloutExpects, stubs.createRolloutAnswers}, + updateAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.updateRolloutExpects, stubs.updateRolloutAnswers}, + deleteAutoUpdateAgentRollout: &deleteHandler{t, stubs.deleteRolloutAnswers}, + } +} + +type mockClientStubs struct { + configAnswers []callAnswer[*autoupdate.AutoUpdateConfig] + versionAnswers []callAnswer[*autoupdate.AutoUpdateVersion] + rolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout] + createRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout] + createRolloutExpects []require.ValueAssertionFunc + updateRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout] + updateRolloutExpects []require.ValueAssertionFunc + deleteRolloutAnswers []error +} + +type callAnswer[T any] struct { + result T + err error +} + +// getHandler is used in a mock client to answer get resource requests during tests. +// It takes a list of answers and errors and will return them when invoked. +// If there are no stubs left it fails the test. +type getHandler[T proto.Message] struct { + t *testing.T + answers []callAnswer[T] +} + +func (h *getHandler[T]) handle(_ context.Context) (T, error) { + if len(h.answers) == 0 { + require.Fail(h.t, "no answers left") + } + + entry := h.answers[0] + h.answers = h.answers[1:] + + // We need to deep copy because the reconciler might do updates in place. + // We don't want the original resource to be edited as this would mess with other tests. + return proto.Clone(entry.result).(T), entry.err +} + +// isEmpty returns true only if all stubs were consumed +func (h *getHandler[T]) isEmpty() bool { + return len(h.answers) == 0 +} + +// createUpdateHandler is used in a mock client to answer create or update resource requests during tests (any request whose arity is 2). +// It first validates the input using the provided validation function, then it returns the predefined answer and error. +// If there are no stubs left it fails the test. +type createUpdateHandler[T proto.Message] struct { + t *testing.T + expect []require.ValueAssertionFunc + answers []callAnswer[T] +} + +func (h *createUpdateHandler[T]) handle(_ context.Context, object T) (T, error) { + if len(h.expect) == 0 { + require.Fail(h.t, "not expecting more calls") + } + h.expect[0](h.t, object) + h.expect = h.expect[1:] + + if len(h.answers) == 0 { + require.Fail(h.t, "no answers left") + } + + entry := h.answers[0] + h.answers = h.answers[1:] + + // We need to deep copy because the reconciler might do updates in place. + // We don't want the original resource to be edited as this would mess with other tests. + return proto.Clone(entry.result).(T), entry.err +} + +// isEmpty returns true only if all stubs were consumed +func (h *createUpdateHandler[T]) isEmpty() bool { + return len(h.answers) == 0 && len(h.expect) == 0 +} + +// deleteHandler is used in a mock client to answer delete resource requests during tests. +// It takes a list of errors and returns them when invoked. +// If there are no stubs left it fails the test. +type deleteHandler struct { + t *testing.T + answers []error +} + +func (h *deleteHandler) handle(_ context.Context) error { + if len(h.answers) == 0 { + require.Fail(h.t, "no answers left") + } + + entry := h.answers[0] + h.answers = h.answers[1:] + + return entry +} + +// isEmpty returns true only if all stubs were consumed +func (h *deleteHandler) isEmpty() bool { + return len(h.answers) == 0 +} diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rolloutcontroller/reconciler.go new file mode 100644 index 0000000000000..78989c4ec4a6b --- /dev/null +++ b/lib/autoupdate/rolloutcontroller/reconciler.go @@ -0,0 +1,235 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rolloutcontroller + +import ( + "context" + "log/slog" + "sync" + "time" + + "github.com/gravitational/trace" + + "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + update "github.com/gravitational/teleport/api/types/autoupdate" +) + +const ( + reconciliationTimeout = 30 * time.Second + defaultConfigMode = update.AgentsUpdateModeEnabled + defaultStrategy = update.AgentsStrategyHaltOnError + maxConflictRetry = 3 +) + +// Reconciler reconciles the AutoUpdateAgentRollout singleton based on the content of the AutoUpdateVersion and +// AutoUpdateConfig singletons. This reconciler is not based on the services.GenericReconciler because: +// - we reconcile 2 resources with one +// - both input and output are singletons, we don't need the multi resource logic nor stream/paginated APIs +type Reconciler struct { + clt Client + log *slog.Logger + + // mutex ensures we only run one reconciliation at a time + mutex sync.Mutex +} + +// Reconcile the AutoUpdateAgentRollout singleton. The reconciliation can fail because of a conflict (multiple auths +// are racing), in this case we retry the reconciliation immediately. +func (r *Reconciler) Reconcile(ctx context.Context) error { + r.mutex.Lock() + defer r.mutex.Unlock() + + ctx, cancel := context.WithTimeout(ctx, reconciliationTimeout) + defer cancel() + tries := 0 + var err error + for tries < maxConflictRetry { + tries++ + select { + case <-ctx.Done(): + return ctx.Err() + default: + err = r.tryReconcile(ctx) + switch { + case err == nil: + return nil + case trace.IsCompareFailed(err), trace.IsNotFound(err): + // The resource changed since we last saw it + // We must have raced against another auth + // Let's retry the reconciliation + r.log.DebugContext(ctx, "retrying reconciliation", "error", err) + default: + // error is non-nil and non-retryable + return trace.Wrap(err, "failed to reconcile rollout") + } + } + } + return trace.CompareFailed("compare failed, tried %d times, last error: %s", tries, err) +} + +// tryReconcile tries to reconcile the AutoUpdateAgentRollout singleton. +// This function should be nilpotent if the AutoUpdateAgentRollout is already up-to-date. +// The creation/update/deletion can fail with a trace.CompareFailedError or trace.NotFoundError +// if the resource change while we were computing it. +// The caller must handle those error and retry the reconciliation. +func (r *Reconciler) tryReconcile(ctx context.Context) error { + // get autoupdate_config + var config *autoupdate.AutoUpdateConfig + if c, err := r.clt.GetAutoUpdateConfig(ctx); err == nil { + config = c + } else if !trace.IsNotFound(err) { + return trace.Wrap(err, "getting autoupdate_config") + } + + // get autoupdate_version + var version *autoupdate.AutoUpdateVersion + if v, err := r.clt.GetAutoUpdateVersion(ctx); err == nil { + version = v + } else if !trace.IsNotFound(err) { + return trace.Wrap(err, "getting autoupdate version") + } + + // get autoupdate_agent_rollout + rolloutExists := true + existingRollout, err := r.clt.GetAutoUpdateAgentRollout(ctx) + if err != nil && !trace.IsNotFound(err) { + return trace.Wrap(err, "getting autoupdate_agent_rollout") + } + if trace.IsNotFound(err) { + // rollout doesn't exist yet, we'll need to call Create instead of Update. + rolloutExists = false + } + + // if autoupdate_version does not exist or does not contain spec.agents, we should not configure a rollout + if version.GetSpec().GetAgents() == nil { + if !rolloutExists { + // the rollout doesn't exist, nothing to do + return nil + } + // the rollout exists, we must delete it + return r.clt.DeleteAutoUpdateAgentRollout(ctx) + } + + // compute what the spec should look like + newSpec, err := r.buildRolloutSpec(config.GetSpec().GetAgents(), version.GetSpec().GetAgents()) + if err != nil { + return trace.Wrap(err, "mutating rollout") + } + + // if there are no existing rollout, we create a new one + if !rolloutExists { + rollout, err := update.NewAutoUpdateAgentRollout(newSpec) + if err != nil { + return trace.Wrap(err, "validating new rollout") + } + _, err = r.clt.CreateAutoUpdateAgentRollout(ctx, rollout) + return trace.Wrap(err, "creating rollout") + } + + // there was an existing rollout, we must figure if something changed + specChanged := existingRollout.GetSpec().GetStartVersion() != newSpec.GetStartVersion() || + existingRollout.GetSpec().GetTargetVersion() != newSpec.GetTargetVersion() || + existingRollout.GetSpec().GetAutoupdateMode() != newSpec.GetAutoupdateMode() || + existingRollout.GetSpec().GetStrategy() != newSpec.GetStrategy() || + existingRollout.GetSpec().GetSchedule() != newSpec.GetSchedule() + + // TODO: reconcile the status here when we'll add group support. + // Even if the spec does not change, we might still have to update the status: + // - sync groups with the ones from the user config + // - progress the rollout across groups + + // if nothing changed, no need to update the resource + if !specChanged { + r.log.DebugContext(ctx, "rollout unchanged") + return nil + } + + // something changed, we replace the old spec with the new one, validate and update the resource + // we don't create a new resource to keep the revision ID and + existingRollout.Spec = newSpec + err = update.ValidateAutoUpdateAgentRollout(existingRollout) + if err != nil { + return trace.Wrap(err, "validating mutated rollout") + } + _, err = r.clt.UpdateAutoUpdateAgentRollout(ctx, existingRollout) + return trace.Wrap(err, "updating rollout") +} + +func (r *Reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) { + // reconcile mode + mode, err := getMode(config.GetMode(), version.GetMode()) + if err != nil { + return nil, trace.Wrap(err, "computing agent update mode") + } + + strategy := config.GetStrategy() + if strategy == "" { + strategy = defaultStrategy + } + + return &autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: version.GetStartVersion(), + TargetVersion: version.GetTargetVersion(), + Schedule: version.GetSchedule(), + AutoupdateMode: mode, + Strategy: strategy, + }, nil + +} + +// agentModeCode maps agents mode to integers. +// When config and version modes don't match, the lowest integer takes precedence. +var ( + agentModeCode = map[string]int{ + update.AgentsUpdateModeDisabled: 0, + update.AgentsUpdateModeSuspended: 1, + update.AgentsUpdateModeEnabled: 2, + } + codeToAgentMode = map[int]string{ + 0: update.AgentsUpdateModeDisabled, + 1: update.AgentsUpdateModeSuspended, + 2: update.AgentsUpdateModeEnabled, + } +) + +// getMode merges the agent modes coming from the version and config resources into a single mode. +// "disabled" takes precedence over "suspended", which takes precedence over "enabled". +func getMode(configMode, versionMode string) (string, error) { + if configMode == "" { + configMode = defaultConfigMode + } + if versionMode == "" { + return "", trace.BadParameter("version mode empty") + } + + configCode, ok := agentModeCode[configMode] + if !ok { + return "", trace.BadParameter("unsupported agent config mode: %v", configMode) + } + versionCode, ok := agentModeCode[versionMode] + if !ok { + return "", trace.BadParameter("unsupported agent version mode: %v", versionMode) + } + + // The lowest code takes precedence + if configCode <= versionCode { + return codeToAgentMode[configCode], nil + } + return codeToAgentMode[versionCode], nil +} diff --git a/lib/autoupdate/rolloutcontroller/reconciler_test.go b/lib/autoupdate/rolloutcontroller/reconciler_test.go new file mode 100644 index 0000000000000..340451d8da46d --- /dev/null +++ b/lib/autoupdate/rolloutcontroller/reconciler_test.go @@ -0,0 +1,567 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rolloutcontroller + +import ( + "context" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/uuid" + "github.com/gravitational/trace" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/testing/protocmp" + + "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + update "github.com/gravitational/teleport/api/types/autoupdate" + apiutils "github.com/gravitational/teleport/api/utils" + "github.com/gravitational/teleport/lib/backend" + "github.com/gravitational/teleport/lib/utils" +) + +// rolloutEquals returns a require.ValueAssertionFunc that checks the rollout is identical. +// The comparison does not take into account the proto internal state. +func rolloutEquals(expected *autoupdate.AutoUpdateAgentRollout) require.ValueAssertionFunc { + return func(t require.TestingT, i interface{}, _ ...interface{}) { + require.IsType(t, &autoupdate.AutoUpdateAgentRollout{}, i) + actual := i.(*autoupdate.AutoUpdateAgentRollout) + require.Empty(t, cmp.Diff(expected, actual, protocmp.Transform())) + } +} + +// cancelContext wraps a require.ValueAssertionFunc so that the given context is canceled before checking the assertion. +// This is used to test how the reconciler behaves when its context is canceled. +func cancelContext(assertionFunc require.ValueAssertionFunc, cancel func()) require.ValueAssertionFunc { + return func(t require.TestingT, i interface{}, i2 ...interface{}) { + cancel() + assertionFunc(t, i, i2...) + } +} + +// withRevisionID creates a deep copy of an agent rollout and sets the revisionID in its metadata. +// This is used to test the conditional update retry logic. +func withRevisionID(original *autoupdate.AutoUpdateAgentRollout, revision string) *autoupdate.AutoUpdateAgentRollout { + revisioned := apiutils.CloneProtoMsg(original) + revisioned.Metadata.Revision = revision + return revisioned +} + +func TestGetMode(t *testing.T) { + t.Parallel() + tests := []struct { + name string + configMode string + versionMode string + expected string + checkErr require.ErrorAssertionFunc + }{ + { + name: "config and version equal", + configMode: update.AgentsUpdateModeEnabled, + versionMode: update.AgentsUpdateModeEnabled, + expected: update.AgentsUpdateModeEnabled, + checkErr: require.NoError, + }, + { + name: "config suspends, version enables", + configMode: update.AgentsUpdateModeSuspended, + versionMode: update.AgentsUpdateModeEnabled, + expected: update.AgentsUpdateModeSuspended, + checkErr: require.NoError, + }, + { + name: "config enables, version suspends", + configMode: update.AgentsUpdateModeEnabled, + versionMode: update.AgentsUpdateModeSuspended, + expected: update.AgentsUpdateModeSuspended, + checkErr: require.NoError, + }, + { + name: "config suspends, version disables", + configMode: update.AgentsUpdateModeSuspended, + versionMode: update.AgentsUpdateModeDisabled, + expected: update.AgentsUpdateModeDisabled, + checkErr: require.NoError, + }, + { + name: "version enables, no config", + configMode: "", + versionMode: update.AgentsUpdateModeEnabled, + expected: update.AgentsUpdateModeEnabled, + checkErr: require.NoError, + }, + { + name: "config enables, no version", + configMode: update.AgentsUpdateModeEnabled, + versionMode: "", + expected: "", + checkErr: require.Error, + }, + { + name: "unknown mode", + configMode: "this in not a mode", + versionMode: update.AgentsUpdateModeEnabled, + expected: "", + checkErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := getMode(tt.configMode, tt.versionMode) + tt.checkErr(t, err) + require.Equal(t, tt.expected, result) + }) + } +} + +func TestTryReconcile(t *testing.T) { + t.Parallel() + log := utils.NewSlogLoggerForTests() + ctx := context.Background() + // Test setup: creating fixtures + configOK, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{ + Tools: &autoupdate.AutoUpdateConfigSpecTools{ + Mode: update.ToolsUpdateModeEnabled, + }, + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: update.AgentsUpdateModeEnabled, + Strategy: update.AgentsStrategyHaltOnError, + }, + }) + require.NoError(t, err) + + configNoAgent, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{ + Tools: &autoupdate.AutoUpdateConfigSpecTools{ + Mode: update.ToolsUpdateModeEnabled, + }, + }) + require.NoError(t, err) + + versionOK, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{ + Tools: &autoupdate.AutoUpdateVersionSpecTools{ + TargetVersion: "1.2.3", + }, + Agents: &autoupdate.AutoUpdateVersionSpecAgents{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: update.AgentsScheduleImmediate, + Mode: update.AgentsUpdateModeEnabled, + }, + }) + require.NoError(t, err) + + versionNoAgent, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{ + Tools: &autoupdate.AutoUpdateVersionSpecTools{ + TargetVersion: "1.2.3", + }, + }) + require.NoError(t, err) + + upToDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: update.AgentsScheduleImmediate, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: update.AgentsStrategyHaltOnError, + }) + require.NoError(t, err) + + outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.2", + TargetVersion: "1.2.3", + Schedule: update.AgentsScheduleImmediate, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: update.AgentsStrategyHaltOnError, + }) + require.NoError(t, err) + + tests := []struct { + name string + config *autoupdate.AutoUpdateConfig + version *autoupdate.AutoUpdateVersion + existingRollout *autoupdate.AutoUpdateAgentRollout + createExpect *autoupdate.AutoUpdateAgentRollout + updateExpect *autoupdate.AutoUpdateAgentRollout + deleteExpect bool + }{ + { + name: "config and version exist, no existing rollout", + // rollout should be created + config: configOK, + version: versionOK, + createExpect: upToDateRollout, + }, + { + name: "version exist, no existing rollout nor config", + // rollout should be created + version: versionOK, + createExpect: upToDateRollout, + }, + { + name: "version exist, no existing rollout, config exist but doesn't contain agent section", + // rollout should be created + config: configNoAgent, + version: versionOK, + createExpect: upToDateRollout, + }, + { + name: "config exist, no existing rollout nor version", + // rollout should not be created as there is no version + config: configOK, + }, + { + name: "config exist, no existing rollout, version exist but doesn't contain agent section", + // rollout should not be created as there is no version + config: configOK, + version: versionNoAgent, + }, + { + name: "no existing rollout, config, nor version", + // rollout should not be created as there is no version + }, + { + name: "existing out-of-date rollout, config and version exist", + // rollout should be updated + config: configOK, + version: versionOK, + existingRollout: outOfDateRollout, + updateExpect: upToDateRollout, + }, + { + name: "existing up-to-date rollout, config and version exist", + // rollout should not be updated as its spec is already good + config: configOK, + version: versionOK, + existingRollout: upToDateRollout, + }, + { + name: "existing rollout and config but no version", + // rollout should be deleted as there is no version + config: configOK, + existingRollout: upToDateRollout, + deleteExpect: true, + }, + { + name: "existing rollout but no config nor version", + // rollout should be deleted as there is no version + existingRollout: upToDateRollout, + deleteExpect: true, + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + // Test setup: creating a fake client answering fixtures + var stubs mockClientStubs + + if tt.config != nil { + stubs.configAnswers = []callAnswer[*autoupdate.AutoUpdateConfig]{{tt.config, nil}} + } else { + stubs.configAnswers = []callAnswer[*autoupdate.AutoUpdateConfig]{{nil, trace.NotFound("no config")}} + } + + if tt.version != nil { + stubs.versionAnswers = []callAnswer[*autoupdate.AutoUpdateVersion]{{tt.version, nil}} + } else { + stubs.versionAnswers = []callAnswer[*autoupdate.AutoUpdateVersion]{{nil, trace.NotFound("no version")}} + } + + if tt.existingRollout != nil { + stubs.rolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.existingRollout, nil}} + } else { + stubs.rolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.NotFound("no rollout")}} + } + + if tt.createExpect != nil { + stubs.createRolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.createExpect, nil}} + stubs.createRolloutExpects = []require.ValueAssertionFunc{rolloutEquals(tt.createExpect)} + } + + if tt.updateExpect != nil { + stubs.updateRolloutAnswers = []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{tt.updateExpect, nil}} + stubs.updateRolloutExpects = []require.ValueAssertionFunc{rolloutEquals(tt.updateExpect)} + } + + if tt.deleteExpect { + stubs.deleteRolloutAnswers = []error{nil} + } + + client := newMockClient(t, stubs) + + // Test execution: Running the reconciliation + + reconciler := &Reconciler{ + clt: client, + log: log, + } + + require.NoError(t, reconciler.tryReconcile(ctx)) + // Test validation: Checking that the mock client is now empty + + client.checkIfEmpty(t) + }) + } +} + +func TestReconciler_Reconcile(t *testing.T) { + log := utils.NewSlogLoggerForTests() + ctx := context.Background() + // Test setup: creating fixtures + config, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{ + Tools: &autoupdate.AutoUpdateConfigSpecTools{ + Mode: update.ToolsUpdateModeEnabled, + }, + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: update.AgentsUpdateModeEnabled, + Strategy: update.AgentsStrategyHaltOnError, + }, + }) + require.NoError(t, err) + version, err := update.NewAutoUpdateVersion(&autoupdate.AutoUpdateVersionSpec{ + Tools: &autoupdate.AutoUpdateVersionSpecTools{ + TargetVersion: "1.2.3", + }, + Agents: &autoupdate.AutoUpdateVersionSpecAgents{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: update.AgentsScheduleImmediate, + Mode: update.AgentsUpdateModeEnabled, + }, + }) + require.NoError(t, err) + upToDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: update.AgentsScheduleImmediate, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: update.AgentsStrategyHaltOnError, + }) + require.NoError(t, err) + + outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.2", + TargetVersion: "1.2.3", + Schedule: update.AgentsScheduleImmediate, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: update.AgentsStrategyHaltOnError, + }) + require.NoError(t, err) + + // Those tests are not written in table format because the fixture setup it too complex and this would harm + // readability. + t.Run("reconciliation has nothing to do, should exit", func(t *testing.T) { + // Test setup: build mock client + stubs := mockClientStubs{ + configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}}, + versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}}, + rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{upToDateRollout, nil}}, + } + + client := newMockClient(t, stubs) + reconciler := &Reconciler{ + clt: client, + log: log, + } + + // Test execution: run the reconciliation loop + require.NoError(t, reconciler.Reconcile(ctx)) + + // Test validation: check that all the expected calls were received + client.checkIfEmpty(t) + }) + + t.Run("reconciliation succeeds on first try, should exit", func(t *testing.T) { + stubs := mockClientStubs{ + configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}}, + versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}}, + rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}}, + updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)}, + updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{upToDateRollout, nil}}, + } + + client := newMockClient(t, stubs) + reconciler := &Reconciler{ + clt: client, + log: log, + } + + // Test execution: run the reconciliation loop + require.NoError(t, reconciler.Reconcile(ctx)) + + // Test validation: check that all the expected calls were received + client.checkIfEmpty(t) + }) + + t.Run("reconciliation faces conflict on first try, should retry and see that there's nothing left to do", func(t *testing.T) { + stubs := mockClientStubs{ + // because of the retry, we expect 2 GETs on every resource + configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}}, + versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}}, + rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}, {upToDateRollout, nil}}, + // Single update expected, because there's nothing to do after the retry + updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)}, + updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.Wrap(backend.ErrIncorrectRevision)}}, + } + + client := newMockClient(t, stubs) + reconciler := &Reconciler{ + clt: client, + log: log, + } + + // Test execution: run the reconciliation loop + require.NoError(t, reconciler.Reconcile(ctx)) + + // Test validation: check that all the expected calls were received + client.checkIfEmpty(t) + }) + + t.Run("reconciliation faces conflict on first try, should retry and update a second time", func(t *testing.T) { + rev1, err := uuid.NewUUID() + require.NoError(t, err) + rev2, err := uuid.NewUUID() + require.NoError(t, err) + rev3, err := uuid.NewUUID() + require.NoError(t, err) + + stubs := mockClientStubs{ + // because of the retry, we expect 2 GETs on every resource + configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}}, + versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}}, + rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{ + {withRevisionID(outOfDateRollout, rev1.String()), nil}, + {withRevisionID(outOfDateRollout, rev2.String()), nil}}, + // Two updates expected, one with the old revision, then a second one with the new + updateRolloutExpects: []require.ValueAssertionFunc{ + rolloutEquals(withRevisionID(upToDateRollout, rev1.String())), + rolloutEquals(withRevisionID(upToDateRollout, rev2.String())), + }, + // We mimic a race and reject the first update because of the outdated revision + updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{ + {nil, trace.Wrap(backend.ErrIncorrectRevision)}, + {withRevisionID(upToDateRollout, rev3.String()), nil}, + }, + } + + client := newMockClient(t, stubs) + reconciler := &Reconciler{ + clt: client, + log: log, + } + + // Test execution: run the reconciliation loop + require.NoError(t, reconciler.Reconcile(ctx)) + + // Test validation: check that all the expected calls were received + client.checkIfEmpty(t) + }) + + t.Run("reconciliation faces missing rollout on first try, should retry and create the rollout", func(t *testing.T) { + stubs := mockClientStubs{ + // because of the retry, we expect 2 GETs on every resource + configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}, {config, nil}}, + versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}, {version, nil}}, + rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{ + {outOfDateRollout, nil}, + {nil, trace.NotFound("no rollout")}}, + // One update expected on the first try, the second try should create + updateRolloutExpects: []require.ValueAssertionFunc{ + rolloutEquals(upToDateRollout), + }, + // We mimic the fact the rollout got deleted in the meantime + updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{ + {nil, trace.NotFound("no rollout")}, + }, + // One create expected on the second try + createRolloutExpects: []require.ValueAssertionFunc{ + rolloutEquals(upToDateRollout), + }, + createRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{ + {upToDateRollout, nil}, + }, + } + + client := newMockClient(t, stubs) + reconciler := &Reconciler{ + clt: client, + log: log, + } + + // Test execution: run the reconciliation loop + require.NoError(t, reconciler.Reconcile(ctx)) + + // Test validation: check that all the expected calls were received + client.checkIfEmpty(t) + }) + + t.Run("reconciliation meets a hard unexpected failure on first try, should exit in error", func(t *testing.T) { + stubs := mockClientStubs{ + configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}}, + versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}}, + rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}}, + updateRolloutExpects: []require.ValueAssertionFunc{rolloutEquals(upToDateRollout)}, + updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{ + {nil, trace.ConnectionProblem(trace.Errorf("io/timeout"), "the DB fell on the floor")}, + }, + } + + client := newMockClient(t, stubs) + reconciler := &Reconciler{ + clt: client, + log: log, + } + + // Test execution: run the reconciliation loop + require.ErrorContains(t, reconciler.Reconcile(ctx), "the DB fell on the floor") + + // Test validation: check that all the expected calls were received + client.checkIfEmpty(t) + }) + + t.Run("reconciliation faces conflict on first try, should retry but context is expired so it bails out", func(t *testing.T) { + cancelableCtx, cancel := context.WithCancel(ctx) + // just in case + t.Cleanup(cancel) + + stubs := mockClientStubs{ + // we expect a single GET because the context expires before the second retry + configAnswers: []callAnswer[*autoupdate.AutoUpdateConfig]{{config, nil}}, + versionAnswers: []callAnswer[*autoupdate.AutoUpdateVersion]{{version, nil}}, + rolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{outOfDateRollout, nil}}, + // Single update expected, because there's nothing to do after the retry. + // We wrap the update validation function into a context canceler, so the context is done after the first update + updateRolloutExpects: []require.ValueAssertionFunc{cancelContext(rolloutEquals(upToDateRollout), cancel)}, + // return a retryable error + updateRolloutAnswers: []callAnswer[*autoupdate.AutoUpdateAgentRollout]{{nil, trace.Wrap(backend.ErrIncorrectRevision)}}, + } + + client := newMockClient(t, stubs) + reconciler := &Reconciler{ + clt: client, + log: log, + } + + // Test execution: run the reconciliation loop + require.ErrorContains(t, reconciler.Reconcile(cancelableCtx), "canceled") + + // Test validation: check that all the expected calls were received + client.checkIfEmpty(t) + }) +}