diff --git a/lib/autoupdate/rollout/strategy_timebased.go b/lib/autoupdate/rollout/strategy_timebased.go
new file mode 100644
index 0000000000000..8a7ef32b3159b
--- /dev/null
+++ b/lib/autoupdate/rollout/strategy_timebased.go
@@ -0,0 +1,103 @@
+package rollout
+
+import (
+	"context"
+	"log/slog"
+
+	"github.com/gravitational/trace"
+	"github.com/jonboulle/clockwork"
+
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	update "github.com/gravitational/teleport/api/types/autoupdate"
+)
+
+// timeBasedStrategy is the rollout strategy identified by
+// update.AgentsStrategyTimeBased. Each group is evaluated on its own, from its
+// current state and the result of inWindow at the clock's current time.
+type timeBasedStrategy struct {
+	log   *slog.Logger
+	clock clockwork.Clock
+}
+
+// name returns the strategy identifier, update.AgentsStrategyTimeBased.
+func (h *timeBasedStrategy) name() string {
+	return update.AgentsStrategyTimeBased
+}
+
+// newTimeBasedStrategy builds a time-based rollout strategy whose logger is
+// tagged with the strategy name. It returns a trace.BadParameter error when
+// log or clock is nil.
+func newTimeBasedStrategy(log *slog.Logger, clock clockwork.Clock) (rolloutStrategy, error) {
+	if log == nil {
+		return nil, trace.BadParameter("missing log")
+	}
+	if clock == nil {
+		return nil, trace.BadParameter("missing clock")
+	}
+	return &timeBasedStrategy{
+		log:   log.With("strategy", update.AgentsStrategyTimeBased),
+		clock: clock,
+	}, nil
+}
+
+// progressRollout updates, in place, the state of every group in groups based
+// on whether the group is inside its maintenance window right now:
+//   - unstarted and done groups become active when inside their window;
+//   - active groups become done when outside their window;
+//   - rolled back groups are never modified.
+//
+// Groups are independent: a group that fails (window computation error or
+// unknown state) is flagged with updateReasonReconcilerError and the remaining
+// groups are still processed. All per-group errors are returned together as a
+// single aggregate, which is nil when every group succeeded.
+func (h *timeBasedStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error {
+	now := h.clock.Now()
+	// We always process every group regardless of the order.
+	var errs []error
+	for _, group := range groups {
+		// stateOutsideWindow is the state the group must have when it is not
+		// in its maintenance window.
+		var stateOutsideWindow autoupdate.AutoUpdateAgentGroupState
+		switch group.State {
+		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
+			autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:
+			// We start any unstarted group that is in its window.
+			// Done groups can transition back to active if they enter their maintenance window again.
+			// Some agents might have missed the previous windows and are expected to try again.
+			// Outside of the window, both states are left as they are.
+			stateOutsideWindow = group.State
+		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:
+			// The group is currently being updated. Once the maintenance
+			// window is over, it transitions to the done state.
+			stateOutsideWindow = autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE
+		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK:
+			// We don't touch any group that was manually rolled back.
+			// Something happened and we should not try to update again.
+			continue
+		default:
+			// An unknown state is handled like any other per-group failure:
+			// we reflect it in the status, record the error and keep going
+			// so that the other groups are still processed.
+			setGroupState(group, group.State, updateReasonReconcilerError, now)
+			errs = append(errs, trace.BadParameter("unknown autoupdate group state: %v", group.State))
+			continue
+		}
+
+		shouldBeActive, err := inWindow(group, now)
+		if err != nil {
+			// In time-based rollouts, groups are not dependent.
+			// Failing to transition a group should not affect other groups.
+			// We reflect that something went wrong in the status and go to the next group.
+			setGroupState(group, group.State, updateReasonReconcilerError, now)
+			errs = append(errs, err)
+			continue
+		}
+
+		if shouldBeActive {
+			setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now)
+		} else {
+			setGroupState(group, stateOutsideWindow, updateReasonOutsideWindow, now)
+		}
+	}
+	return trace.NewAggregate(errs...)
+}
diff --git a/lib/autoupdate/rollout/strategy_timebased_test.go b/lib/autoupdate/rollout/strategy_timebased_test.go
new file mode 100644
index 0000000000000..bee732b3a038d
--- /dev/null
+++ b/lib/autoupdate/rollout/strategy_timebased_test.go
@@ -0,0 +1,328 @@
+package rollout
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/jonboulle/clockwork"
+	"github.com/stretchr/testify/require"
+	"google.golang.org/protobuf/types/known/timestamppb"
+
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	"github.com/gravitational/teleport/lib/utils"
+)
+
+// Test_progressGroupsTimeBased checks the state transitions the time-based
+// strategy performs on groups, using a fake clock set to testSunday.
+func Test_progressGroupsTimeBased(t *testing.T) {
+	clock := clockwork.NewFakeClockAt(testSunday)
+	log := utils.NewSlogLoggerForTests()
+	strategy, err := newTimeBasedStrategy(log, clock)
+	require.NoError(t, err)
+
+	groupName := "test-group"
+	canStartToday := everyWeekday
+	cannotStartToday := everyWeekdayButSunday
+	lastUpdate := timestamppb.New(clock.Now().Add(-5 * time.Minute))
+	ctx := context.Background()
+
+	tests := []struct {
+		name          string
+		initialState  []*autoupdate.AutoUpdateAgentRolloutStatusGroup
+		expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
+	}{
+		{
+			name: "unstarted -> unstarted",
+			initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
+					LastUpdateTime:   lastUpdate,
+					LastUpdateReason: updateReasonCreated,
+					ConfigDays:       cannotStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+			expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonOutsideWindow,
+					ConfigDays:       cannotStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+		},
+		{
+			name: "unstarted -> active",
+			initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
+					LastUpdateTime:   lastUpdate,
+					LastUpdateReason: updateReasonCreated,
+					ConfigDays:       canStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+			expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
+					StartTime:        timestamppb.New(clock.Now()),
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonInWindow,
+					ConfigDays:       canStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+		},
+		{
+			name: "done -> done",
+			initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
+					LastUpdateTime:   lastUpdate,
+					LastUpdateReason: updateReasonOutsideWindow,
+					ConfigDays:       cannotStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+			expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
+					LastUpdateTime:   lastUpdate,
+					LastUpdateReason: updateReasonOutsideWindow,
+					ConfigDays:       cannotStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+		},
+		{
+			name: "done -> active",
+			initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
+					LastUpdateTime:   lastUpdate,
+					StartTime:        lastUpdate,
+					LastUpdateReason: updateReasonOutsideWindow,
+					ConfigDays:       canStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+			expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
+					StartTime:        timestamppb.New(clock.Now()),
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonInWindow,
+					ConfigDays:       canStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+		},
+		{
+			name: "active -> active",
+			initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
+					StartTime:        lastUpdate,
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonInWindow,
+					ConfigDays:       canStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+			expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
+					StartTime:        lastUpdate,
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonInWindow,
+					ConfigDays:       canStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+		},
+		{
+			name: "active -> done",
+			initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
+					StartTime:        lastUpdate,
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonInWindow,
+					ConfigDays:       cannotStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+			expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             groupName,
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
+					StartTime:        lastUpdate,
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonOutsideWindow,
+					ConfigDays:       cannotStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+		},
+		{
+			name: "rolledback is a dead end",
+			initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:            groupName + "-in-maintenance",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
+					LastUpdateTime:  lastUpdate,
+					ConfigDays:      canStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+				{
+					Name:            groupName + "-out-of-maintenance",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
+					LastUpdateTime:  lastUpdate,
+					ConfigDays:      cannotStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+			},
+			expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:            groupName + "-in-maintenance",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
+					LastUpdateTime:  lastUpdate,
+					ConfigDays:      canStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+				{
+					Name:            groupName + "-out-of-maintenance",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
+					LastUpdateTime:  lastUpdate,
+					ConfigDays:      cannotStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+			},
+		},
+		{
+			name: "mix of everything",
+			initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:            "new group should start",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
+					LastUpdateTime:  lastUpdate,
+					ConfigDays:      canStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+				{
+					Name:            "done group should start",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
+					LastUpdateTime:  lastUpdate,
+					StartTime:       lastUpdate,
+					ConfigDays:      canStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+				{
+					Name:            "rolledback group should do nothing",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
+					LastUpdateTime:  lastUpdate,
+					ConfigDays:      canStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+				{
+					Name:            "old group should stop",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
+					LastUpdateTime:  lastUpdate,
+					StartTime:       lastUpdate,
+					ConfigDays:      cannotStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+			},
+			expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				{
+					Name:             "new group should start",
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
+					StartTime:        timestamppb.New(clock.Now()),
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonInWindow,
+					ConfigDays:       canStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+				{
+					Name:             "done group should start",
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
+					StartTime:        timestamppb.New(clock.Now()),
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonInWindow,
+					ConfigDays:       canStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+				{
+					Name:            "rolledback group should do nothing",
+					State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK,
+					LastUpdateTime:  lastUpdate,
+					ConfigDays:      canStartToday,
+					ConfigStartHour: matchingStartHour,
+				},
+				{
+					Name:             "old group should stop",
+					State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE,
+					StartTime:        lastUpdate,
+					LastUpdateTime:   timestamppb.New(clock.Now()),
+					LastUpdateReason: updateReasonOutsideWindow,
+					ConfigDays:       cannotStartToday,
+					ConfigStartHour:  matchingStartHour,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := strategy.progressRollout(ctx, tt.initialState)
+			require.NoError(t, err)
+			// We use require.Equal instead of Elements match because group order matters.
+			// It's not super important for time-based, but is crucial for halt-on-error.
+			// So it's better to be more conservative and validate order never changes for
+			// both strategies.
+			require.Equal(t, tt.expectedState, tt.initialState)
+		})
+	}
+}
+
+// Test_progressGroupsTimeBased_unknownState checks that a group in an unknown
+// state makes progressRollout return an error without preventing the groups
+// that come after it from being progressed.
+func Test_progressGroupsTimeBased_unknownState(t *testing.T) {
+	clock := clockwork.NewFakeClockAt(testSunday)
+	strategy, err := newTimeBasedStrategy(utils.NewSlogLoggerForTests(), clock)
+	require.NoError(t, err)
+
+	groups := []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
+		{
+			// No AutoUpdateAgentGroupState constant is expected to use -1.
+			Name:            "unknown state",
+			State:           autoupdate.AutoUpdateAgentGroupState(-1),
+			ConfigDays:      everyWeekday,
+			ConfigStartHour: matchingStartHour,
+		},
+		{
+			Name:            "unstarted group should still start",
+			State:           autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
+			ConfigDays:      everyWeekday,
+			ConfigStartHour: matchingStartHour,
+		},
+	}
+
+	err = strategy.progressRollout(context.Background(), groups)
+	require.Error(t, err)
+	// The failure on the first group must not stop the second one from starting.
+	require.Equal(t, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, groups[1].State)
+}