From e285c9e27e257b4fc6b71d06d323d775e831fe75 Mon Sep 17 00:00:00 2001 From: hugoShaka Date: Tue, 17 Dec 2024 17:29:25 -0500 Subject: [PATCH] set rollout start date and don't start updating if rollout just changed This commit does two changes: - the controller now sets the rollout start time when resetting the rollout - the controller will not start a group if the rollout changed during the maintenance window (checks if the rollout start time is in the window) --- .../teleport/autoupdate/v1/autoupdate.pb.go | 1 + .../teleport/autoupdate/v1/autoupdate.proto | 1 + lib/autoupdate/rollout/controller.go | 1 + lib/autoupdate/rollout/reconciler.go | 12 ++-- lib/autoupdate/rollout/reconciler_test.go | 57 ++++++++++------ lib/autoupdate/rollout/strategy.go | 13 +++- .../rollout/strategy_haltonerror.go | 39 +++++++---- .../rollout/strategy_haltonerror_test.go | 38 +++++++++-- lib/autoupdate/rollout/strategy_test.go | 65 +++++++++++++++++-- lib/autoupdate/rollout/strategy_timebased.go | 22 +++++-- .../rollout/strategy_timebased_test.go | 40 ++++++++++-- 11 files changed, 232 insertions(+), 57 deletions(-) diff --git a/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go b/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go index 7fc41ec3f3858..8d7e52517814b 100644 --- a/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go +++ b/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go @@ -997,6 +997,7 @@ type AutoUpdateAgentRolloutStatus struct { // For example, a group updates every day between 13:00 and 14:00. If the target version changes to 13:30, the group // will not start updating to the new version directly. The controller sees that the group theoretical start time is // before the rollout start time and the maintenance window belongs to the previous rollout. + // When the timestamp is nil, the controller will ignore the start time and check and allow groups to activate. StartTime *timestamppb.Timestamp `protobuf:"bytes,3,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache diff --git a/api/proto/teleport/autoupdate/v1/autoupdate.proto b/api/proto/teleport/autoupdate/v1/autoupdate.proto index 0257ec3e023b7..faf7ec93db129 100644 --- a/api/proto/teleport/autoupdate/v1/autoupdate.proto +++ b/api/proto/teleport/autoupdate/v1/autoupdate.proto @@ -180,6 +180,7 @@ message AutoUpdateAgentRolloutStatus { // For example, a group updates every day between 13:00 and 14:00. If the target version changes to 13:30, the group // will not start updating to the new version directly. The controller sees that the group theoretical start time is // before the rollout start time and the maintenance window belongs to the previous rollout. + // When the timestamp is nil, the controller will ignore the start time and check and allow groups to activate. google.protobuf.Timestamp start_time = 3; } diff --git a/lib/autoupdate/rollout/controller.go b/lib/autoupdate/rollout/controller.go index 1c45a6ac4fc5b..eb253f366b6a7 100644 --- a/lib/autoupdate/rollout/controller.go +++ b/lib/autoupdate/rollout/controller.go @@ -71,6 +71,7 @@ func NewController(client Client, log *slog.Logger, clock clockwork.Clock, perio reconciler: reconciler{ clt: client, log: log, + clock: clock, rolloutStrategies: []rolloutStrategy{ // TODO(hugoShaka): add the strategies here as we implement them }, diff --git a/lib/autoupdate/rollout/reconciler.go b/lib/autoupdate/rollout/reconciler.go index 1e186156a9daa..a6264ba3cbf27 100644 --- a/lib/autoupdate/rollout/reconciler.go +++ b/lib/autoupdate/rollout/reconciler.go @@ -153,7 +153,7 @@ func (r *reconciler) tryReconcile(ctx context.Context) error { return trace.Wrap(err, "computing rollout status") } - // there was an existing rollout, we must figure if something changed + // We compute if something changed. specChanged := !proto.Equal(existingRollout.GetSpec(), newSpec) statusChanged := !proto.Equal(existingRollout.GetStatus(), newStatus) rolloutChanged := specChanged || statusChanged @@ -273,6 +273,8 @@ func (r *reconciler) computeStatus( // We create a new status if the rollout should be reset or the previous status was nil if shouldResetRollout || existingRollout.GetStatus() == nil { status = new(autoupdate.AutoUpdateAgentRolloutStatus) + // We set the start time if this is a new rollout + status.StartTime = timestamppb.New(r.clock.Now()) } else { status = utils.CloneProtoMsg(existingRollout.GetStatus()) } @@ -302,8 +304,9 @@ func (r *reconciler) computeStatus( return nil, trace.Wrap(err, "creating groups status") } } + status.Groups = groups - err = r.progressRollout(ctx, newSpec.GetStrategy(), groups) + err = r.progressRollout(ctx, newSpec.GetStrategy(), status) // Failing to progress the update is not a hard failure. // We want to update the status even if something went wrong to surface the failed reconciliation and potential errors to the user. if err != nil { @@ -311,7 +314,6 @@ func (r *reconciler) computeStatus( "error", err) } - status.Groups = groups status.State = computeRolloutState(groups) return status, nil } @@ -320,10 +322,10 @@ func (r *reconciler) computeStatus( // groups are updated in place. // If an error is returned, the groups should still be upserted, depending on the strategy, // failing to update a group might not be fatal (other groups can still progress independently). -func (r *reconciler) progressRollout(ctx context.Context, strategyName string, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error { +func (r *reconciler) progressRollout(ctx context.Context, strategyName string, status *autoupdate.AutoUpdateAgentRolloutStatus) error { for _, strategy := range r.rolloutStrategies { if strategy.name() == strategyName { - return strategy.progressRollout(ctx, groups) + return strategy.progressRollout(ctx, status) } } return trace.NotImplemented("rollout strategy %q not implemented", strategyName) diff --git a/lib/autoupdate/rollout/reconciler_test.go b/lib/autoupdate/rollout/reconciler_test.go index 9518fe66280c2..b5b77cd735edb 100644 --- a/lib/autoupdate/rollout/reconciler_test.go +++ b/lib/autoupdate/rollout/reconciler_test.go @@ -139,6 +139,8 @@ func TestTryReconcile(t *testing.T) { t.Parallel() log := utils.NewSlogLoggerForTests() ctx := context.Background() + clock := clockwork.NewFakeClock() + // Test setup: creating fixtures configOK, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{ Tools: &autoupdate.AutoUpdateConfigSpecTools{ @@ -186,7 +188,7 @@ func TestTryReconcile(t *testing.T) { Strategy: update.AgentsStrategyHaltOnError, }) require.NoError(t, err) - upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{} + upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{StartTime: timestamppb.New(clock.Now())} outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.2", @@ -315,8 +317,9 @@ func TestTryReconcile(t *testing.T) { // Test execution: Running the reconciliation reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } require.NoError(t, reconciler.tryReconcile(ctx)) @@ -330,6 +333,7 @@ func TestTryReconcile(t *testing.T) { func TestReconciler_Reconcile(t *testing.T) { log := utils.NewSlogLoggerForTests() ctx := context.Background() + clock := clockwork.NewFakeClock() // Test setup: creating fixtures config, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{ Tools: &autoupdate.AutoUpdateConfigSpecTools{ @@ -361,7 +365,7 @@ func TestReconciler_Reconcile(t *testing.T) { Strategy: update.AgentsStrategyHaltOnError, }) require.NoError(t, err) - upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{} + upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{StartTime: timestamppb.New(clock.Now())} outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.2", @@ -407,8 +411,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -431,8 +436,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -471,8 +477,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -509,8 +516,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -533,8 +541,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -563,8 +572,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -704,7 +714,7 @@ func (f *fakeRolloutStrategy) name() string { return f.strategyName } -func (f *fakeRolloutStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error { +func (f *fakeRolloutStrategy) progressRollout(ctx context.Context, status *autoupdate.AutoUpdateAgentRolloutStatus) error { f.calls++ return nil } @@ -742,8 +752,9 @@ func Test_reconciler_computeStatus(t *testing.T) { newGroups, err := r.makeGroupsStatus(ctx, schedules, clock.Now()) require.NoError(t, err) newStatus := &autoupdate.AutoUpdateAgentRolloutStatus{ - Groups: newGroups, - State: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED, + Groups: newGroups, + State: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED, + StartTime: timestamppb.New(clock.Now()), } tests := []struct { @@ -835,7 +846,9 @@ func Test_reconciler_computeStatus(t *testing.T) { Strategy: fakeRolloutStrategyName, }, // groups should be unset - expectedStatus: &autoupdate.AutoUpdateAgentRolloutStatus{}, + expectedStatus: &autoupdate.AutoUpdateAgentRolloutStatus{ + StartTime: timestamppb.New(clock.Now()), + }, expectedStrategyCalls: 0, }, { @@ -843,7 +856,9 @@ func Test_reconciler_computeStatus(t *testing.T) { existingRollout: &autoupdate.AutoUpdateAgentRollout{ Spec: oldSpec, // old groups were empty - Status: &autoupdate.AutoUpdateAgentRolloutStatus{}, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + StartTime: timestamppb.New(clock.Now()), + }, }, // no spec change newSpec: oldSpec, diff --git a/lib/autoupdate/rollout/strategy.go b/lib/autoupdate/rollout/strategy.go index 025d6ae01570d..b3e214f6cebd7 100644 --- a/lib/autoupdate/rollout/strategy.go +++ b/lib/autoupdate/rollout/strategy.go @@ -33,13 +33,14 @@ const ( // Common update reasons updateReasonCreated = "created" updateReasonReconcilerError = "reconciler_error" + updateReasonRolloutChanged = "rollout_changed_during_window" ) // rolloutStrategy is responsible for rolling out the update across groups. // This interface allows us to inject dummy strategies for simpler testing. type rolloutStrategy interface { name() string - progressRollout(context.Context, []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error + progressRollout(context.Context, *autoupdate.AutoUpdateAgentRolloutStatus) error } func inWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now time.Time) (bool, error) { @@ -53,6 +54,16 @@ func inWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now time.Time return int(group.ConfigStartHour) == now.Hour(), nil } +// rolloutChangedInWindow checks if the rollout got created after the theoretical group start time +func rolloutChangedInWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now, rolloutStart time.Time) (bool, error) { + // If the rollout is older than 24h, we know it did not change during the window + if now.Sub(rolloutStart) > 24*time.Hour { + return false, nil + } + // Else we check if the rollout happened in the group window. + return inWindow(group, rolloutStart) +} + func canUpdateToday(allowedDays []string, now time.Time) (bool, error) { for _, allowedDay := range allowedDays { if allowedDay == types.Wildcard { diff --git a/lib/autoupdate/rollout/strategy_haltonerror.go b/lib/autoupdate/rollout/strategy_haltonerror.go index c93438aaa1941..0ab57a052768d 100644 --- a/lib/autoupdate/rollout/strategy_haltonerror.go +++ b/lib/autoupdate/rollout/strategy_haltonerror.go @@ -60,7 +60,7 @@ func newHaltOnErrorStrategy(log *slog.Logger, clock clockwork.Clock) (rolloutStr }, nil } -func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error { +func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, status *autoupdate.AutoUpdateAgentRolloutStatus) error { now := h.clock.Now() // We process every group in order, all the previous groups must be in the DONE state // for the next group to become active. Even if some early groups are not DONE, @@ -72,12 +72,12 @@ func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, groups []*aut // to transition "staging" to DONE. previousGroupsAreDone := true - for i, group := range groups { + for i, group := range status.Groups { switch group.State { case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED: var previousGroup *autoupdate.AutoUpdateAgentRolloutStatusGroup if i != 0 { - previousGroup = groups[i-1] + previousGroup = status.Groups[i-1] } canStart, err := canStartHaltOnError(group, previousGroup, now) if err != nil { @@ -86,16 +86,31 @@ func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, groups []*aut setGroupState(group, group.State, updateReasonReconcilerError, now) return err } + + // Check if the rollout got created after the theoretical group start time + rolloutChangedDuringWindow, err := rolloutChangedInWindow(group, now, status.StartTime.AsTime()) + if err != nil { + setGroupState(group, group.State, updateReasonReconcilerError, now) + return err + } + switch { - case previousGroupsAreDone && canStart: - // We can start - setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonCanStart, now) - case previousGroupsAreDone: - // All previous groups are OK, but time-related criteria are not OK + case !previousGroupsAreDone: + // All previous groups are not DONE + setGroupState(group, group.State, updateReasonPreviousGroupsNotDone, now) + case !canStart: + // All previous groups are DONE, but time-related criteria are not met + // This can be because we are outside an update window, or because the + // specified wait_hours doesn't let us update yet. setGroupState(group, group.State, updateReasonCannotStart, now) + case rolloutChangedDuringWindow: + // All previous groups are DONE and time-related criteria are met. + // However, the rollout changed during the maintenance window. + setGroupState(group, group.State, updateReasonRolloutChanged, now) default: - // At least one previous group is not DONE - setGroupState(group, group.State, updateReasonPreviousGroupsNotDone, now) + // All previous groups are DONE and time-related criteria are met. + // We can start. + setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonCanStart, now) } previousGroupsAreDone = false case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: @@ -132,10 +147,10 @@ func canStartHaltOnError(group, previousGroup *autoupdate.AutoUpdateAgentRollout previousStart := previousGroup.StartTime.AsTime() if previousStart.IsZero() || previousStart.Unix() == 0 { - return false, trace.BadParameter("the previous group doesn't have a start time, cannot check the 'wait_hours' criteria") + return false, trace.BadParameter("the previous group doesn't have a start time, cannot check the 'wait_hours' criterion") } - // Check if the wait_hours criteria is OK, if we are at least after 'wait_hours' hours since the previous start. + // Check if the wait_hours criterion is OK, if we are at least after 'wait_hours' hours since the previous start. if now.Before(previousGroup.StartTime.AsTime().Add(time.Duration(group.ConfigWaitHours) * time.Hour)) { return false, nil } diff --git a/lib/autoupdate/rollout/strategy_haltonerror_test.go b/lib/autoupdate/rollout/strategy_haltonerror_test.go index 71a653c760361..84d4de069efe3 100644 --- a/lib/autoupdate/rollout/strategy_haltonerror_test.go +++ b/lib/autoupdate/rollout/strategy_haltonerror_test.go @@ -148,9 +148,10 @@ func Test_progressGroupsHaltOnError(t *testing.T) { group3Name := "group3" tests := []struct { - name string - initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup - expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + name string + initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + rolloutStartTime *timestamppb.Timestamp + expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup }{ { name: "single group unstarted -> unstarted", @@ -175,6 +176,30 @@ func Test_progressGroupsHaltOnError(t *testing.T) { }, }, }, + { + name: "single group unstarted -> unstarted because rollout changed in window", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + rolloutStartTime: timestamppb.New(clock.Now()), + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonRolloutChanged, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, { name: "single group unstarted -> active", initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ @@ -470,7 +495,12 @@ func Test_progressGroupsHaltOnError(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := strategy.progressRollout(ctx, tt.initialState) + status := &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: tt.initialState, + State: 0, + StartTime: tt.rolloutStartTime, + } + err := strategy.progressRollout(ctx, status) require.NoError(t, err) // We use require.Equal instead of Elements match because group order matters. // It's not super important for time-based, but is crucial for halt-on-error. diff --git a/lib/autoupdate/rollout/strategy_test.go b/lib/autoupdate/rollout/strategy_test.go index fbb7ec768d644..1348716ba6c1d 100644 --- a/lib/autoupdate/rollout/strategy_test.go +++ b/lib/autoupdate/rollout/strategy_test.go @@ -151,13 +151,70 @@ func Test_inWindow(t *testing.T) { } } +func Test_rolloutChangedInWindow(t *testing.T) { + // Test setup: creating fixtures. + group := &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "test-group", + ConfigDays: everyWeekdayButSunday, + ConfigStartHour: 12, + } + tests := []struct { + name string + now time.Time + rolloutStart time.Time + want bool + }{ + { + name: "zero rollout start time", + now: testSaturday, + rolloutStart: time.Time{}, + want: false, + }, + { + name: "epoch rollout start time", + now: testSaturday, + // tspb counts since epoch, wile go's zero is 0000-00-00 00:00:00 UTC + rolloutStart: (×tamppb.Timestamp{}).AsTime(), + want: false, + }, + { + name: "rollout changed a week ago", + now: testSaturday, + rolloutStart: testSaturday.Add(-7 * 24 * time.Hour), + want: false, + }, + { + name: "rollout changed the same day, before the window", + now: testSaturday, + rolloutStart: testSaturday.Add(-2 * time.Hour), + want: false, + }, + { + name: "rollout changed the same day, during the window", + now: testSaturday, + rolloutStart: testSaturday.Add(-2 * time.Minute), + want: true, + }, + { + name: "rollout just changed but we are not in a window", + now: testSunday, + rolloutStart: testSunday.Add(-2 * time.Minute), + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test execution. + result, err := rolloutChangedInWindow(group, tt.now, tt.rolloutStart) + require.NoError(t, err) + require.Equal(t, tt.want, result) + }) + } +} + func Test_setGroupState(t *testing.T) { groupName := "test-group" - // TODO(hugoShaka) remove those two variables once the strategies are merged and the constants are defined. - updateReasonCanStart := "can_start" - updateReasonCannotStart := "cannot_start" - clock := clockwork.NewFakeClock() // oldUpdateTime is 5 minutes in the past oldUpdateTime := clock.Now() diff --git a/lib/autoupdate/rollout/strategy_timebased.go b/lib/autoupdate/rollout/strategy_timebased.go index c5abc34be5588..13e844f0e4a5a 100644 --- a/lib/autoupdate/rollout/strategy_timebased.go +++ b/lib/autoupdate/rollout/strategy_timebased.go @@ -56,11 +56,11 @@ func newTimeBasedStrategy(log *slog.Logger, clock clockwork.Clock) (rolloutStrat }, nil } -func (h *timeBasedStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error { +func (h *timeBasedStrategy) progressRollout(ctx context.Context, status *autoupdate.AutoUpdateAgentRolloutStatus) error { now := h.clock.Now() // We always process every group regardless of the order. var errs []error - for _, group := range groups { + for _, group := range status.Groups { switch group.State { case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE: @@ -76,10 +76,22 @@ func (h *timeBasedStrategy) progressRollout(ctx context.Context, groups []*autou errs = append(errs, err) continue } - if shouldBeActive { - setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now) - } else { + + // Check if the rollout got created after the theoretical group start time + rolloutChangedDuringWindow, err := rolloutChangedInWindow(group, now, status.StartTime.AsTime()) + if err != nil { + setGroupState(group, group.State, updateReasonReconcilerError, now) + errs = append(errs, err) + continue + } + + switch { + case !shouldBeActive: setGroupState(group, group.State, updateReasonOutsideWindow, now) + case rolloutChangedDuringWindow: + setGroupState(group, group.State, updateReasonRolloutChanged, now) + default: + setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now) } case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: // We don't touch any group that was manually rolled back. diff --git a/lib/autoupdate/rollout/strategy_timebased_test.go b/lib/autoupdate/rollout/strategy_timebased_test.go index 91db29d42e469..6402da3b21c44 100644 --- a/lib/autoupdate/rollout/strategy_timebased_test.go +++ b/lib/autoupdate/rollout/strategy_timebased_test.go @@ -44,9 +44,10 @@ func Test_progressGroupsTimeBased(t *testing.T) { ctx := context.Background() tests := []struct { - name string - initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup - expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + name string + initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + rolloutStartTime *timestamppb.Timestamp + expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup }{ { name: "unstarted -> unstarted", @@ -71,6 +72,30 @@ func Test_progressGroupsTimeBased(t *testing.T) { }, }, }, + { + name: "unstarted -> unstarted because rollout just changed", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: lastUpdate, + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + rolloutStartTime: timestamppb.New(clock.Now()), + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonRolloutChanged, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, { name: "unstarted -> active", initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ @@ -302,13 +327,18 @@ func Test_progressGroupsTimeBased(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := strategy.progressRollout(ctx, tt.initialState) + status := &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: tt.initialState, + State: 0, + StartTime: tt.rolloutStartTime, + } + err := strategy.progressRollout(ctx, status) require.NoError(t, err) // We use require.Equal instead of Elements match because group order matters. // It's not super important for time-based, but is crucial for halt-on-error. // So it's better to be more conservative and validate order never changes for // both strategies. - require.Equal(t, tt.expectedState, tt.initialState) + require.Equal(t, tt.expectedState, status.Groups) }) } }