diff --git a/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go b/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go index 7fc41ec3f3858..8d7e52517814b 100644 --- a/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go +++ b/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go @@ -997,6 +997,7 @@ type AutoUpdateAgentRolloutStatus struct { // For example, a group updates every day between 13:00 and 14:00. If the target version changes to 13:30, the group // will not start updating to the new version directly. The controller sees that the group theoretical start time is // before the rollout start time and the maintenance window belongs to the previous rollout. + // When the timestamp is nil, the controller will ignore the start time and check and allow groups to activate. StartTime *timestamppb.Timestamp `protobuf:"bytes,3,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache diff --git a/api/proto/teleport/autoupdate/v1/autoupdate.proto b/api/proto/teleport/autoupdate/v1/autoupdate.proto index 0257ec3e023b7..faf7ec93db129 100644 --- a/api/proto/teleport/autoupdate/v1/autoupdate.proto +++ b/api/proto/teleport/autoupdate/v1/autoupdate.proto @@ -180,6 +180,7 @@ message AutoUpdateAgentRolloutStatus { // For example, a group updates every day between 13:00 and 14:00. If the target version changes to 13:30, the group // will not start updating to the new version directly. The controller sees that the group theoretical start time is // before the rollout start time and the maintenance window belongs to the previous rollout. + // When the timestamp is nil, the controller will ignore the start time and check and allow groups to activate. google.protobuf.Timestamp start_time = 3; } diff --git a/lib/autoupdate/rollout/reconciler.go b/lib/autoupdate/rollout/reconciler.go index 29adbf732bce9..79f9315538595 100644 --- a/lib/autoupdate/rollout/reconciler.go +++ b/lib/autoupdate/rollout/reconciler.go @@ -152,7 +152,7 @@ func (r *reconciler) tryReconcile(ctx context.Context) error { return trace.Wrap(err, "computing rollout status") } - // there was an existing rollout, we must figure if something changed + // we must figure if something changed specChanged := !proto.Equal(existingRollout.GetSpec(), newSpec) statusChanged := !proto.Equal(existingRollout.GetStatus(), newStatus) rolloutChanged := specChanged || statusChanged @@ -272,6 +272,8 @@ func (r *reconciler) computeStatus( // We create a new status if the rollout should be reset or the previous status was nil if shouldResetRollout || existingRollout.GetStatus() == nil { status = new(autoupdate.AutoUpdateAgentRolloutStatus) + // We set the start time if this is a new rollout + status.StartTime = timestamppb.New(r.clock.Now()) } else { status = utils.CloneProtoMsg(existingRollout.GetStatus()) } @@ -301,8 +303,9 @@ func (r *reconciler) computeStatus( return nil, trace.Wrap(err, "creating groups status") } } + status.Groups = groups - err = r.progressRollout(ctx, newSpec.GetStrategy(), groups) + err = r.progressRollout(ctx, newSpec.GetStrategy(), status) // Failing to progress the update is not a hard failure. // We want to update the status even if something went wrong to surface the failed reconciliation and potential errors to the user. if err != nil { @@ -310,7 +313,6 @@ func (r *reconciler) computeStatus( "error", err) } - status.Groups = groups status.State = computeRolloutState(groups) return status, nil } @@ -319,10 +321,10 @@ func (r *reconciler) computeStatus( // groups are updated in place. // If an error is returned, the groups should still be upserted, depending on the strategy, // failing to update a group might not be fatal (other groups can still progress independently). -func (r *reconciler) progressRollout(ctx context.Context, strategyName string, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error { +func (r *reconciler) progressRollout(ctx context.Context, strategyName string, status *autoupdate.AutoUpdateAgentRolloutStatus) error { for _, strategy := range r.rolloutStrategies { if strategy.name() == strategyName { - return strategy.progressRollout(ctx, groups) + return strategy.progressRollout(ctx, status) } } return trace.NotImplemented("rollout strategy %q not implemented", strategyName) diff --git a/lib/autoupdate/rollout/reconciler_test.go b/lib/autoupdate/rollout/reconciler_test.go index 9518fe66280c2..b5b77cd735edb 100644 --- a/lib/autoupdate/rollout/reconciler_test.go +++ b/lib/autoupdate/rollout/reconciler_test.go @@ -139,6 +139,8 @@ func TestTryReconcile(t *testing.T) { t.Parallel() log := utils.NewSlogLoggerForTests() ctx := context.Background() + clock := clockwork.NewFakeClock() + // Test setup: creating fixtures configOK, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{ Tools: &autoupdate.AutoUpdateConfigSpecTools{ @@ -186,7 +188,7 @@ func TestTryReconcile(t *testing.T) { Strategy: update.AgentsStrategyHaltOnError, }) require.NoError(t, err) - upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{} + upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{StartTime: timestamppb.New(clock.Now())} outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.2", @@ -315,8 +317,9 @@ func TestTryReconcile(t *testing.T) { // Test execution: Running the reconciliation reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } require.NoError(t, reconciler.tryReconcile(ctx)) @@ -330,6 +333,7 @@ func TestTryReconcile(t *testing.T) { func TestReconciler_Reconcile(t *testing.T) { log := utils.NewSlogLoggerForTests() ctx := context.Background() + clock := clockwork.NewFakeClock() // Test setup: creating fixtures config, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{ Tools: &autoupdate.AutoUpdateConfigSpecTools{ @@ -361,7 +365,7 @@ func TestReconciler_Reconcile(t *testing.T) { Strategy: update.AgentsStrategyHaltOnError, }) require.NoError(t, err) - upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{} + upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{StartTime: timestamppb.New(clock.Now())} outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.2", @@ -407,8 +411,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -431,8 +436,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -471,8 +477,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -509,8 +516,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -533,8 +541,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -563,8 +572,9 @@ func TestReconciler_Reconcile(t *testing.T) { client := newMockClient(t, stubs) reconciler := &reconciler{ - clt: client, - log: log, + clt: client, + log: log, + clock: clock, } // Test execution: run the reconciliation loop @@ -704,7 +714,7 @@ func (f *fakeRolloutStrategy) name() string { return f.strategyName } -func (f *fakeRolloutStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error { +func (f *fakeRolloutStrategy) progressRollout(ctx context.Context, status *autoupdate.AutoUpdateAgentRolloutStatus) error { f.calls++ return nil } @@ -742,8 +752,9 @@ func Test_reconciler_computeStatus(t *testing.T) { newGroups, err := r.makeGroupsStatus(ctx, schedules, clock.Now()) require.NoError(t, err) newStatus := &autoupdate.AutoUpdateAgentRolloutStatus{ - Groups: newGroups, - State: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED, + Groups: newGroups, + State: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED, + StartTime: timestamppb.New(clock.Now()), } tests := []struct { @@ -835,7 +846,9 @@ func Test_reconciler_computeStatus(t *testing.T) { Strategy: fakeRolloutStrategyName, }, // groups should be unset - expectedStatus: &autoupdate.AutoUpdateAgentRolloutStatus{}, + expectedStatus: &autoupdate.AutoUpdateAgentRolloutStatus{ + StartTime: timestamppb.New(clock.Now()), + }, expectedStrategyCalls: 0, }, { @@ -843,7 +856,9 @@ func Test_reconciler_computeStatus(t *testing.T) { existingRollout: &autoupdate.AutoUpdateAgentRollout{ Spec: oldSpec, // old groups were empty - Status: &autoupdate.AutoUpdateAgentRolloutStatus{}, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + StartTime: timestamppb.New(clock.Now()), + }, }, // no spec change newSpec: oldSpec, diff --git a/lib/autoupdate/rollout/strategy.go b/lib/autoupdate/rollout/strategy.go index 025d6ae01570d..aec5703a8f6ac 100644 --- a/lib/autoupdate/rollout/strategy.go +++ b/lib/autoupdate/rollout/strategy.go @@ -39,7 +39,7 @@ const ( // This interface allows us to inject dummy strategies for simpler testing. type rolloutStrategy interface { name() string - progressRollout(context.Context, []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error + progressRollout(context.Context, *autoupdate.AutoUpdateAgentRolloutStatus) error } func inWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now time.Time) (bool, error) { diff --git a/lib/autoupdate/rollout/strategy_timebased.go b/lib/autoupdate/rollout/strategy_timebased.go index c5abc34be5588..d5abd1334bc5f 100644 --- a/lib/autoupdate/rollout/strategy_timebased.go +++ b/lib/autoupdate/rollout/strategy_timebased.go @@ -20,18 +20,19 @@ package rollout import ( "context" - "log/slog" - "github.com/gravitational/trace" "github.com/jonboulle/clockwork" + "log/slog" + "time" "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" update "github.com/gravitational/teleport/api/types/autoupdate" ) const ( - updateReasonInWindow = "in_window" - updateReasonOutsideWindow = "outside_window" + updateReasonInWindow = "in_window" + updateReasonOutsideWindow = "outside_window" + updateReasonRolloutChanged = "rollout_changed_during_window" ) type timeBasedStrategy struct { @@ -56,11 +57,11 @@ func newTimeBasedStrategy(log *slog.Logger, clock clockwork.Clock) (rolloutStrat }, nil } -func (h *timeBasedStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error { +func (h *timeBasedStrategy) progressRollout(ctx context.Context, status *autoupdate.AutoUpdateAgentRolloutStatus) error { now := h.clock.Now() // We always process every group regardless of the order. var errs []error - for _, group := range groups { + for _, group := range status.Groups { switch group.State { case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE: @@ -76,10 +77,17 @@ func (h *timeBasedStrategy) progressRollout(ctx context.Context, groups []*autou errs = append(errs, err) continue } - if shouldBeActive { - setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now) - } else { + + // Check if the rollout got created after the theoretical group start time + rolloutChangedDuringWindow := status.StartTime.AsTime().After(now.Truncate(time.Hour)) + + switch { + case !shouldBeActive: setGroupState(group, group.State, updateReasonOutsideWindow, now) + case rolloutChangedDuringWindow: + setGroupState(group, group.State, updateReasonRolloutChanged, now) + default: + setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now) } case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: // We don't touch any group that was manually rolled back. diff --git a/lib/autoupdate/rollout/strategy_timebased_test.go b/lib/autoupdate/rollout/strategy_timebased_test.go index 91db29d42e469..6402da3b21c44 100644 --- a/lib/autoupdate/rollout/strategy_timebased_test.go +++ b/lib/autoupdate/rollout/strategy_timebased_test.go @@ -44,9 +44,10 @@ func Test_progressGroupsTimeBased(t *testing.T) { ctx := context.Background() tests := []struct { - name string - initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup - expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + name string + initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + rolloutStartTime *timestamppb.Timestamp + expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup }{ { name: "unstarted -> unstarted", @@ -71,6 +72,30 @@ func Test_progressGroupsTimeBased(t *testing.T) { }, }, }, + { + name: "unstarted -> unstarted because rollout just changed", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: lastUpdate, + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + rolloutStartTime: timestamppb.New(clock.Now()), + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonRolloutChanged, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, { name: "unstarted -> active", initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ @@ -302,13 +327,18 @@ func Test_progressGroupsTimeBased(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - err := strategy.progressRollout(ctx, tt.initialState) + status := &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: tt.initialState, + State: 0, + StartTime: tt.rolloutStartTime, + } + err := strategy.progressRollout(ctx, status) require.NoError(t, err) // We use require.Equal instead of Elements match because group order matters. // It's not super important for time-based, but is crucial for halt-on-error. // So it's better to be more conservative and validate order never changes for // both strategies. - require.Equal(t, tt.expectedState, tt.initialState) + require.Equal(t, tt.expectedState, status.Groups) }) } }