Skip to content

Commit

Permalink
set rollout start date and don't start updating if rollout just changed
Browse files Browse the repository at this point in the history
  • Loading branch information
hugoShaka committed Dec 18, 2024
1 parent b2dc107 commit cb7214e
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 41 deletions.
1 change: 1 addition & 0 deletions api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions api/proto/teleport/autoupdate/v1/autoupdate.proto
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ message AutoUpdateAgentRolloutStatus {
// For example, a group updates every day between 13:00 and 14:00. If the target version changes to 13:30, the group
// will not start updating to the new version directly. The controller sees that the group theoretical start time is
// before the rollout start time and the maintenance window belongs to the previous rollout.
// When the timestamp is nil, the controller will ignore the start time and check and allow groups to activate.
google.protobuf.Timestamp start_time = 3;
}

Expand Down
12 changes: 7 additions & 5 deletions lib/autoupdate/rollout/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (r *reconciler) tryReconcile(ctx context.Context) error {
return trace.Wrap(err, "computing rollout status")
}

// there was an existing rollout, we must figure if something changed
// we must figure if something changed
specChanged := !proto.Equal(existingRollout.GetSpec(), newSpec)
statusChanged := !proto.Equal(existingRollout.GetStatus(), newStatus)
rolloutChanged := specChanged || statusChanged
Expand Down Expand Up @@ -273,6 +273,8 @@ func (r *reconciler) computeStatus(
// We create a new status if the rollout should be reset or the previous status was nil
if shouldResetRollout || existingRollout.GetStatus() == nil {
status = new(autoupdate.AutoUpdateAgentRolloutStatus)
// We set the start time if this is a new rollout
status.StartTime = timestamppb.New(r.clock.Now())
} else {
status = utils.CloneProtoMsg(existingRollout.GetStatus())
}
Expand Down Expand Up @@ -302,16 +304,16 @@ func (r *reconciler) computeStatus(
return nil, trace.Wrap(err, "creating groups status")
}
}
status.Groups = groups

err = r.progressRollout(ctx, newSpec.GetStrategy(), groups)
err = r.progressRollout(ctx, newSpec.GetStrategy(), status)
// Failing to progress the update is not a hard failure.
// We want to update the status even if something went wrong to surface the failed reconciliation and potential errors to the user.
if err != nil {
r.log.ErrorContext(ctx, "Errors encountered during rollout progress. Some groups might not get updated properly.",
"error", err)
}

status.Groups = groups
status.State = computeRolloutState(groups)
return status, nil
}
Expand All @@ -320,10 +322,10 @@ func (r *reconciler) computeStatus(
// groups are updated in place.
// If an error is returned, the groups should still be upserted, depending on the strategy,
// failing to update a group might not be fatal (other groups can still progress independently).
func (r *reconciler) progressRollout(ctx context.Context, strategyName string, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error {
func (r *reconciler) progressRollout(ctx context.Context, strategyName string, status *autoupdate.AutoUpdateAgentRolloutStatus) error {
for _, strategy := range r.rolloutStrategies {
if strategy.name() == strategyName {
return strategy.progressRollout(ctx, groups)
return strategy.progressRollout(ctx, status)
}
}
return trace.NotImplemented("rollout strategy %q not implemented", strategyName)
Expand Down
57 changes: 36 additions & 21 deletions lib/autoupdate/rollout/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ func TestTryReconcile(t *testing.T) {
t.Parallel()
log := utils.NewSlogLoggerForTests()
ctx := context.Background()
clock := clockwork.NewFakeClock()

// Test setup: creating fixtures
configOK, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
Tools: &autoupdate.AutoUpdateConfigSpecTools{
Expand Down Expand Up @@ -186,7 +188,7 @@ func TestTryReconcile(t *testing.T) {
Strategy: update.AgentsStrategyHaltOnError,
})
require.NoError(t, err)
upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{}
upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{StartTime: timestamppb.New(clock.Now())}

outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
StartVersion: "1.2.2",
Expand Down Expand Up @@ -315,8 +317,9 @@ func TestTryReconcile(t *testing.T) {
// Test execution: Running the reconciliation

reconciler := &reconciler{
clt: client,
log: log,
clt: client,
log: log,
clock: clock,
}

require.NoError(t, reconciler.tryReconcile(ctx))
Expand All @@ -330,6 +333,7 @@ func TestTryReconcile(t *testing.T) {
func TestReconciler_Reconcile(t *testing.T) {
log := utils.NewSlogLoggerForTests()
ctx := context.Background()
clock := clockwork.NewFakeClock()
// Test setup: creating fixtures
config, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
Tools: &autoupdate.AutoUpdateConfigSpecTools{
Expand Down Expand Up @@ -361,7 +365,7 @@ func TestReconciler_Reconcile(t *testing.T) {
Strategy: update.AgentsStrategyHaltOnError,
})
require.NoError(t, err)
upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{}
upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{StartTime: timestamppb.New(clock.Now())}

outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
StartVersion: "1.2.2",
Expand Down Expand Up @@ -407,8 +411,9 @@ func TestReconciler_Reconcile(t *testing.T) {

client := newMockClient(t, stubs)
reconciler := &reconciler{
clt: client,
log: log,
clt: client,
log: log,
clock: clock,
}

// Test execution: run the reconciliation loop
Expand All @@ -431,8 +436,9 @@ func TestReconciler_Reconcile(t *testing.T) {

client := newMockClient(t, stubs)
reconciler := &reconciler{
clt: client,
log: log,
clt: client,
log: log,
clock: clock,
}

// Test execution: run the reconciliation loop
Expand Down Expand Up @@ -471,8 +477,9 @@ func TestReconciler_Reconcile(t *testing.T) {

client := newMockClient(t, stubs)
reconciler := &reconciler{
clt: client,
log: log,
clt: client,
log: log,
clock: clock,
}

// Test execution: run the reconciliation loop
Expand Down Expand Up @@ -509,8 +516,9 @@ func TestReconciler_Reconcile(t *testing.T) {

client := newMockClient(t, stubs)
reconciler := &reconciler{
clt: client,
log: log,
clt: client,
log: log,
clock: clock,
}

// Test execution: run the reconciliation loop
Expand All @@ -533,8 +541,9 @@ func TestReconciler_Reconcile(t *testing.T) {

client := newMockClient(t, stubs)
reconciler := &reconciler{
clt: client,
log: log,
clt: client,
log: log,
clock: clock,
}

// Test execution: run the reconciliation loop
Expand Down Expand Up @@ -563,8 +572,9 @@ func TestReconciler_Reconcile(t *testing.T) {

client := newMockClient(t, stubs)
reconciler := &reconciler{
clt: client,
log: log,
clt: client,
log: log,
clock: clock,
}

// Test execution: run the reconciliation loop
Expand Down Expand Up @@ -704,7 +714,7 @@ func (f *fakeRolloutStrategy) name() string {
return f.strategyName
}

func (f *fakeRolloutStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error {
func (f *fakeRolloutStrategy) progressRollout(ctx context.Context, status *autoupdate.AutoUpdateAgentRolloutStatus) error {
f.calls++
return nil
}
Expand Down Expand Up @@ -742,8 +752,9 @@ func Test_reconciler_computeStatus(t *testing.T) {
newGroups, err := r.makeGroupsStatus(ctx, schedules, clock.Now())
require.NoError(t, err)
newStatus := &autoupdate.AutoUpdateAgentRolloutStatus{
Groups: newGroups,
State: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED,
Groups: newGroups,
State: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED,
StartTime: timestamppb.New(clock.Now()),
}

tests := []struct {
Expand Down Expand Up @@ -835,15 +846,19 @@ func Test_reconciler_computeStatus(t *testing.T) {
Strategy: fakeRolloutStrategyName,
},
// groups should be unset
expectedStatus: &autoupdate.AutoUpdateAgentRolloutStatus{},
expectedStatus: &autoupdate.AutoUpdateAgentRolloutStatus{
StartTime: timestamppb.New(clock.Now()),
},
expectedStrategyCalls: 0,
},
{
name: "new groups are populated if previous ones were empty",
existingRollout: &autoupdate.AutoUpdateAgentRollout{
Spec: oldSpec,
// old groups were empty
Status: &autoupdate.AutoUpdateAgentRolloutStatus{},
Status: &autoupdate.AutoUpdateAgentRolloutStatus{
StartTime: timestamppb.New(clock.Now()),
},
},
// no spec change
newSpec: oldSpec,
Expand Down
2 changes: 1 addition & 1 deletion lib/autoupdate/rollout/strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ const (
// This interface allows us to inject dummy strategies for simpler testing.
type rolloutStrategy interface {
name() string
progressRollout(context.Context, []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error
progressRollout(context.Context, *autoupdate.AutoUpdateAgentRolloutStatus) error
}

func inWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now time.Time) (bool, error) {
Expand Down
26 changes: 17 additions & 9 deletions lib/autoupdate/rollout/strategy_timebased.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,19 @@ package rollout

import (
"context"
"log/slog"

"github.com/gravitational/trace"
"github.com/jonboulle/clockwork"
"log/slog"
"time"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
update "github.com/gravitational/teleport/api/types/autoupdate"
)

const (
updateReasonInWindow = "in_window"
updateReasonOutsideWindow = "outside_window"
updateReasonInWindow = "in_window"
updateReasonOutsideWindow = "outside_window"
updateReasonRolloutChanged = "rollout_changed_during_window"
)

type timeBasedStrategy struct {
Expand All @@ -56,11 +57,11 @@ func newTimeBasedStrategy(log *slog.Logger, clock clockwork.Clock) (rolloutStrat
}, nil
}

func (h *timeBasedStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error {
func (h *timeBasedStrategy) progressRollout(ctx context.Context, status *autoupdate.AutoUpdateAgentRolloutStatus) error {
now := h.clock.Now()
// We always process every group regardless of the order.
var errs []error
for _, group := range groups {
for _, group := range status.Groups {
switch group.State {
case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:
Expand All @@ -76,10 +77,17 @@ func (h *timeBasedStrategy) progressRollout(ctx context.Context, groups []*autou
errs = append(errs, err)
continue
}
if shouldBeActive {
setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now)
} else {

// Check if the rollout got created after the theoretical group start time
rolloutChangedDuringWindow := status.StartTime.AsTime().After(now.Truncate(time.Hour))

switch {
case !shouldBeActive:
setGroupState(group, group.State, updateReasonOutsideWindow, now)
case rolloutChangedDuringWindow:
setGroupState(group, group.State, updateReasonRolloutChanged, now)
default:
setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now)
}
case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK:
// We don't touch any group that was manually rolled back.
Expand Down
40 changes: 35 additions & 5 deletions lib/autoupdate/rollout/strategy_timebased_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ func Test_progressGroupsTimeBased(t *testing.T) {
ctx := context.Background()

tests := []struct {
name string
initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
name string
initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
rolloutStartTime *timestamppb.Timestamp
expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
}{
{
name: "unstarted -> unstarted",
Expand All @@ -71,6 +72,30 @@ func Test_progressGroupsTimeBased(t *testing.T) {
},
},
},
{
name: "unstarted -> unstarted because rollout just changed",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
LastUpdateTime: lastUpdate,
LastUpdateReason: updateReasonCreated,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
rolloutStartTime: timestamppb.New(clock.Now()),
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: groupName,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonRolloutChanged,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
name: "unstarted -> active",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
Expand Down Expand Up @@ -302,13 +327,18 @@ func Test_progressGroupsTimeBased(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := strategy.progressRollout(ctx, tt.initialState)
status := &autoupdate.AutoUpdateAgentRolloutStatus{
Groups: tt.initialState,
State: 0,
StartTime: tt.rolloutStartTime,
}
err := strategy.progressRollout(ctx, status)
require.NoError(t, err)
// We use require.Equal instead of Elements match because group order matters.
// It's not super important for time-based, but is crucial for halt-on-error.
// So it's better to be more conservative and validate order never changes for
// both strategies.
require.Equal(t, tt.expectedState, tt.initialState)
require.Equal(t, tt.expectedState, status.Groups)
})
}
}

0 comments on commit cb7214e

Please sign in to comment.