Skip to content

Commit

Permalink
refuse to start the update if the rollout start date is in window
Browse files Browse the repository at this point in the history
  • Loading branch information
hugoShaka committed Dec 18, 2024
1 parent fc8611e commit 47ce2f3
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 27 deletions.
2 changes: 1 addition & 1 deletion lib/autoupdate/rollout/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (r *reconciler) tryReconcile(ctx context.Context) error {
return trace.Wrap(err, "computing rollout status")
}

// we must figure if something changed
// We compute if something changed.
specChanged := !proto.Equal(existingRollout.GetSpec(), newSpec)
statusChanged := !proto.Equal(existingRollout.GetStatus(), newStatus)
rolloutChanged := specChanged || statusChanged
Expand Down
11 changes: 11 additions & 0 deletions lib/autoupdate/rollout/strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const (
// Common update reasons
updateReasonCreated = "created"
updateReasonReconcilerError = "reconciler_error"
updateReasonRolloutChanged = "rollout_changed_during_window"
)

// rolloutStrategy is responsible for rolling out the update across groups.
Expand All @@ -53,6 +54,16 @@ func inWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now time.Time
return int(group.ConfigStartHour) == now.Hour(), nil
}

// rolloutChangedInWindow checks if the rollout got created after the theoretical group start time
func rolloutChangedInWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now, rolloutStart time.Time) (bool, error) {
// If the rollout is older than 24h, we know it did not change during the window
if now.Sub(rolloutStart) > 24*time.Hour {
return false, nil
}
// Else we check if the rollout happened in the group window.
return inWindow(group, rolloutStart)
}

func canUpdateToday(allowedDays []string, now time.Time) (bool, error) {
for _, allowedDay := range allowedDays {
if allowedDay == types.Wildcard {
Expand Down
35 changes: 25 additions & 10 deletions lib/autoupdate/rollout/strategy_haltonerror.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func newHaltOnErrorStrategy(log *slog.Logger, clock clockwork.Clock) (rolloutStr
}, nil
}

func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) error {
func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, status *autoupdate.AutoUpdateAgentRolloutStatus) error {
now := h.clock.Now()
// We process every group in order, all the previous groups must be in the DONE state
// for the next group to become active. Even if some early groups are not DONE,
Expand All @@ -72,12 +72,12 @@ func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, groups []*aut
// to transition "staging" to DONE.
previousGroupsAreDone := true

for i, group := range groups {
for i, group := range status.Groups {
switch group.State {
case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED:
var previousGroup *autoupdate.AutoUpdateAgentRolloutStatusGroup
if i != 0 {
previousGroup = groups[i-1]
previousGroup = status.Groups[i-1]
}
canStart, err := canStartHaltOnError(group, previousGroup, now)
if err != nil {
Expand All @@ -86,16 +86,31 @@ func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, groups []*aut
setGroupState(group, group.State, updateReasonReconcilerError, now)
return err
}

// Check if the rollout got created after the theoretical group start time
rolloutChangedDuringWindow, err := rolloutChangedInWindow(group, now, status.StartTime.AsTime())
if err != nil {
setGroupState(group, group.State, updateReasonReconcilerError, now)
return err
}

switch {
case previousGroupsAreDone && canStart:
// We can start
setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonCanStart, now)
case previousGroupsAreDone:
// All previous groups are OK, but time-related criteria are not OK
case !previousGroupsAreDone:
// All previous groups are not DONE
setGroupState(group, group.State, updateReasonPreviousGroupsNotDone, now)
case !canStart:
// All previous groups are DONE, but time-related criteria are not met
// This can be because we are outside an update window, or because the
// specified wait_hours doesn't let us update yet.
setGroupState(group, group.State, updateReasonCannotStart, now)
case rolloutChangedDuringWindow:
// All previous groups are DONE and time-related criterias are met.
// However, the rollout changed during the maintenance window.
setGroupState(group, group.State, updateReasonRolloutChanged, now)
default:
// At least one previous group is not DONE
setGroupState(group, group.State, updateReasonPreviousGroupsNotDone, now)
// All previous groups are DONE and time-related criterias are met.
// We can start.
setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonCanStart, now)
}
previousGroupsAreDone = false
case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK:
Expand Down
38 changes: 34 additions & 4 deletions lib/autoupdate/rollout/strategy_haltonerror_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,10 @@ func Test_progressGroupsHaltOnError(t *testing.T) {
group3Name := "group3"

tests := []struct {
name string
initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
name string
initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
rolloutStartTime *timestamppb.Timestamp
expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup
}{
{
name: "single group unstarted -> unstarted",
Expand All @@ -175,6 +176,30 @@ func Test_progressGroupsHaltOnError(t *testing.T) {
},
},
},
{
name: "single group unstarted -> unstarted because rollout changed in window",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: group1Name,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
LastUpdateTime: timestamppb.New(yesterday),
LastUpdateReason: updateReasonCreated,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
rolloutStartTime: timestamppb.New(clock.Now()),
expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
{
Name: group1Name,
State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
LastUpdateTime: timestamppb.New(clock.Now()),
LastUpdateReason: updateReasonRolloutChanged,
ConfigDays: canStartToday,
ConfigStartHour: matchingStartHour,
},
},
},
{
name: "single group unstarted -> active",
initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{
Expand Down Expand Up @@ -470,7 +495,12 @@ func Test_progressGroupsHaltOnError(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := strategy.progressRollout(ctx, tt.initialState)
status := &autoupdate.AutoUpdateAgentRolloutStatus{
Groups: tt.initialState,
State: 0,
StartTime: tt.rolloutStartTime,
}
err := strategy.progressRollout(ctx, status)
require.NoError(t, err)
// We use require.Equal instead of Elements match because group order matters.
// It's not super important for time-based, but is crucial for halt-on-error.
Expand Down
65 changes: 61 additions & 4 deletions lib/autoupdate/rollout/strategy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,70 @@ func Test_inWindow(t *testing.T) {
}
}

func Test_rolloutChangedInWindow(t *testing.T) {
// Test setup: creating fixtures.
group := &autoupdate.AutoUpdateAgentRolloutStatusGroup{
Name: "test-group",
ConfigDays: everyWeekdayButSunday,
ConfigStartHour: 12,
}
tests := []struct {
name string
now time.Time
rolloutStart time.Time
want bool
}{
{
name: "zero rollout start time",
now: testSaturday,
rolloutStart: time.Time{},
want: false,
},
{
name: "epoch rollout start time",
now: testSaturday,
// tspb counts since epoch, wile go's zero is 0000-00-00 00:00:00 UTC
rolloutStart: (&timestamppb.Timestamp{}).AsTime(),
want: false,
},
{
name: "rollout changed a week ago",
now: testSaturday,
rolloutStart: testSaturday.Add(-7 * 24 * time.Hour),
want: false,
},
{
name: "rollout changed the same day, before the window",
now: testSaturday,
rolloutStart: testSaturday.Add(-2 * time.Hour),
want: false,
},
{
name: "rollout changed the same day, during the window",
now: testSaturday,
rolloutStart: testSaturday.Add(-2 * time.Minute),
want: true,
},
{
name: "rollout just changed but we are not in a window",
now: testSunday,
rolloutStart: testSunday.Add(-2 * time.Minute),
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Test execution.
result, err := rolloutChangedInWindow(group, tt.now, tt.rolloutStart)
require.NoError(t, err)
require.Equal(t, tt.want, result)
})
}
}

func Test_setGroupState(t *testing.T) {
groupName := "test-group"

// TODO(hugoShaka) remove those two variables once the strategies are merged and the constants are defined.
updateReasonCanStart := "can_start"
updateReasonCannotStart := "cannot_start"

clock := clockwork.NewFakeClock()
// oldUpdateTime is 5 minutes in the past
oldUpdateTime := clock.Now()
Expand Down
18 changes: 10 additions & 8 deletions lib/autoupdate/rollout/strategy_timebased.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,16 @@ package rollout

import (
"context"
"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
update "github.com/gravitational/teleport/api/types/autoupdate"
"github.com/gravitational/trace"
"github.com/jonboulle/clockwork"
"log/slog"
"time"

"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
update "github.com/gravitational/teleport/api/types/autoupdate"
)

const (
updateReasonInWindow = "in_window"
updateReasonOutsideWindow = "outside_window"
updateReasonRolloutChanged = "rollout_changed_during_window"
updateReasonInWindow = "in_window"
updateReasonOutsideWindow = "outside_window"
)

type timeBasedStrategy struct {
Expand Down Expand Up @@ -79,7 +76,12 @@ func (h *timeBasedStrategy) progressRollout(ctx context.Context, status *autoupd
}

// Check if the rollout got created after the theoretical group start time
rolloutChangedDuringWindow := status.StartTime.AsTime().After(now.Truncate(time.Hour))
rolloutChangedDuringWindow, err := rolloutChangedInWindow(group, now, status.StartTime.AsTime())
if err != nil {
setGroupState(group, group.State, updateReasonReconcilerError, now)
errs = append(errs, err)
continue
}

switch {
case !shouldBeActive:
Expand Down

0 comments on commit 47ce2f3

Please sign in to comment.