Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-v0.56.x] do not allow negative requeue times #7638

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/pipelineruns.md
Original file line number Diff line number Diff line change
Expand Up @@ -1424,6 +1424,8 @@ If you set the timeout to 0, the `PipelineRun` fails immediately upon encounteri

> :warning: **`timeout` is deprecated and will be removed in future versions.** Consider using `timeouts` instead.

> :note: An internal detail of the `PipelineRun` and `TaskRun` reconcilers in the Tekton controller is that, under certain conditions, they requeue a `PipelineRun` or `TaskRun` for re-evaluation rather than waiting for the next update. The wait time for that requeue is the timeout minus the elapsed time. However, if the timeout is set to '0', that calculation produces a negative number and the new reconciliation event fires immediately, which can hurt overall performance and defeats the purpose of calculating a wait time. So instead, when the associated timeout has been set to '0', the reconcilers use the configured global default timeout as the wait time.

## `PipelineRun` status

### The `status` field
Expand Down
2 changes: 2 additions & 0 deletions docs/taskruns.md
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,8 @@ a different global default timeout value using the `default-timeout-minutes` fie
all `TaskRuns` that do not have a timeout set will have no timeout and will run until they complete successfully
or fail from an error.

> :note: An internal detail of the `PipelineRun` and `TaskRun` reconcilers in the Tekton controller is that, under certain conditions, they requeue a `PipelineRun` or `TaskRun` for re-evaluation rather than waiting for the next update. The wait time for that requeue is the timeout minus the elapsed time. However, if the timeout is set to '0', that calculation produces a negative number and the new reconciliation event fires immediately, which can hurt overall performance and defeats the purpose of calculating a wait time. So instead, when the associated timeout has been set to '0', the reconcilers use the configured global default timeout as the wait time.

### Specifying `ServiceAccount` credentials

You can execute the `Task` in your `TaskRun` with a specific set of credentials by
Expand Down
16 changes: 14 additions & 2 deletions pkg/reconciler/pipelinerun/pipelinerun.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"reflect"
"regexp"
"strings"
"time"

"github.com/hashicorp/go-multierror"
"github.com/tektoncd/pipeline/pkg/apis/config"
Expand Down Expand Up @@ -273,9 +274,20 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, pr *v1.PipelineRun) pkgr
// Compute the time since the task started.
elapsed := c.Clock.Since(pr.Status.StartTime.Time)
// Snooze this resource until the appropriate timeout has elapsed.
waitTime := pr.PipelineTimeout(ctx) - elapsed
if pr.Status.FinallyStartTime == nil && pr.TasksTimeout() != nil {
// but if the timeout has been disabled by setting timeout to 0, we
// do not want to subtract from 0, because a negative wait time will
// result in the requeue happening essentially immediately
timeout := pr.PipelineTimeout(ctx)
taskTimeout := pr.TasksTimeout()
waitTime := timeout - elapsed
if timeout == config.NoTimeoutDuration {
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
}
if pr.Status.FinallyStartTime == nil && taskTimeout != nil {
waitTime = pr.TasksTimeout().Duration - elapsed
if taskTimeout.Duration == config.NoTimeoutDuration {
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
}
} else if pr.Status.FinallyStartTime != nil && pr.FinallyTimeout() != nil {
finallyWaitTime := pr.FinallyTimeout().Duration - c.Clock.Since(pr.Status.FinallyStartTime.Time)
if finallyWaitTime < waitTime {
Expand Down
90 changes: 90 additions & 0 deletions pkg/reconciler/pipelinerun/pipelinerun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,96 @@ spec:
}
}

// TestReconcileWithTimeoutDisabled reconciles a PipelineRun whose pipeline
// timeout is disabled (`timeouts.pipeline: 0h0m0s`) and whose start time lies
// far in the past. It asserts that:
//   - reconciliation returns a requeue error,
//   - the requeue delay is not negative, and
//   - the PipelineRun is not marked with the "PipelineRunTimeout" reason.
func TestReconcileWithTimeoutDisabled(t *testing.T) {
	// NOTE: tc.timeout is not applied to the PipelineRun spec (which always
	// uses a pipeline timeout of 0). It only controls how far before
	// 2022-01-01 UTC the run's start time is placed (three times the value).
	testCases := []struct {
		name    string
		timeout time.Duration
	}{
		{
			name:    "pipeline timeout is 24h",
			timeout: 24 * time.Hour,
		},
		{
			name:    "pipeline timeout is way longer than 24h",
			timeout: 360 * time.Hour,
		},
	}

	for _, tc := range testCases {
		startTime := time.Date(2022, time.January, 1, 0, 0, 0, 0, time.UTC).Add(-3 * tc.timeout)
		t.Run(tc.name, func(t *testing.T) {
			ps := []*v1.Pipeline{parse.MustParseV1Pipeline(t, `
metadata:
  name: test-pipeline
  namespace: foo
spec:
  tasks:
  - name: hello-world-1
    taskRef:
      name: hello-world
  - name: hello-world-2
    taskRef:
      name: hello-world
`)}
			prs := []*v1.PipelineRun{parse.MustParseV1PipelineRun(t, `
metadata:
  name: test-pipeline-run-with-timeout-disabled
  namespace: foo
spec:
  pipelineRef:
    name: test-pipeline
  taskRunTemplate:
    serviceAccountName: test-sa
  timeouts:
    pipeline: 0h0m0s
status:
  startTime: "2021-12-30T00:00:00Z"
`)}
			ts := []*v1.Task{simpleHelloWorldTask}

			trs := []*v1.TaskRun{mustParseTaskRunWithObjectMeta(t, taskRunObjectMeta("test-pipeline-run-with-timeout-hello-world-1", "foo", "test-pipeline-run-with-timeout-disabled",
				"test-pipeline", "hello-world-1", false), `
spec:
  serviceAccountName: test-sa
  taskRef:
    name: hello-world
    kind: Task
`)}
			// Override the start time from the YAML fixture with the
			// per-test-case value computed above.
			start := metav1.NewTime(startTime)
			prs[0].Status.StartTime = &start

			d := test.Data{
				PipelineRuns: prs,
				Pipelines:    ps,
				Tasks:        ts,
				TaskRuns:     trs,
			}
			prt := newPipelineRunTest(t, d)
			defer prt.Cancel()

			c := prt.TestAssets.Controller
			clients := prt.TestAssets.Clients
			reconcileError := c.Reconciler.Reconcile(prt.TestAssets.Ctx, "foo/test-pipeline-run-with-timeout-disabled")
			if reconcileError == nil {
				// Stop here: the checks below call reconcileError.Error(),
				// which would panic on a nil error.
				t.Fatalf("expected error, but got nil")
			}
			if isRequeueError, requeueDuration := controller.IsRequeueKey(reconcileError); !isRequeueError {
				t.Errorf("Expected requeue error, but got: %s", reconcileError.Error())
			} else if requeueDuration < 0 {
				t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
			}
			prt.Test.Logf("Getting reconciled run")
			reconciledRun, err := clients.Pipeline.TektonV1().PipelineRuns("foo").Get(prt.TestAssets.Ctx, "test-pipeline-run-with-timeout-disabled", metav1.GetOptions{})
			if err != nil {
				// Stop here: reconciledRun is not usable when Get fails.
				prt.Test.Fatalf("Somehow had error getting reconciled run out of fake client: %s", err)
			}
			// A missing Succeeded condition cannot be a timeout, so guard
			// against nil instead of dereferencing it.
			if cond := reconciledRun.Status.GetCondition(apis.ConditionSucceeded); cond != nil && cond.Reason == "PipelineRunTimeout" {
				t.Errorf("Expected PipelineRun to not be timed out, but it is timed out")
			}
		})
	}
}

func TestReconcileWithTimeoutForALongTimeAndEtcdLimit_Pipeline(t *testing.T) {
timeout := 12 * time.Hour
testCases := []struct {
Expand Down
7 changes: 6 additions & 1 deletion pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,12 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1.TaskRun) pkgrecon
// Compute the time since the task started.
elapsed := c.Clock.Since(tr.Status.StartTime.Time)
// Snooze this resource until the timeout has elapsed.
return controller.NewRequeueAfter(tr.GetTimeout(ctx) - elapsed)
timeout := tr.GetTimeout(ctx)
waitTime := timeout - elapsed
if timeout == config.NoTimeoutDuration {
waitTime = time.Duration(config.FromContextOrDefaults(ctx).Defaults.DefaultTimeoutMinutes) * time.Minute
}
return controller.NewRequeueAfter(waitTime)
}
return nil
}
Expand Down
85 changes: 85 additions & 0 deletions pkg/reconciler/taskrun/taskrun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2777,6 +2777,91 @@ status:
}
}

// TestReconcileWithTimeoutDisabled reconciles running TaskRuns (Succeeded
// condition Unknown, start time set to now) with an explicit timeout, with no
// timeout in the spec, and with the timeout disabled via `timeout: 0s`. For
// each it asserts that reconciliation returns a requeue error whose delay is
// not negative, and that the TaskRun can still be fetched afterwards.
func TestReconcileWithTimeoutDisabled(t *testing.T) {
	type testCase struct {
		name    string
		taskRun *v1.TaskRun
	}

	testcases := []testCase{
		{
			name: "taskrun with timeout",
			taskRun: parse.MustParseV1TaskRun(t, `
metadata:
  name: test-taskrun-timeout
  namespace: foo
spec:
  taskRef:
    name: test-task
  timeout: 10m
status:
  conditions:
  - status: Unknown
    type: Succeeded
`),
		}, {
			name: "taskrun with default timeout",
			taskRun: parse.MustParseV1TaskRun(t, `
metadata:
  name: test-taskrun-default-timeout-60-minutes
  namespace: foo
spec:
  taskRef:
    name: test-task
status:
  conditions:
  - status: Unknown
    type: Succeeded
`),
		}, {
			name: "task run with timeout set to 0 to disable",
			taskRun: parse.MustParseV1TaskRun(t, `
metadata:
  name: test-taskrun-timeout-disabled
  namespace: foo
spec:
  taskRef:
    name: test-task
  timeout: 0s
status:
  conditions:
  - status: Unknown
    type: Succeeded
`),
		}}

	for _, tc := range testcases {
		t.Run(tc.name, func(t *testing.T) {
			start := metav1.NewTime(time.Now())
			tc.taskRun.Status.StartTime = &start
			pod, err := makePod(tc.taskRun, simpleTask)
			if err != nil {
				// Previously this error was silently overwritten by the
				// Reconcile call below; fail fast on a broken fixture instead.
				t.Fatalf("failed to build pod for TaskRun %s: %v", tc.taskRun.Name, err)
			}
			d := test.Data{
				TaskRuns: []*v1.TaskRun{tc.taskRun},
				Tasks:    []*v1.Task{simpleTask},
				Pods:     []*corev1.Pod{pod},
			}
			testAssets, cancel := getTaskRunController(t, d)
			defer cancel()
			c := testAssets.Controller
			clients := testAssets.Clients

			err = c.Reconciler.Reconcile(testAssets.Ctx, getRunName(tc.taskRun))
			if err == nil {
				// Stop here: the check below calls err.Error(), which would
				// panic on a nil error.
				t.Fatal("expected requeue error when reconciling TaskRun, but got nil")
			}
			if isRequeueError, requeueDuration := controller.IsRequeueKey(err); !isRequeueError {
				t.Errorf("Expected requeue error, but got: %s", err.Error())
			} else if requeueDuration < 0 {
				t.Errorf("Expected a positive requeue duration but got %s", requeueDuration.String())
			}
			_, err = clients.Pipeline.TektonV1().TaskRuns(tc.taskRun.Namespace).Get(testAssets.Ctx, tc.taskRun.Name, metav1.GetOptions{})
			if err != nil {
				t.Errorf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", tc.taskRun.Name, err)
			}
		})
	}
}

func TestReconcileTimeouts(t *testing.T) {
type testCase struct {
name string
Expand Down