From e5bd333021844d0e95fc0357bf1389a0de73dabd Mon Sep 17 00:00:00 2001 From: Olha Yevtushenko Date: Tue, 28 Nov 2023 18:17:06 +0200 Subject: [PATCH] add consistent logic on error with initializer spec --- controllers/common.go | 4 ++++ controllers/k6_initialize.go | 17 ++++++++++------- controllers/k6_start.go | 2 +- controllers/testrun_controller.go | 24 ++++++++++++++++++++++-- 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/controllers/common.go b/controllers/common.go index b409b4c6..19e90876 100644 --- a/controllers/common.go +++ b/controllers/common.go @@ -19,6 +19,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +const ( + errMessageTooLong = "Creation of %s takes too long: your configuration might be off. Check if %v were created successfully." +) + // It may take some time to retrieve inspect output so indicate with boolean if it's ready // and use returnErr only for errors that require a change of behaviour. All other errors // should just be logged. diff --git a/controllers/k6_initialize.go b/controllers/k6_initialize.go index aeb1f41f..a8b2469c 100644 --- a/controllers/k6_initialize.go +++ b/controllers/k6_initialize.go @@ -44,7 +44,8 @@ func InitializeJobs(ctx context.Context, log logr.Logger, k6 v1alpha1.TestRunI, return res, nil } -func RunValidations(ctx context.Context, log logr.Logger, k6 v1alpha1.TestRunI, r *TestRunReconciler) (res ctrl.Result, err error) { +func RunValidations(ctx context.Context, log logr.Logger, k6 v1alpha1.TestRunI, r *TestRunReconciler) ( + res ctrl.Result, ready bool, err error) { // initializer is a quick job so check in frequently res = ctrl.Result{RequeueAfter: time.Second * 5} @@ -63,10 +64,10 @@ func RunValidations(ctx context.Context, log logr.Logger, k6 v1alpha1.TestRunI, } // inspectTestRun made a log message already so just return without requeue - return ctrl.Result{}, nil + return ctrl.Result{}, ready, err } if !inspectReady { - return res, nil + return res, ready, nil } log.Info(fmt.Sprintf("k6 inspect: %+v", inspectOutput)) @@ -83,11 +84,11 @@ func RunValidations(ctx context.Context, log logr.Logger, k6 v1alpha1.TestRunI, k6.GetStatus().Stage = "error" if _, err := r.UpdateStatus(ctx, k6, log); err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, ready, err } // Don't requeue in case of this error; unless it's made into a warning as described above. - return ctrl.Result{}, nil + return ctrl.Result{}, ready, nil } if cli.HasCloudOut { @@ -104,10 +105,12 @@ func RunValidations(ctx context.Context, log logr.Logger, k6 v1alpha1.TestRunI, } if _, err := r.UpdateStatus(ctx, k6, log); err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, ready, err } - return res, nil + ready = true + + return res, ready, nil } // SetupCloudTest inspects the output of initializer and creates a new diff --git a/controllers/k6_start.go b/controllers/k6_start.go index b8a46224..3e1199d6 100644 --- a/controllers/k6_start.go +++ b/controllers/k6_start.go @@ -70,7 +70,7 @@ func StartJobs(ctx context.Context, log logr.Logger, k6 v1alpha1.TestRunI, r *Te } else { // let's try this approach if time.Since(t).Minutes() > 5 { - msg := "Creation of runner pods takes too long: your configuration might be off. Check if runner jobs and pods were created successfully." + msg := fmt.Sprintf(errMessageTooLong, "runner pods", "runner jobs and pods") log.Info(msg) if v1alpha1.IsTrue(k6, v1alpha1.CloudTestRun) { diff --git a/controllers/testrun_controller.go b/controllers/testrun_controller.go index 1d689a38..d6b1f51e 100644 --- a/controllers/testrun_controller.go +++ b/controllers/testrun_controller.go @@ -16,6 +16,7 @@ package controllers import ( "context" + "errors" "fmt" "time" @@ -129,8 +130,27 @@ func (r *TestRunReconciler) reconcile(ctx context.Context, req ctrl.Request, log return ctrl.Result{}, nil case "initialization": - if v1alpha1.IsUnknown(k6, v1alpha1.CloudTestRun) { - return RunValidations(ctx, log, k6, r) + res, ready, err := RunValidations(ctx, log, k6, r) + if err != nil || !ready { + if t, ok := v1alpha1.LastUpdate(k6, v1alpha1.TestRunRunning); !ok { + // this should never happen + return res, errors.New("Cannot find condition TestRunRunning") + } else { + // let's try this approach + if time.Since(t).Minutes() > 5 { + msg := fmt.Sprintf(errMessageTooLong, "initializer pod", "initializer job and pod") + log.Info(msg) + + if v1alpha1.IsTrue(k6, v1alpha1.CloudTestRun) { + events := cloud.ErrorEvent(cloud.K6OperatorStartError). + WithDetail(msg). + WithAbort() + cloud.SendTestRunEvents(r.k6CloudClient, v1alpha1.TestRunID(k6), log, events) + } + } + } + + return res, err } if v1alpha1.IsFalse(k6, v1alpha1.CloudTestRun) {