From 5a9afc6b8847a1369ac427707068a08fedc4ae29 Mon Sep 17 00:00:00 2001
From: Radovan Zvoncek
Date: Fri, 18 Oct 2024 08:59:21 +0100
Subject: [PATCH] When checking backup status, handle MISSING backups properly

---
 .../medusa/medusabackupjob_controller.go      |  6 ++++
 .../medusa/medusabackupjob_controller_test.go | 31 ++++++++++++++++---
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/controllers/medusa/medusabackupjob_controller.go b/controllers/medusa/medusabackupjob_controller.go
index 35b913080..952069311 100644
--- a/controllers/medusa/medusabackupjob_controller.go
+++ b/controllers/medusa/medusabackupjob_controller.go
@@ -324,10 +324,16 @@ func backupStatus(ctx context.Context, name string, pod *corev1.Pod, clientFacto
 	addr := net.JoinHostPort(pod.Status.PodIP, fmt.Sprint(shared.BackupSidecarPort))
 	logger.Info("connecting to backup sidecar", "Pod", pod.Name, "Address", addr)
 	if medusaClient, err := clientFactory.NewClient(ctx, addr); err != nil {
+		logger.Error(err, "Could not make a new medusa client")
 		return medusa.StatusType_UNKNOWN, err
 	} else {
 		resp, err := medusaClient.BackupStatus(ctx, name)
 		if err != nil {
+			if errors.IsNotFound(err) {
+				logger.Info(fmt.Sprintf("did not find backup %s for pod %s", name, pod.Name))
+				return medusa.StatusType_UNKNOWN, nil
+			}
+			logger.Error(err, fmt.Sprintf("getting backup status for backup %s and pod %s failed", name, pod.Name))
 			return medusa.StatusType_UNKNOWN, err
 		}
 
diff --git a/controllers/medusa/medusabackupjob_controller_test.go b/controllers/medusa/medusabackupjob_controller_test.go
index 1fb1d6062..dfceb0fd7 100644
--- a/controllers/medusa/medusabackupjob_controller_test.go
+++ b/controllers/medusa/medusabackupjob_controller_test.go
@@ -3,6 +3,7 @@ package medusa
 import (
 	"context"
 	"fmt"
+	"k8s.io/apimachinery/pkg/runtime/schema"
 	"strconv"
 	"strings"
 	"sync"
@@ -30,6 +31,7 @@ const (
 	cassandraUserSecret = "medusa-secret"
 	successfulBackupName = "good-backup"
 	failingBackupName = "bad-backup"
+	missingBackupName = "missing-backup"
 	dc1PodPrefix = "192.168.1."
 	dc2PodPrefix = "192.168.2."
 	fakeBackupFileCount = int64(13)
@@ -40,6 +42,7 @@ const (
 
 var (
 	alreadyReportedFailingBackup = false
+	alreadyReportedMissingBackup = false
 )
 
 func testMedusaBackupDatacenter(t *testing.T, ctx context.Context, f *framework.Framework, namespace string) {
@@ -157,9 +160,14 @@ func testMedusaBackupDatacenter(t *testing.T, ctx context.Context, f *framework.
 		fmt.Sprintf("%s:%d", getPodIpAddress(2, dc1.DatacenterName()), shared.BackupSidecarPort): {successfulBackupName},
 	}, medusaClientFactory.GetRequestedBackups(dc1.DatacenterName()))
 
+	// a failing backup is one that actually starts but fails (on one pod)
 	backupCreated = createAndVerifyMedusaBackup(dc1Key, dc1, f, ctx, require, t, namespace, failingBackupName)
 	require.False(backupCreated, "the backup object shouldn't have been created")
 
+	// a missing backup is one that never gets to start (on one pod)
+	backupCreated = createAndVerifyMedusaBackup(dc1Key, dc1, f, ctx, require, t, namespace, missingBackupName)
+	require.False(backupCreated, "the backup object shouldn't have been created")
+
 	err = f.DeleteK8ssandraCluster(ctx, client.ObjectKey{Namespace: kc.Namespace, Name: kc.Name}, timeout, interval)
 	require.NoError(err, "failed to delete K8ssandraCluster")
 	verifyObjectDoesNotExist(ctx, t, f, dc1Key, &cassdcapi.CassandraDatacenter{})
@@ -238,10 +246,10 @@ func createAndVerifyMedusaBackup(dcKey framework.ClusterKey, dc *cassdcapi.Cassa
 		if err != nil {
 			return false
 		}
-		t.Logf("backup finish time: %v", updated.Status.FinishTime)
-		t.Logf("backup failed: %v", updated.Status.Failed)
-		t.Logf("backup finished: %v", updated.Status.Finished)
-		t.Logf("backup in progress: %v", updated.Status.InProgress)
+		t.Logf("backup %s finish time: %v", backupName, updated.Status.FinishTime)
+		t.Logf("backup %s failed: %v", backupName, updated.Status.Failed)
+		t.Logf("backup %s finished: %v", backupName, updated.Status.Finished)
+		t.Logf("backup %s in progress: %v", backupName, updated.Status.InProgress)
 		return !updated.Status.FinishTime.IsZero() // && len(updated.Status.Finished) == 3 && len(updated.Status.InProgress) == 0
 	}, timeout, interval)
 
@@ -411,6 +419,13 @@ func (c *fakeMedusaClient) BackupStatus(ctx context.Context, name string) (*medu
 		} else {
 			status = medusa.StatusType_SUCCESS
 		}
+	} else if name == missingBackupName {
+		if !alreadyReportedMissingBackup {
+			alreadyReportedMissingBackup = true
+			return nil, newNotFoundError()
+		} else {
+			status = medusa.StatusType_SUCCESS
+		}
 	} else {
 		status = medusa.StatusType_IN_PROGRESS
 	}
@@ -419,6 +434,14 @@ func (c *fakeMedusaClient) BackupStatus(ctx context.Context, name string) (*medu
 	}, nil
 }
 
+func newNotFoundError() error {
+	resource := schema.GroupResource{
+		Group: "error-group",
+		Resource: "error-resource",
+	}
+	return errors.NewNotFound(resource, "not-found-error")
+}
+
 func (c *fakeMedusaClient) PurgeBackups(ctx context.Context) (*medusa.PurgeBackupsResponse, error) {
 	size := len(c.RequestedBackups)
 	if size > fakeMaxBackupCount {