Skip to content

Commit

Permalink
Merge branch 'master' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
tobru authored Dec 20, 2024
2 parents 630bd50 + 624d8f1 commit 66f6a92
Show file tree
Hide file tree
Showing 16 changed files with 179 additions and 20 deletions.
2 changes: 1 addition & 1 deletion charts/k8up/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ keywords:
- backup
- operator
- restic
version: 4.8.1
version: 4.8.3
sources:
- https://github.com/k8up-io/k8up
maintainers:
Expand Down
6 changes: 3 additions & 3 deletions charts/k8up/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# k8up

![Version: 4.8.1](https://img.shields.io/badge/Version-4.8.1-informational?style=flat-square)
![Version: 4.8.3](https://img.shields.io/badge/Version-4.8.3-informational?style=flat-square)

Kubernetes and OpenShift Backup Operator based on restic

Expand All @@ -13,7 +13,7 @@ helm repo add k8up-io https://k8up-io.github.io/k8up
helm install k8up k8up-io/k8up
```
```bash
kubectl apply -f https://github.com/k8up-io/k8up/releases/download/k8up-4.8.1/k8up-crd.yaml --server-side
kubectl apply -f https://github.com/k8up-io/k8up/releases/download/k8up-4.8.3/k8up-crd.yaml --server-side
```

<!---
Expand Down Expand Up @@ -48,7 +48,7 @@ Document your changes in values.yaml and let `make docs:helm` generate this sect
| image.pullPolicy | string | `"IfNotPresent"` | Operator image pull policy |
| image.registry | string | `"ghcr.io"` | Operator image registry |
| image.repository | string | `"k8up-io/k8up"` | Operator image repository |
| image.tag | string | `"v2.11.0"` | Operator image tag (version) |
| image.tag | string | `"v2.11.3"` | Operator image tag (version) |
| imagePullSecrets | list | `[]` | |
| k8up.backupImage.repository | string | `""` | The backup runner image repository. Defaults to `{image.registry}/{image.repository}`. Specify an image repository including registry, e.g. `example.com/repo/image` |
| k8up.backupImage.tag | string | `""` | The backup runner image tag Defaults to `{image.tag}` |
Expand Down
2 changes: 1 addition & 1 deletion charts/k8up/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ image:
# -- Operator image repository
repository: k8up-io/k8up
# -- Operator image tag (version)
tag: v2.11.0
tag: v2.11.3

imagePullSecrets: []
serviceAccount:
Expand Down
1 change: 1 addition & 0 deletions cmd/operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ var (
&cli.StringFlag{Destination: &cfg.Config.FileExtensionAnnotation, Name: "fileextensionannotation", EnvVars: []string{"BACKUP_FILEEXTENSIONANNOTATION"}, Value: "k8up.io/file-extension", Usage: "set the annotation name where the file extension is stored for backup commands"},

&cli.IntFlag{Destination: &cfg.Config.GlobalKeepJobs, Hidden: true, Name: "globalkeepjobs", EnvVars: []string{"BACKUP_GLOBALKEEPJOBS"}, Value: -1, DefaultText: "unlimited", Usage: "set the number of old jobs to keep when cleaning up, applies to all job types"},
&cli.IntFlag{Destination: &cfg.Config.GlobalBackoffLimit, Name: "global-backoff-limit", EnvVars: []string{"BACKUP_GLOBAL_BACKOFF_LIMIT"}, Value: 6, Usage: "set the backoff limit for all backup jobs"},
&cli.IntFlag{Destination: &cfg.Config.GlobalFailedJobsHistoryLimit, Name: "global-failed-jobs-history-limit", EnvVars: []string{"BACKUP_GLOBAL_FAILED_JOBS_HISTORY_LIMIT"}, Value: 3, Usage: "set the number of old, failed jobs to keep when cleaning up, applies to all job types"},
&cli.IntFlag{Destination: &cfg.Config.GlobalSuccessfulJobsHistoryLimit, Name: "global-successful-jobs-history-limit", EnvVars: []string{"BACKUP_GLOBAL_SUCCESSFUL_JOBS_HISTORY_LIMIT"}, Value: 3, Usage: "set the number of old, successful jobs to keep when cleaning up, applies to all job types"},
&cli.IntFlag{Destination: &cfg.Config.GlobalConcurrentArchiveJobsLimit, Name: "global-concurrent-archive-jobs-limit", EnvVars: []string{"BACKUP_GLOBAL_CONCURRENT_ARCHIVE_JOBS_LIMIT"}, DefaultText: "unlimited", Usage: "set the limit of concurrent archive jobs"},
Expand Down
49 changes: 49 additions & 0 deletions e2e/definitions/annotated-subject/deployment-error.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
# e2e fixture: a Deployment carrying an intentionally invalid
# k8up.io/backupcommand annotation. Used by test-12 to verify that a
# Backup whose pre-backup command cannot run ends with reason=Failed.
# NOTE: $ID is a placeholder; the e2e harness replaces runAsUser via yq
# before applying this manifest.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: annotated-subject-deployment
  namespace: k8up-e2e-subject
spec:
  replicas: 1
  selector:
    matchLabels:
      app: subject
  template:
    metadata:
      labels:
        app: subject
      annotations:
        # 'invalid' is not an executable command, so the backup of this
        # container is expected to fail.
        k8up.io/backupcommand: 'invalid'
        k8up.io/backupcommand-container: subject-container
    spec:
      containers:
        # First container deliberately occupies index 0 so the test also
        # exercises the backupcommand-container selection by name.
        - image: busybox
          imagePullPolicy: IfNotPresent
          name: dummy-container-blocking-first-position
          command:
            - "/bin/sh"
            - "-c"
            - "sleep infinity"
        - name: subject-container
          image: quay.io/prometheus/busybox:latest
          imagePullPolicy: IfNotPresent
          args:
            - sh
            - -c
            - |
              sleep infinity
          securityContext:
            runAsUser: $ID
          volumeMounts:
            - name: volume
              mountPath: /data
          env:
            - name: BACKUP_FILE_CONTENT
              value: ""
            - name: BACKUP_FILE_NAME
              value: ""
      volumes:
        - name: volume
          persistentVolumeClaim:
            claimName: subject-pvc
4 changes: 0 additions & 4 deletions e2e/definitions/operator/deploy.yaml

This file was deleted.

2 changes: 2 additions & 0 deletions e2e/definitions/operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ k8up:
envVars:
- name: K8UP_DEBUG
value: "true"
- name: BACKUP_GLOBAL_BACKOFF_LIMIT
value: "2"
49 changes: 43 additions & 6 deletions e2e/lib/k8up.bash
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ given_a_subject() {
kubectl apply -f definitions/pv/pvc.yaml
yq e '.spec.template.spec.containers[0].securityContext.runAsUser='$(id -u)' | .spec.template.spec.containers[0].env[0].value=strenv(BACKUP_FILE_CONTENT) | .spec.template.spec.containers[0].env[1].value=strenv(BACKUP_FILE_NAME)' definitions/subject/deployment.yaml | kubectl apply -f -

# Let's wait for the deployment to actually be ready
kubectl -n k8up-e2e-subject wait --timeout 1m --for=condition=available deployment subject-deployment

echo "✅ The subject is ready"
}

Expand All @@ -167,6 +170,21 @@ given_an_annotated_subject() {
kubectl apply -f definitions/pv/pvc.yaml
yq e '.spec.template.spec.containers[1].securityContext.runAsUser='$(id -u)' | .spec.template.spec.containers[1].env[0].value=strenv(BACKUP_FILE_CONTENT) | .spec.template.spec.containers[1].env[1].value=strenv(BACKUP_FILE_NAME)' definitions/annotated-subject/deployment.yaml | kubectl apply -f -

# Let's wait for the deployment to actually be ready
kubectl -n k8up-e2e-subject wait --timeout 1m --for=condition=available deployment annotated-subject-deployment

echo "✅ The annotated subject is ready"
}

# Deploys the intentionally broken annotated subject (its
# k8up.io/backupcommand annotation is 'invalid') and waits until the
# deployment is available.
# Args: $1 backup file name, $2 backup file content — accepted for
# signature parity with given_an_annotated_subject, but the broken
# fixture does not consume them (its env values stay empty).
given_a_broken_annotated_subject() {
	require_args 2 ${#}

	kubectl apply -f definitions/pv/pvc.yaml
	yq e '.spec.template.spec.containers[1].securityContext.runAsUser='$(id -u)' ' definitions/annotated-subject/deployment-error.yaml | kubectl apply -f -

	# Let's wait for the deployment to actually be ready
	kubectl -n k8up-e2e-subject wait --timeout 1m --for=condition=available deployment annotated-subject-deployment

	# Fixed copy-paste message: this is the *broken* subject fixture.
	echo "✅ The broken annotated subject is ready"
}

Expand All @@ -178,6 +196,9 @@ given_an_annotated_subject_pod() {

yq e '.spec.containers[1].securityContext.runAsUser='$(id -u)' | .spec.containers[1].env[0].value=strenv(BACKUP_FILE_CONTENT) | .spec.containers[1].env[1].value=strenv(BACKUP_FILE_NAME)' definitions/annotated-subject/pod.yaml | kubectl apply -f -

# Let's wait for the pod to actually be ready
kubectl -n k8up-e2e-subject wait --timeout 1m --for=condition=ready pod subject-pod

echo "✅ The annotated subject pod is ready"
}

Expand All @@ -189,6 +210,9 @@ given_a_rwo_pvc_subject_in_worker_node() {

yq e 'with(select(document_index == 1) .spec.template.spec; .containers[0].securityContext.runAsUser='$(id -u)' | .containers[0].env[0].value=strenv(BACKUP_FILE_CONTENT) | .containers[0].env[1].value=strenv(BACKUP_FILE_NAME))' definitions/pvc-rwo-subject/worker.yaml | kubectl apply -f -

# Let's wait for the deployment to actually be ready
kubectl -n k8up-e2e-subject wait --timeout 1m --for=condition=available deployment pvc-rwo-subject-worker

echo "✅ The pvc rwo worker subject is ready"
}

Expand All @@ -200,6 +224,9 @@ given_a_rwo_pvc_subject_in_controlplane_node() {

yq e 'with(select(document_index == 1) .spec.template.spec; .containers[0].securityContext.runAsUser='$(id -u)' | .containers[0].env[0].value=strenv(BACKUP_FILE_CONTENT) | .containers[0].env[1].value=strenv(BACKUP_FILE_NAME))' definitions/pvc-rwo-subject/controlplane.yaml | kubectl apply -f -

# Let's wait for the deployment to actually be ready
kubectl -n k8up-e2e-subject wait --timeout 1m --for=condition=available deployment pvc-rwo-subject-controlplane

echo "✅ The pvc rwo controlplane subject is ready"
}

Expand Down Expand Up @@ -315,14 +342,10 @@ given_an_existing_mtls_backup() {
wait_until backup/k8up-backup-mtls completed
verify "'.status.conditions[?(@.type==\"Completed\")].reason' is 'Succeeded' for Backup named 'k8up-backup-mtls'"

for i in {1..3}; do
run restic dump latest "/data/subject-pvc/${backup_file_name}"
if [ ! -z "${output}" ]; then
break
fi
done
run restic dump latest "/data/subject-pvc/${backup_file_name}"

# shellcheck disable=SC2154
echo "${backup_file_content} = ${output}"
[ "${backup_file_content}" = "${output}" ]

echo "✅ An existing backup is ready"
Expand Down Expand Up @@ -457,6 +480,20 @@ wait_until() {
kubectl -n "${ns}" wait --timeout 5m --for "condition=${condition}" "${object}"
}

# Waits until a kubectl jsonpath condition holds for an object.
# Args: $1 object (kind/name), $2 timeout (e.g. "2m"),
#       $3 full --for expression (e.g. 'jsonpath={.status...}=Failed').
# Falls back to DETIK_CLIENT_NAMESPACE when NAMESPACE is unset.
wait_for_until_jsonpath() {
	require_args 3 ${#}

	# All three parameters are declared local ('timeout' instead of the
	# bash reserved word 'until'); previously 'until' and 'jsonpath'
	# leaked into the global scope and the echo referenced an unset
	# '${condition}'.
	local object timeout jsonpath ns
	object=${1}
	timeout=${2}
	jsonpath=${3}
	ns=${NAMESPACE=${DETIK_CLIENT_NAMESPACE}}

	echo "Waiting for '${object}' in namespace '${ns}' to match '${jsonpath}' (timeout ${timeout}) ..."
	kubectl -n "${ns}" wait --timeout "${timeout}" --for "${jsonpath}" "${object}"
}


expect_file_in_container() {
require_args 4 ${#}

Expand Down
31 changes: 31 additions & 0 deletions e2e/test-12-annotated-failure.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bats

# e2e test 12: a subject annotated with an invalid backupcommand must
# drive the Backup to the Failed state (instead of hanging or succeeding).

load "lib/utils"
load "lib/detik"
load "lib/k8up"

# shellcheck disable=SC2034
DETIK_CLIENT_NAME="kubectl"
# shellcheck disable=SC2034
DETIK_CLIENT_NAMESPACE="k8up-e2e-subject"
# shellcheck disable=SC2034
DEBUG_DETIK="true"

@test "Given annotated app, When creating a backup, Then expect Error" {
expected_content="expected content: $(timestamp)"
expected_filename="expected_filename.txt"

given_a_running_operator
given_a_clean_ns
given_s3_storage
# Deploys the fixture whose k8up.io/backupcommand is 'invalid'.
given_a_broken_annotated_subject "${expected_filename}" "${expected_content}"

kubectl apply -f definitions/secrets
yq e '.spec.podSecurityContext.runAsUser='$(id -u)'' definitions/backup/backup.yaml | kubectl apply -f -

# First confirm the backup actually starts and spawns an active job ...
try "at most 10 times every 5s to get backup named 'k8up-backup' and verify that '.status.started' is 'true'"
verify_object_value_by_label job 'k8up.io/owned-by=backup_k8up-backup' '.status.active' 1 true

# ... then expect its Completed condition to end with reason=Failed.
wait_for_until_jsonpath backup/k8up-backup 2m 'jsonpath={.status.conditions[?(@.type=="Completed")].reason}=Failed'

}
28 changes: 28 additions & 0 deletions e2e/test-13-cleanup-empty-jobs.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env bats

# e2e test 13: in a namespace with nothing to back up, creating multiple
# Backup objects must trigger cleanup (scrubbing) of the older resources.

load "lib/utils"
load "lib/detik"
load "lib/k8up"

# shellcheck disable=SC2034
DETIK_CLIENT_NAME="kubectl"
# shellcheck disable=SC2034
DETIK_CLIENT_NAMESPACE="k8up-e2e-subject"
# shellcheck disable=SC2034
DEBUG_DETIK="true"

@test "Given empty namespace, When creating multiple backups, Then expect cleanup" {
given_a_running_operator
given_a_clean_ns
given_s3_storage

kubectl apply -f definitions/secrets
# Create two backups from the same template, differing only by name.
yq e '.spec.podSecurityContext.runAsUser='$(id -u)' | .metadata.name="first-backup"' definitions/backup/backup.yaml | kubectl apply -f -

yq e '.spec.podSecurityContext.runAsUser='$(id -u)' | .metadata.name="second-backup"' definitions/backup/backup.yaml | kubectl apply -f -

# Force a reconcile of the second backup so cleanup runs promptly.
kubectl -n "$DETIK_CLIENT_NAMESPACE" annotate backup/second-backup reconcile=now

# The Scrubbed condition message must report that one resource was deleted.
wait_for_until_jsonpath backup/second-backup 5m 'jsonpath={.status.conditions[?(@.type=="Scrubbed")].message}="Deleted 1 resources"'

}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/k8up-io/k8up/v2

go 1.21
go 1.23

require (
github.com/firepear/qsplit/v2 v2.5.0
Expand Down
2 changes: 2 additions & 0 deletions operator/backupcontroller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ func (r *BackupReconciler) Provision(ctx context.Context, obj *k8upv1.Backup) (r
log.V(1).Info("backup just started, waiting")
return controllerruntime.Result{RequeueAfter: 5 * time.Second}, nil
}

if obj.Status.HasFinished() || isPrebackupFailed(obj) {
cleanupCond := meta.FindStatusCondition(obj.Status.Conditions, k8upv1.ConditionScrubbed.String())
if cleanupCond == nil || cleanupCond.Reason != k8upv1.ReasonSucceeded.String() {
Expand Down Expand Up @@ -110,6 +111,7 @@ func (r *BackupReconciler) ReconcileJobStatus(ctx context.Context, obj *k8upv1.B
} else if numStarted > 0 {
objStatus.SetStarted(message)
}

obj.SetStatus(objStatus)

log.V(1).Info("updating status")
Expand Down
15 changes: 12 additions & 3 deletions operator/backupcontroller/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
controllerruntime "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand Down Expand Up @@ -236,7 +237,7 @@ func (b *BackupExecutor) startBackup(ctx context.Context) error {
}

index := 0
for _, batchJob := range backupJobs {
for name, batchJob := range backupJobs {
_, err = controllerruntime.CreateOrUpdate(ctx, b.Generic.Config.Client, batchJob.job, func() error {
mutateErr := job.MutateBatchJob(ctx, batchJob.job, b.backup, b.Generic.Config, b.Client)
if mutateErr != nil {
Expand All @@ -262,16 +263,17 @@ func (b *BackupExecutor) startBackup(ctx context.Context) error {
}
// each job sleeps for index seconds to avoid concurrent restic repository creation. Not the prettiest way but it works and a repository
// is created only once usually.
if index > 0 {
if name == "prebackup" || index != 0 {
batchJob.job.Spec.Template.Spec.Containers[0].Env = append(batchJob.job.Spec.Template.Spec.Containers[0].Env, corev1.EnvVar{
Name: "SLEEP_DURATION",
Value: (time.Duration(index) * time.Second).String(),
Value: (3 * time.Second).String(),
})
}
b.backup.Spec.AppendEnvFromToContainer(&batchJob.job.Spec.Template.Spec.Containers[0])
batchJob.job.Spec.Template.Spec.Volumes = append(batchJob.job.Spec.Template.Spec.Volumes, batchJob.volumes...)
batchJob.job.Spec.Template.Spec.Volumes = append(batchJob.job.Spec.Template.Spec.Volumes, utils.AttachTLSVolumes(b.backup.Spec.Volumes)...)
batchJob.job.Spec.Template.Spec.Containers[0].VolumeMounts = append(b.newVolumeMounts(batchJob.volumes), b.attachTLSVolumeMounts()...)
batchJob.job.Spec.BackoffLimit = ptr.To(int32(cfg.Config.GlobalBackoffLimit))

batchJob.job.Spec.Template.Spec.Containers[0].Args = b.setupArgs()

Expand All @@ -287,6 +289,13 @@ func (b *BackupExecutor) startBackup(ctx context.Context) error {
}
}

if len(backupJobs) == 0 {
status := b.Obj.GetStatus()
status.SetSucceeded("nothing to backup")
b.Obj.SetStatus(status)
return b.Client.Status().Update(ctx, b.Obj)
}

return nil
}

Expand Down
1 change: 1 addition & 0 deletions operator/cfg/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type Configuration struct {
GlobalKeepJobs int
GlobalFailedJobsHistoryLimit int
GlobalSuccessfulJobsHistoryLimit int
GlobalBackoffLimit int
GlobalRepoPassword string
GlobalRestoreS3AccessKey string
GlobalRestoreS3Bucket string
Expand Down
2 changes: 1 addition & 1 deletion operator/reconciler/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func (ctrl *controller[T, L]) Reconcile(ctx context.Context, request controllerr
} else {
res, provisionErr = ctrl.reconciler.Provision(ctx, obj)
}
if apierrors.IsConflict(err) { // ignore "the object has been modified; please apply your changes to the latest version and try again" error, but requeue
if apierrors.IsConflict(provisionErr) { // ignore "the object has been modified; please apply your changes to the latest version and try again" error, but requeue
log := controllerruntime.LoggerFrom(ctx)
log.Info("Object has been modified, retrying...", "error", provisionErr.Error())
res.Requeue = true
Expand Down
3 changes: 3 additions & 0 deletions restic/kubernetes/pod_exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"io"
"os"
"strings"

"github.com/firepear/qsplit/v2"
Expand Down Expand Up @@ -73,6 +74,8 @@ func PodExec(pod BackupPod, log logr.Logger) (*ExecData, error) {

if err != nil {
execLogger.Error(err, "streaming data failed", "namespace", pod.Namespace, "pod", pod.PodName)
// we just completely hard fail the whole backup pod
os.Exit(1)
return
}
}()
Expand Down

0 comments on commit 66f6a92

Please sign in to comment.