PCP-2902: skip killing CSI pod after first drain

Skip killing pods with the label "storage=true" after the first drain.
spectrocloud · Jul 31, 2024 · 35364a2 · 35364a2
1 parent 7e65301
commit 35364a2
Showing 1 changed file with 14 additions and 0 deletions.
diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go
@@ -620,6 +620,9 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
 		}},
 		// SPECTRO: Even if the node is reachable, we wait 30 minutes for drain completion else move ahead
 		SkipWaitForDeleteTimeoutSeconds: 60 * 30, // 30 minutes
+		AdditionalFilters: []kubedrain.PodFilter{
+			additionalFilerToSkipDrainCSI,
+		},
 	}
 
 	if noderefutil.IsNodeUnreachable(node) {
@@ -643,6 +646,17 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
 	return ctrl.Result{}, nil
 }
 
+// additionalFilerToSkipDrainCSI is a drain pod filter: it marks a pod to be
+// skipped when the pod carries the label "storage" with the value "true", and
+// marks every other pod as okay to delete.
+func additionalFilerToSkipDrainCSI(pod corev1.Pod) kubedrain.PodDeleteStatus {
+	// Indexing a nil label map yields "", so no separate nil check is needed.
+	if value, found := pod.Labels["storage"]; found && value == "true" {
+		return kubedrain.MakePodDeleteStatusSkip()
+	}
+	return kubedrain.MakePodDeleteStatusOkay()
+}
+
 // shouldWaitForNodeVolumes returns true if node status still have volumes attached
 // pod deletion and volume detach happen asynchronously, so pod could be deleted before volume detached from the node
 // this could cause issue for some storage provisioner, for example, vsphere-volume this is problematic