Skip to content

Commit

Permalink
PCP-2902: skip killing CSI pod after first drain
Browse files Browse the repository at this point in the history
skip killing pod with label "storage=true" after first drain.
  • Loading branch information
Kun483 committed Jul 31, 2024
1 parent 7e65301 commit 35364a2
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions internal/controllers/machine/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,9 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
}},
// SPECTRO: Even if the node is reachable, we wait 30 minutes for drain completion else move ahead
SkipWaitForDeleteTimeoutSeconds: 60 * 30, // 30 minutes
AdditionalFilters: []kubedrain.PodFilter{
additionalFilerToSkipDrainCSI,
},
}

if noderefutil.IsNodeUnreachable(node) {
Expand All @@ -643,6 +646,17 @@ func (r *Reconciler) drainNode(ctx context.Context, cluster *clusterv1.Cluster,
return ctrl.Result{}, nil
}

// additionalFilerToSkipDrainCSI is a kubedrain.PodFilter that keeps storage
// pods out of the drain: a pod carrying the label "storage" with the value
// "true" is reported as skipped, and every other pod (including one with no
// labels at all) is reported as okay to delete.
func additionalFilerToSkipDrainCSI(pod corev1.Pod) kubedrain.PodDeleteStatus {
	// Indexing a nil label map yields "", so unlabeled pods fall through to
	// the default branch without a separate nil check.
	switch pod.Labels["storage"] {
	case "true":
		return kubedrain.MakePodDeleteStatusSkip()
	default:
		return kubedrain.MakePodDeleteStatusOkay()
	}
}

// shouldWaitForNodeVolumes returns true if the node status still has volumes attached
// pod deletion and volume detach happen asynchronously, so a pod could be deleted before its volume is detached from the node
// this could cause issues for some storage provisioners, for example, this is problematic for vsphere-volume
Expand Down

0 comments on commit 35364a2

Please sign in to comment.