Skip to content

Commit

Permalink
make the reset script more resilient (#4908)
Browse files Browse the repository at this point in the history
* print success/failure messages when resetting kurl

* retry removing paths when resetting

* print a list of unremoved files
  • Loading branch information
laverya authored Oct 24, 2023
1 parent 7a77235 commit b42b504
Showing 1 changed file with 69 additions and 16 deletions.
85 changes: 69 additions & 16 deletions scripts/tasks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function tasks() {
generate_admin_user
;;
reset)
reset
reset $@
;;
kotsadm-accept-tls-uploads|kotsadm_accept_tls_uploads)
kotsadm_accept_tls_uploads
Expand Down Expand Up @@ -182,8 +182,33 @@ function generate_admin_user() {
printf "\n"
}

# TODO kube-proxy ipvs cleanup
RESET_UNREMOVED_FILES=
# reset runs reset_impl and then prints a final success/failure summary.
# Arguments: all arguments are forwarded unchanged to reset_impl
# Globals:   RESET_UNREMOVED_FILES (read) - paths that reset_retry_rm failed to remove
# Returns:   0 if the system was reset, 1 if reset_impl failed or files were left behind
function reset() {
    if ! reset_impl "$@"; then
        printf "\n"
        printf "${RED}Failed to reset this system. Please correct any errors manually and try again.${NC}\n"
        printf "\n"
        # an explicit status is required here: a bare `return` would propagate
        # the exit status of the printf above (0) and report success
        return 1
    fi

    # if RESET_UNREMOVED_FILES is set, then we tried and failed to remove those files
    # we will tell the user that the system has not been successfully reset and direct them to remove the files themselves
    if [ -n "$RESET_UNREMOVED_FILES" ]; then
        printf "\n"
        printf "${RED}Failed to remove the following files. Please remove them manually.${NC}\n"
        printf "\n"
        printf "${YELLOW}"
        printf "%s\n" "$RESET_UNREMOVED_FILES"
        printf "${NC}"
        printf "\n"
        # leftover files mean the reset did not complete, so report failure
        return 1
    fi

    printf "${GREEN}Successfully reset this system.${NC}\n"
    return 0
}

# TODO kube-proxy ipvs cleanup
function reset_impl() {
set +e

shift # the first param is reset
Expand Down Expand Up @@ -258,28 +283,28 @@ function reset() {
systemctl disable kubelet || true

printf "Removing host files\n"
rm -rf /etc/cni
rm -rf /etc/kubernetes
rm -rf /opt/cni
rm -rf /opt/replicated
reset_retry_rm /etc/cni
reset_retry_rm /etc/kubernetes
reset_retry_rm /opt/cni
reset_retry_rm /opt/replicated
rm -f /usr/bin/kubeadm /usr/bin/kubelet /usr/bin/kubectl /usr/bin/crtctl
rm -f /usr/local/bin/kustomize*
rm -rf /var/lib/calico
rm -rf /var/lib/etcd
rm -rf /var/lib/kubelet
rm -rf /var/lib/rook
rm -rf /var/lib/weave
rm -rf /var/lib/longhorn
rm -rf /etc/haproxy
rm -rf "$KURL_INSTALL_DIRECTORY"
rm -rf "$KURL_INSTALL_DIRECTORY.repos"
reset_retry_rm /var/lib/calico
reset_retry_rm /var/lib/etcd
reset_retry_rm /var/lib/kubelet
reset_retry_rm /var/lib/rook
reset_retry_rm /var/lib/weave
reset_retry_rm /var/lib/longhorn
reset_retry_rm /etc/haproxy
reset_retry_rm "$KURL_INSTALL_DIRECTORY"
reset_retry_rm "$KURL_INSTALL_DIRECTORY.repos"

printf "Removing flannel networks\n"
# if /var/lib/cni/flannel exists, remove it entirely
if [ -d /var/lib/cni/flannel ]; then
ip link set cni0 down && ip link set flannel.1 down
ip link delete cni0 && ip link delete flannel.1
rm -rf /var/lib/cni
reset_retry_rm /var/lib/cni
fi

printf "Killing haproxy\n"
Expand All @@ -291,6 +316,34 @@ function reset() {
printf "Reset script completed\n"
}

# reset_retry_rm attempts up to 10 times to remove the path passed as an argument,
# sleeping one second after each failed 'rm'.
# If the path still exists after 10 attempts, the function prints the path that
# failed to be removed and appends it to RESET_UNREMOVED_FILES.
# Arguments: $1 - the file or directory to remove
# Globals:   RESET_UNREMOVED_FILES (written) - newline-separated list of unremoved paths
# Returns:   always 0; failures are reported through RESET_UNREMOVED_FILES
function reset_retry_rm() {
    local path="$1"
    local max_attempts=10
    local attempts=0

    while [ -e "$path" ]; do
        # give up once max_attempts removals have been tried
        if [ "$attempts" -ge "$max_attempts" ]; then
            printf "\n"
            printf "${RED}Failed to remove %s after %d attempts${NC}\n" "$path" "$max_attempts"
            printf "\n"

            # add this path to RESET_UNREMOVED_FILES, separated by a real newline
            # so the list prints one path per line when emitted with printf "%s\n"
            if [ -z "$RESET_UNREMOVED_FILES" ]; then
                RESET_UNREMOVED_FILES="$path"
            else
                RESET_UNREMOVED_FILES="${RESET_UNREMOVED_FILES}"$'\n'"${path}"
            fi

            return 0
        fi

        if ! rm -rf -- "$path" ; then
            sleep 1
        fi
        attempts=$((attempts+1))
    done
    return 0
}

function kotsadm_accept_tls_uploads() {
export KUBECONFIG=/etc/kubernetes/admin.conf

Expand Down

0 comments on commit b42b504

Please sign in to comment.