Skip to content

Commit

Permalink
Merge pull request #149 from dabradley/fix-upgrades
Browse files Browse the repository at this point in the history
Uninstall existing driver if upgrade fails
  • Loading branch information
joe-atzinger authored Dec 6, 2023
2 parents d3f5772 + aec9632 commit 0a2d093
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 12 deletions.
10 changes: 6 additions & 4 deletions deploy/csi-azurelustre-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,6 @@ spec:
- --csi-address=$(ADDRESS)
- --kubelet-registration-path=$(DRIVER_REG_SOCK_PATH)
- --v=2
lifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "rm -rf /registration/azurelustre.csi.azure.com-reg.sock /csi/csi.sock"]
livenessProbe:
exec:
command:
Expand Down Expand Up @@ -90,6 +86,10 @@ spec:
- name: azurelustre
image: mcr.microsoft.com/oss/kubernetes-csi/azurelustre-csi:v0.1.12
imagePullPolicy: IfNotPresent
lifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "lustre_rmmod"]
args:
- "-v=5"
- "--endpoint=$(CSI_ENDPOINT)"
Expand Down Expand Up @@ -117,6 +117,8 @@ spec:
value: "yes"
- name: LUSTRE_VERSION
value: "2.15.1"
- name: CLIENT_SHA_SUFFIX
value: "33-g0168b83"
- name: KUBE_NODE_NAME
valueFrom:
fieldRef:
Expand Down
48 changes: 40 additions & 8 deletions pkg/azurelustreplugin/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ echo "installClientPackages: ${installClientPackages}"
# Resolve which Lustre client package to install.
# LUSTRE_VERSION overrides the upstream Lustre version; "2.15.1" is used when
# the variable is unset or empty.
requiredLustreVersion=${LUSTRE_VERSION:-"2.15.1"}
echo "requiredLustreVersion: ${requiredLustreVersion}"

# NOTE(review): this assignment is overwritten a few lines below by the
# CLIENT_SHA_SUFFIX-based value; it looks like the pre-change side of the
# diff -- confirm whether it is still present in the real script.
pkgVersion="${requiredLustreVersion}-33-g0168b83"
# CLIENT_SHA_SUFFIX overrides the client build suffix; "33-g0168b83" is used
# when the variable is unset or empty.
requiredClientSha=${CLIENT_SHA_SUFFIX:-"33-g0168b83"}
echo "requiredClientSha: ${requiredClientSha}"

# Full package version is "<lustre version>-<client build suffix>".
pkgVersion="${requiredLustreVersion}-${requiredClientSha}"
echo "pkgVersion: ${pkgVersion}"

# Package name embeds the full version string.
pkgName="amlfs-lustre-client-${pkgVersion}"
Expand All @@ -89,7 +92,7 @@ elif [[ ! -z $(grep -R 'jammy' /etc/os-release) ]]; then
# deb http://azure.archive.ubuntu.com/ubuntu/ jammy-security universe
# deb http://azure.archive.ubuntu.com/ubuntu/ jammy-security multiverse
# EOF
#
#
osReleaseCodeName="jammy"
else
echo "Unsupported Linux distro"
Expand All @@ -108,15 +111,44 @@ if [[ "${installClientPackages}" == "yes" ]]; then
echo "deb [arch=amd64] https://packages.microsoft.com/repos/amlfs-${osReleaseCodeName}/ ${osReleaseCodeName} main" | tee /etc/apt/sources.list.d/amlfs.list
apt-get update
fi

# Log the package spec (name=version) that is about to be installed.
echo "$(date -u) Installing Lustre client modules: ${pkgName}=${kernelVersion}"

# Single, non-retried install of the pinned package spec.
# NOTE(review): the retry loop that follows performs the same install with
# apt-get; this call looks like the pre-change side of the diff -- confirm
# which of the two is live in the real script.
# DEBIAN_FRONTEND=noninteractive plus the two dpkg conffile options avoid
# interactive prompts during the install (see the linked grub issue).
# grub issue
# https://stackoverflow.com/questions/40748363/virtual-machine-apt-get-grub-issue/40751712
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends -o DPkg::options::="--force-confdef" -o DPkg::options::="--force-confold" \
${pkgName}=${kernelVersion}
# Install the Lustre client package, recovering from a failed install by
# unloading the running modules, purging every installed *lustre-client*
# package and trying again.
#
# Reads:   pkgName, kernelVersion (package spec is "${pkgName}=${kernelVersion}")
# Writes:  install_success ("true"/"false"), tries (attempts left; only
#          decremented on failure), sleep_before_retry (doubles per retry)
tries=3
sleep_before_retry=15
install_success=false
while (( tries > 0 )); do
  # grub issue
  # https://stackoverflow.com/questions/40748363/virtual-machine-apt-get-grub-issue/40751712
  if DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      -o DPkg::options::="--force-confdef" -o DPkg::options::="--force-confold" \
      "${pkgName}=${kernelVersion}"; then
    install_success=true
    break
  fi

  echo "$(date -u) Error installing Lustre client modules. Will try removing existing versions"
  # Check if lustre_rmmod is available, attempt to unload the modules if so.
  # If modules are already uninstalled, this will still pass
  if command -v lustre_rmmod >/dev/null 2>&1 && ! lustre_rmmod; then
    echo "$(date -u) Error: Unable to unload running module. Are there still mounted Lustre filesystems on this node? Old Lustre client version may continue running."
  fi
  # Purely informational: list what is about to be purged, if anything matches.
  if existing_versions=$(dpkg-query --showformat=' ${Package}=${Version}' --show '*lustre-client*'); then
    echo "$(date -u) The following existing versions of the Lustre client are installed and will be removed:${existing_versions}"
  fi
  echo "$(date -u) Uninstalling existing Lustre client versions."
  # Best effort: a failed purge must not abort the retry sequence.
  apt-get remove --purge -y '*lustre-client*' || true

  tries=$((tries - 1))
  # Back off only when another attempt will actually follow; sleeping after
  # the final failure would just delay the error report.
  if (( tries > 0 )); then
    sleep "${sleep_before_retry}"
    sleep_before_retry=$((sleep_before_retry * 2))
  fi
done

# Report the outcome of the install retry loop: the success flag and how many
# attempts were still available when the loop ended.
echo "$(date -u) Install success: ${install_success}, Tries left: ${tries}"

# NOTE(review): this message is printed unconditionally, even when the install
# failed; it looks like the pre-change side of the diff -- confirm whether it
# is still present in the real script.
echo "$(date -u) Installed Lustre client packages."
# install_success holds the literal word "true" or "false" and is executed as
# a command here, so its exit status drives the branch. No exit happens on
# failure within the lines shown.
if ! ${install_success}; then
echo "$(date -u) Error: Could not install necessary Lustre drivers for: ${pkgName}=${kernelVersion}"
else
echo "$(date -u) Installed Lustre client packages for: ${pkgName}=${kernelVersion}"
fi

init_lnet="true"

Expand Down

0 comments on commit 0a2d093

Please sign in to comment.