Skip to content

Commit

Permalink
Use force unmount and explicitly unmount bad mount points
Browse files Browse the repository at this point in the history
There have been cases where the logic to cleanup a mount point
has caused the driver to get into a bad state. This is most
obvious when a subdirectory is mounted to a volume and a parent
directory of that subdirectory is deleted. The Lustre driver
doesn't handle that case in the way that Kubernetes expects
and returns invalid data. To avoid this scenario causing our
driver to get into a bad state, leak mount points, etc, we
must explicitly check that we can read the necessary information
about the mount point, and if not, explicitly unmount that
mount point before allowing Kubernetes to clean up the directory.
To ensure that we don't end up in a bad state, this change
enables force unmounting as well. The force unmount will only
occur after a timeout has expired, since force unmounts can
cause issues with the Lustre driver. However, in this case, it
is better if we are in a bad enough situation to be able to
eventually return to a good state rather than require manual
intervention.
  • Loading branch information
dabradley committed Nov 25, 2024
1 parent 4613c77 commit 028a78b
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 3 deletions.
8 changes: 8 additions & 0 deletions pkg/azurelustre/azurelustre.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ type Driver struct {
// enableAzureLustreMockMount is only for testing, DO NOT set as true in non-testing scenario
enableAzureLustreMockMount bool
mounter *mount.SafeFormatAndMount // TODO_JUSJIN: check any other alternatives
forceMounter *mount.MounterForceUnmounter
volLockMap *util.LockMap
// Directory to temporarily mount to for subdirectory creation
workingMountDir string
Expand Down Expand Up @@ -132,6 +133,13 @@ func (d *Driver) Run(endpoint string, testBool bool) {
Interface: mount.New(""),
Exec: utilexec.New(),
}
forceUnmounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
if ok {
klog.V(4).Infof("Using force unmounter interface")
d.forceMounter = &forceUnmounter
} else {
klog.Fatalf("Mounter does not support force unmount")
}

// TODO_JUSJIN: revisit these caps
// Initialize default library driver
Expand Down
5 changes: 5 additions & 0 deletions pkg/azurelustre/fake_mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package azurelustre
import (
"fmt"
"strings"
"time"

mount "k8s.io/mount-utils"
)
Expand Down Expand Up @@ -69,3 +70,7 @@ func (f *fakeMounter) IsLikelyNotMountPoint(file string) (bool, error) {
}
return true, nil
}

func (f *fakeMounter) UnmountWithForce(target string, _ time.Duration) error {
return f.Unmount(target)
}
41 changes: 38 additions & 3 deletions pkg/azurelustre/nodeserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"os"
"path/filepath"
"strings"
"time"

"github.com/container-storage-interface/spec/lib/go/csi"
"golang.org/x/net/context"
Expand Down Expand Up @@ -317,10 +318,44 @@ func (d *Driver) NodeUnpublishVolume(
}

func unmountVolumeAtPath(d *Driver, targetPath string) error {
shouldUnmountBadPath := false

d.kernelModuleLock.Lock()
defer d.kernelModuleLock.Unlock()
err := mount.CleanupMountPoint(targetPath, d.mounter,
true /*extensiveMountPointCheck*/)

parent := filepath.Dir(targetPath)
klog.V(2).Infof("Listing dir: %s", parent)
entries, err := os.ReadDir(parent)
if err != nil {
klog.Warningf("could not list directory %s, will explicitly unmount path before cleanup %s: %q", parent, targetPath, err)
shouldUnmountBadPath = true
}

for _, e := range entries {
if e.Name() == filepath.Base(targetPath) {
_, err := e.Info()
if err != nil {
klog.Warningf("could not get info for entry %s, will explicitly unmount path before cleanup %s: %q", e.Name(), targetPath, err)
shouldUnmountBadPath = true
}
}
}

if shouldUnmountBadPath {
// In these cases, if we only ran mount.CleanupMountWithForce,
// it would have issues trying to stat the directory before
// cleanup, so we need to explicitly unmount the path, with
// force if necessary. Then the directory can be cleaned up
// by the mount.CleanupMountWithForce call.
klog.V(4).Infof("unmounting bad mount: %s)", targetPath)
forceUnmounter := *d.forceMounter
if err := forceUnmounter.UnmountWithForce(targetPath, 30*time.Second); err != nil {
klog.Warningf("couldn't unmount %s: %q", targetPath, err)
}
}

err = mount.CleanupMountWithForce(targetPath, *d.forceMounter,
true /*extensiveMountPointCheck*/, 10*time.Second)
return err
}

Expand Down Expand Up @@ -653,7 +688,7 @@ func (d *Driver) internalUnmount(mountPath string) error {

klog.V(4).Infof("internally unmounting %v", target)

err = mount.CleanupMountPoint(target, d.mounter, true)
err = mount.CleanupMountWithForce(target, *d.forceMounter, true, 10*time.Second)
if err != nil {
err = status.Errorf(codes.Internal, "failed to unmount staging target %q: %v", target, err)
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/azurelustre/nodeserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ func TestEnsureMountPoint(t *testing.T) {
Interface: fakeMounter,
Exec: fakeExec,
}
forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
require.True(t, ok, "Mounter should implement MounterForceUnmounter")
d.forceMounter = &forceMounter

for _, test := range tests {
err := makeDir(alreadyExistTarget)
Expand Down Expand Up @@ -542,6 +545,9 @@ func TestNodePublishVolume(t *testing.T) {
Interface: fakeMounter,
Exec: fakeExec,
}
forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
require.True(t, ok, "Mounter should implement MounterForceUnmounter")
d.forceMounter = &forceMounter
d.workingMountDir = workingMountDir
err := makeDir(targetTest)
require.NoError(t, err)
Expand Down Expand Up @@ -736,6 +742,9 @@ func TestNodeUnpublishVolume(t *testing.T) {
Interface: fakeMounter,
Exec: fakeExec,
}
forceMounter, ok := d.mounter.Interface.(mount.MounterForceUnmounter)
require.True(t, ok, "Mounter should implement MounterForceUnmounter")
d.forceMounter = &forceMounter
err := makeDir(targetTest)
require.NoError(t, err)

Expand Down

0 comments on commit 028a78b

Please sign in to comment.