Skip to content

Commit

Permalink
fix(auto-delete-snapshot): remove empty elements from DeleteCandidateChain (#322)
Browse files Browse the repository at this point in the history

* Fix issue with deletion of user-created snapshots
* Update and improve functional tests

Signed-off-by: Payes Anand <[email protected]>
  • Loading branch information
payes authored and kmova committed Aug 15, 2020
1 parent 8203958 commit 135112d
Show file tree
Hide file tree
Showing 15 changed files with 184 additions and 116 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ test_resiliency:
sudo -E bash -x ./ci/resiliency_tests.sh

test_functional:
go build && cp ./jiva tests/functional/
go build --tags=debug && cp ./jiva tests/functional/
cd tests/functional && go build --tags=debug && sudo bash -x test.sh

test_e2e:
Expand Down
6 changes: 6 additions & 0 deletions controller/control.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,6 +495,12 @@ func (c *Controller) Snapshot(name string) (string, error) {
if name == "" {
name = util.UUID()
}
if c.RWReplicaCount != c.ReplicationFactor {
return "", fmt.Errorf(
"RWReplicaCount(%v) != ReplicationFactor(%v)",
c.RWReplicaCount, c.ReplicationFactor,
)
}

if remain, err := c.backend.RemainSnapshots(); err != nil {
return "", err
Expand Down
18 changes: 7 additions & 11 deletions replica/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,8 @@ func (r *Replica) hardlinkDisk(target, source string) error {
// and remove and close both source and target and open
// new instance of file for R/W and update the file index
// with new reference.
// When ReplaceDisk is used, it is possible to delete
// a snapshot whose parent or child is user-created.
func (r *Replica) ReplaceDisk(target, source string) error {
r.Lock()
defer r.Unlock()
Expand Down Expand Up @@ -585,22 +587,16 @@ func (r *Replica) PrepareRemoveDisk(name string) ([]PrepareRemoveAction, error)
if data.Parent == "" {
return nil, fmt.Errorf("Can't delete base snapshot: %s", disk)
}

logrus.Infof("Mark disk %v as removed", disk)
if err := r.markDiskAsRemoved(disk); err != nil {
return nil, fmt.Errorf("Fail to mark disk %v as removed: %v", disk, err)
}

targetDisks := []string{}
if data.Parent != "" {
// check if metadata of parent exists for the snapshot
// going to be deleted.
_, exists := r.diskData[data.Parent]
if !exists {
return nil, fmt.Errorf("Can not find snapshot %v's parent %v", disk, data.Parent)
}
// check if metadata of parent exists for the snapshot
// going to be deleted.
if _, exists = r.diskData[data.Parent]; !exists {
return nil, fmt.Errorf("Can not find snapshot %v's parent %v", disk, data.Parent)
}

targetDisks := []string{}
targetDisks = append(targetDisks, disk)
actions, err := r.processPrepareRemoveDisks(targetDisks)
if err != nil {
Expand Down
7 changes: 7 additions & 0 deletions sync/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,10 @@ func GetDeleteCandidateChain(r *replica.Replica, checkpoint string) ([]string, e
if replicaDisks[disk].UserCreated && !replicaDisks[disk].Removed {
continue
}
parent := replicaDisks[disk].Parent
if replicaDisks[parent].UserCreated && !replicaDisks[parent].Removed {
continue
}
snapList[i].name = disk
snapList[i].size, err = strconv.ParseInt(replicaDisks[disk].Size, 10, 64)
if err != nil {
Expand All @@ -771,6 +775,9 @@ func GetDeleteCandidateChain(r *replica.Replica, checkpoint string) ([]string, e

var sortedList []string
for _, snap := range snapList {
if snap.name == "" {
continue
}
sortedList = append(sortedList, snap.name)
}
return sortedList, err
Expand Down
57 changes: 41 additions & 16 deletions tests/e2e/chaos.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ import (
"github.com/sirupsen/logrus"
)

var TestRunTime time.Duration = 60

func restartOneReplicaTest() {
config := buildConfig("172.18.0.10", []string{"172.18.0.11", "172.18.0.12", "172.18.0.13"})
setupTest(config)
config.verifyRWReplicaCount(3)
config.runIOs()
go config.testSequentialData()
go config.snapshotCreateDelete()
ctrlClient := getControllerClient(config.ControllerIP)
startTime := time.Now()
Expand All @@ -25,26 +27,31 @@ func restartOneReplicaTest() {
stopContainer(config.Replicas[striped(replicas[0].Address)])
startContainer(config.Replicas[striped(replicas[0].Address)])
config.verifyRWReplicaCount(3)
time.Sleep(30 * time.Second)
if time.Since(startTime) > time.Minute*20 {
time.Sleep(65 * time.Second) // 65 so that auto-snapshot delete is triggered
if time.Since(startTime) > time.Minute*TestRunTime {
break
}
iteration++
}
config.Stop = true
for {
for range time.NewTicker(5 * time.Second).C {
if config.ThreadCount == 0 {
break
}
logrus.Infof(
"RestartControllerTest Completed, waiting for threads to exit, pending tc:%v",
config.ThreadCount,
)
}
logrus.Infof("Exiting RestartOneReplicaTest, threads exited successfully")
scrap(config)
}

func restartTwoReplicasTest() {
config := buildConfig("172.18.0.20", []string{"172.18.0.21", "172.18.0.22", "172.18.0.23"})
setupTest(config)
config.verifyRWReplicaCount(3)
config.runIOs()
go config.testSequentialData()
go config.snapshotCreateDelete()
ctrlClient := getControllerClient(config.ControllerIP)
startTime := time.Now()
Expand All @@ -60,26 +67,31 @@ func restartTwoReplicasTest() {
startContainer(config.Replicas[striped(replicas[0].Address)])
startContainer(config.Replicas[striped(replicas[1].Address)])
config.verifyRWReplicaCount(3)
time.Sleep(30 * time.Second)
if time.Since(startTime) > time.Minute*20 {
time.Sleep(65 * time.Second) // 65 so that auto-snapshot delete is triggered
if time.Since(startTime) > time.Minute*TestRunTime {
break
}
iteration++
}
config.Stop = true
for {
for range time.NewTicker(5 * time.Second).C {
if config.ThreadCount == 0 {
break
}
logrus.Infof(
"RestartControllerTest Completed, waiting for threads to exit, pending tc:%v",
config.ThreadCount,
)
}
logrus.Infof("Exiting RestartTwoReplicaTest, threads exited successfully")
scrap(config)
}

func restartThreeReplicasTest() {
config := buildConfig("172.18.0.30", []string{"172.18.0.31", "172.18.0.32", "172.18.0.33"})
setupTest(config)
config.verifyRWReplicaCount(3)
config.runIOs()
go config.testSequentialData()
go config.snapshotCreateDelete()
ctrlClient := getControllerClient(config.ControllerIP)
startTime := time.Now()
Expand All @@ -97,26 +109,31 @@ func restartThreeReplicasTest() {
startContainer(config.Replicas[striped(replicas[1].Address)])
startContainer(config.Replicas[striped(replicas[2].Address)])
config.verifyRWReplicaCount(3)
time.Sleep(30 * time.Second)
if time.Since(startTime) > time.Minute*20 {
time.Sleep(65 * time.Second) // 65 so that auto-snapshot delete is triggered
if time.Since(startTime) > time.Minute*TestRunTime {
break
}
iteration++
}
config.Stop = true
for {
for range time.NewTicker(5 * time.Second).C {
if config.ThreadCount == 0 {
break
}
logrus.Infof(
"RestartControllerTest Completed, waiting for threads to exit, pending tc:%v",
config.ThreadCount,
)
}
logrus.Infof("Exiting RestartThreeReplicaTest, threads exited successfully")
scrap(config)
}

func restartControllerTest() {
config := buildConfig("172.18.0.40", []string{"172.18.0.41", "172.18.0.42", "172.18.0.43"})
setupTest(config)
config.verifyRWReplicaCount(3)
config.runIOs()
go config.testSequentialData()
go config.snapshotCreateDelete()
startTime := time.Now()
iteration := 1
Expand All @@ -125,23 +142,30 @@ func restartControllerTest() {
stopContainer(config.Controller[striped(config.ControllerIP)])
startContainer(config.Controller[striped(config.ControllerIP)])
config.verifyRWReplicaCount(3)
time.Sleep(30 * time.Second)
if time.Since(startTime) > time.Minute*20 {
time.Sleep(65 * time.Second) // 65 so that auto-snapshot delete is triggered
if time.Since(startTime) > time.Minute*TestRunTime {
break
}
iteration++
}
config.Stop = true
for {
for range time.NewTicker(5 * time.Second).C {
if config.ThreadCount == 0 {
break
}
logrus.Infof(
"RestartControllerTest Completed, waiting for threads to exit, pending tc:%v",
config.ThreadCount,
)
}
logrus.Infof("Exiting RestartControllerTest, threads exited successfully")
scrap(config)
}
func chaosTest() {
logrus.Infof("Start chaos Test")
var wg sync.WaitGroup
wg.Add(4)

go func() {
restartOneReplicaTest()
wg.Done()
Expand All @@ -159,4 +183,5 @@ func chaosTest() {
wg.Done()
}()
wg.Wait()
logrus.Infof("Chaos Test completed successfully")
}
4 changes: 3 additions & 1 deletion tests/e2e/container_start_stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"context"
"strings"
"time"

"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
Expand Down Expand Up @@ -90,7 +91,7 @@ func createReplica(replicaIP string, config *testConfig) string {
},
NetworkMode: "stg-net",
PublishAllPorts: true,
Binds: []string{"/tmp/" + replicaIP + "vol:/vol"},
Binds: []string{"/tmp1/" + replicaIP + "vol:/vol"},
},
&network.NetworkingConfig{
EndpointsConfig: map[string]*network.EndpointSettings{
Expand Down Expand Up @@ -209,5 +210,6 @@ func verifyRestartCount(containerID string, restartCount int) {
if containerInspect.ContainerJSONBase.RestartCount >= restartCount {
break
}
time.Sleep(5 * time.Second)
}
}
Loading

0 comments on commit 135112d

Please sign in to comment.