From 60827df755cfd2d56ee53457e4674fffc38f2477 Mon Sep 17 00:00:00 2001 From: Gauri Lamunion Date: Wed, 6 Nov 2024 11:17:16 -0800 Subject: [PATCH 1/4] Added code to fallback to LMT if source hash is missing --- cmd/syncComparator.go | 20 ++++++++++++++++---- e2etest/zt_newe2e_sync_test.go | 6 ++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/cmd/syncComparator.go b/cmd/syncComparator.go index b99bfe733..ca7f1bfcd 100644 --- a/cmd/syncComparator.go +++ b/cmd/syncComparator.go @@ -98,8 +98,14 @@ func (f *syncDestinationComparator) processIfNecessary(destinationObject StoredO switch f.comparisonHashType { case common.ESyncHashType.MD5(): if sourceObjectInMap.md5 == nil { - syncComparatorLog(sourceObjectInMap.relativePath, syncStatusSkipped, syncSkipReasonMissingHash, true) - return nil + if sourceObjectInMap.isMoreRecentThan(destinationObject, f.preferSMBTime) { + syncComparatorLog(sourceObjectInMap.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMT, false) + return f.copyTransferScheduler(sourceObjectInMap) + } else { + // skip if dest is more recent + syncComparatorLog(sourceObjectInMap.relativePath, syncStatusSkipped, syncSkipReasonTime, false) + return nil + } } if !reflect.DeepEqual(sourceObjectInMap.md5, destinationObject.md5) { @@ -177,8 +183,14 @@ func (f *syncSourceComparator) processIfNecessary(sourceObject StoredObject) err switch f.comparisonHashType { case common.ESyncHashType.MD5(): if sourceObject.md5 == nil { - syncComparatorLog(sourceObject.relativePath, syncStatusSkipped, syncSkipReasonMissingHash, true) - return nil + if sourceObject.isMoreRecentThan(destinationObjectInMap, f.preferSMBTime) { + syncComparatorLog(sourceObject.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMT, false) + return f.copyTransferScheduler(sourceObject) + } else { + // skip if dest is more recent + syncComparatorLog(sourceObject.relativePath, syncStatusSkipped, syncSkipReasonTime, false) + return nil + } } if !reflect.DeepEqual(sourceObject.md5, destinationObjectInMap.md5) { diff --git a/e2etest/zt_newe2e_sync_test.go b/e2etest/zt_newe2e_sync_test.go index 1b0bb0477..a1aa2fd77 100644 --- a/e2etest/zt_newe2e_sync_test.go +++ b/e2etest/zt_newe2e_sync_test.go @@ -261,3 +261,9 @@ func (s *SyncTestSuite) Scenario_TestSyncDeleteDestinationIfNecessary(svm *Scena }, }, true) } + +func (s *SyncTestSuite) Scenario_TestSyncHashTypeNoHash(svm *ScenarioVariationManager) { + //srcHash := ResolveVariation(svm, []bool{true, false}) + //dstHash := ResolveVariation(svm, []bool{true, false}) + +} From 46531bb1dd7f7be94b0103037dfb0667d570c97a Mon Sep 17 00:00:00 2001 From: Gauri Lamunion Date: Thu, 14 Nov 2024 07:43:02 -0800 Subject: [PATCH 2/4] sync with compare hash to fall back to LMT if source hash is nil --- cmd/syncComparator.go | 26 +++++----- e2etest/zt_newe2e_sync_test.go | 86 ++++++++++++++++++++++++++++++++-- 2 files changed, 97 insertions(+), 15 deletions(-) diff --git a/cmd/syncComparator.go b/cmd/syncComparator.go index ca7f1bfcd..def959717 100644 --- a/cmd/syncComparator.go +++ b/cmd/syncComparator.go @@ -28,14 +28,16 @@ import ( ) const ( - syncSkipReasonTime = "the source has an older LMT than the destination" - syncSkipReasonMissingHash = "the source lacks an associated hash; please upload with --put-md5" - syncSkipReasonSameHash = "the source has the same hash" - syncOverwriteReasonNewerHash = "the source has a differing hash" - syncOverwriteReasonNewerLMT = "the source is more recent than the destination" - syncStatusSkipped = "skipped" - syncStatusOverwritten = "overwritten" - syncOverwriteReasonDeleteDestinationFile = "the flag delete-destination-file is set to true" + syncSkipReasonTime = "the source has an older LMT than the destination" + syncSkipReasonTimeAndMissingHash = "the source lacks an associated hash and has an older LMT than the destination" + syncSkipReasonMissingHash = "the source lacks an associated hash; please upload with --put-md5" + syncSkipReasonSameHash = "the source has the same hash" + syncOverwriteReasonNewerHash = "the source has a differing hash" + syncOverwriteReasonNewerLMT = "the source is more recent than the destination" + syncOverwriteReasonNewerLMTAndMissingHash = "the source lacks an associated hash and is more recent than the destination" + syncStatusSkipped = "skipped" + syncStatusOverwritten = "overwritten" + syncOverwriteReasonDeleteDestinationFile = "the flag delete-destination-file is set to true" ) func syncComparatorLog(fileName, status, skipReason string, stdout bool) { @@ -99,11 +101,11 @@ func (f *syncDestinationComparator) processIfNecessary(destinationObject StoredO case common.ESyncHashType.MD5(): if sourceObjectInMap.md5 == nil { if sourceObjectInMap.isMoreRecentThan(destinationObject, f.preferSMBTime) { - syncComparatorLog(sourceObjectInMap.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMT, false) + syncComparatorLog(sourceObjectInMap.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMTAndMissingHash, false) return f.copyTransferScheduler(sourceObjectInMap) } else { // skip if dest is more recent - syncComparatorLog(sourceObjectInMap.relativePath, syncStatusSkipped, syncSkipReasonTime, false) + syncComparatorLog(sourceObjectInMap.relativePath, syncStatusSkipped, syncSkipReasonTimeAndMissingHash, false) return nil } } @@ -184,11 +186,11 @@ func (f *syncSourceComparator) processIfNecessary(sourceObject StoredObject) err case common.ESyncHashType.MD5(): if sourceObject.md5 == nil { if sourceObject.isMoreRecentThan(destinationObjectInMap, f.preferSMBTime) { - syncComparatorLog(sourceObject.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMT, false) + syncComparatorLog(sourceObject.relativePath, syncStatusOverwritten, syncOverwriteReasonNewerLMTAndMissingHash, false) return f.copyTransferScheduler(sourceObject) } else { // skip if dest is more recent - syncComparatorLog(sourceObject.relativePath, syncStatusSkipped, syncSkipReasonTime, false) + syncComparatorLog(sourceObject.relativePath, syncStatusSkipped, syncSkipReasonTimeAndMissingHash, false) return nil } } diff --git a/e2etest/zt_newe2e_sync_test.go b/e2etest/zt_newe2e_sync_test.go index a1aa2fd77..c91c8fa5d 100644 --- a/e2etest/zt_newe2e_sync_test.go +++ b/e2etest/zt_newe2e_sync_test.go @@ -262,8 +262,88 @@ func (s *SyncTestSuite) Scenario_TestSyncDeleteDestinationIfNecessary(svm *Scena }, true) } -func (s *SyncTestSuite) Scenario_TestSyncHashTypeNoHash(svm *ScenarioVariationManager) { - //srcHash := ResolveVariation(svm, []bool{true, false}) - //dstHash := ResolveVariation(svm, []bool{true, false}) +// Note : For local sources, the hash is computed by a hashProcessor created in zc_traverser_local, so there is no way +// for local sources to have no source hash. As such these tests only cover remote sources. +func (s *SyncTestSuite) Scenario_TestSyncHashTypeSourceHash(svm *ScenarioVariationManager) { + + // There are 4 cases to consider, this test will cover all of them + // 1. Has hash and is equal -> skip + // 2. Has hash and is not equal -> overwrite + // 3. Has no hash and src LMT after dest LMT -> overwrite + // 4. Has no hash and src LMT before dest LMT -> skip + + // Create dest + hashEqualBody := NewRandomObjectContentContainer(512) + hashNotEqualBody := NewRandomObjectContentContainer(512) + noHashDestSrc := NewRandomObjectContentContainer(512) + noHashSrcDest := NewRandomObjectContentContainer(512) + + zeroBody := NewZeroObjectContentContainer(512) + + dest := CreateResource[ContainerResourceManager](svm, + GetRootResource(svm, ResolveVariation(svm, []common.Location{common.ELocation.Blob(), common.ELocation.Local()})), + ResourceDefinitionContainer{ + Objects: ObjectResourceMappingFlat{ + "hashequal": ResourceDefinitionObject{Body: hashEqualBody}, + "hashnotequal": ResourceDefinitionObject{Body: zeroBody}, + "nohashdestsrc": ResourceDefinitionObject{Body: noHashDestSrc}, + "nohashsrcdest": ResourceDefinitionObject{Body: zeroBody}, + }, + }, + ) + + time.Sleep(time.Second * 10) // Make sure source is newer + + srcObjs := ObjectResourceMappingFlat{ + "hashequal": ResourceDefinitionObject{Body: hashEqualBody}, + "hashnotequal": ResourceDefinitionObject{Body: hashNotEqualBody}, + "nohashdestsrc": ResourceDefinitionObject{Body: noHashDestSrc}, + "nohashsrcdest": ResourceDefinitionObject{Body: noHashSrcDest}, + } + + src := CreateResource[ContainerResourceManager](svm, + GetRootResource(svm, common.ELocation.Blob()), + ResourceDefinitionContainer{ + Objects: srcObjs, + }, + ) + + // Need to manually unset the md5 + src.GetObject(svm, "nohashdestsrc", common.EEntityType.File()).SetHTTPHeaders(svm, contentHeaders{contentMD5: nil}) + src.GetObject(svm, "nohashsrcdest", common.EEntityType.File()).SetHTTPHeaders(svm, contentHeaders{contentMD5: nil}) + + time.Sleep(time.Second * 10) // Make sure destination is newer + + // Re-create nohashsrcdest so the src LMT is before dest LMT + dest.GetObject(svm, "nohashsrcdest", common.EEntityType.File()).Create(svm, noHashSrcDest, ObjectProperties{}) + + stdOut, _ := RunAzCopy( + svm, + AzCopyCommand{ + Verb: AzCopyVerbSync, + Targets: []ResourceManager{src, dest}, + Flags: SyncFlags{ + CopySyncCommonFlags: CopySyncCommonFlags{ + Recursive: pointerTo(true), + }, + CompareHash: pointerTo(common.ESyncHashType.MD5()), + }, + }) + + // All source, dest should match + ValidateResource[ContainerResourceManager](svm, dest, ResourceDefinitionContainer{ + Objects: srcObjs, + }, true) + // Only non skipped paths should be in plan file + ValidatePlanFiles(svm, stdOut, ExpectedPlanFile{ + Objects: map[PlanFilePath]PlanFileObject{ + PlanFilePath{SrcPath: "/hashnotequal", DstPath: "/hashnotequal"}: { + Properties: ObjectProperties{}, + }, + PlanFilePath{SrcPath: "/nohashdestsrc", DstPath: "/nohashdestsrc"}: { + Properties: ObjectProperties{}, + }, + }, + }) } From dc1ad2c10f8d8312fa574debfcd9a7dec3850ab5 Mon Sep 17 00:00:00 2001 From: Gauri Lamunion Date: Thu, 14 Nov 2024 07:44:52 -0800 Subject: [PATCH 3/4] added info for missing hash message --- cmd/syncComparator.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/syncComparator.go b/cmd/syncComparator.go index def959717..249016984 100644 --- a/cmd/syncComparator.go +++ b/cmd/syncComparator.go @@ -29,15 +29,14 @@ import ( const ( syncSkipReasonTime = "the source has an older LMT than the destination" - syncSkipReasonTimeAndMissingHash = "the source lacks an associated hash and has an older LMT than the destination" + syncSkipReasonTimeAndMissingHash = "the source lacks an associated hash (please upload with --put-md5 for hash comparison) and has an older LMT than the destination" syncSkipReasonMissingHash = "the source lacks an associated hash; please upload with --put-md5" syncSkipReasonSameHash = "the source has the same hash" syncOverwriteReasonNewerHash = "the source has a differing hash" syncOverwriteReasonNewerLMT = "the source is more recent than the destination" - syncOverwriteReasonNewerLMTAndMissingHash = "the source lacks an associated hash and is more recent than the destination" + syncOverwriteReasonNewerLMTAndMissingHash = "the source lacks an associated hash (please upload with --put-md5 for hash comparison) and is more recent than the destination" syncStatusSkipped = "skipped" syncStatusOverwritten = "overwritten" - syncOverwriteReasonDeleteDestinationFile = "the flag delete-destination-file is set to true" ) func syncComparatorLog(fileName, status, skipReason string, stdout bool) { From ac601a97799b6e566939810e6edb0599855e6ecc Mon Sep 17 00:00:00 2001 From: Gauri Lamunion Date: Fri, 15 Nov 2024 16:08:27 -0800 Subject: [PATCH 4/4] added destination test --- e2etest/zt_newe2e_sync_test.go | 89 ++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/e2etest/zt_newe2e_sync_test.go b/e2etest/zt_newe2e_sync_test.go index c91c8fa5d..f9e6c282c 100644 --- a/e2etest/zt_newe2e_sync_test.go +++ b/e2etest/zt_newe2e_sync_test.go @@ -347,3 +347,92 @@ func (s *SyncTestSuite) Scenario_TestSyncHashTypeSourceHash(svm *ScenarioVariati }, }) } + +// Note : For local destinations, the hash is computed by a hashProcessor created in zc_traverser_local, so there is no way +// for local destinations to have no source hash. As such these tests only cover remote destinations. +func (s *SyncTestSuite) Scenario_TestSyncHashTypeDestinationHash(svm *ScenarioVariationManager) { + + // There are 4 cases to consider, this test will cover all of them + // 1. Has hash and is equal -> skip + // 2. Has hash and is not equal -> overwrite + // 3. Has no hash and src LMT after dest LMT -> overwrite + // 4. Has no hash and src LMT before dest LMT -> overwrite + + // Create dest + hashEqualBody := NewRandomObjectContentContainer(512) + hashNotEqualBody := NewRandomObjectContentContainer(512) + noHashDestSrc := NewRandomObjectContentContainer(512) + noHashSrcDest := NewRandomObjectContentContainer(512) + + zeroBody := NewZeroObjectContentContainer(512) + + dest := CreateResource[ContainerResourceManager](svm, + GetRootResource(svm, common.ELocation.Blob()), + ResourceDefinitionContainer{ + Objects: ObjectResourceMappingFlat{ + "hashequal": ResourceDefinitionObject{Body: hashEqualBody}, + "hashnotequal": ResourceDefinitionObject{Body: zeroBody}, + "nohashdestsrc": ResourceDefinitionObject{Body: zeroBody}, + "nohashsrcdest": ResourceDefinitionObject{Body: zeroBody}, + }, + }, + ) + + time.Sleep(time.Second * 10) // Make sure source is newer + + srcObjs := ObjectResourceMappingFlat{ + "hashequal": ResourceDefinitionObject{Body: hashEqualBody}, + "hashnotequal": ResourceDefinitionObject{Body: hashNotEqualBody}, + "nohashdestsrc": ResourceDefinitionObject{Body: noHashDestSrc}, + "nohashsrcdest": ResourceDefinitionObject{Body: noHashSrcDest}, + } + + src := CreateResource[ContainerResourceManager](svm, + GetRootResource(svm, ResolveVariation(svm, []common.Location{common.ELocation.Blob(), common.ELocation.Local()})), + ResourceDefinitionContainer{ + Objects: srcObjs, + }, + ) + + // Need to manually unset the md5 + dest.GetObject(svm, "nohashdestsrc", common.EEntityType.File()).SetHTTPHeaders(svm, contentHeaders{contentMD5: nil}) + dest.GetObject(svm, "nohashsrcdest", common.EEntityType.File()).SetHTTPHeaders(svm, contentHeaders{contentMD5: nil}) + + time.Sleep(time.Second * 10) // Make sure destination is newer + + // Re-create nohashsrcdest so the src LMT is before dest LMT + dest.GetObject(svm, "nohashsrcdest", common.EEntityType.File()).Create(svm, zeroBody, ObjectProperties{}) + + stdOut, _ := RunAzCopy( + svm, + AzCopyCommand{ + Verb: AzCopyVerbSync, + Targets: []ResourceManager{src, dest}, + Flags: SyncFlags{ + CopySyncCommonFlags: CopySyncCommonFlags{ + Recursive: pointerTo(true), + }, + CompareHash: pointerTo(common.ESyncHashType.MD5()), + }, + }) + + // All source, dest should match + ValidateResource[ContainerResourceManager](svm, dest, ResourceDefinitionContainer{ + Objects: srcObjs, + }, true) + + // Only non skipped paths should be in plan file + ValidatePlanFiles(svm, stdOut, ExpectedPlanFile{ + Objects: map[PlanFilePath]PlanFileObject{ + PlanFilePath{SrcPath: "/hashnotequal", DstPath: "/hashnotequal"}: { + Properties: ObjectProperties{}, + }, + PlanFilePath{SrcPath: "/nohashdestsrc", DstPath: "/nohashdestsrc"}: { + Properties: ObjectProperties{}, + }, + PlanFilePath{SrcPath: "/nohashsrcdest", DstPath: "/nohashsrcdest"}: { + Properties: ObjectProperties{}, + }, + }, + }) +}