From 0bc05ec8698396282d875256699235c15dfd146b Mon Sep 17 00:00:00 2001 From: Harish Bhakuni Date: Tue, 5 Sep 2023 23:43:06 -0700 Subject: [PATCH] [Snapshot Interop] Fix Flakiness in Snapshot Interop Code Signed-off-by: Harish Bhakuni --- .../remotestore/RemoteRestoreSnapshotIT.java | 119 +++++++----------- .../BlobStoreRepositoryHelperTests.java | 36 ++++++ .../BlobStoreRepositoryRemoteIndexTests.java | 103 +++++++-------- .../AbstractSnapshotIntegTestCase.java | 42 +++++-- 4 files changed, 159 insertions(+), 141 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index 70e571604ca53..8d9dd0b0bd5d2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ -10,7 +10,6 @@ import org.opensearch.action.DocWriteResponse; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; -import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.opensearch.action.admin.indices.get.GetIndexRequest; import org.opensearch.action.admin.indices.get.GetIndexResponse; @@ -26,6 +25,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; +import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.snapshots.SnapshotState; import org.opensearch.test.InternalTestCluster; import org.junit.After; @@ -33,7 +33,9 @@ import java.io.IOException; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.concurrent.ExecutionException; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; @@ -128,32 +130,22 @@ public void testRestoreOperationsShallowCopyEnabled() throws IOException, Execut internalCluster().startDataOnlyNode(); logger.info("--> snapshot"); - CreateSnapshotResponse createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepoName, snapshotName1) - .setWaitForCompletion(true) - .setIndices(indexName1, indexName2) - .get(); - assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0)); - assertThat( - createSnapshotResponse.getSnapshotInfo().successfulShards(), - equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()) - ); - assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.SUCCESS)); + + SnapshotInfo snapshotInfo = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(Arrays.asList(indexName1, indexName2))); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); updateRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, false)); - CreateSnapshotResponse createSnapshotResponse2 = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepoName, snapshotName2) - .setWaitForCompletion(true) - .setIndices(indexName1, indexName2) - .get(); - assertThat(createSnapshotResponse2.getSnapshotInfo().successfulShards(), greaterThan(0)); - assertThat( - createSnapshotResponse2.getSnapshotInfo().successfulShards(), - equalTo(createSnapshotResponse2.getSnapshotInfo().totalShards()) + SnapshotInfo snapshotInfo2 = createSnapshot( + snapshotRepoName, + snapshotName2, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) ); - assertThat(createSnapshotResponse2.getSnapshotInfo().state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); DeleteResponse deleteResponse = client().prepareDelete(indexName1, "0").execute().actionGet(); assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); @@ -257,7 +249,6 @@ public void testRestoreOperationsShallowCopyEnabled() throws IOException, Execut assertDocsPresentInIndex(client, restoredIndexName1Doc, numDocsInIndex1 + 2); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9326") public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); String primary = internalCluster().startDataOnlyNode(); @@ -288,32 +279,24 @@ public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { internalCluster().startDataOnlyNode(); logger.info("--> snapshot"); - CreateSnapshotResponse createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepoName, snapshotName1) - .setWaitForCompletion(true) - .setIndices(indexName1, indexName2) - .get(); - assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0)); - assertThat( - createSnapshotResponse.getSnapshotInfo().successfulShards(), - equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()) + SnapshotInfo snapshotInfo1 = createSnapshot( + snapshotRepoName, + snapshotName1, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) ); - assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); updateRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, false)); - CreateSnapshotResponse createSnapshotResponse2 = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepoName, snapshotName2) - .setWaitForCompletion(true) - .setIndices(indexName1, indexName2) - .get(); - assertThat(createSnapshotResponse2.getSnapshotInfo().successfulShards(), greaterThan(0)); - assertThat( - createSnapshotResponse2.getSnapshotInfo().successfulShards(), - equalTo(createSnapshotResponse2.getSnapshotInfo().totalShards()) + SnapshotInfo snapshotInfo2 = createSnapshot( + snapshotRepoName, + snapshotName2, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) ); - assertThat(createSnapshotResponse2.getSnapshotInfo().state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo2.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo2.successfulShards(), equalTo(snapshotInfo2.totalShards())); + assertThat(snapshotInfo2.state(), equalTo(SnapshotState.SUCCESS)); DeleteResponse deleteResponse = client().prepareDelete(indexName1, "0").execute().actionGet(); assertEquals(deleteResponse.getResult(), DocWriteResponse.Result.DELETED); @@ -341,6 +324,10 @@ public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { ensureGreen(indexName1, restoredIndexName2); assertDocsPresentInIndex(client, indexName1, numDocsInIndex1); assertDocsPresentInIndex(client, restoredIndexName2, numDocsInIndex2); + // indexing some new docs and validating + indexDocuments(client, indexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(indexName1); + assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 2); // deleting data for restoredIndexName1 and restoring from remote store. internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); @@ -355,9 +342,9 @@ public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { ensureGreen(indexName1); assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); // indexing some new docs and validating - indexDocuments(client, indexName1, numDocsInIndex1, numDocsInIndex1 + 2); + indexDocuments(client, indexName1, numDocsInIndex1 + 2, numDocsInIndex1 + 4); ensureGreen(indexName1); - assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 2); + assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 4); } public void testRestoreShallowCopySnapshotWithDifferentRepo() throws IOException { @@ -391,18 +378,14 @@ public void testRestoreShallowCopySnapshotWithDifferentRepo() throws IOException internalCluster().startDataOnlyNode(); logger.info("--> snapshot"); - CreateSnapshotResponse createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepoName, snapshotName1) - .setWaitForCompletion(true) - .setIndices(indexName1, indexName2) - .get(); - assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0)); - assertThat( - createSnapshotResponse.getSnapshotInfo().successfulShards(), - equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()) + SnapshotInfo snapshotInfo1 = createSnapshot( + snapshotRepoName, + snapshotName1, + new ArrayList<>(Arrays.asList(indexName1, indexName2)) ); - assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); Settings remoteStoreIndexSettings = Settings.builder() .put(IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, remoteStoreRepo2Name) @@ -478,18 +461,10 @@ public void testRestoreShallowSnapshotRepositoryOverriden() throws ExecutionExce ensureGreen(indexName1); logger.info("--> snapshot"); - CreateSnapshotResponse createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepoName, snapshotName1) - .setWaitForCompletion(true) - .setIndices(indexName1) - .get(); - assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0)); - assertThat( - createSnapshotResponse.getSnapshotInfo().successfulShards(), - equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()) - ); - assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.SUCCESS)); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(List.of(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); createRepository(BASE_REMOTE_REPO, "fs", absolutePath2); diff --git a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java index 0dbc0372458b5..66fed65588464 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryHelperTests.java @@ -8,6 +8,7 @@ package org.opensearch.repositories.blobstore; +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.client.Client; import org.opensearch.cluster.metadata.IndexMetadata; @@ -15,6 +16,7 @@ import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; import org.opensearch.index.IndexModule; @@ -29,6 +31,8 @@ import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.snapshots.SnapshotInfo; +import org.opensearch.snapshots.SnapshotState; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchSingleNodeTestCase; @@ -36,9 +40,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.Map; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.is; public class BlobStoreRepositoryHelperTests extends OpenSearchSingleNodeTestCase { @@ -122,6 +129,35 @@ protected Settings getRemoteStoreBackedIndexSettings(String remoteStoreRepo) { .build(); } + protected SnapshotInfo createSnapshot(String repositoryName, String snapshot, List indices) { + logger.info("--> creating snapshot [{}] of {} in [{}]", snapshot, indices, repositoryName); + int num_retries = 0; + final String MD_FILE_NOT_FOUND_ISSUE_PREFIX = "Metadata file is not present for given primary term"; + SnapshotInfo snapshotInfo = null; + while (num_retries++ < 3) { + final CreateSnapshotResponse response = client().admin() + .cluster() + .prepareCreateSnapshot(repositoryName, snapshot) + .setIndices(indices.toArray(Strings.EMPTY_ARRAY)) + .setWaitForCompletion(true) + .get(); + snapshotInfo = response.getSnapshotInfo(); + // currently, if for some reason during snapshot creation, md for the snapshot commit is not + // uploaded to remote store, we fail the snapshot for the shard, so that user can retry the snapshot. + // if it happens during test runs, retrying it for 3 times, to fix the snapshot. + if (snapshotInfo.state() != SnapshotState.PARTIAL + && !snapshotInfo.shardFailures().get(0).reason().contains(MD_FILE_NOT_FOUND_ISSUE_PREFIX)) { + break; + } + logger.info("--> delete snapshot"); + client().admin().cluster().prepareDeleteSnapshot(repositoryName, snapshot).get(); + } + assertThat(snapshotInfo.state(), is(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.failedShards(), equalTo(0)); + return snapshotInfo; + } + protected void indexDocuments(Client client, String indexName) { int numDocs = randomIntBetween(10, 20); for (int i = 0; i < numDocs; i++) { diff --git a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java index d69fb1fd7b349..01de6c3ca6ef2 100644 --- a/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java +++ b/server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryRemoteIndexTests.java @@ -33,7 +33,6 @@ package org.opensearch.repositories.blobstore; import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; -import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.opensearch.client.Client; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.common.settings.Settings; @@ -46,10 +45,12 @@ import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.RepositoryData; import org.opensearch.snapshots.SnapshotId; +import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.test.FeatureFlagSetter; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -115,13 +116,12 @@ public void testRetrieveShallowCopySnapshotCase1() throws IOException { indexDocuments(client, remoteStoreIndexName); logger.info("--> create first snapshot"); - CreateSnapshotResponse createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepositoryName, "test-snap-1") - .setWaitForCompletion(true) - .setIndices(indexName, remoteStoreIndexName) - .get(); - final SnapshotId snapshotId1 = createSnapshotResponse.getSnapshotInfo().snapshotId(); + SnapshotInfo snapshotInfo = createSnapshot( + snapshotRepositoryName, + "test-snap-1", + new ArrayList<>(Arrays.asList(indexName, remoteStoreIndexName)) + ); + final SnapshotId snapshotId1 = snapshotInfo.snapshotId(); String[] lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); assert (lockFiles.length == 0) : "there should be no lock files present in directory, but found " + Arrays.toString(lockFiles); @@ -132,13 +132,12 @@ public void testRetrieveShallowCopySnapshotCase1() throws IOException { .build(); updateRepository(client, snapshotRepositoryName, snapshotRepoSettingsForShallowCopy); - createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepositoryName, "test-snap-2") - .setWaitForCompletion(true) - .setIndices(indexName, remoteStoreIndexName) - .get(); - final SnapshotId snapshotId2 = createSnapshotResponse.getSnapshotInfo().snapshotId(); + snapshotInfo = createSnapshot( + snapshotRepositoryName, + "test-snap-2", + new ArrayList<>(Arrays.asList(indexName, remoteStoreIndexName)) + ); + final SnapshotId snapshotId2 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); assert (lockFiles.length == 1) : "there should be only one lock file, but found " + Arrays.toString(lockFiles); @@ -146,13 +145,12 @@ public void testRetrieveShallowCopySnapshotCase1() throws IOException { logger.info("--> create another normal snapshot"); updateRepository(client, snapshotRepositoryName, snapshotRepoSettings); - createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepositoryName, "test-snap-3") - .setWaitForCompletion(true) - .setIndices(indexName, remoteStoreIndexName) - .get(); - final SnapshotId snapshotId3 = createSnapshotResponse.getSnapshotInfo().snapshotId(); + snapshotInfo = createSnapshot( + snapshotRepositoryName, + "test-snap-3", + new ArrayList<>(Arrays.asList(indexName, remoteStoreIndexName)) + ); + final SnapshotId snapshotId3 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); assert (lockFiles.length == 1) : "there should be only one lock file, but found " + Arrays.toString(lockFiles); @@ -215,13 +213,8 @@ public void testGetRemoteStoreShallowCopyShardMetadata() throws IOException { .build(); updateRepository(client, snapshotRepositoryName, snapshotRepoSettingsForShallowCopy); - CreateSnapshotResponse createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepositoryName, "test-snap-2") - .setWaitForCompletion(true) - .setIndices(remoteStoreIndexName) - .get(); - final SnapshotId snapshotId = createSnapshotResponse.getSnapshotInfo().snapshotId(); + SnapshotInfo snapshotInfo = createSnapshot(snapshotRepositoryName, "test-snap-2", new ArrayList<>(List.of(remoteStoreIndexName))); + final SnapshotId snapshotId = snapshotInfo.snapshotId(); String[] lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); assert (lockFiles.length == 1) : "there should be only one lock file, but found " + Arrays.toString(lockFiles); @@ -295,26 +288,24 @@ public void testRetrieveShallowCopySnapshotCase2() throws IOException { assertTrue(updatedRepositoryMetadata.settings().getAsBoolean(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), false)); - CreateSnapshotResponse createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepositoryName, "test-snap-1") - .setWaitForCompletion(true) - .setIndices(indexName, remoteStoreIndexName) - .get(); - final SnapshotId snapshotId1 = createSnapshotResponse.getSnapshotInfo().snapshotId(); + SnapshotInfo snapshotInfo = createSnapshot( + snapshotRepositoryName, + "test-snap-1", + new ArrayList<>(Arrays.asList(indexName, remoteStoreIndexName)) + ); + final SnapshotId snapshotId1 = snapshotInfo.snapshotId(); String[] lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); assert (lockFiles.length == 1) : "lock files are " + Arrays.toString(lockFiles); assert lockFiles[0].endsWith(snapshotId1.getUUID() + ".lock"); logger.info("--> create second remote index shallow snapshot"); - createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepositoryName, "test-snap-2") - .setWaitForCompletion(true) - .setIndices(indexName, remoteStoreIndexName) - .get(); - final SnapshotId snapshotId2 = createSnapshotResponse.getSnapshotInfo().snapshotId(); + snapshotInfo = createSnapshot( + snapshotRepositoryName, + "test-snap-2", + new ArrayList<>(Arrays.asList(indexName, remoteStoreIndexName)) + ); + final SnapshotId snapshotId2 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); assert (lockFiles.length == 2) : "lock files are " + Arrays.toString(lockFiles); @@ -323,13 +314,12 @@ public void testRetrieveShallowCopySnapshotCase2() throws IOException { assert lockFiles[0].contains(snapshotId.getUUID()) || lockFiles[1].contains(snapshotId.getUUID()); } logger.info("--> create third remote index shallow snapshot"); - createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepositoryName, "test-snap-3") - .setWaitForCompletion(true) - .setIndices(indexName, remoteStoreIndexName) - .get(); - final SnapshotId snapshotId3 = createSnapshotResponse.getSnapshotInfo().snapshotId(); + snapshotInfo = createSnapshot( + snapshotRepositoryName, + "test-snap-3", + new ArrayList<>(Arrays.asList(indexName, remoteStoreIndexName)) + ); + final SnapshotId snapshotId3 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); assert (lockFiles.length == 3); @@ -341,13 +331,12 @@ public void testRetrieveShallowCopySnapshotCase2() throws IOException { } logger.info("--> create normal snapshot"); createRepository(client, snapshotRepositoryName, snapshotRepoSettings); - createSnapshotResponse = client.admin() - .cluster() - .prepareCreateSnapshot(snapshotRepositoryName, "test-snap-4") - .setWaitForCompletion(true) - .setIndices(indexName, remoteStoreIndexName) - .get(); - final SnapshotId snapshotId4 = createSnapshotResponse.getSnapshotInfo().snapshotId(); + snapshotInfo = createSnapshot( + snapshotRepositoryName, + "test-snap-4", + new ArrayList<>(Arrays.asList(indexName, remoteStoreIndexName)) + ); + final SnapshotId snapshotId4 = snapshotInfo.snapshotId(); lockFiles = getLockFilesInRemoteStore(remoteStoreIndexName, remoteStoreRepositoryName); assert (lockFiles.length == 3) : "lock files are " + Arrays.toString(lockFiles); diff --git a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java index a17d71957167a..9a23fe6c66979 100644 --- a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -32,6 +32,7 @@ package org.opensearch.snapshots; import org.opensearch.Version; +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotRequestBuilder; import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.opensearch.action.admin.cluster.state.ClusterStateResponse; import org.opensearch.action.index.IndexRequestBuilder; @@ -480,11 +481,7 @@ protected String initWithSnapshotVersion(String repoName, Path repoPath, Version protected SnapshotInfo createFullSnapshot(String repoName, String snapshotName) { logger.info("--> creating full snapshot [{}] in [{}]", snapshotName, repoName); - CreateSnapshotResponse createSnapshotResponse = clusterAdmin().prepareCreateSnapshot(repoName, snapshotName) - .setIncludeGlobalState(true) - .setWaitForCompletion(true) - .get(); - final SnapshotInfo snapshotInfo = createSnapshotResponse.getSnapshotInfo(); + final SnapshotInfo snapshotInfo = snapshotWithRetries(repoName, snapshotName, null); assertThat(snapshotInfo.successfulShards(), is(snapshotInfo.totalShards())); assertThat(snapshotInfo.state(), is(SnapshotState.SUCCESS)); return snapshotInfo; @@ -492,20 +489,41 @@ protected SnapshotInfo createFullSnapshot(String repoName, String snapshotName) protected SnapshotInfo createSnapshot(String repositoryName, String snapshot, List indices) { logger.info("--> creating snapshot [{}] of {} in [{}]", snapshot, indices, repositoryName); - final CreateSnapshotResponse response = client().admin() - .cluster() - .prepareCreateSnapshot(repositoryName, snapshot) - .setIndices(indices.toArray(Strings.EMPTY_ARRAY)) - .setWaitForCompletion(true) - .get(); + SnapshotInfo snapshotInfo = snapshotWithRetries(repositoryName, snapshot, indices); - final SnapshotInfo snapshotInfo = response.getSnapshotInfo(); assertThat(snapshotInfo.state(), is(SnapshotState.SUCCESS)); assertThat(snapshotInfo.successfulShards(), greaterThan(0)); assertThat(snapshotInfo.failedShards(), equalTo(0)); return snapshotInfo; } + protected SnapshotInfo snapshotWithRetries(String repositoryName, String snapshot, List indices) { + int num_retries = 0; + final String MD_FILE_NOT_FOUND_ISSUE_PREFIX = "Metadata file is not present for given primary term"; + SnapshotInfo snapshotInfo = null; + while (num_retries++ < 3) { + CreateSnapshotRequestBuilder snapshotRequestBuilder = client().admin() + .cluster() + .prepareCreateSnapshot(repositoryName, snapshot) + .setWaitForCompletion(true); + if (indices != null) { + snapshotRequestBuilder.setIndices(indices.toArray(Strings.EMPTY_ARRAY)); + } + final CreateSnapshotResponse response = snapshotRequestBuilder.get(); + snapshotInfo = response.getSnapshotInfo(); + // currently, if for some reason during snapshot creation, md for the snapshot commit is not + // uploaded to remote store, we fail the snapshot for the shard, so that user can retry the snapshot. + // if it happens during test runs, retrying it for 3 times, to fix the snapshot. + if (snapshotInfo.state() != SnapshotState.PARTIAL + && !snapshotInfo.shardFailures().get(0).reason().contains(MD_FILE_NOT_FOUND_ISSUE_PREFIX)) { + break; + } + logger.info("--> delete snapshot"); + client().admin().cluster().prepareDeleteSnapshot(repositoryName, snapshot).get(); + } + return snapshotInfo; + } + protected void createIndexWithRandomDocs(String indexName, int docCount) throws InterruptedException { createIndex(indexName); ensureGreen();