diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index e2ef5f85abc74..bccca283ba772 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -353,7 +353,13 @@ public void assertRemoteStoreRepositoryOnAllNodes(String repositoryName) { // Validated that all the restricted settings are entact on all the nodes. repository.getRestrictedSystemRepositorySettings() .stream() - .forEach(setting -> assertEquals(setting.get(actualRepository.settings()), setting.get(expectedRepository.settings()))); + .forEach( + setting -> assertEquals( + String.format(Locale.ROOT, "Restricted Settings mismatch [%s]", setting.getKey()), + setting.get(actualRepository.settings()), + setting.get(expectedRepository.settings()) + ) + ); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 5e92bb195680b..3df4cc4e34d93 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -8,23 +8,22 @@ package org.opensearch.remotestore; -import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreResponse; -import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; -import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.settings.Settings; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; import java.nio.file.Files; -import java.util.Locale; +import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.concurrent.ExecutionException; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; -import static org.opensearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE; -import static org.opensearch.indices.ShardLimitValidator.SETTING_MAX_SHARDS_PER_CLUSTER_KEY; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStoreClusterStateRestoreIT extends BaseRemoteStoreRestoreIT { @@ -48,47 +47,10 @@ private Map initialTestSetup(int shardCount, int replicaCount, int private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) { internalCluster().stopAllNodes(); - addNewNodes(dataNodeCount, clusterManagerNodeCount); + internalCluster().startClusterManagerOnlyNodes(clusterManagerNodeCount); + internalCluster().startDataOnlyNodes(dataNodeCount); } - private void restoreAndValidate(String clusterUUID, Map indexStats) throws Exception { - restoreAndValidate(clusterUUID, indexStats, true); - } - - private void restoreAndValidate(String clusterUUID, Map indexStats, boolean validate) throws Exception { - // TODO once auto restore is merged, the remote cluster state will be restored - - if (validate) { - // Step - 4 validation restore is successful. - ensureGreen(INDEX_NAME); - verifyRestoredData(indexStats, INDEX_NAME); - } - } - - private void restoreAndValidateFails( - String clusterUUID, - PlainActionFuture actionListener, - Class clazz, - String errorSubString - ) { - - try { - restoreAndValidate(clusterUUID, null, false); - } catch (Exception e) { - assertTrue( - String.format(Locale.ROOT, "%s %s", clazz, e), - clazz.isAssignableFrom(e.getClass()) - || clazz.isAssignableFrom(e.getCause().getClass()) - || (e.getCause().getCause() != null && clazz.isAssignableFrom(e.getCause().getCause().getClass())) - ); - assertTrue( - String.format(Locale.ROOT, "Error message mismatch. Expected: [%s]. Actual: [%s]", errorSubString, e.getMessage()), - e.getMessage().contains(errorSubString) - ); - } - } - - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") public void testFullClusterRestore() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -106,10 +68,10 @@ public void testFullClusterRestore() throws Exception { assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; // Step - 3 Trigger full cluster restore and validate - restoreAndValidate(prevClusterUUID, indexStats); + validateMetadata(List.of(INDEX_NAME)); + verifyRestoredData(indexStats, INDEX_NAME); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") public void testFullClusterRestoreMultipleIndices() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -134,155 +96,100 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; // Step - 3 Trigger full cluster restore - restoreAndValidate(prevClusterUUID, indexStats); - ensureGreen(secondIndexName); - verifyRestoredData(indexStats2, secondIndexName); + validateMetadata(List.of(INDEX_NAME, secondIndexName)); + verifyRestoredData(indexStats, INDEX_NAME); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") - public void testFullClusterRestoreFailureValidationFailures() throws Exception { + public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); int clusterManagerNodeCount = 1; - // index some data to generate files in remote directory - Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - String prevClusterUUID = clusterService().state().metadata().clusterUUID(); - - // Start of Test - 1 - // Test - 1 Trigger full cluster restore and validate it fails due to incorrect cluster UUID - PlainActionFuture future = PlainActionFuture.newFuture(); - restoreAndValidateFails("randomUUID", future, IllegalStateException.class, "Remote Cluster State not found - randomUUID"); - // End of Test - 1 + // Step - 1 index some data to generate files in remote directory + initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - // Start of Test - 3 - // Test - 2 Trigger full cluster restore and validate it fails due to cluster UUID same as current cluster UUID - future = PlainActionFuture.newFuture(); - restoreAndValidateFails( - clusterService().state().metadata().clusterUUID(), - future, - IllegalArgumentException.class, - "clusterUUID to restore from should be different from current cluster UUID" - ); - // End of Test - 2 + String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + String clusterName = clusterService().state().getClusterName().value(); - // Start of Test - 3 // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata - // Restarting cluster with just 1 data node helps with applying cluster settings - resetCluster(1, clusterManagerNodeCount); - String newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - - reduceShardLimits(1, 1); - - // Step - 4 Trigger full cluster restore and validate it fails - future = PlainActionFuture.newFuture(); - restoreAndValidateFails( - prevClusterUUID, - future, - IllegalArgumentException.class, - "this action would add [2] total shards, but this cluster currently has [0]/[1] maximum shards open" - ); - resetShardLimits(); - // End of Test - 3 - - // Start of Test - 4 - // Test -4 Reset cluster and trigger full restore with same name index in the cluster - // Test -4 Add required nodes for this test after last reset. - addNewNodes(dataNodeCount - 1, 0); - - newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - - // Test -4 Step - 2 Create a new index with same name - createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); - ensureYellowAndNoInitializingShards(INDEX_NAME); - ensureGreen(INDEX_NAME); - - future = PlainActionFuture.newFuture(); - - // Test -4 Step - 3 Trigger full cluster restore and validate fails - restoreAndValidateFails( - prevClusterUUID, - future, - IllegalStateException.class, - "cannot restore index [remote-store-test-idx-1] because an open index with same name/uuid already exists in the cluster" - ); + internalCluster().stopAllNodes(); + // Step - 3 Delete index metadata file in remote + try { + Files.move( + segmentRepoPath.resolve( + RemoteClusterStateService.encodeString(clusterName) + "/cluster-state/" + prevClusterUUID + "/index" + ), + segmentRepoPath.resolve("cluster-state/") + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + assertThrows(IllegalStateException.class, () -> addNewNodes(dataNodeCount, clusterManagerNodeCount)); + // Test is complete - // Test -4 Step - 4 validation restore is successful. - ensureGreen(INDEX_NAME); - // End of Test - 4 + // Starting a node without remote state to ensure test cleanup + internalCluster().startNode(Settings.builder().put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), false).build()); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") - public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception { + public void testRemoteStateFullRestart() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); - int clusterManagerNodeCount = 1; - - // Step - 1 index some data to generate files in remote directory - initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); + int clusterManagerNodeCount = 3; + Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); - - // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata - resetCluster(dataNodeCount, clusterManagerNodeCount); - - String newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - - // Step - 4 Delete index metadata file in remote + // Delete index metadata file in remote try { Files.move( segmentRepoPath.resolve( RemoteClusterStateService.encodeString(clusterService().state().getClusterName().value()) + "/cluster-state/" + prevClusterUUID - + "/index" + + "/manifest" ), segmentRepoPath.resolve("cluster-state/") ); } catch (IOException e) { throw new RuntimeException(e); } - - // Step - 5 Trigger full cluster restore and validate fails - PlainActionFuture future = PlainActionFuture.newFuture(); - restoreAndValidateFails(prevClusterUUID, future, IllegalStateException.class, "asdsa"); + internalCluster().fullRestart(); + ensureGreen(INDEX_NAME); + String newClusterUUID = clusterService().state().metadata().clusterUUID(); + assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. cluster uuid has changed"; + validateCurrentMetadata(); + verifyRestoredData(indexStats, INDEX_NAME); } - private void reduceShardLimits(int maxShardsPerNode, int maxShardsPerCluster) { - // Step 3 - Reduce shard limits to hit shard limit with less no of shards - try { - client().admin() - .cluster() - .updateSettings( - new ClusterUpdateSettingsRequest().transientSettings( - Settings.builder() - .put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode) - .put(SETTING_MAX_SHARDS_PER_CLUSTER_KEY, maxShardsPerCluster) - ) - ) - .get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); + private void validateMetadata(List indexNames) { + assertEquals(clusterService().state().metadata().indices().size(), indexNames.size()); + for (String indexName : indexNames) { + assertTrue(clusterService().state().metadata().hasIndex(indexName)); } } - private void resetShardLimits() { - // Step - 5 Reset the cluster settings - ClusterUpdateSettingsRequest resetRequest = new ClusterUpdateSettingsRequest(); - resetRequest.transientSettings( - Settings.builder().putNull(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey()).putNull(SETTING_MAX_SHARDS_PER_CLUSTER_KEY) + private void validateCurrentMetadata() throws Exception { + RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance( + RemoteClusterStateService.class, + internalCluster().getClusterManagerName() ); - - try { - client().admin().cluster().updateSettings(resetRequest).get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } + assertBusy(() -> { + ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().metadata().clusterUUID() + ).get(); + ClusterState clusterState = getClusterState(); + Metadata currentMetadata = clusterState.metadata(); + assertEquals(currentMetadata.indices().size(), manifest.getIndices().size()); + assertEquals(currentMetadata.coordinationMetadata().term(), manifest.getClusterTerm()); + assertEquals(clusterState.version(), manifest.getStateVersion()); + assertEquals(clusterState.stateUUID(), manifest.getStateUUID()); + assertEquals(currentMetadata.clusterUUIDCommitted(), manifest.isClusterUUIDCommitted()); + for (UploadedIndexMetadata uploadedIndexMetadata : manifest.getIndices()) { + IndexMetadata currentIndexMetadata = currentMetadata.index(uploadedIndexMetadata.getIndexName()); + assertEquals(currentIndexMetadata.getIndex().getUUID(), uploadedIndexMetadata.getIndexUUID()); + } + }); } - } diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 3c7423f73685f..a290f3a1751a0 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -1853,10 +1853,12 @@ public synchronized void stopRandomNodeNotCurrentMaster() throws IOException { */ public void stopAllNodes() { try { - int totalDataNodes = numDataNodes(); - while (totalDataNodes > 0) { - stopRandomDataNode(); - totalDataNodes -= 1; + if (numDataAndClusterManagerNodes() != numClusterManagerNodes()) { + int totalDataNodes = numDataNodes(); + while (totalDataNodes > 0) { + stopRandomDataNode(); + totalDataNodes -= 1; + } } int totalClusterManagerNodes = numClusterManagerNodes(); while (totalClusterManagerNodes > 1) {