Skip to content

Commit

Permalink
[Remote Store] Update index settings on shard movement during remote …
Browse files Browse the repository at this point in the history
…store migration (opensearch-project#13316)

Signed-off-by: Shourya Dutta Biswas <[email protected]>
  • Loading branch information
shourya035 authored Apr 30, 2024
1 parent f1228e9 commit 1f406db
Show file tree
Hide file tree
Showing 20 changed files with 1,480 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;

import static org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING;
import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING;
import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING;
import static org.opensearch.repositories.fs.ReloadableFsRepository.REPOSITORIES_FAILRATE_SETTING;
Expand Down Expand Up @@ -199,4 +200,25 @@ public void setRefreshFrequency(int refreshFrequency) {
this.refreshFrequency = refreshFrequency;
}
}

public void excludeNodeSet(String attr, String value) {
assertAcked(
internalCluster().client()
.admin()
.cluster()
.prepareUpdateSettings()
.setTransientSettings(Settings.builder().put("cluster.routing.allocation.exclude._" + attr, value))
.get()
);
}

public void stopShardRebalancing() {
assertAcked(
client().admin()
.cluster()
.prepareUpdateSettings()
.setPersistentSettings(Settings.builder().put(CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none").build())
.get()
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.opensearch.test.transport.MockTransportService;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand Down Expand Up @@ -132,8 +133,8 @@ public void testRemotePrimaryDocRepReplica() throws Exception {

/*
Scenario:
- Starts 1 docrep backed data node
- Creates an index with 0 replica
- Starts 2 docrep backed data node
- Creates an index with 1 replica
- Starts 1 remote backed data node
- Index some docs
- Move primary copy from docrep to remote through _cluster/reroute
Expand All @@ -145,14 +146,14 @@ public void testRemotePrimaryDocRepReplica() throws Exception {
public void testRemotePrimaryDocRepAndRemoteReplica() throws Exception {
internalCluster().startClusterManagerOnlyNode();

logger.info("---> Starting 1 docrep data nodes");
String docrepNodeName = internalCluster().startDataOnlyNode();
logger.info("---> Starting 2 docrep data nodes");
internalCluster().startDataOnlyNodes(2);
internalCluster().validateClusterFormed();
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);

logger.info("---> Creating index with 0 replica");
logger.info("---> Creating index with 1 replica");
Settings zeroReplicas = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "1s")
.put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "1s")
.build();
Expand Down Expand Up @@ -245,14 +246,26 @@ RLs on remote enabled copies are brought up to (GlobalCkp + 1) upon a flush requ
pollAndCheckRetentionLeases(REMOTE_PRI_DOCREP_REMOTE_REP);
}

/*
Scenario:
- Starts 2 docrep backed data node
- Creates an index with 1 replica
- Starts 1 remote backed data node
- Index some docs
- Move primary copy from docrep to remote through _cluster/reroute
- Starts another remote backed data node
- Expands index to 2 replicas. One replica copy lies in remote backed node and other in docrep backed node
- Index some more docs
- Assert retention lease consistency
*/
public void testMissingRetentionLeaseCreatedOnFailedOverRemoteReplica() throws Exception {
internalCluster().startClusterManagerOnlyNode();

logger.info("---> Starting docrep data node");
internalCluster().startDataOnlyNode();
logger.info("---> Starting 2 docrep data nodes");
internalCluster().startDataOnlyNodes(2);

Settings zeroReplicasAndOverridenSyncIntervals = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "100ms")
.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "100ms")
.build();
Expand Down Expand Up @@ -323,25 +336,24 @@ private void pollAndCheckRetentionLeases(String indexName) throws Exception {

/*
Scenario:
- Starts 1 docrep backed data node
- Creates an index with 0 replica
- Starts 2 docrep backed data node
- Creates an index with 1 replica
- Starts 1 remote backed data node
- Move primary copy from docrep to remote through _cluster/reroute
- Expands index to 1 replica
- Stops remote enabled node
- Ensure doc count is same after failover
- Index some more docs to ensure working of failed-over primary
*/
public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
internalCluster().startClusterManagerOnlyNode();

logger.info("---> Starting 1 docrep data nodes");
String docrepNodeName = internalCluster().startDataOnlyNode();
logger.info("---> Starting 2 docrep data nodes");
internalCluster().startDataOnlyNodes(2);
internalCluster().validateClusterFormed();
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);

logger.info("---> Creating index with 0 replica");
Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build();
Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build();
createIndex(FAILOVER_REMOTE_TO_DOCREP, excludeRemoteNode);
ensureGreen(FAILOVER_REMOTE_TO_DOCREP);
initDocRepToRemoteMigration();
Expand Down Expand Up @@ -376,8 +388,8 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
);
ensureGreen(FAILOVER_REMOTE_TO_DOCREP);

logger.info("---> Expanding index to 1 replica copy");
Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build();
logger.info("---> Expanding index to 2 replica copies");
Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build();
assertAcked(
internalCluster().client()
.admin()
Expand Down Expand Up @@ -412,7 +424,7 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {

logger.info("---> Stop remote store enabled node");
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName));
ensureStableCluster(2);
ensureStableCluster(3);
ensureYellow(FAILOVER_REMOTE_TO_DOCREP);

shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap();
Expand All @@ -433,16 +445,16 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
refreshAndWaitForReplication(FAILOVER_REMOTE_TO_DOCREP);

shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap();
assertEquals(1, shardStatsMap.size());
assertEquals(2, shardStatsMap.size());
shardStatsMap.forEach(
(shardRouting, shardStats) -> { assertEquals(firstBatch + secondBatch, shardStats.getStats().getDocs().getCount()); }
);
}

/*
Scenario:
- Starts 1 docrep backed data node
- Creates an index with 0 replica
- Starts 2 docrep backed data nodes
- Creates an index with 1 replica
- Starts 1 remote backed data node
- Moves primary copy from docrep to remote through _cluster/reroute
- Starts 1 more remote backed data node
Expand All @@ -455,13 +467,13 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
internalCluster().startClusterManagerOnlyNode();

logger.info("---> Starting 1 docrep data node");
String docrepNodeName = internalCluster().startDataOnlyNode();
logger.info("---> Starting 2 docrep data nodes");
List<String> docrepNodeNames = internalCluster().startDataOnlyNodes(2);
internalCluster().validateClusterFormed();
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);

logger.info("---> Creating index with 0 replica");
createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build());
logger.info("---> Creating index with 1 replica");
createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build());
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
initDocRepToRemoteMigration();

Expand All @@ -484,15 +496,17 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
AsyncIndexingService asyncIndexingService = new AsyncIndexingService(FAILOVER_REMOTE_TO_REMOTE);
asyncIndexingService.startIndexing();

logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", docrepNodeName, remoteNodeName1);
String primaryNodeName = primaryNodeName(FAILOVER_REMOTE_TO_REMOTE);
logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", primaryNodeName, remoteNodeName1);
assertAcked(
internalCluster().client()
.admin()
.cluster()
.prepareReroute()
.add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, docrepNodeName, remoteNodeName1))
.add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, primaryNodeName, remoteNodeName1))
.get()
);
waitForRelocation();
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
assertEquals(primaryNodeName(FAILOVER_REMOTE_TO_REMOTE), remoteNodeName1);

Expand All @@ -507,7 +521,13 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
.indices()
.prepareUpdateSettings()
.setIndices(FAILOVER_REMOTE_TO_REMOTE)
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build())
.setSettings(
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2)
// prevent replica copy from being allocated to the extra docrep node
.put("index.routing.allocation.exclude._name", primaryNodeName)
.build()
)
.get()
);
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
Expand Down Expand Up @@ -536,8 +556,8 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {

logger.info("---> Stop remote store enabled node hosting the primary");
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName1));
ensureStableCluster(3);
ensureYellow(FAILOVER_REMOTE_TO_REMOTE);
ensureStableCluster(4);
ensureYellowAndNoInitializingShards(FAILOVER_REMOTE_TO_REMOTE);
DiscoveryNodes finalNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes();

waitUntil(() -> {
Expand Down Expand Up @@ -580,7 +600,6 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
- Creates an index with 0 replica
- Starts 1 remote backed data node
- Move primary copy from docrep to remote through _cluster/reroute
- Expands index to 1 replica
- Stops remote enabled node
- Ensure doc count is same after failover
- Index some more docs to ensure working of failed-over primary
Expand Down Expand Up @@ -664,7 +683,8 @@ private void assertReplicaAndPrimaryConsistency(String indexName, int firstBatch
RemoteSegmentStats remoteSegmentStats = shardStats.getSegments().getRemoteSegmentStats();
assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0);
assertTrue(remoteSegmentStats.getTotalUploadTime() > 0);
} else {
}
if (shardRouting.unassigned() == false && shardRouting.primary() == false) {
boolean remoteNode = nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode();
assertEquals(
"Mismatched doc count. Is this on remote node ? " + remoteNode,
Expand Down
Loading

0 comments on commit 1f406db

Please sign in to comment.