Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] [Remote Store] Update index settings on shard movement during remote store migration #13466

Merged
merged 1 commit into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;

import static org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING;
import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING;
import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING;
import static org.opensearch.repositories.fs.ReloadableFsRepository.REPOSITORIES_FAILRATE_SETTING;
Expand Down Expand Up @@ -199,4 +200,25 @@ public void setRefreshFrequency(int refreshFrequency) {
this.refreshFrequency = refreshFrequency;
}
}

public void excludeNodeSet(String attr, String value) {
assertAcked(
internalCluster().client()
.admin()
.cluster()
.prepareUpdateSettings()
.setTransientSettings(Settings.builder().put("cluster.routing.allocation.exclude._" + attr, value))
.get()
);
}

public void stopShardRebalancing() {
assertAcked(
client().admin()
.cluster()
.prepareUpdateSettings()
.setPersistentSettings(Settings.builder().put(CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none").build())
.get()
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.opensearch.test.transport.MockTransportService;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand Down Expand Up @@ -132,8 +133,8 @@ public void testRemotePrimaryDocRepReplica() throws Exception {

/*
Scenario:
- Starts 1 docrep backed data node
- Creates an index with 0 replica
- Starts 2 docrep backed data node
- Creates an index with 1 replica
- Starts 1 remote backed data node
- Index some docs
- Move primary copy from docrep to remote through _cluster/reroute
Expand All @@ -145,14 +146,14 @@ public void testRemotePrimaryDocRepReplica() throws Exception {
public void testRemotePrimaryDocRepAndRemoteReplica() throws Exception {
internalCluster().startClusterManagerOnlyNode();

logger.info("---> Starting 1 docrep data nodes");
String docrepNodeName = internalCluster().startDataOnlyNode();
logger.info("---> Starting 2 docrep data nodes");
internalCluster().startDataOnlyNodes(2);
internalCluster().validateClusterFormed();
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);

logger.info("---> Creating index with 0 replica");
logger.info("---> Creating index with 1 replica");
Settings zeroReplicas = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "1s")
.put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "1s")
.build();
Expand Down Expand Up @@ -245,14 +246,26 @@ RLs on remote enabled copies are brought up to (GlobalCkp + 1) upon a flush requ
pollAndCheckRetentionLeases(REMOTE_PRI_DOCREP_REMOTE_REP);
}

/*
Scenario:
- Starts 2 docrep backed data node
- Creates an index with 1 replica
- Starts 1 remote backed data node
- Index some docs
- Move primary copy from docrep to remote through _cluster/reroute
- Starts another remote backed data node
- Expands index to 2 replicas. One replica copy lies in remote backed node and other in docrep backed node
- Index some more docs
- Assert retention lease consistency
*/
public void testMissingRetentionLeaseCreatedOnFailedOverRemoteReplica() throws Exception {
internalCluster().startClusterManagerOnlyNode();

logger.info("---> Starting docrep data node");
internalCluster().startDataOnlyNode();
logger.info("---> Starting 2 docrep data nodes");
internalCluster().startDataOnlyNodes(2);

Settings zeroReplicasAndOverridenSyncIntervals = Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "100ms")
.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "100ms")
.build();
Expand Down Expand Up @@ -323,25 +336,24 @@ private void pollAndCheckRetentionLeases(String indexName) throws Exception {

/*
Scenario:
- Starts 1 docrep backed data node
- Creates an index with 0 replica
- Starts 2 docrep backed data node
- Creates an index with 1 replica
- Starts 1 remote backed data node
- Move primary copy from docrep to remote through _cluster/reroute
- Expands index to 1 replica
- Stops remote enabled node
- Ensure doc count is same after failover
- Index some more docs to ensure working of failed-over primary
*/
public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
internalCluster().startClusterManagerOnlyNode();

logger.info("---> Starting 1 docrep data nodes");
String docrepNodeName = internalCluster().startDataOnlyNode();
logger.info("---> Starting 2 docrep data nodes");
internalCluster().startDataOnlyNodes(2);
internalCluster().validateClusterFormed();
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);

logger.info("---> Creating index with 0 replica");
Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build();
Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build();
createIndex(FAILOVER_REMOTE_TO_DOCREP, excludeRemoteNode);
ensureGreen(FAILOVER_REMOTE_TO_DOCREP);
initDocRepToRemoteMigration();
Expand Down Expand Up @@ -376,8 +388,8 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
);
ensureGreen(FAILOVER_REMOTE_TO_DOCREP);

logger.info("---> Expanding index to 1 replica copy");
Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build();
logger.info("---> Expanding index to 2 replica copies");
Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build();
assertAcked(
internalCluster().client()
.admin()
Expand Down Expand Up @@ -412,7 +424,7 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {

logger.info("---> Stop remote store enabled node");
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName));
ensureStableCluster(2);
ensureStableCluster(3);
ensureYellow(FAILOVER_REMOTE_TO_DOCREP);

shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap();
Expand All @@ -433,16 +445,16 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
refreshAndWaitForReplication(FAILOVER_REMOTE_TO_DOCREP);

shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap();
assertEquals(1, shardStatsMap.size());
assertEquals(2, shardStatsMap.size());
shardStatsMap.forEach(
(shardRouting, shardStats) -> { assertEquals(firstBatch + secondBatch, shardStats.getStats().getDocs().getCount()); }
);
}

/*
Scenario:
- Starts 1 docrep backed data node
- Creates an index with 0 replica
- Starts 2 docrep backed data nodes
- Creates an index with 1 replica
- Starts 1 remote backed data node
- Moves primary copy from docrep to remote through _cluster/reroute
- Starts 1 more remote backed data node
Expand All @@ -455,13 +467,13 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
internalCluster().startClusterManagerOnlyNode();

logger.info("---> Starting 1 docrep data node");
String docrepNodeName = internalCluster().startDataOnlyNode();
logger.info("---> Starting 2 docrep data nodes");
List<String> docrepNodeNames = internalCluster().startDataOnlyNodes(2);
internalCluster().validateClusterFormed();
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);

logger.info("---> Creating index with 0 replica");
createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build());
logger.info("---> Creating index with 1 replica");
createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build());
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
initDocRepToRemoteMigration();

Expand All @@ -484,15 +496,17 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
AsyncIndexingService asyncIndexingService = new AsyncIndexingService(FAILOVER_REMOTE_TO_REMOTE);
asyncIndexingService.startIndexing();

logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", docrepNodeName, remoteNodeName1);
String primaryNodeName = primaryNodeName(FAILOVER_REMOTE_TO_REMOTE);
logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", primaryNodeName, remoteNodeName1);
assertAcked(
internalCluster().client()
.admin()
.cluster()
.prepareReroute()
.add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, docrepNodeName, remoteNodeName1))
.add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, primaryNodeName, remoteNodeName1))
.get()
);
waitForRelocation();
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
assertEquals(primaryNodeName(FAILOVER_REMOTE_TO_REMOTE), remoteNodeName1);

Expand All @@ -507,7 +521,13 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
.indices()
.prepareUpdateSettings()
.setIndices(FAILOVER_REMOTE_TO_REMOTE)
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build())
.setSettings(
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2)
// prevent replica copy from being allocated to the extra docrep node
.put("index.routing.allocation.exclude._name", primaryNodeName)
.build()
)
.get()
);
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
Expand Down Expand Up @@ -536,8 +556,8 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {

logger.info("---> Stop remote store enabled node hosting the primary");
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName1));
ensureStableCluster(3);
ensureYellow(FAILOVER_REMOTE_TO_REMOTE);
ensureStableCluster(4);
ensureYellowAndNoInitializingShards(FAILOVER_REMOTE_TO_REMOTE);
DiscoveryNodes finalNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes();

waitUntil(() -> {
Expand Down Expand Up @@ -580,7 +600,6 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
- Creates an index with 0 replica
- Starts 1 remote backed data node
- Move primary copy from docrep to remote through _cluster/reroute
- Expands index to 1 replica
- Stops remote enabled node
- Ensure doc count is same after failover
- Index some more docs to ensure working of failed-over primary
Expand Down Expand Up @@ -664,7 +683,8 @@ private void assertReplicaAndPrimaryConsistency(String indexName, int firstBatch
RemoteSegmentStats remoteSegmentStats = shardStats.getSegments().getRemoteSegmentStats();
assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0);
assertTrue(remoteSegmentStats.getTotalUploadTime() > 0);
} else {
}
if (shardRouting.unassigned() == false && shardRouting.primary() == false) {
boolean remoteNode = nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode();
assertEquals(
"Mismatched doc count. Is this on remote node ? " + remoteNode,
Expand Down
Loading
Loading