Skip to content

Commit

Permalink
Fix flaky RemoteIndexRecoveryIT testRerouteRecovery test opensearch-p…
Browse files Browse the repository at this point in the history
…roject#9580

Signed-off-by: Ashish Singh <[email protected]>
  • Loading branch information
ashking94 committed Jan 18, 2024
1 parent c132db9 commit 047fbca
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ private void assertOnGoingRecoveryState(
}

private void slowDownRecovery(ByteSizeValue shardSize) {
long chunkSize = Math.max(1, shardSize.getBytes() / 10);
long chunkSize = getChunkSize(shardSize);
assertTrue(
client().admin()
.cluster()
Expand All @@ -270,6 +270,10 @@ private void slowDownRecovery(ByteSizeValue shardSize) {
);
}

protected long getChunkSize(ByteSizeValue shardSize) {
return Math.max(1, shardSize.getBytes() / 10);
}

private void restoreRecoverySpeed() {
assertTrue(
client().admin()
Expand Down Expand Up @@ -523,12 +527,12 @@ public void testRerouteRecovery() throws Exception {

logger.info("--> waiting for recovery to start both on source and target");
final Index index = resolveIndex(INDEX_NAME);
assertBusy(() -> {
assertBusyWithFixedSleepTime(() -> {
IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodeA);
assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsSource(), equalTo(1));
indicesService = internalCluster().getInstance(IndicesService.class, nodeB);
assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsTarget(), equalTo(1));
});
}, TimeValue.timeValueSeconds(10), TimeValue.timeValueMillis(500));

logger.info("--> request recoveries");
RecoveryResponse response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.common.unit.ByteSizeValue;
import org.opensearch.index.IndexModule;
import org.opensearch.index.IndexSettings;
import org.opensearch.indices.recovery.IndexRecoveryIT;
Expand Down Expand Up @@ -158,9 +159,8 @@ public void testReplicaRecovery() {

}

@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9580")
public void testRerouteRecovery() {

@Override
protected long getChunkSize(ByteSizeValue shardSize) {
return Math.max(1, shardSize.getBytes() / 8);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateUtils;
import org.opensearch.common.time.FormatNames;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.common.util.MockBigArrays;
import org.opensearch.common.util.MockPageCacheRecycler;
import org.opensearch.common.util.concurrent.ThreadContext;
Expand Down Expand Up @@ -1095,6 +1096,38 @@ public static void assertBusy(CheckedRunnable<Exception> codeBlock, long maxWait
}
}

/**
* Runs the code block for the provided max wait time and sleeping for fixed sleep time, waiting for no assertions to trip.
*/
public static void assertBusyWithFixedSleepTime(CheckedRunnable<Exception> codeBlock, TimeValue maxWaitTime, TimeValue sleepTime)
throws Exception {
long maxTimeInMillis = maxWaitTime.millis();
long sleepTimeInMillis = sleepTime.millis();
if (sleepTimeInMillis > maxTimeInMillis) {
throw new IllegalArgumentException("sleepTime is more than the maxWaitTime");
}
long sum = 0;
List<AssertionError> failures = new ArrayList<>();
while (sum <= maxTimeInMillis) {
try {
codeBlock.run();
return;
} catch (AssertionError e) {
failures.add(e);
}
sum += sleepTimeInMillis;
Thread.sleep(sleepTimeInMillis);
}
try {
codeBlock.run();
} catch (AssertionError e) {
for (AssertionError failure : failures) {
e.addSuppressed(failure);
}
throw e;
}
}

/**
* Periodically execute the supplied function until it returns true, or a timeout
* is reached. This version uses a timeout of 10 seconds. If at all possible,
Expand Down

0 comments on commit 047fbca

Please sign in to comment.