From 9dfedfa410ab665f289e1e843ea926a2b31053fa Mon Sep 17 00:00:00 2001 From: Ashish Date: Wed, 31 Jan 2024 16:44:19 +0530 Subject: [PATCH] Fix flaky RemoteIndexRecoveryIT testRerouteRecovery test #9580 (#11918) Signed-off-by: Ashish Singh --- .../indices/recovery/IndexRecoveryIT.java | 4 +-- .../remotestore/RemoteIndexRecoveryIT.java | 6 ---- .../opensearch/test/OpenSearchTestCase.java | 33 +++++++++++++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java index e4f1f8717f899..72e680e22ed75 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java @@ -523,12 +523,12 @@ public void testRerouteRecovery() throws Exception { logger.info("--> waiting for recovery to start both on source and target"); final Index index = resolveIndex(INDEX_NAME); - assertBusy(() -> { + assertBusyWithFixedSleepTime(() -> { IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodeA); assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsSource(), equalTo(1)); indicesService = internalCluster().getInstance(IndicesService.class, nodeB); assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsTarget(), equalTo(1)); - }); + }, TimeValue.timeValueSeconds(10), TimeValue.timeValueMillis(500)); logger.info("--> request recoveries"); RecoveryResponse response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java index c957f1b338bfe..6de61cf203c60 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java @@ -157,10 +157,4 @@ public void testDisconnectsDuringRecovery() { public void testReplicaRecovery() { } - - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9580") - public void testRerouteRecovery() { - - } - } diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java index b5ff30deecf5c..96bffcf2d3692 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java @@ -83,6 +83,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.time.DateUtils; import org.opensearch.common.time.FormatNames; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.MockBigArrays; import org.opensearch.common.util.MockPageCacheRecycler; import org.opensearch.common.util.concurrent.ThreadContext; @@ -1095,6 +1096,38 @@ public static void assertBusy(CheckedRunnable codeBlock, long maxWait } } + /** + * Runs the code block for the provided max wait time and sleeping for fixed sleep time, waiting for no assertions to trip. + */ + public static void assertBusyWithFixedSleepTime(CheckedRunnable codeBlock, TimeValue maxWaitTime, TimeValue sleepTime) + throws Exception { + long maxTimeInMillis = maxWaitTime.millis(); + long sleepTimeInMillis = sleepTime.millis(); + if (sleepTimeInMillis > maxTimeInMillis) { + throw new IllegalArgumentException("sleepTime is more than the maxWaitTime"); + } + long sum = 0; + List failures = new ArrayList<>(); + while (sum <= maxTimeInMillis) { + try { + codeBlock.run(); + return; + } catch (AssertionError e) { + failures.add(e); + } + sum += sleepTimeInMillis; + Thread.sleep(sleepTimeInMillis); + } + try { + codeBlock.run(); + } catch (AssertionError e) { + for (AssertionError failure : failures) { + e.addSuppressed(failure); + } + throw e; + } + } + /** * Periodically execute the supplied function until it returns true, or a timeout * is reached. This version uses a timeout of 10 seconds. If at all possible,