diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 5a07f964f94a4..9ebdfbe07b448 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -386,6 +386,7 @@ public void onFailure(String source, Exception e) { @Override public void onSuccess(String source) { + closePrevotingAndElectionScheduler(); applyListener.onResponse(null); } }); @@ -472,17 +473,29 @@ private static Optional joinWithDestination(Optional lastJoin, Disco } private void closePrevotingAndElectionScheduler() { + closePrevoting(); + closeElectionScheduler(); + } + + private void closePrevoting() { if (prevotingRound != null) { prevotingRound.close(); prevotingRound = null; } + } + private void closeElectionScheduler() { if (electionScheduler != null) { electionScheduler.close(); electionScheduler = null; } } + // package-visible for testing + boolean isElectionSchedulerRunning() { + return electionScheduler != null; + } + private void updateMaxTermSeen(final long term) { synchronized (mutex) { maxTermSeen = Math.max(maxTermSeen, term); @@ -724,7 +737,7 @@ void becomeLeader(String method) { lastKnownLeader = Optional.of(getLocalNode()); peerFinder.deactivate(getLocalNode()); clusterFormationFailureHelper.stop(); - closePrevotingAndElectionScheduler(); + closePrevoting(); preVoteCollector.update(getPreVoteResponse(), getLocalNode()); assert leaderChecker.leader() == null : leaderChecker.leader(); @@ -761,7 +774,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) { lastKnownLeader = Optional.of(leaderNode); peerFinder.deactivate(leaderNode); clusterFormationFailureHelper.stop(); - closePrevotingAndElectionScheduler(); + closePrevoting(); cancelActivePublication("become follower: " + method); preVoteCollector.update(getPreVoteResponse(), leaderNode); @@ -927,7 +940,6 @@ public void invariant() { assert lastKnownLeader.isPresent() && lastKnownLeader.get().equals(getLocalNode()); assert joinAccumulator instanceof JoinHelper.LeaderJoinAccumulator; assert peerFinderLeader.equals(lastKnownLeader) : peerFinderLeader; - assert electionScheduler == null : electionScheduler; assert prevotingRound == null : prevotingRound; assert becomingClusterManager || getStateForClusterManagerService().nodes().getClusterManagerNodeId() != null : getStateForClusterManagerService(); @@ -972,7 +984,6 @@ assert getLocalNode().equals(applierState.nodes().getClusterManagerNode()) assert lastKnownLeader.isPresent() && (lastKnownLeader.get().equals(getLocalNode()) == false); assert joinAccumulator instanceof JoinHelper.FollowerJoinAccumulator; assert peerFinderLeader.equals(lastKnownLeader) : peerFinderLeader; - assert electionScheduler == null : electionScheduler; assert prevotingRound == null : prevotingRound; assert getStateForClusterManagerService().nodes().getClusterManagerNodeId() == null : getStateForClusterManagerService(); assert leaderChecker.currentNodeIsClusterManager() == false; @@ -1687,6 +1698,7 @@ public void onFailure(String source, Exception e) { public void onSuccess(String source) { synchronized (mutex) { assert currentPublication.get() == CoordinatorPublication.this; + closePrevotingAndElectionScheduler(); currentPublication = Optional.empty(); logger.debug("publication ended successfully: {}", CoordinatorPublication.this); // trigger term bump if new term was found during publication diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java index a3129655148ab..5eeebd2588416 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinatorTests.java @@ -270,7 +270,7 @@ public void testNodesJoinAfterStableCluster() { public void testExpandsConfigurationWhenGrowingFromOneNodeToThreeButDoesNotShrink() { try (Cluster cluster = new Cluster(1)) { cluster.runRandomly(); - cluster.stabilise(); + cluster.stabilise(DEFAULT_STABILISATION_TIME * 2); final ClusterNode leader = cluster.getAnyLeader(); @@ -1750,7 +1750,7 @@ public void testDoesNotPerformElectionWhenRestartingFollower() { public void testImproveConfigurationPerformsVotingConfigExclusionStateCheck() { try (Cluster cluster = new Cluster(1)) { cluster.runRandomly(); - cluster.stabilise(); + cluster.stabilise(DEFAULT_STABILISATION_TIME * 2); final Coordinator coordinator = cluster.getAnyLeader().coordinator; final ClusterState currentState = coordinator.getLastAcceptedState(); diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index 28d7706fb1493..0754cc1793dc8 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -47,6 +47,7 @@ import org.opensearch.cluster.OpenSearchAllocationTestCase; import org.opensearch.cluster.coordination.AbstractCoordinatorTestCase.Cluster.ClusterNode; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration; +import org.opensearch.cluster.coordination.Coordinator.Mode; import org.opensearch.cluster.coordination.LinearizabilityChecker.History; import org.opensearch.cluster.coordination.LinearizabilityChecker.SequentialSpec; import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType; @@ -653,6 +654,12 @@ void stabilise(long stabilisationDurationMillis) { leader.getLastAppliedClusterState().getNodes().nodeExists(nodeId) ); } + if (clusterNode.coordinator.getMode() == Mode.LEADER || clusterNode.coordinator.getMode() == Mode.FOLLOWER) { + assertFalse( + "Election scheduler should stop after cluster has stabilised", + clusterNode.coordinator.isElectionSchedulerRunning() + ); + } } final Set connectedNodeIds = clusterNodes.stream()