From 4d5c10576a073b9ba6d88693231d50ac3a7c096f Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Fri, 30 Aug 2024 12:18:24 +0530 Subject: [PATCH 01/36] Add custom connect to node for handleJoinRequest + info logs + comments Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 290 ++++++++++++++++++ .../cluster/NodeConnectionsService.java | 31 +- .../cluster/coordination/Coordinator.java | 27 +- .../coordination/FollowersChecker.java | 17 + .../cluster/coordination/JoinHelper.java | 4 +- .../cluster/coordination/LeaderChecker.java | 16 +- .../cluster/coordination/Publication.java | 4 +- .../PublicationTransportHandler.java | 12 +- .../service/ClusterApplierService.java | 35 ++- .../cluster/service/MasterService.java | 4 +- .../main/java/org/opensearch/node/Node.java | 18 ++ .../transport/ClusterConnectionManager.java | 110 ++++++- .../transport/ConnectionManager.java | 11 + .../transport/RemoteConnectionManager.java | 18 ++ .../transport/TransportService.java | 23 ++ .../transport/StubbableConnectionManager.java | 24 ++ 16 files changed, 604 insertions(+), 40 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java new file mode 100644 index 0000000000000..9e8fa249f5b2f --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -0,0 +1,290 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.cluster.coordination; + +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.atomic.AtomicBoolean; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; +import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterStateApplier; +import org.opensearch.cluster.NodeConnectionsService; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.MockEngineFactoryPlugin; +import org.opensearch.indices.recovery.RecoverySettings; +import org.opensearch.plugins.Plugin; +import org.opensearch.tasks.Task; +import org.opensearch.test.InternalSettingsPlugin; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; +import org.opensearch.test.OpenSearchIntegTestCase.Scope; +import org.opensearch.test.store.MockFSIndexStore; +import org.opensearch.test.transport.MockTransportService; +import org.opensearch.test.transport.StubbableTransport; +import org.opensearch.transport.*; + +import static org.hamcrest.Matchers.is; +import static org.opensearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_ACTION_NAME; + +@ClusterScope(scope = Scope.TEST, numDataNodes = 0) +public class NodeJoinLeftIT extends OpenSearchIntegTestCase { + + private static final String INDEX_NAME = "test-idx-1"; + private static final String REPO_NAME = "test-repo-1"; + private static final String SNAP_NAME = "test-snap-1"; + + private static final int MIN_DOC_COUNT = 500; + private static final int MAX_DOC_COUNT = 1000; + private static final int SHARD_COUNT = 1; + private static final int REPLICA_COUNT = 0; + + @Override + protected Collection> nodePlugins() { + return Arrays.asList( + MockTransportService.TestPlugin.class, + MockFSIndexStore.TestPlugin.class, + InternalSettingsPlugin.class, + MockEngineFactoryPlugin.class + ); + } + + @Override + protected void beforeIndexDeletion() throws Exception { + super.beforeIndexDeletion(); + internalCluster().assertConsistentHistoryBetweenTranslogAndLuceneIndex(); + internalCluster().assertSeqNos(); + internalCluster().assertSameDocIdsOnShards(); + } + + public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { + final String indexName = "test"; + final Settings nodeSettings = Settings.builder() + .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") + .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s") + .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms") + .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") + .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .build(); + // start a cluster-manager node + final String cm =internalCluster().startNode(nodeSettings); + + System.out.println("--> spawning node t1"); + final String blueNodeName = internalCluster().startNode( + Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build() + ); + System.out.println("--> spawning node t2"); + final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); + + System.out.println("--> initial health check"); + ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); + assertThat(response.isTimedOut(), is(false)); + System.out.println("--> done initial health check"); + + System.out.println("--> creating index"); + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue") + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + ) + .get(); + System.out.println("--> done creating index"); + MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, cm); + MockTransportService redTransportService = + (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName); + + ClusterService cmClsService = internalCluster().getInstance(ClusterService.class, cm); + // simulate a slow applier on the cm + cmClsService.addStateApplier(new ClusterStateApplier() { + @Override + public void applyClusterState(ClusterChangedEvent event) { + if (event.nodesRemoved()) { + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + }); + cmTransportService.connectionManager().addListener(new TransportConnectionListener() { + + @Override + public void onConnectionOpened(Transport.Connection connection) { + // try { + // Thread.sleep(500); + // } catch (InterruptedException e) { + // throw new RuntimeException(e); + // } + + } + + @Override + public void onNodeConnected(DiscoveryNode node, Transport.Connection connection) { + // if (node.getName().equals("node_t2")) { + // try { + // Thread.sleep(250); + // } catch (InterruptedException e) { + // throw new RuntimeException(e); + // } + // } + } + + // @Override + // public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) { + // try { + // Thread.sleep(5000); + // } catch (InterruptedException e) { + // throw new RuntimeException(e); + // } + // } + }); + AtomicBoolean bb = new AtomicBoolean(); + // simulate followerchecker failure + + ConnectionDelay handlingBehavior = new ConnectionDelay( + FOLLOWER_CHECK_ACTION_NAME, + ()->{ + if (bb.get()) { + return; + } + try { + Thread.sleep(10); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + throw new NodeHealthCheckFailureException("non writable exception"); + }); + redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, handlingBehavior); + + +// for (int i=0 ;i < 1; i++) { +// //cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); +// System.out.println("--> follower check, iteration: " + i); +// bb.set(true); // pass followerchecker +// System.out.println("--> setting bb to true, sleeping for 1500ms, iteration: " + i); +// Thread.sleep(1500); +// bb.set(false); // fail followerchecker +// System.out.println("--> setting bb to false, iteration: " + i); +// System.out.println("--> checking cluster health 2 nodes, iteration: " + i); +// ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); +// assertThat(response1.isTimedOut(), is(false)); +// System.out.println("--> completed checking cluster health 2 nodes, iteration: " + i); +// //internalCluster().stopRandomNode(InternalTestCluster.nameFilter(blueNodeName)); +// System.out.println("--> checking cluster health 3 nodes, iteration: " + i); +// ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); +// assertThat(response2.isTimedOut(), is(false)); +// System.out.println("--> completed checking cluster health 3 nodes, iteration: " + i); +// } +// for (int i=0 ;i < 1; i++) { +// +// bb.set(true); // pass followerchecker +// +// Thread.sleep(1500); +// System.out.println("--> manually disconnecting node, iteration: " + i); +// cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); +// } + + // FAILS WITHOUT CODE CHANGES + for (int i=0 ; i < 10; i++) { + bb.set(false); // fail followerchecker by force to trigger node disconnect + System.out.println("--> disconnecting from red node, iteration: " + i); + // cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); + // now followerchecker should fail and trigger node left + System.out.println("--> checking cluster health 2 nodes, iteration: " + i); + ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); + assertThat(response1.isTimedOut(), is(false)); + System.out.println("--> completed checking cluster health 2 nodes, iteration: " + i); + + // once we know a node has left, we can re-enable followerchecker to work normally + bb.set(true); + Thread.sleep(1500); + System.out.println("--> checking cluster health 3 nodes, iteration: " + i); + ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response2.isTimedOut(), is(false)); + System.out.println("--> completed checking cluster health 3 nodes, iteration: " + i); + + Thread.sleep(1500); + + // Checking again + System.out.println("--> checking cluster health 3 nodes again, iteration: " + i); + ClusterHealthResponse response3 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response3.isTimedOut(), is(false)); + System.out.println("--> completed checking cluster health 3 nodes again, iteration: " + i); + } + + bb.set(true); + System.out.println("-->first validation outside loop"); + response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response.isTimedOut(), is(false)); + + System.out.println("-->sleeping for 20s"); + Thread.sleep(20000); + + System.out.println("-->second validation outside loop after sleep"); + response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response.isTimedOut(), is(false)); + } + private class ConnectionDelay implements StubbableTransport.RequestHandlingBehavior { + + private final String actionName; + private final Runnable connectionBreaker; + + private ConnectionDelay( + String actionName, + Runnable connectionBreaker + ) { + this.actionName = actionName; + this.connectionBreaker = connectionBreaker; + } + + @Override + public void messageReceived( + TransportRequestHandler handler, + TransportRequest request, + TransportChannel channel, + Task task + ) throws Exception { + + + connectionBreaker.run(); + handler.messageReceived(request, channel, task); + } + } +} diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index 1c12c260b3929..b25c77c7bfadb 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -172,6 +172,33 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { } for (final DiscoveryNode discoveryNode : nodesToDisconnect) { + logger.info("NodeConnectionsService - disconnecting from node [{}] in loop", discoveryNode); + runnables.add(targetsByNode.get(discoveryNode).disconnect()); + } + } + runnables.forEach(Runnable::run); + } + + public void markPendingJoinsAsComplete(List nodesConnected) { + for (final DiscoveryNode discoveryNode: nodesConnected) { + transportService.markPendingJoinAsCompleted(discoveryNode); + } + } + public void disconnectFromNonBlockedNodesExcept(DiscoveryNodes discoveryNodes, DiscoveryNodes.Delta nodesDelta) { + final List runnables = new ArrayList<>(); + synchronized (mutex) { + final Set nodesToDisconnect = new HashSet<>(targetsByNode.keySet()); + for (final DiscoveryNode discoveryNode : discoveryNodes) { + nodesToDisconnect.remove(discoveryNode); + } + + for (final DiscoveryNode discoveryNode : nodesToDisconnect) { + // if node is trying to be disconnected (node-left) and pendingjoin , skip disconnect and then remove the blocking + if (transportService.getNodesJoinInProgress().contains(discoveryNode)) { + logger.info("Skipping disconnection for node [{}] as it has a join in progress", discoveryNode); + continue; + } + logger.info("NodeConnectionsService - disconnecting from node [{}] in loop", discoveryNode); runnables.add(targetsByNode.get(discoveryNode).disconnect()); } } @@ -388,9 +415,10 @@ public String toString() { @Override protected void doRun() { assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held"; + logger.info("disconnecting from {}", discoveryNode); transportService.disconnectFromNode(discoveryNode); consecutiveFailureCount.set(0); - logger.debug("disconnected from {}", discoveryNode); + logger.info("disconnected from {}", discoveryNode); onCompletion(ActivityType.DISCONNECTING, null, connectActivity); } @@ -419,6 +447,7 @@ Runnable connect(@Nullable ActionListener listener) { } Runnable disconnect() { + logger.info("running runnable disconnect"); return addListenerAndStartActivity( null, ActivityType.DISCONNECTING, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 13a57d93f03f0..c2ed258accab8 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -371,6 +371,7 @@ void onFollowerCheckRequest(FollowerCheckRequest followerCheckRequest) { } else if (joinHelper.isJoinPending()) { logger.trace("onFollowerCheckRequest: rejoining cluster-manager, responding successfully to {}", followerCheckRequest); } else { + logger.info("Mode: {}, ", mode); logger.trace("onFollowerCheckRequest: received check from faulty cluster-manager, rejecting {}", followerCheckRequest); throw new CoordinationStateRejectedException( "onFollowerCheckRequest: received check from faulty cluster-manager, rejecting " + followerCheckRequest @@ -419,6 +420,7 @@ PublishWithJoinResponse handlePublishRequest(PublishRequest publishRequest) { synchronized (mutex) { final DiscoveryNode sourceNode = publishRequest.getAcceptedState().nodes().getClusterManagerNode(); logger.trace("handlePublishRequest: handling [{}] from [{}]", publishRequest, sourceNode); + logger.info("handlePublishRequest: handling version [{}] from [{}]", publishRequest.getAcceptedState().getVersion(), sourceNode); if (sourceNode.equals(getLocalNode()) && mode != Mode.LEADER) { // Rare case in which we stood down as leader between starting this publication and receiving it ourselves. The publication @@ -628,9 +630,9 @@ private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback return; } - transportService.connectToNode(joinRequest.getSourceNode(), ActionListener.wrap(ignore -> { + // cluster manager connects to the node + transportService.connectToNodeAndBlockDisconnects(joinRequest.getSourceNode(), ActionListener.wrap(ignore -> { final ClusterState stateForJoinValidation = getStateForClusterManagerService(); - if (stateForJoinValidation.nodes().isLocalNodeElectedClusterManager()) { onJoinValidators.forEach(a -> a.accept(joinRequest.getSourceNode(), stateForJoinValidation)); if (stateForJoinValidation.getBlocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false) { @@ -767,7 +769,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) { if (mode == Mode.FOLLOWER && Optional.of(leaderNode).equals(lastKnownLeader)) { logger.trace("{}: coordinator remaining FOLLOWER of [{}] in term {}", method, leaderNode, getCurrentTerm()); } else { - logger.debug( + logger.info( "{}: coordinator becoming FOLLOWER of [{}] in term {} (was {}, lastKnownLeader was [{}])", method, leaderNode, @@ -914,6 +916,7 @@ public DiscoveryStats stats() { @Override public void startInitialJoin() { synchronized (mutex) { + logger.info("Starting initial join, becoming candidate"); becomeCandidate("startInitialJoin"); } clusterBootstrapService.scheduleUnconfiguredBootstrap(); @@ -1356,10 +1359,27 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) currentPublication = Optional.of(publication); final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes(); + leaderChecker.setCurrentNodes(publishNodes); followersChecker.setCurrentNodes(publishNodes); lagDetector.setTrackedNodes(publishNodes); coordinationState.get().handlePrePublish(clusterState); + // join publish is failing + // before we publish, we might need + // can we recreate connections as part of publish if we don't find it? + // reconnect to any nodes that are trying to join, redundancy to avoid node connection wiping by concurrent node-join and left + // find diff of nodes from old state and new publishNodes + // this fails because we can't add blocking code to cluster manager thread +// for (DiscoveryNode addedNode : clusterChangedEvent.nodesDelta().addedNodes()) { +// // maybe add a listener here to handle failures +// try { +// transportService.connectToNode(addedNode); +// } +// catch (Exception e) { +// logger.info(() -> new ParameterizedMessage("[{}] failed reconnecting to [{}]", clusterChangedEvent.source(), addedNode), e); +// } +// } + publication.start(followersChecker.getFaultyNodes()); } } catch (Exception e) { @@ -1460,6 +1480,7 @@ private class CoordinatorPeerFinder extends PeerFinder { protected void onActiveClusterManagerFound(DiscoveryNode clusterManagerNode, long term) { synchronized (mutex) { ensureTermAtLeast(clusterManagerNode, term); + logger.info("sending join request to {}", clusterManagerNode); joinHelper.sendJoinRequest(clusterManagerNode, getCurrentTerm(), joinWithDestination(lastJoin, clusterManagerNode, term)); } } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java index 2ec0dabd91786..3d8c137f8a9f8 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java @@ -161,6 +161,7 @@ public FollowersChecker( transportService.addConnectionListener(new TransportConnectionListener() { @Override public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) { + logger.info("in transport listener onNodeDisconnected"); handleDisconnectedNode(node); } }); @@ -175,6 +176,7 @@ private void setFollowerCheckTimeout(TimeValue followerCheckTimeout) { * Update the set of known nodes, starting to check any new ones and stopping checking any previously-known-but-now-unknown ones. */ public void setCurrentNodes(DiscoveryNodes discoveryNodes) { + logger.info("[{}]Setting followerschecker currentnodes to {}", Thread.currentThread().getName(), discoveryNodes); synchronized (mutex) { final Predicate isUnknownNode = n -> discoveryNodes.nodeExists(n) == false; followerCheckers.keySet().removeIf(isUnknownNode); @@ -356,6 +358,9 @@ private void handleWakeUp() { final FollowerCheckRequest request = new FollowerCheckRequest(fastResponseState.term, transportService.getLocalNode()); logger.trace("handleWakeUp: checking {} with {}", discoveryNode, request); + if (discoveryNode.getName().equals("node_t2")) { + logger.info("handleWakeUp: checking {} with {}", discoveryNode, request); + } transportService.sendRequest( discoveryNode, @@ -390,6 +395,18 @@ public void handleException(TransportException exp) { failureCountSinceLastSuccess++; final String reason; + +// if (exp instanceof NodeNotConnectedException || exp.getCause() instanceof NodeNotConnectedException){ +// // NodeNotConnectedException will only happen if getConnection fails in TransportService.sendRequest +// // This only happens if clusterConnectionManager.getConnection() does not find the entry in connectedNodes list +// // This happens on node disconnection +// // Need to validate that this only gets triggered from node-left side. we want to ensure actual disconnections work +// failureCountSinceLastSuccess--; +// logger.info(() -> new ParameterizedMessage("{} cache entry not found, but node is still in cluster state. ignoring this failure", FollowerChecker.this), exp); +// scheduleNextWakeUp(); +// return; +// } + if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { logger.info(() -> new ParameterizedMessage("{} disconnected", FollowerChecker.this), exp); reason = "disconnected"; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java index 9bf6bac07da53..becd9f9e05c2d 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java @@ -379,7 +379,7 @@ public void sendJoinRequest(DiscoveryNode destination, long term, Optional final JoinRequest joinRequest = new JoinRequest(transportService.getLocalNode(), term, optionalJoin); final Tuple dedupKey = Tuple.tuple(destination, joinRequest); if (pendingOutgoingJoins.add(dedupKey)) { - logger.debug("attempting to join {} with {}", destination, joinRequest); + logger.info("attempting to join {} with {}", destination, joinRequest); transportService.sendRequest( destination, JOIN_ACTION_NAME, @@ -394,7 +394,7 @@ public Empty read(StreamInput in) { @Override public void handleResponse(Empty response) { pendingOutgoingJoins.remove(dedupKey); - logger.debug("successfully joined {} with {}", destination, joinRequest); + logger.info("successfully joined {} with {}", destination, joinRequest); lastFailedJoinAttempt.set(null); nodeCommissioned.accept(true); onCompletion.run(); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java index 4fd2c0eb13073..a8fc524667eaa 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java @@ -37,6 +37,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.OpenSearchException; import org.opensearch.cluster.ClusterManagerMetrics; +import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.Nullable; @@ -69,6 +70,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; +import java.util.function.Supplier; import static org.opensearch.monitor.StatusInfo.Status.UNHEALTHY; @@ -218,20 +220,20 @@ private void handleLeaderCheck(LeaderCheckRequest request) { + "since node is unhealthy [" + statusInfo.getInfo() + "]"; - logger.debug(message); + logger.info(message); throw new NodeHealthCheckFailureException(message); } else if (discoveryNodes.isLocalNodeElectedClusterManager() == false) { - logger.debug("rejecting leader check on non-cluster-manager {}", request); + logger.info("rejecting leader check on non-cluster-manager {}", request); throw new CoordinationStateRejectedException( "rejecting leader check from [" + request.getSender() + "] sent to a node that is no longer the cluster-manager" ); } else if (discoveryNodes.nodeExists(request.getSender()) == false) { - logger.debug("rejecting leader check from removed node: {}", request); + logger.info("rejecting leader check from removed node: {}", request); throw new CoordinationStateRejectedException( "rejecting leader check since [" + request.getSender() + "] has been removed from the cluster" ); } else { - logger.trace("handling {}", request); + logger.info("handling {}", request); } } @@ -306,17 +308,17 @@ public void handleException(TransportException exp) { return; } if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { - logger.debug(new ParameterizedMessage("leader [{}] disconnected during check", leader), exp); + logger.info(new ParameterizedMessage("leader [{}] disconnected during check", leader), exp); leaderFailed(new ConnectTransportException(leader, "disconnected during check", exp)); return; } else if (exp.getCause() instanceof NodeHealthCheckFailureException) { - logger.debug(new ParameterizedMessage("leader [{}] health check failed", leader), exp); + logger.info(new ParameterizedMessage("leader [{}] health check failed", leader), exp); leaderFailed(new NodeHealthCheckFailureException("node [" + leader + "] failed health checks", exp)); return; } long failureCount = failureCountSinceLastSuccess.incrementAndGet(); if (failureCount >= leaderCheckRetryCount) { - logger.debug( + logger.info( new ParameterizedMessage( "leader [{}] has failed {} consecutive checks (limit [{}] is {}); last failure was:", leader, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Publication.java b/server/src/main/java/org/opensearch/cluster/coordination/Publication.java index 43801a05dbc24..5ca197f2f9a4b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Publication.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Publication.java @@ -86,8 +86,10 @@ public Publication(PublishRequest publishRequest, AckListener ackListener, LongS public void start(Set faultyNodes) { logger.trace("publishing {} to {}", publishRequest, publicationTargets); + logger.info("publishing {} to {}", publishRequest.getAcceptedState().getVersion(), publicationTargets); for (final DiscoveryNode faultyNode : faultyNodes) { + logger.info("in publish.start, found faulty node: [{}]", faultyNode); onFaultyNode(faultyNode); } onPossibleCommitFailure(); @@ -332,7 +334,7 @@ void setFailed(Exception e) { void onFaultyNode(DiscoveryNode faultyNode) { if (isActive() && discoveryNode.equals(faultyNode)) { - logger.debug("onFaultyNode: [{}] is faulty, failing target in publication {}", faultyNode, Publication.this); + logger.info("onFaultyNode: [{}] is faulty, failing target in publication {}", faultyNode, Publication.this); setFailed(new OpenSearchException("faulty node")); onPossibleCommitFailure(); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index cdf331b7bb577..1e406da603c40 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -52,12 +52,7 @@ import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.threadpool.ThreadPool; -import org.opensearch.transport.BytesTransportRequest; -import org.opensearch.transport.TransportChannel; -import org.opensearch.transport.TransportException; -import org.opensearch.transport.TransportRequestOptions; -import org.opensearch.transport.TransportResponseHandler; -import org.opensearch.transport.TransportService; +import org.opensearch.transport.*; import java.io.IOException; import java.util.HashMap; @@ -592,8 +587,9 @@ private void sendClusterState( if (retryWithFullClusterStateOnFailure && exp.unwrapCause() instanceof IncompatibleClusterStateVersionException) { logger.debug("resending full cluster state to node {} reason {}", destination, exp.getDetailedMessage()); sendFullClusterState(destination, listener); - } else { - logger.debug(() -> new ParameterizedMessage("failed to send cluster state to {}", destination), exp); + } + else { + logger.info(() -> new ParameterizedMessage("failed to send cluster state to {}", destination), exp); listener.onFailure(exp); } }; diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java index 47080cfbde692..a4dc9d66fbf49 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java @@ -47,6 +47,7 @@ import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.TimeoutClusterStateListener; import org.opensearch.cluster.metadata.ProcessClusterEventTimeoutException; +import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.Nullable; import org.opensearch.common.Priority; @@ -68,11 +69,7 @@ import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; -import java.util.Arrays; -import java.util.Collection; -import java.util.Map; -import java.util.Objects; -import java.util.Optional; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; @@ -502,6 +499,7 @@ private void runTask(UpdateTask task) { try { applyChanges(task, previousClusterState, newClusterState, stopWatch); TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS)); + // at this point, cluster state appliers and listeners are completed logger.debug( "processing [{}]: took [{}] done applying updated cluster state (version: {}, uuid: {})", task.source, @@ -510,6 +508,7 @@ private void runTask(UpdateTask task) { newClusterState.stateUUID() ); warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch); + // then we call the ClusterApplyListener of the task task.listener.onSuccess(task.source); } catch (Exception e) { TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS)); @@ -552,6 +551,7 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl if (nodesDelta.hasChanges() && logger.isInfoEnabled()) { String summary = nodesDelta.shortSummary(); if (summary.length() > 0) { + // print node deltas logger.info( "{}, term: {}, version: {}, reason: {}", summary, @@ -562,9 +562,9 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl } } - logger.trace("connecting to nodes of cluster state with version {}", newClusterState.version()); + logger.info("connecting to nodes of cluster state with version {}", newClusterState.version()); try (TimingHandle ignored = stopWatch.timing("connecting to new nodes")) { - connectToNodesAndWait(newClusterState); + connectToNodesAndWaitAndMarkCompletedJoins(newClusterState, clusterChangedEvent.nodesDelta().addedNodes()); } // nothing to do until we actually recover from the gateway or any other block indicates we need to disable persistency @@ -576,10 +576,11 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl } } - logger.debug("apply cluster state with version {}", newClusterState.version()); + logger.info("apply cluster state with version {}", newClusterState.version()); callClusterStateAppliers(clusterChangedEvent, stopWatch); - nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); + //nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); + nodeConnectionsService.disconnectFromNonBlockedNodesExcept(newClusterState.nodes(), clusterChangedEvent.nodesDelta()); assert newClusterState.coordinationMetadata() .getLastAcceptedConfiguration() @@ -590,10 +591,24 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl + " on " + newClusterState.nodes().getLocalNode(); - logger.debug("set locally applied cluster state to version {}", newClusterState.version()); + logger.info("set locally applied cluster state to version {}", newClusterState.version()); state.set(newClusterState); callClusterStateListeners(clusterChangedEvent, stopWatch); + logger.info("completed appliers and listeners of cluster state for version {}", newClusterState.version()); + } + + protected void connectToNodesAndWaitAndMarkCompletedJoins(ClusterState newClusterState, List nodesAdded) { + // can't wait for an ActionFuture on the cluster applier thread, but we do want to block the thread here, so use a CountDownLatch. + final CountDownLatch countDownLatch = new CountDownLatch(1); + nodeConnectionsService.connectToNodes(newClusterState.nodes(), countDownLatch::countDown); + try { + countDownLatch.await(); + nodeConnectionsService.markPendingJoinsAsComplete(nodesAdded); + } catch (InterruptedException e) { + logger.debug("interrupted while connecting to nodes, continuing", e); + Thread.currentThread().interrupt(); + } } protected void connectToNodesAndWait(ClusterState newClusterState) { diff --git a/server/src/main/java/org/opensearch/cluster/service/MasterService.java b/server/src/main/java/org/opensearch/cluster/service/MasterService.java index 713de8cdd0fda..e264326b4066f 100644 --- a/server/src/main/java/org/opensearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/MasterService.java @@ -363,7 +363,7 @@ private void runTasks(TaskInputs taskInputs) { } } - logger.debug("publishing cluster state version [{}]", newClusterState.version()); + logger.info("publishing cluster state version [{}]", newClusterState.version()); publish(clusterChangedEvent, taskOutputs, publicationStartTime); } catch (Exception e) { handleException(shortSummary, publicationStartTime, newClusterState, e); @@ -464,6 +464,7 @@ private void handleException(String summary, long startTimeMillis, ClusterState private TaskOutputs calculateTaskOutputs(TaskInputs taskInputs, ClusterState previousClusterState, String taskSummary) { ClusterTasksResult clusterTasksResult = executeTasks(taskInputs, previousClusterState, taskSummary); ClusterState newClusterState = patchVersions(previousClusterState, clusterTasksResult); + logger.info("in cluster compute, finished computing new cluster state for version: {}", newClusterState.getVersion()); return new TaskOutputs( taskInputs, previousClusterState, @@ -910,6 +911,7 @@ private ClusterTasksResult executeTasks(TaskInputs taskInputs, ClusterSt ClusterTasksResult clusterTasksResult; try { List inputs = taskInputs.updateTasks.stream().map(tUpdateTask -> tUpdateTask.task).collect(Collectors.toList()); + clusterTasksResult = taskInputs.executor.execute(previousClusterState, inputs); if (previousClusterState != clusterTasksResult.resultingState && previousClusterState.nodes().isLocalNodeElectedClusterManager() diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index a8d4ebcf23dab..0040e96213b67 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -681,6 +681,24 @@ protected Node( clusterManagerMetrics ); clusterService.addStateApplier(scriptService); + /* + if (DiscoveryNode.isClusterManagerNode(settings)) { + // dummy applier for repro race condition + clusterService.addStateApplier(new ClusterStateApplier() { + @Override + public void applyClusterState(ClusterChangedEvent event) { + if (event.nodesRemoved()) { + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + }); + } + + */ resourcesToClose.add(clusterService); final Set> consistentSettings = settingsModule.getConsistentSettings(); if (consistentSettings.isEmpty() == false) { diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index e634323d58269..1cdaaeeacd110 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -43,10 +43,7 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.action.ActionListener; -import java.util.Collections; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; @@ -64,6 +61,7 @@ public class ClusterConnectionManager implements ConnectionManager { private final ConcurrentMap connectedNodes = ConcurrentCollections.newConcurrentMap(); private final ConcurrentMap> pendingConnections = ConcurrentCollections.newConcurrentMap(); + private final Set pendingJoins = new HashSet<>(); private final AbstractRefCounted connectingRefCounter = new AbstractRefCounted("connection manager") { @Override protected void closeInternal() { @@ -110,6 +108,16 @@ public void openConnection(DiscoveryNode node, ConnectionProfile connectionProfi internalOpenConnection(node, resolvedProfile, listener); } + @Override + public Set getNodesJoinInProgress(){ + return this.pendingJoins; + } + + @Override + public boolean markPendingJoinCompleted(DiscoveryNode discoveryNode){ + return pendingJoins.remove(discoveryNode); + } + /** * Connects to a node with the given connection profile. If the node is already connected this method has no effect. * Once a successful is established, it can be validated before being exposed. @@ -122,6 +130,7 @@ public void connectToNode( ConnectionValidator connectionValidator, ActionListener listener ) throws ConnectTransportException { + logger.info("[{}]connecting to node [{}]", Thread.currentThread().getName(), node); ConnectionProfile resolvedProfile = ConnectionProfile.resolveConnectionProfile(connectionProfile, defaultProfile); if (node == null) { listener.onFailure(new ConnectTransportException(null, "can't connect to a null node")); @@ -134,6 +143,7 @@ public void connectToNode( } if (connectedNodes.containsKey(node)) { + logger.info("connectedNodes already has key for node [{}]", node); connectingRefCounter.decRef(); listener.onResponse(null); return; @@ -159,16 +169,16 @@ public void connectToNode( assert Transports.assertNotTransportThread("connection validator success"); try { if (connectedNodes.putIfAbsent(node, conn) != null) { - logger.debug("existing connection to node [{}], closing new redundant connection", node); + logger.info("existing connection to node [{}], closing new redundant connection", node); IOUtils.closeWhileHandlingException(conn); } else { - logger.debug("connected to node [{}]", node); + logger.info("connected to node [{}]", node); try { connectionListener.onNodeConnected(node, conn); } finally { final Transport.Connection finalConnection = conn; conn.addCloseListener(ActionListener.wrap(() -> { - logger.trace("unregistering {} after connection close and marking as disconnected", node); + logger.info("unregistering {} after connection close and marking as disconnected", node); connectedNodes.remove(node, finalConnection); connectionListener.onNodeDisconnected(node, conn); })); @@ -191,6 +201,90 @@ public void connectToNode( })); } + // THIS IS ALMOST A COMPLETE COPY OF connectToNode, with a few lines added for pendingJoins list tracking + @Override + public void connectToNodeAndBlockDisconnects( + DiscoveryNode node, + ConnectionProfile connectionProfile, + ConnectionValidator connectionValidator, + ActionListener listener + ) throws ConnectTransportException { + logger.info("[{}]connecting to node [{}] while blocking disconnects", Thread.currentThread().getName(), node); + ConnectionProfile resolvedProfile = ConnectionProfile.resolveConnectionProfile(connectionProfile, defaultProfile); + if (node == null) { + listener.onFailure(new ConnectTransportException(null, "can't connect to a null node")); + return; + } + + if (connectingRefCounter.tryIncRef() == false) { + listener.onFailure(new IllegalStateException("connection manager is closed")); + return; + } + + if (connectedNodes.containsKey(node)) { + logger.info("connectedNodes already has key for node [{}]", node); + pendingJoins.add(node); + connectingRefCounter.decRef(); + listener.onResponse(null); + return; + } + + final ListenableFuture currentListener = new ListenableFuture<>(); + final ListenableFuture existingListener = pendingConnections.putIfAbsent(node, currentListener); + if (existingListener != null) { + try { + // wait on previous entry to complete connection attempt + existingListener.addListener(listener, OpenSearchExecutors.newDirectExecutorService()); + } finally { + connectingRefCounter.decRef(); + } + return; + } + + currentListener.addListener(listener, OpenSearchExecutors.newDirectExecutorService()); + + final RunOnce releaseOnce = new RunOnce(connectingRefCounter::decRef); + internalOpenConnection(node, resolvedProfile, ActionListener.wrap(conn -> { + connectionValidator.validate(conn, resolvedProfile, ActionListener.wrap(ignored -> { + assert Transports.assertNotTransportThread("connection validator success"); + try { + if (connectedNodes.putIfAbsent(node, conn) != null) { + logger.info("existing connection to node [{}], marking as blocked and closing new redundant connection", node); + pendingJoins.add(node); + IOUtils.closeWhileHandlingException(conn); + } else { + logger.info("connected to node [{}]", node); + try { + connectionListener.onNodeConnected(node, conn); + } finally { + pendingJoins.add(node); + final Transport.Connection finalConnection = conn; + conn.addCloseListener(ActionListener.wrap(() -> { + logger.info("unregistering {} after connection close and marking as disconnected", node); + connectedNodes.remove(node, finalConnection); + pendingJoins.remove(node); + connectionListener.onNodeDisconnected(node, conn); + })); + } + } + } finally { + ListenableFuture future = pendingConnections.remove(node); + assert future == currentListener : "Listener in pending map is different than the expected listener"; + releaseOnce.run(); + future.onResponse(null); + } + }, e -> { + assert Transports.assertNotTransportThread("connection validator failure"); + IOUtils.closeWhileHandlingException(conn); + failConnectionListeners(node, releaseOnce, e, currentListener); + })); + }, e -> { + assert Transports.assertNotTransportThread("internalOpenConnection failure"); + failConnectionListeners(node, releaseOnce, e, currentListener); + })); + } + + /** * Returns a connection for the given node if the node is connected. * Connections returned from this method must not be closed. The lifecycle of this connection is @@ -271,6 +365,7 @@ private void internalOpenConnection( ConnectionProfile connectionProfile, ActionListener listener ) { + logger.info("calling transport.openConnection"); transport.openConnection(node, connectionProfile, ActionListener.map(listener, connection -> { assert Transports.assertNotTransportThread("internalOpenConnection success"); try { @@ -279,6 +374,7 @@ private void internalOpenConnection( connection.addCloseListener(ActionListener.wrap(() -> connectionListener.onConnectionClosed(connection))); } if (connection.isClosed()) { + logger.info("channel closed while connecting, throwing exception"); throw new ConnectTransportException(node, "a channel closed while connecting"); } return connection; diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 10cfc2907098f..cea0cfe7fee6b 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -59,10 +59,21 @@ void connectToNode( ActionListener listener ) throws ConnectTransportException; + void connectToNodeAndBlockDisconnects( + DiscoveryNode node, + ConnectionProfile connectionProfile, + ConnectionValidator connectionValidator, + ActionListener listener + ) throws ConnectTransportException; + Transport.Connection getConnection(DiscoveryNode node); boolean nodeConnected(DiscoveryNode node); + Set getNodesJoinInProgress(); + + boolean markPendingJoinCompleted(DiscoveryNode node); + void disconnectFromNode(DiscoveryNode node); Set getAllConnectedNodes(); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index bd646f10df517..7a991dbc36039 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -119,6 +119,24 @@ public ConnectionProfile getConnectionProfile() { return delegate.getConnectionProfile(); } + @Override + public void connectToNodeAndBlockDisconnects( + DiscoveryNode node, + ConnectionProfile connectionProfile, + ConnectionValidator connectionValidator, + ActionListener listener + ) throws ConnectTransportException { + throw new UnsupportedOperationException("not implemented"); + } + @Override + public Set getNodesJoinInProgress() { + throw new UnsupportedOperationException("not implemented"); + } + + @Override + public boolean markPendingJoinCompleted(DiscoveryNode node) { + throw new UnsupportedOperationException("not implemented"); + } public Transport.Connection getAnyRemoteConnection() { List localConnectedNodes = this.connectedNodes; long curr; diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index fff6d82b23c7e..5d8feeef7aed0 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -40,6 +40,7 @@ import org.opensearch.action.ActionListenerResponseHandler; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.coordination.ClusterStateTermVersion; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.Nullable; import org.opensearch.common.io.stream.Streamables; @@ -482,6 +483,28 @@ public void connectToNode(DiscoveryNode node, ActionListener listener) thr connectToNode(node, null, listener); } + public void connectToNodeAndBlockDisconnects(DiscoveryNode node, ActionListener listener) throws ConnectTransportException { + connectToNodeAndBlockDisconnects(node, null, listener); + } + + public void connectToNodeAndBlockDisconnects(DiscoveryNode node, ConnectionProfile connectionProfile, ActionListener listener) throws ConnectTransportException { + if (isLocalNode(node)) { + listener.onResponse(null); + return; + } + connectionManager.connectToNodeAndBlockDisconnects(node, connectionProfile, connectionValidator(node), listener); + } + + public Set getNodesJoinInProgress() { + return connectionManager.getNodesJoinInProgress(); + } + + public boolean markPendingJoinAsCompleted(DiscoveryNode node) { + return connectionManager.markPendingJoinCompleted(node); + } + + + public void connectToExtensionNode(DiscoveryNode node, ActionListener listener) throws ConnectTransportException { connectToExtensionNode(node, null, listener); } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index 37df90fb103a3..c7cb34ea386c6 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -118,6 +118,30 @@ public void connectToNode( delegate.connectToNode(node, connectionProfile, connectionValidator, listener); } + @Override + public void connectToNodeAndBlockDisconnects( + DiscoveryNode node, + ConnectionProfile connectionProfile, + ConnectionValidator connectionValidator, + ActionListener listener + ) throws ConnectTransportException { + delegate.connectToNodeAndBlockDisconnects( + node, + connectionProfile, + connectionValidator, + listener + ); + } + @Override + public Set getNodesJoinInProgress() { + return delegate.getNodesJoinInProgress(); + } + + @Override + public boolean markPendingJoinCompleted(DiscoveryNode node) { + return delegate.markPendingJoinCompleted(node); + } + @Override public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); From 85905eedbad8f3520207a9e8f14750e9b46afae3 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Fri, 30 Aug 2024 13:53:52 +0530 Subject: [PATCH 02/36] spotless apply and import cleanup Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 150 +++++++++--------- .../cluster/NodeConnectionsService.java | 3 +- .../cluster/coordination/Coordinator.java | 28 ++-- .../coordination/FollowersChecker.java | 22 +-- .../cluster/coordination/LeaderChecker.java | 2 - .../PublicationTransportHandler.java | 10 +- .../service/ClusterApplierService.java | 9 +- .../transport/ClusterConnectionManager.java | 11 +- .../transport/RemoteConnectionManager.java | 2 + .../transport/TransportService.java | 6 +- .../transport/StubbableConnectionManager.java | 8 +- 11 files changed, 134 insertions(+), 117 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index 9e8fa249f5b2f..f427872b5681c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -32,9 +32,6 @@ package org.opensearch.cluster.coordination; -import java.util.Arrays; -import java.util.Collection; -import java.util.concurrent.atomic.AtomicBoolean; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterStateApplier; @@ -54,10 +51,19 @@ import org.opensearch.test.store.MockFSIndexStore; import org.opensearch.test.transport.MockTransportService; import org.opensearch.test.transport.StubbableTransport; -import org.opensearch.transport.*; +import org.opensearch.transport.Transport; +import org.opensearch.transport.TransportChannel; +import org.opensearch.transport.TransportConnectionListener; +import org.opensearch.transport.TransportRequest; +import org.opensearch.transport.TransportRequestHandler; +import org.opensearch.transport.TransportService; + +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.atomic.AtomicBoolean; -import static org.hamcrest.Matchers.is; import static org.opensearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_ACTION_NAME; +import static org.hamcrest.Matchers.is; @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class NodeJoinLeftIT extends OpenSearchIntegTestCase { @@ -99,7 +105,7 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) .build(); // start a cluster-manager node - final String cm =internalCluster().startNode(nodeSettings); + final String cm = internalCluster().startNode(nodeSettings); System.out.println("--> spawning node t1"); final String blueNodeName = internalCluster().startNode( @@ -126,8 +132,10 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { .get(); System.out.println("--> done creating index"); MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, cm); - MockTransportService redTransportService = - (MockTransportService) internalCluster().getInstance(TransportService.class, redNodeName); + MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance( + TransportService.class, + redNodeName + ); ClusterService cmClsService = internalCluster().getInstance(ClusterService.class, cm); // simulate a slow applier on the cm @@ -147,82 +155,79 @@ public void applyClusterState(ClusterChangedEvent event) { @Override public void onConnectionOpened(Transport.Connection connection) { - // try { - // Thread.sleep(500); - // } catch (InterruptedException e) { - // throw new RuntimeException(e); - // } + // try { + // Thread.sleep(500); + // } catch (InterruptedException e) { + // throw new RuntimeException(e); + // } } @Override public void onNodeConnected(DiscoveryNode node, Transport.Connection connection) { - // if (node.getName().equals("node_t2")) { - // try { - // Thread.sleep(250); - // } catch (InterruptedException e) { - // throw new RuntimeException(e); - // } - // } + // if (node.getName().equals("node_t2")) { + // try { + // Thread.sleep(250); + // } catch (InterruptedException e) { + // throw new RuntimeException(e); + // } + // } } - // @Override - // public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) { - // try { - // Thread.sleep(5000); - // } catch (InterruptedException e) { - // throw new RuntimeException(e); - // } - // } + // @Override + // public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) { + // try { + // Thread.sleep(5000); + // } catch (InterruptedException e) { + // throw new RuntimeException(e); + // } + // } }); AtomicBoolean bb = new AtomicBoolean(); // simulate followerchecker failure - ConnectionDelay handlingBehavior = new ConnectionDelay( - FOLLOWER_CHECK_ACTION_NAME, - ()->{ - if (bb.get()) { - return; - } - try { - Thread.sleep(10); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - throw new NodeHealthCheckFailureException("non writable exception"); - }); + ConnectionDelay handlingBehavior = new ConnectionDelay(FOLLOWER_CHECK_ACTION_NAME, () -> { + if (bb.get()) { + return; + } + try { + Thread.sleep(10); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + throw new NodeHealthCheckFailureException("non writable exception"); + }); redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, handlingBehavior); - -// for (int i=0 ;i < 1; i++) { -// //cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); -// System.out.println("--> follower check, iteration: " + i); -// bb.set(true); // pass followerchecker -// System.out.println("--> setting bb to true, sleeping for 1500ms, iteration: " + i); -// Thread.sleep(1500); -// bb.set(false); // fail followerchecker -// System.out.println("--> setting bb to false, iteration: " + i); -// System.out.println("--> checking cluster health 2 nodes, iteration: " + i); -// ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); -// assertThat(response1.isTimedOut(), is(false)); -// System.out.println("--> completed checking cluster health 2 nodes, iteration: " + i); -// //internalCluster().stopRandomNode(InternalTestCluster.nameFilter(blueNodeName)); -// System.out.println("--> checking cluster health 3 nodes, iteration: " + i); -// ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); -// assertThat(response2.isTimedOut(), is(false)); -// System.out.println("--> completed checking cluster health 3 nodes, iteration: " + i); -// } -// for (int i=0 ;i < 1; i++) { -// -// bb.set(true); // pass followerchecker -// -// Thread.sleep(1500); -// System.out.println("--> manually disconnecting node, iteration: " + i); -// cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); -// } + // for (int i=0 ;i < 1; i++) { + // //cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); + // System.out.println("--> follower check, iteration: " + i); + // bb.set(true); // pass followerchecker + // System.out.println("--> setting bb to true, sleeping for 1500ms, iteration: " + i); + // Thread.sleep(1500); + // bb.set(false); // fail followerchecker + // System.out.println("--> setting bb to false, iteration: " + i); + // System.out.println("--> checking cluster health 2 nodes, iteration: " + i); + // ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); + // assertThat(response1.isTimedOut(), is(false)); + // System.out.println("--> completed checking cluster health 2 nodes, iteration: " + i); + // //internalCluster().stopRandomNode(InternalTestCluster.nameFilter(blueNodeName)); + // System.out.println("--> checking cluster health 3 nodes, iteration: " + i); + // ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + // assertThat(response2.isTimedOut(), is(false)); + // System.out.println("--> completed checking cluster health 3 nodes, iteration: " + i); + // } + // for (int i=0 ;i < 1; i++) { + // + // bb.set(true); // pass followerchecker + // + // Thread.sleep(1500); + // System.out.println("--> manually disconnecting node, iteration: " + i); + // cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); + // } // FAILS WITHOUT CODE CHANGES - for (int i=0 ; i < 10; i++) { + for (int i = 0; i < 10; i++) { bb.set(false); // fail followerchecker by force to trigger node disconnect System.out.println("--> disconnecting from red node, iteration: " + i); // cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); @@ -261,15 +266,13 @@ public void onNodeConnected(DiscoveryNode node, Transport.Connection connection) response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); } + private class ConnectionDelay implements StubbableTransport.RequestHandlingBehavior { private final String actionName; private final Runnable connectionBreaker; - private ConnectionDelay( - String actionName, - Runnable connectionBreaker - ) { + private ConnectionDelay(String actionName, Runnable connectionBreaker) { this.actionName = actionName; this.connectionBreaker = connectionBreaker; } @@ -282,7 +285,6 @@ public void messageReceived( Task task ) throws Exception { - connectionBreaker.run(); handler.messageReceived(request, channel, task); } diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index b25c77c7bfadb..dbaddab97c575 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -180,10 +180,11 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { } public void markPendingJoinsAsComplete(List nodesConnected) { - for (final DiscoveryNode discoveryNode: nodesConnected) { + for (final DiscoveryNode discoveryNode : nodesConnected) { transportService.markPendingJoinAsCompleted(discoveryNode); } } + public void disconnectFromNonBlockedNodesExcept(DiscoveryNodes discoveryNodes, DiscoveryNodes.Delta nodesDelta) { final List runnables = new ArrayList<>(); synchronized (mutex) { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index c2ed258accab8..9938f573b7104 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -420,7 +420,11 @@ PublishWithJoinResponse handlePublishRequest(PublishRequest publishRequest) { synchronized (mutex) { final DiscoveryNode sourceNode = publishRequest.getAcceptedState().nodes().getClusterManagerNode(); logger.trace("handlePublishRequest: handling [{}] from [{}]", publishRequest, sourceNode); - logger.info("handlePublishRequest: handling version [{}] from [{}]", publishRequest.getAcceptedState().getVersion(), sourceNode); + logger.info( + "handlePublishRequest: handling version [{}] from [{}]", + publishRequest.getAcceptedState().getVersion(), + sourceNode + ); if (sourceNode.equals(getLocalNode()) && mode != Mode.LEADER) { // Rare case in which we stood down as leader between starting this publication and receiving it ourselves. The publication @@ -1367,18 +1371,20 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) // join publish is failing // before we publish, we might need // can we recreate connections as part of publish if we don't find it? - // reconnect to any nodes that are trying to join, redundancy to avoid node connection wiping by concurrent node-join and left + // reconnect to any nodes that are trying to join, redundancy to avoid node connection wiping by concurrent node-join and + // left // find diff of nodes from old state and new publishNodes // this fails because we can't add blocking code to cluster manager thread -// for (DiscoveryNode addedNode : clusterChangedEvent.nodesDelta().addedNodes()) { -// // maybe add a listener here to handle failures -// try { -// transportService.connectToNode(addedNode); -// } -// catch (Exception e) { -// logger.info(() -> new ParameterizedMessage("[{}] failed reconnecting to [{}]", clusterChangedEvent.source(), addedNode), e); -// } -// } + // for (DiscoveryNode addedNode : clusterChangedEvent.nodesDelta().addedNodes()) { + // // maybe add a listener here to handle failures + // try { + // transportService.connectToNode(addedNode); + // } + // catch (Exception e) { + // logger.info(() -> new ParameterizedMessage("[{}] failed reconnecting to [{}]", clusterChangedEvent.source(), addedNode), + // e); + // } + // } publication.start(followersChecker.getFaultyNodes()); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java index 3d8c137f8a9f8..18175376a1bdf 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java @@ -396,16 +396,18 @@ public void handleException(TransportException exp) { final String reason; -// if (exp instanceof NodeNotConnectedException || exp.getCause() instanceof NodeNotConnectedException){ -// // NodeNotConnectedException will only happen if getConnection fails in TransportService.sendRequest -// // This only happens if clusterConnectionManager.getConnection() does not find the entry in connectedNodes list -// // This happens on node disconnection -// // Need to validate that this only gets triggered from node-left side. we want to ensure actual disconnections work -// failureCountSinceLastSuccess--; -// logger.info(() -> new ParameterizedMessage("{} cache entry not found, but node is still in cluster state. ignoring this failure", FollowerChecker.this), exp); -// scheduleNextWakeUp(); -// return; -// } + // if (exp instanceof NodeNotConnectedException || exp.getCause() instanceof NodeNotConnectedException){ + // // NodeNotConnectedException will only happen if getConnection fails in TransportService.sendRequest + // // This only happens if clusterConnectionManager.getConnection() does not find the entry in connectedNodes list + // // This happens on node disconnection + // // Need to validate that this only gets triggered from node-left side. we want to ensure actual disconnections + // work + // failureCountSinceLastSuccess--; + // logger.info(() -> new ParameterizedMessage("{} cache entry not found, but node is still in cluster state. + // ignoring this failure", FollowerChecker.this), exp); + // scheduleNextWakeUp(); + // return; + // } if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { logger.info(() -> new ParameterizedMessage("{} disconnected", FollowerChecker.this), exp); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java index a8fc524667eaa..ac14194e38bfb 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java @@ -37,7 +37,6 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.OpenSearchException; import org.opensearch.cluster.ClusterManagerMetrics; -import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.Nullable; @@ -70,7 +69,6 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; -import java.util.function.Supplier; import static org.opensearch.monitor.StatusInfo.Status.UNHEALTHY; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index 1e406da603c40..49b18b3c12ded 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -52,7 +52,12 @@ import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.threadpool.ThreadPool; -import org.opensearch.transport.*; +import org.opensearch.transport.BytesTransportRequest; +import org.opensearch.transport.TransportChannel; +import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportRequestOptions; +import org.opensearch.transport.TransportResponseHandler; +import org.opensearch.transport.TransportService; import java.io.IOException; import java.util.HashMap; @@ -587,8 +592,7 @@ private void sendClusterState( if (retryWithFullClusterStateOnFailure && exp.unwrapCause() instanceof IncompatibleClusterStateVersionException) { logger.debug("resending full cluster state to node {} reason {}", destination, exp.getDetailedMessage()); sendFullClusterState(destination, listener); - } - else { + } else { logger.info(() -> new ParameterizedMessage("failed to send cluster state to {}", destination), exp); listener.onFailure(exp); } diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java index a4dc9d66fbf49..2291528d66e3a 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java @@ -69,7 +69,12 @@ import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; -import java.util.*; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; @@ -579,7 +584,7 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl logger.info("apply cluster state with version {}", newClusterState.version()); callClusterStateAppliers(clusterChangedEvent, stopWatch); - //nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); + // nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); nodeConnectionsService.disconnectFromNonBlockedNodesExcept(newClusterState.nodes(), clusterChangedEvent.nodesDelta()); assert newClusterState.coordinationMetadata() diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 1cdaaeeacd110..9c2a72f550e40 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -43,7 +43,11 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.action.ActionListener; -import java.util.*; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; @@ -109,12 +113,12 @@ public void openConnection(DiscoveryNode node, ConnectionProfile connectionProfi } @Override - public Set getNodesJoinInProgress(){ + public Set getNodesJoinInProgress() { return this.pendingJoins; } @Override - public boolean markPendingJoinCompleted(DiscoveryNode discoveryNode){ + public boolean markPendingJoinCompleted(DiscoveryNode discoveryNode) { return pendingJoins.remove(discoveryNode); } @@ -284,7 +288,6 @@ public void connectToNodeAndBlockDisconnects( })); } - /** * Returns a connection for the given node if the node is connected. * Connections returned from this method must not be closed. The lifecycle of this connection is diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index 7a991dbc36039..e72986f4ec097 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -128,6 +128,7 @@ public void connectToNodeAndBlockDisconnects( ) throws ConnectTransportException { throw new UnsupportedOperationException("not implemented"); } + @Override public Set getNodesJoinInProgress() { throw new UnsupportedOperationException("not implemented"); @@ -137,6 +138,7 @@ public Set getNodesJoinInProgress() { public boolean markPendingJoinCompleted(DiscoveryNode node) { throw new UnsupportedOperationException("not implemented"); } + public Transport.Connection getAnyRemoteConnection() { List localConnectedNodes = this.connectedNodes; long curr; diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index 5d8feeef7aed0..4885c0f5d0902 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -40,7 +40,6 @@ import org.opensearch.action.ActionListenerResponseHandler; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.ClusterName; -import org.opensearch.cluster.coordination.ClusterStateTermVersion; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.Nullable; import org.opensearch.common.io.stream.Streamables; @@ -487,7 +486,8 @@ public void connectToNodeAndBlockDisconnects(DiscoveryNode node, ActionListener< connectToNodeAndBlockDisconnects(node, null, listener); } - public void connectToNodeAndBlockDisconnects(DiscoveryNode node, ConnectionProfile connectionProfile, ActionListener listener) throws ConnectTransportException { + public void connectToNodeAndBlockDisconnects(DiscoveryNode node, ConnectionProfile connectionProfile, ActionListener listener) + throws ConnectTransportException { if (isLocalNode(node)) { listener.onResponse(null); return; @@ -503,8 +503,6 @@ public boolean markPendingJoinAsCompleted(DiscoveryNode node) { return connectionManager.markPendingJoinCompleted(node); } - - public void connectToExtensionNode(DiscoveryNode node, ActionListener listener) throws ConnectTransportException { connectToExtensionNode(node, null, listener); } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index c7cb34ea386c6..e6b3b9c029f27 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -125,13 +125,9 @@ public void connectToNodeAndBlockDisconnects( ConnectionValidator connectionValidator, ActionListener listener ) throws ConnectTransportException { - delegate.connectToNodeAndBlockDisconnects( - node, - connectionProfile, - connectionValidator, - listener - ); + delegate.connectToNodeAndBlockDisconnects(node, connectionProfile, connectionValidator, listener); } + @Override public Set getNodesJoinInProgress() { return delegate.getNodesJoinInProgress(); From f3efb03272f10313cf8ce585ee22fe7f2f2db54c Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Fri, 30 Aug 2024 18:21:02 +0530 Subject: [PATCH 03/36] Update system.out to logger.info Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 60 +++++-------------- 1 file changed, 16 insertions(+), 44 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index f427872b5681c..fb4dcf0ce9063 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -107,19 +107,19 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { // start a cluster-manager node final String cm = internalCluster().startNode(nodeSettings); - System.out.println("--> spawning node t1"); + logger.info("--> spawning node t1"); final String blueNodeName = internalCluster().startNode( Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build() ); - System.out.println("--> spawning node t2"); + logger.info("--> spawning node t2"); final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); - System.out.println("--> initial health check"); + logger.info("--> initial health check"); ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); assertThat(response.isTimedOut(), is(false)); - System.out.println("--> done initial health check"); + logger.info("--> done initial health check"); - System.out.println("--> creating index"); + logger.info("--> creating index"); client().admin() .indices() .prepareCreate(indexName) @@ -130,7 +130,7 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) ) .get(); - System.out.println("--> done creating index"); + logger.info("--> done creating index"); MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, cm); MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance( TransportService.class, @@ -199,70 +199,42 @@ public void onNodeConnected(DiscoveryNode node, Transport.Connection connection) }); redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, handlingBehavior); - // for (int i=0 ;i < 1; i++) { - // //cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); - // System.out.println("--> follower check, iteration: " + i); - // bb.set(true); // pass followerchecker - // System.out.println("--> setting bb to true, sleeping for 1500ms, iteration: " + i); - // Thread.sleep(1500); - // bb.set(false); // fail followerchecker - // System.out.println("--> setting bb to false, iteration: " + i); - // System.out.println("--> checking cluster health 2 nodes, iteration: " + i); - // ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); - // assertThat(response1.isTimedOut(), is(false)); - // System.out.println("--> completed checking cluster health 2 nodes, iteration: " + i); - // //internalCluster().stopRandomNode(InternalTestCluster.nameFilter(blueNodeName)); - // System.out.println("--> checking cluster health 3 nodes, iteration: " + i); - // ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); - // assertThat(response2.isTimedOut(), is(false)); - // System.out.println("--> completed checking cluster health 3 nodes, iteration: " + i); - // } - // for (int i=0 ;i < 1; i++) { - // - // bb.set(true); // pass followerchecker - // - // Thread.sleep(1500); - // System.out.println("--> manually disconnecting node, iteration: " + i); - // cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); - // } - - // FAILS WITHOUT CODE CHANGES for (int i = 0; i < 10; i++) { bb.set(false); // fail followerchecker by force to trigger node disconnect - System.out.println("--> disconnecting from red node, iteration: " + i); + logger.info("--> disconnecting from red node, iteration: " + i); // cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); // now followerchecker should fail and trigger node left - System.out.println("--> checking cluster health 2 nodes, iteration: " + i); + logger.info("--> checking cluster health 2 nodes, iteration: " + i); ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); assertThat(response1.isTimedOut(), is(false)); - System.out.println("--> completed checking cluster health 2 nodes, iteration: " + i); + logger.info("--> completed checking cluster health 2 nodes, iteration: " + i); // once we know a node has left, we can re-enable followerchecker to work normally bb.set(true); Thread.sleep(1500); - System.out.println("--> checking cluster health 3 nodes, iteration: " + i); + logger.info("--> checking cluster health 3 nodes, iteration: " + i); ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response2.isTimedOut(), is(false)); - System.out.println("--> completed checking cluster health 3 nodes, iteration: " + i); + logger.info("--> completed checking cluster health 3 nodes, iteration: " + i); Thread.sleep(1500); // Checking again - System.out.println("--> checking cluster health 3 nodes again, iteration: " + i); + logger.info("--> checking cluster health 3 nodes again, iteration: " + i); ClusterHealthResponse response3 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response3.isTimedOut(), is(false)); - System.out.println("--> completed checking cluster health 3 nodes again, iteration: " + i); + logger.info("--> completed checking cluster health 3 nodes again, iteration: " + i); } bb.set(true); - System.out.println("-->first validation outside loop"); + logger.info("-->first validation outside loop"); response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); - System.out.println("-->sleeping for 20s"); + logger.info("-->sleeping for 20s"); Thread.sleep(20000); - System.out.println("-->second validation outside loop after sleep"); + logger.info("-->second validation outside loop after sleep"); response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); } From f5fa84696536b5c1c331e6ae50bd961b24a13395 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Mon, 2 Sep 2024 10:41:59 +0530 Subject: [PATCH 04/36] Changes to mark disconnects as part of publish Signed-off-by: Rahul Karajgikar --- .../cluster/NodeConnectionsService.java | 46 ++++---- .../cluster/coordination/Coordinator.java | 33 +++--- .../service/ClusterApplierService.java | 7 +- .../transport/ClusterConnectionManager.java | 110 ++++-------------- .../transport/ConnectionManager.java | 16 +-- .../transport/RemoteConnectionManager.java | 24 ++-- .../transport/TransportService.java | 24 ++-- .../transport/StubbableConnectionManager.java | 30 +++-- 8 files changed, 124 insertions(+), 166 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index dbaddab97c575..9a43d6ce400b9 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -52,8 +52,10 @@ import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.core.action.ActionListener; import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.ConnectTransportException; import org.opensearch.transport.TransportService; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -185,27 +187,6 @@ public void markPendingJoinsAsComplete(List nodesConnected) { } } - public void disconnectFromNonBlockedNodesExcept(DiscoveryNodes discoveryNodes, DiscoveryNodes.Delta nodesDelta) { - final List runnables = new ArrayList<>(); - synchronized (mutex) { - final Set nodesToDisconnect = new HashSet<>(targetsByNode.keySet()); - for (final DiscoveryNode discoveryNode : discoveryNodes) { - nodesToDisconnect.remove(discoveryNode); - } - - for (final DiscoveryNode discoveryNode : nodesToDisconnect) { - // if node is trying to be disconnected (node-left) and pendingjoin , skip disconnect and then remove the blocking - if (transportService.getNodesJoinInProgress().contains(discoveryNode)) { - logger.info("Skipping disconnection for node [{}] as it has a join in progress", discoveryNode); - continue; - } - logger.info("NodeConnectionsService - disconnecting from node [{}] in loop", discoveryNode); - runnables.add(targetsByNode.get(discoveryNode).disconnect()); - } - } - runnables.forEach(Runnable::run); - } - void ensureConnections(Runnable onCompletion) { // Called by tests after some disruption has concluded. It is possible that one or more targets are currently CONNECTING and have // been since the disruption was active, and that the connection attempt was thwarted by a concurrent disruption to the connection. @@ -362,20 +343,30 @@ private class ConnectionTarget { final AbstractRunnable abstractRunnable = this; @Override - protected void doRun() { + protected void doRun() throws IOException { assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held"; + // if we are trying a connect activity while a node left is in progress, fail + if (transportService.getNodesLeftInProgress().contains(discoveryNode)) { + throw new ConnectTransportException(discoveryNode, "failed to connect while node-left in progress"); + } if (transportService.nodeConnected(discoveryNode)) { // transportService.connectToNode is a no-op if already connected, but we don't want any DEBUG logging in this case // since we run this for every node on every cluster state update. logger.trace("still connected to {}", discoveryNode); onConnected(); } else { - logger.debug("connecting to {}", discoveryNode); + logger.info("connecting to {}", discoveryNode); transportService.connectToNode(discoveryNode, new ActionListener() { @Override public void onResponse(Void aVoid) { assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held"; - logger.debug("connected to {}", discoveryNode); + logger.info("connected to {}", discoveryNode); + transportService.markPendingJoinAsCompleted(discoveryNode); + logger.info( + "marked pending join for {} as completed. new list of nodes pending join: {}", + discoveryNode, + transportService.getNodesJoinInProgress() + ); onConnected(); } @@ -418,8 +409,13 @@ protected void doRun() { assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held"; logger.info("disconnecting from {}", discoveryNode); transportService.disconnectFromNode(discoveryNode); + transportService.markPendingLeftAsCompleted(discoveryNode); consecutiveFailureCount.set(0); - logger.info("disconnected from {}", discoveryNode); + logger.info( + "disconnected from {} and marked pending left as completed. " + "pending lefts: [{}]", + discoveryNode, + transportService.getNodesLeftInProgress() + ); onCompletion(ActivityType.DISCONNECTING, null, connectActivity); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 9938f573b7104..2aa439db6f2b8 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -633,9 +633,19 @@ private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback ); return; } + // add a check here, if node-left is still in progress, we fail the connection + if (transportService.getNodesLeftInProgress().contains(joinRequest.getSourceNode())) { + joinCallback.onFailure( + new IllegalStateException( + "cannot join node [" + joinRequest.getSourceNode() + "] because node-left is currently in progress for this node" + + ) + ); + return; + } // cluster manager connects to the node - transportService.connectToNodeAndBlockDisconnects(joinRequest.getSourceNode(), ActionListener.wrap(ignore -> { + transportService.connectToNode(joinRequest.getSourceNode(), ActionListener.wrap(ignore -> { final ClusterState stateForJoinValidation = getStateForClusterManagerService(); if (stateForJoinValidation.nodes().isLocalNodeElectedClusterManager()) { onJoinValidators.forEach(a -> a.accept(joinRequest.getSourceNode(), stateForJoinValidation)); @@ -1368,24 +1378,9 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) followersChecker.setCurrentNodes(publishNodes); lagDetector.setTrackedNodes(publishNodes); coordinationState.get().handlePrePublish(clusterState); - // join publish is failing - // before we publish, we might need - // can we recreate connections as part of publish if we don't find it? - // reconnect to any nodes that are trying to join, redundancy to avoid node connection wiping by concurrent node-join and - // left - // find diff of nodes from old state and new publishNodes - // this fails because we can't add blocking code to cluster manager thread - // for (DiscoveryNode addedNode : clusterChangedEvent.nodesDelta().addedNodes()) { - // // maybe add a listener here to handle failures - // try { - // transportService.connectToNode(addedNode); - // } - // catch (Exception e) { - // logger.info(() -> new ParameterizedMessage("[{}] failed reconnecting to [{}]", clusterChangedEvent.source(), addedNode), - // e); - // } - // } - + // trying to mark pending connects/disconnects before publish + // if we try to connect during pending disconnect or vice versa - fail + transportService.markPendingConnections(clusterChangedEvent.nodesDelta()); publication.start(followersChecker.getFaultyNodes()); } } catch (Exception e) { diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java index 2291528d66e3a..15013929fdff1 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java @@ -569,7 +569,8 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl logger.info("connecting to nodes of cluster state with version {}", newClusterState.version()); try (TimingHandle ignored = stopWatch.timing("connecting to new nodes")) { - connectToNodesAndWaitAndMarkCompletedJoins(newClusterState, clusterChangedEvent.nodesDelta().addedNodes()); + // connectToNodesAndWaitAndMarkCompletedJoins(newClusterState, clusterChangedEvent.nodesDelta().addedNodes()); + connectToNodesAndWait(newClusterState); } // nothing to do until we actually recover from the gateway or any other block indicates we need to disable persistency @@ -584,8 +585,8 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl logger.info("apply cluster state with version {}", newClusterState.version()); callClusterStateAppliers(clusterChangedEvent, stopWatch); - // nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); - nodeConnectionsService.disconnectFromNonBlockedNodesExcept(newClusterState.nodes(), clusterChangedEvent.nodesDelta()); + nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); + // nodeConnectionsService.disconnectFromNonBlockedNodesExcept(newClusterState.nodes(), clusterChangedEvent.nodesDelta()); assert newClusterState.coordinationMetadata() .getLastAcceptedConfiguration() diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 9c2a72f550e40..d97ba0ed775a3 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -44,8 +44,8 @@ import org.opensearch.core.action.ActionListener; import java.util.Collections; -import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentMap; @@ -65,7 +65,8 @@ public class ClusterConnectionManager implements ConnectionManager { private final ConcurrentMap connectedNodes = ConcurrentCollections.newConcurrentMap(); private final ConcurrentMap> pendingConnections = ConcurrentCollections.newConcurrentMap(); - private final Set pendingJoins = new HashSet<>(); + private final Set pendingJoins = ConcurrentCollections.newConcurrentSet(); + private final Set pendingLeft = ConcurrentCollections.newConcurrentSet(); private final AbstractRefCounted connectingRefCounter = new AbstractRefCounted("connection manager") { @Override protected void closeInternal() { @@ -117,11 +118,33 @@ public Set getNodesJoinInProgress() { return this.pendingJoins; } + @Override + public Set getNodesLeftInProgress() { + return this.pendingLeft; + } + + @Override + public void markPendingJoins(List nodes) { + logger.info("marking pending join for nodes: [{}]", nodes); + pendingJoins.addAll(nodes); + } + + @Override + public void markPendingLefts(List nodes) { + logger.info("marking pending left for nodes: [{}]", nodes); + pendingLeft.addAll(nodes); + } + @Override public boolean markPendingJoinCompleted(DiscoveryNode discoveryNode) { return pendingJoins.remove(discoveryNode); } + @Override + public boolean markPendingLeftCompleted(DiscoveryNode discoveryNode) { + return pendingLeft.remove(discoveryNode); + } + /** * Connects to a node with the given connection profile. If the node is already connected this method has no effect. * Once a successful is established, it can be validated before being exposed. @@ -205,89 +228,6 @@ public void connectToNode( })); } - // THIS IS ALMOST A COMPLETE COPY OF connectToNode, with a few lines added for pendingJoins list tracking - @Override - public void connectToNodeAndBlockDisconnects( - DiscoveryNode node, - ConnectionProfile connectionProfile, - ConnectionValidator connectionValidator, - ActionListener listener - ) throws ConnectTransportException { - logger.info("[{}]connecting to node [{}] while blocking disconnects", Thread.currentThread().getName(), node); - ConnectionProfile resolvedProfile = ConnectionProfile.resolveConnectionProfile(connectionProfile, defaultProfile); - if (node == null) { - listener.onFailure(new ConnectTransportException(null, "can't connect to a null node")); - return; - } - - if (connectingRefCounter.tryIncRef() == false) { - listener.onFailure(new IllegalStateException("connection manager is closed")); - return; - } - - if (connectedNodes.containsKey(node)) { - logger.info("connectedNodes already has key for node [{}]", node); - pendingJoins.add(node); - connectingRefCounter.decRef(); - listener.onResponse(null); - return; - } - - final ListenableFuture currentListener = new ListenableFuture<>(); - final ListenableFuture existingListener = pendingConnections.putIfAbsent(node, currentListener); - if (existingListener != null) { - try { - // wait on previous entry to complete connection attempt - existingListener.addListener(listener, OpenSearchExecutors.newDirectExecutorService()); - } finally { - connectingRefCounter.decRef(); - } - return; - } - - currentListener.addListener(listener, OpenSearchExecutors.newDirectExecutorService()); - - final RunOnce releaseOnce = new RunOnce(connectingRefCounter::decRef); - internalOpenConnection(node, resolvedProfile, ActionListener.wrap(conn -> { - connectionValidator.validate(conn, resolvedProfile, ActionListener.wrap(ignored -> { - assert Transports.assertNotTransportThread("connection validator success"); - try { - if (connectedNodes.putIfAbsent(node, conn) != null) { - logger.info("existing connection to node [{}], marking as blocked and closing new redundant connection", node); - pendingJoins.add(node); - IOUtils.closeWhileHandlingException(conn); - } else { - logger.info("connected to node [{}]", node); - try { - connectionListener.onNodeConnected(node, conn); - } finally { - pendingJoins.add(node); - final Transport.Connection finalConnection = conn; - conn.addCloseListener(ActionListener.wrap(() -> { - logger.info("unregistering {} after connection close and marking as disconnected", node); - connectedNodes.remove(node, finalConnection); - pendingJoins.remove(node); - connectionListener.onNodeDisconnected(node, conn); - })); - } - } - } finally { - ListenableFuture future = pendingConnections.remove(node); - assert future == currentListener : "Listener in pending map is different than the expected listener"; - releaseOnce.run(); - future.onResponse(null); - } - }, e -> { - assert Transports.assertNotTransportThread("connection validator failure"); - IOUtils.closeWhileHandlingException(conn); - failConnectionListeners(node, releaseOnce, e, currentListener); - })); - }, e -> { - assert Transports.assertNotTransportThread("internalOpenConnection failure"); - failConnectionListeners(node, releaseOnce, e, currentListener); - })); - } - /** * Returns a connection for the given node if the node is connected. * Connections returned from this method must not be closed. The lifecycle of this connection is diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index cea0cfe7fee6b..5ae845c634fc2 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -36,6 +36,7 @@ import org.opensearch.core.action.ActionListener; import java.io.Closeable; +import java.util.List; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; @@ -59,21 +60,22 @@ void connectToNode( ActionListener listener ) throws ConnectTransportException; - void connectToNodeAndBlockDisconnects( - DiscoveryNode node, - ConnectionProfile connectionProfile, - ConnectionValidator connectionValidator, - ActionListener listener - ) throws ConnectTransportException; - Transport.Connection getConnection(DiscoveryNode node); boolean nodeConnected(DiscoveryNode node); Set getNodesJoinInProgress(); + Set getNodesLeftInProgress(); + boolean markPendingJoinCompleted(DiscoveryNode node); + boolean markPendingLeftCompleted(DiscoveryNode node); + + void markPendingJoins(List nodes); + + void markPendingLefts(List nodes); + void disconnectFromNode(DiscoveryNode node); Set getAllConnectedNodes(); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index e72986f4ec097..d0b8d6f57959d 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -120,17 +120,12 @@ public ConnectionProfile getConnectionProfile() { } @Override - public void connectToNodeAndBlockDisconnects( - DiscoveryNode node, - ConnectionProfile connectionProfile, - ConnectionValidator connectionValidator, - ActionListener listener - ) throws ConnectTransportException { + public Set getNodesJoinInProgress() { throw new UnsupportedOperationException("not implemented"); } @Override - public Set getNodesJoinInProgress() { + public Set getNodesLeftInProgress() { throw new UnsupportedOperationException("not implemented"); } @@ -139,6 +134,21 @@ public boolean markPendingJoinCompleted(DiscoveryNode node) { throw new UnsupportedOperationException("not implemented"); } + @Override + public boolean markPendingLeftCompleted(DiscoveryNode node) { + throw new UnsupportedOperationException("not implemented"); + } + + @Override + public void markPendingJoins(List nodes) { + throw new UnsupportedOperationException("not implemented"); + } + + @Override + public void markPendingLefts(List nodes) { + throw new UnsupportedOperationException("not implemented"); + } + public Transport.Connection getAnyRemoteConnection() { List localConnectedNodes = this.connectedNodes; long curr; diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index 4885c0f5d0902..715a4101f7c6e 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -41,6 +41,7 @@ import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.Nullable; import org.opensearch.common.io.stream.Streamables; import org.opensearch.common.lease.Releasable; @@ -482,27 +483,28 @@ public void connectToNode(DiscoveryNode node, ActionListener listener) thr connectToNode(node, null, listener); } - public void connectToNodeAndBlockDisconnects(DiscoveryNode node, ActionListener listener) throws ConnectTransportException { - connectToNodeAndBlockDisconnects(node, null, listener); + public Set getNodesJoinInProgress() { + return connectionManager.getNodesJoinInProgress(); } - public void connectToNodeAndBlockDisconnects(DiscoveryNode node, ConnectionProfile connectionProfile, ActionListener listener) - throws ConnectTransportException { - if (isLocalNode(node)) { - listener.onResponse(null); - return; - } - connectionManager.connectToNodeAndBlockDisconnects(node, connectionProfile, connectionValidator(node), listener); + public Set getNodesLeftInProgress() { + return connectionManager.getNodesLeftInProgress(); } - public Set getNodesJoinInProgress() { - return connectionManager.getNodesJoinInProgress(); + // this + public void markPendingConnections(DiscoveryNodes.Delta nodesDelta) { + connectionManager.markPendingJoins(nodesDelta.addedNodes()); + connectionManager.markPendingLefts(nodesDelta.removedNodes()); } public boolean markPendingJoinAsCompleted(DiscoveryNode node) { return connectionManager.markPendingJoinCompleted(node); } + public boolean markPendingLeftAsCompleted(DiscoveryNode node) { + return connectionManager.markPendingLeftCompleted(node); + } + public void connectToExtensionNode(DiscoveryNode node, ActionListener listener) throws ConnectTransportException { connectToExtensionNode(node, null, listener); } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index e6b3b9c029f27..de8ef2381972e 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -40,6 +40,7 @@ import org.opensearch.transport.Transport; import org.opensearch.transport.TransportConnectionListener; +import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -119,18 +120,14 @@ public void connectToNode( } @Override - public void connectToNodeAndBlockDisconnects( - DiscoveryNode node, - ConnectionProfile connectionProfile, - ConnectionValidator connectionValidator, - ActionListener listener - ) throws ConnectTransportException { - delegate.connectToNodeAndBlockDisconnects(node, connectionProfile, connectionValidator, listener); + public Set getNodesJoinInProgress() { + + return delegate.getNodesJoinInProgress(); } @Override - public Set getNodesJoinInProgress() { - return delegate.getNodesJoinInProgress(); + public Set getNodesLeftInProgress() { + return delegate.getNodesLeftInProgress(); } @Override @@ -138,6 +135,21 @@ public boolean markPendingJoinCompleted(DiscoveryNode node) { return delegate.markPendingJoinCompleted(node); } + @Override + public boolean markPendingLeftCompleted(DiscoveryNode node) { + return delegate.markPendingLeftCompleted(node); + } + + @Override + public void markPendingJoins(List nodes) { + delegate.markPendingJoins(nodes); + } + + @Override + public void markPendingLefts(List nodes) { + delegate.markPendingLefts(nodes); + } + @Override public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); From 2db5587b536cbe06839ec2324c8fc7d382bc632f Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Mon, 2 Sep 2024 12:16:26 +0530 Subject: [PATCH 05/36] cleanup unused code and remove added log lines Signed-off-by: Rahul Karajgikar --- .../cluster/NodeConnectionsService.java | 28 +++---------------- .../cluster/coordination/Coordinator.java | 15 ++++------ .../coordination/FollowersChecker.java | 17 ----------- .../cluster/coordination/JoinHelper.java | 4 +-- .../cluster/coordination/LeaderChecker.java | 14 +++++----- .../cluster/coordination/Publication.java | 5 ++-- .../PublicationTransportHandler.java | 16 ++++++++++- .../service/ClusterApplierService.java | 26 +++-------------- .../cluster/service/MasterService.java | 5 ++-- .../main/java/org/opensearch/node/Node.java | 18 ------------ .../transport/ClusterConnectionManager.java | 17 ----------- .../transport/ConnectionManager.java | 6 ---- .../transport/RemoteConnectionManager.java | 15 ---------- .../transport/TransportService.java | 10 +------ .../transport/StubbableConnectionManager.java | 16 ----------- 15 files changed, 43 insertions(+), 169 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index 9a43d6ce400b9..3a27860b9c8d4 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -52,10 +52,8 @@ import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.core.action.ActionListener; import org.opensearch.threadpool.ThreadPool; -import org.opensearch.transport.ConnectTransportException; import org.opensearch.transport.TransportService; -import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -181,12 +179,6 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { runnables.forEach(Runnable::run); } - public void markPendingJoinsAsComplete(List nodesConnected) { - for (final DiscoveryNode discoveryNode : nodesConnected) { - transportService.markPendingJoinAsCompleted(discoveryNode); - } - } - void ensureConnections(Runnable onCompletion) { // Called by tests after some disruption has concluded. It is possible that one or more targets are currently CONNECTING and have // been since the disruption was active, and that the connection attempt was thwarted by a concurrent disruption to the connection. @@ -343,30 +335,20 @@ private class ConnectionTarget { final AbstractRunnable abstractRunnable = this; @Override - protected void doRun() throws IOException { + protected void doRun() { assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held"; - // if we are trying a connect activity while a node left is in progress, fail - if (transportService.getNodesLeftInProgress().contains(discoveryNode)) { - throw new ConnectTransportException(discoveryNode, "failed to connect while node-left in progress"); - } if (transportService.nodeConnected(discoveryNode)) { // transportService.connectToNode is a no-op if already connected, but we don't want any DEBUG logging in this case // since we run this for every node on every cluster state update. logger.trace("still connected to {}", discoveryNode); onConnected(); } else { - logger.info("connecting to {}", discoveryNode); + logger.debug("connecting to {}", discoveryNode); transportService.connectToNode(discoveryNode, new ActionListener() { @Override public void onResponse(Void aVoid) { assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held"; - logger.info("connected to {}", discoveryNode); - transportService.markPendingJoinAsCompleted(discoveryNode); - logger.info( - "marked pending join for {} as completed. new list of nodes pending join: {}", - discoveryNode, - transportService.getNodesJoinInProgress() - ); + logger.debug("connected to {}", discoveryNode); onConnected(); } @@ -407,11 +389,10 @@ public String toString() { @Override protected void doRun() { assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held"; - logger.info("disconnecting from {}", discoveryNode); transportService.disconnectFromNode(discoveryNode); transportService.markPendingLeftAsCompleted(discoveryNode); consecutiveFailureCount.set(0); - logger.info( + logger.debug( "disconnected from {} and marked pending left as completed. " + "pending lefts: [{}]", discoveryNode, transportService.getNodesLeftInProgress() @@ -444,7 +425,6 @@ Runnable connect(@Nullable ActionListener listener) { } Runnable disconnect() { - logger.info("running runnable disconnect"); return addListenerAndStartActivity( null, ActivityType.DISCONNECTING, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 2aa439db6f2b8..0ac70bd1ae59c 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -419,8 +419,7 @@ PublishWithJoinResponse handlePublishRequest(PublishRequest publishRequest) { synchronized (mutex) { final DiscoveryNode sourceNode = publishRequest.getAcceptedState().nodes().getClusterManagerNode(); - logger.trace("handlePublishRequest: handling [{}] from [{}]", publishRequest, sourceNode); - logger.info( + logger.debug( "handlePublishRequest: handling version [{}] from [{}]", publishRequest.getAcceptedState().getVersion(), sourceNode @@ -633,7 +632,7 @@ private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback ); return; } - // add a check here, if node-left is still in progress, we fail the connection + // if node-left is still in progress, we fail the joinRequest early if (transportService.getNodesLeftInProgress().contains(joinRequest.getSourceNode())) { joinCallback.onFailure( new IllegalStateException( @@ -644,7 +643,6 @@ private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback return; } - // cluster manager connects to the node transportService.connectToNode(joinRequest.getSourceNode(), ActionListener.wrap(ignore -> { final ClusterState stateForJoinValidation = getStateForClusterManagerService(); if (stateForJoinValidation.nodes().isLocalNodeElectedClusterManager()) { @@ -783,7 +781,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) { if (mode == Mode.FOLLOWER && Optional.of(leaderNode).equals(lastKnownLeader)) { logger.trace("{}: coordinator remaining FOLLOWER of [{}] in term {}", method, leaderNode, getCurrentTerm()); } else { - logger.info( + logger.debug( "{}: coordinator becoming FOLLOWER of [{}] in term {} (was {}, lastKnownLeader was [{}])", method, leaderNode, @@ -930,7 +928,7 @@ public DiscoveryStats stats() { @Override public void startInitialJoin() { synchronized (mutex) { - logger.info("Starting initial join, becoming candidate"); + logger.trace("Starting initial join, becoming candidate"); becomeCandidate("startInitialJoin"); } clusterBootstrapService.scheduleUnconfiguredBootstrap(); @@ -1373,13 +1371,12 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) currentPublication = Optional.of(publication); final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes(); - leaderChecker.setCurrentNodes(publishNodes); followersChecker.setCurrentNodes(publishNodes); lagDetector.setTrackedNodes(publishNodes); coordinationState.get().handlePrePublish(clusterState); - // trying to mark pending connects/disconnects before publish - // if we try to connect during pending disconnect or vice versa - fail + // trying to mark pending disconnects before publish + // if we try to joinRequest during pending disconnect, it should fail transportService.markPendingConnections(clusterChangedEvent.nodesDelta()); publication.start(followersChecker.getFaultyNodes()); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java index 18175376a1bdf..5a2ef2fa11709 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java @@ -176,7 +176,6 @@ private void setFollowerCheckTimeout(TimeValue followerCheckTimeout) { * Update the set of known nodes, starting to check any new ones and stopping checking any previously-known-but-now-unknown ones. */ public void setCurrentNodes(DiscoveryNodes discoveryNodes) { - logger.info("[{}]Setting followerschecker currentnodes to {}", Thread.currentThread().getName(), discoveryNodes); synchronized (mutex) { final Predicate isUnknownNode = n -> discoveryNodes.nodeExists(n) == false; followerCheckers.keySet().removeIf(isUnknownNode); @@ -358,9 +357,6 @@ private void handleWakeUp() { final FollowerCheckRequest request = new FollowerCheckRequest(fastResponseState.term, transportService.getLocalNode()); logger.trace("handleWakeUp: checking {} with {}", discoveryNode, request); - if (discoveryNode.getName().equals("node_t2")) { - logger.info("handleWakeUp: checking {} with {}", discoveryNode, request); - } transportService.sendRequest( discoveryNode, @@ -396,19 +392,6 @@ public void handleException(TransportException exp) { final String reason; - // if (exp instanceof NodeNotConnectedException || exp.getCause() instanceof NodeNotConnectedException){ - // // NodeNotConnectedException will only happen if getConnection fails in TransportService.sendRequest - // // This only happens if clusterConnectionManager.getConnection() does not find the entry in connectedNodes list - // // This happens on node disconnection - // // Need to validate that this only gets triggered from node-left side. we want to ensure actual disconnections - // work - // failureCountSinceLastSuccess--; - // logger.info(() -> new ParameterizedMessage("{} cache entry not found, but node is still in cluster state. - // ignoring this failure", FollowerChecker.this), exp); - // scheduleNextWakeUp(); - // return; - // } - if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { logger.info(() -> new ParameterizedMessage("{} disconnected", FollowerChecker.this), exp); reason = "disconnected"; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java index becd9f9e05c2d..9bf6bac07da53 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinHelper.java @@ -379,7 +379,7 @@ public void sendJoinRequest(DiscoveryNode destination, long term, Optional final JoinRequest joinRequest = new JoinRequest(transportService.getLocalNode(), term, optionalJoin); final Tuple dedupKey = Tuple.tuple(destination, joinRequest); if (pendingOutgoingJoins.add(dedupKey)) { - logger.info("attempting to join {} with {}", destination, joinRequest); + logger.debug("attempting to join {} with {}", destination, joinRequest); transportService.sendRequest( destination, JOIN_ACTION_NAME, @@ -394,7 +394,7 @@ public Empty read(StreamInput in) { @Override public void handleResponse(Empty response) { pendingOutgoingJoins.remove(dedupKey); - logger.info("successfully joined {} with {}", destination, joinRequest); + logger.debug("successfully joined {} with {}", destination, joinRequest); lastFailedJoinAttempt.set(null); nodeCommissioned.accept(true); onCompletion.run(); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java index ac14194e38bfb..f5aecdfd31462 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java @@ -218,20 +218,20 @@ private void handleLeaderCheck(LeaderCheckRequest request) { + "since node is unhealthy [" + statusInfo.getInfo() + "]"; - logger.info(message); + logger.debug(message); throw new NodeHealthCheckFailureException(message); } else if (discoveryNodes.isLocalNodeElectedClusterManager() == false) { - logger.info("rejecting leader check on non-cluster-manager {}", request); + logger.debug("rejecting leader check on non-cluster-manager {}", request); throw new CoordinationStateRejectedException( "rejecting leader check from [" + request.getSender() + "] sent to a node that is no longer the cluster-manager" ); } else if (discoveryNodes.nodeExists(request.getSender()) == false) { - logger.info("rejecting leader check from removed node: {}", request); + logger.debug("rejecting leader check from removed node: {}", request); throw new CoordinationStateRejectedException( "rejecting leader check since [" + request.getSender() + "] has been removed from the cluster" ); } else { - logger.info("handling {}", request); + logger.debug("handling {}", request); } } @@ -306,17 +306,17 @@ public void handleException(TransportException exp) { return; } if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { - logger.info(new ParameterizedMessage("leader [{}] disconnected during check", leader), exp); + logger.debug(new ParameterizedMessage("leader [{}] disconnected during check", leader), exp); leaderFailed(new ConnectTransportException(leader, "disconnected during check", exp)); return; } else if (exp.getCause() instanceof NodeHealthCheckFailureException) { - logger.info(new ParameterizedMessage("leader [{}] health check failed", leader), exp); + logger.debug(new ParameterizedMessage("leader [{}] health check failed", leader), exp); leaderFailed(new NodeHealthCheckFailureException("node [" + leader + "] failed health checks", exp)); return; } long failureCount = failureCountSinceLastSuccess.incrementAndGet(); if (failureCount >= leaderCheckRetryCount) { - logger.info( + logger.debug( new ParameterizedMessage( "leader [{}] has failed {} consecutive checks (limit [{}] is {}); last failure was:", leader, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Publication.java b/server/src/main/java/org/opensearch/cluster/coordination/Publication.java index 5ca197f2f9a4b..65ce59d968189 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Publication.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Publication.java @@ -86,10 +86,9 @@ public Publication(PublishRequest publishRequest, AckListener ackListener, LongS public void start(Set faultyNodes) { logger.trace("publishing {} to {}", publishRequest, publicationTargets); - logger.info("publishing {} to {}", publishRequest.getAcceptedState().getVersion(), publicationTargets); + logger.info("publishing version {} to {}", publishRequest.getAcceptedState().getVersion(), publicationTargets); for (final DiscoveryNode faultyNode : faultyNodes) { - logger.info("in publish.start, found faulty node: [{}]", faultyNode); onFaultyNode(faultyNode); } onPossibleCommitFailure(); @@ -334,7 +333,7 @@ void setFailed(Exception e) { void onFaultyNode(DiscoveryNode faultyNode) { if (isActive() && discoveryNode.equals(faultyNode)) { - logger.info("onFaultyNode: [{}] is faulty, failing target in publication {}", faultyNode, Publication.this); + logger.debug("onFaultyNode: [{}] is faulty, failing target in publication {}", faultyNode, Publication.this); setFailed(new OpenSearchException("faulty node")); onPossibleCommitFailure(); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index 49b18b3c12ded..d11165f5ca357 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -502,7 +502,21 @@ public void onFailure(Exception e) { } else { responseActionListener = listener; } +<<<<<<< HEAD sendClusterState(destination, responseActionListener); +======= + // TODO Decide to send remote state before starting publication by checking remote publication on all nodes + if (sendRemoteState && destination.isRemoteStatePublicationEnabled()) { + logger.trace("sending remote cluster state version [{}] to [{}]", newState.version(), destination); + sendRemoteClusterState(destination, publishRequest.getAcceptedState(), responseActionListener); + } else if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { + logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); + sendFullClusterState(destination, responseActionListener); + } else { + logger.trace("sending cluster state diff for version [{}] to [{}]", newState.version(), destination); + sendClusterStateDiff(destination, responseActionListener); + } +>>>>>>> f0cf40ce03f (cleanup unused code and remove added log lines) } public void sendApplyCommit( @@ -593,7 +607,7 @@ private void sendClusterState( logger.debug("resending full cluster state to node {} reason {}", destination, exp.getDetailedMessage()); sendFullClusterState(destination, listener); } else { - logger.info(() -> new ParameterizedMessage("failed to send cluster state to {}", destination), exp); + logger.debug(() -> new ParameterizedMessage("failed to send cluster state to {}", destination), exp); listener.onFailure(exp); } }; diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java index 15013929fdff1..d68bd5edf0617 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java @@ -47,7 +47,6 @@ import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.TimeoutClusterStateListener; import org.opensearch.cluster.metadata.ProcessClusterEventTimeoutException; -import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.Nullable; import org.opensearch.common.Priority; @@ -71,7 +70,6 @@ import java.util.Arrays; import java.util.Collection; -import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -504,7 +502,7 @@ private void runTask(UpdateTask task) { try { applyChanges(task, previousClusterState, newClusterState, stopWatch); TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS)); - // at this point, cluster state appliers and listeners are completed + // At this point, cluster state appliers and listeners are completed logger.debug( "processing [{}]: took [{}] done applying updated cluster state (version: {}, uuid: {})", task.source, @@ -513,7 +511,7 @@ private void runTask(UpdateTask task) { newClusterState.stateUUID() ); warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch); - // then we call the ClusterApplyListener of the task + // Then we call the ClusterApplyListener of the task task.listener.onSuccess(task.source); } catch (Exception e) { TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS)); @@ -556,7 +554,6 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl if (nodesDelta.hasChanges() && logger.isInfoEnabled()) { String summary = nodesDelta.shortSummary(); if (summary.length() > 0) { - // print node deltas logger.info( "{}, term: {}, version: {}, reason: {}", summary, @@ -567,9 +564,8 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl } } - logger.info("connecting to nodes of cluster state with version {}", newClusterState.version()); + logger.trace("connecting to nodes of cluster state with version {}", newClusterState.version()); try (TimingHandle ignored = stopWatch.timing("connecting to new nodes")) { - // connectToNodesAndWaitAndMarkCompletedJoins(newClusterState, clusterChangedEvent.nodesDelta().addedNodes()); connectToNodesAndWait(newClusterState); } @@ -582,11 +578,10 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl } } - logger.info("apply cluster state with version {}", newClusterState.version()); + logger.debug("apply cluster state with version {}", newClusterState.version()); callClusterStateAppliers(clusterChangedEvent, stopWatch); nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); - // nodeConnectionsService.disconnectFromNonBlockedNodesExcept(newClusterState.nodes(), clusterChangedEvent.nodesDelta()); assert newClusterState.coordinationMetadata() .getLastAcceptedConfiguration() @@ -604,19 +599,6 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl logger.info("completed appliers and listeners of cluster state for version {}", newClusterState.version()); } - protected void connectToNodesAndWaitAndMarkCompletedJoins(ClusterState newClusterState, List nodesAdded) { - // can't wait for an ActionFuture on the cluster applier thread, but we do want to block the thread here, so use a CountDownLatch. - final CountDownLatch countDownLatch = new CountDownLatch(1); - nodeConnectionsService.connectToNodes(newClusterState.nodes(), countDownLatch::countDown); - try { - countDownLatch.await(); - nodeConnectionsService.markPendingJoinsAsComplete(nodesAdded); - } catch (InterruptedException e) { - logger.debug("interrupted while connecting to nodes, continuing", e); - Thread.currentThread().interrupt(); - } - } - protected void connectToNodesAndWait(ClusterState newClusterState) { // can't wait for an ActionFuture on the cluster applier thread, but we do want to block the thread here, so use a CountDownLatch. final CountDownLatch countDownLatch = new CountDownLatch(1); diff --git a/server/src/main/java/org/opensearch/cluster/service/MasterService.java b/server/src/main/java/org/opensearch/cluster/service/MasterService.java index e264326b4066f..d295db2c5289d 100644 --- a/server/src/main/java/org/opensearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/MasterService.java @@ -363,7 +363,7 @@ private void runTasks(TaskInputs taskInputs) { } } - logger.info("publishing cluster state version [{}]", newClusterState.version()); + logger.debug("publishing cluster state version [{}]", newClusterState.version()); publish(clusterChangedEvent, taskOutputs, publicationStartTime); } catch (Exception e) { handleException(shortSummary, publicationStartTime, newClusterState, e); @@ -464,7 +464,7 @@ private void handleException(String summary, long startTimeMillis, ClusterState private TaskOutputs calculateTaskOutputs(TaskInputs taskInputs, ClusterState previousClusterState, String taskSummary) { ClusterTasksResult clusterTasksResult = executeTasks(taskInputs, previousClusterState, taskSummary); ClusterState newClusterState = patchVersions(previousClusterState, clusterTasksResult); - logger.info("in cluster compute, finished computing new cluster state for version: {}", newClusterState.getVersion()); + logger.debug("in cluster compute, finished computing new cluster state for version: {}", newClusterState.getVersion()); return new TaskOutputs( taskInputs, previousClusterState, @@ -911,7 +911,6 @@ private ClusterTasksResult executeTasks(TaskInputs taskInputs, ClusterSt ClusterTasksResult clusterTasksResult; try { List inputs = taskInputs.updateTasks.stream().map(tUpdateTask -> tUpdateTask.task).collect(Collectors.toList()); - clusterTasksResult = taskInputs.executor.execute(previousClusterState, inputs); if (previousClusterState != clusterTasksResult.resultingState && previousClusterState.nodes().isLocalNodeElectedClusterManager() diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 0040e96213b67..a8d4ebcf23dab 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -681,24 +681,6 @@ protected Node( clusterManagerMetrics ); clusterService.addStateApplier(scriptService); - /* - if (DiscoveryNode.isClusterManagerNode(settings)) { - // dummy applier for repro race condition - clusterService.addStateApplier(new ClusterStateApplier() { - @Override - public void applyClusterState(ClusterChangedEvent event) { - if (event.nodesRemoved()) { - try { - Thread.sleep(5000); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - } - }); - } - - */ resourcesToClose.add(clusterService); final Set> consistentSettings = settingsModule.getConsistentSettings(); if (consistentSettings.isEmpty() == false) { diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index d97ba0ed775a3..e00e9aa7013e3 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -65,7 +65,6 @@ public class ClusterConnectionManager implements ConnectionManager { private final ConcurrentMap connectedNodes = ConcurrentCollections.newConcurrentMap(); private final ConcurrentMap> pendingConnections = ConcurrentCollections.newConcurrentMap(); - private final Set pendingJoins = ConcurrentCollections.newConcurrentSet(); private final Set pendingLeft = ConcurrentCollections.newConcurrentSet(); private final AbstractRefCounted connectingRefCounter = new AbstractRefCounted("connection manager") { @Override @@ -113,33 +112,17 @@ public void openConnection(DiscoveryNode node, ConnectionProfile connectionProfi internalOpenConnection(node, resolvedProfile, listener); } - @Override - public Set getNodesJoinInProgress() { - return this.pendingJoins; - } - @Override public Set getNodesLeftInProgress() { return this.pendingLeft; } - @Override - public void markPendingJoins(List nodes) { - logger.info("marking pending join for nodes: [{}]", nodes); - pendingJoins.addAll(nodes); - } - @Override public void markPendingLefts(List nodes) { logger.info("marking pending left for nodes: [{}]", nodes); pendingLeft.addAll(nodes); } - @Override - public boolean markPendingJoinCompleted(DiscoveryNode discoveryNode) { - return pendingJoins.remove(discoveryNode); - } - @Override public boolean markPendingLeftCompleted(DiscoveryNode discoveryNode) { return pendingLeft.remove(discoveryNode); diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 5ae845c634fc2..1015c71342ac4 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -64,16 +64,10 @@ void connectToNode( boolean nodeConnected(DiscoveryNode node); - Set getNodesJoinInProgress(); - Set getNodesLeftInProgress(); - boolean markPendingJoinCompleted(DiscoveryNode node); - boolean markPendingLeftCompleted(DiscoveryNode node); - void markPendingJoins(List nodes); - void markPendingLefts(List nodes); void disconnectFromNode(DiscoveryNode node); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index d0b8d6f57959d..d486cb7cde143 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -119,31 +119,16 @@ public ConnectionProfile getConnectionProfile() { return delegate.getConnectionProfile(); } - @Override - public Set getNodesJoinInProgress() { - throw new UnsupportedOperationException("not implemented"); - } - @Override public Set getNodesLeftInProgress() { throw new UnsupportedOperationException("not implemented"); } - @Override - public boolean markPendingJoinCompleted(DiscoveryNode node) { - throw new UnsupportedOperationException("not implemented"); - } - @Override public boolean markPendingLeftCompleted(DiscoveryNode node) { throw new UnsupportedOperationException("not implemented"); } - @Override - public void markPendingJoins(List nodes) { - throw new UnsupportedOperationException("not implemented"); - } - @Override public void markPendingLefts(List nodes) { throw new UnsupportedOperationException("not implemented"); diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index 715a4101f7c6e..f502c64a2f238 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -483,24 +483,16 @@ public void connectToNode(DiscoveryNode node, ActionListener listener) thr connectToNode(node, null, listener); } - public Set getNodesJoinInProgress() { - return connectionManager.getNodesJoinInProgress(); - } - public Set getNodesLeftInProgress() { return connectionManager.getNodesLeftInProgress(); } // this public void markPendingConnections(DiscoveryNodes.Delta nodesDelta) { - connectionManager.markPendingJoins(nodesDelta.addedNodes()); + // connectionManager.markPendingJoins(nodesDelta.addedNodes()); connectionManager.markPendingLefts(nodesDelta.removedNodes()); } - public boolean markPendingJoinAsCompleted(DiscoveryNode node) { - return connectionManager.markPendingJoinCompleted(node); - } - public boolean markPendingLeftAsCompleted(DiscoveryNode node) { return connectionManager.markPendingLeftCompleted(node); } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index de8ef2381972e..0f35fd90fae30 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -119,32 +119,16 @@ public void connectToNode( delegate.connectToNode(node, connectionProfile, connectionValidator, listener); } - @Override - public Set getNodesJoinInProgress() { - - return delegate.getNodesJoinInProgress(); - } - @Override public Set getNodesLeftInProgress() { return delegate.getNodesLeftInProgress(); } - @Override - public boolean markPendingJoinCompleted(DiscoveryNode node) { - return delegate.markPendingJoinCompleted(node); - } - @Override public boolean markPendingLeftCompleted(DiscoveryNode node) { return delegate.markPendingLeftCompleted(node); } - @Override - public void markPendingJoins(List nodes) { - delegate.markPendingJoins(nodes); - } - @Override public void markPendingLefts(List nodes) { delegate.markPendingLefts(nodes); From 12b89b41226947a186a798a1eae192cfc41a46a1 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Tue, 3 Sep 2024 13:03:59 +0530 Subject: [PATCH 06/36] Move all logic to transportService/ClusterConnectionManager as per comments Signed-off-by: Rahul Karajgikar --- .../cluster/NodeConnectionsService.java | 8 +---- .../cluster/coordination/Coordinator.java | 16 ++------- .../coordination/FollowersChecker.java | 2 -- .../cluster/coordination/LeaderChecker.java | 2 +- .../service/ClusterApplierService.java | 5 +-- .../transport/ClusterConnectionManager.java | 36 +++++++++++-------- .../transport/ConnectionManager.java | 6 +--- .../transport/RemoteConnectionManager.java | 14 ++------ .../transport/TransportService.java | 13 ++----- .../transport/StubbableConnectionManager.java | 14 ++------ 10 files changed, 37 insertions(+), 79 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index 3a27860b9c8d4..1c12c260b3929 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -172,7 +172,6 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { } for (final DiscoveryNode discoveryNode : nodesToDisconnect) { - logger.info("NodeConnectionsService - disconnecting from node [{}] in loop", discoveryNode); runnables.add(targetsByNode.get(discoveryNode).disconnect()); } } @@ -390,13 +389,8 @@ public String toString() { protected void doRun() { assert Thread.holdsLock(mutex) == false : "mutex unexpectedly held"; transportService.disconnectFromNode(discoveryNode); - transportService.markPendingLeftAsCompleted(discoveryNode); consecutiveFailureCount.set(0); - logger.debug( - "disconnected from {} and marked pending left as completed. " + "pending lefts: [{}]", - discoveryNode, - transportService.getNodesLeftInProgress() - ); + logger.debug("disconnected from {}", discoveryNode); onCompletion(ActivityType.DISCONNECTING, null, connectActivity); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 0ac70bd1ae59c..50d5dfa3313e2 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -632,16 +632,6 @@ private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback ); return; } - // if node-left is still in progress, we fail the joinRequest early - if (transportService.getNodesLeftInProgress().contains(joinRequest.getSourceNode())) { - joinCallback.onFailure( - new IllegalStateException( - "cannot join node [" + joinRequest.getSourceNode() + "] because node-left is currently in progress for this node" - - ) - ); - return; - } transportService.connectToNode(joinRequest.getSourceNode(), ActionListener.wrap(ignore -> { final ClusterState stateForJoinValidation = getStateForClusterManagerService(); @@ -1371,13 +1361,13 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) currentPublication = Optional.of(publication); final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes(); + // marking pending disconnects before publish + // if we try to joinRequest during pending disconnect, it should fail + transportService.markPendingDisconnects(clusterChangedEvent.nodesDelta()); leaderChecker.setCurrentNodes(publishNodes); followersChecker.setCurrentNodes(publishNodes); lagDetector.setTrackedNodes(publishNodes); coordinationState.get().handlePrePublish(clusterState); - // trying to mark pending disconnects before publish - // if we try to joinRequest during pending disconnect, it should fail - transportService.markPendingConnections(clusterChangedEvent.nodesDelta()); publication.start(followersChecker.getFaultyNodes()); } } catch (Exception e) { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java index 5a2ef2fa11709..2ec0dabd91786 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java @@ -161,7 +161,6 @@ public FollowersChecker( transportService.addConnectionListener(new TransportConnectionListener() { @Override public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) { - logger.info("in transport listener onNodeDisconnected"); handleDisconnectedNode(node); } }); @@ -391,7 +390,6 @@ public void handleException(TransportException exp) { failureCountSinceLastSuccess++; final String reason; - if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { logger.info(() -> new ParameterizedMessage("{} disconnected", FollowerChecker.this), exp); reason = "disconnected"; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java index f5aecdfd31462..4fd2c0eb13073 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java @@ -231,7 +231,7 @@ private void handleLeaderCheck(LeaderCheckRequest request) { "rejecting leader check since [" + request.getSender() + "] has been removed from the cluster" ); } else { - logger.debug("handling {}", request); + logger.trace("handling {}", request); } } diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java index d68bd5edf0617..d0b6f812e9ee2 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java @@ -580,6 +580,7 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl logger.debug("apply cluster state with version {}", newClusterState.version()); callClusterStateAppliers(clusterChangedEvent, stopWatch); + logger.debug("completed calling appliers of cluster state for version {}", newClusterState.version()); nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); @@ -592,11 +593,11 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl + " on " + newClusterState.nodes().getLocalNode(); - logger.info("set locally applied cluster state to version {}", newClusterState.version()); + logger.debug("set locally applied cluster state to version {}", newClusterState.version()); state.set(newClusterState); callClusterStateListeners(clusterChangedEvent, stopWatch); - logger.info("completed appliers and listeners of cluster state for version {}", newClusterState.version()); + logger.debug("completed calling listeners of cluster state for version {}", newClusterState.version()); } protected void connectToNodesAndWait(ClusterState newClusterState) { diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index e00e9aa7013e3..6a7ea5c627d7e 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -65,7 +65,13 @@ public class ClusterConnectionManager implements ConnectionManager { private final ConcurrentMap connectedNodes = ConcurrentCollections.newConcurrentMap(); private final ConcurrentMap> pendingConnections = ConcurrentCollections.newConcurrentMap(); - private final Set pendingLeft = ConcurrentCollections.newConcurrentSet(); + /** + This set is used only by cluster-manager nodes. + Nodes are marked as pending disconnect right before cluster state publish phase. + They are cleared up as part of cluster state apply commit phase + This is to avoid connections from being made to nodes that are in the process of leaving the cluster + */ + private final Set pendingDisconnections = ConcurrentCollections.newConcurrentSet(); private final AbstractRefCounted connectingRefCounter = new AbstractRefCounted("connection manager") { @Override protected void closeInternal() { @@ -113,19 +119,9 @@ public void openConnection(DiscoveryNode node, ConnectionProfile connectionProfi } @Override - public Set getNodesLeftInProgress() { - return this.pendingLeft; - } - - @Override - public void markPendingLefts(List nodes) { - logger.info("marking pending left for nodes: [{}]", nodes); - pendingLeft.addAll(nodes); - } - - @Override - public boolean markPendingLeftCompleted(DiscoveryNode discoveryNode) { - return pendingLeft.remove(discoveryNode); + public void markPendingDisconnects(List nodes) { + logger.info("marking pending disconnects for nodes: [{}]", nodes); + pendingDisconnections.addAll(nodes); } /** @@ -147,6 +143,16 @@ public void connectToNode( return; } + // if node-left is still in progress, we fail the connect request early + if (pendingDisconnections.contains(node)) { + listener.onFailure( + new IllegalStateException( + "blocked connection to node [" + node + "] because node-left is currently in progress for this node" + ) + ); + return; + } + if (connectingRefCounter.tryIncRef() == false) { listener.onFailure(new IllegalStateException("connection manager is closed")); return; @@ -246,6 +252,8 @@ public void disconnectFromNode(DiscoveryNode node) { // if we found it and removed it we close nodeChannels.close(); } + pendingDisconnections.remove(node); + logger.info("Removed node {} from pending disconnects list", node); } /** diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 1015c71342ac4..5bc963d2d4815 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -64,11 +64,7 @@ void connectToNode( boolean nodeConnected(DiscoveryNode node); - Set getNodesLeftInProgress(); - - boolean markPendingLeftCompleted(DiscoveryNode node); - - void markPendingLefts(List nodes); + void markPendingDisconnects(List nodes); void disconnectFromNode(DiscoveryNode node); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index d486cb7cde143..c175879ee6a6a 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -120,18 +120,8 @@ public ConnectionProfile getConnectionProfile() { } @Override - public Set getNodesLeftInProgress() { - throw new UnsupportedOperationException("not implemented"); - } - - @Override - public boolean markPendingLeftCompleted(DiscoveryNode node) { - throw new UnsupportedOperationException("not implemented"); - } - - @Override - public void markPendingLefts(List nodes) { - throw new UnsupportedOperationException("not implemented"); + public void markPendingDisconnects(List nodes) { + delegate.markPendingDisconnects(nodes); } public Transport.Connection getAnyRemoteConnection() { diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index f502c64a2f238..ce67e674598f2 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -483,18 +483,9 @@ public void connectToNode(DiscoveryNode node, ActionListener listener) thr connectToNode(node, null, listener); } - public Set getNodesLeftInProgress() { - return connectionManager.getNodesLeftInProgress(); - } - - // this - public void markPendingConnections(DiscoveryNodes.Delta nodesDelta) { - // connectionManager.markPendingJoins(nodesDelta.addedNodes()); - connectionManager.markPendingLefts(nodesDelta.removedNodes()); - } - public boolean markPendingLeftAsCompleted(DiscoveryNode node) { - return connectionManager.markPendingLeftCompleted(node); + public void markPendingDisconnects(DiscoveryNodes.Delta nodesDelta) { + connectionManager.markPendingDisconnects(nodesDelta.removedNodes()); } public void connectToExtensionNode(DiscoveryNode node, ActionListener listener) throws ConnectTransportException { diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index 0f35fd90fae30..44bd726223544 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -120,18 +120,8 @@ public void connectToNode( } @Override - public Set getNodesLeftInProgress() { - return delegate.getNodesLeftInProgress(); - } - - @Override - public boolean markPendingLeftCompleted(DiscoveryNode node) { - return delegate.markPendingLeftCompleted(node); - } - - @Override - public void markPendingLefts(List nodes) { - delegate.markPendingLefts(nodes); + public void markPendingDisconnects(List nodes) { + delegate.markPendingDisconnects(nodes); } @Override From e8029c98c96f0fbb8e0b78206433de2b65e37118 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Tue, 3 Sep 2024 13:59:22 +0530 Subject: [PATCH 07/36] apply spotless Signed-off-by: Rahul Karajgikar --- .../src/main/java/org/opensearch/transport/TransportService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index ce67e674598f2..01d90699c7588 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -483,7 +483,6 @@ public void connectToNode(DiscoveryNode node, ActionListener listener) thr connectToNode(node, null, listener); } - public void markPendingDisconnects(DiscoveryNodes.Delta nodesDelta) { connectionManager.markPendingDisconnects(nodesDelta.removedNodes()); } From eb58389841eb694950978f824d3e95f2ace5cc14 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 4 Sep 2024 10:21:16 +0530 Subject: [PATCH 08/36] remove log line Signed-off-by: Rahul Karajgikar --- .../java/org/opensearch/transport/ClusterConnectionManager.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 6a7ea5c627d7e..990aac62382c9 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -299,7 +299,6 @@ private void internalOpenConnection( ConnectionProfile connectionProfile, ActionListener listener ) { - logger.info("calling transport.openConnection"); transport.openConnection(node, connectionProfile, ActionListener.map(listener, connection -> { assert Transports.assertNotTransportThread("internalOpenConnection success"); try { From 136aa4fc7238b299445455187595bdb80e0e3b64 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 4 Sep 2024 11:42:10 +0530 Subject: [PATCH 09/36] fix merge conflict Signed-off-by: Rahul Karajgikar --- .../coordination/PublicationTransportHandler.java | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index d11165f5ca357..cdf331b7bb577 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -502,21 +502,7 @@ public void onFailure(Exception e) { } else { responseActionListener = listener; } -<<<<<<< HEAD sendClusterState(destination, responseActionListener); -======= - // TODO Decide to send remote state before starting publication by checking remote publication on all nodes - if (sendRemoteState && destination.isRemoteStatePublicationEnabled()) { - logger.trace("sending remote cluster state version [{}] to [{}]", newState.version(), destination); - sendRemoteClusterState(destination, publishRequest.getAcceptedState(), responseActionListener); - } else if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { - logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); - sendFullClusterState(destination, responseActionListener); - } else { - logger.trace("sending cluster state diff for version [{}] to [{}]", newState.version(), destination); - sendClusterStateDiff(destination, responseActionListener); - } ->>>>>>> f0cf40ce03f (cleanup unused code and remove added log lines) } public void sendApplyCommit( From 514ec9f8d3666e642bdd2e54b4de94f70ff138b7 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 4 Sep 2024 12:47:04 +0530 Subject: [PATCH 10/36] remove log line Signed-off-by: Rahul Karajgikar --- .../java/org/opensearch/transport/ClusterConnectionManager.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 990aac62382c9..978e81e34c504 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -159,7 +159,6 @@ public void connectToNode( } if (connectedNodes.containsKey(node)) { - logger.info("connectedNodes already has key for node [{}]", node); connectingRefCounter.decRef(); listener.onResponse(null); return; From c5bcafa14e2d417c913093689f53adad0d54d4c4 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 4 Sep 2024 20:01:06 +0530 Subject: [PATCH 11/36] Additional check to fix UTs Signed-off-by: Rahul Karajgikar --- .../cluster/NodeConnectionsService.java | 16 ++++++++++++++++ .../transport/ClusterConnectionManager.java | 11 +++++++++++ .../opensearch/transport/ConnectionManager.java | 4 ++++ .../transport/RemoteConnectionManager.java | 10 ++++++++++ .../opensearch/transport/TransportService.java | 8 ++++++++ .../transport/StubbableConnectionManager.java | 10 ++++++++++ 6 files changed, 59 insertions(+) diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index 1c12c260b3929..37f02911d5c14 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -62,6 +62,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; import static org.opensearch.common.settings.Setting.Property; import static org.opensearch.common.settings.Setting.positiveTimeSetting; @@ -170,10 +171,23 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { for (final DiscoveryNode discoveryNode : discoveryNodes) { nodesToDisconnect.remove(discoveryNode); } + logger.info(" targetsByNode is {}", targetsByNode.keySet()); + logger.info(" nodes to disconnect set is [{}]", nodesToDisconnect); for (final DiscoveryNode discoveryNode : nodesToDisconnect) { runnables.add(targetsByNode.get(discoveryNode).disconnect()); } + + // There might be some stale nodes that are in pendingDisconnect set from before but are not connected anymore + // This code block clears the pending disconnect for these nodes to avoid permanently blocking node joins + // This situation should ideally not happen + transportService.markDisconnectAsCompleted( + transportService.getPendingDisconnections() + .stream() + .filter(discoveryNode -> !discoveryNodes.nodeExists(discoveryNode)) + .collect(Collectors.toSet()) + ); + } runnables.forEach(Runnable::run); } @@ -511,6 +525,8 @@ private void onCompletion(ActivityType completedActivityType, @Nullable Exceptio if (completedActivityType.equals(ActivityType.DISCONNECTING)) { final ConnectionTarget removedTarget = targetsByNode.remove(discoveryNode); + // if we remove from targetsByNode, we also remove from underlying pendingDisconnects for consistency + // transportService.markDisconnectAsCompleted(new HashSet<>(Collections.singleton(discoveryNode))); assert removedTarget == this : removedTarget + " vs " + this; } } else { diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 978e81e34c504..47c2fb325f217 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -124,6 +124,17 @@ public void markPendingDisconnects(List nodes) { pendingDisconnections.addAll(nodes); } + @Override + public Set getPendingDisconnections() { + return pendingDisconnections; + } + + @Override + public void markDisconnectAsCompleted(Set nodes) { + logger.info("marking disconnect as completed for nodes: [{}]"); + pendingDisconnections.removeAll(nodes); + } + /** * Connects to a node with the given connection profile. If the node is already connected this method has no effect. * Once a successful is established, it can be validated before being exposed. diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 5bc963d2d4815..607ff05848289 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -53,6 +53,10 @@ public interface ConnectionManager extends Closeable { void openConnection(DiscoveryNode node, ConnectionProfile connectionProfile, ActionListener listener); + Set getPendingDisconnections(); + + void markDisconnectAsCompleted(Set nodes); + void connectToNode( DiscoveryNode node, ConnectionProfile connectionProfile, diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index c175879ee6a6a..40a0e5bdb3d36 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -124,6 +124,16 @@ public void markPendingDisconnects(List nodes) { delegate.markPendingDisconnects(nodes); } + @Override + public Set getPendingDisconnections() { + return delegate.getPendingDisconnections(); + } + + @Override + public void markDisconnectAsCompleted(Set nodes) { + delegate.markDisconnectAsCompleted(nodes); + } + public Transport.Connection getAnyRemoteConnection() { List localConnectedNodes = this.connectedNodes; long curr; diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index 01d90699c7588..db8a425df57ba 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -487,6 +487,14 @@ public void markPendingDisconnects(DiscoveryNodes.Delta nodesDelta) { connectionManager.markPendingDisconnects(nodesDelta.removedNodes()); } + public Set getPendingDisconnections() { + return connectionManager.getPendingDisconnections(); + } + + public void markDisconnectAsCompleted(Set nodes) { + connectionManager.markDisconnectAsCompleted(nodes); + } + public void connectToExtensionNode(DiscoveryNode node, ActionListener listener) throws ConnectTransportException { connectToExtensionNode(node, null, listener); } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index 44bd726223544..34977ffa240f7 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -124,6 +124,16 @@ public void markPendingDisconnects(List nodes) { delegate.markPendingDisconnects(nodes); } + @Override + public Set getPendingDisconnections() { + return delegate.getPendingDisconnections(); + } + + @Override + public void markDisconnectAsCompleted(Set nodes) { + delegate.markDisconnectAsCompleted(nodes); + } + @Override public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); From dddeb0bc1604c5300f7ebb42adb52d38465c14f0 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Thu, 5 Sep 2024 01:29:03 +0530 Subject: [PATCH 12/36] empty commit for gradle check rerun Signed-off-by: Rahul Karajgikar From 38b10866da8a3c936609e69c7ad30aa94f2345cb Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Thu, 5 Sep 2024 11:05:15 +0530 Subject: [PATCH 13/36] fix log Signed-off-by: Rahul Karajgikar --- .../java/org/opensearch/transport/ClusterConnectionManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 47c2fb325f217..94d73e36e2238 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -131,7 +131,7 @@ public Set getPendingDisconnections() { @Override public void markDisconnectAsCompleted(Set nodes) { - logger.info("marking disconnect as completed for nodes: [{}]"); + logger.debug("marking disconnect as completed for nodes: [{}]", nodes); pendingDisconnections.removeAll(nodes); } From 82748db396a3e9e84566545f8c600457c503fca7 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Thu, 5 Sep 2024 12:27:12 +0530 Subject: [PATCH 14/36] cleanup Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 110 ++++-------------- .../cluster/NodeConnectionsService.java | 4 +- .../cluster/coordination/Coordinator.java | 4 +- .../cluster/coordination/Publication.java | 3 +- .../cluster/service/MasterService.java | 1 - .../transport/ClusterConnectionManager.java | 46 ++++---- .../transport/ConnectionManager.java | 13 +-- .../transport/RemoteConnectionManager.java | 16 +-- .../transport/TransportService.java | 24 ++-- .../transport/StubbableConnectionManager.java | 13 +-- 10 files changed, 77 insertions(+), 157 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index fb4dcf0ce9063..9dd49483f1f61 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -33,11 +33,8 @@ package org.opensearch.cluster.coordination; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; -import org.opensearch.cluster.ClusterChangedEvent; -import org.opensearch.cluster.ClusterStateApplier; import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.metadata.IndexMetadata; -import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Settings; import org.opensearch.index.MockEngineFactoryPlugin; @@ -51,9 +48,7 @@ import org.opensearch.test.store.MockFSIndexStore; import org.opensearch.test.transport.MockTransportService; import org.opensearch.test.transport.StubbableTransport; -import org.opensearch.transport.Transport; import org.opensearch.transport.TransportChannel; -import org.opensearch.transport.TransportConnectionListener; import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportRequestHandler; import org.opensearch.transport.TransportService; @@ -104,22 +99,14 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) .build(); - // start a cluster-manager node + // start a 3 node cluster with 1 cluster-manager final String cm = internalCluster().startNode(nodeSettings); - - logger.info("--> spawning node t1"); - final String blueNodeName = internalCluster().startNode( - Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build() - ); - logger.info("--> spawning node t2"); + internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()); final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); - logger.info("--> initial health check"); ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); assertThat(response.isTimedOut(), is(false)); - logger.info("--> done initial health check"); - logger.info("--> creating index"); client().admin() .indices() .prepareCreate(indexName) @@ -130,63 +117,22 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) ) .get(); - logger.info("--> done creating index"); - MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, cm); - MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance( - TransportService.class, - redNodeName - ); ClusterService cmClsService = internalCluster().getInstance(ClusterService.class, cm); - // simulate a slow applier on the cm - cmClsService.addStateApplier(new ClusterStateApplier() { - @Override - public void applyClusterState(ClusterChangedEvent event) { - if (event.nodesRemoved()) { - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } + // Simulate a slow applier on the cm to delay node-left state application + cmClsService.addStateApplier(event -> { + if (event.nodesRemoved()) { + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + throw new RuntimeException(e); } } }); - cmTransportService.connectionManager().addListener(new TransportConnectionListener() { - - @Override - public void onConnectionOpened(Transport.Connection connection) { - // try { - // Thread.sleep(500); - // } catch (InterruptedException e) { - // throw new RuntimeException(e); - // } - - } - - @Override - public void onNodeConnected(DiscoveryNode node, Transport.Connection connection) { - // if (node.getName().equals("node_t2")) { - // try { - // Thread.sleep(250); - // } catch (InterruptedException e) { - // throw new RuntimeException(e); - // } - // } - } - - // @Override - // public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) { - // try { - // Thread.sleep(5000); - // } catch (InterruptedException e) { - // throw new RuntimeException(e); - // } - // } - }); AtomicBoolean bb = new AtomicBoolean(); - // simulate followerchecker failure - ConnectionDelay handlingBehavior = new ConnectionDelay(FOLLOWER_CHECK_ACTION_NAME, () -> { + // Simulate followerchecker failure on 1 node when bb is false + ConnectionDelay handlingBehavior = new ConnectionDelay(() -> { if (bb.get()) { return; } @@ -197,55 +143,39 @@ public void onNodeConnected(DiscoveryNode node, Transport.Connection connection) } throw new NodeHealthCheckFailureException("non writable exception"); }); + MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance( + TransportService.class, + redNodeName + ); redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, handlingBehavior); + // Loop runs 10 times to ensure race condition gets reproduced for (int i = 0; i < 10; i++) { - bb.set(false); // fail followerchecker by force to trigger node disconnect - logger.info("--> disconnecting from red node, iteration: " + i); - // cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); + bb.set(false); + // fail followerchecker by force to trigger node disconnect // now followerchecker should fail and trigger node left - logger.info("--> checking cluster health 2 nodes, iteration: " + i); ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); assertThat(response1.isTimedOut(), is(false)); - logger.info("--> completed checking cluster health 2 nodes, iteration: " + i); // once we know a node has left, we can re-enable followerchecker to work normally bb.set(true); - Thread.sleep(1500); - logger.info("--> checking cluster health 3 nodes, iteration: " + i); ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response2.isTimedOut(), is(false)); - logger.info("--> completed checking cluster health 3 nodes, iteration: " + i); - - Thread.sleep(1500); - // Checking again - logger.info("--> checking cluster health 3 nodes again, iteration: " + i); + // Checking again to validate stability ClusterHealthResponse response3 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response3.isTimedOut(), is(false)); - logger.info("--> completed checking cluster health 3 nodes again, iteration: " + i); } bb.set(true); - logger.info("-->first validation outside loop"); - response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); - assertThat(response.isTimedOut(), is(false)); - - logger.info("-->sleeping for 20s"); - Thread.sleep(20000); - - logger.info("-->second validation outside loop after sleep"); response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); } private class ConnectionDelay implements StubbableTransport.RequestHandlingBehavior { - - private final String actionName; private final Runnable connectionBreaker; - private ConnectionDelay(String actionName, Runnable connectionBreaker) { - this.actionName = actionName; + private ConnectionDelay(Runnable connectionBreaker) { this.connectionBreaker = connectionBreaker; } diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index 37f02911d5c14..a19b3e24ba9c9 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -171,8 +171,6 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { for (final DiscoveryNode discoveryNode : discoveryNodes) { nodesToDisconnect.remove(discoveryNode); } - logger.info(" targetsByNode is {}", targetsByNode.keySet()); - logger.info(" nodes to disconnect set is [{}]", nodesToDisconnect); for (final DiscoveryNode discoveryNode : nodesToDisconnect) { runnables.add(targetsByNode.get(discoveryNode).disconnect()); @@ -181,7 +179,7 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { // There might be some stale nodes that are in pendingDisconnect set from before but are not connected anymore // This code block clears the pending disconnect for these nodes to avoid permanently blocking node joins // This situation should ideally not happen - transportService.markDisconnectAsCompleted( + transportService.removePendingDisconnections( transportService.getPendingDisconnections() .stream() .filter(discoveryNode -> !discoveryNodes.nodeExists(discoveryNode)) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 50d5dfa3313e2..e2f72dd11c3c4 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -371,7 +371,6 @@ void onFollowerCheckRequest(FollowerCheckRequest followerCheckRequest) { } else if (joinHelper.isJoinPending()) { logger.trace("onFollowerCheckRequest: rejoining cluster-manager, responding successfully to {}", followerCheckRequest); } else { - logger.info("Mode: {}, ", mode); logger.trace("onFollowerCheckRequest: received check from faulty cluster-manager, rejecting {}", followerCheckRequest); throw new CoordinationStateRejectedException( "onFollowerCheckRequest: received check from faulty cluster-manager, rejecting " + followerCheckRequest @@ -1363,7 +1362,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes(); // marking pending disconnects before publish // if we try to joinRequest during pending disconnect, it should fail - transportService.markPendingDisconnects(clusterChangedEvent.nodesDelta()); + transportService.setPendingDisconnections(clusterChangedEvent.nodesDelta()); leaderChecker.setCurrentNodes(publishNodes); followersChecker.setCurrentNodes(publishNodes); lagDetector.setTrackedNodes(publishNodes); @@ -1468,7 +1467,6 @@ private class CoordinatorPeerFinder extends PeerFinder { protected void onActiveClusterManagerFound(DiscoveryNode clusterManagerNode, long term) { synchronized (mutex) { ensureTermAtLeast(clusterManagerNode, term); - logger.info("sending join request to {}", clusterManagerNode); joinHelper.sendJoinRequest(clusterManagerNode, getCurrentTerm(), joinWithDestination(lastJoin, clusterManagerNode, term)); } } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Publication.java b/server/src/main/java/org/opensearch/cluster/coordination/Publication.java index 65ce59d968189..3f7218939be92 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Publication.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Publication.java @@ -85,8 +85,7 @@ public Publication(PublishRequest publishRequest, AckListener ackListener, LongS } public void start(Set faultyNodes) { - logger.trace("publishing {} to {}", publishRequest, publicationTargets); - logger.info("publishing version {} to {}", publishRequest.getAcceptedState().getVersion(), publicationTargets); + logger.debug("publishing version {} to {}", publishRequest.getAcceptedState().getVersion(), publicationTargets); for (final DiscoveryNode faultyNode : faultyNodes) { onFaultyNode(faultyNode); diff --git a/server/src/main/java/org/opensearch/cluster/service/MasterService.java b/server/src/main/java/org/opensearch/cluster/service/MasterService.java index d295db2c5289d..713de8cdd0fda 100644 --- a/server/src/main/java/org/opensearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/MasterService.java @@ -464,7 +464,6 @@ private void handleException(String summary, long startTimeMillis, ClusterState private TaskOutputs calculateTaskOutputs(TaskInputs taskInputs, ClusterState previousClusterState, String taskSummary) { ClusterTasksResult clusterTasksResult = executeTasks(taskInputs, previousClusterState, taskSummary); ClusterState newClusterState = patchVersions(previousClusterState, clusterTasksResult); - logger.debug("in cluster compute, finished computing new cluster state for version: {}", newClusterState.getVersion()); return new TaskOutputs( taskInputs, previousClusterState, diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 94d73e36e2238..c022aa6c2a2dd 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -45,7 +45,6 @@ import java.util.Collections; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentMap; @@ -118,23 +117,6 @@ public void openConnection(DiscoveryNode node, ConnectionProfile connectionProfi internalOpenConnection(node, resolvedProfile, listener); } - @Override - public void markPendingDisconnects(List nodes) { - logger.info("marking pending disconnects for nodes: [{}]", nodes); - pendingDisconnections.addAll(nodes); - } - - @Override - public Set getPendingDisconnections() { - return pendingDisconnections; - } - - @Override - public void markDisconnectAsCompleted(Set nodes) { - logger.debug("marking disconnect as completed for nodes: [{}]", nodes); - pendingDisconnections.removeAll(nodes); - } - /** * Connects to a node with the given connection profile. If the node is already connected this method has no effect. * Once a successful is established, it can be validated before being exposed. @@ -147,7 +129,7 @@ public void connectToNode( ConnectionValidator connectionValidator, ActionListener listener ) throws ConnectTransportException { - logger.info("[{}]connecting to node [{}]", Thread.currentThread().getName(), node); + logger.trace("[{}]connecting to node [{}]", Thread.currentThread().getName(), node); ConnectionProfile resolvedProfile = ConnectionProfile.resolveConnectionProfile(connectionProfile, defaultProfile); if (node == null) { listener.onFailure(new ConnectTransportException(null, "can't connect to a null node")); @@ -195,16 +177,16 @@ public void connectToNode( assert Transports.assertNotTransportThread("connection validator success"); try { if (connectedNodes.putIfAbsent(node, conn) != null) { - logger.info("existing connection to node [{}], closing new redundant connection", node); + logger.debug("existing connection to node [{}], closing new redundant connection", node); IOUtils.closeWhileHandlingException(conn); } else { - logger.info("connected to node [{}]", node); + logger.debug("connected to node [{}]", node); try { connectionListener.onNodeConnected(node, conn); } finally { final Transport.Connection finalConnection = conn; conn.addCloseListener(ActionListener.wrap(() -> { - logger.info("unregistering {} after connection close and marking as disconnected", node); + logger.trace("unregistering {} after connection close and marking as disconnected", node); connectedNodes.remove(node, finalConnection); connectionListener.onNodeDisconnected(node, conn); })); @@ -263,7 +245,24 @@ public void disconnectFromNode(DiscoveryNode node) { nodeChannels.close(); } pendingDisconnections.remove(node); - logger.info("Removed node {} from pending disconnects list", node); + logger.debug("Removed node [{}] from pending disconnections list", node); + } + + @Override + public Set getPendingDisconnections() { + return pendingDisconnections; + } + + @Override + public void setPendingDisconnections(Set nodes) { + logger.debug("set pending disconnection for nodes: [{}]", nodes); + pendingDisconnections.addAll(nodes); + } + + @Override + public void removePendingDisconnections(Set nodes) { + logger.debug("marking disconnection as completed for nodes: [{}]", nodes); + pendingDisconnections.removeAll(nodes); } /** @@ -317,7 +316,6 @@ private void internalOpenConnection( connection.addCloseListener(ActionListener.wrap(() -> connectionListener.onConnectionClosed(connection))); } if (connection.isClosed()) { - logger.info("channel closed while connecting, throwing exception"); throw new ConnectTransportException(node, "a channel closed while connecting"); } return connection; diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 607ff05848289..88d89d952a2f6 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -36,7 +36,6 @@ import org.opensearch.core.action.ActionListener; import java.io.Closeable; -import java.util.List; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; @@ -53,10 +52,6 @@ public interface ConnectionManager extends Closeable { void openConnection(DiscoveryNode node, ConnectionProfile connectionProfile, ActionListener listener); - Set getPendingDisconnections(); - - void markDisconnectAsCompleted(Set nodes); - void connectToNode( DiscoveryNode node, ConnectionProfile connectionProfile, @@ -68,10 +63,14 @@ void connectToNode( boolean nodeConnected(DiscoveryNode node); - void markPendingDisconnects(List nodes); - void disconnectFromNode(DiscoveryNode node); + Set getPendingDisconnections(); + + void setPendingDisconnections(Set nodes); + + void removePendingDisconnections(Set nodes); + Set getAllConnectedNodes(); int size(); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index 40a0e5bdb3d36..aabc2a6b6a796 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -115,23 +115,23 @@ public void disconnectFromNode(DiscoveryNode node) { } @Override - public ConnectionProfile getConnectionProfile() { - return delegate.getConnectionProfile(); + public Set getPendingDisconnections() { + return delegate.getPendingDisconnections(); } @Override - public void markPendingDisconnects(List nodes) { - delegate.markPendingDisconnects(nodes); + public void setPendingDisconnections(Set nodes) { + delegate.setPendingDisconnections(nodes); } @Override - public Set getPendingDisconnections() { - return delegate.getPendingDisconnections(); + public void removePendingDisconnections(Set nodes) { + delegate.removePendingDisconnections(nodes); } @Override - public void markDisconnectAsCompleted(Set nodes) { - delegate.markDisconnectAsCompleted(nodes); + public ConnectionProfile getConnectionProfile() { + return delegate.getConnectionProfile(); } public Transport.Connection getAnyRemoteConnection() { diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index db8a425df57ba..50d8fb7ab6dda 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -483,18 +483,6 @@ public void connectToNode(DiscoveryNode node, ActionListener listener) thr connectToNode(node, null, listener); } - public void markPendingDisconnects(DiscoveryNodes.Delta nodesDelta) { - connectionManager.markPendingDisconnects(nodesDelta.removedNodes()); - } - - public Set getPendingDisconnections() { - return connectionManager.getPendingDisconnections(); - } - - public void markDisconnectAsCompleted(Set nodes) { - connectionManager.markDisconnectAsCompleted(nodes); - } - public void connectToExtensionNode(DiscoveryNode node, ActionListener listener) throws ConnectTransportException { connectToExtensionNode(node, null, listener); } @@ -786,6 +774,18 @@ public void disconnectFromNode(DiscoveryNode node) { connectionManager.disconnectFromNode(node); } + public Set getPendingDisconnections() { + return connectionManager.getPendingDisconnections(); + } + + public void setPendingDisconnections(DiscoveryNodes.Delta nodesDelta) { + connectionManager.setPendingDisconnections(new HashSet<>(nodesDelta.removedNodes())); + } + + public void removePendingDisconnections(Set nodes) { + connectionManager.removePendingDisconnections(nodes); + } + public void addMessageListener(TransportMessageListener listener) { messageListener.listeners.add(listener); } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index 34977ffa240f7..78b894c6431df 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -40,7 +40,6 @@ import org.opensearch.transport.Transport; import org.opensearch.transport.TransportConnectionListener; -import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -120,8 +119,8 @@ public void connectToNode( } @Override - public void markPendingDisconnects(List nodes) { - delegate.markPendingDisconnects(nodes); + public void disconnectFromNode(DiscoveryNode node) { + delegate.disconnectFromNode(node); } @Override @@ -130,13 +129,13 @@ public Set getPendingDisconnections() { } @Override - public void markDisconnectAsCompleted(Set nodes) { - delegate.markDisconnectAsCompleted(nodes); + public void setPendingDisconnections(Set nodes) { + delegate.setPendingDisconnections(nodes); } @Override - public void disconnectFromNode(DiscoveryNode node) { - delegate.disconnectFromNode(node); + public void removePendingDisconnections(Set nodes) { + delegate.removePendingDisconnections(nodes); } @Override From ec9a0079d65026f09381ecbc5147d5e013fc0d20 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Thu, 5 Sep 2024 12:49:41 +0530 Subject: [PATCH 15/36] rename variable names, update logs and comments Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 10 +++++----- .../opensearch/cluster/coordination/Coordinator.java | 2 +- .../coordination/PublicationTransportHandler.java | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index 9dd49483f1f61..80ac5596cc8d4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -129,11 +129,11 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { } } }); - AtomicBoolean bb = new AtomicBoolean(); + AtomicBoolean succeedFollowerChecker = new AtomicBoolean(); // Simulate followerchecker failure on 1 node when bb is false ConnectionDelay handlingBehavior = new ConnectionDelay(() -> { - if (bb.get()) { + if (succeedFollowerChecker.get()) { return; } try { @@ -151,14 +151,14 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { // Loop runs 10 times to ensure race condition gets reproduced for (int i = 0; i < 10; i++) { - bb.set(false); // fail followerchecker by force to trigger node disconnect // now followerchecker should fail and trigger node left + succeedFollowerChecker.set(false); ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); assertThat(response1.isTimedOut(), is(false)); // once we know a node has left, we can re-enable followerchecker to work normally - bb.set(true); + succeedFollowerChecker.set(true); ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response2.isTimedOut(), is(false)); @@ -167,7 +167,7 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { assertThat(response3.isTimedOut(), is(false)); } - bb.set(true); + succeedFollowerChecker.set(true); response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index e2f72dd11c3c4..a4c295bf8c921 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -1361,7 +1361,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes(); // marking pending disconnects before publish - // if we try to joinRequest during pending disconnect, it should fail + // if a nodes tries to send a joinRequest while it is pending disconnect, it should fail transportService.setPendingDisconnections(clusterChangedEvent.nodesDelta()); leaderChecker.setCurrentNodes(publishNodes); followersChecker.setCurrentNodes(publishNodes); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index cdf331b7bb577..caed2b6eceb49 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -542,7 +542,7 @@ public String executor() { } public void sendClusterState(DiscoveryNode destination, ActionListener listener) { - logger.debug("sending cluster state over transport to node: {}", destination.getName()); + logger.trace("sending cluster state over transport to node: {}", destination.getName()); if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); sendFullClusterState(destination, listener); From 0964e0bbbf0f5e81f6a1450c5f85eec158723676 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Thu, 5 Sep 2024 16:04:35 +0530 Subject: [PATCH 16/36] add changelog Signed-off-by: Rahul Karajgikar --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfec81c070bab..2fe6970f36696 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix search_as_you_type not supporting multi-fields ([#15988](https://github.com/opensearch-project/OpenSearch/pull/15988)) - Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985)) - Fix infinite loop in nested agg ([#15931](https://github.com/opensearch-project/OpenSearch/pull/15931)) +- Fix race condition in node-join and node-left ([#15521](https://github.com/opensearch-project/OpenSearch/pull/15521)) ### Security From a0f45b94515a1da03c6ce2a287b03b6a200680b6 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Fri, 6 Sep 2024 14:58:52 +0530 Subject: [PATCH 17/36] Address comments + minor changes Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 81 +++++++++++++++++++ .../cluster/NodeConnectionsService.java | 9 ++- .../cluster/coordination/Coordinator.java | 2 +- .../transport/ClusterConnectionManager.java | 28 +++---- .../transport/ConnectionManager.java | 6 +- .../transport/RemoteConnectionManager.java | 13 +-- .../transport/TransportService.java | 11 +-- .../transport/StubbableConnectionManager.java | 13 +-- 8 files changed, 111 insertions(+), 52 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index 80ac5596cc8d4..51d71f95f33c8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -172,6 +172,87 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { assertThat(response.isTimedOut(), is(false)); } + public void testRestartDataNode() throws Exception { + final String indexName = "test"; + final Settings nodeSettings = Settings.builder() + .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") + .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s") + .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms") + .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") + .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .build(); + // start a 3 node cluster + internalCluster().startNode(nodeSettings); + internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()); + final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); + + ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); + assertThat(response.isTimedOut(), is(false)); + + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue") + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + ) + .get(); + + Settings redNodeDataPathSettings = internalCluster().dataPathSettings(redNodeName); + logger.info("-> stopping data node"); + internalCluster().stopRandomNode(settings -> settings.get("node.name").equals(redNodeName)); + response = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); + assertThat(response.isTimedOut(), is(false)); + + logger.info("-> restarting stopped node"); + internalCluster().startNode(Settings.builder().put("node.name", redNodeName).put(redNodeDataPathSettings).build()); + response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response.isTimedOut(), is(false)); + } + + public void testRestartCmNode() throws Exception { + final String indexName = "test"; + final Settings nodeSettings = Settings.builder() + .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") + .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s") + .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms") + .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") + .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .build(); + // start a 3 node cluster + final String cm = internalCluster().startNode(Settings.builder().put("node.attr.color", "yellow").put(nodeSettings).build()); + internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()); + internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); + + ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); + assertThat(response.isTimedOut(), is(false)); + + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue") + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + ) + .get(); + + Settings cmNodeSettings = internalCluster().dataPathSettings(cm); + + logger.info("-> stopping cluster-manager node"); + internalCluster().stopRandomNode(settings -> settings.get("node.name").equals(cm)); + response = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); + assertThat(response.isTimedOut(), is(false)); + + logger.info("-> restarting stopped node"); + internalCluster().startNode(Settings.builder().put("node.name", cm).put(cmNodeSettings).build()); + response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response.isTimedOut(), is(false)); + } + private class ConnectionDelay implements StubbableTransport.RequestHandlingBehavior { private final Runnable connectionBreaker; diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index a19b3e24ba9c9..de6b154fd097a 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -177,15 +177,16 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { } // There might be some stale nodes that are in pendingDisconnect set from before but are not connected anymore - // This code block clears the pending disconnect for these nodes to avoid permanently blocking node joins - // This situation should ideally not happen + // So these nodes would not be there in targetsByNode and would not have disconnect() called for them + // This code block clears the pending disconnect for these nodes that don't have entries in targetsByNode + // to avoid permanently blocking node joins + // This situation should ideally not happen, this is just for extra safety transportService.removePendingDisconnections( - transportService.getPendingDisconnections() + targetsByNode.keySet() .stream() .filter(discoveryNode -> !discoveryNodes.nodeExists(discoveryNode)) .collect(Collectors.toSet()) ); - } runnables.forEach(Runnable::run); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index a4c295bf8c921..fa455112b9101 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -1362,7 +1362,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes(); // marking pending disconnects before publish // if a nodes tries to send a joinRequest while it is pending disconnect, it should fail - transportService.setPendingDisconnections(clusterChangedEvent.nodesDelta()); + transportService.setPendingDisconnections(new HashSet<>(clusterChangedEvent.nodesDelta().removedNodes())); leaderChecker.setCurrentNodes(publishNodes); followersChecker.setCurrentNodes(publishNodes); lagDetector.setTrackedNodes(publishNodes); diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index c022aa6c2a2dd..58be31135c46e 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -69,6 +69,8 @@ public class ClusterConnectionManager implements ConnectionManager { Nodes are marked as pending disconnect right before cluster state publish phase. They are cleared up as part of cluster state apply commit phase This is to avoid connections from being made to nodes that are in the process of leaving the cluster + Note: If a disconnect is initiated while a connect is in progress, this Set will not handle this case. + Callers need to ensure that connects and disconnects are sequenced. */ private final Set pendingDisconnections = ConcurrentCollections.newConcurrentSet(); private final AbstractRefCounted connectingRefCounter = new AbstractRefCounted("connection manager") { @@ -129,7 +131,7 @@ public void connectToNode( ConnectionValidator connectionValidator, ActionListener listener ) throws ConnectTransportException { - logger.trace("[{}]connecting to node [{}]", Thread.currentThread().getName(), node); + logger.trace("connecting to node [{}]", node); ConnectionProfile resolvedProfile = ConnectionProfile.resolveConnectionProfile(connectionProfile, defaultProfile); if (node == null) { listener.onFailure(new ConnectTransportException(null, "can't connect to a null node")); @@ -138,11 +140,7 @@ public void connectToNode( // if node-left is still in progress, we fail the connect request early if (pendingDisconnections.contains(node)) { - listener.onFailure( - new IllegalStateException( - "blocked connection to node [" + node + "] because node-left is currently in progress for this node" - ) - ); + listener.onFailure(new IllegalStateException("cannot make a new connection as disconnect to node [" + node + "] is pending")); return; } @@ -188,6 +186,7 @@ public void connectToNode( conn.addCloseListener(ActionListener.wrap(() -> { logger.trace("unregistering {} after connection close and marking as disconnected", node); connectedNodes.remove(node, finalConnection); + pendingDisconnections.remove(node); connectionListener.onNodeDisconnected(node, conn); })); } @@ -249,20 +248,15 @@ public void disconnectFromNode(DiscoveryNode node) { } @Override - public Set getPendingDisconnections() { - return pendingDisconnections; + public void setPendingDisconnection(DiscoveryNode node) { + logger.debug("marking disconnection as pending for node: [{}]", node); + pendingDisconnections.add(node); } @Override - public void setPendingDisconnections(Set nodes) { - logger.debug("set pending disconnection for nodes: [{}]", nodes); - pendingDisconnections.addAll(nodes); - } - - @Override - public void removePendingDisconnections(Set nodes) { - logger.debug("marking disconnection as completed for nodes: [{}]", nodes); - pendingDisconnections.removeAll(nodes); + public void removePendingDisconnection(DiscoveryNode node) { + logger.debug("marking disconnection as completed for node: [{}]", node); + pendingDisconnections.remove(node); } /** diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 88d89d952a2f6..1a5726887f246 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -65,11 +65,9 @@ void connectToNode( void disconnectFromNode(DiscoveryNode node); - Set getPendingDisconnections(); + void setPendingDisconnection(DiscoveryNode node); - void setPendingDisconnections(Set nodes); - - void removePendingDisconnections(Set nodes); + void removePendingDisconnection(DiscoveryNode node); Set getAllConnectedNodes(); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index aabc2a6b6a796..9eab2c4599e3b 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -115,18 +115,13 @@ public void disconnectFromNode(DiscoveryNode node) { } @Override - public Set getPendingDisconnections() { - return delegate.getPendingDisconnections(); + public void setPendingDisconnection(DiscoveryNode node) { + delegate.setPendingDisconnection(node); } @Override - public void setPendingDisconnections(Set nodes) { - delegate.setPendingDisconnections(nodes); - } - - @Override - public void removePendingDisconnections(Set nodes) { - delegate.removePendingDisconnections(nodes); + public void removePendingDisconnection(DiscoveryNode node) { + delegate.removePendingDisconnection(node); } @Override diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index 50d8fb7ab6dda..e65c7a081ab14 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -41,7 +41,6 @@ import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.Nullable; import org.opensearch.common.io.stream.Streamables; import org.opensearch.common.lease.Releasable; @@ -774,16 +773,12 @@ public void disconnectFromNode(DiscoveryNode node) { connectionManager.disconnectFromNode(node); } - public Set getPendingDisconnections() { - return connectionManager.getPendingDisconnections(); - } - - public void setPendingDisconnections(DiscoveryNodes.Delta nodesDelta) { - connectionManager.setPendingDisconnections(new HashSet<>(nodesDelta.removedNodes())); + public void setPendingDisconnections(Set nodes) { + nodes.forEach(connectionManager::setPendingDisconnection); } public void removePendingDisconnections(Set nodes) { - connectionManager.removePendingDisconnections(nodes); + nodes.forEach(connectionManager::removePendingDisconnection); } public void addMessageListener(TransportMessageListener listener) { diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index 78b894c6431df..f68648b31f3b4 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -124,18 +124,13 @@ public void disconnectFromNode(DiscoveryNode node) { } @Override - public Set getPendingDisconnections() { - return delegate.getPendingDisconnections(); + public void setPendingDisconnection(DiscoveryNode node) { + delegate.setPendingDisconnection(node); } @Override - public void setPendingDisconnections(Set nodes) { - delegate.setPendingDisconnections(nodes); - } - - @Override - public void removePendingDisconnections(Set nodes) { - delegate.removePendingDisconnections(nodes); + public void removePendingDisconnection(DiscoveryNode node) { + delegate.removePendingDisconnection(node); } @Override From 3c0ff586aed564c39e875b49a487238561473f14 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Fri, 6 Sep 2024 15:11:33 +0530 Subject: [PATCH 18/36] fix targetsbynode logic Signed-off-by: Rahul Karajgikar --- .../java/org/opensearch/cluster/NodeConnectionsService.java | 4 ++-- .../org/opensearch/transport/ClusterConnectionManager.java | 5 +++++ .../java/org/opensearch/transport/ConnectionManager.java | 2 ++ .../org/opensearch/transport/RemoteConnectionManager.java | 5 +++++ .../main/java/org/opensearch/transport/TransportService.java | 4 ++++ .../test/transport/StubbableConnectionManager.java | 5 +++++ 6 files changed, 23 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index de6b154fd097a..f5f15253d42d9 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -182,9 +182,9 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { // to avoid permanently blocking node joins // This situation should ideally not happen, this is just for extra safety transportService.removePendingDisconnections( - targetsByNode.keySet() + transportService.getPendingDisconnections() .stream() - .filter(discoveryNode -> !discoveryNodes.nodeExists(discoveryNode)) + .filter(discoveryNode -> !discoveryNodes.nodeExists(discoveryNode) && !targetsByNode.containsKey(discoveryNode)) .collect(Collectors.toSet()) ); } diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 58be31135c46e..a5514a74cd437 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -247,6 +247,11 @@ public void disconnectFromNode(DiscoveryNode node) { logger.debug("Removed node [{}] from pending disconnections list", node); } + @Override + public Set getPendingDisconnections() { + return pendingDisconnections; + } + @Override public void setPendingDisconnection(DiscoveryNode node) { logger.debug("marking disconnection as pending for node: [{}]", node); diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 1a5726887f246..ef82910790ec1 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -65,6 +65,8 @@ void connectToNode( void disconnectFromNode(DiscoveryNode node); + Set getPendingDisconnections(); + void setPendingDisconnection(DiscoveryNode node); void removePendingDisconnection(DiscoveryNode node); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index 9eab2c4599e3b..fadcda9c514d9 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -114,6 +114,11 @@ public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); } + @Override + public Set getPendingDisconnections() { + return delegate.getPendingDisconnections(); + } + @Override public void setPendingDisconnection(DiscoveryNode node) { delegate.setPendingDisconnection(node); diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index e65c7a081ab14..d1f04e25322e2 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -773,6 +773,10 @@ public void disconnectFromNode(DiscoveryNode node) { connectionManager.disconnectFromNode(node); } + public Set getPendingDisconnections() { + return connectionManager.getPendingDisconnections(); + } + public void setPendingDisconnections(Set nodes) { nodes.forEach(connectionManager::setPendingDisconnection); } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index f68648b31f3b4..feffccd445952 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -123,6 +123,11 @@ public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); } + @Override + public Set getPendingDisconnections() { + return delegate.getPendingDisconnections(); + } + @Override public void setPendingDisconnection(DiscoveryNode node) { delegate.setPendingDisconnection(node); From 4555c799013aabd67ff77de21f6f78d38c88415c Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Fri, 6 Sep 2024 18:13:00 +0530 Subject: [PATCH 19/36] fix tests instead of updating disconnectFromNodesExcept Signed-off-by: Rahul Karajgikar --- .../cluster/NodeConnectionsService.java | 24 +++++--------- .../transport/ClusterConnectionManager.java | 11 ------- .../transport/ConnectionManager.java | 4 --- .../transport/RemoteConnectionManager.java | 10 ------ .../transport/TransportService.java | 8 ----- .../snapshots/SnapshotResiliencyTests.java | 25 +++++++++++---- .../AbstractCoordinatorTestCase.java | 31 ++++++++++++++----- .../transport/StubbableConnectionManager.java | 10 ------ 8 files changed, 50 insertions(+), 73 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index f5f15253d42d9..13c6dcd6890f0 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -62,7 +62,6 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; import static org.opensearch.common.settings.Setting.Property; import static org.opensearch.common.settings.Setting.positiveTimeSetting; @@ -104,7 +103,7 @@ public class NodeConnectionsService extends AbstractLifecycleComponent { // contains an entry for every node in the latest cluster state, as well as for nodes from which we are in the process of // disconnecting - private final Map targetsByNode = new HashMap<>(); + protected final Map targetsByNode = new HashMap<>(); private final TimeValue reconnectInterval; private volatile ConnectionChecker connectionChecker; @@ -116,6 +115,11 @@ public NodeConnectionsService(Settings settings, ThreadPool threadPool, Transpor this.reconnectInterval = NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.get(settings); } + // exposed for testing + protected ConnectionTarget createConnectionTarget(DiscoveryNode discoveryNode) { + return new ConnectionTarget(discoveryNode); + } + /** * Connect to all the given nodes, but do not disconnect from any extra nodes. Calls the completion handler on completion of all * connection attempts to _new_ nodes, but not on attempts to re-establish connections to nodes that are already known. @@ -175,18 +179,6 @@ public void disconnectFromNodesExcept(DiscoveryNodes discoveryNodes) { for (final DiscoveryNode discoveryNode : nodesToDisconnect) { runnables.add(targetsByNode.get(discoveryNode).disconnect()); } - - // There might be some stale nodes that are in pendingDisconnect set from before but are not connected anymore - // So these nodes would not be there in targetsByNode and would not have disconnect() called for them - // This code block clears the pending disconnect for these nodes that don't have entries in targetsByNode - // to avoid permanently blocking node joins - // This situation should ideally not happen, this is just for extra safety - transportService.removePendingDisconnections( - transportService.getPendingDisconnections() - .stream() - .filter(discoveryNode -> !discoveryNodes.nodeExists(discoveryNode) && !targetsByNode.containsKey(discoveryNode)) - .collect(Collectors.toSet()) - ); } runnables.forEach(Runnable::run); } @@ -334,7 +326,7 @@ private enum ActivityType { * * @opensearch.internal */ - private class ConnectionTarget { + protected class ConnectionTarget { private final DiscoveryNode discoveryNode; private PlainListenableActionFuture future = PlainListenableActionFuture.newListenableFuture(); @@ -524,8 +516,6 @@ private void onCompletion(ActivityType completedActivityType, @Nullable Exceptio if (completedActivityType.equals(ActivityType.DISCONNECTING)) { final ConnectionTarget removedTarget = targetsByNode.remove(discoveryNode); - // if we remove from targetsByNode, we also remove from underlying pendingDisconnects for consistency - // transportService.markDisconnectAsCompleted(new HashSet<>(Collections.singleton(discoveryNode))); assert removedTarget == this : removedTarget + " vs " + this; } } else { diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index a5514a74cd437..ccb95a90e762d 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -247,23 +247,12 @@ public void disconnectFromNode(DiscoveryNode node) { logger.debug("Removed node [{}] from pending disconnections list", node); } - @Override - public Set getPendingDisconnections() { - return pendingDisconnections; - } - @Override public void setPendingDisconnection(DiscoveryNode node) { logger.debug("marking disconnection as pending for node: [{}]", node); pendingDisconnections.add(node); } - @Override - public void removePendingDisconnection(DiscoveryNode node) { - logger.debug("marking disconnection as completed for node: [{}]", node); - pendingDisconnections.remove(node); - } - /** * Returns the number of nodes this manager is connected to. */ diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index ef82910790ec1..18a08ce5a38c8 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -65,12 +65,8 @@ void connectToNode( void disconnectFromNode(DiscoveryNode node); - Set getPendingDisconnections(); - void setPendingDisconnection(DiscoveryNode node); - void removePendingDisconnection(DiscoveryNode node); - Set getAllConnectedNodes(); int size(); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index fadcda9c514d9..5e3e6ce03cd0d 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -114,21 +114,11 @@ public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); } - @Override - public Set getPendingDisconnections() { - return delegate.getPendingDisconnections(); - } - @Override public void setPendingDisconnection(DiscoveryNode node) { delegate.setPendingDisconnection(node); } - @Override - public void removePendingDisconnection(DiscoveryNode node) { - delegate.removePendingDisconnection(node); - } - @Override public ConnectionProfile getConnectionProfile() { return delegate.getConnectionProfile(); diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index d1f04e25322e2..afa9a52e72070 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -773,18 +773,10 @@ public void disconnectFromNode(DiscoveryNode node) { connectionManager.disconnectFromNode(node); } - public Set getPendingDisconnections() { - return connectionManager.getPendingDisconnections(); - } - public void setPendingDisconnections(Set nodes) { nodes.forEach(connectionManager::setPendingDisconnection); } - public void removePendingDisconnections(Set nodes) { - nodes.forEach(connectionManager::removePendingDisconnection); - } - public void addMessageListener(TransportMessageListener listener) { messageListener.listeners.add(listener); } diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 350c6f9ae8f6b..1ee282c3d3663 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -1923,11 +1923,6 @@ private final class TestClusterNode { protected PrioritizedOpenSearchThreadPoolExecutor createThreadPoolExecutor() { return new MockSinglePrioritizingExecutor(node.getName(), deterministicTaskQueue, threadPool); } - - @Override - protected void connectToNodesAndWait(ClusterState newClusterState) { - // don't do anything, and don't block - } } ); recoverySettings = new RecoverySettings(settings, clusterSettings); @@ -2094,7 +2089,7 @@ public void onFailure(final Exception e) { rerouteService, threadPool ); - nodeConnectionsService = new NodeConnectionsService(clusterService.getSettings(), threadPool, transportService); + nodeConnectionsService = createTestNodeConnectionsService(clusterService.getSettings(), threadPool, transportService); final MetadataMappingService metadataMappingService = new MetadataMappingService(clusterService, indicesService); indicesClusterStateService = new IndicesClusterStateService( settings, @@ -2492,6 +2487,24 @@ protected void assertSnapshotOrGenericThread() { } } + public NodeConnectionsService createTestNodeConnectionsService( + Settings settings, + ThreadPool threadPool, + TransportService transportService + ) { + return new NodeConnectionsService(settings, threadPool, transportService) { + @Override + public void connectToNodes(DiscoveryNodes discoveryNodes, Runnable onCompletion) { + // just update targetsByNode to ensure disconnect runs for these nodes + // we rely on disconnect to run for keeping track of pendingDisconnects and ensuring node-joins can happen + for (final DiscoveryNode discoveryNode : discoveryNodes) { + this.targetsByNode.put(discoveryNode, createConnectionTarget(discoveryNode)); + } + onCompletion.run(); + } + }; + } + public ClusterInfoService getMockClusterInfoService() { return clusterInfoService; } diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index b432e5411404e..f3aa5c97b49f8 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -55,6 +55,7 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterApplierService; import org.opensearch.cluster.service.ClusterService; @@ -1150,9 +1151,12 @@ protected Optional getDisruptableMockTransport(Transpo new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); clusterService = new ClusterService(settings, clusterSettings, clusterManagerService, clusterApplierService); - clusterService.setNodeConnectionsService( - new NodeConnectionsService(clusterService.getSettings(), threadPool, transportService) + NodeConnectionsService nodeConnectionsService = createTestNodeConnectionsService( + clusterService.getSettings(), + threadPool, + transportService ); + clusterService.setNodeConnectionsService(nodeConnectionsService); repositoriesService = new RepositoriesService( settings, clusterService, @@ -1588,6 +1592,24 @@ public void onNodeAck(DiscoveryNode node, Exception e) { } } + public static NodeConnectionsService createTestNodeConnectionsService( + Settings settings, + ThreadPool threadPool, + TransportService transportService + ) { + return new NodeConnectionsService(settings, threadPool, transportService) { + @Override + public void connectToNodes(DiscoveryNodes discoveryNodes, Runnable onCompletion) { + // just update targetsByNode to ensure disconnect runs for these nodes + // we rely on disconnect to run for keeping track of pendingDisconnects and ensuring node-joins can happen + for (final DiscoveryNode discoveryNode : discoveryNodes) { + this.targetsByNode.put(discoveryNode, createConnectionTarget(discoveryNode)); + } + onCompletion.run(); + } + }; + } + static class DisruptableClusterApplierService extends ClusterApplierService { private final String nodeName; private final DeterministicTaskQueue deterministicTaskQueue; @@ -1641,11 +1663,6 @@ public void onNewClusterState(String source, Supplier clusterState } } - @Override - protected void connectToNodesAndWait(ClusterState newClusterState) { - // don't do anything, and don't block - } - @Override protected boolean applicationMayFail() { return this.applicationMayFail; diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index feffccd445952..3917fa7bee3e3 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -123,21 +123,11 @@ public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); } - @Override - public Set getPendingDisconnections() { - return delegate.getPendingDisconnections(); - } - @Override public void setPendingDisconnection(DiscoveryNode node) { delegate.setPendingDisconnection(node); } - @Override - public void removePendingDisconnection(DiscoveryNode node) { - delegate.removePendingDisconnection(node); - } - @Override public int size() { return delegate.size(); From 6c963b4d24e8a332234e4e91a81933e24fce8b63 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Sat, 7 Sep 2024 19:08:45 +0530 Subject: [PATCH 20/36] Minor changes in IT Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 64 +++++++++++++------ 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index 51d71f95f33c8..412bda2ec9d6e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -60,18 +60,37 @@ import static org.opensearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_ACTION_NAME; import static org.hamcrest.Matchers.is; +/** + ITs for reproducing the scenario and validating some new cases for https://github.com/opensearch-project/OpenSearch/issues/4874 + This issue is about an infinite loop of node joining and leaving the cluster. + This race condition would happen when a node-join task was queued into cluster-manager thread while a node-left task for the same node was still processing. + + Scenario: + Suppose a node disconnects from the cluster due to some normal reason. + This queues a node-left task on cluster manager thread. + Then cluster manager then computes the new cluster state based on the node-left task. + The cluster manager now tries to send the new state to all the nodes and waits for all nodes to ack back. + Suppose this takes a long time due to lagging nodes or slow applying of the state or any other reason. + While this is happening, the node that just left sends a join request to the cluster manager to rejoin the cluster. + The role of this join request is to re-establish any required connections and do some pre-validations before queuing a new task. + After join request is validated by cluster manager node, cluster manager queues a node-join task into its thread. + This node-join task would only start after the node-left task is completed. + Now suppose the node-left task has completed publication and has started to apply the new state on the cluster manager. + As part of applying the cluster state of node-left task, cluster manager wipes out the connection info of the leaving node. + The node-left task then completes and the node-join task begins. + Now the node-join task starts. This task assumes that because the previous join request succeeded, that all connection info would still be there. + So then the cluster manager computes the new state. + Then it tells the followerchecker thread to add this new node. + Then it tries to publish the new state to all the nodes. + However, at this point, the followerchecker thread fails because the connection was wiped and triggers a new node-left task. + If the new node-left task also takes time, we end up in an infinite loop of node-left and node-joins. + +Fix: + As part of the fix for this, we now reject the initial join request from a node that has an ongoing node-left task. + The join request will only succeed after the node-left task completes, so the connection that gets created as part of the join request does not get wiped out and cause node-join task to fail. + */ @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class NodeJoinLeftIT extends OpenSearchIntegTestCase { - - private static final String INDEX_NAME = "test-idx-1"; - private static final String REPO_NAME = "test-repo-1"; - private static final String SNAP_NAME = "test-snap-1"; - - private static final int MIN_DOC_COUNT = 500; - private static final int MAX_DOC_COUNT = 1000; - private static final int SHARD_COUNT = 1; - private static final int REPLICA_COUNT = 0; - @Override protected Collection> nodePlugins() { return Arrays.asList( @@ -90,7 +109,7 @@ protected void beforeIndexDeletion() throws Exception { internalCluster().assertSameDocIdsOnShards(); } - public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { + public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throws Exception { final String indexName = "test"; final Settings nodeSettings = Settings.builder() .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") @@ -129,10 +148,11 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { } } }); + // Toggle to succeed/fail the followerchecker to simulate the initial node leaving. AtomicBoolean succeedFollowerChecker = new AtomicBoolean(); - // Simulate followerchecker failure on 1 node when bb is false - ConnectionDelay handlingBehavior = new ConnectionDelay(() -> { + // Simulate followerchecker failure on 1 node when succeedFollowerChecker is false + FollowerCheckerBehaviour simulatedFailureBehaviour = new FollowerCheckerBehaviour(() -> { if (succeedFollowerChecker.get()) { return; } @@ -141,28 +161,30 @@ public void testTransientErrorsDuringRecovery1AreRetried() throws Exception { } catch (InterruptedException e) { throw new RuntimeException(e); } - throw new NodeHealthCheckFailureException("non writable exception"); + throw new NodeHealthCheckFailureException("fake followerchecker failure simulated by test to repro race condition"); }); MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance( TransportService.class, redNodeName ); - redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, handlingBehavior); + redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, simulatedFailureBehaviour); // Loop runs 10 times to ensure race condition gets reproduced for (int i = 0; i < 10; i++) { - // fail followerchecker by force to trigger node disconnect - // now followerchecker should fail and trigger node left + // Fail followerchecker by force to trigger node disconnect and node left + logger.info("--> simulating followerchecker failure to trigger node-left"); succeedFollowerChecker.set(false); ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); assertThat(response1.isTimedOut(), is(false)); - // once we know a node has left, we can re-enable followerchecker to work normally + // once we know a node has left, we can re-enable followerchecker to work normally and validate node rejoins + logger.info("--> re-enabling normal followerchecker and validating cluster is stable"); succeedFollowerChecker.set(true); ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response2.isTimedOut(), is(false)); - // Checking again to validate stability + Thread.sleep(1000); + // checking again to validate stability and ensure node did not leave ClusterHealthResponse response3 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response3.isTimedOut(), is(false)); } @@ -253,10 +275,10 @@ public void testRestartCmNode() throws Exception { assertThat(response.isTimedOut(), is(false)); } - private class ConnectionDelay implements StubbableTransport.RequestHandlingBehavior { + private class FollowerCheckerBehaviour implements StubbableTransport.RequestHandlingBehavior { private final Runnable connectionBreaker; - private ConnectionDelay(Runnable connectionBreaker) { + private FollowerCheckerBehaviour(Runnable connectionBreaker) { this.connectionBreaker = connectionBreaker; } From cba4b9bbd227f8d368e3f42f0c816d09bfc2c83e Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Mon, 9 Sep 2024 11:01:05 +0530 Subject: [PATCH 21/36] empty commit for gradle check Signed-off-by: Rahul Karajgikar From 47c1141d46f91495655f1fc537c63944b2796098 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Mon, 9 Sep 2024 15:21:04 +0530 Subject: [PATCH 22/36] Add test for disconnect during node-left Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index 412bda2ec9d6e..9a85d9254a0e0 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -117,6 +117,7 @@ public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throw .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms") .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "100ms") .build(); // start a 3 node cluster with 1 cluster-manager final String cm = internalCluster().startNode(nodeSettings); @@ -194,6 +195,102 @@ public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throw assertThat(response.isTimedOut(), is(false)); } + public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Exception { + final String indexName = "test"; + final Settings nodeSettings = Settings.builder() + .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") + .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s") + .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms") + .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") + .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "100ms") + .build(); + // start a 3 node cluster with 1 cluster-manager + final String cm = internalCluster().startNode(nodeSettings); + internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()); + final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); + + ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); + assertThat(response.isTimedOut(), is(false)); + + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue") + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + ) + .get(); + + ClusterService cmClsService = internalCluster().getInstance(ClusterService.class, cm); + // Simulate a slow applier on the cm to delay node-left state application + cmClsService.addStateApplier(event -> { + if (event.nodesRemoved()) { + try { + Thread.sleep(3000); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }); + // Toggle to succeed/fail the followerchecker to simulate the initial node leaving. + AtomicBoolean succeedFollowerChecker = new AtomicBoolean(); + + // Simulate followerchecker failure on 1 node when succeedFollowerChecker is false + FollowerCheckerBehaviour simulatedFailureBehaviour = new FollowerCheckerBehaviour(() -> { + if (succeedFollowerChecker.get()) { + return; + } + try { + Thread.sleep(10); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + throw new NodeHealthCheckFailureException("fake followerchecker failure simulated by test to repro race condition"); + }); + MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance( + TransportService.class, + cm + ); + MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance( + TransportService.class, + redNodeName + ); + redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, simulatedFailureBehaviour); + + // Loop runs 10 times to ensure race condition gets reproduced + for (int i = 0; i < 10; i++) { + // Fail followerchecker by force to trigger node disconnect and node left + logger.info("--> simulating followerchecker failure to trigger node-left"); + succeedFollowerChecker.set(false); + Thread.sleep(1000); + + // Trigger a node disconnect while node-left task is still processing + logger.info("--> triggering a simulated disconnect on red node, after the follower checker failed to see how node-left task deals with this"); + cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); + + ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); + assertThat(response1.isTimedOut(), is(false)); + + // once we know a node has left, we can re-enable followerchecker to work normally and validate node rejoins + logger.info("--> re-enabling normal followerchecker and validating cluster is stable"); + succeedFollowerChecker.set(true); + ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response2.isTimedOut(), is(false)); + + Thread.sleep(1000); + // checking again to validate stability and ensure node did not leave + ClusterHealthResponse response3 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response3.isTimedOut(), is(false)); + } + + succeedFollowerChecker.set(true); + response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + assertThat(response.isTimedOut(), is(false)); + } + public void testRestartDataNode() throws Exception { final String indexName = "test"; final Settings nodeSettings = Settings.builder() From 36d84c706f5020b913465b2b6884f24a4b279bf2 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Mon, 9 Sep 2024 15:52:22 +0530 Subject: [PATCH 23/36] fix spotless Signed-off-by: Rahul Karajgikar --- .../opensearch/cluster/coordination/NodeJoinLeftIT.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index 9a85d9254a0e0..bb6a3d7e4109a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -250,10 +250,7 @@ public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Ex } throw new NodeHealthCheckFailureException("fake followerchecker failure simulated by test to repro race condition"); }); - MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance( - TransportService.class, - cm - ); + MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, cm); MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance( TransportService.class, redNodeName @@ -268,7 +265,9 @@ public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Ex Thread.sleep(1000); // Trigger a node disconnect while node-left task is still processing - logger.info("--> triggering a simulated disconnect on red node, after the follower checker failed to see how node-left task deals with this"); + logger.info( + "--> triggering a simulated disconnect on red node, after the follower checker failed to see how node-left task deals with this" + ); cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode()); ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); From 678ebd0a080b73603585db56b95636b9ad6c3878 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Mon, 9 Sep 2024 18:56:13 +0530 Subject: [PATCH 24/36] Cleanup pendingDisconnection entries during cluster-manager failover Signed-off-by: Rahul Karajgikar --- .../org/opensearch/cluster/coordination/Coordinator.java | 4 ++++ .../opensearch/transport/ClusterConnectionManager.java | 5 +++++ .../java/org/opensearch/transport/ConnectionManager.java | 2 ++ .../org/opensearch/transport/RemoteConnectionManager.java | 5 +++++ .../java/org/opensearch/transport/TransportService.java | 8 ++++++++ .../test/transport/StubbableConnectionManager.java | 5 +++++ 6 files changed, 29 insertions(+) diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index fa455112b9101..2a36af29acbd6 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -817,6 +817,10 @@ public void onFailure(String source, Exception e) { public ClusterTasksResult execute(ClusterState currentState) { if (currentState.nodes().isLocalNodeElectedClusterManager() == false) { allocationService.cleanCaches(); + // This set only needs to be maintained on active cluster-manager + // This is cleaned up to avoid stale entries which would block future reconnections + logger.trace("Removing all pending disconnections as part of cluster-manager cleanup"); + transportService.clearPendingDisconnections(); } return unchanged(); } diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index ccb95a90e762d..18308d9da11ac 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -253,6 +253,11 @@ public void setPendingDisconnection(DiscoveryNode node) { pendingDisconnections.add(node); } + @Override + public void clearPendingDisconnections() { + pendingDisconnections.clear(); + } + /** * Returns the number of nodes this manager is connected to. */ diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 18a08ce5a38c8..ebd5ccf29c8cc 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -67,6 +67,8 @@ void connectToNode( void setPendingDisconnection(DiscoveryNode node); + void clearPendingDisconnections(); + Set getAllConnectedNodes(); int size(); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index 5e3e6ce03cd0d..52f29bea8050d 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -119,6 +119,11 @@ public void setPendingDisconnection(DiscoveryNode node) { delegate.setPendingDisconnection(node); } + @Override + public void clearPendingDisconnections() { + delegate.clearPendingDisconnections(); + } + @Override public ConnectionProfile getConnectionProfile() { return delegate.getConnectionProfile(); diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index afa9a52e72070..7c2699e1d4b22 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -777,6 +777,14 @@ public void setPendingDisconnections(Set nodes) { nodes.forEach(connectionManager::setPendingDisconnection); } + /** + * Wipes out all pending disconnections. + * This is called on cluster-manager failover to remove stale entries + */ + public void clearPendingDisconnections() { + connectionManager.clearPendingDisconnections(); + } + public void addMessageListener(TransportMessageListener listener) { messageListener.listeners.add(listener); } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index 3917fa7bee3e3..d1e1a3e8af17c 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -128,6 +128,11 @@ public void setPendingDisconnection(DiscoveryNode node) { delegate.setPendingDisconnection(node); } + @Override + public void clearPendingDisconnections() { + delegate.clearPendingDisconnections(); + } + @Override public int size() { return delegate.size(); From 6aa25ee7cb712ea0c322b70bea4f70f60465c4de Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Thu, 12 Sep 2024 17:47:07 +0530 Subject: [PATCH 25/36] Use NodeConnectionsService instead of transportService in Coordinator Signed-off-by: Rahul Karajgikar --- .../opensearch/cluster/NodeConnectionsService.java | 8 ++++++++ .../opensearch/cluster/coordination/Coordinator.java | 12 ++++++++++-- .../java/org/opensearch/discovery/Discovery.java | 5 +++++ server/src/main/java/org/opensearch/node/Node.java | 1 + .../org/opensearch/transport/TransportService.java | 4 ++-- .../snapshots/SnapshotResiliencyTests.java | 3 ++- .../test/java/org/opensearch/test/NoopDiscovery.java | 6 ++++++ .../coordination/AbstractCoordinatorTestCase.java | 1 + 8 files changed, 35 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index 13c6dcd6890f0..21a5a5e08933b 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -164,6 +164,14 @@ public void connectToNodes(DiscoveryNodes discoveryNodes, Runnable onCompletion) runnables.forEach(Runnable::run); } + public void setPendingDisconnections(Set nodes) { + nodes.forEach(transportService::setPendingDisconnection); + } + + public void clearPendingDisconnections() { + transportService.clearPendingDisconnections(); + } + /** * Disconnect from any nodes to which we are currently connected which do not appear in the given nodes. Does not wait for the * disconnections to complete, because they might have to wait for ongoing connection attempts first. diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 2a36af29acbd6..9859abe503eaa 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -42,6 +42,7 @@ import org.opensearch.cluster.ClusterStateTaskConfig; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.LocalClusterUpdateTask; +import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; @@ -187,6 +188,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery private final NodeHealthService nodeHealthService; private final PersistedStateRegistry persistedStateRegistry; private final RemoteStoreNodeService remoteStoreNodeService; + private NodeConnectionsService nodeConnectionsService; /** * @param nodeName The name of the node, used to name the {@link java.util.concurrent.ExecutorService} of the {@link SeedHostsResolver}. @@ -820,7 +822,7 @@ public ClusterTasksResult execute(ClusterState currentSt // This set only needs to be maintained on active cluster-manager // This is cleaned up to avoid stale entries which would block future reconnections logger.trace("Removing all pending disconnections as part of cluster-manager cleanup"); - transportService.clearPendingDisconnections(); + nodeConnectionsService.clearPendingDisconnections(); } return unchanged(); } @@ -927,6 +929,12 @@ public void startInitialJoin() { clusterBootstrapService.scheduleUnconfiguredBootstrap(); } + @Override + public void setNodeConnectionsService(NodeConnectionsService nodeConnectionsService) { + assert this.nodeConnectionsService == null : "nodeConnectionsService is already set"; + this.nodeConnectionsService = nodeConnectionsService; + } + @Override protected void doStop() { configuredHostsResolver.stop(); @@ -1366,7 +1374,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes(); // marking pending disconnects before publish // if a nodes tries to send a joinRequest while it is pending disconnect, it should fail - transportService.setPendingDisconnections(new HashSet<>(clusterChangedEvent.nodesDelta().removedNodes())); + nodeConnectionsService.setPendingDisconnections(new HashSet<>(clusterChangedEvent.nodesDelta().removedNodes())); leaderChecker.setCurrentNodes(publishNodes); followersChecker.setCurrentNodes(publishNodes); lagDetector.setTrackedNodes(publishNodes); diff --git a/server/src/main/java/org/opensearch/discovery/Discovery.java b/server/src/main/java/org/opensearch/discovery/Discovery.java index 9d6807b6522c9..6d9fb1f4985df 100644 --- a/server/src/main/java/org/opensearch/discovery/Discovery.java +++ b/server/src/main/java/org/opensearch/discovery/Discovery.java @@ -32,6 +32,7 @@ package org.opensearch.discovery; +import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.coordination.ClusterStatePublisher; import org.opensearch.common.lifecycle.LifecycleComponent; @@ -54,4 +55,8 @@ public interface Discovery extends LifecycleComponent, ClusterStatePublisher { */ void startInitialJoin(); + /** + * Sets the NodeConnectionsService which is an abstraction used for connection management + */ + void setNodeConnectionsService(NodeConnectionsService nodeConnectionsService); } diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index a8d4ebcf23dab..4962d72d8728a 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -1602,6 +1602,7 @@ public Node start() throws NodeValidationException { injector.getInstance(GatewayService.class).start(); Discovery discovery = injector.getInstance(Discovery.class); + discovery.setNodeConnectionsService(nodeConnectionsService); clusterService.getClusterManagerService().setClusterStatePublisher(discovery::publish); // Start the transport service now so the publish address will be added to the local disco node in ClusterService diff --git a/server/src/main/java/org/opensearch/transport/TransportService.java b/server/src/main/java/org/opensearch/transport/TransportService.java index 7c2699e1d4b22..fe8631aa5ca3d 100644 --- a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -773,8 +773,8 @@ public void disconnectFromNode(DiscoveryNode node) { connectionManager.disconnectFromNode(node); } - public void setPendingDisconnections(Set nodes) { - nodes.forEach(connectionManager::setPendingDisconnection); + public void setPendingDisconnection(DiscoveryNode node) { + connectionManager.setPendingDisconnection(node); } /** diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 1ee282c3d3663..440227436175d 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2576,10 +2576,11 @@ public void start(ClusterState initialState) { new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), null ); + coordinator.setNodeConnectionsService(nodeConnectionsService); clusterManagerService.setClusterStatePublisher(coordinator); - coordinator.start(); clusterService.getClusterApplierService().setNodeConnectionsService(nodeConnectionsService); nodeConnectionsService.start(); + coordinator.start(); clusterService.start(); indicesService.start(); indicesClusterStateService.start(); diff --git a/server/src/test/java/org/opensearch/test/NoopDiscovery.java b/server/src/test/java/org/opensearch/test/NoopDiscovery.java index 42d3f1887ab4d..c35503a556db6 100644 --- a/server/src/test/java/org/opensearch/test/NoopDiscovery.java +++ b/server/src/test/java/org/opensearch/test/NoopDiscovery.java @@ -32,6 +32,7 @@ package org.opensearch.test; import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.common.lifecycle.Lifecycle; import org.opensearch.common.lifecycle.LifecycleListener; import org.opensearch.core.action.ActionListener; @@ -55,6 +56,11 @@ public void startInitialJoin() { } + @Override + public void setNodeConnectionsService(NodeConnectionsService nodeConnectionsService) { + + } + @Override public Lifecycle.State lifecycleState() { return null; diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index f3aa5c97b49f8..3efcc538a1b25 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -1191,6 +1191,7 @@ protected Optional getDisruptableMockTransport(Transpo new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), null ); + coordinator.setNodeConnectionsService(nodeConnectionsService); clusterManagerService.setClusterStatePublisher(coordinator); final GatewayService gatewayService = new GatewayService( settings, From a23214a5651614388e629d7da51d1adf00ee742d Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Fri, 13 Sep 2024 12:09:23 +0530 Subject: [PATCH 26/36] remove doc from IT Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 29 ++----------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index bb6a3d7e4109a..81561ab0d0b1a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -61,33 +61,8 @@ import static org.hamcrest.Matchers.is; /** - ITs for reproducing the scenario and validating some new cases for https://github.com/opensearch-project/OpenSearch/issues/4874 - This issue is about an infinite loop of node joining and leaving the cluster. - This race condition would happen when a node-join task was queued into cluster-manager thread while a node-left task for the same node was still processing. - - Scenario: - Suppose a node disconnects from the cluster due to some normal reason. - This queues a node-left task on cluster manager thread. - Then cluster manager then computes the new cluster state based on the node-left task. - The cluster manager now tries to send the new state to all the nodes and waits for all nodes to ack back. - Suppose this takes a long time due to lagging nodes or slow applying of the state or any other reason. - While this is happening, the node that just left sends a join request to the cluster manager to rejoin the cluster. - The role of this join request is to re-establish any required connections and do some pre-validations before queuing a new task. - After join request is validated by cluster manager node, cluster manager queues a node-join task into its thread. - This node-join task would only start after the node-left task is completed. - Now suppose the node-left task has completed publication and has started to apply the new state on the cluster manager. - As part of applying the cluster state of node-left task, cluster manager wipes out the connection info of the leaving node. - The node-left task then completes and the node-join task begins. - Now the node-join task starts. This task assumes that because the previous join request succeeded, that all connection info would still be there. - So then the cluster manager computes the new state. - Then it tells the followerchecker thread to add this new node. - Then it tries to publish the new state to all the nodes. - However, at this point, the followerchecker thread fails because the connection was wiped and triggers a new node-left task. - If the new node-left task also takes time, we end up in an infinite loop of node-left and node-joins. - -Fix: - As part of the fix for this, we now reject the initial join request from a node that has an ongoing node-left task. - The join request will only succeed after the node-left task completes, so the connection that gets created as part of the join request does not get wiped out and cause node-join task to fail. + Check https://github.com/opensearch-project/OpenSearch/issues/4874 and + https://github.com/opensearch-project/OpenSearch/pull/15521 for context */ @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class NodeJoinLeftIT extends OpenSearchIntegTestCase { From b7619006bc1d92da722629a3ffe58bb01069926a Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 18 Sep 2024 11:54:49 +0530 Subject: [PATCH 27/36] empty commit to rerun gradle check Signed-off-by: Rahul Karajgikar From 36a300d7e8a809f4cf2e68a7c3372a7080465f57 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 18 Sep 2024 12:01:12 +0530 Subject: [PATCH 28/36] change debug logs to trace logs to remove noise Signed-off-by: Rahul Karajgikar --- .../org/opensearch/transport/ClusterConnectionManager.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index 18308d9da11ac..3a3e8c964b6c5 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -244,12 +244,12 @@ public void disconnectFromNode(DiscoveryNode node) { nodeChannels.close(); } pendingDisconnections.remove(node); - logger.debug("Removed node [{}] from pending disconnections list", node); + logger.trace("Removed node [{}] from pending disconnections list", node); } @Override public void setPendingDisconnection(DiscoveryNode node) { - logger.debug("marking disconnection as pending for node: [{}]", node); + logger.trace("marking disconnection as pending for node: [{}]", node); pendingDisconnections.add(node); } From 5cf7a51bf9804cb640e432da31e697cd9589253b Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 18 Sep 2024 16:56:01 +0530 Subject: [PATCH 29/36] add new tests and refactor existing tests Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 128 +++++------------- .../cluster/NodeConnectionsServiceTests.java | 90 ++++++++++++ .../ClusterConnectionManagerTests.java | 44 ++++++ 3 files changed, 166 insertions(+), 96 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index 81561ab0d0b1a..c0133e0883ec8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -52,6 +52,7 @@ import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportRequestHandler; import org.opensearch.transport.TransportService; +import org.junit.Before; import java.util.Arrays; import java.util.Collection; @@ -66,6 +67,10 @@ */ @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class NodeJoinLeftIT extends OpenSearchIntegTestCase { + + private String clusterManager; + private String redNodeName; + @Override protected Collection> nodePlugins() { return Arrays.asList( @@ -84,8 +89,11 @@ protected void beforeIndexDeletion() throws Exception { internalCluster().assertSameDocIdsOnShards(); } - public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throws Exception { - final String indexName = "test"; + @Override + @Before + public void setUp() throws Exception { + super.setUp(); + String indexName = "test"; final Settings nodeSettings = Settings.builder() .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s") @@ -95,13 +103,15 @@ public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throw .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "100ms") .build(); // start a 3 node cluster with 1 cluster-manager - final String cm = internalCluster().startNode(nodeSettings); + this.clusterManager = internalCluster().startNode(nodeSettings); internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()); - final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); + this.redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); + // validate the 3 node cluster is up ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); assertThat(response.isTimedOut(), is(false)); + // create an index client().admin() .indices() .prepareCreate(indexName) @@ -112,10 +122,13 @@ public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throw .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) ) .get(); + } - ClusterService cmClsService = internalCluster().getInstance(ClusterService.class, cm); + public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throws Exception { + + ClusterService clusterManagerClsService = internalCluster().getInstance(ClusterService.class, clusterManager); // Simulate a slow applier on the cm to delay node-left state application - cmClsService.addStateApplier(event -> { + clusterManagerClsService.addStateApplier(event -> { if (event.nodesRemoved()) { try { Thread.sleep(3000); @@ -166,42 +179,14 @@ public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throw } succeedFollowerChecker.set(true); - response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); } public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Exception { - final String indexName = "test"; - final Settings nodeSettings = Settings.builder() - .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") - .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s") - .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms") - .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") - .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) - .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "100ms") - .build(); - // start a 3 node cluster with 1 cluster-manager - final String cm = internalCluster().startNode(nodeSettings); - internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()); - final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); - - ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); - assertThat(response.isTimedOut(), is(false)); - - client().admin() - .indices() - .prepareCreate(indexName) - .setSettings( - Settings.builder() - .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue") - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) - ) - .get(); - - ClusterService cmClsService = internalCluster().getInstance(ClusterService.class, cm); + ClusterService clusterManagerClsService = internalCluster().getInstance(ClusterService.class, clusterManager); // Simulate a slow applier on the cm to delay node-left state application - cmClsService.addStateApplier(event -> { + clusterManagerClsService.addStateApplier(event -> { if (event.nodesRemoved()) { try { Thread.sleep(3000); @@ -225,7 +210,10 @@ public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Ex } throw new NodeHealthCheckFailureException("fake followerchecker failure simulated by test to repro race condition"); }); - MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, cm); + MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance( + TransportService.class, + clusterManager + ); MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance( TransportService.class, redNodeName @@ -261,42 +249,16 @@ public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Ex } succeedFollowerChecker.set(true); - response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); + ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); } public void testRestartDataNode() throws Exception { - final String indexName = "test"; - final Settings nodeSettings = Settings.builder() - .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") - .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s") - .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms") - .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") - .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) - .build(); - // start a 3 node cluster - internalCluster().startNode(nodeSettings); - internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()); - final String redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); - - ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); - assertThat(response.isTimedOut(), is(false)); - - client().admin() - .indices() - .prepareCreate(indexName) - .setSettings( - Settings.builder() - .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue") - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) - ) - .get(); Settings redNodeDataPathSettings = internalCluster().dataPathSettings(redNodeName); logger.info("-> stopping data node"); internalCluster().stopRandomNode(settings -> settings.get("node.name").equals(redNodeName)); - response = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); + ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); assertThat(response.isTimedOut(), is(false)); logger.info("-> restarting stopped node"); @@ -306,42 +268,16 @@ public void testRestartDataNode() throws Exception { } public void testRestartCmNode() throws Exception { - final String indexName = "test"; - final Settings nodeSettings = Settings.builder() - .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") - .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s") - .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms") - .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms") - .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) - .build(); - // start a 3 node cluster - final String cm = internalCluster().startNode(Settings.builder().put("node.attr.color", "yellow").put(nodeSettings).build()); - internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build()); - internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build()); - - ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get(); - assertThat(response.isTimedOut(), is(false)); - - client().admin() - .indices() - .prepareCreate(indexName) - .setSettings( - Settings.builder() - .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue") - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) - ) - .get(); - Settings cmNodeSettings = internalCluster().dataPathSettings(cm); + Settings cmNodeSettings = internalCluster().dataPathSettings(clusterManager); logger.info("-> stopping cluster-manager node"); - internalCluster().stopRandomNode(settings -> settings.get("node.name").equals(cm)); - response = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); + internalCluster().stopRandomNode(settings -> settings.get("node.name").equals(clusterManager)); + ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); assertThat(response.isTimedOut(), is(false)); logger.info("-> restarting stopped node"); - internalCluster().startNode(Settings.builder().put("node.name", cm).put(cmNodeSettings).build()); + internalCluster().startNode(Settings.builder().put("node.name", clusterManager).put(cmNodeSettings).build()); response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); } diff --git a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java index 4cf82f1dabab3..507c4d9a24757 100644 --- a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java @@ -77,6 +77,7 @@ import java.util.concurrent.CyclicBarrier; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import static java.util.Collections.emptySet; @@ -86,6 +87,7 @@ import static org.opensearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; import static org.opensearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; public class NodeConnectionsServiceTests extends OpenSearchTestCase { @@ -490,6 +492,72 @@ public void testDebugLogging() throws IllegalAccessException { } } + public void testConnectionCheckerRetriesIfPendingDisconnection() throws InterruptedException { + final Settings.Builder settings = Settings.builder(); + final long reconnectIntervalMillis = 50; + settings.put(CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), reconnectIntervalMillis + "ms"); + + final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue( + builder().put(NODE_NAME_SETTING.getKey(), "node").build(), + random() + ); + + MockTransport transport = new MockTransport(deterministicTaskQueue.getThreadPool()); + TestTransportService transportService = new TestTransportService(transport, deterministicTaskQueue.getThreadPool()); + transportService.start(); + transportService.acceptIncomingRequests(); + + final NodeConnectionsService service = new NodeConnectionsService( + settings.build(), + deterministicTaskQueue.getThreadPool(), + transportService + ); + service.start(); + + // setup the connections + final DiscoveryNode node = new DiscoveryNode("node0", buildNewFakeTransportAddress(), Version.CURRENT); + ; + final DiscoveryNodes nodes = DiscoveryNodes.builder().add(node).build(); + + final AtomicBoolean connectionCompleted = new AtomicBoolean(); + service.connectToNodes(nodes, () -> connectionCompleted.set(true)); + deterministicTaskQueue.runAllRunnableTasks(); + assertTrue(connectionCompleted.get()); + + // now trigger a disconnect, and then set pending disconnections to true to fail any new connections + final long maxDisconnectionTime = 1000; + final long disconnectionTime = 100; + deterministicTaskQueue.scheduleAt(disconnectionTime, new Runnable() { + @Override + public void run() { + transportService.disconnectFromNode(node); + logger.info("--> setting pending disconnections to fail next connection attempts"); + service.setPendingDisconnections(new HashSet<>(Collections.singleton(node))); + transportService.resetConnectToNodeCallCount(); + } + + @Override + public String toString() { + return "scheduled disconnection of " + node; + } + }); + + // ensure the disconnect task completes, give extra time also for connection checker tasks + runTasksUntil(deterministicTaskQueue, maxDisconnectionTime); + + // verify that connectionchecker is trying to call connectToNode multiple times + logger.info("--> verifying connectionchecker is trying to reconnect"); + logger.info("--> number of reconnection attempts: {}", transportService.getConnectToNodeCallCount()); + assertThat("ConnectToNode should be called multiple times", transportService.getConnectToNodeCallCount(), greaterThan(5)); + assertFalse("connected to " + node, transportService.nodeConnected(node)); + + // clear the pending disconnections and ensure the connection gets re-established automatically by connectionchecker + logger.info("--> clearing pending disconnections to allow connections to re-establish"); + service.clearPendingDisconnections(); + runTasksUntil(deterministicTaskQueue, maxDisconnectionTime + 2 * reconnectIntervalMillis); + assertConnectedExactlyToNodes(transportService, nodes); + } + private void runTasksUntil(DeterministicTaskQueue deterministicTaskQueue, long endTimeMillis) { while (deterministicTaskQueue.getCurrentTimeMillis() < endTimeMillis) { if (deterministicTaskQueue.hasRunnableTasks() && randomBoolean()) { @@ -516,12 +584,22 @@ private void assertConnectedExactlyToNodes(TransportService transportService, Di assertThat(transportService.getConnectionManager().size(), equalTo(discoveryNodes.getSize())); } + private void assertNotConnectedToNodes(TransportService transportService, DiscoveryNodes discoveryNodes) { + assertNotConnected(transportService, discoveryNodes); + } + private void assertConnected(TransportService transportService, Iterable nodes) { for (DiscoveryNode node : nodes) { assertTrue("not connected to " + node, transportService.nodeConnected(node)); } } + private void assertNotConnected(TransportService transportService, Iterable nodes) { + for (DiscoveryNode node : nodes) { + assertFalse("connected to " + node, transportService.nodeConnected(node)); + } + } + @Override @Before public void setUp() throws Exception { @@ -545,6 +623,8 @@ public void tearDown() throws Exception { private final class TestTransportService extends TransportService { + private final AtomicInteger connectToNodeCallCount = new AtomicInteger(0); + private TestTransportService(Transport transport, ThreadPool threadPool) { super( Settings.EMPTY, @@ -588,6 +668,16 @@ public void connectToNode(DiscoveryNode node, ActionListener listener) thr } else { super.connectToNode(node, listener); } + logger.info("calling connectToNode"); + connectToNodeCallCount.incrementAndGet(); + } + + public int getConnectToNodeCallCount() { + return connectToNodeCallCount.get(); + } + + public void resetConnectToNodeCallCount() { + connectToNodeCallCount.set(0); } } diff --git a/server/src/test/java/org/opensearch/transport/ClusterConnectionManagerTests.java b/server/src/test/java/org/opensearch/transport/ClusterConnectionManagerTests.java index 1d734a56ef189..fdf762aa096f0 100644 --- a/server/src/test/java/org/opensearch/transport/ClusterConnectionManagerTests.java +++ b/server/src/test/java/org/opensearch/transport/ClusterConnectionManagerTests.java @@ -320,6 +320,50 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti assertEquals(0, nodeDisconnectedCount.get()); } + public void testConnectFailsWhenDisconnectIsPending() { + AtomicInteger nodeConnectedCount = new AtomicInteger(); + AtomicInteger nodeDisconnectedCount = new AtomicInteger(); + connectionManager.addListener(new TransportConnectionListener() { + @Override + public void onNodeConnected(DiscoveryNode node, Transport.Connection connection) { + nodeConnectedCount.incrementAndGet(); + } + + @Override + public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) { + nodeDisconnectedCount.incrementAndGet(); + } + }); + + DiscoveryNode node = new DiscoveryNode("", new TransportAddress(InetAddress.getLoopbackAddress(), 0), Version.CURRENT); + ConnectionManager.ConnectionValidator validator = (c, p, l) -> l.onResponse(null); + Transport.Connection connection = new TestConnect(node); + doAnswer(invocationOnMock -> { + ActionListener listener = (ActionListener) invocationOnMock.getArguments()[2]; + listener.onResponse(connection); + return null; + }).when(transport).openConnection(eq(node), eq(connectionProfile), any(ActionListener.class)); + assertFalse(connectionManager.nodeConnected(node)); + + // Mark connection as pending disconnect, any connection attempt should fail + connectionManager.setPendingDisconnection(node); + PlainActionFuture fut = new PlainActionFuture<>(); + connectionManager.connectToNode(node, connectionProfile, validator, fut); + expectThrows(IllegalStateException.class, () -> fut.actionGet()); + + // clear the pending disconnect and assert that connection succeeds + connectionManager.clearPendingDisconnections(); + assertFalse(connectionManager.nodeConnected(node)); + PlainActionFuture.get( + future -> connectionManager.connectToNode(node, connectionProfile, validator, ActionListener.map(future, x -> null)) + ); + assertFalse(connection.isClosed()); + assertTrue(connectionManager.nodeConnected(node)); + assertEquals(1, connectionManager.size()); + assertEquals(1, nodeConnectedCount.get()); + assertEquals(0, nodeDisconnectedCount.get()); + } + private static class TestConnect extends CloseableConnection { private final DiscoveryNode node; From fff66e1cc7464782d7b7681120b4ea2fd5f17a55 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 18 Sep 2024 17:01:13 +0530 Subject: [PATCH 30/36] remove unused code Signed-off-by: Rahul Karajgikar --- .../cluster/NodeConnectionsServiceTests.java | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java index 507c4d9a24757..cbae19dab01fa 100644 --- a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java @@ -584,22 +584,12 @@ private void assertConnectedExactlyToNodes(TransportService transportService, Di assertThat(transportService.getConnectionManager().size(), equalTo(discoveryNodes.getSize())); } - private void assertNotConnectedToNodes(TransportService transportService, DiscoveryNodes discoveryNodes) { - assertNotConnected(transportService, discoveryNodes); - } - private void assertConnected(TransportService transportService, Iterable nodes) { for (DiscoveryNode node : nodes) { assertTrue("not connected to " + node, transportService.nodeConnected(node)); } } - private void assertNotConnected(TransportService transportService, Iterable nodes) { - for (DiscoveryNode node : nodes) { - assertFalse("connected to " + node, transportService.nodeConnected(node)); - } - } - @Override @Before public void setUp() throws Exception { From db3d23adf67e01d0bb20eadce67df3903b54e4af Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 18 Sep 2024 19:15:49 +0530 Subject: [PATCH 31/36] add assertions on exception message Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index c0133e0883ec8..e5ab0f80a832c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -32,6 +32,14 @@ package org.opensearch.cluster.coordination; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.LoggerConfig; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.core.layout.PatternLayout; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.metadata.IndexMetadata; @@ -48,15 +56,20 @@ import org.opensearch.test.store.MockFSIndexStore; import org.opensearch.test.transport.MockTransportService; import org.opensearch.test.transport.StubbableTransport; +import org.opensearch.transport.ClusterConnectionManager; import org.opensearch.transport.TransportChannel; import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportRequestHandler; import org.opensearch.transport.TransportService; +import org.junit.After; import org.junit.Before; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.regex.Pattern; import static org.opensearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_ACTION_NAME; import static org.hamcrest.Matchers.is; @@ -68,8 +81,10 @@ @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class NodeJoinLeftIT extends OpenSearchIntegTestCase { + private TestAppender testAppender; private String clusterManager; private String redNodeName; + private LoggerContext loggerContext; @Override protected Collection> nodePlugins() { @@ -93,6 +108,13 @@ protected void beforeIndexDeletion() throws Exception { @Before public void setUp() throws Exception { super.setUp(); + testAppender = new TestAppender(); + loggerContext = (LoggerContext) LogManager.getContext(false); + Configuration config = loggerContext.getConfiguration(); + LoggerConfig loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName()); + loggerConfig.addAppender(testAppender, null, null); + loggerContext.updateLoggers(); + String indexName = "test"; final Settings nodeSettings = Settings.builder() .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms") @@ -124,6 +146,16 @@ public void setUp() throws Exception { .get(); } + @After + public void tearDown() throws Exception { + loggerContext = (LoggerContext) LogManager.getContext(false); + Configuration config = loggerContext.getConfiguration(); + LoggerConfig loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName()); + loggerConfig.removeAppender(testAppender.getName()); + loggerContext.updateLoggers(); + super.tearDown(); + } + public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throws Exception { ClusterService clusterManagerClsService = internalCluster().getInstance(ClusterService.class, clusterManager); @@ -181,6 +213,13 @@ public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throw succeedFollowerChecker.set(true); ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); + + // assert that the right exception message showed up in logs + assertTrue( + "Expected IllegalStateException was not logged", + testAppender.containsExceptionMessage("IllegalStateException[cannot make a new connection as disconnect to node") + ); + } public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Exception { @@ -251,6 +290,12 @@ public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Ex succeedFollowerChecker.set(true); ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); + + // assert that the right exception message showed up in logs + assertTrue( + "Expected IllegalStateException was not logged", + testAppender.containsExceptionMessage("IllegalStateException[cannot make a new connection as disconnect to node") + ); } public void testRestartDataNode() throws Exception { @@ -301,4 +346,29 @@ public void messageReceived( handler.messageReceived(request, channel, task); } } + + private static class TestAppender extends AbstractAppender { + private final List logs = new ArrayList<>(); + + TestAppender() { + super("TestAppender", null, PatternLayout.createDefaultLayout(), false, Property.EMPTY_ARRAY); + start(); + } + + @Override + public void append(LogEvent event) { + logs.add(event.getMessage().getFormattedMessage()); + if (event.getThrown() != null) { + logs.add(event.getThrown().toString()); + for (StackTraceElement element : event.getThrown().getStackTrace()) { + logs.add(element.toString()); + } + } + } + + boolean containsExceptionMessage(String exceptionMessage) { + Pattern pattern = Pattern.compile(Pattern.quote(exceptionMessage), Pattern.CASE_INSENSITIVE); + return logs.stream().anyMatch(log -> pattern.matcher(log).find()); + } + } } From a6a6c38671cf78baae5fe870b76f21b0e8f4352a Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Thu, 19 Sep 2024 19:30:38 +0530 Subject: [PATCH 32/36] changes to tests based on comments Signed-off-by: Rahul Karajgikar --- .../cluster/coordination/NodeJoinLeftIT.java | 81 +++++++----------- .../cluster/NodeConnectionsServiceTests.java | 83 +++++++++++++++++-- .../org/opensearch/test/TestLogsAppender.java | 74 +++++++++++++++++ 3 files changed, 179 insertions(+), 59 deletions(-) create mode 100644 test/framework/src/main/java/org/opensearch/test/TestLogsAppender.java diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java index e5ab0f80a832c..014e2bf642a4d 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java @@ -33,13 +33,9 @@ package org.opensearch.cluster.coordination; import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.core.LogEvent; import org.apache.logging.log4j.core.LoggerContext; -import org.apache.logging.log4j.core.appender.AbstractAppender; import org.apache.logging.log4j.core.config.Configuration; import org.apache.logging.log4j.core.config.LoggerConfig; -import org.apache.logging.log4j.core.config.Property; -import org.apache.logging.log4j.core.layout.PatternLayout; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.metadata.IndexMetadata; @@ -53,6 +49,7 @@ import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope; import org.opensearch.test.OpenSearchIntegTestCase.Scope; +import org.opensearch.test.TestLogsAppender; import org.opensearch.test.store.MockFSIndexStore; import org.opensearch.test.transport.MockTransportService; import org.opensearch.test.transport.StubbableTransport; @@ -64,12 +61,11 @@ import org.junit.After; import org.junit.Before; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.List; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.regex.Pattern; import static org.opensearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_ACTION_NAME; import static org.hamcrest.Matchers.is; @@ -81,7 +77,7 @@ @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class NodeJoinLeftIT extends OpenSearchIntegTestCase { - private TestAppender testAppender; + private TestLogsAppender testLogsAppender; private String clusterManager; private String redNodeName; private LoggerContext loggerContext; @@ -108,11 +104,13 @@ protected void beforeIndexDeletion() throws Exception { @Before public void setUp() throws Exception { super.setUp(); - testAppender = new TestAppender(); + // Add any other specific messages you want to capture + List messagesToCapture = Arrays.asList("failed to join", "IllegalStateException"); + testLogsAppender = new TestLogsAppender(messagesToCapture); loggerContext = (LoggerContext) LogManager.getContext(false); Configuration config = loggerContext.getConfiguration(); LoggerConfig loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName()); - loggerConfig.addAppender(testAppender, null, null); + loggerConfig.addAppender(testLogsAppender, null, null); loggerContext.updateLoggers(); String indexName = "test"; @@ -148,10 +146,11 @@ public void setUp() throws Exception { @After public void tearDown() throws Exception { + testLogsAppender.clearCapturedLogs(); loggerContext = (LoggerContext) LogManager.getContext(false); Configuration config = loggerContext.getConfiguration(); LoggerConfig loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName()); - loggerConfig.removeAppender(testAppender.getName()); + loggerConfig.removeAppender(testLogsAppender.getName()); loggerContext.updateLoggers(); super.tearDown(); } @@ -190,9 +189,9 @@ public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throw ); redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, simulatedFailureBehaviour); - // Loop runs 10 times to ensure race condition gets reproduced - for (int i = 0; i < 10; i++) { - // Fail followerchecker by force to trigger node disconnect and node left + // Loop runs 5 times to ensure race condition gets reproduced + testLogsAppender.clearCapturedLogs(); + for (int i = 0; i < 5; i++) { logger.info("--> simulating followerchecker failure to trigger node-left"); succeedFollowerChecker.set(false); ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get(); @@ -214,12 +213,14 @@ public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throw ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); - // assert that the right exception message showed up in logs - assertTrue( - "Expected IllegalStateException was not logged", - testAppender.containsExceptionMessage("IllegalStateException[cannot make a new connection as disconnect to node") - ); - + // assert that join requests fail with the right exception + boolean logFound = testLogsAppender.waitForLog("failed to join", 30, TimeUnit.SECONDS) + && testLogsAppender.waitForLog( + "IllegalStateException[cannot make a new connection as disconnect to node", + 30, + TimeUnit.SECONDS + ); + assertTrue("Expected log was not found within the timeout period", logFound); } public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Exception { @@ -259,8 +260,9 @@ public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Ex ); redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, simulatedFailureBehaviour); - // Loop runs 10 times to ensure race condition gets reproduced - for (int i = 0; i < 10; i++) { + // Loop runs 5 times to ensure race condition gets reproduced + testLogsAppender.clearCapturedLogs(); + for (int i = 0; i < 5; i++) { // Fail followerchecker by force to trigger node disconnect and node left logger.info("--> simulating followerchecker failure to trigger node-left"); succeedFollowerChecker.set(false); @@ -291,11 +293,15 @@ public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Ex ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get(); assertThat(response.isTimedOut(), is(false)); - // assert that the right exception message showed up in logs - assertTrue( - "Expected IllegalStateException was not logged", - testAppender.containsExceptionMessage("IllegalStateException[cannot make a new connection as disconnect to node") + // assert that join requests fail with the right exception + boolean logFound = testLogsAppender.waitForLog("failed to join", 30, TimeUnit.SECONDS); + assertTrue("Expected log was not found within the timeout period", logFound); + logFound = testLogsAppender.waitForLog( + "IllegalStateException[cannot make a new connection as disconnect to node", + 30, + TimeUnit.SECONDS ); + assertTrue("Expected log was not found within the timeout period", logFound); } public void testRestartDataNode() throws Exception { @@ -346,29 +352,4 @@ public void messageReceived( handler.messageReceived(request, channel, task); } } - - private static class TestAppender extends AbstractAppender { - private final List logs = new ArrayList<>(); - - TestAppender() { - super("TestAppender", null, PatternLayout.createDefaultLayout(), false, Property.EMPTY_ARRAY); - start(); - } - - @Override - public void append(LogEvent event) { - logs.add(event.getMessage().getFormattedMessage()); - if (event.getThrown() != null) { - logs.add(event.getThrown().toString()); - for (StackTraceElement element : event.getThrown().getStackTrace()) { - logs.add(element.toString()); - } - } - } - - boolean containsExceptionMessage(String exceptionMessage) { - Pattern pattern = Pattern.compile(Pattern.quote(exceptionMessage), Pattern.CASE_INSENSITIVE); - return logs.stream().anyMatch(log -> pattern.matcher(log).find()); - } - } } diff --git a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java index cbae19dab01fa..2958ac50c6a82 100644 --- a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java @@ -35,6 +35,9 @@ import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.LoggerConfig; import org.opensearch.OpenSearchTimeoutException; import org.opensearch.Version; import org.opensearch.action.support.PlainActionFuture; @@ -53,9 +56,11 @@ import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.MockLogAppender; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.TestLogsAppender; import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.ClusterConnectionManager; import org.opensearch.transport.ConnectTransportException; import org.opensearch.transport.ConnectionProfile; import org.opensearch.transport.Transport; @@ -69,6 +74,7 @@ import org.junit.Before; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -94,6 +100,8 @@ public class NodeConnectionsServiceTests extends OpenSearchTestCase { private ThreadPool threadPool; private TransportService transportService; private Map> nodeConnectionBlocks; + private TestLogsAppender testLogsAppender; + private LoggerContext loggerContext; private List generateNodes() { List nodes = new ArrayList<>(); @@ -516,7 +524,7 @@ public void testConnectionCheckerRetriesIfPendingDisconnection() throws Interrup // setup the connections final DiscoveryNode node = new DiscoveryNode("node0", buildNewFakeTransportAddress(), Version.CURRENT); - ; + final DiscoveryNodes nodes = DiscoveryNodes.builder().add(node).build(); final AtomicBoolean connectionCompleted = new AtomicBoolean(); @@ -526,13 +534,15 @@ public void testConnectionCheckerRetriesIfPendingDisconnection() throws Interrup // now trigger a disconnect, and then set pending disconnections to true to fail any new connections final long maxDisconnectionTime = 1000; - final long disconnectionTime = 100; - deterministicTaskQueue.scheduleAt(disconnectionTime, new Runnable() { + deterministicTaskQueue.scheduleNow(new Runnable() { @Override public void run() { transportService.disconnectFromNode(node); logger.info("--> setting pending disconnections to fail next connection attempts"); service.setPendingDisconnections(new HashSet<>(Collections.singleton(node))); + // we reset the connection count during the first disconnection + // we also clear the captured logs as we want to assert for exceptions that show up after this + testLogsAppender.clearCapturedLogs(); transportService.resetConnectToNodeCallCount(); } @@ -541,20 +551,39 @@ public String toString() { return "scheduled disconnection of " + node; } }); + final long maxReconnectionTime = 2000; + final int expectedReconnectionAttempts = 5; - // ensure the disconnect task completes, give extra time also for connection checker tasks - runTasksUntil(deterministicTaskQueue, maxDisconnectionTime); - - // verify that connectionchecker is trying to call connectToNode multiple times + // ensure the disconnect task completes, and run for additional time to check for reconnections + // exit early if we see enough reconnection attempts logger.info("--> verifying connectionchecker is trying to reconnect"); + runTasksUntilExpectedReconnectionAttempts( + deterministicTaskQueue, + maxDisconnectionTime + maxReconnectionTime, + transportService, + expectedReconnectionAttempts + ); + + // assert that we saw at least the required number of reconnection attempts, and the exceptions that showed up are as expected logger.info("--> number of reconnection attempts: {}", transportService.getConnectToNodeCallCount()); - assertThat("ConnectToNode should be called multiple times", transportService.getConnectToNodeCallCount(), greaterThan(5)); + assertThat( + "Did not see enough reconnection attempts from connection checker", + transportService.getConnectToNodeCallCount(), + greaterThan(expectedReconnectionAttempts) + ); + boolean logFound = testLogsAppender.waitForLog("failed to connect", 1, TimeUnit.SECONDS) + && testLogsAppender.waitForLog( + "IllegalStateException: cannot make a new connection as disconnect to node", + 1, + TimeUnit.SECONDS + ); + assertTrue("Expected log for reconnection failure was not found in the required time period", logFound); assertFalse("connected to " + node, transportService.nodeConnected(node)); // clear the pending disconnections and ensure the connection gets re-established automatically by connectionchecker logger.info("--> clearing pending disconnections to allow connections to re-establish"); service.clearPendingDisconnections(); - runTasksUntil(deterministicTaskQueue, maxDisconnectionTime + 2 * reconnectIntervalMillis); + runTasksUntil(deterministicTaskQueue, maxDisconnectionTime + maxReconnectionTime + 2 * reconnectIntervalMillis); assertConnectedExactlyToNodes(transportService, nodes); } @@ -569,6 +598,24 @@ private void runTasksUntil(DeterministicTaskQueue deterministicTaskQueue, long e deterministicTaskQueue.runAllRunnableTasks(); } + private void runTasksUntilExpectedReconnectionAttempts( + DeterministicTaskQueue deterministicTaskQueue, + long endTimeMillis, + TestTransportService transportService, + int expectedReconnectionAttempts + ) { + // break the loop if we timeout or if we have enough reconnection attempts + while ((deterministicTaskQueue.getCurrentTimeMillis() < endTimeMillis) + && (transportService.getConnectToNodeCallCount() <= expectedReconnectionAttempts)) { + if (deterministicTaskQueue.hasRunnableTasks() && randomBoolean()) { + deterministicTaskQueue.runRandomTask(); + } else if (deterministicTaskQueue.hasDeferredTasks()) { + deterministicTaskQueue.advanceTime(); + } + } + deterministicTaskQueue.runAllRunnableTasks(); + } + private void ensureConnections(NodeConnectionsService service) { final PlainActionFuture future = new PlainActionFuture<>(); service.ensureConnections(() -> future.onResponse(null)); @@ -594,6 +641,16 @@ private void assertConnected(TransportService transportService, Iterable messagesToCapture = Arrays.asList("failed to connect", "IllegalStateException"); + testLogsAppender = new TestLogsAppender(messagesToCapture); + loggerContext = (LoggerContext) LogManager.getContext(false); + Configuration config = loggerContext.getConfiguration(); + LoggerConfig loggerConfig = config.getLoggerConfig(NodeConnectionsService.class.getName()); + loggerConfig.addAppender(testLogsAppender, null, null); + loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName()); + loggerConfig.addAppender(testLogsAppender, null, null); + loggerContext.updateLoggers(); ThreadPool threadPool = new TestThreadPool(getClass().getName()); this.threadPool = threadPool; nodeConnectionBlocks = newConcurrentMap(); @@ -605,6 +662,14 @@ public void setUp() throws Exception { @Override @After public void tearDown() throws Exception { + testLogsAppender.clearCapturedLogs(); + loggerContext = (LoggerContext) LogManager.getContext(false); + Configuration config = loggerContext.getConfiguration(); + LoggerConfig loggerConfig = config.getLoggerConfig(NodeConnectionsService.class.getName()); + loggerConfig.removeAppender(testLogsAppender.getName()); + loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName()); + loggerConfig.removeAppender(testLogsAppender.getName()); + loggerContext.updateLoggers(); transportService.stop(); ThreadPool.terminate(threadPool, 30, TimeUnit.SECONDS); threadPool = null; diff --git a/test/framework/src/main/java/org/opensearch/test/TestLogsAppender.java b/test/framework/src/main/java/org/opensearch/test/TestLogsAppender.java new file mode 100644 index 0000000000000..030f399a5bcc0 --- /dev/null +++ b/test/framework/src/main/java/org/opensearch/test/TestLogsAppender.java @@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.test; + +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.core.layout.PatternLayout; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * Test logs appender that provides functionality to extract specific logs/exception messages and wait for it to show up + * @opensearch.internal + */ +public class TestLogsAppender extends AbstractAppender { + private final List capturedLogs = new ArrayList<>(); + private final List messagesToCapture; + + public TestLogsAppender(List messagesToCapture) { + super("TestAppender", null, PatternLayout.createDefaultLayout(), false, Property.EMPTY_ARRAY); + this.messagesToCapture = messagesToCapture; + start(); + } + + @Override + public void append(LogEvent event) { + if (shouldCaptureMessage(event.getMessage().getFormattedMessage())) capturedLogs.add(event.getMessage().getFormattedMessage()); + if (event.getThrown() != null) { + if (shouldCaptureMessage(event.getThrown().toString())) capturedLogs.add(event.getThrown().toString()); + for (StackTraceElement element : event.getThrown().getStackTrace()) + if (shouldCaptureMessage(element.toString())) capturedLogs.add(element.toString()); + } + } + + public boolean shouldCaptureMessage(String log) { + return messagesToCapture.stream().anyMatch(log::contains); + } + + public List getCapturedLogs() { + return new ArrayList<>(capturedLogs); + } + + public boolean waitForLog(String expectedLog, long timeout, TimeUnit unit) { + long startTime = System.currentTimeMillis(); + long timeoutInMillis = unit.toMillis(timeout); + + while (System.currentTimeMillis() - startTime < timeoutInMillis) { + if (capturedLogs.stream().anyMatch(log -> log.contains(expectedLog))) { + return true; + } + try { + Thread.sleep(100); // Wait for 100ms before checking again + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + return false; + } + + // Clear captured logs + public void clearCapturedLogs() { + capturedLogs.clear(); + } +} From 1ded2ccfa9f63c3efaa8d5ab4435268257ba5a61 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Fri, 20 Sep 2024 17:23:46 +0530 Subject: [PATCH 33/36] empty commit Signed-off-by: Rahul Karajgikar From 9a060cc70d00fd271512716d366a81c00ed2607f Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 25 Sep 2024 16:21:21 +0530 Subject: [PATCH 34/36] update nodeconnectionsservice test Signed-off-by: Rahul Karajgikar --- .../cluster/NodeConnectionsService.java | 4 +- .../cluster/NodeConnectionsServiceTests.java | 78 +++++++++++++++---- .../coordination/DeterministicTaskQueue.java | 11 +++ 3 files changed, 75 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index 21a5a5e08933b..8ce11c8183cf6 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -106,7 +106,7 @@ public class NodeConnectionsService extends AbstractLifecycleComponent { protected final Map targetsByNode = new HashMap<>(); private final TimeValue reconnectInterval; - private volatile ConnectionChecker connectionChecker; + protected volatile ConnectionChecker connectionChecker; @Inject public NodeConnectionsService(Settings settings, ThreadPool threadPool, TransportService transportService) { @@ -224,7 +224,7 @@ private void awaitPendingActivity(Runnable onCompletion) { * nodes which are in the process of disconnecting. The onCompletion handler is called after all ongoing connection/disconnection * attempts have completed. */ - private void connectDisconnectedTargets(Runnable onCompletion) { + protected void connectDisconnectedTargets(Runnable onCompletion) { final List runnables = new ArrayList<>(); synchronized (mutex) { final Collection connectionTargets = targetsByNode.values(); diff --git a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java index 2958ac50c6a82..4500860c937ea 100644 --- a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java @@ -515,7 +515,7 @@ public void testConnectionCheckerRetriesIfPendingDisconnection() throws Interrup transportService.start(); transportService.acceptIncomingRequests(); - final NodeConnectionsService service = new NodeConnectionsService( + final TestNodeConnectionsService service = new TestNodeConnectionsService( settings.build(), deterministicTaskQueue.getThreadPool(), transportService @@ -532,18 +532,25 @@ public void testConnectionCheckerRetriesIfPendingDisconnection() throws Interrup deterministicTaskQueue.runAllRunnableTasks(); assertTrue(connectionCompleted.get()); - // now trigger a disconnect, and then set pending disconnections to true to fail any new connections + // reset any logs as we want to assert for exceptions that show up after this + // reset connect to node count to assert for later + logger.info("--> resetting captured logs and counters"); + testLogsAppender.clearCapturedLogs(); + // this ensures we only track connection attempts that happen after the disconnection + transportService.resetConnectToNodeCallCount(); + + // block connection checker reconnection attempts until after we set pending disconnections + logger.info("--> disabling connection checker, and triggering disconnect"); + service.setShouldReconnect(false); + transportService.disconnectFromNode(node); + + // set pending disconnections to true to fail future reconnection attempts final long maxDisconnectionTime = 1000; deterministicTaskQueue.scheduleNow(new Runnable() { @Override public void run() { - transportService.disconnectFromNode(node); logger.info("--> setting pending disconnections to fail next connection attempts"); service.setPendingDisconnections(new HashSet<>(Collections.singleton(node))); - // we reset the connection count during the first disconnection - // we also clear the captured logs as we want to assert for exceptions that show up after this - testLogsAppender.clearCapturedLogs(); - transportService.resetConnectToNodeCallCount(); } @Override @@ -551,18 +558,27 @@ public String toString() { return "scheduled disconnection of " + node; } }); + // our task queue will have the first task as the runnable to set pending disconnections + // here we re-enable the connection checker to enqueue next tasks for attempting reconnection + logger.info("--> re-enabling reconnection checker"); + service.setShouldReconnect(true); + final long maxReconnectionTime = 2000; - final int expectedReconnectionAttempts = 5; + final int expectedReconnectionAttempts = 10; - // ensure the disconnect task completes, and run for additional time to check for reconnections - // exit early if we see enough reconnection attempts - logger.info("--> verifying connectionchecker is trying to reconnect"); - runTasksUntilExpectedReconnectionAttempts( + // this will first run the task to set the pending disconnections, then will execute the reconnection tasks + // exit early when we have enough reconnection attempts + logger.info("--> running tasks in order until expected reconnection attempts"); + runTasksInOrderUntilExpectedReconnectionAttempts( deterministicTaskQueue, maxDisconnectionTime + maxReconnectionTime, transportService, expectedReconnectionAttempts ); + logger.info("--> verifying that connectionchecker tried to reconnect"); + + // assert that the connections failed + assertFalse("connected to " + node, transportService.nodeConnected(node)); // assert that we saw at least the required number of reconnection attempts, and the exceptions that showed up are as expected logger.info("--> number of reconnection attempts: {}", transportService.getConnectToNodeCallCount()); @@ -578,7 +594,6 @@ public String toString() { TimeUnit.SECONDS ); assertTrue("Expected log for reconnection failure was not found in the required time period", logFound); - assertFalse("connected to " + node, transportService.nodeConnected(node)); // clear the pending disconnections and ensure the connection gets re-established automatically by connectionchecker logger.info("--> clearing pending disconnections to allow connections to re-establish"); @@ -598,7 +613,7 @@ private void runTasksUntil(DeterministicTaskQueue deterministicTaskQueue, long e deterministicTaskQueue.runAllRunnableTasks(); } - private void runTasksUntilExpectedReconnectionAttempts( + private void runTasksInOrderUntilExpectedReconnectionAttempts( DeterministicTaskQueue deterministicTaskQueue, long endTimeMillis, TestTransportService transportService, @@ -608,12 +623,12 @@ private void runTasksUntilExpectedReconnectionAttempts( while ((deterministicTaskQueue.getCurrentTimeMillis() < endTimeMillis) && (transportService.getConnectToNodeCallCount() <= expectedReconnectionAttempts)) { if (deterministicTaskQueue.hasRunnableTasks() && randomBoolean()) { - deterministicTaskQueue.runRandomTask(); + deterministicTaskQueue.runNextTask(); } else if (deterministicTaskQueue.hasDeferredTasks()) { deterministicTaskQueue.advanceTime(); } } - deterministicTaskQueue.runAllRunnableTasks(); + deterministicTaskQueue.runAllRunnableTasksInEnqueuedOrder(); } private void ensureConnections(NodeConnectionsService service) { @@ -736,6 +751,37 @@ public void resetConnectToNodeCallCount() { } } + private class TestNodeConnectionsService extends NodeConnectionsService { + private boolean shouldReconnect = true; + + public TestNodeConnectionsService(Settings settings, ThreadPool threadPool, TransportService transportService) { + super(settings, threadPool, transportService); + } + + public void setShouldReconnect(boolean shouldReconnect) { + this.shouldReconnect = shouldReconnect; + } + + @Override + protected void doStart() { + final StoppableConnectionChecker connectionChecker = new StoppableConnectionChecker(); + this.connectionChecker = connectionChecker; + connectionChecker.scheduleNextCheck(); + } + + class StoppableConnectionChecker extends NodeConnectionsService.ConnectionChecker { + @Override + protected void doRun() { + if (connectionChecker == this && shouldReconnect) { + connectDisconnectedTargets(this::scheduleNextCheck); + } else { + // Skip reconnection attempt but still schedule the next check + scheduleNextCheck(); + } + } + } + } + private static final class MockTransport implements Transport { private final ResponseHandlers responseHandlers = new ResponseHandlers(); private final RequestHandlers requestHandlers = new RequestHandlers(); diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/DeterministicTaskQueue.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/DeterministicTaskQueue.java index 1ad18bf89d5ba..4f692c7bc8f62 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/DeterministicTaskQueue.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/DeterministicTaskQueue.java @@ -92,6 +92,12 @@ public void runAllRunnableTasks() { } } + public void runAllRunnableTasksInEnqueuedOrder() { + while (hasRunnableTasks()) { + runTask(0); + } + } + public void runAllTasks() { while (hasDeferredTasks() || hasRunnableTasks()) { if (hasDeferredTasks() && random.nextBoolean()) { @@ -141,6 +147,11 @@ public void runRandomTask() { runTask(RandomNumbers.randomIntBetween(random, 0, runnableTasks.size() - 1)); } + public void runNextTask() { + assert hasRunnableTasks(); + runTask(0); + } + private void runTask(final int index) { final Runnable task = runnableTasks.remove(index); logger.trace("running task {} of {}: {}", index, runnableTasks.size() + 1, task); From 7b0d28b0fc312b067fb682cb1e7238f400d5b30b Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 25 Sep 2024 18:47:35 +0530 Subject: [PATCH 35/36] empty commit for gradle check Signed-off-by: Rahul Karajgikar From e0a0ae248e82a36d4b8fc35f597dfaaa6e213108 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar Date: Wed, 25 Sep 2024 22:28:05 +0530 Subject: [PATCH 36/36] empty commit Signed-off-by: Rahul Karajgikar