private boolean isLocalNode(DiscoveryNode discoveryNode) { return Objects.requireNonNull(discoveryNode, "discovery node must not be null").equals(localNode); } }
public void addPingResponseToCollection(PingResponse pingResponse) { if (localNode.equals(pingResponse.node()) == false) { pingCollection.addPing(pingResponse); } }
/** * Determine if a given node exists * * @param node of the node which existence should be verified * @return <code>true</code> if the node exists. Otherwise <code>false</code> */ public boolean nodeExists(DiscoveryNode node) { DiscoveryNode existing = nodes.get(node.getId()); return existing != null && existing.equals(node); }
@Override public void handleResponse(MasterPingResponseResponse response) { if (!running) { return; } // reset the counter, we got a good result MasterFaultDetection.this.retryCount = 0; // check if the master node did not get switched on us..., if it did, we simply return with no reschedule if (masterToPing.equals(MasterFaultDetection.this.masterNode())) { // we don't stop on disconnection from master, we keep pinging it threadPool.schedule(pingInterval, ThreadPool.Names.SAME, MasterPinger.this); } }
AckCountDownListener(AckedClusterStateTaskListener ackedTaskListener, long clusterStateVersion, DiscoveryNodes nodes, ThreadPool threadPool) { this.ackedTaskListener = ackedTaskListener; this.clusterStateVersion = clusterStateVersion; this.threadPool = threadPool; this.masterNode = nodes.getMasterNode(); int countDown = 0; for (DiscoveryNode node : nodes) { //we always wait for at least the master node if (node.equals(masterNode) || ackedTaskListener.mustAck(node)) { countDown++; } } logger.trace("expecting {} acknowledgements for cluster_state update (version: {})", countDown, clusterStateVersion); this.countDown = new CountDown(countDown + 1); // we also wait for onCommit to be called }
@Override public void onNodeAck(DiscoveryNode node, @Nullable Exception e) { if (node.equals(masterNode) == false && ackedTaskListener.mustAck(node) == false) { return; } if (e == null) { logger.trace("ack received from node [{}], cluster_state update (version: {})", node, clusterStateVersion); } else { this.lastFailure = e; logger.debug(() -> new ParameterizedMessage( "ack received from node [{}], cluster_state update (version: {})", node, clusterStateVersion), e); } if (countDown.countDown()) { finish(); } }
@Override public void messageReceived(PingRequest request, TransportChannel channel) throws Exception { // if we are not the node we are supposed to be pinged, send an exception // this can happen when a kill -9 is sent, and another node is started using the same port if (!localNode.equals(request.targetNode())) { throw new IllegalStateException("Got pinged as node " + request.targetNode() + "], but I am node " + localNode ); } // PingRequest will have clusterName set to null if it came from a node of version <1.4.0 if (request.clusterName != null && !request.clusterName.equals(clusterName)) { // Don't introduce new exception for bwc reasons throw new IllegalStateException("Got pinged with cluster name [" + request.clusterName + "], but I'm part of cluster [" + clusterName + "]"); } notifyPingReceived(request); channel.sendResponse(new PingResponse()); } }
/** * make sure that nodes in clusterState are pinged. Any pinging to nodes which are not * part of the cluster will be stopped */ public void updateNodesAndPing(ClusterState clusterState) { // remove any nodes we don't need, this will cause their FD to stop for (DiscoveryNode monitoredNode : nodesFD.keySet()) { if (!clusterState.nodes().nodeExists(monitoredNode)) { nodesFD.remove(monitoredNode); } } // add any missing nodes for (DiscoveryNode node : clusterState.nodes()) { if (node.equals(localNode)) { // no need to monitor the local node continue; } if (!nodesFD.containsKey(node)) { NodeFD fd = new NodeFD(node); // it's OK to overwrite an existing nodeFD - it will just stop and the new one will pick things up. nodesFD.put(node, fd); // we use schedule with a 0 time value to run the pinger on the pool as it will run on later threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, fd); } } }
public Builder remove(DiscoveryNode node) { if (node.equals(nodes.get(node.getId()))) { nodes.remove(node.getId()); } return this; }
public CheckedBiConsumer<Transport.Connection, ConnectionProfile, IOException> connectionValidator(DiscoveryNode node) { return (newConnection, actualProfile) -> { // We don't validate cluster names to allow for CCS connections. final DiscoveryNode remote = handshake(newConnection, actualProfile.getHandshakeTimeout().millis(), cn -> true).discoveryNode; if (validateConnections && node.equals(remote) == false) { throw new ConnectTransportException(node, "handshake failed. unexpected remote node " + remote); } }; }
if (masterToPing.equals(MasterFaultDetection.this.masterNode())) { if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { handleTransportDisconnect(masterToPing);
@Override protected void handleTransportDisconnect(DiscoveryNode node) { synchronized (masterNodeMutex) { if (!node.equals(this.masterNode)) { return; } if (connectOnNetworkDisconnect) { try { transportService.connectToNode(node); // if all is well, make sure we restart the pinger if (masterPinger != null) { masterPinger.stop(); } this.masterPinger = new MasterPinger(); // we use schedule with a 0 time value to run the pinger on the pool as it will run on later threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger); } catch (Exception e) { logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode); notifyMasterFailure(masterNode, null, "transport disconnected (with verified connect)"); } } else { logger.trace("[master] [{}] transport disconnected", node); notifyMasterFailure(node, null, "transport disconnected"); } } }
private void handleMasterGone(final DiscoveryNode masterNode, final Throwable cause, final String reason) { if (lifecycleState() != Lifecycle.State.STARTED) { // not started, ignore a master failure return; } if (localNodeMaster()) { // we might get this on both a master telling us shutting down, and then the disconnect failure return; } logger.info(() -> new ParameterizedMessage("master_left [{}], reason [{}]", masterNode, reason), cause); synchronized (stateMutex) { if (localNodeMaster() == false && masterNode.equals(committedState.get().nodes().getMasterNode())) { // flush any pending cluster states from old master, so it will not be set as master again pendingStatesQueue.failAllStatesAndClear(new ElasticsearchException("master left [{}]", reason)); rejoin("master left (reason = " + reason + ")"); } } }
/** * Checks that a node can be safely added to this node collection. * * @return null if all is OK or an error message explaining why a node can not be added. * * Note: if this method returns a non-null value, calling {@link #add(DiscoveryNode)} will fail with an * exception */ private String validateAdd(DiscoveryNode node) { for (ObjectCursor<DiscoveryNode> cursor : nodes.values()) { final DiscoveryNode existingNode = cursor.value; if (node.getAddress().equals(existingNode.getAddress()) && node.getId().equals(existingNode.getId()) == false) { return "can't add node " + node + ", found existing node " + existingNode + " with same address"; } if (node.getId().equals(existingNode.getId()) && node.equals(existingNode) == false) { return "can't add node " + node + ", found existing node " + existingNode + " with the same id but is a different node instance"; } } return null; }
/** * does simple sanity check of the incoming cluster state. Throws an exception on rejections. */ static void validateIncomingState(Logger logger, ClusterState incomingState, ClusterState lastState) { final ClusterName incomingClusterName = incomingState.getClusterName(); if (!incomingClusterName.equals(lastState.getClusterName())) { logger.warn("received cluster state from [{}] which is also master but with a different cluster name [{}]", incomingState.nodes().getMasterNode(), incomingClusterName); throw new IllegalStateException("received state from a node that is not part of the cluster"); } if (lastState.nodes().getLocalNode().equals(incomingState.nodes().getLocalNode()) == false) { logger.warn("received a cluster state from [{}] and not part of the cluster, should not happen", incomingState.nodes().getMasterNode()); throw new IllegalStateException("received state with a local node that does not match the current local node"); } if (shouldIgnoreOrRejectNewClusterState(logger, lastState, incomingState)) { String message = String.format( Locale.ROOT, "rejecting cluster state version [%d] uuid [%s] received from [%s]", incomingState.version(), incomingState.stateUUID(), incomingState.nodes().getMasterNodeId() ); logger.warn(message); throw new IllegalStateException(message); } }
final AtomicInteger counter = new AtomicInteger(nodes.size()); for (final DiscoveryNode node : nodes) { if (node.equals(localNode)) { try { doVerify(repository, verificationToken, localNode);
private void handleLeaveRequest(final DiscoveryNode node) { if (lifecycleState() != Lifecycle.State.STARTED) { // not started, ignore a node failure return; } if (localNodeMaster()) { removeNode(node, "zen-disco-node-left", "left"); } else if (node.equals(clusterState().nodes().getMasterNode())) { handleMasterGone(node, null, "shut_down"); } }
@Override public void run() { if (responseHandlers.contains(requestId)) { long timeoutTime = threadPool.relativeTimeInMillis(); timeoutInfoHandlers.put(requestId, new TimeoutInfoHolder(node, action, sentTime, timeoutTime)); // now that we have the information visible via timeoutInfoHandlers, we try to remove the request id final Transport.ResponseContext holder = responseHandlers.remove(requestId); if (holder != null) { assert holder.action().equals(action); assert holder.connection().getNode().equals(node); holder.handler().handleException( new ReceiveTimeoutTransportException(holder.connection().getNode(), holder.action(), "request_id [" + requestId + "] timed out after [" + (timeoutTime - sentTime) + "ms]")); } else { // response was processed, remove timeout info. timeoutInfoHandlers.remove(requestId); } } }
private ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<DiscoveryNode> joiningNodes) { assert currentState.nodes().getMasterNodeId() == null : currentState; DiscoveryNodes currentNodes = currentState.nodes(); DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes); nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId()); for (final DiscoveryNode joiningNode : joiningNodes) { final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId()); if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode); nodesBuilder.remove(nodeWithSameId.getId()); } final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress()); if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress, joiningNode); nodesBuilder.remove(nodeWithSameAddress.getId()); } } // now trim any left over dead nodes - either left there when the previous master stepped down // or removed by us above ClusterState tmpState = ClusterState.builder(currentState).nodes(nodesBuilder).blocks(ClusterBlocks.builder() .blocks(currentState.blocks()) .removeGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)).build(); return ClusterState.builder(allocationService.deassociateDeadNodes(tmpState, false, "removed dead nodes on election")); }
if (nodes.getLocalNode().equals(possibleMaster)) { continue;