DebugLogger log = messageLogger(msg); debugLog(msg, "Processing message [cls=" + msg.getClass().getSimpleName() + ", id=" + msg.id() + ']'); assert false : "Unknown message type: " + msg.getClass().getSimpleName(); if (msg.senderNodeId() != null && !msg.senderNodeId().equals(getLocalNodeId())) { onMessageExchanged(); msg instanceof TcpDiscoveryNodeAddFinishedMessage || msg instanceof TcpDiscoveryNodeAddedMessage) { U.enhanceThreadName(U.id8(next.id()) + ' ' + sock.getInetAddress().getHostAddress() + ":" + sock.getPort() + (isLocalNodeCoordinator() ? " crd" : ""));
/**
 * {@inheritDoc}
 * <p>
 * Fails the client node identified by {@code clientNodeId} when the time passed since
 * {@code lastMetricsUpdateMsgTime} exceeds {@code spi.clientFailureDetectionTimeout()},
 * unless the node is no longer in the ring or is already registered in {@code failedNodes}.
 */
@Override protected void noMessageLoop() {
    long silenceMs = U.currentTimeMillis() - lastMetricsUpdateMsgTime;

    // Still within the allowed interval: nothing to do.
    if (silenceMs <= spi.clientFailureDetectionTimeout())
        return;

    TcpDiscoveryNode clientNode = ring.node(clientNodeId);

    // The ring does not know this client: nothing to fail.
    if (clientNode == null)
        return;

    boolean alreadyFailed;

    // The failed nodes map is only read while holding mux.
    synchronized (mux) {
        alreadyFailed = failedNodes.containsKey(clientNode);
    }

    if (alreadyFailed)
        return;

    String warnMsg = "Client node considered as unreachable "
        + "and will be dropped from cluster, "
        + "because no metrics update messages received in interval: "
        + "TcpDiscoverySpi.clientFailureDetectionTimeout() ms. "
        + "It may be caused by network problems or long GC pause on client node, try to increase this "
        + "parameter. "
        + "[nodeId=" + clientNodeId
        + ", clientFailureDetectionTimeout=" + spi.clientFailureDetectionTimeout()
        + ']';

    // The same text is used both as the failure reason and as the warning.
    failNode(clientNodeId, warnMsg);

    U.warn(log, warnMsg);
}
} // Closes a scope that was opened before this block.
/**
 * <strong>FOR TEST ONLY!!!</strong>
 * <p>
 * Casts {@code impl} to {@code ServerImpl} and delegates to its {@code forceNextNodeFailure()}.
 */
void forceNextNodeFailure() {
    ServerImpl srvImpl = (ServerImpl)impl;

    srvImpl.forceNextNodeFailure();
}
/**
 * Sends metrics update message if needed.
 * <p>
 * A message is queued only when at least {@code spi.metricsUpdateFreq} has passed since
 * {@code lastTimeMetricsUpdateMsgSent} and the local node is the coordinator.
 */
private void sendMetricsUpdateMessage() {
    // Positive value means the next send time has not been reached yet.
    long untilNextSend = (lastTimeMetricsUpdateMsgSent + spi.metricsUpdateFreq) - U.currentTimeMillis();

    if (untilNextSend > 0)
        return;

    if (!isLocalNodeCoordinator())
        return;

    TcpDiscoveryMetricsUpdateMessage updateMsg =
        new TcpDiscoveryMetricsUpdateMessage(getConfiguredNodeId());

    // Mark the message as verified by the local node before queueing it.
    updateMsg.verify(getLocalNodeId());

    msgWorker.addMessage(updateMsg);

    lastTimeMetricsUpdateMsgSent = U.currentTimeMillis();
}
/** * Processes discard message and discards previously registered pending messages. * * @param msg Discard message. */ private void processDiscardMessage(TcpDiscoveryDiscardMessage msg) { assert msg != null; IgniteUuid msgId = msg.msgId(); assert msgId != null; if (isLocalNodeCoordinator()) { if (!getLocalNodeId().equals(msg.verifierNodeId())) // Message is not verified or verified by former coordinator. msg.verify(getLocalNodeId()); else // Discard the message. return; } if (msg.verified()) pendingMsgs.discard(msgId, msg.customMessageDiscard()); if (ring.hasRemoteNodes()) sendMessageAcrossRing(msg); }
b.append(">>>").append(U.nl()); b.append("Local node ID: ").append(getLocalNodeId()).append(U.nl()).append(U.nl()); b.append("Local node: ").append(locNode).append(U.nl()).append(U.nl()); b.append("SPI state: ").append(spiState).append(U.nl()).append(U.nl()); b.append(" Message worker: ").append(threadStatus(msgWorker.runner())).append(U.nl()); b.append(" IP finder cleaner: ").append(threadStatus(ipFinderCleaner)).append(U.nl()); b.append(" Stats printer: ").append(threadStatus(statsPrinter)).append(U.nl());
UUID locNodeId = getLocalNodeId(); debugLog(msg, "No next node in topology."); debugLog(msg, "New next node [newNext=" + newNext + ", formerNext=" + next + ", ring=" + ring + ", failedNodes=" + failedNodes + ']'); debugLog(msg, "Failed to restore ring because next node ID received is not " + "as expected [expectedId=" + next.id() + ", rcvdId=" + nextId + ']'); debugLog(msg, "Failed to restore ring because next node order " + "received is not as expected [expected=" + next.internalOrder() + ", rcvd=" + nextOrder + ", id=" + next.id() + ']'); debugLog(msg, "Initialized connection with next node: " + next.id()); onException("Failed to connect to next node [msg=" + msg + ", err=" + e + ']', e); if (!checkAckTimeout(ackTimeout0)) break; debugLog(msg, "Pending messages will be sent [failure=" + failure + ", newNextNode=" + newNextNode + ", forceSndPending=" + forceSndPending + debugLog(msg, "Pending message has been sent to next node [msgId=" + msg.id() + ", pendingMsgId=" + pendingMsg.id() + ", next=" + next.id() + ", res=" + res + ']');
UUID locNodeId = getConfiguredNodeId(); debugLog(msg, "Initialized connection with remote node [nodeId=" + nodeId + ", client=" + req.client() + ']'); onException("Caught exception on handshake [err=" + e + ", sock=" + sock + ']', e); U.error(log, "Caught exception on handshake [err=" + e +", sock=" + sock + ']', e); onException("Caught exception on handshake [err=" + e +", sock=" + sock + ']', e); DebugLogger debugLog = messageLogger(msg); debugLog(msg, "Message has been received: " + msg); if (!getLocalNodeId().equals(msg0.routerNodeId()) && state == CONNECTING) { spi.writeToSocket(msg, sock, RES_OK, sockTimeout); msg.verify(getLocalNodeId()); onException("Caught exception on message read [sock=" + sock + ", locNodeId=" + locNodeId + ", rmtNodeId=" + nodeId + ']', e); onException("Caught exception on message read [sock=" + sock + ", locNodeId=" + locNodeId + ", rmtNodeId=" + nodeId + ']', e); if (isLocalNodeCoordinator() && !ring.hasRemoteServerNodes()) U.enhanceThreadName(msgWorkerThread, "crd");
assert msg != null; UUID locNodeId = getLocalNodeId(); boolean locNodeCoord = isLocalNodeCoordinator(); ", err=" + e.getMessage() + ']'); onException("Failed to send verified node left message to leaving node [msg=" + msg + ", err=" + e.getMessage() + ']', e);
localAuthentication(locCred); marshalCredentials(locNode, locCred); DiscoveryDataPacket discoveryData = spi.collectExchangeData(new DiscoveryDataPacket(getLocalNodeId())); if (!sendJoinRequestMessage(discoveryData)) { if (log.isDebugEnabled()) log.debug("Join request message has not been sent (local node is the first in the topology)."); localAuthentication(locCred); notifyDiscovery(EVT_NODE_JOINED, 1, locNode);
assert node != null; if (node.id().equals(getLocalNodeId())) return true; if (node == null || !nodeAlive(node.id())) return false; try { IgniteBiTuple<UUID, Boolean> t = pingNode(addr, node.id(), clientNodeId); log.debug("Failed to ping node [node=" + node + ", err=" + e.getMessage() + ']'); onException("Failed to ping node [node=" + node + ", err=" + e.getMessage() + ']', e);
UUID locNodeId = getLocalNodeId(); debugLog(msg, "Message has been sent directly to address [msg=" + msg + ", addr=" + addr + ", rmtNodeId=" + res.creatorNodeId() + ']'); U.error(log, "Class cast exception on direct send: " + addr, e); onException("Class cast exception on direct send: " + addr, e); log.error("Exception on direct send: " + e.getMessage(), e); onException("Exception on direct send: " + e.getMessage(), e); ackTimeout0 *= 2; if (!checkAckTimeout(ackTimeout0)) break;
UUID locNodeId = getLocalNodeId(); if (isLocalNodeCoordinator()) { if (msg.verified()) { spi.stats.onRingMessageReceived(msg); debugLog(msg, "Local node already has node being added. Passing TcpDiscoveryNodeAddedMessage to " + "coordinator for final processing [ring=" + ring + ", node=" + node + ", locNode=" + locNode + ", msg=" + msg + ']'); debugLog(msg, "Discarding node added message since new node's order is less than " + "max order in ring [ring=" + ring + ", node=" + node + ", locNode=" + locNode + ", msg=" + msg + ']'); if (!isLocalNodeCoordinator() && spi.nodeAuth != null && spi.nodeAuth.isGlobalNodeAuthentication()) { boolean authFailed = true; "[node=" + node + ", err=" + e.getMessage() + ']'); onException("Failed to send unauthenticated message to node " + "[node=" + node + ", err=" + e.getMessage() + ']', e);
/** * Trying get node in any state (visible or not) * @param nodeId Node id. */ ClusterNode getNode0(UUID nodeId) { assert nodeId != null; UUID locNodeId0 = getLocalNodeId(); if (locNodeId0 != null && locNodeId0.equals(nodeId)) // Return local node directly. return locNode; return ring.node(nodeId); }
/**
 * {@inheritDoc}
 * <p>
 * Returns {@code true} immediately for the local node ID, and {@code false} when the node is
 * not in the ring or is not alive. Otherwise pings the node and logs the outcome together with
 * the time spent. If the ping fails for a node that has no client router and is still alive,
 * a {@code TcpDiscoveryStatusCheckMessage} is queued to the message worker.
 *
 * @param nodeId ID of the node to ping, must not be {@code null}.
 * @return {@code true} if the node is the local node or the ping succeeded.
 */
@Override public boolean pingNode(UUID nodeId) {
    assert nodeId != null;

    // Compare IDs by value: an equal UUID held in a different instance is still the local node.
    if (nodeId.equals(getLocalNodeId()))
        return true;

    TcpDiscoveryNode node = ring.node(nodeId);

    if (node == null || !nodeAlive(nodeId))
        return false;

    // Both timestamps come from the same time source so their difference is meaningful.
    long start = U.currentTimeMillis();

    if (log.isInfoEnabled())
        log.info("Pinging node: " + nodeId);

    boolean res = pingNode(node);

    long end = U.currentTimeMillis();

    if (log.isInfoEnabled())
        log.info("Finished node ping [nodeId=" + nodeId + ", res=" + res + ", time=" + (end - start) + "ms]");

    // Failed ping of a node without a client router that is still alive: initiate a status check.
    if (!res && node.clientRouterNodeId() == null && nodeAlive(nodeId)) {
        LT.warn(log, "Failed to ping node (status check will be initiated): " + nodeId);

        msgWorker.addMessage(new TcpDiscoveryStatusCheckMessage(locNode, node.id()));
    }

    return res;
}
msgBytes = U.marshal(spi.marshaller(), msg); DebugLogger msgLog = messageLogger(msg); else if (msgLog.isDebugEnabled()) msgLog.debug("Skip sending message ack to client, fail to get client node " + "[sock=" + sock + ", locNodeId=" + getLocalNodeId() + ", rmtNodeId=" + clientNodeId + ", msg=" + msg + ']'); if (msgLog.isDebugEnabled()) msgLog.debug("Sending message ack to client [sock=" + sock + ", locNodeId=" + getLocalNodeId() + ", rmtNodeId=" + clientNodeId + ", msg=" + msg + ']'); if (msgLog.isDebugEnabled()) msgLog.debug("Redirecting message to client [sock=" + sock + ", locNodeId=" + getLocalNodeId() + ", rmtNodeId=" + clientNodeId + ", msg=" + msg + ']'); if (log.isDebugEnabled()) U.error(log, "Client connection failed [sock=" + sock + ", locNodeId=" + getLocalNodeId() + ", rmtNodeId=" + clientNodeId + ", msg=" + msg + ']', e); onException("Client connection failed [sock=" + sock + ", locNodeId=" + getLocalNodeId() + ", rmtNodeId=" + clientNodeId + ", msg=" + msg + ']', e);
/**
 * Handles the ping request {@code msg}: pings the requested node and queues a verified
 * {@code TcpDiscoveryClientPingResponse} to the worker of the client that created the request.
 * Does nothing when the SPI state is {@code DISCONNECTED} or no worker exists for that client.
 */
@Override public void run() {
    if (spiState == DISCONNECTED) {
        if (log.isDebugEnabled())
            log.debug("Ignoring ping request, SPI is already disconnected: " + msg);

        return;
    }

    final ClientMessageWorker clientWorker = clientMsgWorkers.get(msg.creatorNodeId());

    // No worker registered for the requesting client: skip the request.
    if (clientWorker == null) {
        if (log.isDebugEnabled())
            log.debug("Ping request from dead client node, will be skipped: " + msg.creatorNodeId());

        return;
    }

    boolean pinged;

    try {
        pinged = pingNode(msg.nodeToPing());
    }
    catch (IgniteSpiException e) {
        log.error("Failed to ping node [nodeToPing=" + msg.nodeToPing() + ']', e);

        // A ping that threw is reported to the client as unsuccessful.
        pinged = false;
    }

    TcpDiscoveryClientPingResponse pingRes =
        new TcpDiscoveryClientPingResponse(getLocalNodeId(), msg.nodeToPing(), pinged);

    pingRes.verify(getLocalNodeId());

    clientWorker.addMessage(pingRes);
}
}); // Closes the enclosing construct that was opened before this block.
registerLocalNodeAddress(); else { if (F.isEmpty(spi.ipFinder.getRegisteredAddresses())) joinTopology();
assert addr != null; UUID locNodeId = getLocalNodeId(); return F.t(getLocalNodeId(), false); return F.t(getLocalNodeId(), false); return F.t(getLocalNodeId(), clientPingRes); if (nodeId != null && !nodeAlive(nodeId)) { if (log.isDebugEnabled()) log.debug("Failed to ping the node (has left or leaving topology): [nodeId=" + nodeId +
/**
 * Checks whether local node is coordinator. Nodes that are leaving or failed
 * (but are still in topology) are removed from search.
 * <p>
 * The check is delegated to {@code ServerImpl}; for any other {@code impl} the answer is
 * {@code false}.
 *
 * @return {@code true} if local node is coordinator.
 */
public boolean isLocalNodeCoordinator() {
    return impl instanceof ServerImpl && ((ServerImpl)impl).isLocalNodeCoordinator();
}