/**
 * Resolves the socket timeout that is actually in effect, taking the failure detection timeout into account.
 * <p>
 * When failure detection timeout is enabled, the result is {@code failureDetectionTimeout()} for server
 * operations and {@code clientFailureDetectionTimeout()} for client operations. Otherwise the explicitly
 * configured socket timeout is returned.
 *
 * @param srvrOperation {@code True} if the socket connects to a server node,
 *      {@code false} if the socket connects to a client node.
 * @return Resulting socket timeout.
 */
public long getEffectiveSocketTimeout(boolean srvrOperation) {
    // Failure detection is off: fall back to the plain socket timeout.
    if (!failureDetectionTimeoutEnabled())
        return sockTimeout;

    if (srvrOperation)
        return failureDetectionTimeout();

    return clientFailureDetectionTimeout();
}
/** * Wait random delay before trying to reconnect. Delay will grow exponentially every time client is forced to * reconnect, but only if all these reconnections happened in small period of time (2 minutes). Maximum delay * could be configured with {@link IgniteSpiAdapter#clientFailureDetectionTimeout()}, default value is * {@link IgniteConfiguration#DFLT_CLIENT_FAILURE_DETECTION_TIMEOUT}. * * @throws InterruptedException If thread is interrupted. */ private void throttleClientReconnect() throws InterruptedException { if (U.currentTimeMillis() - lastReconnectTimestamp > CLIENT_THROTTLE_RECONNECT_RESET_TIMEOUT) currentReconnectDelay = 0; // Skip pause on first reconnect. else if (currentReconnectDelay == 0) currentReconnectDelay = 200; else { long maxDelay = spi.failureDetectionTimeoutEnabled() ? spi.clientFailureDetectionTimeout() : IgniteConfiguration.DFLT_CLIENT_FAILURE_DETECTION_TIMEOUT; currentReconnectDelay = Math.min(maxDelay, (int)(currentReconnectDelay * 1.5)); } if (currentReconnectDelay != 0) { ThreadLocalRandom random = ThreadLocalRandom.current(); Thread.sleep(random.nextLong(currentReconnectDelay / 2, currentReconnectDelay)); } lastReconnectTimestamp = U.currentTimeMillis(); }
/**
 * {@inheritDoc}
 * <p>
 * Fails the client node when no metrics update message has been received from it for longer than
 * {@code spi.clientFailureDetectionTimeout()}, unless the node has left the ring or is already
 * registered in {@code failedNodes}.
 */
@Override protected void noMessageLoop() {
    boolean metricsOverdue =
        U.currentTimeMillis() - lastMetricsUpdateMsgTime > spi.clientFailureDetectionTimeout();

    if (!metricsOverdue)
        return;

    TcpDiscoveryNode clientNode = ring.node(clientNodeId);

    // Node is not in the ring anymore, nothing to fail.
    if (clientNode == null)
        return;

    boolean alreadyFailed;

    synchronized (mux) {
        alreadyFailed = failedNodes.containsKey(clientNode);
    }

    if (alreadyFailed)
        return;

    String msg = "Client node considered as unreachable and will be dropped from cluster, " +
        "because no metrics update messages received in interval: " +
        "TcpDiscoverySpi.clientFailureDetectionTimeout() ms. " +
        "It may be caused by network problems or long GC pause on client node, " +
        "try to increase this parameter. " +
        "[nodeId=" + clientNodeId +
        ", clientFailureDetectionTimeout=" + spi.clientFailureDetectionTimeout() + ']';

    failNode(clientNodeId, msg);

    U.warn(log, msg);
}
} // Closes the enclosing scope whose header lies outside this snippet.
/**
 * Checks every SPI in {@code spis} starting from index 2: failure detection timeout must be reported as
 * disabled, {@code failureDetectionTimeout()} must be zero and {@code clientFailureDetectionTimeout()}
 * must be non-zero.
 *
 * @throws Exception In case of error.
 */
@Test
public void testFailureDetectionTimeoutDisabled() throws Exception {
    for (int idx = 2; idx < spis.size(); idx++) {
        TcpDiscoverySpi discoSpi = (TcpDiscoverySpi)spis.get(idx);

        assertFalse(discoSpi.failureDetectionTimeoutEnabled());
        assertEquals(0, discoSpi.failureDetectionTimeout());
        assertFalse(discoSpi.clientFailureDetectionTimeout() == 0);
    }
}
if (clientNode.visible()) { if (clientNodeIds.contains(clientNode.id())) clientNode.clientAliveTime(spi.clientFailureDetectionTimeout()); else { if (clientNode.clientAliveTime() == 0L) clientNode.clientAliveTime(spi.clientFailureDetectionTimeout()); "'IgniteConfiguration.clientFailureDetectionTimeout' " + "(consider increasing configuration property) " + "[timeout=" + spi.clientFailureDetectionTimeout() + ", node=" + clientNode + ']');
spi.clientFailureDetectionTimeout() : spi.getSocketTimeout());
node.clientAliveTime(spi.clientFailureDetectionTimeout());
node.clientAliveTime(spi.clientFailureDetectionTimeout());
/**
 * Resolves the socket timeout that is actually in effect, taking the failure detection timeout into account.
 * <p>
 * When failure detection timeout is enabled, the result is {@code failureDetectionTimeout()} for server
 * operations and {@code clientFailureDetectionTimeout()} for client operations. Otherwise the explicitly
 * configured socket timeout is returned.
 *
 * @param srvrOperation {@code True} if the socket connects to a server node,
 *      {@code false} if the socket connects to a client node.
 * @return Resulting socket timeout.
 */
public long getEffectiveSocketTimeout(boolean srvrOperation) {
    // Failure detection is off: fall back to the plain socket timeout.
    if (!failureDetectionTimeoutEnabled())
        return sockTimeout;

    if (srvrOperation)
        return failureDetectionTimeout();

    return clientFailureDetectionTimeout();
}
/** * Wait random delay before trying to reconnect. Delay will grow exponentially every time client is forced to * reconnect, but only if all these reconnections happened in small period of time (2 minutes). Maximum delay * could be configured with {@link IgniteSpiAdapter#clientFailureDetectionTimeout()}, default value is * {@link IgniteConfiguration#DFLT_CLIENT_FAILURE_DETECTION_TIMEOUT}. * * @throws InterruptedException If thread is interrupted. */ private void throttleClientReconnect() throws InterruptedException { if (U.currentTimeMillis() - lastReconnectTimestamp > CLIENT_THROTTLE_RECONNECT_RESET_TIMEOUT) currentReconnectDelay = 0; // Skip pause on first reconnect. else if (currentReconnectDelay == 0) currentReconnectDelay = 200; else { long maxDelay = spi.failureDetectionTimeoutEnabled() ? spi.clientFailureDetectionTimeout() : IgniteConfiguration.DFLT_CLIENT_FAILURE_DETECTION_TIMEOUT; currentReconnectDelay = Math.min(maxDelay, (int)(currentReconnectDelay * 1.5)); } if (currentReconnectDelay != 0) { ThreadLocalRandom random = ThreadLocalRandom.current(); Thread.sleep(random.nextLong(currentReconnectDelay / 2, currentReconnectDelay)); } lastReconnectTimestamp = U.currentTimeMillis(); }
/**
 * {@inheritDoc}
 * <p>
 * Fails the client node when no metrics update message has been received from it for longer than
 * {@code spi.clientFailureDetectionTimeout()}, unless the node has left the ring or is already
 * registered in {@code failedNodes}.
 */
@Override protected void noMessageLoop() {
    boolean metricsOverdue =
        U.currentTimeMillis() - lastMetricsUpdateMsgTime > spi.clientFailureDetectionTimeout();

    if (!metricsOverdue)
        return;

    TcpDiscoveryNode clientNode = ring.node(clientNodeId);

    // Node is not in the ring anymore, nothing to fail.
    if (clientNode == null)
        return;

    boolean alreadyFailed;

    synchronized (mux) {
        alreadyFailed = failedNodes.containsKey(clientNode);
    }

    if (alreadyFailed)
        return;

    String msg = "Client node considered as unreachable and will be dropped from cluster, " +
        "because no metrics update messages received in interval: " +
        "TcpDiscoverySpi.clientFailureDetectionTimeout() ms. " +
        "It may be caused by network problems or long GC pause on client node, " +
        "try to increase this parameter. " +
        "[nodeId=" + clientNodeId +
        ", clientFailureDetectionTimeout=" + spi.clientFailureDetectionTimeout() + ']';

    failNode(clientNodeId, msg);

    U.warn(log, msg);
}
} // Closes the enclosing scope whose header lies outside this snippet.
if (clientNode.visible()) { if (clientNodeIds.contains(clientNode.id())) clientNode.clientAliveTime(spi.clientFailureDetectionTimeout()); else { boolean aliveCheck = clientNode.isClientAlive(); "'IgniteConfiguration.clientFailureDetectionTimeout' " + "(consider increasing configuration property) " + "[timeout=" + spi.clientFailureDetectionTimeout() + ", node=" + clientNode + ']');
spi.clientFailureDetectionTimeout() : spi.getSocketTimeout());
node.clientAliveTime(spi.clientFailureDetectionTimeout());
node.clientAliveTime(spi.clientFailureDetectionTimeout());