/**
 * Verifies that the given acknowledgement timeout does not exceed the SPI's
 * configured maximum, warning (throttled) when it does.
 *
 * @param ackTimeout Acknowledgement timeout.
 * @return {@code True} if acknowledgement timeout is less or equal to
 *      maximum acknowledgement timeout, {@code false} otherwise.
 */
protected boolean checkAckTimeout(long ackTimeout) {
    long maxAckTimeout = spi.getMaxAckTimeout();

    if (ackTimeout <= maxAckTimeout)
        return true;

    LT.warn(log, "Acknowledgement timeout is greater than maximum acknowledgement timeout " +
        "(consider increasing 'maxAckTimeout' configuration property) " +
        "[ackTimeout=" + ackTimeout + ", maxAckTimeout=" + maxAckTimeout + ']');

    return false;
}
/**
 * Submits the given closure to the lazily created utility thread pool.
 * Silently skips the closure (with a throttled warning) if the connection
 * state is already {@code STOPPED}.
 *
 * @param c Closure to run.
 */
void runInWorkerThread(Runnable c) {
    IgniteThreadPoolExecutor executor;

    synchronized (stateMux) {
        if (connState == ConnectionState.STOPPED) {
            LT.warn(log, "Do not run closure, node is stopped.");

            return;
        }

        // Lazily create a single-threaded pool on first use, guarded by stateMux.
        if (utilityPool == null) {
            utilityPool = new IgniteThreadPoolExecutor("zk-discovery-pool",
                igniteInstanceName,
                0,
                1,
                2000,
                new LinkedBlockingQueue<Runnable>());
        }

        executor = utilityPool;
    }

    // Submit outside the lock to avoid holding stateMux during hand-off.
    executor.submit(c);
}
/** {@inheritDoc} */ @Override public void onTimeout() { if (done.compareAndSet(false, true)) { // Close socket - timeout occurred. U.closeQuiet(sock); LT.warn(log, "Socket write has timed out (consider increasing " + (failureDetectionTimeoutEnabled() ? "'IgniteConfiguration.failureDetectionTimeout' configuration property) [" + "failureDetectionTimeout=" + failureDetectionTimeout() : "'sockTimeout' configuration property) [sockTimeout=" + sockTimeout) + ", rmtAddr=" + sock.getRemoteSocketAddress() + ", rmtPort=" + sock.getPort() + ", sockTimeout=" + sockTimeout + ']'); stats.onSocketTimeout(); } }
/** {@inheritDoc} */
@Override public void onMessageReceived(GridNioSession ses, Object msg) throws IgniteCheckedException {
    if (!(msg instanceof ByteBuffer))
        throw new GridNioException("Failed to decode incoming message (incoming message is not a byte buffer, " +
            "is filter properly placed?): " + msg.getClass());

    ByteBuffer input = (ByteBuffer)msg;

    try {
        // Drain the buffer, handing each decoded message to the next filter in the chain.
        while (input.hasRemaining()) {
            Object decoded = parser.decode(ses, input);

            if (decoded != null) {
                proceedMessageReceived(ses, decoded);

                continue;
            }

            // Parser produced nothing; if the buffer is exhausted, we are done.
            if (!input.hasRemaining())
                break;

            // In direct mode a partial read is expected — wait for more data.
            if (directMode)
                return;

            LT.warn(log, "Parser returned null but there are still unread data in input buffer (bug in " +
                "parser code?) [parser=" + parser + ", ses=" + ses + ']');

            // Discard the remainder to avoid spinning on the same bytes.
            input.position(input.limit());
        }
    }
    catch (IOException e) {
        throw new GridNioException(e);
    }
}
/**
 * Emits the given message through the throttled logger, then verifies whether
 * it actually reached the log.
 *
 * @param msg Log message.
 * @param isLogExpected Is log expected or not.
 */
private void checkWarn(String msg, boolean isLogExpected) {
    LT.warn(log0, msg);

    check(null, msg, isLogExpected);
}
/**
 * Check wal archive size configuration for correctness.
 *
 * @param memCfg durable memory configuration for an Apache Ignite node.
 * @throws IgniteCheckedException If both history size and max archive size were customized,
 *      or if the max archive size is smaller than a single WAL segment.
 */
private void checkWalArchiveSizeConfiguration(DataStorageConfiguration memCfg) throws IgniteCheckedException {
    // Note: the referenced properties live on DataStorageConfiguration (the parameter type),
    // so user-facing messages must cite that class, not DataRegionConfiguration.
    if (memCfg.getWalHistorySize() == DFLT_WAL_HISTORY_SIZE || memCfg.getWalHistorySize() == Integer.MAX_VALUE)
        LT.warn(log, "DataStorageConfiguration.maxWalArchiveSize instead of " +
            "DataStorageConfiguration.walHistorySize would be used for removing old archive wal files");
    else if (memCfg.getMaxWalArchiveSize() == DFLT_WAL_ARCHIVE_MAX_SIZE)
        LT.warn(log, "walHistorySize is deprecated. maxWalArchiveSize should be used instead");
    else
        // Both properties customized — ambiguous configuration, refuse to start.
        throw new IgniteCheckedException("Only one of wal history size or max wal archive size should be used " +
            "(use DataStorageConfiguration.maxWalArchiveSize because DataStorageConfiguration.walHistorySize " +
            "is deprecated)");

    // An archive smaller than one segment could never retain a complete segment.
    if (memCfg.getMaxWalArchiveSize() < memCfg.getWalSegmentSize())
        throw new IgniteCheckedException(
            "DataStorageConfiguration.maxWalArchiveSize should be greater than " +
                "DataStorageConfiguration.walSegmentSize"
        );
}
/** * @param desc Process descriptor. * @return Client. * @throws IgniteCheckedException If failed. */ @Nullable protected HadoopCommunicationClient createNioClient(HadoopProcessDescriptor desc) throws IgniteCheckedException { assert desc != null; int shmemPort = desc.sharedMemoryPort(); // If remote node has shared memory server enabled and has the same set of MACs // then we are likely to run on the same host and shared memory communication could be tried. if (shmemPort != -1 && locProcDesc.parentNodeId().equals(desc.parentNodeId())) { try { return createShmemClient(desc, shmemPort); } catch (IgniteCheckedException e) { if (e.hasCause(IpcOutOfSystemResourcesException.class)) // Has cause or is itself the IpcOutOfSystemResourcesException. LT.warn(log, OUT_OF_RESOURCES_TCP_MSG); else if (log.isDebugEnabled()) log.debug("Failed to establish shared memory connection with local hadoop process: " + desc); } } return createTcpClient(desc); }
/**
 * @return Message that is in the head of the queue, {@code null} if queue is empty.
 */
@Nullable SessionWriteRequest pollFuture() {
    SessionWriteRequest req = queue.poll();

    if (req == null)
        return null;

    // Release a permit for requests issued outside the message thread.
    if (sem != null && !req.messageThread())
        sem.release();

    // Track the request for recovery; overflow triggers a reconnect.
    if (outRecovery != null && !outRecovery.add(req)) {
        LT.warn(log, "Unacknowledged messages queue size overflow, will attempt to reconnect " +
            "[remoteAddr=" + remoteAddress() + ", queueLimit=" + outRecovery.queueLimit() + ']');

        if (log.isDebugEnabled())
            log.debug("Unacknowledged messages queue size overflow, will attempt to reconnect " +
                "[remoteAddr=" + remoteAddress() +
                ", queueSize=" + outRecovery.messagesRequests().size() +
                ", queueLimit=" + outRecovery.queueLimit() + ']');

        close();
    }

    return req;
}
", locNodeId=" + locNode.id() + ", rmtNodeId=" + node.id() + ']'; LT.warn(log, errMsg);
/**
 * Adds partition unload event.
 *
 * @param part Partition.
 */
public void addUnloadEvent(int part) {
    if (!eventRecordable(EVT_CACHE_REBALANCE_PART_UNLOADED))
        LT.warn(log, "Added event without checking if event is recordable: " +
            U.gridEventName(EVT_CACHE_REBALANCE_PART_UNLOADED));

    // Work on a local reference to the caches list.
    List<GridCacheContext> caches0 = this.caches;

    for (GridCacheContext cctx : caches0) {
        if (cctx.config().isEventsDisabled())
            continue;

        cctx.gridEvents().record(new CacheRebalancingEvent(cctx.name(),
            cctx.localNode(),
            "Cache unloading event.",
            EVT_CACHE_REBALANCE_PART_UNLOADED,
            part,
            null,
            0,
            0));
    }
}
/** * @param node Joining node. * @param errMsg Message to log. * @param sndMsg Message to send. */ private void nodeCheckError(TcpDiscoveryNode node, String errMsg, String sndMsg) { LT.warn(log, errMsg); // Always output in debug. if (log.isDebugEnabled()) log.debug(errMsg); try { trySendMessageDirectly(node, new TcpDiscoveryCheckFailedMessage(locNode.id(), sndMsg)); } catch (IgniteSpiException e) { if (log.isDebugEnabled()) log.debug("Failed to send marshaller check failed message to node " + "[node=" + node + ", err=" + e.getMessage() + ']'); onException("Failed to send marshaller check failed message to node " + "[node=" + node + ", err=" + e.getMessage() + ']', e); } }
/** {@inheritDoc} */
@Override public boolean pingNode(UUID nodeId) {
    assert nodeId != null;

    // Local node is trivially alive.
    if (nodeId == getLocalNodeId())
        return true;

    TcpDiscoveryNode node = ring.node(nodeId);

    if (node == null)
        return false;

    if (!nodeAlive(nodeId))
        return false;

    long start = U.currentTimeMillis();

    if (log.isInfoEnabled())
        log.info("Pinging node: " + nodeId);

    boolean res = pingNode(node);

    // Use the same time source as 'start' (was System.currentTimeMillis(),
    // which can disagree with the cached U.currentTimeMillis() value and
    // report a skewed duration).
    long end = U.currentTimeMillis();

    if (log.isInfoEnabled())
        log.info("Finished node ping [nodeId=" + nodeId + ", res=" + res + ", time=" + (end - start) + "ms]");

    // Ping failed for a server node that is still considered alive — trigger a status check.
    if (!res && node.clientRouterNodeId() == null && nodeAlive(nodeId)) {
        LT.warn(log, "Failed to ping node (status check will be initiated): " + nodeId);

        msgWorker.addMessage(new TcpDiscoveryStatusCheckMessage(locNode, node.id()));
    }

    return res;
}
/**
 * @param nodeId ID of the node.
 * @return {@code True} if ping succeeded.
 * @throws IgniteClientDisconnectedCheckedException If ping failed.
 */
public boolean pingNode(UUID nodeId) throws IgniteClientDisconnectedCheckedException {
    assert nodeId != null;

    if (!busyLock.enterBusy())
        return false;

    try {
        return getSpi().pingNode(nodeId);
    }
    catch (IgniteException e) {
        boolean clientDisconnected = e.hasCause(IgniteClientDisconnectedCheckedException.class,
            IgniteClientDisconnectedException.class);

        if (clientDisconnected) {
            IgniteFuture<?> reconnectFut = ctx.cluster().clientReconnectFuture();

            throw new IgniteClientDisconnectedCheckedException(reconnectFut, e.getMessage());
        }

        LT.warn(log, "Ping failed with error [node=" + nodeId + ", err=" + e + ']');

        // Any other SPI failure is logged and reported as a successful ping.
        return true;
    }
    finally {
        busyLock.leaveBusy();
    }
}
/** * Checks segment on start waiting for correct segment if necessary. * * @throws IgniteCheckedException If check failed. */ private void checkSegmentOnStart() throws IgniteCheckedException { assert hasRslvrs; if (log.isDebugEnabled()) log.debug("Starting network segment check."); while (true) { if (ctx.segmentation().isValidSegment()) break; if (ctx.config().isWaitForSegmentOnStart()) { LT.warn(log, "Failed to check network segment (retrying every 2000 ms)."); // Wait and check again. U.sleep(2000); } else throw new IgniteCheckedException("Failed to check network segment."); } if (log.isDebugEnabled()) log.debug("Finished network segment check successfully."); }
/**
 * Adds rebalancing event.
 *
 * @param part Partition.
 * @param type Event type.
 * @param discoNode Discovery node.
 * @param discoType Discovery event type.
 * @param discoTs Discovery event timestamp.
 */
public void addRebalanceEvent(int part, int type, ClusterNode discoNode, int discoType, long discoTs) {
    assert discoNode != null;
    assert type > 0;
    assert discoType > 0;
    assert discoTs > 0;

    if (!eventRecordable(type))
        LT.warn(log, "Added event without checking if event is recordable: " + U.gridEventName(type));

    // Record the event for every cache that has events enabled and records this type.
    for (GridCacheContext cctx : this.caches) {
        if (!cctx.config().isEventsDisabled() && cctx.recordEvent(type)) {
            cctx.gridEvents().record(new CacheRebalancingEvent(cctx.name(),
                cctx.localNode(),
                "Cache rebalancing event.",
                type,
                part,
                discoNode,
                discoType,
                discoTs));
        }
    }
}
/** * @param cacheName Cache name. * @param backups Number of backups. * @param log Logger. * @throws Exception If failed. */ @SuppressWarnings("BusyWait") public static <K, V> void waitTopologyUpdate(@Nullable String cacheName, int backups, IgniteLogger log) throws Exception { for (Ignite g : Ignition.allGrids()) { IgniteCache<K, V> cache = ((IgniteEx)g).cache(cacheName); GridDhtPartitionTopology top = dht(cache).topology(); while (true) { boolean wait = false; for (int p = 0; p < g.affinity(cacheName).partitions(); p++) { Collection<ClusterNode> nodes = top.nodes(p, AffinityTopologyVersion.NONE); if (nodes.size() > backups + 1) { LT.warn(log, "Partition map was not updated yet (will wait) [igniteInstanceName=" + g.name() + ", p=" + p + ", nodes=" + F.nodeIds(nodes) + ']'); wait = true; break; } } if (wait) Thread.sleep(20); else break; // While. } } }
/**
 * Validates a joining node: rejects duplicate node IDs, authenticates the node
 * and runs SPI-level validation (a second pass with the node's discovery data
 * attached when the first pass succeeds).
 *
 * @param joiningNodeData Joining node data.
 * @return Validation result.
 */
private ZkNodeValidateResult validateJoiningNode(ZkJoiningNodeData joiningNodeData) {
    ZookeeperClusterNode node = joiningNodeData.node();

    // Reject a node whose ID already exists in the current topology snapshot.
    ZookeeperClusterNode node0 = rtState.top.nodesById.get(node.id());

    if (node0 != null) {
        U.error(log, "Failed to include node in cluster, node with the same ID already exists [joiningNode=" + node +
            ", existingNode=" + node0 + ']');

        // Note: exception message is checked in tests.
        return new ZkNodeValidateResult("Node with the same ID already exists: " + node0);
    }

    ZkNodeValidateResult res = authenticateNode(node);

    if (res.err != null)
        return res;

    IgniteNodeValidationResult err = spi.getSpiContext().validateNode(node);

    if (err == null) {
        // First pass passed — validate again with the joining node's discovery data bag.
        DiscoveryDataBag joiningNodeBag = new DiscoveryDataBag(node.id(), joiningNodeData.node().isClient());

        joiningNodeBag.joiningNodeData(joiningNodeData.discoveryData());

        err = spi.getSpiContext().validateNode(node, joiningNodeBag);
    }

    if (err != null) {
        LT.warn(log, err.message());

        res.err = err.sendMessage();
    }

    return res;
}
LT.warn(log, ex, "Partition eviction failed (current node is stopping).", false, true);
@Override public void run() {
    // Ping the conflicting node directly, or via its ID when the error refers
    // to a different node than the joining one.
    boolean ping = node.id().equals(err0.nodeId()) ? pingNode(node) : pingNode(err0.nodeId());

    if (!ping) {
        if (log.isDebugEnabled())
            log.debug("Conflicting node has already left, need to wait for event. " +
                "Will ignore join request for now since it will be recent [req=" + msg +
                ", err=" + err0.message() + ']');

        // Ignore join request.
        return;
    }

    LT.warn(log, err0.message());

    // Always output in debug.
    if (log.isDebugEnabled())
        log.debug(err0.message());

    try {
        // Notify the joining node that its check failed.
        trySendMessageDirectly(node, new TcpDiscoveryCheckFailedMessage(err0.nodeId(), err0.sendMessage()));
    }
    catch (IgniteSpiException e) {
        if (log.isDebugEnabled())
            log.debug("Failed to send hash ID resolver validation failed message to node " +
                "[node=" + node + ", err=" + e.getMessage() + ']');

        onException("Failed to send hash ID resolver validation failed message to node " +
            "[node=" + node + ", err=" + e.getMessage() + ']', e);
    }
} }