/**
 * {@inheritDoc}
 * <p>
 * Looks up the largest connected cluster part, logs its summary and keeps only that part.
 * If no such part is found, the method returns without touching any node.
 */
@Override public void resolve(CommunicationFailureContext ctx) {
    ClusterPart clusterToKeep = findLargestConnectedCluster(ctx);

    // Nothing was found, so there is nothing to keep and nothing to kill.
    if (clusterToKeep == null)
        return;

    String summary = "Communication problem resolver found fully connected independent cluster [serverNodesCnt="
        + clusterToKeep.srvNodesCnt
        + ", clientNodesCnt=" + clusterToKeep.connectedClients.size()
        + ", totalAliveNodes=" + ctx.topologySnapshot().size()
        + ", serverNodesIds=" + clusterNodeIds(clusterToKeep.srvNodesSet, ctx.topologySnapshot(), 1000)
        + "]";

    log.info(summary);

    keepCluster(ctx, clusterToKeep);
}
/**
 * Creates a cluster graph over the topology snapshot taken from the given context.
 *
 * @param ctx Communication failure context.
 * @param nodeFilterOut Filter to exclude some cluster nodes from graph.
 */
public ClusterGraph(CommunicationFailureContext ctx, Predicate<ClusterNode> nodeFilterOut) {
    this.nodes = ctx.topologySnapshot();
    this.nodeCnt = this.nodes.size();

    assert this.nodeCnt > 0;

    // The matrix is built only after the node fields above are set, and the searcher is built from the matrix.
    this.connections = buildConnectivityMatrix(ctx, nodeFilterOut);
    this.fccSearcher = new FullyConnectedComponentSearcher(this.connections);
}
/** * Keeps server cluster nodes presented in given {@code srvNodesSet}. * Client nodes which have connections to presented {@code srvNodesSet} will be also keeped. * Other nodes will be killed forcibly. * * @param ctx Communication failure context. * @param clusterPart Set of nodes need to keep in the cluster. */ private void keepCluster(CommunicationFailureContext ctx, ClusterPart clusterPart) { List<ClusterNode> allNodes = ctx.topologySnapshot(); // Kill server nodes. for (int idx = 0; idx < allNodes.size(); idx++) { ClusterNode node = allNodes.get(idx); // Client nodes will be processed separately. if (node.isClient()) continue; if (!clusterPart.srvNodesSet.get(idx)) ctx.killNode(node); } // Kill client nodes unable to connect to the presented part of cluster. for (int idx = 0; idx < allNodes.size(); idx++) { ClusterNode node = allNodes.get(idx); if (node.isClient() && !clusterPart.connectedClients.contains(node)) ctx.killNode(node); } }
/**
 * Collects the client nodes that can communicate with every server node of {@code srvNodesSet}.
 * A client is accepted only if the connection is available in both directions (client to server
 * and server to client) for each server in the set.
 *
 * @param ctx Communication failure context.
 * @param srvNodesSet Server nodes set (bit index is the node position in the topology snapshot).
 * @return Set of client nodes.
 */
private Set<ClusterNode> findConnectedClients(CommunicationFailureContext ctx, BitSet srvNodesSet) {
    List<ClusterNode> top = ctx.topologySnapshot();

    Set<ClusterNode> res = new HashSet<>();

    for (ClusterNode client : top) {
        if (!client.isClient())
            continue;

        boolean reachesAllSrvs = true;

        // The loop stops at the first server the client cannot exchange connections with.
        for (Iterator<Integer> it = new BitSetIterator(srvNodesSet); reachesAllSrvs && it.hasNext(); ) {
            int srvIdx = it.next();

            ClusterNode srv = top.get(srvIdx);

            reachesAllSrvs = ctx.connectionAvailable(client, srv) && ctx.connectionAvailable(srv, client);
        }

        if (reachesAllSrvs)
            res.add(client);
    }

    return res;
}
/**
 * {@inheritDoc}
 * <p>
 * Kills every node of the topology snapshot whose order is contained in {@code killNodeOrders}.
 */
@Override public void resolve(CommunicationFailureContext ctx) {
    List<ClusterNode> top = ctx.topologySnapshot();

    assertTrue(!top.isEmpty());

    // Nodes are visited in snapshot order, exactly as a plain loop would do.
    top.stream()
        .filter(candidate -> killNodeOrders.contains(candidate.order()))
        .forEach(ctx::killNode);
}
}
List<ClusterNode> srvNodes = ctx.topologySnapshot() .stream() .filter(node -> !node.isClient())
/**
 * {@inheritDoc}
 * <p>
 * Kills a random subset of the nodes from the topology snapshot. The number of nodes to kill is
 * chosen randomly and is always less than half of the snapshot size. At least one server node is
 * always left alive. Killed nodes are recorded in {@code LAST_KILLED_NODES}, which is cleared
 * at the beginning of every call.
 * <p>
 * If the snapshot contains fewer than two nodes, nothing is killed.
 */
@Override public void resolve(CommunicationFailureContext ctx) {
    LAST_KILLED_NODES.clear();

    List<ClusterNode> nodes = ctx.topologySnapshot();

    ThreadLocalRandom rnd = ThreadLocalRandom.current();

    // ThreadLocalRandom.nextInt(bound) requires a positive bound. For a snapshot with fewer than
    // two nodes "size / 2" is zero, so the random call is skipped and no node is killed.
    int killBound = nodes.size() / 2;

    int killNodes = killBound > 0 ? rnd.nextInt(killBound) : 0;

    log.info("Resolver kills nodes [total=" + nodes.size() + ", kill=" + killNodes + ']');

    // Number of server nodes that are not selected for killing yet.
    long aliveSrvCnt = nodes.stream().filter(node -> !node.isClient()).count();

    Set<Integer> idxs = new HashSet<>();

    while (idxs.size() < killNodes) {
        int idx = rnd.nextInt(nodes.size());

        boolean newSrv = !nodes.get(idx).isClient() && !idxs.contains(idx);

        if (newSrv) {
            // Never select the last remaining server: pick another index instead.
            if (aliveSrvCnt <= 1)
                continue;

            aliveSrvCnt--;
        }

        idxs.add(idx);
    }

    for (int idx : idxs) {
        ClusterNode node = nodes.get(idx);

        log.info("Resolver kills node: " + node.id());

        LAST_KILLED_NODES.add(node);

        ctx.killNode(node);
    }
}
}
/**
 * {@inheritDoc}
 * <p>
 * Kills the first node of the topology snapshot and logs its ID.
 */
@Override public void resolve(CommunicationFailureContext ctx) {
    ClusterNode victim = ctx.topologySnapshot().get(0);

    log.info("Resolver kills node: " + victim.id());

    ctx.killNode(victim);
}
}
/**
 * {@inheritDoc}
 * <p>
 * Looks up the largest connected cluster part, logs its summary and keeps only that part.
 * If no such part is found, the method returns without touching any node.
 */
@Override public void resolve(CommunicationFailureContext ctx) {
    ClusterPart clusterToKeep = findLargestConnectedCluster(ctx);

    // Nothing was found, so there is nothing to keep and nothing to kill.
    if (clusterToKeep == null)
        return;

    String summary = "Communication problem resolver found fully connected independent cluster [serverNodesCnt="
        + clusterToKeep.srvNodesCnt
        + ", clientNodesCnt=" + clusterToKeep.connectedClients.size()
        + ", totalAliveNodes=" + ctx.topologySnapshot().size()
        + ", serverNodesIds=" + clusterNodeIds(clusterToKeep.srvNodesSet, ctx.topologySnapshot(), 1000)
        + "]";

    log.info(summary);

    keepCluster(ctx, clusterToKeep);
}
/**
 * Creates a cluster graph over the topology snapshot taken from the given context.
 *
 * @param ctx Communication failure context.
 * @param nodeFilterOut Filter to exclude some cluster nodes from graph.
 */
public ClusterGraph(CommunicationFailureContext ctx, Predicate<ClusterNode> nodeFilterOut) {
    this.nodes = ctx.topologySnapshot();
    this.nodeCnt = this.nodes.size();

    assert this.nodeCnt > 0;

    // The matrix is built only after the node fields above are set, and the searcher is built from the matrix.
    this.connections = buildConnectivityMatrix(ctx, nodeFilterOut);
    this.fccSearcher = new FullyConnectedComponentSearcher(this.connections);
}
/** * Keeps server cluster nodes presented in given {@code srvNodesSet}. * Client nodes which have connections to presented {@code srvNodesSet} will be also keeped. * Other nodes will be killed forcibly. * * @param ctx Communication failure context. * @param clusterPart Set of nodes need to keep in the cluster. */ private void keepCluster(CommunicationFailureContext ctx, ClusterPart clusterPart) { List<ClusterNode> allNodes = ctx.topologySnapshot(); // Kill server nodes. for (int idx = 0; idx < allNodes.size(); idx++) { ClusterNode node = allNodes.get(idx); // Client nodes will be processed separately. if (node.isClient()) continue; if (!clusterPart.srvNodesSet.get(idx)) ctx.killNode(node); } // Kill client nodes unable to connect to the presented part of cluster. for (int idx = 0; idx < allNodes.size(); idx++) { ClusterNode node = allNodes.get(idx); if (node.isClient() && !clusterPart.connectedClients.contains(node)) ctx.killNode(node); } }
/**
 * Collects the client nodes that can communicate with every server node of {@code srvNodesSet}.
 * A client is accepted only if the connection is available in both directions (client to server
 * and server to client) for each server in the set.
 *
 * @param ctx Communication failure context.
 * @param srvNodesSet Server nodes set (bit index is the node position in the topology snapshot).
 * @return Set of client nodes.
 */
private Set<ClusterNode> findConnectedClients(CommunicationFailureContext ctx, BitSet srvNodesSet) {
    List<ClusterNode> top = ctx.topologySnapshot();

    Set<ClusterNode> res = new HashSet<>();

    for (ClusterNode client : top) {
        if (!client.isClient())
            continue;

        boolean reachesAllSrvs = true;

        // The loop stops at the first server the client cannot exchange connections with.
        for (Iterator<Integer> it = new BitSetIterator(srvNodesSet); reachesAllSrvs && it.hasNext(); ) {
            int srvIdx = it.next();

            ClusterNode srv = top.get(srvIdx);

            reachesAllSrvs = ctx.connectionAvailable(client, srv) && ctx.connectionAvailable(srv, client);
        }

        if (reachesAllSrvs)
            res.add(client);
    }

    return res;
}
List<ClusterNode> srvNodes = ctx.topologySnapshot() .stream() .filter(node -> !node.isClient())