@Override public void afterRun() throws Exception { if (suspectedCaller != null) { ClusterServiceImpl service = getService(); // I am the master. I can remove the member directly String reason = "Removing " + suspectedCaller + ", since it thinks it's already split from this cluster " + "and looking to merge."; service.suspectMember(suspectedCaller, reason, true); } }
/**
 * Switches the cluster to {@link ClusterState#PASSIVE}, waits for partition replica sync via the
 * internal hot-restart service, and then shuts the nodes down with whatever is left of the
 * configured {@link GroupProperty#CLUSTER_SHUTDOWN_TIMEOUT_SECONDS} budget.
 *
 * @param options transaction options for the state change; when {@code null} the overload
 *                without options is used
 */
private void shutdownCluster(TransactionOptions options) {
    if (options != null) {
        changeClusterState(ClusterState.PASSIVE, options, true);
    } else {
        changeClusterState(ClusterState.PASSIVE, true);
    }

    final long budgetNanos = node.getProperties().getNanos(GroupProperty.CLUSTER_SHUTDOWN_TIMEOUT_SECONDS);
    final long syncStartedNanos = System.nanoTime();
    node.getNodeExtension()
        .getInternalHotRestartService()
        .waitPartitionReplicaSyncOnCluster(budgetNanos, TimeUnit.NANOSECONDS);

    // the time spent waiting for the sync is charged against the shutdown budget
    final long spentNanos = System.nanoTime() - syncStartedNanos;
    shutdownNodes(budgetNanos - spentNanos);
}
/**
 * Applies a members view received from {@code callerAddress}, holding {@code lock} for the
 * whole call.
 * <p>
 * The update is rejected when this node is not joined, when the caller does not pass
 * {@code checkValidMaster}, or when {@code shouldProcessMemberUpdate} declines the view.
 * For an invalid master an explicit suspicion is sent, unless a mastership claim is in progress.
 *
 * @param membersView   the members view to apply
 * @param callerAddress address of the node that sent the view
 * @param callerUuid    UUID of the node that sent the view
 * @param targetUuid    UUID the sender addressed the update to
 * @return {@code true} if the view was handed to the membership manager, {@code false} otherwise
 */
public boolean updateMembers(MembersView membersView, Address callerAddress, String callerUuid,
                             String targetUuid) {
    lock.lock();
    try {
        if (!isJoined()) {
            logger.warning("Not updating members received from caller: " + callerAddress
                    + " because node is not joined! ");
            return false;
        }

        if (checkValidMaster(callerAddress)) {
            checkMemberUpdateContainsLocalMember(membersView, targetUuid);
            boolean accepted = shouldProcessMemberUpdate(membersView);
            if (accepted) {
                membershipManager.updateMembers(membersView);
            }
            return accepted;
        }

        // caller is not the master we know about
        logger.warning("Not updating members because caller: " + callerAddress
                + " is not known master: " + getMasterAddress());
        MembersViewMetadata senderMetadata = new MembersViewMetadata(callerAddress, callerUuid,
                callerAddress, membersView.getVersion());
        if (!clusterJoinManager.isMastershipClaimInProgress()) {
            sendExplicitSuspicion(senderMetadata);
        }
        return false;
    } finally {
        lock.unlock();
    }
}
/**
 * Resets this service while holding {@code lock}: join state, local member UUID and
 * cluster id are reset, then the internal state is cleared, in that order.
 */
@Override
public void reset() {
    lock.lock();
    try {
        // identity-related state first
        resetJoinState();
        resetLocalMemberUuid();
        resetClusterId();

        // then everything else kept internally
        clearInternalState();
    } finally {
        lock.unlock();
    }
}
/**
 * Checks that the member list version still matches the expected one after a cluster state
 * change, and tries to revert the cluster state to {@code clusterState} when it does not.
 * <p>
 * A failure while reverting is logged (with the exception attached so the stack trace and
 * cause are not lost) and otherwise ignored; the method still reports the mismatch.
 *
 * @param clusterService            the cluster service to query and, on mismatch, revert
 * @param clusterState              the state to revert to on mismatch
 * @param expectedMemberListVersion the member list version the caller expects
 * @return {@code true} if the versions match, {@code false} if they differ
 */
private boolean verifyMemberListVersionAfterStateChange(ClusterServiceImpl clusterService, ClusterState clusterState,
                                                        int expectedMemberListVersion) {
    // read once so the value that is logged is the value that failed the check
    final int actualMemberListVersion = clusterService.getMemberListVersion();
    if (actualMemberListVersion == expectedMemberListVersion) {
        return true;
    }

    try {
        logger.warning("Reverting cluster state back to " + clusterState + " because member list version: "
                + actualMemberListVersion + " is different than expected member list version: "
                + expectedMemberListVersion);
        clusterService.changeClusterState(clusterState);
    } catch (Exception e) {
        // best effort: the revert failure must not mask the version mismatch result
        String error = e.getClass().getName() + ": " + e.getMessage();
        logger.warning("While reverting cluster state to " + clusterState + "! " + error, e);
    }
    return false;
}
lock.lock(); try { if (!checkValidMaster(callerAddress)) { if (logger.isFineEnabled()) { logger.fine("Not finalizing join because caller: " + callerAddress + " is not known master: " + getMasterAddress()); sendExplicitSuspicion(membersViewMetadata); return false; if (isJoined()) { if (logger.isFineEnabled()) { logger.fine("Node is already joined... No need to finalize join..."); checkMemberUpdateContainsLocalMember(membersView, targetUuid); initialClusterState(clusterState, clusterVersion); } catch (VersionMismatchException e) { return false; setClusterId(clusterId); ClusterClockImpl clusterClock = getClusterClock(); clusterClock.setClusterStartTime(clusterStartTime); clusterClock.setMasterTime(masterTime); setJoined(true);
private boolean checkIfJoinRequestFromAnExistingMember(JoinMessage joinMessage, Connection connection) { Address target = joinMessage.getAddress(); MemberImpl member = clusterService.getMember(target); if (member == null) { return checkIfUsingAnExistingMemberUuid(joinMessage); sendMasterAnswer(target); if (clusterService.isMaster() && !isMastershipClaimInProgress()) { if (logger.isFineEnabled()) { logger.fine(format("Ignoring join request, member already exists: %s", joinMessage)); clusterService.getMembershipManager().getMembersView(), preJoinOp, postJoinOp, clusterClock.getClusterTime(), clusterService.getClusterId(), clusterClock.getClusterStartTime(), clusterStateManager.getState(), clusterService.getClusterVersion(), partitionRuntimeState); op.setCallerUuid(clusterService.getThisUuid()); invokeClusterOp(op, target); if (clusterService.isMaster() || target.equals(clusterService.getMasterAddress())) { String msg = format("New join request has been received from an existing endpoint %s." + " Removing old member and processing join request...", member); logger.warning(msg); clusterService.suspectMember(member, msg, false); Connection existing = node.connectionManager.getConnection(target); if (existing != connection) {
/**
 * Makes this node the master, holding {@code clusterServiceLock} for the whole call.
 * <p>
 * When the node is not yet joined it sets the master address to this node's address, sets the
 * cluster version from the node version if the cluster version is still unknown, records the
 * cluster start time, assigns a new cluster id, sets the local member list join version and
 * marks the node as joined.
 *
 * @return {@code true} if this node was set as master, {@code false} if it was already joined
 */
public boolean setThisMemberAsMaster() {
    clusterServiceLock.lock();
    try {
        if (!clusterService.isJoined()) {
            logger.finest("This node is being set as the master");

            Address localAddress = node.getThisAddress();
            MemberVersion nodeVersion = node.getVersion();
            clusterService.setMasterAddress(localAddress);

            boolean clusterVersionUnknown = clusterService.getClusterVersion().isUnknown();
            if (clusterVersionUnknown) {
                clusterService.getClusterStateManager().setClusterVersion(nodeVersion.asVersion());
            }

            clusterService.getClusterClock().setClusterStartTime(Clock.currentTimeMillis());
            clusterService.setClusterId(UuidUtil.createClusterUuid());
            clusterService.getMembershipManager().setLocalMemberListJoinVersion(SINGLETON_MEMBER_LIST_VERSION);
            clusterService.setJoined(true);
            return true;
        }

        logger.warning("Cannot set as master because node is already joined!");
        return false;
    } finally {
        clusterServiceLock.unlock();
    }
}
// Member removal path, executed while holding clusterServiceLock. It asserts that this node is
// the master and, when the node is not joined, logs a warning and returns.
// NOTE(review): this excerpt looks elided (its braces are unbalanced). Presumably the removeJoin,
// heartbeat-manager removeMember and printMemberList calls belong to the joined path and the
// closing braces of the guard were dropped - confirm against the full source before editing.
private void removeMember(MemberImpl member, String reason, boolean shouldCloseConn) { clusterServiceLock.lock(); try { assert clusterService.isMaster() : "Master: " + clusterService.getMasterAddress(); if (!clusterService.isJoined()) { logger.warning("Not removing " + member + " for reason: " + reason + ", because not joined!"); return; clusterService.getClusterJoinManager().removeJoin(member.getAddress()); clusterService.getClusterHeartbeatManager().removeMember(member); clusterService.printMemberList(); } finally { clusterServiceLock.unlock();
private void handleActiveAndJoined(JoinMessage joinMessage) { if (!(joinMessage instanceof JoinRequest)) { logDroppedMessage(joinMessage); return; } ClusterServiceImpl clusterService = node.getClusterService(); Address masterAddress = clusterService.getMasterAddress(); if (clusterService.isMaster()) { JoinMessage response = new JoinMessage(Packet.VERSION, node.getBuildInfo().getBuildNumber(), node.getVersion(), node.getThisAddress(), node.getThisUuid(), node.isLiteMember(), node.createConfigCheck()); node.multicastService.send(response); } else if (joinMessage.getAddress().equals(masterAddress)) { MemberImpl master = node.getClusterService().getMember(masterAddress); if (master != null && !master.getUuid().equals(joinMessage.getUuid())) { String message = "New join request has been received from current master. Suspecting " + masterAddress; logger.warning(message); // I just make a local suspicion. Probably other nodes will eventually suspect as well. clusterService.suspectMember(master, message, false); } } }
ClusterHeartbeatManager clusterHeartbeatManager = clusterService.getClusterHeartbeatManager(); assert !(member.localMember() && member.equals(clusterService.getLocalMember())) : "Local " + member + " cannot be replaced with " + memberInfo; addedMembers.add(member); long now = clusterService.getClusterTime(); clusterHeartbeatManager.onHeartbeat(member, now); clusterService.repairPartitionTableIfReturningMember(member); members[memberIndex++] = member; clusterService.printMemberList();
/**
 * Creates the join manager, caching collaborators obtained from the cluster service and the
 * join-related timing settings read from the node properties.
 *
 * @param node               the owning node
 * @param clusterService     the cluster service this manager belongs to
 * @param clusterServiceLock the lock shared with the cluster service
 */
ClusterJoinManager(Node node, ClusterServiceImpl clusterService, Lock clusterServiceLock) {
    // constructor arguments
    this.node = node;
    this.clusterService = clusterService;
    this.clusterServiceLock = clusterServiceLock;

    // collaborators derived from the arguments
    this.nodeEngine = clusterService.getNodeEngine();
    this.logger = node.getLogger(getClass());
    this.clusterStateManager = clusterService.getClusterStateManager();
    this.clusterClock = clusterService.getClusterClock();

    // timing configuration, all converted to milliseconds
    this.maxWaitMillisBeforeJoin = node.getProperties().getMillis(GroupProperty.MAX_WAIT_SECONDS_BEFORE_JOIN);
    this.waitMillisBeforeJoin = node.getProperties().getMillis(GroupProperty.WAIT_SECONDS_BEFORE_JOIN);
    this.staleJoinPreventionDuration = node.getProperties().getMillis(GroupProperty.MAX_JOIN_SECONDS);
}
/**
 * Builds the phone-home parameter set for the given node.
 * <p>
 * The parameters cover build version, member UUID, download id, cluster id, cluster size and
 * client endpoint count (both passed through {@code convertToLetter}), cluster and JVM uptime,
 * JVM name and version and the Jet version (empty when no Jet build info is available).
 * Client and OS info are then added, followed by management center info, or
 * {@code "MC_NOT_CONFIGURED"} placeholders when management center is not enabled.
 *
 * @param hazelcastNode the node to describe
 * @return the populated parameter creator
 */
public PhoneHomeParameterCreator createParameters(Node hazelcastNode) {
    ClusterServiceImpl clusterService = hazelcastNode.getClusterService();
    int memberCount = clusterService.getMembers().size();
    Long clusterUptimeMillis = clusterService.getClusterClock().getClusterUpTime();
    RuntimeMXBean jvmRuntime = ManagementFactory.getRuntimeMXBean();
    JetBuildInfo jetInfo = hazelcastNode.getBuildInfo().getJetBuildInfo();

    PhoneHomeParameterCreator parameterCreator = new PhoneHomeParameterCreator()
            .addParam("version", buildInfo.getVersion())
            .addParam("m", hazelcastNode.getThisUuid())
            .addParam("p", getDownloadId())
            .addParam("c", clusterService.getClusterId())
            .addParam("crsz", convertToLetter(memberCount))
            .addParam("cssz", convertToLetter(hazelcastNode.clientEngine.getClientEndpointCount()))
            .addParam("cuptm", Long.toString(clusterUptimeMillis))
            .addParam("nuptm", Long.toString(jvmRuntime.getUptime()))
            .addParam("jvmn", jvmRuntime.getVmName())
            .addParam("jvmv", System.getProperty("java.version"))
            .addParam("jetv", jetInfo == null ? "" : jetInfo.getVersion());

    addClientInfo(hazelcastNode, parameterCreator);
    addOSInfo(parameterCreator);

    boolean managementCenterEnabled = hazelcastNode.config.getManagementCenterConfig().isEnabled();
    if (!managementCenterEnabled) {
        parameterCreator.addParam("mclicense", "MC_NOT_CONFIGURED");
        parameterCreator.addParam("mcver", "MC_NOT_CONFIGURED");
    } else {
        addManCenterInfo(hazelcastNode, memberCount, parameterCreator);
    }

    return parameterCreator;
}
ClusterState clusterState = clusterService.getClusterState(); if (!clusterState.isMigrationAllowed() && !clusterState.isJoinAllowed() && clusterState != IN_TRANSITION) { return (clusterService.getMemberListVersion() == expectedMemberListVersion); if (clusterService.getMemberListVersion() != expectedMemberListVersion) { logger.warning("Could not change cluster state to FROZEN because local member list version: " + clusterService.getMemberListVersion() + " is different than expected member list version: " + expectedMemberListVersion); return false; clusterService.changeClusterState(FROZEN);
/**
 * Suspects the member at {@code address} when there is no live connection to it.
 * Runs while holding {@code lock}. Nothing is suspected when the address does not belong to a
 * known member or when a live connection to it exists; both cases are logged at fine level.
 *
 * @param address the address to check
 */
public void suspectAddressIfNotConnected(Address address) {
    lock.lock();
    try {
        MemberImpl member = getMember(address);
        if (member != null) {
            Connection connection = node.getConnectionManager().getConnection(address);
            boolean connected = connection != null && connection.isAlive();
            if (!connected) {
                suspectMember(member, "No connection", false);
            } else if (logger.isFineEnabled()) {
                logger.fine("Cannot suspect " + member + ", since there's a live connection -> " + connection);
            }
        } else if (logger.isFineEnabled()) {
            logger.fine("Cannot suspect " + address + ", since it's not a member.");
        }
    } finally {
        lock.unlock();
    }
}
/**
 * Suspects the {@code member} when the heartbeat failure detector no longer considers it alive.
 * If the member is still considered alive but has been silent for more than
 * {@link #HEART_BEAT_INTERVAL_FACTOR} heartbeat intervals, a fine-level message is logged instead.
 *
 * @param now    the current cluster clock time
 * @param member the member which needs to be checked
 * @return {@code true} if the member was already suspected or has just been suspected,
 *         {@code false} otherwise
 */
private boolean suspectMemberIfNotHeartBeating(long now, Member member) {
    boolean alreadySuspected = clusterService.getMembershipManager().isMemberSuspected(member.getAddress());
    if (alreadySuspected) {
        return true;
    }

    long lastHeartbeat = heartbeatFailureDetector.lastHeartbeat(member);

    if (heartbeatFailureDetector.isAlive(member, now)) {
        // still alive: only report an unusually long silence, and only when fine logging is on
        if (logger.isFineEnabled() && (now - lastHeartbeat) > heartbeatIntervalMillis * HEART_BEAT_INTERVAL_FACTOR) {
            double level = heartbeatFailureDetector.suspicionLevel(member, now);
            logger.fine(format("Not receiving any heartbeats from %s since %s, suspicion level: %.2f",
                    member, timeToString(lastHeartbeat), level));
        }
        return false;
    }

    double level = heartbeatFailureDetector.suspicionLevel(member, now);
    String reason = format("Suspecting %s because it has not sent any heartbeats since %s."
                    + " Now: %s, heartbeat timeout: %d ms, suspicion level: %.2f",
            member, timeToString(lastHeartbeat), timeToString(now), maxNoHeartbeatMillis, level);
    logger.warning(reason);
    clusterService.suspectMember(member, reason, true);
    return true;
}
private void render(DiagnosticsLogWriter writer, ClusterServiceImpl clusterService) { ClusterHeartbeatManager clusterHeartbeatManager = clusterService.getClusterHeartbeatManager(); long expectedIntervalMillis = clusterHeartbeatManager.getHeartbeatIntervalMillis(); long nowMillis = System.currentTimeMillis(); for (MemberImpl member : clusterService.getMemberImpls()) { long lastHeartbeatMillis = clusterHeartbeatManager.getLastHeartbeatTime(member); if (lastHeartbeatMillis == 0L) { // member without a heartbeat; lets skip it continue; } long noHeartbeatMillis = nowMillis - lastHeartbeatMillis; float deviation = HUNDRED * ((float) (noHeartbeatMillis - expectedIntervalMillis)) / expectedIntervalMillis; if (deviation >= maxDeviationPercentage) { startLazyMainSection(writer); writer.startSection("member" + member.getAddress()); writer.writeKeyValueEntry("deviation(%)", deviation); writer.writeKeyValueEntry("noHeartbeat(ms)", noHeartbeatMillis); writer.writeKeyValueEntry("lastHeartbeat(ms)", lastHeartbeatMillis); writer.writeKeyValueEntryAsDateTime("lastHeartbeat(date-time)", lastHeartbeatMillis); writer.writeKeyValueEntry("now(ms)", nowMillis); writer.writeKeyValueEntryAsDateTime("now(date-time)", nowMillis); writer.endSection(); } } endLazyMainSection(writer); }
/**
 * Hands the received heartbeat (sender members-view metadata, target UUID and timestamp)
 * to the cluster heartbeat manager.
 */
@Override
public void run() {
    ClusterServiceImpl clusterService = getService();
    clusterService.getClusterHeartbeatManager()
                  .handleHeartbeat(senderMembersViewMetadata, targetUuid, timestamp);
}
/**
 * Registers a membership listener bound to the endpoint with the cluster service and adds
 * a destroy action for that registration to the endpoint.
 *
 * @return the registration id returned by the cluster service
 */
@Override
protected Object call() {
    final String clusterServiceName = ClusterServiceImpl.SERVICE_NAME;
    ClusterServiceImpl clusterService = getService(clusterServiceName);

    MembershipListenerImpl listener = new MembershipListenerImpl(endpoint);
    String listenerId = clusterService.addMembershipListener(listener);

    // the service name is passed for both of the first two arguments, as in the original call
    endpoint.addListenerDestroyAction(clusterServiceName, clusterServiceName, listenerId);
    return listenerId;
}
/**
 * Changes the cluster state using the given transaction options by delegating to the
 * three-argument overload with its boolean flag set to {@code false}.
 *
 * @param newState the state to switch the cluster to
 * @param options  transaction options for the state change
 */
@Override
public void changeClusterState(ClusterState newState, TransactionOptions options) {
    // NOTE(review): the meaning of the flag is defined by the three-argument overload - confirm there
    final boolean flag = false;
    changeClusterState(newState, options, flag);
}