private synchronized void handleElectionTimeout(){ if(this.steppingDown > 0){ if(--this.steppingDown == 0){ this.logger.info("no hearing further news from leader, remove this server from config and step down"); ClusterServer server = this.config.getServer(this.id); if(server != null){ this.logger.info("stepping down (cycles left: %d), skip this election timeout event", this.steppingDown); this.restartElectionTimer(); return; this.logger.info("election timeout while joining the cluster, ignore it."); this.restartElectionTimer(); return; this.logger.error("A leader should never encounter election timeout, illegal application state, stop the application"); System.exit(-1); return; this.logger.debug("Election timeout, change to Candidate"); this.state.increaseTerm(); this.state.setVotedFor(-1);
/**
 * Cancels the currently recorded election task and forgets it.
 *
 * <p>If no election task is recorded ({@code scheduledElection} is {@code null}),
 * a warning is logged and nothing is cancelled.
 */
private void stopElectionTimer(){
    if(this.scheduledElection != null){
        // Cancel first, then drop the reference so a later stop request is
        // reported as "never started".
        this.scheduledElection.cancel(false);
        this.scheduledElection = null;
        return;
    }

    this.logger.warning("Election Timer is never started but is requested to stop, protential a bug");
}
private synchronized void handleHeartbeatTimeout(PeerServer peer){ this.logger.debug("Heartbeat timeout for %d", peer.getId()); if(this.role == ServerRole.Leader){ this.requestAppendEntries(peer); synchronized(peer){ if(peer.isHeartbeatEnabled()){ // Schedule another heartbeat if heartbeat is still enabled peer.setHeartbeatTask(this.context.getScheduledExecutor().schedule(peer.getHeartbeartHandler(), peer.getCurrentHeartbeatInterval(), TimeUnit.MILLISECONDS)); }else{ this.logger.debug("heartbeat is disabled for peer %d", peer.getId()); } } }else{ this.logger.info("Receive a heartbeat event for %d while no longer as a leader", peer.getId()); } }
/**
 * Sends an append-entries request to {@code peer} unless the peer is busy.
 *
 * <p>The request is only built and sent when {@code peer.makeBusy()} returns
 * {@code true}. The response is handed to {@code handlePeerResponse} on the
 * context's scheduled executor; anything thrown by that handler is logged
 * and not propagated.
 *
 * @param peer the peer to send the request to
 * @return {@code true} if a request was sent, {@code false} if the peer was busy
 */
private boolean requestAppendEntries(PeerServer peer){
    if(!peer.makeBusy()){
        this.logger.debug("Server %d is busy, skip the request", peer.getId());
        return false;
    }

    peer.SendRequest(this.createAppendEntriesRequest(peer))
        .whenCompleteAsync((RaftResponseMessage reply, Throwable failure) -> {
            try{
                handlePeerResponse(reply, failure);
            }catch(Throwable unexpected){
                // Keep the callback from failing the executor task; just report it.
                this.logger.error("Uncaught exception %s", unexpected.toString());
            }
        }, this.context.getScheduledExecutor());

    return true;
}
this.logger.error("system is running into fatal errors, failed to find a snapshot for peer %d(snapshot null: %s, snapshot doesn't contais lastLogIndex: %s)", peer.getId(), String.valueOf(snapshot == null), String.valueOf(lastLogIndex > snapshot.getLastLogIndex())); System.exit(-1); return null; this.logger.error("invalid snapshot, this usually means a bug from state machine implementation, stop the system to prevent further errors"); System.exit(-1); return null; this.logger.info("trying to sync snapshot with last index %d to peer %d", snapshot.getLastLogIndex(), peer.getId()); peer.setSnapshotInSync(snapshot); int sizeRead = this.stateMachine.readSnapshotData(snapshot, offset, data); if(sizeRead < data.length){ this.logger.error("only %d bytes could be read from snapshot while %d bytes are expected, should be something wrong" , sizeRead, data.length); System.exit(-1); return null; this.logger.error("failed to read snapshot data due to io error %s", error.toString()); System.exit(-1); return null;
private void handleVotingResponse(RaftResponseMessage response){ this.votesResponded += 1; if(this.electionCompleted){ this.logger.info("Election completed, will ignore the voting result from this server"); return; } if(response.isAccepted()){ this.votesGranted += 1; } if(this.votesResponded >= this.peers.size() + 1){ this.electionCompleted = true; } // got a majority set of granted votes if(this.votesGranted > (this.peers.size() + 1) / 2){ this.logger.info("Server is elected as leader for term %d", this.state.getTerm()); this.electionCompleted = true; this.becomeLeader(); } }
this.becomeFollower(); }else if(this.role == ServerRole.Leader){ this.logger.error("Receive InstallSnapshotRequest from another leader(%d) with same term, there must be a bug, server exits", request.getSource()); System.exit(-1); }else{ response.setDestination(request.getSource()); if(!this.catchingUp && request.getTerm() < this.state.getTerm()){ this.logger.info("received an install snapshot request which has lower term than this server, decline the request"); response.setAccepted(false); response.setNextIndex(0); this.logger.warning("Receive an invalid InstallSnapshotRequest due to bad log entries or bad log entry value"); response.setNextIndex(0); response.setAccepted(false); this.logger.error("Received a snapshot which is older than this server (%d)", this.id); response.setNextIndex(0); response.setAccepted(false);
response.setAccepted(false); if(logEntries.length != 1 || logEntries[0].getValueType() != LogValueType.ClusterServer){ this.logger.info("bad add server request as we are expecting one log entry with value type of ClusterServer"); return response; this.logger.info("this is not a leader, cannot handle AddServerRequest"); return response; this.logger.warning("the server to be added has a duplicated id with existing server %d", server.getId()); return response; this.logger.info("previous config has not committed yet"); return response;
@Override public Void call() throws Exception { self.logger.debug("retrying the request %s", request.getMessageType().toString()); server.SendRequest(request).whenCompleteAsync((RaftResponseMessage furtherResponse, Throwable furtherError) -> { self.handleExtendedResponse(furtherResponse, furtherError); }, self.context.getScheduledExecutor()); return null; }}, server.getCurrentHeartbeatInterval(), TimeUnit.MILLISECONDS); }
/**
 * Dispatches an extended request to the handler matching its message type.
 *
 * <p>Handled types: AddServerRequest, RemoveServerRequest, SyncLogRequest,
 * JoinClusterRequest, LeaveClusterRequest and InstallSnapshotRequest. Any
 * other type is logged as an error and {@code System.exit(-1)} is called
 * (the log text says "step down", but the code exits the process).
 *
 * @param request the incoming request
 * @return the response produced by the matching handler; {@code null} is the
 *         fall-through value after the exit call for an unknown type
 */
private synchronized RaftResponseMessage handleExtendedMessages(RaftRequestMessage request){
    final RaftMessageType type = request.getMessageType();

    if(type == RaftMessageType.AddServerRequest){
        return this.handleAddServerRequest(request);
    }
    if(type == RaftMessageType.RemoveServerRequest){
        return this.handleRemoveServerRequest(request);
    }
    if(type == RaftMessageType.SyncLogRequest){
        return this.handleLogSyncRequest(request);
    }
    if(type == RaftMessageType.JoinClusterRequest){
        return this.handleJoinClusterRequest(request);
    }
    if(type == RaftMessageType.LeaveClusterRequest){
        return this.handleLeaveClusterRequest(request);
    }
    if(type == RaftMessageType.InstallSnapshotRequest){
        return this.handleInstallSnapshotRequest(request);
    }

    // None of the known types matched.
    this.logger.error("receive an unknown request %s, for safety, step down.", type.toString());
    System.exit(-1);
    return null;
}
private synchronized void handleHeartbeatTimeout(PeerServer peer){ this.logger.debug("Heartbeat timeout for %d", peer.getId()); if(this.role == ServerRole.Leader){ this.requestAppendEntries(peer); synchronized(peer){ if(peer.isHeartbeatEnabled()){ // Schedule another heartbeat if heartbeat is still enabled peer.setHeartbeatTask(this.context.getScheduledExecutor().schedule(peer.getHeartbeartHandler(), peer.getCurrentHeartbeatInterval(), TimeUnit.MILLISECONDS)); }else{ this.logger.debug("heartbeat is disabled for peer %d", peer.getId()); } } }else{ this.logger.info("Receive a heartbeat event for %d while no longer as a leader", peer.getId()); } }
synchronized(server){ ClusterConfiguration newConfig = ClusterConfiguration.fromBytes(logEntry.getValue()); server.logger.info("configuration at index %d is committed", newConfig.getLogIndex()); server.context.getServerStateManager().saveClusterConfiguration(newConfig); server.configChanging = false; server.logger.info("this server is committed as one of cluster members"); server.catchingUp = false; server.logger.error("error %s encountered for committing thread, which should not happen, according to this, state machine may not have further progress, stop the system", error, error.getMessage()); System.exit(-1);
private void handleVotingResponse(RaftResponseMessage response){ this.votesResponded += 1; if(this.electionCompleted){ this.logger.info("Election completed, will ignore the voting result from this server"); return; } if(response.isAccepted()){ this.votesGranted += 1; } if(this.votesResponded >= this.peers.size() + 1){ this.electionCompleted = true; } // got a majority set of granted votes if(this.votesGranted > (this.peers.size() + 1) / 2){ this.logger.info("Server is elected as leader for term %d", this.state.getTerm()); this.electionCompleted = true; this.becomeLeader(); } }
this.becomeFollower(); }else if(this.role == ServerRole.Leader){ this.logger.error("Receive InstallSnapshotRequest from another leader(%d) with same term, there must be a bug, server exits", request.getSource()); this.stateMachine.exit(-1); }else{ response.setDestination(request.getSource()); if(!this.catchingUp && request.getTerm() < this.state.getTerm()){ this.logger.info("received an install snapshot request which has lower term than this server, decline the request"); response.setAccepted(false); response.setNextIndex(0); this.logger.warning("Receive an invalid InstallSnapshotRequest due to bad log entries or bad log entry value"); response.setNextIndex(0); response.setAccepted(false); this.logger.error("Received a snapshot which is older than this server (%d)", this.id); response.setNextIndex(0); response.setAccepted(false);
/**
 * Sends an append-entries request to {@code peer} unless the peer is busy.
 *
 * <p>The request is only built and sent when {@code peer.makeBusy()} returns
 * {@code true}. The response is handed to {@code handlePeerResponse} on the
 * context's scheduled executor; anything thrown by that handler is logged
 * and not propagated.
 *
 * @param peer the peer to send the request to
 * @return {@code true} if a request was sent, {@code false} if the peer was busy
 */
private boolean requestAppendEntries(PeerServer peer){
    if(!peer.makeBusy()){
        this.logger.debug("Server %d is busy, skip the request", peer.getId());
        return false;
    }

    peer.SendRequest(this.createAppendEntriesRequest(peer))
        .whenCompleteAsync((RaftResponseMessage reply, Throwable failure) -> {
            try{
                handlePeerResponse(reply, failure);
            }catch(Throwable unexpected){
                // Keep the callback from failing the executor task; just report it.
                this.logger.error("Uncaught exception %s", unexpected.toString());
            }
        }, this.context.getScheduledExecutor());

    return true;
}
response.setAccepted(false); if(logEntries.length != 1 || logEntries[0].getValueType() != LogValueType.ClusterServer){ this.logger.info("bad add server request as we are expecting one log entry with value type of ClusterServer"); return response; this.logger.info("this is not a leader, cannot handle AddServerRequest"); return response; this.logger.warning("the server to be added has a duplicated id with existing server %d", server.getId()); return response; this.logger.info("previous config has not committed yet"); return response;
@Override public Void call() throws Exception { self.logger.debug("retrying the request %s", request.getMessageType().toString()); server.SendRequest(request).whenCompleteAsync((RaftResponseMessage furtherResponse, Throwable furtherError) -> { self.handleExtendedResponse(furtherResponse, furtherError); }, self.context.getScheduledExecutor()); return null; }}, server.getCurrentHeartbeatInterval(), TimeUnit.MILLISECONDS); }
/**
 * Dispatches an extended request to the handler matching its message type.
 *
 * <p>Handled types: AddServerRequest, RemoveServerRequest, SyncLogRequest,
 * JoinClusterRequest, LeaveClusterRequest and InstallSnapshotRequest. Any
 * other type is logged as an error and {@code stateMachine.exit(-1)} is
 * invoked (the log text says "step down", but the code asks the state
 * machine to exit).
 *
 * @param request the incoming request
 * @return the response produced by the matching handler; {@code null} if the
 *         type is unknown and the exit call returns
 */
private synchronized RaftResponseMessage handleExtendedMessages(RaftRequestMessage request){
    final RaftMessageType type = request.getMessageType();

    if(type == RaftMessageType.AddServerRequest){
        return this.handleAddServerRequest(request);
    }
    if(type == RaftMessageType.RemoveServerRequest){
        return this.handleRemoveServerRequest(request);
    }
    if(type == RaftMessageType.SyncLogRequest){
        return this.handleLogSyncRequest(request);
    }
    if(type == RaftMessageType.JoinClusterRequest){
        return this.handleJoinClusterRequest(request);
    }
    if(type == RaftMessageType.LeaveClusterRequest){
        return this.handleLeaveClusterRequest(request);
    }
    if(type == RaftMessageType.InstallSnapshotRequest){
        return this.handleInstallSnapshotRequest(request);
    }

    // None of the known types matched.
    this.logger.error("receive an unknown request %s, for safety, step down.", type.toString());
    this.stateMachine.exit(-1);
    return null;
}
private synchronized void handleElectionTimeout(){ if(this.steppingDown > 0){ if(--this.steppingDown == 0){ this.logger.info("no hearing further news from leader, remove this server from config and step down"); ClusterServer server = this.config.getServer(this.id); if(server != null){ this.logger.info("stepping down (cycles left: %d), skip this election timeout event", this.steppingDown); this.restartElectionTimer(); return; this.logger.info("election timeout while joining the cluster, ignore it."); this.restartElectionTimer(); return; this.logger.error("A leader should never encounter election timeout, illegal application state, stop the application"); this.stateMachine.exit(-1); return; this.logger.debug("Election timeout, change to Candidate"); this.state.increaseTerm(); this.state.setVotedFor(-1);
private synchronized void handleHeartbeatTimeout(PeerServer peer){ this.logger.debug("Heartbeat timeout for %d", peer.getId()); if(this.role == ServerRole.Leader){ this.requestAppendEntries(peer); synchronized(peer){ if(peer.isHeartbeatEnabled()){ // Schedule another heartbeat if heartbeat is still enabled peer.setHeartbeatTask(this.context.getScheduledExecutor().schedule(peer.getHeartbeartHandler(), peer.getCurrentHeartbeatInterval(), TimeUnit.MILLISECONDS)); }else{ this.logger.debug("heartbeat is disabled for peer %d", peer.getId()); } } }else{ this.logger.info("Receive a heartbeat event for %d while no longer as a leader", peer.getId()); } }