/**
 * Sends an AppendEntries request to {@code peer}, unless the peer reports itself busy.
 *
 * <p>The request is built by {@code createAppendEntriesRequest(peer)}. Its completion
 * (response or error) is handed to {@code handlePeerResponse} on the context's scheduled
 * executor. Anything thrown by that handler is caught and logged here, because the future
 * returned by {@code whenCompleteAsync} is not kept by anyone.
 *
 * @param peer the peer to send the request to
 * @return {@code true} if {@code peer.makeBusy()} succeeded and a request was dispatched;
 *         {@code false} if the peer was busy and the request was skipped
 */
private boolean requestAppendEntries(PeerServer peer){
    // Guard: makeBusy() returning false means a request is not to be sent right now.
    if(!peer.makeBusy()){
        this.logger.debug("Server %d is busy, skip the request", peer.getId());
        return false;
    }

    peer.SendRequest(this.createAppendEntriesRequest(peer)).whenCompleteAsync(
        (RaftResponseMessage reply, Throwable failure) -> {
            try{
                handlePeerResponse(reply, failure);
            }catch(Throwable uncaught){
                this.logger.error("Uncaught exception %s", uncaught.toString());
            }
        },
        this.context.getScheduledExecutor());
    return true;
}
if(response.isAccepted()){ synchronized(peer){ peer.setNextLogIndex(response.getNextIndex()); peer.setMatchedIndex(response.getNextIndex() - 1); matchedIndexes.add(this.logStore.getFirstAvailableIndex() - 1); for(PeerServer p : this.peers.values()){ matchedIndexes.add(p.getMatchedIndex()); needToCatchup = peer.clearPendingCommit() || response.getNextIndex() < this.logStore.getFirstAvailableIndex(); }else{ synchronized(peer){ if(response.getNextIndex() > 0 && peer.getNextLogIndex() > response.getNextIndex()){ peer.setNextLogIndex(response.getNextIndex()); }else{ peer.setNextLogIndex(peer.getNextLogIndex() - 1);
/**
 * Turns heartbeating on for {@code peer} and schedules its heartbeat handler.
 *
 * <p>In order: marks heartbeat as enabled, calls {@code resumeHeartbeatingSpeed()} on the
 * peer, then schedules the peer's heartbeat handler on the context's scheduled executor
 * after the peer's current heartbeat interval (in milliseconds). The resulting task is
 * stored on the peer via {@code setHeartbeatTask}.
 *
 * @param peer the peer whose heartbeat should be started
 */
private void enableHeartbeatForPeer(PeerServer peer){
    peer.enableHeartbeat(true);
    peer.resumeHeartbeatingSpeed();

    // The interval is read after resumeHeartbeatingSpeed(), so it reflects the resumed speed.
    peer.setHeartbeatTask(
        this.context.getScheduledExecutor().schedule(
            peer.getHeartbeartHandler(),
            peer.getCurrentHeartbeatInterval(),
            TimeUnit.MILLISECONDS));
}
private void becomeFollower(){ // stop heartbeat for all peers for(PeerServer server : this.peers.values()){ if(server.getHeartbeatTask() != null){ server.getHeartbeatTask().cancel(false); } server.enableHeartbeat(false); } this.serverToJoin = null; this.role = ServerRole.Follower; this.restartElectionTimer(); }
private synchronized void handleHeartbeatTimeout(PeerServer peer){ this.logger.debug("Heartbeat timeout for %d", peer.getId()); if(this.role == ServerRole.Leader){ this.requestAppendEntries(peer); synchronized(peer){ if(peer.isHeartbeatEnabled()){ // Schedule another heartbeat if heartbeat is still enabled peer.setHeartbeatTask(this.context.getScheduledExecutor().schedule(peer.getHeartbeartHandler(), peer.getCurrentHeartbeatInterval(), TimeUnit.MILLISECONDS)); }else{ this.logger.debug("heartbeat is disabled for peer %d", peer.getId()); } } }else{ this.logger.info("Receive a heartbeat event for %d while no longer as a leader", peer.getId()); } }
PeerServer peer = new PeerServer(server, context, peerServer -> this.handleHeartbeatTimeout(peerServer)); peer.setNextLogIndex(this.logStore.getFirstAvailableIndex()); this.peers.put(server.getId(), peer); this.logger.info("server %d is added to cluster", peer.getId()); if(this.role == ServerRole.Leader){ this.logger.info("enable heartbeating for server %d", peer.getId()); this.enableHeartbeatForPeer(peer); if(this.serverToJoin != null && this.serverToJoin.getId() == peer.getId()){ peer.setNextLogIndex(this.serverToJoin.getNextLogIndex()); this.serverToJoin = null; this.logger.info("peer %d cannot be found in current peer list", id); } else{ if(peer.getHeartbeatTask() != null){ peer.getHeartbeatTask().cancel(false); peer.enableHeartbeat(false); this.peers.remove(id); this.logger.info("server %d is removed from cluster", id.intValue());
if(response.isAccepted()){ synchronized(peer){ SnapshotSyncContext context = peer.getSnapshotSyncContext(); if(context == null){ this.logger.info("no snapshot sync context for this peer, drop the response"); if(response.getNextIndex() >= context.getSnapshot().getSize()){ this.logger.debug("snapshot sync is done"); peer.setNextLogIndex(context.getSnapshot().getLastLogIndex() + 1); peer.setMatchedIndex(context.getSnapshot().getLastLogIndex()); peer.setSnapshotInSync(null); needToCatchup = peer.clearPendingCommit() || response.getNextIndex() < this.logStore.getFirstAvailableIndex(); }else{ this.logger.debug("continue to sync snapshot at offset %d", response.getNextIndex());
final PeerServer server = (request.getMessageType() == RaftMessageType.LeaveClusterRequest) ? this.peers.get(request.getDestination()) : this.serverToJoin; if(server != null){ if(server.getCurrentHeartbeatInterval() >= this.context.getRaftParameters().getMaxHeartbeatInterval()){ if(request.getMessageType() == RaftMessageType.LeaveClusterRequest){ this.logger.info("rpc failed again for the removing server (%d), will remove this server directly", server.getId()); PeerServer peer = this.peers.get(server.getId()); if(peer == null){ this.logger.info("peer %d cannot be found in current peer list", id); } else{ if(peer.getHeartbeatTask() != null){ peer.getHeartbeatTask().cancel(false); peer.enableHeartbeat(false); this.peers.remove(server.getId()); this.logger.info("server %d is removed from cluster", server.getId()); this.removeServerFromCluster(server.getId()); }else{ this.logger.info("rpc failed again for the new coming server (%d), will stop retry for this server", server.getId()); this.configChanging = false; this.serverToJoin = null; server.slowDownHeartbeating(); final RaftServer self = this; this.context.getScheduledExecutor().schedule(new Callable<Void>(){ }, self.context.getScheduledExecutor()); return null;
private void requestVote(){ // vote for self this.logger.info("requestVote started with term %d", this.state.getTerm()); this.state.setVotedFor(this.id); this.context.getServerStateManager().persistState(this.state); this.votesGranted += 1; this.votedServers.add(this.id); // this is the only server? if(this.votesGranted > (this.peers.size() + 1) / 2){ this.electionCompleted = true; this.becomeLeader(); return; } for(PeerServer peer : this.peers.values()){ RaftRequestMessage request = new RaftRequestMessage(); request.setMessageType(RaftMessageType.RequestVoteRequest); request.setDestination(peer.getId()); request.setSource(this.id); request.setLastLogIndex(this.logStore.getFirstAvailableIndex() - 1); request.setLastLogTerm(this.termForLastLog(this.logStore.getFirstAvailableIndex() - 1)); request.setTerm(this.state.getTerm()); this.logger.debug("send %s to server %d with term %d", RaftMessageType.RequestVoteRequest.toString(), peer.getId(), this.state.getTerm()); peer.SendRequest(request).whenCompleteAsync((RaftResponseMessage response, Throwable error) -> { handlePeerResponse(response, error); }, this.context.getScheduledExecutor()); } }
if(gap < this.context.getRaftParameters().getLogSyncStopGap()){ this.logger.info("LogSync is done for server %d with log gap %d, now put the server into cluster", this.serverToJoin.getId(), gap); ClusterConfiguration newConfig = new ClusterConfiguration(); newConfig.setLastLogIndex(this.config.getLogIndex()); newConfig.setLogIndex(this.logStore.getFirstAvailableIndex()); newConfig.getServers().addAll(this.config.getServers()); newConfig.getServers().add(this.serverToJoin.getClusterConfig()); LogEntry configEntry = new LogEntry(this.state.getTerm(), newConfig.toBytes(), LogValueType.Configuration); this.logStore.append(configEntry); request = new RaftRequestMessage(); request.setCommitIndex(this.quickCommitIndex); request.setDestination(this.serverToJoin.getId()); request.setSource(this.id); request.setTerm(this.state.getTerm()); this.serverToJoin.SendRequest(request).whenCompleteAsync((RaftResponseMessage response, Throwable error) -> { this.handleExtendedResponse(response, error); }, this.context.getScheduledExecutor());
private RaftRequestMessage createSyncSnapshotRequest(PeerServer peer, long lastLogIndex, long term, long commitIndex){ synchronized(peer){ SnapshotSyncContext context = peer.getSnapshotSyncContext(); Snapshot snapshot = context == null ? null : context.getSnapshot(); Snapshot lastSnapshot = this.stateMachine.getLastSnapshot(); this.logger.error("system is running into fatal errors, failed to find a snapshot for peer %d(snapshot null: %s, snapshot doesn't contais lastLogIndex: %s)", peer.getId(), String.valueOf(snapshot == null), String.valueOf(lastLogIndex > snapshot.getLastLogIndex())); System.exit(-1); return null; this.logger.info("trying to sync snapshot with last index %d to peer %d", snapshot.getLastLogIndex(), peer.getId()); peer.setSnapshotInSync(snapshot); long offset = peer.getSnapshotSyncContext().getOffset(); long sizeLeft = snapshot.getSize() - offset; int blockSize = this.getSnapshotSyncBlockSize(); requestMessage.setMessageType(RaftMessageType.InstallSnapshotRequest); requestMessage.setSource(this.id); requestMessage.setDestination(peer.getId()); requestMessage.setLastLogIndex(snapshot.getLastLogIndex()); requestMessage.setLastLogTerm(snapshot.getLastLogTerm());
if(peer.getNextLogIndex() == 0){ peer.setNextLogIndex(currentNextIndex); lastLogIndex = peer.getNextLogIndex() - 1; this.logger.debug( "An AppendEntries Request for %d with LastLogIndex=%d, LastLogTerm=%d, EntriesLength=%d, CommitIndex=%d and Term=%d", peer.getId(), lastLogIndex, lastLogTerm, requestMessage.setMessageType(RaftMessageType.AppendEntriesRequest); requestMessage.setSource(this.id); requestMessage.setDestination(peer.getId()); requestMessage.setLastLogIndex(lastLogIndex); requestMessage.setLastLogTerm(lastLogTerm);
this.peers.put(server.getId(), new PeerServer(server, context, peerServer -> this.handleHeartbeatTimeout(peerServer)));
@Override public Void call() throws Exception { self.logger.debug("retrying the request %s", request.getMessageType().toString()); server.SendRequest(request).whenCompleteAsync((RaftResponseMessage furtherResponse, Throwable furtherError) -> { self.handleExtendedResponse(furtherResponse, furtherError); }, self.context.getScheduledExecutor()); return null; }}, server.getCurrentHeartbeatInterval(), TimeUnit.MILLISECONDS); }
PeerServer peer = new PeerServer(server, context, peerServer -> this.handleHeartbeatTimeout(peerServer)); peer.setNextLogIndex(this.logStore.getFirstAvailableIndex()); this.peers.put(server.getId(), peer); this.logger.info("server %d is added to cluster", peer.getId()); if(this.role == ServerRole.Leader){ this.logger.info("enable heartbeating for server %d", peer.getId()); this.enableHeartbeatForPeer(peer); if(this.serverToJoin != null && this.serverToJoin.getId() == peer.getId()){ peer.setNextLogIndex(this.serverToJoin.getNextLogIndex()); this.serverToJoin = null; this.logger.info("peer %d cannot be found in current peer list", id); } else{ if(peer.getHeartbeatTask() != null){ peer.getHeartbeatTask().cancel(false); peer.enableHeartbeat(false); this.peers.remove(id); this.logger.info("server %d is removed from cluster", id.intValue());
private synchronized void handleHeartbeatTimeout(PeerServer peer){ this.logger.debug("Heartbeat timeout for %d", peer.getId()); if(this.role == ServerRole.Leader){ this.requestAppendEntries(peer); synchronized(peer){ if(peer.isHeartbeatEnabled()){ // Schedule another heartbeat if heartbeat is still enabled peer.setHeartbeatTask(this.context.getScheduledExecutor().schedule(peer.getHeartbeartHandler(), peer.getCurrentHeartbeatInterval(), TimeUnit.MILLISECONDS)); }else{ this.logger.debug("heartbeat is disabled for peer %d", peer.getId()); } } }else{ this.logger.info("Receive a heartbeat event for %d while no longer as a leader", peer.getId()); } }
if(response.isAccepted()){ synchronized(peer){ SnapshotSyncContext context = peer.getSnapshotSyncContext(); if(context == null){ this.logger.info("no snapshot sync context for this peer, drop the response"); if(response.getNextIndex() >= context.getSnapshot().getSize()){ this.logger.debug("snapshot sync is done"); peer.setNextLogIndex(context.getSnapshot().getLastLogIndex() + 1); peer.setMatchedIndex(context.getSnapshot().getLastLogIndex()); peer.setSnapshotInSync(null); needToCatchup = peer.clearPendingCommit() || response.getNextIndex() < this.logStore.getFirstAvailableIndex(); }else{ this.logger.debug("continue to sync snapshot at offset %d", response.getNextIndex());
final PeerServer server = (request.getMessageType() == RaftMessageType.LeaveClusterRequest) ? this.peers.get(request.getDestination()) : this.serverToJoin; if(server != null){ if(server.getCurrentHeartbeatInterval() >= this.context.getRaftParameters().getMaxHeartbeatInterval()){ if(request.getMessageType() == RaftMessageType.LeaveClusterRequest){ this.logger.info("rpc failed again for the removing server (%d), will remove this server directly", server.getId()); PeerServer peer = this.peers.get(server.getId()); if(peer == null){ this.logger.info("peer %d cannot be found in current peer list", id); } else{ if(peer.getHeartbeatTask() != null){ peer.getHeartbeatTask().cancel(false); peer.enableHeartbeat(false); this.peers.remove(server.getId()); this.logger.info("server %d is removed from cluster", server.getId()); this.removeServerFromCluster(server.getId()); }else{ this.logger.info("rpc failed again for the new coming server (%d), will stop retry for this server", server.getId()); this.configChanging = false; this.serverToJoin = null; server.slowDownHeartbeating(); final RaftServer self = this; this.context.getScheduledExecutor().schedule(new Callable<Void>(){ }, self.context.getScheduledExecutor()); return null;
/**
 * Sends a {@code JoinClusterRequest} to the server held in {@code serverToJoin}.
 *
 * <p>The request carries this server's id as source, the current term, the quick commit
 * index, the index just before the log store's first available index, and a single
 * {@code Configuration} log entry built from the current cluster configuration. The
 * response (or error) is passed to {@code handleExtendedResponse} on the context's
 * scheduled executor.
 */
private void inviteServerToJoinCluster(){
    RaftRequestMessage invitation = new RaftRequestMessage();
    invitation.setCommitIndex(this.quickCommitIndex);
    invitation.setDestination(this.serverToJoin.getId());
    invitation.setSource(this.id);
    invitation.setTerm(this.state.getTerm());
    invitation.setMessageType(RaftMessageType.JoinClusterRequest);
    invitation.setLastLogIndex(this.logStore.getFirstAvailableIndex() - 1);

    // The joining server receives the current configuration as the only log entry.
    LogEntry currentConfigEntry = new LogEntry(this.state.getTerm(), this.config.toBytes(), LogValueType.Configuration);
    invitation.setLogEntries(new LogEntry[] { currentConfigEntry });

    this.serverToJoin.SendRequest(invitation).whenCompleteAsync(
        (RaftResponseMessage reply, Throwable failure) -> this.handleExtendedResponse(reply, failure),
        this.context.getScheduledExecutor());
}
if(gap < this.context.getRaftParameters().getLogSyncStopGap()){ this.logger.info("LogSync is done for server %d with log gap %d, now put the server into cluster", this.serverToJoin.getId(), gap); ClusterConfiguration newConfig = new ClusterConfiguration(); newConfig.setLastLogIndex(this.config.getLogIndex()); newConfig.setLogIndex(this.logStore.getFirstAvailableIndex()); newConfig.getServers().addAll(this.config.getServers()); newConfig.getServers().add(this.serverToJoin.getClusterConfig()); LogEntry configEntry = new LogEntry(this.state.getTerm(), newConfig.toBytes(), LogValueType.Configuration); this.logStore.append(configEntry); request = new RaftRequestMessage(); request.setCommitIndex(this.quickCommitIndex); request.setDestination(this.serverToJoin.getId()); request.setSource(this.id); request.setTerm(this.state.getTerm()); this.serverToJoin.SendRequest(request).whenCompleteAsync((RaftResponseMessage response, Throwable error) -> { this.handleExtendedResponse(response, error); }, this.context.getScheduledExecutor());