protected void rejoin(String reason) { assert Thread.holdsLock(stateMutex); ClusterState clusterState = committedState.get(); logger.warn("{}, current nodes: {}", reason, clusterState.nodes()); nodesFD.stop(); masterFD.stop(reason); // TODO: do we want to force a new thread if we actively removed the master? this is to give a full pinging cycle // before a decision is made. joinThreadControl.startNewThreadIfNotRunning(); if (clusterState.nodes().getMasterNodeId() != null) { // remove block if it already exists before adding new one assert clusterState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock().id()) == false : "NO_MASTER_BLOCK should only be added by ZenDiscovery"; ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(clusterState.blocks()) .addGlobalBlock(discoverySettings.getNoMasterBlock()) .build(); DiscoveryNodes discoveryNodes = new DiscoveryNodes.Builder(clusterState.nodes()).masterNodeId(null).build(); clusterState = ClusterState.builder(clusterState) .blocks(clusterBlocks) .nodes(discoveryNodes) .build(); committedState.set(clusterState); clusterApplier.onNewClusterState(reason, this::clusterState, (source, e) -> {}); // don't wait for state to be applied } }
public static DiscoveryNodes readFrom(StreamInput in, DiscoveryNode localNode) throws IOException { Builder builder = new Builder(); if (in.readBoolean()) { builder.masterNodeId(in.readString()); } if (localNode != null) { builder.localNodeId(localNode.getId()); } int size = in.readVInt(); for (int i = 0; i < size; i++) { DiscoveryNode node = new DiscoveryNode(in); if (localNode != null && node.getId().equals(localNode.getId())) { // reuse the same instance of our address and local node id for faster equality node = localNode; } // some one already built this and validated it's OK, skip the n2 scans assert builder.validateAdd(node) == null : "building disco nodes from network doesn't pass preflight: " + builder.validateAdd(node); builder.putUnsafe(node); } return builder.build(); }
private ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<DiscoveryNode> joiningNodes) { assert currentState.nodes().getMasterNodeId() == null : currentState; DiscoveryNodes currentNodes = currentState.nodes(); DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes); nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId()); for (final DiscoveryNode joiningNode : joiningNodes) { final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId()); if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode); nodesBuilder.remove(nodeWithSameId.getId()); } final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress()); if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) { logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress, joiningNode); nodesBuilder.remove(nodeWithSameAddress.getId()); } } // now trim any left over dead nodes - either left there when the previous master stepped down // or removed by us above ClusterState tmpState = ClusterState.builder(currentState).nodes(nodesBuilder).blocks(ClusterBlocks.builder() .blocks(currentState.blocks()) .removeGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID)).build(); return ClusterState.builder(allocationService.deassociateDeadNodes(tmpState, false, "removed dead nodes on election")); }
public DiscoveryNodes readFrom(StreamInput in, DiscoveryNode localNode) throws IOException { Builder builder = new Builder(); if (in.readBoolean()) { builder.masterNodeId(in.readString()); } if (localNode != null) { builder.localNodeId(localNode.id()); } int size = in.readVInt(); for (int i = 0; i < size; i++) { DiscoveryNode node = DiscoveryNode.readNode(in); if (localNode != null && node.id().equals(localNode.id())) { // reuse the same instance of our address and local node id for faster equality node = localNode; } builder.put(node); } return builder.build(); }
protected ClusterState rejoin(ClusterState clusterState, String reason) { // *** called from within an cluster state update task *** // assert Thread.currentThread().getName().contains(InternalClusterService.UPDATE_THREAD_NAME); logger.warn(reason + ", current nodes: {}", clusterState.nodes()); nodesFD.stop(); masterFD.stop(reason); ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(clusterState.blocks()) .addGlobalBlock(discoverySettings.getNoMasterBlock()) .build(); // clean the nodes, we are now not connected to anybody, since we try and reform the cluster DiscoveryNodes discoveryNodes = new DiscoveryNodes.Builder(clusterState.nodes()).masterNodeId(null).build(); // TODO: do we want to force a new thread if we actively removed the master? this is to give a full pinging cycle // before a decision is made. joinThreadControl.startNewThreadIfNotRunning(); return ClusterState.builder(clusterState) .blocks(clusterBlocks) .nodes(discoveryNodes) .build(); }
@Override public ClusterState execute(ClusterState currentState) { // Take into account the previous known nodes, if they happen not to be available // then fault detection will remove these nodes. if (currentState.nodes().masterNode() != null) { // TODO can we tie break here? we don't have a remote master cluster state version to decide on logger.trace("join thread elected local node as master, but there is already a master in place: {}", currentState.nodes().masterNode()); throw new NotMasterException("Node [" + clusterService.localNode() + "] not master for join request"); } DiscoveryNodes.Builder builder = new DiscoveryNodes.Builder(currentState.nodes()).masterNodeId(currentState.nodes().localNode().id()); // update the fact that we are the master... ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock()).build(); currentState = ClusterState.builder(currentState).nodes(builder).blocks(clusterBlocks).build(); // reroute now to remove any dead nodes (master may have stepped down when they left and didn't update the routing table) RoutingAllocation.Result result = routingService.getAllocationService().reroute(currentState, "nodes joined"); if (result.changed()) { currentState = ClusterState.builder(currentState).routingResult(result).build(); } // Add the incoming join requests. // Note: we only do this now (after the reroute) to avoid assigning shards to these nodes. return super.execute(currentState); }
public static DiscoveryNodes readFrom(StreamInput in, DiscoveryNode localNode) throws IOException { Builder builder = new Builder(); if (in.readBoolean()) { builder.masterNodeId(in.readString()); } if (localNode != null) { builder.localNodeId(localNode.getId()); } int size = in.readVInt(); for (int i = 0; i < size; i++) { DiscoveryNode node = new DiscoveryNode(in); if (localNode != null && node.getId().equals(localNode.getId())) { // reuse the same instance of our address and local node id for faster equality node = localNode; } // some one already built this and validated it's OK, skip the n2 scans assert builder.validateAdd(node) == null : "building disco nodes from network doesn't pass preflight: " + builder.validateAdd(node); builder.putUnsafe(node); } return builder.build(); }
@Override public ClusterState execute(ClusterState currentState) { if (!masterNode.id().equals(currentState.nodes().masterNodeId())) { // master got switched on us, no need to send anything return currentState; } DiscoveryNodes discoveryNodes = DiscoveryNodes.builder(currentState.nodes()) // make sure the old master node, which has failed, is not part of the nodes we publish .remove(masterNode.id()) .masterNodeId(null).build(); // flush any pending cluster states from old master, so it will not be set as master again ArrayList<ProcessClusterState> pendingNewClusterStates = new ArrayList<>(); processNewClusterStates.drainTo(pendingNewClusterStates); logger.trace("removed [{}] pending cluster states", pendingNewClusterStates.size()); return rejoin(ClusterState.builder(currentState).nodes(discoveryNodes).build(), "master left (reason = " + reason + ")"); }
@Override public ClusterTasksResult<DiscoveryNode> execute( final ClusterState current, final List<DiscoveryNode> tasks) throws Exception { assert tasks.size() == 1; final DiscoveryNodes.Builder nodes = DiscoveryNodes.builder(current.nodes()); // always set the local node as master, there will not be other nodes nodes.masterNodeId(localNode().getId()); final ClusterState next = ClusterState.builder(current).nodes(nodes).build(); final ClusterTasksResult.Builder<DiscoveryNode> result = ClusterTasksResult.builder(); return result.successes(tasks).build(next); }
public DiscoveryNodes removeDeadMembers(Set<String> newNodes, String masterNodeId) { Builder builder = new Builder().masterNodeId(masterNodeId).localNodeId(localNodeId); for (DiscoveryNode node : this) { if (newNodes.contains(node.id())) { builder.put(node); } } return builder.build(); }
protected ClusterState createInitialState(DiscoveryNode localNode) { ClusterState.Builder builder = clusterApplier.newClusterStateBuilder(); return builder.nodes(DiscoveryNodes.builder().add(localNode) .localNodeId(localNode.getId()) .masterNodeId(localNode.getId()) .build()) .blocks(ClusterBlocks.builder() .addGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) .build(); }
@Override public ClusterState execute(ClusterState currentState) { DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); for (LocalDiscovery discovery : clusterGroups.get(clusterName).members()) { nodesBuilder.put(discovery.localNode()); } nodesBuilder.localNodeId(master.localNode().id()).masterNodeId(master.localNode().id()); // remove the NO_MASTER block in this case ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock()); return ClusterState.builder(currentState).nodes(nodesBuilder).blocks(blocks).build(); }
public DiscoveryNodes nodes() { DiscoveryNodes.Builder nodesBuilder = new DiscoveryNodes.Builder(); nodesBuilder.localNodeId(CassandraDiscovery.this.localNode.getId()).masterNodeId(CassandraDiscovery.this.localNode.getId()); for (DiscoveryNode node : members.values()) { nodesBuilder.add(node); } return nodesBuilder.build(); } }
@Override public ClusterState execute(ClusterState currentState) { DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); for (LocalDiscovery discovery : clusterGroups.get(clusterName).members()) { nodesBuilder.put(discovery.localNode()); } nodesBuilder.localNodeId(master.localNode().id()).masterNodeId(master.localNode().id()); return ClusterState.builder(currentState).nodes(nodesBuilder).build(); }
protected ClusterState createInitialState(DiscoveryNode localNode) { ClusterState.Builder builder = clusterApplier.newClusterStateBuilder(); return builder.nodes(DiscoveryNodes.builder().add(localNode) .localNodeId(localNode.getId()) .masterNodeId(localNode.getId()) .build()) .blocks(ClusterBlocks.builder() .addGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) .build(); }
public DiscoveryNodes removeDeadMembers(Set<String> newNodes, String masterNodeId) { Builder builder = new Builder().masterNodeId(masterNodeId).localNodeId(localNodeId); for (DiscoveryNode node : this) { if (newNodes.contains(node.getId())) { builder.add(node); } } return builder.build(); }
@Override public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) { DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); for (LocalDiscovery discovery : clusterGroups.get(clusterName).members()) { nodesBuilder.add(discovery.localNode()); } nodesBuilder.localNodeId(master.localNode().getId()).masterNodeId(master.localNode().getId()); currentState = ClusterState.builder(currentState).nodes(nodesBuilder).build(); return newState(master.allocationService.reroute(currentState, "node_add")); }
@Override public ClusterTasksResult<LocalClusterUpdateTask> execute(ClusterState currentState) { DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); for (LocalDiscovery discovery : clusterGroups.get(clusterName).members()) { nodesBuilder.add(discovery.localNode()); } nodesBuilder.localNodeId(master.localNode().getId()).masterNodeId(master.localNode().getId()); return newState(ClusterState.builder(currentState).nodes(nodesBuilder).build()); }
public static MasterService createMasterService(ThreadPool threadPool, DiscoveryNode localNode) { ClusterState initialClusterState = ClusterState.builder(new ClusterName(ClusterServiceUtils.class.getSimpleName())) .nodes(DiscoveryNodes.builder() .add(localNode) .localNodeId(localNode.getId()) .masterNodeId(localNode.getId())) .blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK).build(); return createMasterService(threadPool, initialClusterState); }
protected ClusterState createInitialState(DiscoveryNode localNode) { ClusterState.Builder builder = clusterApplier.newClusterStateBuilder(); return builder.nodes(DiscoveryNodes.builder().add(localNode) .localNodeId(localNode.getId()) .masterNodeId(localNode.getId()) .build()) .blocks(ClusterBlocks.builder() .addGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) .build(); }