/**
 * Get the master node
 *
 * @return master node
 */
public DiscoveryNode getMasterNode() {
    return masterNode();
}
/**
 * In the case we follow an elected master the new cluster state needs to have the same elected master and
 * the new cluster state version needs to be equal to or higher than our cluster state version.
 * If the first condition fails we reject the cluster state and throw an error.
 * If the second condition fails we ignore the cluster state.
 */
static boolean shouldIgnoreOrRejectNewClusterState(ESLogger logger, ClusterState currentState, ClusterState newClusterState) {
    if (currentState.nodes().masterNodeId() == null) {
        return false;
    }
    if (!currentState.nodes().masterNodeId().equals(newClusterState.nodes().masterNodeId())) {
        logger.warn("received a cluster state from a different master than the current one, rejecting (received {}, current {})",
                newClusterState.nodes().masterNode(), currentState.nodes().masterNode());
        throw new IllegalStateException("cluster state from a different master than the current one, rejecting (received "
                + newClusterState.nodes().masterNode() + ", current " + currentState.nodes().masterNode() + ")");
    } else if (newClusterState.version() < currentState.version()) {
        // if the new state has a smaller version, and it has the same master node, then no need to process it
        logger.debug("received a cluster state that has a lower version than the current one, ignoring (received {}, current {})",
                newClusterState.version(), currentState.version());
        return true;
    } else {
        return false;
    }
}
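// A minimal, self-contained sketch of the accept/ignore/reject rules above, using plain
// strings and longs instead of the real ClusterState/DiscoveryNodes types. All names here
// are hypothetical and only illustrate the decision logic: "reject" corresponds to the
// IllegalStateException path, "ignore" to returning true.
final class NewClusterStateDecision {

    static String decide(String currentMasterId, long currentVersion, String newMasterId, long newVersion) {
        if (currentMasterId == null) {
            return "accept"; // we don't follow an elected master yet: process the new state
        }
        if (!currentMasterId.equals(newMasterId)) {
            return "reject"; // a different master: fail loudly
        }
        if (newVersion < currentVersion) {
            return "ignore"; // same master but stale version: skip silently
        }
        return "accept";
    }

    public static void main(String[] args) {
        System.out.println(decide("node-1", 5, "node-1", 4)); // ignore
        System.out.println(decide("node-1", 5, "node-2", 9)); // reject
        System.out.println(decide("node-1", 5, "node-1", 6)); // accept
    }
}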
public String prettyPrint() {
    StringBuilder sb = new StringBuilder();
    sb.append("nodes: \n");
    for (DiscoveryNode node : this) {
        sb.append("   ").append(node);
        if (node == localNode()) {
            sb.append(", local");
        }
        if (node == masterNode()) {
            sb.append(", master");
        }
        sb.append("\n");
    }
    return sb.toString();
}
@Override
public ClusterState execute(ClusterState currentState) {
    return handleAnotherMaster(currentState, newState.nodes().masterNode(), newState.version(), "via a new cluster state");
}
/**
 * Returns the changes comparing these nodes to the provided nodes.
 */
public Delta delta(DiscoveryNodes other) {
    List<DiscoveryNode> removed = new ArrayList<>();
    List<DiscoveryNode> added = new ArrayList<>();
    for (DiscoveryNode node : other) {
        if (!this.nodeExists(node.id())) {
            removed.add(node);
        }
    }
    for (DiscoveryNode node : this) {
        if (!other.nodeExists(node.id())) {
            added.add(node);
        }
    }
    DiscoveryNode previousMasterNode = null;
    DiscoveryNode newMasterNode = null;
    if (masterNodeId != null) {
        if (other.masterNodeId == null || !other.masterNodeId.equals(masterNodeId)) {
            previousMasterNode = other.masterNode();
            newMasterNode = masterNode();
        }
    }
    return new Delta(previousMasterNode, newMasterNode, localNodeId,
            Collections.unmodifiableList(removed), Collections.unmodifiableList(added));
}
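// For illustration only: the same added/removed computation expressed with plain java.util
// sets of node ids. This is a hypothetical standalone sketch, not the real DiscoveryNodes
// type; it shows that delta() is two set differences plus a master comparison.
import java.util.*;

final class NodeDeltaSketch {

    static Map<String, Set<String>> delta(Set<String> previousIds, Set<String> currentIds) {
        Set<String> added = new HashSet<>(currentIds);
        added.removeAll(previousIds);   // in current but not previous -> added
        Set<String> removed = new HashSet<>(previousIds);
        removed.removeAll(currentIds);  // in previous but not current -> removed
        Map<String, Set<String>> result = new HashMap<>();
        result.put("added", added);
        result.put("removed", removed);
        return result;
    }

    public static void main(String[] args) {
        Set<String> previous = new HashSet<>(Arrays.asList("a", "b"));
        Set<String> current = new HashSet<>(Arrays.asList("b", "c"));
        System.out.println(delta(previous, current)); // {added=[c], removed=[a]}
    }
}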
public void nodeMappingRefresh(final ClusterState state, final NodeMappingRefreshRequest request) {
    final DiscoveryNodes nodes = state.nodes();
    if (nodes.masterNode() == null) {
        logger.warn("can't send mapping refresh for [{}][{}], no master known.",
                request.index(), Strings.arrayToCommaDelimitedString(request.types()));
        return;
    }
    transportService.sendRequest(nodes.masterNode(), ACTION_NAME, request, EmptyTransportResponseHandler.INSTANCE_SAME);
}
private PingResponse createPingResponse(DiscoveryNodes discoNodes) {
    return new PingResponse(discoNodes.localNode(), discoNodes.masterNode(), clusterName, contextProvider.nodeHasJoinedClusterOnce());
}
public void shardStarted(final ShardRouting shardRouting, String indexUUID, final String reason) {
    DiscoveryNode masterNode = clusterService.state().nodes().masterNode();
    if (masterNode == null) {
        logger.warn("{} can't send shard started for {}, no master known.", shardRouting.shardId(), shardRouting);
        return;
    }
    shardStarted(shardRouting, indexUUID, reason, masterNode);
}
public void shardFailed(final ShardRouting shardRouting, final String indexUUID, final String message, @Nullable final Throwable failure) {
    DiscoveryNode masterNode = clusterService.state().nodes().masterNode();
    if (masterNode == null) {
        logger.warn("can't send shard failed for {}, no master known.", shardRouting);
        return;
    }
    innerShardFailed(shardRouting, indexUUID, masterNode, message, failure);
}
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
    if (!success) {
        // failed to join. Try again...
        joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
        return currentState;
    }
    if (currentState.getNodes().masterNode() == null) {
        // Post 1.3.0, the master should publish a new cluster state before acking our join request.
        // We should now have a valid master.
        logger.debug("no master node is set, despite the join request completing. retrying pings.");
        joinThreadControl.markThreadAsDoneAndStartNew(currentThread);
        return currentState;
    }
    if (!currentState.getNodes().masterNode().equals(finalMasterNode)) {
        return joinThreadControl.stopRunningThreadAndRejoin(currentState, "master_switched_while_finalizing_join");
    }
    // Note: we do not have to start master fault detection here because it's set at
    // {@link #handleNewClusterStateFromMaster} when the first cluster state arrives.
    joinThreadControl.markThreadAsDone(currentThread);
    return currentState;
}
private void handleLeaveRequest(final DiscoveryNode node) {
    if (lifecycleState() != Lifecycle.State.STARTED) {
        // not started, ignore a node failure
        return;
    }
    if (localNodeMaster()) {
        removeNode(node, "zen-disco-node-left", "left");
    } else if (node.equals(nodes().masterNode())) {
        handleMasterGone(node, "shut_down");
    }
}
public void allocateDangled(Collection<IndexMetaData> indices, final Listener listener) {
    ClusterState clusterState = clusterService.state();
    DiscoveryNode masterNode = clusterState.nodes().masterNode();
    if (masterNode == null) {
        listener.onFailure(new MasterNotDiscoveredException("no master to send allocate dangled request"));
        return;
    }
    AllocateDangledRequest request = new AllocateDangledRequest(clusterService.localNode(),
            indices.toArray(new IndexMetaData[indices.size()]));
    transportService.sendRequest(masterNode, ACTION_NAME, request, new TransportResponseHandler<AllocateDangledResponse>() {
        @Override
        public AllocateDangledResponse newInstance() {
            return new AllocateDangledResponse();
        }

        @Override
        public void handleResponse(AllocateDangledResponse response) {
            listener.onResponse(response);
        }

        @Override
        public void handleException(TransportException exp) {
            listener.onFailure(exp);
        }

        @Override
        public String executor() {
            return ThreadPool.Names.SAME;
        }
    });
}
DiscoveryNodes nodes = nodes();
if (sendLeaveRequest) {
    if (nodes.masterNode() == null) {
        // if we don't know who the master is, there is no one to send a leave request to
    } else if (!nodes.localNodeMaster()) {
        try {
            membership.sendLeaveRequestBlocking(nodes.masterNode(), nodes.localNode(), TimeValue.timeValueSeconds(1));
        } catch (Exception e) {
            logger.debug("failed to send leave request to master [{}]", e, nodes.masterNode());
        }
    } else {
        // we are the master: tell the other master-eligible nodes we are leaving so a new election can start
        DiscoveryNode[] possibleMasters = electMaster.nextPossibleMasters(nodes.nodes().values(), 5);
        for (DiscoveryNode possibleMaster : possibleMasters) {
            if (nodes.localNode().equals(possibleMaster)) {
                continue;
            }
            try {
                membership.sendLeaveRequest(nodes.localNode(), possibleMaster);
            } catch (Exception e) {
                logger.debug("failed to send leave request from master [{}] to possible master [{}]", e, nodes.masterNode(), possibleMaster);
            }
        }
    }
}
public void nodeIndexDeleted(final ClusterState clusterState, final String index, final Settings indexSettings, final String nodeId) {
    final DiscoveryNodes nodes = clusterState.nodes();
    transportService.sendRequest(clusterState.nodes().masterNode(),
            INDEX_DELETED_ACTION_NAME, new NodeIndexDeletedMessage(index, nodeId), EmptyTransportResponseHandler.INSTANCE_SAME);
    if (nodes.localNode().isDataNode() == false) {
        logger.trace("[{}] not acking store deletion (not a data node)", index);
        return;
    }
    threadPool.generic().execute(new AbstractRunnable() {
        @Override
        public void onFailure(Throwable t) {
            logger.warn("[{}] failed to ack index store deleted for index", t, index);
        }

        @Override
        protected void doRun() throws Exception {
            lockIndexAndAck(index, nodes, nodeId, clusterState, indexSettings);
        }
    });
}
private void lockIndexAndAck(String index, DiscoveryNodes nodes, String nodeId, ClusterState clusterState, Settings indexSettings) throws IOException {
    try {
        // we are waiting until we can lock the index / all shards on the node and then we ack the delete of the store to the
        // master. If we can't acquire the locks here immediately there might be a shard of this index still holding on to the lock
        // due to a "currently canceled recovery" or so. The shard will delete itself BEFORE the lock is released so it's guaranteed
        // to be deleted by the time we get the lock
        indicesService.processPendingDeletes(new Index(index), indexSettings, new TimeValue(30, TimeUnit.MINUTES));
        transportService.sendRequest(clusterState.nodes().masterNode(),
                INDEX_STORE_DELETED_ACTION_NAME, new NodeIndexStoreDeletedMessage(index, nodeId), EmptyTransportResponseHandler.INSTANCE_SAME);
    } catch (LockObtainFailedException exc) {
        logger.warn("[{}] failed to lock all shards for index - timed out after 30 minutes", index);
    } catch (InterruptedException e) {
        logger.warn("[{}] failed to lock all shards for index - interrupted", index);
    }
}
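// Standalone sketch of the same "acquire the lock with a bounded wait, then ack" pattern,
// using a plain ReentrantLock in place of the per-shard index locks. The names are
// hypothetical; this only illustrates the concurrency pattern described in the comment above.
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;

final class LockThenAckSketch {

    private final ReentrantLock indexLock = new ReentrantLock();

    // returns true if the ack may be sent, false if we timed out waiting for the lock
    boolean awaitDeleteAndAck() throws InterruptedException {
        // wait (bounded) for any straggling holder, e.g. a canceled recovery, to release the lock
        if (!indexLock.tryLock(30, TimeUnit.MINUTES)) {
            return false; // timed out: log a warning instead of acking
        }
        try {
            // the holder deletes the store before releasing, so holding the lock means it is gone
            return true;
        } finally {
            indexLock.unlock();
        }
    }
}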
/**
 * Fails the given snapshot restore operation for the given shard
 */
public void failRestore(SnapshotId snapshotId, ShardId shardId) {
    logger.debug("[{}] failed to restore shard [{}]", snapshotId, shardId);
    UpdateIndexShardRestoreStatusRequest request = new UpdateIndexShardRestoreStatusRequest(snapshotId, shardId,
            new ShardRestoreStatus(clusterService.state().nodes().localNodeId(), RestoreInProgress.State.FAILURE));
    transportService.sendRequest(clusterService.state().nodes().masterNode(),
            UPDATE_RESTORE_ACTION_NAME, request, EmptyTransportResponseHandler.INSTANCE_SAME);
}
/**
 * This method is used by {@link StoreRecoveryService} to notify
 * {@code RestoreService} about shard restore completion.
 *
 * @param snapshotId snapshot id
 * @param shardId    shard id
 */
public void indexShardRestoreCompleted(SnapshotId snapshotId, ShardId shardId) {
    logger.trace("[{}] successfully restored shard [{}]", snapshotId, shardId);
    UpdateIndexShardRestoreStatusRequest request = new UpdateIndexShardRestoreStatusRequest(snapshotId, shardId,
            new ShardRestoreStatus(clusterService.state().nodes().localNodeId(), RestoreInProgress.State.SUCCESS));
    transportService.sendRequest(clusterService.state().nodes().masterNode(),
            UPDATE_RESTORE_ACTION_NAME, request, EmptyTransportResponseHandler.INSTANCE_SAME);
}
@Override
public void onNodeAck(DiscoveryNode node, @Nullable Throwable t) {
    if (!ackedTaskListener.mustAck(node)) {
        // we always wait for the master ack anyway
        if (!node.equals(nodes.masterNode())) {
            return;
        }
    }
    if (t == null) {
        logger.trace("ack received from node [{}], cluster_state update (version: {})", node, clusterStateVersion);
    } else {
        this.lastFailure = t;
        logger.debug("ack received from node [{}], cluster_state update (version: {})", t, node, clusterStateVersion);
    }
    if (countDown.countDown()) {
        logger.trace("all expected nodes acknowledged cluster_state update (version: {})", clusterStateVersion);
        FutureUtils.cancel(ackTimeoutCallback);
        ackedTaskListener.onAllNodesAcked(lastFailure);
    }
}
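// Standalone sketch of the ack-counting pattern above: the count-down returns true exactly
// once, when the final expected ack arrives, so the completion callback fires a single time.
// Hypothetical names; a plain AtomicInteger stands in for Elasticsearch's CountDown helper.
import java.util.concurrent.atomic.AtomicInteger;

final class AckCounterSketch {

    private final AtomicInteger remaining;

    AckCounterSketch(int expectedAcks) {
        this.remaining = new AtomicInteger(expectedAcks);
    }

    // true only for the final ack; the caller runs onAllNodesAcked exactly once
    boolean onAck() {
        return remaining.decrementAndGet() == 0;
    }

    public static void main(String[] args) {
        AckCounterSketch counter = new AckCounterSketch(3);
        System.out.println(counter.onAck()); // false
        System.out.println(counter.onAck()); // false
        System.out.println(counter.onAck()); // true -> fire onAllNodesAcked
    }
}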
/**
 * Updates the shard status
 */
public void updateIndexShardSnapshotStatus(SnapshotId snapshotId, ShardId shardId, SnapshotsInProgress.ShardSnapshotStatus status) {
    UpdateIndexShardSnapshotStatusRequest request = new UpdateIndexShardSnapshotStatusRequest(snapshotId, shardId, status);
    try {
        if (clusterService.state().nodes().localNodeMaster()) {
            innerUpdateSnapshotState(request);
        } else {
            transportService.sendRequest(clusterService.state().nodes().masterNode(),
                    UPDATE_SNAPSHOT_ACTION_NAME, request, EmptyTransportResponseHandler.INSTANCE_SAME);
        }
    } catch (Throwable t) {
        logger.warn("[{}] [{}] failed to update snapshot state", t, request.snapshotId(), request.status());
    }
}
@Override
public ClusterState execute(ClusterState currentState) {
    // Take into account the previous known nodes, if they happen not to be available
    // then fault detection will remove these nodes.
    if (currentState.nodes().masterNode() != null) {
        // TODO can we tie break here? we don't have a remote master cluster state version to decide on
        logger.trace("join thread elected local node as master, but there is already a master in place: {}",
                currentState.nodes().masterNode());
        throw new NotMasterException("Node [" + clusterService.localNode() + "] not master for join request");
    }

    DiscoveryNodes.Builder builder = new DiscoveryNodes.Builder(currentState.nodes())
            .masterNodeId(currentState.nodes().localNode().id());
    // update the fact that we are the master...
    ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks())
            .removeGlobalBlock(discoverySettings.getNoMasterBlock()).build();
    currentState = ClusterState.builder(currentState).nodes(builder).blocks(clusterBlocks).build();

    // reroute now to remove any dead nodes (master may have stepped down when they left and didn't update the routing table)
    RoutingAllocation.Result result = routingService.getAllocationService().reroute(currentState, "nodes joined");
    if (result.changed()) {
        currentState = ClusterState.builder(currentState).routingResult(result).build();
    }

    // Add the incoming join requests.
    // Note: we only do this now (after the reroute) to avoid assigning shards to these nodes.
    return super.execute(currentState);
}