/** * Builds decisions for all nodes in the cluster, so that the explain API can provide information on * allocation decisions for each node, while still waiting to allocate the shard (e.g. due to fetching shard data). */ protected List<NodeAllocationResult> buildDecisionsForAllNodes(ShardRouting shard, RoutingAllocation allocation) { List<NodeAllocationResult> results = new ArrayList<>(); for (RoutingNode node : allocation.routingNodes()) { Decision decision = allocation.deciders().canAllocate(shard, node, allocation); results.add(new NodeAllocationResult(node.node(), null, decision)); } return results; } }
@Override public void allocate(RoutingAllocation allocation) { if (allocation.routingNodes().size() == 0) { /* with no nodes this is pointless */ return; } final Balancer balancer = new Balancer(logger, allocation, weightFunction, threshold); balancer.allocateUnassigned(); balancer.moveShards(); balancer.balance(); }
public Balancer(Logger logger, RoutingAllocation allocation, WeightFunction weight, float threshold) { this.logger = logger; this.allocation = allocation; this.weight = weight; this.threshold = threshold; this.routingNodes = allocation.routingNodes(); this.metaData = allocation.metaData(); avgShardsPerNode = ((float) metaData.getTotalNumberOfShards()) / routingNodes.size(); nodes = Collections.unmodifiableMap(buildModelFromAssigned()); sorter = newNodeSorter(); }
private boolean hasDeadNodes(RoutingAllocation allocation) { for (RoutingNode routingNode : allocation.routingNodes()) { if (allocation.nodes().getDataNodes().containsKey(routingNode.nodeId()) == false) { return true; } } return false; }
@Override public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { assert shardRouting.primary() : "must not call force allocate on a non-primary shard"; Iterable<ShardRouting> assignedShards = allocation.routingNodes().assignedShards(shardRouting.shardId()); return decideSameNode(shardRouting, node, allocation, assignedShards); }
private void reroute(RoutingAllocation allocation) { assert hasDeadNodes(allocation) == false : "dead nodes should be explicitly cleaned up. See deassociateDeadNodes"; assert AutoExpandReplicas.getAutoExpandReplicaChanges(allocation.metaData(), allocation.nodes()).isEmpty() : "auto-expand replicas out of sync with number of nodes in the cluster"; // now allocate all the unassigned to available nodes if (allocation.routingNodes().unassigned().size() > 0) { removeDelayMarkers(allocation); gatewayAllocator.allocateUnassigned(allocation); } shardsAllocator.allocate(allocation); assert RoutingNodes.assertShardStats(allocation.routingNodes()); }
/** * Split the list of node shard states into groups yes/no/throttle based on allocation deciders */ private NodesToAllocate buildNodesToAllocate(RoutingAllocation allocation, List<NodeGatewayStartedShards> nodeShardStates, ShardRouting shardRouting, boolean forceAllocate) { List<DecidedNode> yesNodeShards = new ArrayList<>(); List<DecidedNode> throttledNodeShards = new ArrayList<>(); List<DecidedNode> noNodeShards = new ArrayList<>(); for (NodeGatewayStartedShards nodeShardState : nodeShardStates) { RoutingNode node = allocation.routingNodes().node(nodeShardState.getNode().getId()); if (node == null) { continue; } Decision decision = forceAllocate ? allocation.deciders().canForceAllocatePrimary(shardRouting, node, allocation) : allocation.deciders().canAllocate(shardRouting, node, allocation); DecidedNode decidedNode = new DecidedNode(nodeShardState, decision); if (decision.type() == Type.THROTTLE) { throttledNodeShards.add(decidedNode); } else if (decision.type() == Type.NO) { noNodeShards.add(decidedNode); } else { yesNodeShards.add(decidedNode); } } return new NodesToAllocate(Collections.unmodifiableList(yesNodeShards), Collections.unmodifiableList(throttledNodeShards), Collections.unmodifiableList(noNodeShards)); }
@Override public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation allocation) { if (!allocation.routingNodes().allReplicasActive(shardRouting.shardId(), allocation.metaData())) { return allocation.decision(Decision.NO, NAME, "rebalancing is not allowed until all replicas in the cluster are active"); } return allocation.decision(Decision.YES, NAME, "rebalancing is allowed as all replicas are active in the cluster"); } }
@Override public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) { if (shardRouting.primary()) { return allocation.decision(Decision.YES, NAME, "shard is primary and can be allocated"); } ShardRouting primary = allocation.routingNodes().activePrimary(shardRouting.shardId()); if (primary == null) { return allocation.decision(Decision.NO, NAME, "primary shard for this replica is not yet active"); } return allocation.decision(Decision.YES, NAME, "primary shard for this replica is already active"); } }
protected static void innerAllocatedUnassigned(RoutingAllocation allocation, PrimaryShardAllocator primaryShardAllocator, ReplicaShardAllocator replicaShardAllocator) { RoutingNodes.UnassignedShards unassigned = allocation.routingNodes().unassigned(); unassigned.sort(PriorityComparator.getAllocationComparator(allocation)); // sort for priority ordering primaryShardAllocator.allocateUnassigned(allocation); replicaShardAllocator.processExistingRecoveries(allocation); replicaShardAllocator.allocateUnassigned(allocation); }
private void deassociateDeadNodes(RoutingAllocation allocation) { for (Iterator<RoutingNode> it = allocation.routingNodes().mutableIterator(); it.hasNext(); ) { RoutingNode node = it.next(); if (allocation.nodes().getDataNodes().containsKey(node.nodeId())) { // its a live node, continue continue; } // now, go over all the shards routing on the node, and fail them for (ShardRouting shardRouting : node.copyShards()) { final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); boolean delayed = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetaData.getSettings()).nanos() > 0; UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "node_left[" + node.nodeId() + "]", null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), delayed, AllocationStatus.NO_ATTEMPT); allocation.routingNodes().failShard(logger, shardRouting, unassignedInfo, indexMetaData, allocation.changes()); } // its a dead node, remove it, note, its important to remove it *after* we apply failed shard // since it relies on the fact that the RoutingNode exists in the list of nodes it.remove(); } }
@Override public Decision canRebalance(RoutingAllocation allocation) { if (clusterConcurrentRebalance == -1) { return allocation.decision(Decision.YES, NAME, "unlimited concurrent rebalances are allowed"); } int relocatingShards = allocation.routingNodes().getRelocatingShardCount(); if (relocatingShards >= clusterConcurrentRebalance) { return allocation.decision(Decision.THROTTLE, NAME, "reached the limit of concurrently rebalancing shards [%d], cluster setting [%s=%d]", relocatingShards, CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING.getKey(), clusterConcurrentRebalance); } return allocation.decision(Decision.YES, NAME, "below threshold [%d] for concurrent rebalances, current rebalance shard count [%d]", clusterConcurrentRebalance, relocatingShards); } }
@Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { if (shardRouting.primary()) { if (shardRouting.currentNodeId() == null) { if (shardRouting.recoverySource() != null && shardRouting.recoverySource().getType() == RecoverySource.Type.SNAPSHOT) { // restoring from a snapshot - check that the node can handle the version return isVersionCompatible((SnapshotRecoverySource)shardRouting.recoverySource(), node, allocation); } else { // existing or fresh primary on the node return allocation.decision(Decision.YES, NAME, "the primary shard is new or already existed on the node"); } } else { // relocating primary, only migrate to newer host return isVersionCompatibleRelocatePrimary(allocation.routingNodes(), shardRouting.currentNodeId(), node, allocation); } } else { final ShardRouting primary = allocation.routingNodes().activePrimary(shardRouting.shardId()); // check that active primary has a newer version so that peer recovery works if (primary != null) { return isVersionCompatibleAllocatingReplica(allocation.routingNodes(), primary.currentNodeId(), node, allocation); } else { // ReplicaAfterPrimaryActiveAllocationDecider should prevent this case from occurring return allocation.decision(Decision.YES, NAME, "no active primary shard yet"); } } }
for (Iterator<ShardRouting> it = allocation.routingNodes().nodeInterleavedShardIterator(); it.hasNext(); ) { ShardRouting shardRouting = it.next(); final MoveDecision moveDecision = decideMove(shardRouting);
private void applyStartedShards(RoutingAllocation routingAllocation, List<ShardRouting> startedShardEntries) { assert startedShardEntries.isEmpty() == false : "non-empty list of started shard entries expected"; RoutingNodes routingNodes = routingAllocation.routingNodes(); for (ShardRouting startedShard : startedShardEntries) { assert startedShard.initializing() : "only initializing shards can be started"; assert routingAllocation.metaData().index(startedShard.shardId().getIndex()) != null : "shard started for unknown index (shard entry: " + startedShard + ")"; assert startedShard == routingNodes.getByAllocationId(startedShard.shardId(), startedShard.allocationId().getId()) : "shard routing to start does not exist in routing table, expected: " + startedShard + " but was: " + routingNodes.getByAllocationId(startedShard.shardId(), startedShard.allocationId().getId()); routingNodes.startShard(logger, startedShard, routingAllocation.changes()); } }
/** * Reset failed allocation counter for unassigned shards */ private void resetFailedAllocationCounter(RoutingAllocation allocation) { final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = allocation.routingNodes().unassigned().iterator(); while (unassignedIterator.hasNext()) { ShardRouting shardRouting = unassignedIterator.next(); UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); unassignedIterator.updateUnassigned(new UnassignedInfo(unassignedInfo.getNumFailedAllocations() > 0 ? UnassignedInfo.Reason.MANUAL_ALLOCATION : unassignedInfo.getReason(), unassignedInfo.getMessage(), unassignedInfo.getFailure(), 0, unassignedInfo.getUnassignedTimeInNanos(), unassignedInfo.getUnassignedTimeInMillis(), unassignedInfo.isDelayed(), unassignedInfo.getLastAllocationStatus()), shardRouting.recoverySource(), allocation.changes()); } }
private ClusterState buildResult(ClusterState oldState, RoutingAllocation allocation) { final RoutingTable oldRoutingTable = oldState.routingTable(); final RoutingNodes newRoutingNodes = allocation.routingNodes(); final RoutingTable newRoutingTable = new RoutingTable.Builder().updateNodes(oldRoutingTable.version(), newRoutingNodes).build(); final MetaData newMetaData = allocation.updateMetaDataWithRoutingChanges(newRoutingTable); assert newRoutingTable.validate(newMetaData); // validates the routing table is coherent with the cluster state metadata final ClusterState.Builder newStateBuilder = ClusterState.builder(oldState) .routingTable(newRoutingTable) .metaData(newMetaData); final RestoreInProgress restoreInProgress = allocation.custom(RestoreInProgress.TYPE); if (restoreInProgress != null) { RestoreInProgress updatedRestoreInProgress = allocation.updateRestoreInfoWithRoutingChanges(restoreInProgress); if (updatedRestoreInProgress != restoreInProgress) { ImmutableOpenMap.Builder<String, ClusterState.Custom> customsBuilder = ImmutableOpenMap.builder(allocation.getCustoms()); customsBuilder.put(RestoreInProgress.TYPE, updatedRestoreInProgress); newStateBuilder.customs(customsBuilder.build()); } } return newStateBuilder.build(); }
/** * Allocate unassigned shards to nodes (if any) where valid copies of the shard already exist. * It is up to the individual implementations of {@link #makeAllocationDecision(ShardRouting, RoutingAllocation, Logger)} * to make decisions on assigning shards to nodes. * * @param allocation the allocation state container object */ public void allocateUnassigned(RoutingAllocation allocation) { final RoutingNodes routingNodes = allocation.routingNodes(); final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = routingNodes.unassigned().iterator(); while (unassignedIterator.hasNext()) { final ShardRouting shard = unassignedIterator.next(); final AllocateUnassignedDecision allocateUnassignedDecision = makeAllocationDecision(shard, allocation, logger); if (allocateUnassignedDecision.isDecisionTaken() == false) { // no decision was taken by this allocator continue; } if (allocateUnassignedDecision.getAllocationDecision() == AllocationDecision.YES) { unassignedIterator.initialize(allocateUnassignedDecision.getTargetNode().getId(), allocateUnassignedDecision.getAllocationId(), shard.primary() ? ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE : allocation.clusterInfo().getShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE), allocation.changes()); } else { unassignedIterator.removeAndIgnore(allocateUnassignedDecision.getAllocationStatus(), allocation.changes()); } } }
/** * Removes delay markers from unassigned shards based on current time stamp. */ private void removeDelayMarkers(RoutingAllocation allocation) { final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = allocation.routingNodes().unassigned().iterator(); final MetaData metaData = allocation.metaData(); while (unassignedIterator.hasNext()) { ShardRouting shardRouting = unassignedIterator.next(); UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); if (unassignedInfo.isDelayed()) { final long newComputedLeftDelayNanos = unassignedInfo.getRemainingDelay(allocation.getCurrentNanoTime(), metaData.getIndexSafe(shardRouting.index()).getSettings()); if (newComputedLeftDelayNanos == 0) { unassignedIterator.updateUnassigned(new UnassignedInfo(unassignedInfo.getReason(), unassignedInfo.getMessage(), unassignedInfo.getFailure(), unassignedInfo.getNumFailedAllocations(), unassignedInfo.getUnassignedTimeInNanos(), unassignedInfo.getUnassignedTimeInMillis(), false, unassignedInfo.getLastAllocationStatus()), shardRouting.recoverySource(), allocation.changes()); } } } }
return explainOrThrowRejectedCommand(explain, allocation, e); final RoutingNodes routingNodes = allocation.routingNodes(); RoutingNode routingNode = routingNodes.node(discoNode.getId()); if (routingNode == null) {