private void reroute(RoutingAllocation allocation) { assert hasDeadNodes(allocation) == false : "dead nodes should be explicitly cleaned up. See deassociateDeadNodes"; assert AutoExpandReplicas.getAutoExpandReplicaChanges(allocation.metaData(), allocation.nodes()).isEmpty() : "auto-expand replicas out of sync with number of nodes in the cluster"; // now allocate all the unassigned to available nodes if (allocation.routingNodes().unassigned().size() > 0) { removeDelayMarkers(allocation); gatewayAllocator.allocateUnassigned(allocation); } shardsAllocator.allocate(allocation); assert RoutingNodes.assertShardStats(allocation.routingNodes()); }
/**
 * Returns the routing nodes view of this routing table. The view is created the first
 * time it is requested and the same instance is returned on later calls.
 */
public RoutingNodes getRoutingNodes() {
    if (routingNodes == null) {
        // build the view lazily on first access
        routingNodes = new RoutingNodes(this);
    }
    return routingNodes;
}
/**
 * Decides whether the given shard may be allocated: primaries always get YES, replicas
 * get YES only when an active primary exists for the same shard id and NO otherwise.
 */
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) {
    if (shardRouting.primary()) {
        return allocation.decision(Decision.YES, NAME, "shard is primary and can be allocated");
    }

    // replica: the outcome depends solely on whether an active primary is present
    final boolean primaryIsActive = allocation.routingNodes().activePrimary(shardRouting.shardId()) != null;
    if (primaryIsActive) {
        return allocation.decision(Decision.YES, NAME, "primary shard for this replica is already active");
    }
    return allocation.decision(Decision.NO, NAME, "primary shard for this replica is not yet active");
}
} // closes the enclosing type, whose header is outside this excerpt
/**
 * Starts relocating a started shard to another node. The relocating source replaces the
 * started shard in the assigned shards, and the initializing target is added to its node,
 * to the assigned shards and to the recovery bookkeeping.
 *
 * @param startedShard      the shard to relocate
 * @param nodeId            id of the node the shard is relocated to
 * @param expectedShardSize expected size of the shard, passed on to the relocating shard
 * @param changes           observer that is told the relocation has started
 * @return pair of source relocating and target initializing shards.
 */
public Tuple<ShardRouting,ShardRouting> relocateShard(ShardRouting startedShard, String nodeId,
                                                      long expectedShardSize, RoutingChangesObserver changes) {
    ensureMutable();
    relocatingShards++;

    final ShardRouting relocationSource = startedShard.relocate(nodeId, expectedShardSize);
    final ShardRouting relocationTarget = relocationSource.getTargetRelocatingShard();

    // swap the started shard for its relocating counterpart
    updateAssigned(startedShard, relocationSource);

    // register the initializing target on its node and in the bookkeeping structures
    node(relocationTarget.currentNodeId()).add(relocationTarget);
    assignedShardsAdd(relocationTarget);
    addRecovery(relocationTarget);

    changes.relocationStarted(startedShard, relocationTarget);
    return Tuple.tuple(relocationSource, relocationTarget);
}
/** * Returns one active replica shard for the given shard id or <code>null</code> if * no active replica is found. * * Since replicas could possibly be on nodes with a older version of ES than * the primary is, this will return replicas on the highest version of ES. * */ public ShardRouting activeReplicaWithHighestVersion(ShardId shardId) { // It's possible for replicaNodeVersion to be null, when deassociating dead nodes // that have been removed, the shards are failed, and part of the shard failing // calls this method with an out-of-date RoutingNodes, where the version might not // be accessible. Therefore, we need to protect against the version being null // (meaning the node will be going away). return assignedShards(shardId).stream() .filter(shr -> !shr.primary() && shr.active()) .filter(shr -> node(shr.currentNodeId()) != null) .max(Comparator.comparing(shr -> node(shr.currentNodeId()).node(), Comparator.nullsFirst(Comparator.comparing(DiscoveryNode::getVersion)))) .orElse(null); }
/**
 * Removes an assigned shard from its node and from the internal bookkeeping. The inactive
 * counters are decremented for an initializing shard that has no relocating node; a
 * relocating shard has its relocation cancelled first.
 *
 * @param shard the assigned shard to remove
 */
private void remove(ShardRouting shard) {
    assert shard.unassigned() == false : "only assigned shards can be removed here (" + shard + ")";
    node(shard.currentNodeId()).remove(shard);

    ShardRouting toRemove = shard;
    final boolean initializingWithoutRelocation = toRemove.initializing() && toRemove.relocatingNodeId() == null;
    if (initializingWithoutRelocation) {
        --inactiveShardCount;
        assert inactiveShardCount >= 0;
        if (toRemove.primary()) {
            --inactivePrimaryCount;
        }
    } else if (toRemove.relocating()) {
        // continue with the shard instance returned by the cancellation
        toRemove = cancelRelocation(toRemove);
    }

    assignedShardsRemove(toRemove);
    if (toRemove.initializing()) {
        removeRecovery(toRemove);
    }
}
ensureMutable(); assert failedShard.assignedToNode() : "only assigned shards can be failed"; assert indexMetaData.getIndex().equals(failedShard.index()) : "shard failed for unknown index (shard entry: " + failedShard + ")"; assert getByAllocationId(failedShard.shardId(), failedShard.allocationId().getId()) == failedShard : "shard routing to fail does not exist in routing table, expected: " + failedShard + " but was: " + getByAllocationId(failedShard.shardId(), failedShard.allocationId().getId()); List<ShardRouting> assignedShards = assignedShards(failedShard.shardId()); if (assignedShards.isEmpty() == false) { ShardRouting replicaShard = getByAllocationId(routing.shardId(), routing.allocationId().getId()); assert replicaShard != null : "failed to re-resolve " + routing + " when failing replicas"; UnassignedInfo primaryFailedUnassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED, "primary failed while replica initializing", null, 0, unassignedInfo.getUnassignedTimeInNanos(), unassignedInfo.getUnassignedTimeInMillis(), false, AllocationStatus.NO_ATTEMPT); failShard(logger, replicaShard, primaryFailedUnassignedInfo, indexMetaData, routingChangesObserver); ShardRouting targetShard = getByAllocationId(failedShard.shardId(), failedShard.allocationId().getRelocationId()); assert targetShard.isRelocationTargetOf(failedShard); if (failedShard.primary()) { logger.trace("{} is removed due to the failure/cancellation of the source shard", targetShard); remove(targetShard); routingChangesObserver.shardFailed(targetShard, unassignedInfo); } else { removeRelocationSource(targetShard); routingChangesObserver.relocationSourceRemoved(targetShard);
ensureMutable(); ShardRouting startedShard = started(initializingShard); logger.trace("{} marked shard as started (routing: {})", initializingShard.shardId(), initializingShard); routingChangesObserver.shardStarted(initializingShard, startedShard); RoutingNode relocationSourceNode = node(initializingShard.relocatingNodeId()); ShardRouting relocationSourceShard = relocationSourceNode.getByShardId(initializingShard.shardId()); assert relocationSourceShard.isRelocationSourceOf(initializingShard); assert relocationSourceShard.getTargetRelocatingShard() == initializingShard : "relocation target mismatch, expected: " + initializingShard + " but was: " + relocationSourceShard.getTargetRelocatingShard(); remove(relocationSourceShard); routingChangesObserver.relocationCompleted(relocationSourceShard); List<ShardRouting> assignedShards = assignedShards(startedShard.shardId()); if (routing.isRelocationTarget()) { ShardRouting sourceShard = getByAllocationId(routing.shardId(), routing.allocationId().getRelocationId()); ShardRouting startedReplica = cancelRelocation(sourceShard); remove(routing); routingChangesObserver.shardFailed(routing, new UnassignedInfo(UnassignedInfo.Reason.REINITIALIZED, "primary changed")); relocateShard(startedReplica, sourceShard.relocatingNodeId(), sourceShard.getExpectedShardSize(), routingChangesObserver); } else { ShardRouting reinitializedReplica = reinitReplica(routing); routingChangesObserver.initializedReplicaReinitialized(routing, reinitializedReplica);
public List<ShardRouting> shardsWithState(ShardRoutingState... state) { // TODO these are used on tests only - move into utils class List<ShardRouting> shards = new ArrayList<>(); for (RoutingNode routingNode : this) { shards.addAll(routingNode.shardsWithState(state)); } for (ShardRoutingState s : state) { if (s == ShardRoutingState.UNASSIGNED) { unassigned().forEach(shards::add); break; } } return shards; }
/**
 * Initializes an unassigned shard on the given node: the shard is added to that node and
 * to the assigned shards, and the inactive counters are incremented.
 *
 * @param shard        the unassigned shard to initialize
 * @param nodeId       id of the node the shard is initialized on
 * @param expectedSize expected shard size, passed on to the shard
 */
public void initialize(ShardRouting shard, String nodeId, long expectedSize) {
    ensureMutable();
    assert shard.unassigned() : shard;

    shard.initialize(nodeId, expectedSize);
    node(nodeId).add(shard);

    // the shard now counts as inactive until it is started
    inactiveShardCount += 1;
    if (shard.primary()) {
        inactivePrimaryCount += 1;
    }

    assignedShardsAdd(shard);
}
final ShardId shardId = new ShardId(index, i); final HashSet<ShardRouting> shards = shardsByShardId.get(shardId); final List<ShardRouting> mutableShardRoutings = routingNodes.assignedShards(shardId); assert (shards == null && mutableShardRoutings.size() == 0) || (shards != null && shards.size() == mutableShardRoutings.size() && shards.containsAll(mutableShardRoutings)); for (ShardRouting shard : routingNodes.unassigned()) { if (shard.primary()) { unassignedPrimaryCount++; for (ShardRouting shard : routingNodes.unassigned().ignored()) { if (shard.primary()) { unassignedIgnoredPrimaryCount++; routingNodes.unassigned().getNumPrimaries() + "]"; assert unassignedIgnoredPrimaryCount == routingNodes.unassignedShards.getNumIgnoredPrimaries() : "Unassigned ignored primaries is [" + unassignedIgnoredPrimaryCount + "] but RoutingNodes returned unassigned ignored primaries [" + routingNodes.unassigned().getNumIgnoredPrimaries() + "]"; assert inactivePrimaryCount == routingNodes.inactivePrimaryCount : "Inactive Primary count [" + inactivePrimaryCount + "] but RoutingNodes returned inactive primaries [" + "Inactive Shard count [" + inactiveShardCount + "] but RoutingNodes returned inactive shards [" + routingNodes.inactiveShardCount + "]"; assert routingNodes.getRelocatingShardCount() == relocating : "Relocating shards mismatch [" + routingNodes.getRelocatingShardCount() + "] but expected [" + relocating + "]";
throw new IllegalArgumentException("Cannot have two different shards with same shard id on same node"); assignedShardsAdd(shard); if (shard.relocating()) { relocatingShards++; addInitialRecovery(targetShardRouting, indexShard.primary); previousValue = entries.put(targetShardRouting.shardId(), targetShardRouting); if (previousValue != null) { throw new IllegalArgumentException("Cannot have two different shards with same shard id on same node"); assignedShardsAdd(targetShardRouting); } else if (shard.initializing()) { if (shard.primary()) { addInitialRecovery(shard, indexShard.primary);
ObjectIntHashMap<String> nodesPerAttribute = allocation.routingNodes().nodesPerAttributesCounts(awarenessAttribute); for (ShardRouting assignedShard : allocation.routingNodes().assignedShards(shardRouting.shardId())) { if (assignedShard.started() || assignedShard.initializing()) { RoutingNode routingNode = allocation.routingNodes().node(assignedShard.currentNodeId()); shardPerAttribute.addTo(routingNode.node().getAttributes().get(awarenessAttribute), 1); if (!node.nodeId().equals(nodeId)) { shardPerAttribute.putOrAdd(allocation.routingNodes().node(nodeId).node().getAttributes().get(awarenessAttribute), 0, -1); shardPerAttribute.addTo(node.node().getAttributes().get(awarenessAttribute), 1);
/**
 * Looks up the active primary among the assigned shards of the given shard id.
 *
 * @param shardId the shard id to look up
 * @return the first assigned shard that is both primary and active, or <code>null</code>
 *         if there is none
 */
public ShardRouting activePrimary(ShardId shardId) {
    for (ShardRouting candidate : assignedShards(shardId)) {
        if (!candidate.primary() || !candidate.active()) {
            continue;
        }
        return candidate;
    }
    return null;
}
DiscoveryNode discoNode = allocation.nodes().resolveNode(node); boolean found = false; for (RoutingNodes.RoutingNodeIterator it = allocation.routingNodes().routingNodeIter(discoNode.id()); it.hasNext(); ) { ShardRouting shardRouting = it.next(); if (!shardRouting.shardId().equals(shardId)) { RoutingNode relocatingFromNode = allocation.routingNodes().node(shardRouting.relocatingNodeId()); if (relocatingFromNode != null) { for (ShardRouting fromShardRouting : relocatingFromNode) { if (fromShardRouting.isSameShard(shardRouting) && fromShardRouting.state() == RELOCATING) { allocation.routingNodes().cancelRelocation(fromShardRouting); break; RoutingNodes.RoutingNodeIterator initializingNode = allocation.routingNodes().routingNodeIter(shardRouting.relocatingNodeId()); if (initializingNode != null) { while (initializingNode.hasNext()) {
/**
 * Removes the given shard from the internal bookkeeping. The inactive counters are
 * decremented for a non-active shard that has no relocating node; a relocating shard has
 * its relocation cancelled. In both cases the shard is then dropped from the assigned shards.
 *
 * @param shard the shard to remove
 */
private void remove(ShardRouting shard) {
    ensureMutable();

    final boolean inactiveWithoutRelocation = shard.active() == false && shard.relocatingNodeId() == null;
    if (inactiveWithoutRelocation) {
        --inactiveShardCount;
        assert inactiveShardCount >= 0;
        if (shard.primary()) {
            --inactivePrimaryCount;
        }
    } else if (shard.relocating()) {
        cancelRelocation(shard);
    }

    assignedShardsRemove(shard);
}
routingNodes.unassigned().shuffle(); long currentNanoTime = currentNanoTime(); RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, tmpState, allocation.addIgnoreShardForNode(shardToFail.shardId(), shardToFail.currentNodeId()); ShardRouting failedShard = routingNodes.getByAllocationId(shardToFail.shardId(), shardToFail.allocationId().getId()); if (failedShard != null) { if (failedShard != shardToFail) { routingNodes.failShard(logger, failedShard, unassignedInfo, indexMetaData, allocation.changes()); } else { logger.trace("{} shard routing failed in an earlier iteration (routing: {})", shardToFail.shardId(), shardToFail);
/**
 * Re-initializes the given primary candidate: any ongoing relocation is cancelled, the
 * re-initialized shard replaces the candidate in the assigned shards, the inactive
 * counters are incremented and a recovery is registered for the new shard.
 *
 * @param candidate the shard to re-initialize
 * @return the re-initialized shard
 */
private ShardRouting reinitShadowPrimary(ShardRouting candidate) {
    if (candidate.relocating()) {
        // NOTE(review): the value returned by cancelRelocation (if any) is not used here - confirm this is intended
        cancelRelocation(candidate);
    }

    final ShardRouting reinitialized = candidate.reinitializePrimaryShard();
    updateAssigned(candidate, reinitialized);

    // the re-initialized primary is inactive again
    inactivePrimaryCount++;
    inactiveShardCount++;

    addRecovery(reinitialized);
    return reinitialized;
}
int currentInRecoveries = allocation.routingNodes().getIncomingRecoveries(node.nodeId()); if (currentInRecoveries >= concurrentIncomingRecoveries) { return allocation.decision(THROTTLE, NAME, } else { ShardRouting primaryShard = allocation.routingNodes().activePrimary(shardRouting.shardId()); if (primaryShard == null) { return allocation.decision(Decision.NO, NAME, "primary shard for this replica is not yet active"); int primaryNodeOutRecoveries = allocation.routingNodes().getOutgoingRecoveries(primaryShard.currentNodeId()); if (primaryNodeOutRecoveries >= concurrentOutgoingRecoveries) { return allocation.decision(THROTTLE, NAME,
/**
 * Appends an assigned shard to the list kept for its shard id, creating that list when the
 * shard id has no entry yet.
 *
 * @param shard the assigned shard to record
 */
private void assignedShardsAdd(ShardRouting shard) {
    assert shard.unassigned() == false : "unassigned shard " + shard + " cannot be added to list of assigned shards";

    final List<ShardRouting> perShardId = assignedShards.computeIfAbsent(shard.shardId(), id -> new ArrayList<>());

    // the same instance must never be recorded twice
    assert assertInstanceNotInList(shard, perShardId) : "shard " + shard + " cannot appear twice in list of assigned shards";
    perShardId.add(shard);
}