/** * Reset failed allocation counter for unassigned shards */ private void resetFailedAllocationCounter(RoutingAllocation allocation) { final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = allocation.routingNodes().unassigned().iterator(); while (unassignedIterator.hasNext()) { ShardRouting shardRouting = unassignedIterator.next(); UnassignedInfo unassignedInfo = shardRouting.unassignedInfo(); unassignedIterator.updateUnassigned(new UnassignedInfo(unassignedInfo.getNumFailedAllocations() > 0 ? UnassignedInfo.Reason.MANUAL_ALLOCATION : unassignedInfo.getReason(), unassignedInfo.getMessage(), unassignedInfo.getFailure(), 0, unassignedInfo.getUnassignedTimeInNanos(), unassignedInfo.getUnassignedTimeInMillis(), unassignedInfo.isDelayed(), unassignedInfo.getLastAllocationStatus()), shardRouting.recoverySource(), allocation.changes()); } }
@Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || !(o instanceof ShardRouting)) { return false; } ShardRouting that = (ShardRouting) o; if (unassignedInfo != null ? !unassignedInfo.equals(that.unassignedInfo) : that.unassignedInfo != null) { return false; } return equalsIgnoringMetaData(that); }
private XContentBuilder unassignedInfoToXContent(UnassignedInfo unassignedInfo, XContentBuilder builder) throws IOException { builder.startObject("unassigned_info"); builder.field("reason", unassignedInfo.getReason()); builder.field("at", UnassignedInfo.DATE_TIME_FORMATTER.format(Instant.ofEpochMilli(unassignedInfo.getUnassignedTimeInMillis()))); if (unassignedInfo.getNumFailedAllocations() > 0) { builder.field("failed_allocation_attempts", unassignedInfo.getNumFailedAllocations()); } String details = unassignedInfo.getDetails(); if (details != null) { builder.field("details", details); } builder.field("last_allocation_status", AllocationDecision.fromAllocationStatus(unassignedInfo.getLastAllocationStatus())); builder.endObject(); return builder; } }
long nextDelayNanos = UnassignedInfo.findNextDelayedAllocation(currentNanoTime, state); if (nextDelayNanos < 0) { logger.trace("no need to schedule reroute - no delayed unassigned shards"); UnassignedInfo.getNumberOfDelayedUnassigned(state)); DelayedRerouteTask currentTask = delayedRerouteTask.getAndSet(newTask); assert existingTask == currentTask || currentTask == null;
if (shardRouting.unassignedInfo().getReason() != UnassignedInfo.Reason.FORCED_EMPTY_PRIMARY) { String unassignedInfoMessage = "force empty allocation from previous reason " + shardRouting.unassignedInfo().getReason() + ", " + shardRouting.unassignedInfo().getMessage(); unassignedInfoToUpdate = new UnassignedInfo(UnassignedInfo.Reason.FORCED_EMPTY_PRIMARY, unassignedInfoMessage, shardRouting.unassignedInfo().getFailure(), 0, System.nanoTime(), System.currentTimeMillis(), false, shardRouting.unassignedInfo().getLastAllocationStatus());
getByAllocationId(failedShard.shardId(), failedShard.allocationId().getId()); logger.debug("{} failing shard {} with unassigned info ({})", failedShard.shardId(), failedShard, unassignedInfo.shortSummary()); UnassignedInfo primaryFailedUnassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED, "primary failed while replica initializing", null, 0, unassignedInfo.getUnassignedTimeInNanos(), unassignedInfo.getUnassignedTimeInMillis(), false, AllocationStatus.NO_ATTEMPT); failShard(logger, replicaShard, primaryFailedUnassignedInfo, indexMetaData, routingChangesObserver); unassignedInfo.shortSummary()); ShardRouting sourceShard = getByAllocationId(failedShard.shardId(), failedShard.allocationId().getRelocationId()); assert sourceShard.isRelocationSourceOf(failedShard); logger.trace("{}, resolved source to [{}]. canceling relocation ... ({})", failedShard.shardId(), sourceShard, unassignedInfo.shortSummary()); cancelRelocation(sourceShard); remove(failedShard);
table.addCell(shard.unassignedInfo().getReason()); Instant instant = Instant.ofEpochMilli(shard.unassignedInfo().getUnassignedTimeInMillis()); table.addCell(UnassignedInfo.DATE_TIME_FORMATTER.format(instant)); table.addCell(TimeValue.timeValueMillis(System.currentTimeMillis() - shard.unassignedInfo().getUnassignedTimeInMillis())); table.addCell(shard.unassignedInfo().getDetails()); } else { table.addCell(null);
public UnassignedInfo readFrom(StreamInput in) throws IOException { return new UnassignedInfo(in); }
public String shortSummary() { StringBuilder sb = new StringBuilder(); sb.append("[reason=").append(reason).append("]"); sb.append(", at[").append(DATE_TIME_FORMATTER.format(Instant.ofEpochMilli(unassignedTimeMillis))).append("]"); if (failedAllocations > 0) { sb.append(", failed_attempts[").append(failedAllocations).append("]"); } sb.append(", delayed=").append(delayed); String details = getDetails(); if (details != null) { sb.append(", details[").append(details).append("]"); } sb.append(", allocation_status[").append(lastAllocationStatus.value()).append("]"); return sb.toString(); }
if (shard.unassignedInfo() != null && shard.unassignedInfo().getReason() == UnassignedInfo.Reason.INDEX_CREATED) { continue; UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.REALLOCATED_REPLICA, "existing allocation of replica to [" + currentNode + "] cancelled, sync id match found on node ["+ nodeWithHighestMatch + "]",
/** * Is the allocator responsible for allocating the given {@link ShardRouting}? */ private static boolean isResponsibleFor(final ShardRouting shard) { return shard.primary() == false // must be a replica && shard.unassigned() // must be unassigned // if we are allocating a replica because of index creation, no need to go and find a copy, there isn't one... && shard.unassignedInfo().getReason() != UnassignedInfo.Reason.INDEX_CREATED; }
/** * Finds the next (closest) delay expiration of an delayed shard in nanoseconds based on current time. * Returns 0 if delay is negative. * Returns -1 if no delayed shard is found. */ public static long findNextDelayedAllocation(long currentNanoTime, ClusterState state) { MetaData metaData = state.metaData(); RoutingTable routingTable = state.routingTable(); long nextDelayNanos = Long.MAX_VALUE; for (ShardRouting shard : routingTable.shardsWithState(ShardRoutingState.UNASSIGNED)) { UnassignedInfo unassignedInfo = shard.unassignedInfo(); if (unassignedInfo.isDelayed()) { Settings indexSettings = metaData.index(shard.index()).getSettings(); // calculate next time to schedule final long newComputedLeftDelayNanos = unassignedInfo.getRemainingDelay(currentNanoTime, indexSettings); if (newComputedLeftDelayNanos < nextDelayNanos) { nextDelayNanos = newComputedLeftDelayNanos; } } } return nextDelayNanos == Long.MAX_VALUE ? -1L : nextDelayNanos; }
public ShardRouting updateUnassigned(UnassignedInfo unassignedInfo, RecoverySource recoverySource) { assert this.unassignedInfo != null : "can only update unassign info if they are already set"; assert this.unassignedInfo.isDelayed() || (unassignedInfo.isDelayed() == false) : "cannot transition from non-delayed to delayed"; return new ShardRouting(shardId, currentNodeId, relocatingNodeId, primary, state, recoverySource, unassignedInfo, allocationId, expectedShardSize); }
shardToFail.shardId(), shardToFail, failedShard); int failedAllocations = failedShard.unassignedInfo() != null ? failedShard.unassignedInfo().getNumFailedAllocations() : 0; String message = "failed shard on node [" + shardToFail.currentNodeId() + "]: " + failedShardEntry.getMessage(); UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, message, failedShardEntry.getFailure(), failedAllocations + 1, currentNanoTime, System.currentTimeMillis(), false, AllocationStatus.NO_ATTEMPT);
@Override public void allocateUnassigned(RoutingAllocation allocation) { final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = allocation.routingNodes().unassigned().iterator(); while (unassignedIterator.hasNext()) { ShardRouting shard = unassignedIterator.next(); if (shard.primary() || shard.unassignedInfo().getReason() == UnassignedInfo.Reason.INDEX_CREATED) { continue; } if (shard.unassignedInfo().isDelayed()) { unassignedIterator.removeAndIgnore(UnassignedInfo.AllocationStatus.DELAYED_ALLOCATION, allocation.changes()); } } } }
table.addCell(shard.unassignedInfo().getReason()); table.addCell(UnassignedInfo.DATE_TIME_FORMATTER.printer().print(shard.unassignedInfo().getUnassignedTimeInMillis())); table.addCell(TimeValue.timeValueMillis(System.currentTimeMillis() - shard.unassignedInfo().getUnassignedTimeInMillis())); table.addCell(shard.unassignedInfo().getDetails()); } else { table.addCell(null);
@Override public int hashCode() { int h = hashCode; if (h == 0) { h = shardId.hashCode(); h = 31 * h + (currentNodeId != null ? currentNodeId.hashCode() : 0); h = 31 * h + (relocatingNodeId != null ? relocatingNodeId.hashCode() : 0); h = 31 * h + (primary ? 1 : 0); h = 31 * h + (state != null ? state.hashCode() : 0); h = 31 * h + (recoverySource != null ? recoverySource.hashCode() : 0); h = 31 * h + (allocationId != null ? allocationId.hashCode() : 0); h = 31 * h + (unassignedInfo != null ? unassignedInfo.hashCode() : 0); hashCode = h; } return h; }
private ClusterHealthResponse clusterHealth(ClusterHealthRequest request, ClusterState clusterState, int numberOfPendingTasks, int numberOfInFlightFetch, TimeValue pendingTaskTimeInQueue) { if (logger.isTraceEnabled()) { logger.trace("Calculating health based on state version [{}]", clusterState.version()); } String[] concreteIndices; try { concreteIndices = indexNameExpressionResolver.concreteIndexNames(clusterState, request); } catch (IndexNotFoundException e) { // one of the specified indices is not there - treat it as RED. ClusterHealthResponse response = new ClusterHealthResponse(clusterState.getClusterName().value(), Strings.EMPTY_ARRAY, clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue); response.setStatus(ClusterHealthStatus.RED); return response; } return new ClusterHealthResponse(clusterState.getClusterName().value(), concreteIndices, clusterState, numberOfPendingTasks, numberOfInFlightFetch, UnassignedInfo.getNumberOfDelayedUnassigned(clusterState), pendingTaskTimeInQueue); } }
@Override public void shardFailed(ShardRouting failedShard, UnassignedInfo unassignedInfo) { if (failedShard.primary() && failedShard.initializing()) { RecoverySource recoverySource = failedShard.recoverySource(); if (recoverySource.getType() == RecoverySource.Type.SNAPSHOT) { // mark restore entry for this shard as failed when it's due to a file corruption. There is no need wait on retries // to restore this shard on another node if the snapshot files are corrupt. In case where a node just left or crashed, // however, we only want to acknowledge the restore operation once it has been successfully restored on another node. if (unassignedInfo.getFailure() != null && Lucene.isCorruptionException(unassignedInfo.getFailure().getCause())) { changes(recoverySource).shards.put( failedShard.shardId(), new ShardRestoreStatus(failedShard.currentNodeId(), RestoreInProgress.State.FAILURE, unassignedInfo.getFailure().getCause().getMessage())); } } } }
long minDelaySetting = UnassignedInfo.findSmallestDelayedAllocationSettingNanos(settings, event.state()); if (minDelaySetting <= 0) { logger.trace("no need to schedule reroute - no delayed unassigned shards, minDelaySetting [{}], scheduled [{}]", minDelaySetting, minDelaySettingAtLastSchedulingNanos); FutureUtils.cancel(registeredNextDelayFuture); minDelaySettingAtLastSchedulingNanos = minDelaySetting; TimeValue nextDelay = TimeValue.timeValueNanos(UnassignedInfo.findNextDelayedAllocationIn(event.state())); assert nextDelay.nanos() > 0 : "next delay must be non 0 as minDelaySetting is [" + minDelaySetting + "]"; logger.info("delaying allocation for [{}] unassigned shards, next check in [{}]", UnassignedInfo.getNumberOfDelayedUnassigned(event.state()), nextDelay); registeredNextDelayFuture = threadPool.schedule(nextDelay, ThreadPool.Names.SAME, new AbstractRunnable() { @Override