private void preemptReducer(int hangingMapRequests) { clearAllPendingReduceRequests(); // preempt for making space for at least one map int preemptionReduceNumForOneMap = ResourceCalculatorUtils.divideAndCeilContainers(mapResourceRequest, reduceResourceRequest, getSchedulerResourceTypes()); int preemptionReduceNumForPreemptionLimit = ResourceCalculatorUtils.divideAndCeilContainers( Resources.multiply(getResourceLimit(), maxReducePreemptionLimit), reduceResourceRequest, getSchedulerResourceTypes()); int preemptionReduceNumForAllMaps = ResourceCalculatorUtils.divideAndCeilContainers( Resources.multiply(mapResourceRequest, hangingMapRequests), reduceResourceRequest, getSchedulerResourceTypes()); int toPreempt = Math.min(Math.max(preemptionReduceNumForOneMap, preemptionReduceNumForPreemptionLimit), preemptionReduceNumForAllMaps); LOG.info("Going to preempt " + toPreempt + " due to lack of space for maps"); assignedRequests.preemptReduce(toPreempt); }
@SuppressWarnings("unchecked") @Override public void run() { ContainerAllocatorEvent event; while (!stopped.get() && !Thread.currentThread().isInterrupted()) { try { event = RMContainerAllocator.this.eventQueue.take(); } catch (InterruptedException e) { if (!stopped.get()) { LOG.error("Returning, interrupted : " + e); } return; } try { handleEvent(event); } catch (Throwable t) { LOG.error("Error in handling event type " + event.getType() + " to the ContainreAllocator", t); // Kill the AM eventHandler.handle(new JobEvent(getJob().getID(), JobEventType.INTERNAL_ERROR)); return; } } } };
/**
 * Delegates to the superclass request and stores the response in
 * {@code allocateResponse} before returning it.
 *
 * @return the response obtained from {@code super.makeRemoteRequest()}
 * @throws IOException   propagated from the superclass call
 * @throws YarnException propagated from the superclass call
 */
@Override
protected AllocateResponse makeRemoteRequest()
    throws IOException, YarnException {
  final AllocateResponse latestResponse = super.makeRemoteRequest();
  allocateResponse = latestResponse;
  return latestResponse;
}
} // closes the enclosing scope that was opened before this excerpt
if (preemptReducersForHangingMapRequests( reducerUnconditionalPreemptionDelayMs)) { return true; Resource availableResourceForMap = getAvailableResources(); if (ResourceCalculatorUtils.computeAvailableContainers(availableResourceForMap, mapResourceRequest, getSchedulerResourceTypes()) > 0) { return preemptReducersForHangingMapRequests(reducerNoHeadroomPreemptionDelayMs);
@SuppressWarnings("unchecked") private List<Container> getResources() throws Exception { applyConcurrentTaskLimits(); Resource headRoom = Resources.clone(getAvailableResources()); AllocateResponse response; response = makeRemoteRequest(); eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT)); throw new RMContainerAllocationException( "Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationAttemptId(), e); } catch (ApplicationMasterNotRegisteredException e) { LOG.info("ApplicationMaster is out of sync with ResourceManager," register(); addOutstandingRequestOnResync(); return null; } catch (InvalidLabelResourceRequestException e) { + StringUtils.stringifyException(e); LOG.info(diagMsg); JobId jobId = this.getJob().getID(); eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg)); eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
Resource headRoom = getAvailableResources(); if (!getIsReduceStarted()) {//not set yet int completedMapsForReduceSlowstart = (int)Math.ceil(reduceSlowStart * totalMaps); } else { LOG.info("Reduce slow start threshold reached. Scheduling reduces."); setIsReduceStarted(true); LOG.info("All maps assigned. " + "Ramping up all remaining reduces:" + numPendingReduces); scheduleAllReduces(); return; Resource totalResourceLimit = getResourceLimit(); mapResourceReqt, getSchedulerResourceTypes()) >= (scheduledMaps + assignedMaps)) { ResourceCalculatorUtils.computeAvailableContainers(Resources.subtract( finalReduceResourceLimit, netScheduledReduceResource), reduceResourceReqt, getSchedulerResourceTypes()); rampUpReduces(rampUp); } else if (rampUp < 0) { int rampDown = -1 * rampUp; rampDown = Math.min(rampDown, scheduledReduces); LOG.info("Ramping down " + rampDown); rampDownReduces(rampDown);
@SuppressWarnings("unchecked") private List<Container> getResources() throws Exception { applyConcurrentTaskLimits(); Resource headRoom = Resources.clone(getAvailableResources()); AllocateResponse response; response = makeRemoteRequest(); eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT)); throw new RMContainerAllocationException( "Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationAttemptId(), e); } catch (ApplicationMasterNotRegisteredException e) { LOG.info("ApplicationMaster is out of sync with ResourceManager," register(); addOutstandingRequestOnResync(); return null; } catch (Exception e) { eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT)); throw new RMContainerAllocationException("Could not contact RM after " + Resource newHeadRoom = getAvailableResources(); List<Container> newContainers = response.getAllocatedContainers(); updateAMRMToken(response.getAMRMToken());
doCallRealMethod().when(allocator).scheduleReduces(anyInt(), anyInt(), anyInt(), anyInt(), anyInt(), anyInt(), any(Resource.class), any(Resource.class), anyInt(), anyFloat(), anyFloat()); doReturn(EnumSet.of(SchedulerResourceTypes.MEMORY)).when(allocator) .getSchedulerResourceTypes(); allocator.scheduleReduces( totalMaps, succeededMaps, scheduledMaps, scheduledReduces, numPendingReduces, maxReduceRampupLimit, reduceSlowStart); verify(allocator, never()).setIsReduceStarted(true); allocator.scheduleReduces( totalMaps, succeededMaps, 0, scheduledReduces, numPendingReduces, maxReduceRampupLimit, reduceSlowStart); verify(allocator, never()).setIsReduceStarted(true); verify(allocator, never()).scheduleAllReduces(); doReturn(BuilderUtils.newResource(0, 0)).when(allocator).getResourceLimit(); allocator.scheduleReduces( totalMaps, succeededMaps, scheduledMaps, scheduledReduces, numPendingReduces,
|| PRIORITY_MAP.equals(priority)) { if (ResourceCalculatorUtils.computeAvailableContainers(allocatedResource, mapResourceRequest, getSchedulerResourceTypes()) <= 0 || maps.isEmpty()) { LOG.info("Cannot assign container " + allocated reduceResourceRequest, getSchedulerResourceTypes()) <= 0 || (reducePending <= 0)) { LOG.info("Cannot assign container " + allocated if (isNodeBlacklisted(allocatedHost)) { + toBeReplacedReq.attemptID); ContainerRequest newReq = getFilteredContainerRequest(toBeReplacedReq); decContainerReq(toBeReplacedReq); if (toBeReplacedReq.attemptID.getTaskId().getTaskType() == TaskType.MAP) { reduces.put(newReq.attemptID, newReq); addContainerReq(newReq);
RMContainerAllocator allocator = new RMContainerAllocator( mock(ClientService.class), appContext, new NoopAMPreemptionPolicy()) { allocator.init(conf); allocator.start(); while (allocator.getLastHeartbeatTime() != 5 && timeToWaitMs > 0) { Thread.sleep(10); timeToWaitMs -= 10; Assert.assertEquals(5, allocator.getLastHeartbeatTime()); clock.setTime(7); timeToWaitMs = 5000; while (allocator.getLastHeartbeatTime() != 7 && timeToWaitMs > 0) { Thread.sleep(10); timeToWaitMs -= 10; Assert.assertEquals(7, allocator.getLastHeartbeatTime()); allocator.runOnNextHeartbeat(new Runnable() { @Override public void run() { clock.setTime(8); timeToWaitMs = 5000; while (allocator.getLastHeartbeatTime() != 8 && timeToWaitMs > 0) { Thread.sleep(10); timeToWaitMs -= 10;
@Test public void testCompletedContainerEvent() { RMContainerAllocator allocator = new RMContainerAllocator( mock(ClientService.class), mock(AppContext.class), new NoopAMPreemptionPolicy()); ContainerExitStatus.ABORTED); TaskAttemptEvent event = allocator.createContainerFinishedEvent(status, attemptId); Assert.assertEquals(TaskAttemptEventType.TA_CONTAINER_COMPLETED, event.getType()); TaskAttemptEvent abortedEvent = allocator.createContainerFinishedEvent( abortedStatus, attemptId); Assert.assertEquals(TaskAttemptEventType.TA_KILL, abortedEvent.getType()); ContainerState.RUNNING, "", ContainerExitStatus.PREEMPTED); TaskAttemptEvent event2 = allocator.createContainerFinishedEvent(status2, attemptId); Assert.assertEquals(TaskAttemptEventType.TA_CONTAINER_COMPLETED, event2.getType()); TaskAttemptEvent abortedEvent2 = allocator.createContainerFinishedEvent( preemptedStatus, attemptId); Assert.assertEquals(TaskAttemptEventType.TA_KILL, abortedEvent2.getType());
/**
 * Feeds each container request event, in list order, to the superclass
 * {@code handleEvent}.
 *
 * @param reqs the container request events to submit
 */
public void sendRequests(List<ContainerRequestEvent> reqs) {
  for (final ContainerRequestEvent requestEvent : reqs) {
    super.handleEvent(requestEvent);
  }
}
/**
 * Ramps down reduces using {@link Integer#MAX_VALUE} as the count, so the
 * ramp-down is not bounded by any smaller number.
 */
private void clearAllPendingReduceRequests() {
  final int noUpperBound = Integer.MAX_VALUE;
  rampDownReduces(noUpperBound);
}
/**
 * Creates the container allocator for this job, then initializes and starts
 * it before delegating to {@code super.serviceStart()}.
 *
 * <p>An uber job sets up the local distributed cache and uses a
 * {@code LocalContainerAllocator}; any other job uses an
 * {@code RMContainerAllocator}.
 *
 * @throws Exception propagated from cache setup, allocator start-up, or the
 *                   superclass
 */
@Override
protected void serviceStart() throws Exception {
  if (job.isUber()) {
    MRApps.setupDistributedCacheLocal(getConfig());
    this.containerAllocator = new LocalContainerAllocator(
        this.clientService, this.context,
        nmHost, nmPort, nmHttpPort, containerID);
  } else {
    this.containerAllocator = new RMContainerAllocator(
        this.clientService, this.context);
  }

  // The allocator is driven through its Service lifecycle: init, then start.
  final Service allocatorService = (Service) this.containerAllocator;
  allocatorService.init(getConfig());
  allocatorService.start();

  super.serviceStart();
}
@SuppressWarnings("unchecked") @VisibleForTesting void processFinishedContainer(ContainerStatus container) { LOG.info("Received completed container " + container.getContainerId()); TaskAttemptId attemptID = assignedRequests.get(container.getContainerId()); if (attemptID == null) { LOG.error("Container complete event for unknown container " + container.getContainerId()); } else { pendingRelease.remove(container.getContainerId()); assignedRequests.remove(attemptID); // Send the diagnostics String diagnostic = StringInterner.weakIntern(container.getDiagnostics()); eventHandler.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptID, diagnostic)); // send the container completed event to Task attempt eventHandler.handle(createContainerFinishedEvent(container, attemptID)); } }
/**
 * Adds the AMRM token identifier of this application attempt to the current
 * user before delegating to {@code super.register()}.
 *
 * <p>The token is read from the attempt record held by {@code rm}.
 *
 * @throws YarnRuntimeException wrapping any {@link IOException} raised
 *                              while obtaining the current user or decoding
 *                              the token identifier
 */
@Override
protected void register() {
  final ApplicationAttemptId appAttemptId =
      getContext().getApplicationAttemptId();
  final Token<AMRMTokenIdentifier> amrmToken = rm.getRMContext()
      .getRMApps()
      .get(appAttemptId.getApplicationId())
      .getRMAppAttempt(appAttemptId)
      .getAMRMToken();

  try {
    UserGroupInformation.getCurrentUser()
        .addTokenIdentifier(amrmToken.decodeIdentifier());
  } catch (IOException e) {
    throw new YarnRuntimeException(e);
  }

  super.register();
}
@SuppressWarnings("unchecked") private List<Container> getResources() throws Exception { applyConcurrentTaskLimits(); Resource headRoom = Resources.clone(getAvailableResources()); AllocateResponse response; response = makeRemoteRequest(); eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT)); throw new RMContainerAllocationException( "Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationAttemptId(), e); } catch (ApplicationMasterNotRegisteredException e) { LOG.info("ApplicationMaster is out of sync with ResourceManager," register(); addOutstandingRequestOnResync(); return null; } catch (InvalidLabelResourceRequestException e) { + StringUtils.stringifyException(e); LOG.info(diagMsg); JobId jobId = this.getJob().getID(); eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg)); eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); eventHandler.handle(new JobEvent(this.getJob().getID(), JobEventType.JOB_AM_REBOOT));
Resource headRoom = getAvailableResources(); if (!getIsReduceStarted()) {//not set yet int completedMapsForReduceSlowstart = (int)Math.ceil(reduceSlowStart * totalMaps); } else { LOG.info("Reduce slow start threshold reached. Scheduling reduces."); setIsReduceStarted(true); LOG.info("All maps assigned. " + "Ramping up all remaining reduces:" + numPendingReduces); scheduleAllReduces(); return; Resource totalResourceLimit = getResourceLimit(); mapResourceReqt, getSchedulerResourceTypes()) >= (scheduledMaps + assignedMaps)) { ResourceCalculatorUtils.computeAvailableContainers(Resources.subtract( finalReduceResourceLimit, netScheduledReduceResource), reduceResourceReqt, getSchedulerResourceTypes()); rampUpReduces(rampUp); } else if (rampUp < 0) { int rampDown = -1 * rampUp; rampDown = Math.min(rampDown, scheduledReduces); LOG.info("Ramping down " + rampDown); rampDownReduces(rampDown);
|| PRIORITY_MAP.equals(priority)) { if (ResourceCalculatorUtils.computeAvailableContainers(allocatedResource, mapResourceRequest, getSchedulerResourceTypes()) <= 0 || maps.isEmpty()) { LOG.info("Cannot assign container " + allocated reduceResourceRequest, getSchedulerResourceTypes()) <= 0 || reduces.isEmpty()) { LOG.info("Cannot assign container " + allocated if (isNodeBlacklisted(allocatedHost)) { + toBeReplacedReq.attemptID); ContainerRequest newReq = getFilteredContainerRequest(toBeReplacedReq); decContainerReq(toBeReplacedReq); if (toBeReplacedReq.attemptID.getTaskId().getTaskType() == TaskType.MAP) { reduces.put(newReq.attemptID, newReq); addContainerReq(newReq);
if (preemptReducersForHangingMapRequests( reducerUnconditionalPreemptionDelayMs)) { return true; reduceResourceRequest, scheduledRequests.reduces.size()); Resource availableResourceForMap = Resources.subtract(getAvailableResources(), scheduledReducesResource); if (ResourceCalculatorUtils.computeAvailableContainers(availableResourceForMap, mapResourceRequest, getSchedulerResourceTypes()) > 0) { return preemptReducersForHangingMapRequests(reducerNoHeadroomPreemptionDelayMs);