/**
 * Adds the time-weighted resource counters for a task attempt to the given
 * counter update event.
 *
 * <p>The elapsed time is the attempt's finish time minus its launch time.
 * Four counters are updated, using the MAP or REDUCE variant depending on
 * the attempt's task type: simulated slots x elapsed, memory (MB) x elapsed,
 * vcores x elapsed, and the plain elapsed time.
 *
 * @param jce event that collects the counter increments
 * @param taskAttempt attempt whose times, configuration and resource
 *     requirements are read
 */
private static void updateMillisCounters(JobCounterUpdateEvent jce,
    TaskAttemptImpl taskAttempt) {
  TaskType taskType = taskAttempt.getID().getTaskId().getTaskType();
  long elapsedMillis =
      taskAttempt.getFinishTime() - taskAttempt.getLaunchTime();
  int memoryMb = taskAttempt.getMemoryRequired(taskAttempt.conf, taskType);
  int vcores = taskAttempt.getCpuRequired(taskAttempt.conf, taskType);
  int minAllocationMb = taskAttempt.conf.getInt(
      YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
      YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);

  // Number of minimum-sized allocations needed to cover the requested
  // memory, rounded up. A zero minimum would divide by zero, so it is
  // reported as zero slots instead.
  int simulatedSlots = 0;
  if (minAllocationMb != 0) {
    simulatedSlots = (int) Math.ceil((float) memoryMb / minAllocationMb);
  }

  final boolean isMap = (taskType == TaskType.MAP);
  jce.addCounterUpdate(
      isMap ? JobCounter.SLOTS_MILLIS_MAPS : JobCounter.SLOTS_MILLIS_REDUCES,
      simulatedSlots * elapsedMillis);
  jce.addCounterUpdate(
      isMap ? JobCounter.MB_MILLIS_MAPS : JobCounter.MB_MILLIS_REDUCES,
      elapsedMillis * memoryMb);
  jce.addCounterUpdate(
      isMap ? JobCounter.VCORES_MILLIS_MAPS : JobCounter.VCORES_MILLIS_REDUCES,
      elapsedMillis * vcores);
  jce.addCounterUpdate(
      isMap ? JobCounter.MILLIS_MAPS : JobCounter.MILLIS_REDUCES,
      elapsedMillis);
}
@Override public boolean isFinished() { readLock.lock(); try { // TODO: Use stateMachine level method? return (getInternalState() == TaskAttemptStateInternal.SUCCEEDED || getInternalState() == TaskAttemptStateInternal.FAILED || getInternalState() == TaskAttemptStateInternal.KILLED); } finally { readLock.unlock(); } }
/**
 * Builds the set of host names for the given locations.
 *
 * <p>Null entries are skipped. Entries for which {@code isIP} returns true
 * are passed through {@code resolveHost} before being added; all other
 * entries are added unchanged.
 *
 * @param src locations to process; may be {@code null}
 * @return a new set of hosts, empty when {@code src} is {@code null}
 */
protected Set<String> resolveHosts(String[] src) {
  Set<String> hosts = new HashSet<String>();
  if (src == null) {
    return hosts;
  }
  for (String location : src) {
    if (location == null) {
      continue;
    }
    hosts.add(isIP(location) ? resolveHost(location) : location);
  }
  return hosts;
}
/**
 * Creates a new attempt, tags it with the given avataar, records it in
 * {@code attempts} and advances {@code nextAttemptNumber}.
 *
 * <p>The backing map is chosen by how many attempts already exist: the first
 * attempt is stored in a singleton map; when the second attempt arrives the
 * contents are copied into a {@link LinkedHashMap} created with
 * {@code maxAttempts} as its initial capacity; later attempts are simply put
 * into that map.
 *
 * @param avataar avataar to set on the new attempt
 * @return the newly created attempt
 */
private TaskAttemptImpl addAttempt(Avataar avataar) {
  TaskAttemptImpl attempt = createAttempt();
  attempt.setAvataar(avataar);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Created attempt " + attempt.getID());
  }

  final int existingAttempts = attempts.size();
  if (existingAttempts == 0) {
    attempts = Collections.singletonMap(attempt.getID(),
        (TaskAttempt) attempt);
  } else {
    if (existingAttempts == 1) {
      // A singleton map cannot take another entry, so move its content
      // into a growable map before adding the second attempt.
      Map<TaskAttemptId, TaskAttempt> growable =
          new LinkedHashMap<TaskAttemptId, TaskAttempt>(maxAttempts);
      growable.putAll(attempts);
      attempts = growable;
    }
    attempts.put(attempt.getID(), attempt);
  }

  ++nextAttemptNumber;
  return attempt;
}
/**
 * Builds the job counter update event for a failed task attempt.
 *
 * <p>The event increments the failed-maps or failed-reduces counter by one,
 * depending on the attempt's task type. Unless the task had already
 * completed, the time-based counters are added as well via
 * {@code updateMillisCounters}.
 *
 * @param taskAttempt the failed attempt
 * @param taskAlreadyCompleted when {@code true}, the time-based counters
 *     are not added
 * @return the populated counter update event
 */
private static JobCounterUpdateEvent createJobCounterUpdateEventTAFailed(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  TaskType taskType = taskAttempt.getID().getTaskId().getTaskType();
  JobCounterUpdateEvent counterEvent =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());

  JobCounter failedCounter = (taskType == TaskType.MAP)
      ? JobCounter.NUM_FAILED_MAPS
      : JobCounter.NUM_FAILED_REDUCES;
  counterEvent.addCounterUpdate(failedCounter, 1);

  if (taskAlreadyCompleted) {
    return counterEvent;
  }
  updateMillisCounters(counterEvent, taskAttempt);
  return counterEvent;
}
/**
 * Drives a task attempt through a client-requested failure and checks the
 * state after each step: TA_FAILMSG_BY_CLIENT, then TA_CONTAINER_CLEANED,
 * then TA_CLEANUP_DONE. Also checks that the mock event handler recorded no
 * internal error.
 */
@Test
public void testFailMapTaskByClient() throws Exception {
  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler);

  taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
      TaskAttemptEventType.TA_FAILMSG_BY_CLIENT));

  // assertEquals takes (message, expected, actual); the messages name the
  // state that is actually being asserted.
  assertEquals("Task attempt is not in FAILED state",
      TaskAttemptState.FAILED, taImpl.getState());
  assertEquals("Task attempt's internal state is not "
      + "FAIL_CONTAINER_CLEANUP",
      TaskAttemptStateInternal.FAIL_CONTAINER_CLEANUP,
      taImpl.getInternalState());

  taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
      TaskAttemptEventType.TA_CONTAINER_CLEANED));

  assertEquals("Task attempt's internal state is not FAIL_TASK_CLEANUP",
      TaskAttemptStateInternal.FAIL_TASK_CLEANUP,
      taImpl.getInternalState());

  taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
      TaskAttemptEventType.TA_CLEANUP_DONE));

  assertEquals("Task attempt is not in FAILED state",
      TaskAttemptState.FAILED, taImpl.getState());

  assertFalse("InternalError occurred", eventHandler.internalError);
}
Container.newInstance(containerId, containerNodeId, nodeHttpAddress, null, null, null); computeRackAndLocality(); launchTime = taInfo.getStartTime(); finishTime = (taInfo.getFinishTime() != -1) ? trackerName = taInfo.getHostname(); httpPort = taInfo.getHttpPort(); sendLaunchedEvents(); reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime(); reportedStatus.sortFinishTime = taInfo.getSortFinishTime(); addDiagnosticInfo(taInfo.getError()); attemptState = TaskAttemptStateInternal.SUCCEEDED; reportedStatus.taskState = TaskAttemptState.SUCCEEDED; eventHandler.handle(createJobCounterUpdateEventTASucceeded(this)); logAttemptFinishedEvent(attemptState); } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) { attemptState = TaskAttemptStateInternal.FAILED; reportedStatus.taskState = TaskAttemptState.FAILED; eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(this, TaskAttemptStateInternal.FAILED); eventHandler.handle( addDiagnosticInfo("Killed during application recovery"); needToClean = true;
taskAttempt.container = container; taskAttempt.remoteTask = taskAttempt.createRemoteTask(); taskAttempt.jvmID = new WrappedJvmID(taskAttempt.remoteTask.getTaskID().getJobID(), ContainerLaunchContext launchContext = createContainerLaunchContext( cEvent.getApplicationACLs(), taskAttempt.conf, taskAttempt.jobToken, taskAttempt.remoteTask, taskAttempt.oldJobId, taskAttempt.jvmID, (new SpeculatorEvent(taskAttempt.getID().getTaskId(), -1));
/**
 * Schedules the given attempt and returns the resource capability carried
 * by the container request it emits.
 *
 * <p>After sending TA_SCHEDULE, the attempt must be in the STARTING state
 * and the handler must have received exactly two events, exactly one of
 * which is a {@code ContainerRequestEvent}.
 *
 * @param taImpl attempt to schedule
 * @param eventHandler handler the attempt reports to; it is passed to
 *     Mockito's {@code verify}, so it must be a mock or spy
 * @return the capability of the single captured container request
 */
private Resource getResourceInfoFromContainerRequest(
    TaskAttemptImpl taImpl, EventHandler eventHandler) {
  taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
      TaskAttemptEventType.TA_SCHEDULE));

  // assertEquals takes (message, expected, actual).
  assertEquals("Task attempt is not in STARTING state",
      TaskAttemptState.STARTING, taImpl.getState());

  ArgumentCaptor<Event> captor = ArgumentCaptor.forClass(Event.class);
  verify(eventHandler, times(2)).handle(captor.capture());

  // Keep only the container requests out of everything that was handled.
  List<ContainerRequestEvent> containerRequestEvents = new ArrayList<>();
  for (Event e : captor.getAllValues()) {
    if (e instanceof ContainerRequestEvent) {
      containerRequestEvents.add((ContainerRequestEvent) e);
    }
  }
  assertEquals("Expected one ContainerRequestEvent after scheduling "
      + "task attempt", 1, containerRequestEvents.size());

  return containerRequestEvents.get(0).getCapability();
}
@Override public TaskAttemptReport getReport() { TaskAttemptReport result = recordFactory.newRecordInstance(TaskAttemptReport.class); readLock.lock(); try { result.setTaskAttemptId(attemptId); //take the LOCAL state of attempt //DO NOT take from reportedStatus result.setTaskAttemptState(getState()); result.setProgress(reportedStatus.progress); result.setStartTime(launchTime); result.setFinishTime(finishTime); result.setShuffleFinishTime(this.reportedStatus.shuffleFinishTime); result.setDiagnosticInfo(StringUtils.join(LINE_SEPARATOR, getDiagnostics())); result.setPhase(reportedStatus.phase); result.setStateString(reportedStatus.stateString); result.setCounters(TypeConverter.toYarn(getCounters())); result.setContainerId(this.getAssignedContainerID()); result.setNodeManagerHost(trackerName); result.setNodeManagerHttpPort(httpPort); if (this.container != null) { result.setNodeManagerPort(this.container.getNodeId().getPort()); } return result; } finally { readLock.unlock(); } }
public TaskAttemptStateInternal transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { if(taskAttempt.getID().getTaskId().getTaskType() == TaskType.REDUCE) { taskAttempt.getID().toString()); return TaskAttemptStateInternal.SUCCEEDED; assert (taskAttempt.getFinishTime() != 0); assert (taskAttempt.getLaunchTime() != 0); taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAKilled(taskAttempt, true));
initTaskAttemptStatus(reportedStatus); getMemoryRequired(conf, taskId.getTaskType())); this.resourceCapability.setVirtualCores( getCpuRequired(conf, taskId.getTaskType())); this.dataLocalHosts = resolveHosts(dataLocalHosts); RackResolver.init(conf); this.dataLocalRacks = new HashSet<String>();
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // too many fetch failure can only happen for map tasks Preconditions .checkArgument(taskAttempt.getID().getTaskId().getTaskType() == TaskType.MAP); //add to diagnostic taskAttempt.addDiagnosticInfo("Too Many fetch failures.Failing the attempt"); if (taskAttempt.getLaunchTime() != 0) { taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAFailed(taskAttempt, true)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.FAILED); taskAttempt.eventHandler.handle(new JobHistoryEvent( taskAttempt.attemptId.getTaskId().getJobId(), tauce)); }else { LOG.debug("Not generating HistoryFinish event since start event not " + "generated for taskAttempt: " + taskAttempt.getID()); } taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED)); } }
when(container.getNodeHttpAddress()).thenReturn("localhost:0"); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_SCHEDULE)); taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId, container, mock(Map.class))); taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0)); assertEquals("Task attempt is not in running state", taImpl.getState(), TaskAttemptState.RUNNING); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_KILL)); assertFalse("InternalError occurred trying to handle TA_KILL", assertEquals("Task should be in KILL_CONTAINER_CLEANUP state", TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP, taImpl.getInternalState());
when(container.getNodeHttpAddress()).thenReturn("localhost:0"); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_SCHEDULE)); taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId, container, mock(Map.class))); taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0)); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_DONE)); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_CONTAINER_COMPLETED)); assertEquals("Task attempt is not in succeeded state", taImpl.getState(), TaskAttemptState.SUCCEEDED); taImpl.getFinishTime() > 0); Long finishTime = taImpl.getFinishTime(); Thread.sleep(5); taImpl.handle(new TaskAttemptTooManyFetchFailureEvent(attemptId, reduceTAId, "Host")); taImpl.getState(), TaskAttemptState.FAILED); finishTime, Long.valueOf(taImpl.getFinishTime()));
when(container.getNodeHttpAddress()).thenReturn("localhost:0"); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_SCHEDULE)); taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId, container, mock(Map.class))); taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0)); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_COMMIT_PENDING)); assertEquals("Task attempt is not in commit pending state", taImpl.getState(), TaskAttemptState.COMMIT_PENDING); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_CONTAINER_CLEANED)); assertFalse("InternalError occurred trying to handle TA_CONTAINER_CLEANED", eventHandler.internalError); assertEquals("Task attempt is assigned locally", Locality.OFF_SWITCH, taImpl.getLocality());
when(container.getNodeHttpAddress()).thenReturn("localhost:0"); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_SCHEDULE)); taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId, container, mock(Map.class))); taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0)); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_DONE)); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_CONTAINER_COMPLETED)); assertEquals("Task attempt is not in succeeded state", taImpl.getState(), TaskAttemptState.SUCCEEDED); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_KILL)); assertEquals("Task attempt is not in KILLED state", taImpl.getState(), TaskAttemptState.KILLED); taImpl.handle(new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE)); assertEquals("Task attempt is not in KILLED state, still", taImpl.getState(), TaskAttemptState.KILLED); assertFalse("InternalError occurred trying to handle TA_CONTAINER_CLEANED",
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { taskAttempt.appContext.getTaskAttemptFinishingMonitor().unregister( taskAttempt.attemptId); // The attempt stays in finishing state for too long String msg = "Task attempt " + taskAttempt.getID() + " is done from " + "TaskUmbilicalProtocol's point of view. However, it stays in " + "finishing state for too long"; LOG.warn(msg); taskAttempt.addDiagnosticInfo(msg); sendContainerCleanup(taskAttempt, event); } }
initTaskAttemptStatus(reportedStatus); populateResourceCapability(taskId.getTaskType()); this.dataLocalHosts = resolveHosts(dataLocalHosts); RackResolver.init(conf); this.dataLocalRacks = new HashSet<String>();
createMapTaskAttemptImplForTest(eventHandler, splitInfo); TaskAttemptImpl spyTa = spy(mockTaskAttempt); when(spyTa.resolveHost(hosts[0])).thenReturn("host1"); spyTa.dataLocalHosts = spyTa.resolveHosts(splitInfo.getLocations()); verify(spyTa).resolveHost(hosts[0]); ArgumentCaptor<Event> arg = ArgumentCaptor.forClass(Event.class); verify(eventHandler, times(2)).handle(arg.capture());