/**
 * Creates a new task attempt, tags it with the given avataar, stores it in
 * {@code attempts} keyed by its ID and increments {@code nextAttemptNumber}.
 *
 * <p>The storage grows with the number of attempts already recorded: the
 * first attempt is kept in a singleton map, the second one causes the
 * contents to be copied into a {@link LinkedHashMap} created with
 * {@code maxAttempts} as its initial capacity, and any later attempt is put
 * directly into the current map.
 *
 * @param avataar the avataar to set on the newly created attempt
 * @return the attempt that was created and recorded
 */
private TaskAttemptImpl addAttempt(Avataar avataar) {
  final TaskAttemptImpl created = createAttempt();
  created.setAvataar(avataar);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Created attempt " + created.getID());
  }

  final int attemptsSoFar = attempts.size();
  if (attemptsSoFar == 0) {
    // The cast keeps the inferred value type of the map at TaskAttempt.
    attempts = Collections.singletonMap(created.getID(), (TaskAttempt) created);
  } else {
    if (attemptsSoFar == 1) {
      // A singleton map is immutable, so move its entry into a mutable,
      // insertion-ordered map before adding the second attempt.
      Map<TaskAttemptId, TaskAttempt> growable =
          new LinkedHashMap<TaskAttemptId, TaskAttempt>(maxAttempts);
      growable.putAll(attempts);
      attempts = growable;
    }
    attempts.put(created.getID(), created);
  }

  ++nextAttemptNumber;
  return created;
}
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { taskAttempt.appContext.getTaskAttemptFinishingMonitor().unregister( taskAttempt.attemptId); // The attempt stays in finishing state for too long String msg = "Task attempt " + taskAttempt.getID() + " is done from " + "TaskUmbilicalProtocol's point of view. However, it stays in " + "finishing state for too long"; LOG.warn(msg); taskAttempt.addDiagnosticInfo(msg); sendContainerCleanup(taskAttempt, event); } }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { taskAttempt.appContext.getTaskAttemptFinishingMonitor().unregister( taskAttempt.attemptId); // The attempt stays in finishing state for too long String msg = "Task attempt " + taskAttempt.getID() + " is done from " + "TaskUmbilicalProtocol's point of view. However, it stays in " + "finishing state for too long"; LOG.warn(msg); taskAttempt.addDiagnosticInfo(msg); sendContainerCleanup(taskAttempt, event); } }
/**
 * Builds the job counter update for a failed task attempt.
 *
 * <p>The event increments {@code NUM_FAILED_MAPS} for a map attempt and
 * {@code NUM_FAILED_REDUCES} otherwise. Unless the task had already
 * completed, {@code updateMillisCounters} is also applied to the event.
 *
 * @param taskAttempt the failed attempt
 * @param taskAlreadyCompleted when {@code true}, the millis counters are
 *        left untouched
 * @return the populated counter update event
 */
private static JobCounterUpdateEvent createJobCounterUpdateEventTAFailed(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  final TaskType type = taskAttempt.getID().getTaskId().getTaskType();
  final JobCounterUpdateEvent update =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());

  final JobCounter failedCounter = (type == TaskType.MAP)
      ? JobCounter.NUM_FAILED_MAPS
      : JobCounter.NUM_FAILED_REDUCES;
  update.addCounterUpdate(failedCounter, 1);

  if (!taskAlreadyCompleted) {
    updateMillisCounters(update, taskAttempt);
  }
  return update;
}
/**
 * Builds the job counter update for a failed task attempt.
 *
 * <p>The event increments {@code NUM_FAILED_MAPS} for a map attempt and
 * {@code NUM_FAILED_REDUCES} otherwise. Unless the task had already
 * completed, {@code updateMillisCounters} is also applied to the event.
 *
 * @param taskAttempt the failed attempt
 * @param taskAlreadyCompleted when {@code true}, the millis counters are
 *        left untouched
 * @return the populated counter update event
 */
private static JobCounterUpdateEvent createJobCounterUpdateEventTAFailed(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  final TaskType type = taskAttempt.getID().getTaskId().getTaskType();
  final JobCounterUpdateEvent update =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());

  final JobCounter failedCounter = (type == TaskType.MAP)
      ? JobCounter.NUM_FAILED_MAPS
      : JobCounter.NUM_FAILED_REDUCES;
  update.addCounterUpdate(failedCounter, 1);

  if (!taskAlreadyCompleted) {
    updateMillisCounters(update, taskAttempt);
  }
  return update;
}
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // Tell any speculator that we're requesting a container taskAttempt.eventHandler.handle (new SpeculatorEvent(taskAttempt.getID().getTaskId(), +1)); //request for container if (rescheduled) { taskAttempt.eventHandler.handle( ContainerRequestEvent.createContainerRequestEventForFailedContainer( taskAttempt.attemptId, taskAttempt.resourceCapability)); } else { taskAttempt.eventHandler.handle(new ContainerRequestEvent( taskAttempt.attemptId, taskAttempt.resourceCapability, taskAttempt.dataLocalHosts.toArray( new String[taskAttempt.dataLocalHosts.size()]), taskAttempt.dataLocalRacks.toArray( new String[taskAttempt.dataLocalRacks.size()]))); } } }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // Tell any speculator that we're requesting a container taskAttempt.eventHandler.handle (new SpeculatorEvent(taskAttempt.getID().getTaskId(), +1)); //request for container if (rescheduled) { taskAttempt.eventHandler.handle( ContainerRequestEvent.createContainerRequestEventForFailedContainer( taskAttempt.attemptId, taskAttempt.resourceCapability)); } else { taskAttempt.eventHandler.handle(new ContainerRequestEvent( taskAttempt.attemptId, taskAttempt.resourceCapability, taskAttempt.dataLocalHosts.toArray( new String[taskAttempt.dataLocalHosts.size()]), taskAttempt.dataLocalRacks.toArray( new String[taskAttempt.dataLocalRacks.size()]))); } } }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // Tell any speculator that we're requesting a container taskAttempt.eventHandler.handle (new SpeculatorEvent(taskAttempt.getID().getTaskId(), +1)); //request for container if (rescheduled) { taskAttempt.eventHandler.handle( ContainerRequestEvent.createContainerRequestEventForFailedContainer( taskAttempt.attemptId, taskAttempt.resourceCapability)); } else { taskAttempt.eventHandler.handle(new ContainerRequestEvent( taskAttempt.attemptId, taskAttempt.resourceCapability, taskAttempt.dataLocalHosts.toArray( new String[taskAttempt.dataLocalHosts.size()]), taskAttempt.dataLocalRacks.toArray( new String[taskAttempt.dataLocalRacks.size()]))); } } }
/**
 * Builds the job counter update for a killed task attempt.
 *
 * <p>The event increments {@code NUM_KILLED_MAPS} for a map attempt and
 * {@code NUM_KILLED_REDUCES} otherwise. Unless the task had already
 * completed, {@code updateMillisCounters} is also applied to the event.
 *
 * @param taskAttempt the killed attempt
 * @param taskAlreadyCompleted when {@code true}, the millis counters are
 *        left untouched
 * @return the populated counter update event
 */
private static JobCounterUpdateEvent createJobCounterUpdateEventTAKilled(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  final TaskType type = taskAttempt.getID().getTaskId().getTaskType();
  final JobCounterUpdateEvent update =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());

  final JobCounter killedCounter = (type == TaskType.MAP)
      ? JobCounter.NUM_KILLED_MAPS
      : JobCounter.NUM_KILLED_REDUCES;
  update.addCounterUpdate(killedCounter, 1);

  if (!taskAlreadyCompleted) {
    updateMillisCounters(update, taskAttempt);
  }
  return update;
}
/**
 * Builds the job counter update for a failed task attempt.
 *
 * <p>The event increments {@code NUM_FAILED_MAPS} for a map attempt and
 * {@code NUM_FAILED_REDUCES} otherwise. Unless the task had already
 * completed, {@code updateMillisCounters} is also applied to the event.
 *
 * @param taskAttempt the failed attempt
 * @param taskAlreadyCompleted when {@code true}, the millis counters are
 *        left untouched
 * @return the populated counter update event
 */
private static JobCounterUpdateEvent createJobCounterUpdateEventTAFailed(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  final TaskType type = taskAttempt.getID().getTaskId().getTaskType();
  final JobCounterUpdateEvent update =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());

  final JobCounter failedCounter = (type == TaskType.MAP)
      ? JobCounter.NUM_FAILED_MAPS
      : JobCounter.NUM_FAILED_REDUCES;
  update.addCounterUpdate(failedCounter, 1);

  if (!taskAlreadyCompleted) {
    updateMillisCounters(update, taskAttempt);
  }
  return update;
}
/**
 * Builds the job counter update for a killed task attempt.
 *
 * <p>The event increments {@code NUM_KILLED_MAPS} for a map attempt and
 * {@code NUM_KILLED_REDUCES} otherwise. Unless the task had already
 * completed, {@code updateMillisCounters} is also applied to the event.
 *
 * @param taskAttempt the killed attempt
 * @param taskAlreadyCompleted when {@code true}, the millis counters are
 *        left untouched
 * @return the populated counter update event
 */
private static JobCounterUpdateEvent createJobCounterUpdateEventTAKilled(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  final TaskType type = taskAttempt.getID().getTaskId().getTaskType();
  final JobCounterUpdateEvent update =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());

  final JobCounter killedCounter = (type == TaskType.MAP)
      ? JobCounter.NUM_KILLED_MAPS
      : JobCounter.NUM_KILLED_REDUCES;
  update.addCounterUpdate(killedCounter, 1);

  if (!taskAlreadyCompleted) {
    updateMillisCounters(update, taskAttempt);
  }
  return update;
}
/**
 * Builds the job counter update for a killed task attempt.
 *
 * <p>The event increments {@code NUM_KILLED_MAPS} for a map attempt and
 * {@code NUM_KILLED_REDUCES} otherwise. Unless the task had already
 * completed, {@code updateMillisCounters} is also applied to the event.
 *
 * @param taskAttempt the killed attempt
 * @param taskAlreadyCompleted when {@code true}, the millis counters are
 *        left untouched
 * @return the populated counter update event
 */
private static JobCounterUpdateEvent createJobCounterUpdateEventTAKilled(
    TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) {
  final TaskType type = taskAttempt.getID().getTaskId().getTaskType();
  final JobCounterUpdateEvent update =
      new JobCounterUpdateEvent(taskAttempt.getID().getTaskId().getJobId());

  final JobCounter killedCounter = (type == TaskType.MAP)
      ? JobCounter.NUM_KILLED_MAPS
      : JobCounter.NUM_KILLED_REDUCES;
  update.addCounterUpdate(killedCounter, 1);

  if (!taskAlreadyCompleted) {
    updateMillisCounters(update, taskAttempt);
  }
  return update;
}
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { // too many fetch failure can only happen for map tasks Preconditions .checkArgument(taskAttempt.getID().getTaskId().getTaskType() == TaskType.MAP); //add to diagnostic taskAttempt.addDiagnosticInfo("Too Many fetch failures.Failing the attempt"); if (taskAttempt.getLaunchTime() != 0) { taskAttempt.eventHandler .handle(createJobCounterUpdateEventTAFailed(taskAttempt, true)); TaskAttemptUnsuccessfulCompletionEvent tauce = createTaskAttemptUnsuccessfulCompletionEvent(taskAttempt, TaskAttemptStateInternal.FAILED); taskAttempt.eventHandler.handle(new JobHistoryEvent( taskAttempt.attemptId.getTaskId().getJobId(), tauce)); }else { LOG.debug("Not generating HistoryFinish event since start event not " + "generated for taskAttempt: " + taskAttempt.getID()); } taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED)); } }
/**
 * Schedules the given attempt and returns the capability carried by the
 * single {@link ContainerRequestEvent} it sends to the event handler.
 *
 * <p>The helper asserts that the attempt reports the STARTING state after
 * {@code TA_SCHEDULE}, that the handler was invoked exactly twice, and that
 * exactly one of the captured events is a container request.
 *
 * @param taImpl the attempt to schedule
 * @param eventHandler the (mock) handler the attempt dispatches events to;
 *        it is inspected with Mockito's {@code verify}
 * @return the capability of the captured container request
 */
private Resource getResourceInfoFromContainerRequest(
    TaskAttemptImpl taImpl, EventHandler eventHandler) {
  taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
      TaskAttemptEventType.TA_SCHEDULE));

  // JUnit's signature is (message, expected, actual); the expected value
  // goes first so that a failure report is not inverted.
  assertEquals("Task attempt is not in STARTING state",
      TaskAttemptState.STARTING, taImpl.getState());

  ArgumentCaptor<Event> captor = ArgumentCaptor.forClass(Event.class);
  verify(eventHandler, times(2)).handle(captor.capture());

  // Keep only the container requests among the captured events.
  List<ContainerRequestEvent> containerRequestEvents = new ArrayList<>();
  for (Event e : captor.getAllValues()) {
    if (e instanceof ContainerRequestEvent) {
      containerRequestEvents.add((ContainerRequestEvent) e);
    }
  }
  assertEquals("Expected one ContainerRequestEvent after scheduling "
      + "task attempt", 1, containerRequestEvents.size());

  return containerRequestEvents.get(0).getCapability();
}
private void containerKillBeforeAssignment(boolean scheduleAttempt) throws Exception { MockEventHandler eventHandler = new MockEventHandler(); ApplicationId appId = ApplicationId.newInstance(1, 2); JobId jobId = MRBuilderUtils.newJobId(appId, 1); TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP); TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler, mock(Path.class), 1, mock(TaskSplitMetaInfo.class), new JobConf(), mock(TaskAttemptListener.class), mock(Token.class), new Credentials(), SystemClock.getInstance(), mock(AppContext.class)); if (scheduleAttempt) { taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_SCHEDULE)); } taImpl.handle(new TaskAttemptKillEvent(taImpl.getID(),"", true)); assertEquals("Task attempt is not in KILLED state", taImpl.getState(), TaskAttemptState.KILLED); assertEquals("Task attempt's internal state is not KILLED", taImpl.getInternalState(), TaskAttemptStateInternal.KILLED); assertFalse("InternalError occurred", eventHandler.internalError); TaskEvent event = eventHandler.lastTaskEvent; assertEquals(TaskEventType.T_ATTEMPT_KILLED, event.getType()); // In NEW state, new map attempt should not be rescheduled. assertFalse(((TaskTAttemptKilledEvent)event).getRescheduleAttempt()); }
/**
 * Drives an attempt through a client-requested failure and checks each
 * intermediate internal state: FAIL_CONTAINER_CLEANUP after
 * {@code TA_FAILMSG_BY_CLIENT}, FAIL_TASK_CLEANUP after
 * {@code TA_CONTAINER_CLEANED}, and an external FAILED state after
 * {@code TA_CLEANUP_DONE}.
 *
 * @throws Exception if building or driving the attempt fails
 */
@Test
public void testFailMapTaskByClient() throws Exception {
  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler);

  taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
      TaskAttemptEventType.TA_FAILMSG_BY_CLIENT));

  // Messages name the state that is actually asserted, and the expected
  // value is passed first as JUnit's assertEquals requires.
  assertEquals("Task attempt is not in FAILED state",
      TaskAttemptState.FAILED, taImpl.getState());
  assertEquals("Task attempt's internal state is not "
      + "FAIL_CONTAINER_CLEANUP",
      TaskAttemptStateInternal.FAIL_CONTAINER_CLEANUP,
      taImpl.getInternalState());

  taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
      TaskAttemptEventType.TA_CONTAINER_CLEANED));

  assertEquals("Task attempt's internal state is not FAIL_TASK_CLEANUP",
      TaskAttemptStateInternal.FAIL_TASK_CLEANUP,
      taImpl.getInternalState());

  taImpl.handle(new TaskAttemptEvent(taImpl.getID(),
      TaskAttemptEventType.TA_CLEANUP_DONE));

  assertEquals("Task attempt is not in FAILED state",
      TaskAttemptState.FAILED, taImpl.getState());

  assertFalse("InternalError occurred", eventHandler.internalError);
}
@Test public void testTimeoutWhileFailFinishing() throws Exception { MockEventHandler eventHandler = new MockEventHandler(); TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler); taImpl.handle(new TaskAttemptFailEvent(taImpl.getID())); assertEquals("Task attempt is not in RUNNING state", taImpl.getState(), TaskAttemptState.FAILED); assertEquals("Task attempt's internal state is not " + "FAIL_FINISHING_CONTAINER", taImpl.getInternalState(), TaskAttemptStateInternal.FAIL_FINISHING_CONTAINER); // If the task stays in FAIL_FINISHING_CONTAINER for too long, // TaskAttemptListenerImpl will time out the attempt. taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_TIMED_OUT)); assertEquals("Task attempt's internal state is not FAIL_CONTAINER_CLEANUP", taImpl.getInternalState(), TaskAttemptStateInternal.FAIL_CONTAINER_CLEANUP); assertFalse("InternalError occurred", eventHandler.internalError); }
@Test public void testKillMapTaskAfterSuccess() throws Exception { MockEventHandler eventHandler = new MockEventHandler(); TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler); taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_DONE)); assertEquals("Task attempt is not in SUCCEEDED state", taImpl.getState(), TaskAttemptState.SUCCEEDED); assertEquals("Task attempt's internal state is not " + "SUCCESS_FINISHING_CONTAINER", taImpl.getInternalState(), TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER); taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_CONTAINER_CLEANED)); // Send a map task attempt kill event indicating next map attempt has to be // reschedule taImpl.handle(new TaskAttemptKillEvent(taImpl.getID(),"", true)); assertEquals("Task attempt is not in KILLED state", taImpl.getState(), TaskAttemptState.KILLED); assertEquals("Task attempt's internal state is not KILLED", taImpl.getInternalState(), TaskAttemptStateInternal.KILLED); assertFalse("InternalError occurred", eventHandler.internalError); TaskEvent event = eventHandler.lastTaskEvent; assertEquals(TaskEventType.T_ATTEMPT_KILLED, event.getType()); // Send an attempt killed event to TaskImpl forwarding the same reschedule // flag we received in task attempt kill event. assertTrue(((TaskTAttemptKilledEvent)event).getRescheduleAttempt()); }
@Test public void testTaskAttemptDiagnosticEventOnFinishing() throws Exception { MockEventHandler eventHandler = new MockEventHandler(); TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler); taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_DONE)); assertEquals("Task attempt is not in RUNNING state", taImpl.getState(), TaskAttemptState.SUCCEEDED); assertEquals("Task attempt's internal state is not " + "SUCCESS_FINISHING_CONTAINER", taImpl.getInternalState(), TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER); // TA_DIAGNOSTICS_UPDATE doesn't change state taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(taImpl.getID(), "Task got updated")); assertEquals("Task attempt is not in RUNNING state", taImpl.getState(), TaskAttemptState.SUCCEEDED); assertEquals("Task attempt's internal state is not " + "SUCCESS_FINISHING_CONTAINER", taImpl.getInternalState(), TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER); assertFalse("InternalError occurred", eventHandler.internalError); }
@Test public void testTimeoutWhileSuccessFinishing() throws Exception { MockEventHandler eventHandler = new MockEventHandler(); TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler); taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_DONE)); assertEquals("Task attempt is not in RUNNING state", taImpl.getState(), TaskAttemptState.SUCCEEDED); assertEquals("Task attempt's internal state is not " + "SUCCESS_FINISHING_CONTAINER", taImpl.getInternalState(), TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER); // If the task stays in SUCCESS_FINISHING_CONTAINER for too long, // TaskAttemptListenerImpl will time out the attempt. taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_TIMED_OUT)); assertEquals("Task attempt is not in RUNNING state", taImpl.getState(), TaskAttemptState.SUCCEEDED); assertEquals("Task attempt's internal state is not " + "SUCCESS_CONTAINER_CLEANUP", taImpl.getInternalState(), TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP); assertFalse("InternalError occurred", eventHandler.internalError); }