@SuppressWarnings("unchecked") void preemptReduce(int toPreempt) { List<TaskAttemptId> reduceList = new ArrayList<TaskAttemptId> (reduces.keySet()); //sort reduces on progress Collections.sort(reduceList, new Comparator<TaskAttemptId>() { @Override public int compare(TaskAttemptId o1, TaskAttemptId o2) { return Float.compare( getJob().getTask(o1.getTaskId()).getAttempt(o1).getProgress(), getJob().getTask(o2.getTaskId()).getAttempt(o2).getProgress()); } }); for (int i = 0; i < toPreempt && reduceList.size() > 0; i++) { TaskAttemptId id = reduceList.remove(0);//remove the one on top LOG.info("Preempting " + id); preemptionWaitingReduces.add(id); eventHandler.handle(new TaskAttemptKillEvent(id, RAMPDOWN_DIAGNOSTIC)); } }
@SuppressWarnings("unchecked") void preemptReduce(int toPreempt) { List<TaskAttemptId> reduceList = new ArrayList<TaskAttemptId> (reduces.keySet()); //sort reduces on progress Collections.sort(reduceList, new Comparator<TaskAttemptId>() { @Override public int compare(TaskAttemptId o1, TaskAttemptId o2) { return Float.compare( getJob().getTask(o1.getTaskId()).getAttempt(o1).getProgress(), getJob().getTask(o2.getTaskId()).getAttempt(o2).getProgress()); } }); for (int i = 0; i < toPreempt && reduceList.size() > 0; i++) { TaskAttemptId id = reduceList.remove(0);//remove the one on top LOG.info("Preempting " + id); preemptionWaitingReduces.add(id); eventHandler.handle(new TaskAttemptKillEvent(id, RAMPDOWN_DIAGNOSTIC)); } }
@SuppressWarnings("unchecked") void preemptReduce(int toPreempt) { List<TaskAttemptId> reduceList = new ArrayList<TaskAttemptId> (reduces.keySet()); //sort reduces on progress Collections.sort(reduceList, new Comparator<TaskAttemptId>() { @Override public int compare(TaskAttemptId o1, TaskAttemptId o2) { return Float.compare( getJob().getTask(o1.getTaskId()).getAttempt(o1).getProgress(), getJob().getTask(o2.getTaskId()).getAttempt(o2).getProgress()); } }); for (int i = 0; i < toPreempt && reduceList.size() > 0; i++) { TaskAttemptId id = reduceList.remove(0);//remove the one on top LOG.info("Preempting " + id); preemptionWaitingReduces.add(id); eventHandler.handle(new TaskAttemptKillEvent(id, RAMPDOWN_DIAGNOSTIC)); } }
@Override public void transition(TaskImpl task, TaskEvent event) { TaskTAttemptEvent ev = (TaskTAttemptEvent) event; // The nextAttemptNumber is commit pending, decide on set the commitAttempt TaskAttemptId attemptID = ev.getTaskAttemptID(); if (task.commitAttempt == null) { // TODO: validate attemptID task.commitAttempt = attemptID; LOG.info(attemptID + " given a go for committing the task output."); } else { // Don't think this can be a pluggable decision, so simply raise an // event for the TaskAttempt to delete its output. LOG.info(task.commitAttempt + " already given a go for committing the task output, so killing " + attemptID); task.eventHandler.handle(new TaskAttemptKillEvent(attemptID, SPECULATION + task.commitAttempt + " committed first!")); } } }
@Override public void transition(TaskImpl task, TaskEvent event) { TaskTAttemptEvent ev = (TaskTAttemptEvent) event; // The nextAttemptNumber is commit pending, decide on set the commitAttempt TaskAttemptId attemptID = ev.getTaskAttemptID(); if (task.commitAttempt == null) { // TODO: validate attemptID task.commitAttempt = attemptID; LOG.info(attemptID + " given a go for committing the task output."); } else { // Don't think this can be a pluggable decision, so simply raise an // event for the TaskAttempt to delete its output. LOG.info(task.commitAttempt + " already given a go for committing the task output, so killing " + attemptID); task.eventHandler.handle(new TaskAttemptKillEvent(attemptID, SPECULATION + task.commitAttempt + " committed first!")); } } }
@Override public void transition(TaskImpl task, TaskEvent event) { TaskTAttemptEvent ev = (TaskTAttemptEvent) event; // The nextAttemptNumber is commit pending, decide on set the commitAttempt TaskAttemptId attemptID = ev.getTaskAttemptID(); if (task.commitAttempt == null) { // TODO: validate attemptID task.commitAttempt = attemptID; LOG.info(attemptID + " given a go for committing the task output."); } else { // Don't think this can be a pluggable decision, so simply raise an // event for the TaskAttempt to delete its output. LOG.info(task.commitAttempt + " already given a go for committing the task output, so killing " + attemptID); task.eventHandler.handle(new TaskAttemptKillEvent(attemptID, SPECULATION + task.commitAttempt + " committed first!")); } } }
private void killUnfinishedAttempt(TaskAttempt attempt, String logMsg) { if (attempt != null && !attempt.isFinished()) { eventHandler.handle( new TaskAttemptKillEvent(attempt.getID(), logMsg)); } }
private void killUnfinishedAttempt(TaskAttempt attempt, String logMsg) { if (attempt != null && !attempt.isFinished()) { eventHandler.handle( new TaskAttemptKillEvent(attempt.getID(), logMsg)); } }
private void killUnfinishedAttempt(TaskAttempt attempt, String logMsg) { if (attempt != null && !attempt.isFinished()) { eventHandler.handle( new TaskAttemptKillEvent(attempt.getID(), logMsg)); } }
@Override public void transition(TaskImpl task, TaskEvent event) { TaskTAttemptEvent taskTAttemptEvent = (TaskTAttemptEvent) event; TaskAttemptId taskAttemptId = taskTAttemptEvent.getTaskAttemptID(); task.handleTaskAttemptCompletion( taskAttemptId, TaskAttemptCompletionEventStatus.SUCCEEDED); task.finishedAttempts.add(taskAttemptId); task.inProgressAttempts.remove(taskAttemptId); task.successfulAttempt = taskAttemptId; task.sendTaskSucceededEvents(); for (TaskAttempt attempt : task.attempts.values()) { if (attempt.getID() != task.successfulAttempt && // This is okay because it can only talk us out of sending a // TA_KILL message to an attempt that doesn't need one for // other reasons. !attempt.isFinished()) { LOG.info("Issuing kill to other attempt " + attempt.getID()); task.eventHandler.handle(new TaskAttemptKillEvent(attempt.getID(), SPECULATION + task.successfulAttempt + " succeeded first!")); } } task.finished(TaskStateInternal.SUCCEEDED); } }
@Override public void transition(TaskImpl task, TaskEvent event) { TaskTAttemptEvent taskTAttemptEvent = (TaskTAttemptEvent) event; TaskAttemptId taskAttemptId = taskTAttemptEvent.getTaskAttemptID(); task.handleTaskAttemptCompletion( taskAttemptId, TaskAttemptCompletionEventStatus.SUCCEEDED); task.finishedAttempts.add(taskAttemptId); task.inProgressAttempts.remove(taskAttemptId); task.successfulAttempt = taskAttemptId; task.sendTaskSucceededEvents(); for (TaskAttempt attempt : task.attempts.values()) { if (attempt.getID() != task.successfulAttempt && // This is okay because it can only talk us out of sending a // TA_KILL message to an attempt that doesn't need one for // other reasons. !attempt.isFinished()) { LOG.info("Issuing kill to other attempt " + attempt.getID()); task.eventHandler.handle(new TaskAttemptKillEvent(attempt.getID(), SPECULATION + task.successfulAttempt + " succeeded first!")); } } task.finished(TaskStateInternal.SUCCEEDED); } }
@Override public void transition(TaskImpl task, TaskEvent event) { TaskTAttemptEvent taskTAttemptEvent = (TaskTAttemptEvent) event; TaskAttemptId taskAttemptId = taskTAttemptEvent.getTaskAttemptID(); task.handleTaskAttemptCompletion( taskAttemptId, TaskAttemptCompletionEventStatus.SUCCEEDED); task.finishedAttempts.add(taskAttemptId); task.inProgressAttempts.remove(taskAttemptId); task.successfulAttempt = taskAttemptId; task.sendTaskSucceededEvents(); for (TaskAttempt attempt : task.attempts.values()) { if (attempt.getID() != task.successfulAttempt && // This is okay because it can only talk us out of sending a // TA_KILL message to an attempt that doesn't need one for // other reasons. !attempt.isFinished()) { LOG.info("Issuing kill to other attempt " + attempt.getID()); task.eventHandler.handle(new TaskAttemptKillEvent(attempt.getID(), SPECULATION + task.successfulAttempt + " succeeded first!")); } } task.finished(TaskStateInternal.SUCCEEDED); } }
private void actOnUnusableNode(NodeId nodeId, NodeState nodeState) { // rerun previously successful map tasks // do this only if the job is still in the running state and there are // running reducers if (getInternalState() == JobStateInternal.RUNNING && !allReducersComplete()) { List<TaskAttemptId> taskAttemptIdList = nodesToSucceededTaskAttempts.get(nodeId); if (taskAttemptIdList != null) { String mesg = "TaskAttempt killed because it ran on unusable node " + nodeId; for (TaskAttemptId id : taskAttemptIdList) { if (TaskType.MAP == id.getTaskId().getTaskType()) { // reschedule only map tasks because their outputs maybe unusable LOG.info(mesg + ". AttemptId:" + id); // Kill the attempt and indicate that next map attempt should be // rescheduled (i.e. considered as a fast fail map). eventHandler.handle(new TaskAttemptKillEvent(id, mesg, true)); } } } } // currently running task attempts on unusable nodes are handled in // RMContainerAllocator }
private void actOnUnusableNode(NodeId nodeId, NodeState nodeState) { // rerun previously successful map tasks // do this only if the job is still in the running state and there are // running reducers if (getInternalState() == JobStateInternal.RUNNING && !allReducersComplete()) { List<TaskAttemptId> taskAttemptIdList = nodesToSucceededTaskAttempts.get(nodeId); if (taskAttemptIdList != null) { String mesg = "TaskAttempt killed because it ran on unusable node " + nodeId; for (TaskAttemptId id : taskAttemptIdList) { if (TaskType.MAP == id.getTaskId().getTaskType()) { // reschedule only map tasks because their outputs maybe unusable LOG.info(mesg + ". AttemptId:" + id); // Kill the attempt and indicate that next map attempt should be // rescheduled (i.e. considered as a fast fail map). eventHandler.handle(new TaskAttemptKillEvent(id, mesg, true)); } } } } // currently running task attempts on unusable nodes are handled in // RMContainerAllocator }
private void actOnUnusableNode(NodeId nodeId, NodeState nodeState) { // rerun previously successful map tasks // do this only if the job is still in the running state and there are // running reducers if (getInternalState() == JobStateInternal.RUNNING && !allReducersComplete()) { List<TaskAttemptId> taskAttemptIdList = nodesToSucceededTaskAttempts.get(nodeId); if (taskAttemptIdList != null) { String mesg = "TaskAttempt killed because it ran on unusable node " + nodeId; for (TaskAttemptId id : taskAttemptIdList) { if (TaskType.MAP == id.getTaskId().getTaskType()) { // reschedule only map tasks because their outputs maybe unusable LOG.info(mesg + ". AttemptId:" + id); // Kill the attempt and indicate that next map attempt should be // rescheduled (i.e. considered as a fast fail map). eventHandler.handle(new TaskAttemptKillEvent(id, mesg, true)); } } } } // currently running task attempts on unusable nodes are handled in // RMContainerAllocator }
eventHandler.handle(new TaskAttemptKillEvent(tid, "TaskAttempt killed because it ran on unusable node" + taskAttemptNodeId, rescheduleNextAttempt));
eventHandler.handle(new TaskAttemptKillEvent(tid, "TaskAttempt killed because it ran on unusable node" + taskAttemptNodeId, rescheduleNextAttempt));
eventHandler.handle(new TaskAttemptKillEvent(tid, "TaskAttempt killed because it ran on unusable node" + taskAttemptNodeId, rescheduleNextAttempt));
private void containerKillBeforeAssignment(boolean scheduleAttempt) throws Exception { MockEventHandler eventHandler = new MockEventHandler(); ApplicationId appId = ApplicationId.newInstance(1, 2); JobId jobId = MRBuilderUtils.newJobId(appId, 1); TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP); TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler, mock(Path.class), 1, mock(TaskSplitMetaInfo.class), new JobConf(), mock(TaskAttemptListener.class), mock(Token.class), new Credentials(), SystemClock.getInstance(), mock(AppContext.class)); if (scheduleAttempt) { taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_SCHEDULE)); } taImpl.handle(new TaskAttemptKillEvent(taImpl.getID(),"", true)); assertEquals("Task attempt is not in KILLED state", taImpl.getState(), TaskAttemptState.KILLED); assertEquals("Task attempt's internal state is not KILLED", taImpl.getInternalState(), TaskAttemptStateInternal.KILLED); assertFalse("InternalError occurred", eventHandler.internalError); TaskEvent event = eventHandler.lastTaskEvent; assertEquals(TaskEventType.T_ATTEMPT_KILLED, event.getType()); // In NEW state, new map attempt should not be rescheduled. assertFalse(((TaskTAttemptKilledEvent)event).getRescheduleAttempt()); }
@Test public void testKillMapTaskAfterSuccess() throws Exception { MockEventHandler eventHandler = new MockEventHandler(); TaskAttemptImpl taImpl = createTaskAttemptImpl(eventHandler); taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_DONE)); assertEquals("Task attempt is not in SUCCEEDED state", taImpl.getState(), TaskAttemptState.SUCCEEDED); assertEquals("Task attempt's internal state is not " + "SUCCESS_FINISHING_CONTAINER", taImpl.getInternalState(), TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER); taImpl.handle(new TaskAttemptEvent(taImpl.getID(), TaskAttemptEventType.TA_CONTAINER_CLEANED)); // Send a map task attempt kill event indicating next map attempt has to be // reschedule taImpl.handle(new TaskAttemptKillEvent(taImpl.getID(),"", true)); assertEquals("Task attempt is not in KILLED state", taImpl.getState(), TaskAttemptState.KILLED); assertEquals("Task attempt's internal state is not KILLED", taImpl.getInternalState(), TaskAttemptStateInternal.KILLED); assertFalse("InternalError occurred", eventHandler.internalError); TaskEvent event = eventHandler.lastTaskEvent; assertEquals(TaskEventType.T_ATTEMPT_KILLED, event.getType()); // Send an attempt killed event to TaskImpl forwarding the same reschedule // flag we received in task attempt kill event. assertTrue(((TaskTAttemptKilledEvent)event).getRescheduleAttempt()); }