/**
 * Returns the diagnostics recorded for the task attempt named in the request.
 *
 * @param request supplies the id of the task attempt of interest
 * @return a freshly created response holding that attempt's diagnostics
 * @throws IOException declared by the overridden method; presumably raised
 *         by the job lookup ({@code verifyAndGetJob}) - confirm against it
 */
@Override
public GetDiagnosticsResponse getDiagnostics(GetDiagnosticsRequest request)
    throws IOException {
  final TaskAttemptId attemptId = request.getTaskAttemptId();

  // Look the job up through verifyAndGetJob before touching any task state.
  final Job owningJob =
      verifyAndGetJob(attemptId.getTaskId().getJobId(), true);

  final GetDiagnosticsResponse diagnosticsResponse =
      recordFactory.newRecordInstance(GetDiagnosticsResponse.class);

  // job -> task -> attempt -> diagnostics
  diagnosticsResponse.addAllDiagnostics(
      owningJob
          .getTask(attemptId.getTaskId())
          .getAttempt(attemptId)
          .getDiagnostics());
  return diagnosticsResponse;
}
/**
 * Sends a kill event for the given attempt, unless the attempt is absent or
 * already reports itself as finished.
 *
 * @param attempt the attempt to kill; may be {@code null}, in which case
 *        nothing happens
 * @param logMsg message passed along in the kill event
 */
private void killUnfinishedAttempt(TaskAttempt attempt, String logMsg) {
  if (attempt == null || attempt.isFinished()) {
    // Nothing to kill.
    return;
  }
  eventHandler.handle(new TaskAttemptKillEvent(attempt.getID(), logMsg));
}
/**
 * Returns the report of the task attempt named in the request.
 *
 * @param request supplies the id of the task attempt of interest
 * @return a freshly created response carrying that attempt's report
 * @throws IOException declared by the overridden method; presumably raised
 *         by the job lookup ({@code verifyAndGetJob}) - confirm against it
 */
@Override
public GetTaskAttemptReportResponse getTaskAttemptReport(
    GetTaskAttemptReportRequest request) throws IOException {
  final TaskAttemptId attemptId = request.getTaskAttemptId();

  // Look the job up through verifyAndGetJob before touching any task state.
  final Job owningJob =
      verifyAndGetJob(attemptId.getTaskId().getJobId(), true);

  final GetTaskAttemptReportResponse reportResponse =
      recordFactory.newRecordInstance(GetTaskAttemptReportResponse.class);

  // job -> task -> attempt -> report
  reportResponse.setTaskAttemptReport(
      owningJob
          .getTask(attemptId.getTaskId())
          .getAttempt(attemptId)
          .getReport());
  return reportResponse;
}
/**
 * Orders task attempts by finish time, falling back to launch time when
 * neither attempt has a finish time. A time of {@code 0} is treated as
 * "not set":
 * <ul>
 *   <li>both finish times set: ascending finish time;</li>
 *   <li>exactly one finish time set: the attempt with the set time sorts
 *       first;</li>
 *   <li>no finish time set: the same three rules applied to launch time,
 *       with two unset launch times comparing as equal.</li>
 * </ul>
 *
 * Timestamps are compared with {@link Long#compare(long, long)} rather than
 * by casting their difference to {@code int}; the cast truncated large
 * differences and could yield the wrong sign (or 0) for distinct times.
 *
 * @param o1 first attempt
 * @param o2 second attempt
 * @return negative, zero or positive as {@code o1} sorts before, equal to,
 *         or after {@code o2}
 */
@Override
public int compare(TaskAttempt o1, TaskAttempt o2) {
  final long finish1 = o1.getFinishTime();
  final long finish2 = o2.getFinishTime();

  if (finish1 != 0 && finish2 != 0) {
    // Both finished: plain ascending order, overflow-safe.
    return Long.compare(finish1, finish2);
  }
  if (finish1 != 0 || finish2 != 0) {
    // Exactly one finished: the finished attempt goes first.
    return finish1 > finish2 ? -1 : 1;
  }

  // Neither finished: repeat the same scheme on launch times.
  final long launch1 = o1.getLaunchTime();
  final long launch2 = o2.getLaunchTime();

  if (launch1 != 0 && launch2 != 0) {
    return Long.compare(launch1, launch2);
  }
  if (launch1 != 0 || launch2 != 0) {
    // Exactly one launched: the launched attempt goes first.
    return launch1 > launch2 ? -1 : 1;
  }
  return 0;
}
});
private TaskAttempt selectBestAttempt() { if (successfulAttempt != null) { return attempts.get(successfulAttempt); } float progress = 0f; TaskAttempt result = null; for (TaskAttempt at : attempts.values()) { switch (at.getState()) { // ignore all failed task attempts case FAILED: case KILLED: continue; } if (result == null) { result = at; //The first time around } // calculate the best progress float attemptProgress = at.getProgress(); if (attemptProgress > progress) { result = at; progress = attemptProgress; } } return result; }
failed = 0; killed = 0; if (TaskAttemptStateUI.NEW.correspondsTo(attempt.getState())) { } else if (TaskAttemptStateUI.RUNNING.correspondsTo(attempt.getState())) { .getState())) { ++successful; } else if (TaskAttemptStateUI.FAILED.correspondsTo(attempt.getState())) { ++failed; } else if (TaskAttemptStateUI.KILLED.correspondsTo(attempt.getState())) { ++killed; failedMapAttempts += failed; killedMapAttempts += killed; if (attempt.getState() == TaskAttemptState.SUCCEEDED) { numMaps++; avgMapTime += (attempt.getFinishTime() - attempt.getLaunchTime()); failedReduceAttempts += failed; killedReduceAttempts += killed; if (attempt.getState() == TaskAttemptState.SUCCEEDED) { numReduces++; avgShuffleTime += (attempt.getShuffleFinishTime() - attempt .getLaunchTime()); avgMergeTime += attempt.getSortFinishTime() - attempt.getShuffleFinishTime(); avgReduceTime += (attempt.getFinishTime() - attempt .getSortFinishTime());
long startTime, long finishTime, long elapsedTime, float progress) { TaskAttemptId attid = ta.getID(); String attemptId = MRApps.toString(attid); WebServicesTestUtils.checkStringMatch("state", ta.getState().toString(), state); WebServicesTestUtils.checkStringMatch("rack", ta.getNodeRackName(), rack); WebServicesTestUtils.checkStringMatch("nodeHttpAddress", ta.getNodeHttpAddress(), nodeHttpAddress); List<String> diagnosticsList = ta.getDiagnostics(); if (diagnosticsList != null && !diagnostics.isEmpty()) { StringBuffer b = new StringBuffer(); diagnostics); WebServicesTestUtils.checkStringMatch("assignedContainerId", ta.getAssignedContainerID().toString(), assignedContainerId); assertEquals("startTime wrong", ta.getLaunchTime(), startTime); assertEquals("finishTime wrong", ta.getFinishTime(), finishTime); assertEquals("elapsedTime wrong", finishTime - startTime, elapsedTime); assertEquals("progress wrong", ta.getProgress() * 100, progress, 1e-3f);
report.setFinishTime(getFinishTime()); report.setTaskState(getState()); report.setProgress(bestAttempt == null ? 0f : bestAttempt.getProgress()); report.setStatus(bestAttempt == null ? "" : bestAttempt.getReport().getStateString()); if (TaskAttemptState.RUNNING.equals(attempt.getState())) { report.addRunningAttempt(attempt.getID()); String prefix = "AttemptID:" + att.getID() + " Info:"; for (CharSequence cs : att.getDiagnostics()) { report.addDiagnostics(prefix + cs); : bestAttempt.getCounters()));
if (taskAttempt.getLaunchTime() != 0 && taskAttempt.getFinishTime() != 0) { attemptRunTime = (int) (taskAttempt.getFinishTime() - taskAttempt.getLaunchTime()); String taStateString = taskAttempt.getState().toString(); try { taceStatus = TaskAttemptCompletionEventStatus.valueOf(taStateString); } catch (Exception e) { LOG.warn("Cannot constuct TACEStatus from TaskAtemptState: [" + taStateString + "] for taskAttemptId: [" + taskAttempt.getID() + "]. Defaulting to KILLED"); tace.setAttemptId(taskAttempt.getID()); tace.setAttemptRunTime(attemptRunTime); tace.setEventId(eventId++); tace.setMapOutputServerAddress(taskAttempt .getAssignedContainerMgrAddress()); tace.setStatus(taceStatus); completionEvents.add(tace); if (taskAttempt.getID().getTaskId().getTaskType() == TaskType.MAP) { mapCompletionEvents.add(tace);
private void handleTaskAttemptCompletion(TaskAttemptId attemptId, TaskAttemptCompletionEventStatus status) { TaskAttempt attempt = attempts.get(attemptId); //raise the completion event only if the container is assigned // to nextAttemptNumber if (attempt.getNodeHttpAddress() != null) { TaskAttemptCompletionEvent tce = recordFactory .newRecordInstance(TaskAttemptCompletionEvent.class); tce.setEventId(-1); String scheme = (encryptedShuffle) ? "https://" : "http://"; tce.setMapOutputServerAddress(StringInterner.weakIntern(scheme + attempt.getNodeHttpAddress().split(":")[0] + ":" + attempt.getShufflePort())); tce.setStatus(status); tce.setAttemptId(attempt.getID()); int runTime = 0; if (attempt.getFinishTime() != 0 && attempt.getLaunchTime() !=0) runTime = (int)(attempt.getFinishTime() - attempt.getLaunchTime()); tce.setAttemptRunTime(runTime); //raise the event to job so that it adds the completion event to its //data structures eventHandler.handle(new JobTaskAttemptCompletedEvent(tce)); } }
String nodeIdString = attempt.getAssignedContainerMgrAddress(); String nodeRackName = ta.getRack(); long elapsedReduceTime = -1; if(type == TaskType.REDUCE) { shuffleFinishTime = attempt.getShuffleFinishTime(); sortFinishTime = attempt.getSortFinishTime(); elapsedShuffleTime = Times.elapsed(attemptStartTime, shuffleFinishTime, false); int sortId = attempt.getID().getId() + (attempt.getID().getTaskId().getId() * 10000);
/**
 * Populates this info object from a task attempt and its report.
 *
 * @param ta the attempt to describe; its report is read once up front
 * @param type task type, stored as its string form
 * @param isRunning forwarded to {@code Times.elapsed} when computing the
 *        elapsed time
 */
public TaskAttemptInfo(TaskAttempt ta, TaskType type, Boolean isRunning) {
  final TaskAttemptReport report = ta.getReport();

  // Identity and placement.
  this.type = type.toString();
  this.id = MRApps.toString(ta.getID());
  this.nodeHttpAddress = ta.getNodeHttpAddress();
  this.rack = ta.getNodeRackName();

  // Container: the string id is filled in only when a container is present.
  this.assignedContainer = report.getContainerId();
  if (this.assignedContainer != null) {
    this.assignedContainerId = this.assignedContainer.toString();
  }

  // Timing: an elapsed value of -1 is reported as 0.
  this.startTime = report.getStartTime();
  this.finishTime = report.getFinishTime();
  this.elapsedTime =
      Times.elapsed(this.startTime, this.finishTime, isRunning);
  if (this.elapsedTime == -1) {
    this.elapsedTime = 0;
  }

  // State and progress (progress is scaled by 100).
  this.progress = report.getProgress() * 100;
  this.status = report.getStateString();
  this.state = report.getTaskAttemptState();
  this.diagnostics = report.getDiagnosticInfo();
}
if (attempt.getAssignedContainerMgrAddress() != null) { task.eventHandler.handle(new ContainerFailedEvent(attempt.getID(), attempt.getAssignedContainerMgrAddress())); TaskFailedEvent taskFailedEvent = createTaskFailedEvent(task, attempt.getDiagnostics(), TaskStateInternal.FAILED, taskAttemptId); task.eventHandler.handle(new JobHistoryEvent(task.taskId.getJobId(),
/**
 * Finds an attempt of the given task that is in the SUCCEEDED state.
 *
 * @param task the task whose attempts are scanned
 * @return the first SUCCEEDED attempt met while iterating the task's
 *         attempts, or {@code null} if there is none
 */
private TaskAttempt getSuccessfulAttempt(Task task) {
  TaskAttempt succeeded = null;
  for (TaskAttempt candidate : task.getAttempts().values()) {
    if (TaskAttemptState.SUCCEEDED == candidate.getState()) {
      succeeded = candidate;
      break;
    }
  }
  return succeeded;
}
new TaskAttemptEvent(mapAttempt1.getID(), TaskAttemptEventType.TA_DONE)); TaskAttemptState.FAILED, mapAttempt1.getState()); Assert.assertEquals(mapAttempt1.getDiagnostics().get(0), "Too many fetch failures. Failing the attempt. " + "Last failure reported by " + reduceAttempt3.getID().toString() + " from host host3"); new TaskAttemptEvent(mapAttempt2.getID(), TaskAttemptEventType.TA_DONE)); new TaskAttemptEvent(reduceAttempt.getID(), TaskAttemptEventType.TA_DONE)); new TaskAttemptEvent(reduceAttempt2.getID(), TaskAttemptEventType.TA_DONE)); new TaskAttemptEvent(reduceAttempt3.getID(), TaskAttemptEventType.TA_DONE)); 6, events.length); Assert.assertEquals("Event map attempt id not correct", mapAttempt1.getID(), events[0].getAttemptId()); Assert.assertEquals("Event map attempt id not correct", mapAttempt1.getID(), events[1].getAttemptId()); Assert.assertEquals("Event map attempt id not correct", mapAttempt2.getID(), events[2].getAttemptId());
/**
 * Sends a status-update event for the given attempt through the app's event
 * handler. The status carries fixed values (progress 0.5, state string
 * "OK", empty counters, no fetch failures, zeroed finish times) together
 * with the supplied phase and the attempt's current id and state.
 *
 * @param app application whose context provides the event handler
 * @param attempt attempt the status refers to
 * @param phase phase to report
 */
private void updateStatus(MRApp app, TaskAttempt attempt, Phase phase) {
  TaskAttemptStatusUpdateEvent.TaskAttemptStatus attemptStatus =
      new TaskAttemptStatusUpdateEvent.TaskAttemptStatus();

  // Fixed payload.
  attemptStatus.counters = new Counters();
  attemptStatus.fetchFailedMaps = new ArrayList<TaskAttemptId>();
  attemptStatus.progress = 0.5f;
  attemptStatus.stateString = "OK";

  // Values taken from the arguments.
  attemptStatus.id = attempt.getID();
  attemptStatus.phase = phase;

  // No phase has finished yet.
  attemptStatus.mapFinishTime = 0;
  attemptStatus.shuffleFinishTime = 0;
  attemptStatus.sortFinishTime = 0;

  attemptStatus.taskState = attempt.getState();

  TaskAttemptStatusUpdateEvent updateEvent =
      new TaskAttemptStatusUpdateEvent(
          attempt.getID(), new AtomicReference<>(attemptStatus));
  app.getContext().getEventHandler().handle(updateEvent);
}
private long getFinishTime(TaskAttemptId taId) { if (taId == null) { return clock.getTime(); } long finishTime = 0; for (TaskAttempt at : attempts.values()) { //select the max finish time of all attempts if (at.getID().equals(taId)) { return at.getFinishTime(); } } return finishTime; }
app.getContext() .getEventHandler() .handle(new TaskAttemptEvent(mta.getID(), TaskAttemptEventType.TA_DONE)); app.getContext() .getEventHandler() .handle(new TaskAttemptEvent(rta.getID(), TaskAttemptEventType.TA_DONE)); app.waitForState(job, JobState.SUCCEEDED); Assert.assertEquals(mta.getFinishTime(), 11); Assert.assertEquals(mta.getLaunchTime(), 10); Assert.assertEquals(rta.getFinishTime(), 11); Assert.assertEquals(rta.getLaunchTime(), 10); Counters counters = job.getAllCounters();
/**
 * Populates the reduce-specific timing fields on top of the common attempt
 * info: shuffle and merge (sort) finish times, plus the elapsed time of the
 * shuffle, merge and reduce phases.
 *
 * @param ta the reduce attempt to describe
 * @param isRunning forwarded to the superclass constructor
 */
public ReduceTaskAttemptInfo(TaskAttempt ta, Boolean isRunning) {
  super(ta, TaskType.REDUCE, isRunning);

  this.shuffleFinishTime = ta.getShuffleFinishTime();
  this.mergeFinishTime = ta.getSortFinishTime();

  // Consecutive phases: start -> shuffle -> merge -> finish.
  this.elapsedShuffleTime =
      elapsedOrZero(this.startTime, this.shuffleFinishTime);
  this.elapsedMergeTime =
      elapsedOrZero(this.shuffleFinishTime, this.mergeFinishTime);
  this.elapsedReduceTime =
      elapsedOrZero(this.mergeFinishTime, this.finishTime);
}

/** Elapsed time between two timestamps, with a result of -1 reported as 0. */
private static long elapsedOrZero(long started, long finished) {
  long elapsed = Times.elapsed(started, finished, false);
  return elapsed == -1 ? 0 : elapsed;
}
private void addAndScheduleAttempt(Avataar avataar, boolean reschedule) { TaskAttempt attempt = addAttempt(avataar); inProgressAttempts.add(attempt.getID()); //schedule the nextAttemptNumber if (failedAttempts.size() > 0 || reschedule) { eventHandler.handle(new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_RESCHEDULE)); } else { eventHandler.handle(new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_SCHEDULE)); } }