@Override public long getLastHeartbeatTime() { return getContext().getClock().getTime(); }
public Job submit(Configuration conf, boolean mapSpeculative, boolean reduceSpeculative) throws Exception { String user = conf.get(MRJobConfig.USER_NAME, UserGroupInformation .getCurrentUser().getShortUserName()); conf.set(MRJobConfig.USER_NAME, user); conf.set(MRJobConfig.MR_AM_STAGING_DIR, testAbsPath.toString()); conf.setBoolean(MRJobConfig.MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR, true); // TODO: fix the bug where the speculator gets events with // not-fully-constructed objects. For now, disable speculative exec conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapSpeculative); conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, reduceSpeculative); init(conf); start(); DefaultMetricsSystem.shutdown(); Job job = getContext().getAllJobs().values().iterator().next(); if (assignedQueue != null) { job.setQueueName(assignedQueue); } // Write job.xml String jobFile = MRApps.getJobFile(conf, user, TypeConverter.fromYarn(job.getID())); LOG.info("Writing job conf to " + jobFile); new File(jobFile).getParentFile().mkdirs(); conf.writeXml(new FileOutputStream(jobFile)); return job; }
@Override public synchronized void register(TaskAttemptId attemptID) { getContext().getEventHandler().handle( new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_CONTAINER_COMPLETED)); } };
protected void containerLaunched(TaskAttemptId attemptID, int shufflePort) { getContext().getEventHandler().handle( new TaskAttemptContainerLaunchedEvent(attemptID, shufflePort)); }
protected void attemptLaunched(TaskAttemptId attemptID) { if (autoComplete) { // send the done event getContext().getEventHandler().handle( new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_DONE)); } }
private void sendFetchFailure(MRApp app, TaskAttempt reduceAttempt, TaskAttempt mapAttempt, String hostname) { app.getContext().getEventHandler().handle( new JobTaskAttemptFetchFailureEvent( reduceAttempt.getID(), Arrays.asList(new TaskAttemptId[] {mapAttempt.getID()}), hostname)); }
private void finishTask(DrainDispatcher rmDispatcher, MockNM node, MRApp mrApp, Task task) throws Exception { TaskAttempt attempt = task.getAttempts().values().iterator().next(); List<ContainerStatus> contStatus = new ArrayList<ContainerStatus>(1); contStatus.add(ContainerStatus.newInstance(attempt.getAssignedContainerID(), ContainerState.COMPLETE, "", 0)); Map<ApplicationId,List<ContainerStatus>> statusUpdate = new HashMap<ApplicationId,List<ContainerStatus>>(1); statusUpdate.put(mrApp.getAppID(), contStatus); node.nodeHeartbeat(statusUpdate, true); rmDispatcher.await(); mrApp.getContext().getEventHandler().handle( new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_DONE)); mrApp.waitForState(task, TaskState.SUCCEEDED); }
@Test public void testJobRebootNotLastRetryOnUnregistrationFailure() throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an reboot event app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); // return exteranl state as RUNNING since otherwise the JobClient will // prematurely exit. app.waitForState(job, JobState.RUNNING); }
private void updateStatus(MRApp app, TaskAttempt attempt, Phase phase) { TaskAttemptStatusUpdateEvent.TaskAttemptStatus status = new TaskAttemptStatusUpdateEvent.TaskAttemptStatus(); status.counters = new Counters(); status.fetchFailedMaps = new ArrayList<TaskAttemptId>(); status.id = attempt.getID(); status.mapFinishTime = 0; status.phase = phase; status.progress = 0.5f; status.shuffleFinishTime = 0; status.sortFinishTime = 0; status.stateString = "OK"; status.taskState = attempt.getState(); TaskAttemptStatusUpdateEvent event = new TaskAttemptStatusUpdateEvent(attempt.getID(), new AtomicReference<>(status)); app.getContext().getEventHandler().handle(event); }
@Override protected void serviceInit(Configuration conf) throws Exception { try { //Create the staging directory if it does not exist String user = UserGroupInformation.getCurrentUser().getShortUserName(); Path stagingDir = MRApps.getStagingAreaDir(conf, user); FileSystem fs = getFileSystem(conf); fs.mkdirs(stagingDir); } catch (Exception e) { throw new YarnRuntimeException("Error creating staging dir", e); } super.serviceInit(conf); if (this.clusterInfo != null) { getContext().getClusterInfo().setMaxContainerCapability( this.clusterInfo.getMaxContainerCapability()); } else { getContext().getClusterInfo().setMaxContainerCapability( Resource.newInstance(10240, 1)); } }
@Test public void testJobError() throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an invalid event on task at current state app.getContext().getEventHandler().handle( new TaskEvent( task.getID(), TaskEventType.T_SCHEDULE)); //this must lead to job error app.waitForState(job, JobState.ERROR); }
@Override protected Job createJob(Configuration conf, JobStateInternal forcedState, String diagnostic) { UserGroupInformation currentUser = null; try { currentUser = UserGroupInformation.getCurrentUser(); } catch (IOException e) { throw new YarnRuntimeException(e); } Job newJob = new TestJob(getJobId(), getAttemptID(), conf, getDispatcher().getEventHandler(), getTaskAttemptListener(), getContext().getClock(), getCommitter(), isNewApiCommitter(), currentUser.getUserName(), getContext(), forcedState, diagnostic); ((AppContext) getContext()).getAllJobs().put(newJob.getID(), newJob); getDispatcher().register(JobFinishEvent.Type.class, new EventHandler<JobFinishEvent>() { @Override public void handle(JobFinishEvent event) { stop(); } }); return newJob; }
@Test public void testJobRebootOnLastRetryOnUnregistrationFailure() throws Exception { // make startCount as 2 since this is last retry which equals to // DEFAULT_MAX_AM_RETRY // The last param mocks the unregistration failure MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true, 2, false); Configuration conf = new Configuration(); Job job = app.submit(conf); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an reboot event app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState((JobImpl) job, JobStateInternal.REBOOT); // return exteranl state as RUNNING if this is the last retry while // unregistration fails app.waitForState(job, JobState.RUNNING); }
app.getContext().getEventHandler().handle( new TaskAttemptEvent( attempt.getID(), app.getContext().getEventHandler().handle( new TaskAttemptEvent( attempt.getID(), app.getContext().getEventHandler().handle( new TaskAttemptEvent( task.getAttempts().values().iterator().next().getID(),
app.getContext().getEventHandler().handle( new TaskEvent(task1.getID(), TaskEventType.T_KILL));
public void verifyCompleted() { for (Job job : getContext().getAllJobs().values()) { JobReport jobReport = job.getReport(); System.out.println("Job start time :" + jobReport.getStartTime()); System.out.println("Job finish time :" + jobReport.getFinishTime()); Assert.assertTrue("Job start time is not less than finish time", jobReport.getStartTime() <= jobReport.getFinishTime()); Assert.assertTrue("Job finish time is in future", jobReport.getFinishTime() <= System.currentTimeMillis()); for (Task task : job.getTasks().values()) { TaskReport taskReport = task.getReport(); System.out.println("Task start time : " + taskReport.getStartTime()); System.out.println("Task finish time : " + taskReport.getFinishTime()); Assert.assertTrue("Task start time is not less than finish time", taskReport.getStartTime() <= taskReport.getFinishTime()); for (TaskAttempt attempt : task.getAttempts().values()) { TaskAttemptReport attemptReport = attempt.getReport(); Assert.assertTrue("Attempt start time is not less than finish time", attemptReport.getStartTime() <= attemptReport.getFinishTime()); } } } }
JobImpl job = (JobImpl)app.submit(conf); app.waitForState(job, JobState.RUNNING); app.getContext().getEventHandler() .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState(job, JobStateInternal.REBOOT);
@Test public void testAbsentNotificationOnNotLastRetryUnregistrationFailure() throws Exception { HttpServer2 server = startHttpServer(); MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false, this.getClass().getName(), true, 1, false)); doNothing().when(app).sysexit(); JobConf conf = new JobConf(); conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); JobImpl job = (JobImpl)app.submit(conf); app.waitForState(job, JobState.RUNNING); app.getContext().getEventHandler() .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState(job, JobStateInternal.REBOOT); // Now shutdown. // Unregistration fails: isLastAMRetry is recalculated, this is not app.shutDownJob(); // Not the last AM attempt. So user should that the job is still running. app.waitForState(job, JobState.RUNNING); Assert.assertFalse(app.isLastAMRetry()); Assert.assertEquals(0, JobEndServlet.calledTimes); Assert.assertNull(JobEndServlet.requestUri); Assert.assertNull(JobEndServlet.foundJobState); server.stop(); }
@Override public void handle(ContainerAllocatorEvent event) { ContainerId cId = ContainerId.newContainerId(getContext().getApplicationAttemptId(), containerCount++); NodeId nodeId = NodeId.newInstance(NM_HOST, NM_PORT); ContainerTokenIdentifier containerTokenIdentifier = new ContainerTokenIdentifier(cId, nodeId.toString(), "user", resource, System.currentTimeMillis() + 10000, 42, 42, Priority.newInstance(0), 0); Token containerToken = newContainerToken(nodeId, "password".getBytes(), containerTokenIdentifier); Container container = Container.newInstance(cId, nodeId, NM_HOST + ":" + NM_HTTP_PORT, resource, null, containerToken); JobID id = TypeConverter.fromYarn(applicationId); JobId jobId = TypeConverter.toYarn(id); getContext().getEventHandler().handle(new JobHistoryEvent(jobId, new NormalizedResourceEvent( org.apache.hadoop.mapreduce.TaskType.REDUCE, 100))); getContext().getEventHandler().handle(new JobHistoryEvent(jobId, new NormalizedResourceEvent( org.apache.hadoop.mapreduce.TaskType.MAP, 100))); getContext().getEventHandler().handle( new TaskAttemptContainerAssignedEvent(event.getAttemptID(), container, null)); }
@Override public void handle(ContainerLauncherEvent event) { switch (event.getType()) { case CONTAINER_REMOTE_LAUNCH: containerLaunched(event.getTaskAttemptID(), shufflePort); attemptLaunched(event.getTaskAttemptID()); break; case CONTAINER_REMOTE_CLEANUP: getContext().getEventHandler().handle( new TaskAttemptEvent(event.getTaskAttemptID(), TaskAttemptEventType.TA_CONTAINER_CLEANED)); break; case CONTAINER_COMPLETED: break; } } }