private static void verifySpeculationMessage(MRApp app, TaskAttempt[] ta) throws Exception { app.waitForState(ta[0], TaskAttemptState.SUCCEEDED); // The speculative attempt may be not killed before the MR job succeeds. }
@Test public void testZeroMapReduces() throws Exception{ MRApp app = new MRApp(0, 0, true, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.SUCCEEDED); }
conf.setInt(MRJobConfig.REDUCE_MAX_ATTEMPTS, 1); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); conf.setInt(MRJobConfig.REDUCE_MAX_ATTEMPTS, 1); job = app.submit(conf); app.waitForState(job, JobState.SUCCEEDED);
@Test public void testZeroMaps() throws Exception { MRApp app = new MRApp(0, 1, true, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.SUCCEEDED); app.verifyCompleted(); }
@Test public void testMapFailureMaxPercent() throws Exception { MRApp app = new MockFirstFailingTaskMRApp(4, 0); Configuration conf = new Configuration(); //reduce the no of attempts so test run faster conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2); conf.setInt(MRJobConfig.REDUCE_MAX_ATTEMPTS, 1); conf.setInt(MRJobConfig.MAP_FAILURES_MAX_PERCENT, 20); conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); //setting the failure percentage to 25% (1/4 is 25) will //make the Job successful app = new MockFirstFailingTaskMRApp(4, 0); conf = new Configuration(); //reduce the no of attempts so test run faster conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2); conf.setInt(MRJobConfig.REDUCE_MAX_ATTEMPTS, 1); conf.setInt(MRJobConfig.MAP_FAILURES_MAX_PERCENT, 25); conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1); job = app.submit(conf); app.waitForState(job, JobState.SUCCEEDED); }
@SuppressWarnings("resource") @Test public void testJobSuccess() throws Exception { MRApp app = new MRApp(2, 2, true, this.getClass().getName(), true, false); JobImpl job = (JobImpl) app.submit(new Configuration()); app.waitForInternalState(job, JobStateInternal.SUCCEEDED); // AM is not unregistered Assert.assertEquals(JobState.RUNNING, job.getState()); // imitate that AM is unregistered app.successfullyUnregistered.set(true); app.waitForState(job, JobState.SUCCEEDED); }
@Test public void testMapReduce() throws Exception { MRApp app = new MRApp(2, 2, true, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.SUCCEEDED); app.verifyCompleted(); Assert.assertEquals(System.getProperty("user.name"),job.getUserName()); }
@Test public void testJobRebootNotLastRetryOnUnregistrationFailure() throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an reboot event app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); // return exteranl state as RUNNING since otherwise the JobClient will // prematurely exit. app.waitForState(job, JobState.RUNNING); }
@Test public void testJobError() throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an invalid event on task at current state app.getContext().getEventHandler().handle( new TaskEvent( task.getID(), TaskEventType.T_SCHEDULE)); //this must lead to job error app.waitForState(job, JobState.ERROR); }
@Test public void testTaskFailWithUnusedContainer() throws Exception { MRApp app = new MRAppWithFailingTaskAndUnusedContainer(); Configuration conf = new Configuration(); int maxAttempts = 1; conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts); // disable uberization (requires entire job to be reattempted, so max for // subtask attempts is overridden to 1) conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); Job job = app.submit(conf); app.waitForState(job, JobState.RUNNING); Map<TaskId, Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); app.waitForState(task, TaskState.SCHEDULED); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator() .next().getAttempts(); Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts .size()); TaskAttempt attempt = attempts.values().iterator().next(); app.waitForInternalState((TaskAttemptImpl) attempt, TaskAttemptStateInternal.ASSIGNED); app.getDispatcher().getEventHandler().handle( new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_CONTAINER_COMPLETED)); app.waitForState(job, JobState.FAILED); }
@Test public void testContainerPassThrough() throws Exception { MRApp app = new MRApp(0, 1, true, this.getClass().getName(), true) { @Override protected ContainerLauncher createContainerLauncher(AppContext context) { return new MockContainerLauncher() { @Override public void handle(ContainerLauncherEvent event) { if (event instanceof ContainerRemoteLaunchEvent) { containerObtainedByContainerLauncher = ((ContainerRemoteLaunchEvent) event).getAllocatedContainer(); } super.handle(event); } }; }; }; Job job = app.submit(new Configuration()); app.waitForState(job, JobState.SUCCEEDED); app.verifyCompleted(); Collection<Task> tasks = job.getTasks().values(); Collection<TaskAttempt> taskAttempts = tasks.iterator().next().getAttempts().values(); TaskAttemptImpl taskAttempt = (TaskAttemptImpl) taskAttempts.iterator().next(); // Container from RM should pass through to the launcher. Container object // should be the same. Assert.assertTrue(taskAttempt.container == containerObtainedByContainerLauncher); }
@Test public void testJobRebootOnLastRetryOnUnregistrationFailure() throws Exception { // make startCount as 2 since this is last retry which equals to // DEFAULT_MAX_AM_RETRY // The last param mocks the unregistration failure MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true, 2, false); Configuration conf = new Configuration(); Job job = app.submit(conf); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an reboot event app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState((JobImpl) job, JobStateInternal.REBOOT); // return exteranl state as RUNNING if this is the last retry while // unregistration fails app.waitForState(job, JobState.RUNNING); }
private void finishTask(DrainDispatcher rmDispatcher, MockNM node, MRApp mrApp, Task task) throws Exception { TaskAttempt attempt = task.getAttempts().values().iterator().next(); List<ContainerStatus> contStatus = new ArrayList<ContainerStatus>(1); contStatus.add(ContainerStatus.newInstance(attempt.getAssignedContainerID(), ContainerState.COMPLETE, "", 0)); Map<ApplicationId,List<ContainerStatus>> statusUpdate = new HashMap<ApplicationId,List<ContainerStatus>>(1); statusUpdate.put(mrApp.getAppID(), contStatus); node.nodeHeartbeat(statusUpdate, true); rmDispatcher.await(); mrApp.getContext().getEventHandler().handle( new TaskAttemptEvent(attempt.getID(), TaskAttemptEventType.TA_DONE)); mrApp.waitForState(task, TaskState.SUCCEEDED); }
MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); TaskAttempt attempt = task.getAttempts().values().iterator().next(); app.waitForState(attempt, TaskAttemptState.RUNNING); app.waitForState(attempt, TaskAttemptState.COMMIT_PENDING); app.waitForState(attempt, TaskAttemptState.COMMIT_PENDING); TaskAttemptEventType.TA_DONE)); app.waitForState(job, JobState.SUCCEEDED);
private void testMRAppHistory(MRApp app) throws Exception { Configuration conf = new Configuration(); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); Map<TaskId, Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals("Task state not correct", TaskState.FAILED, task .getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next() .getAttempts(); Assert.assertEquals("Num attempts is not correct", 4, attempts.size()); Iterator<TaskAttempt> it = attempts.values().iterator(); TaskAttemptReport report = it.next().getReport(); Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, report.getTaskAttemptState()); Assert.assertEquals("Diagnostic Information is not Correct", "Test Diagnostic Event", report.getDiagnosticInfo()); report = it.next().getReport(); Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, report.getTaskAttemptState()); }
@Test public void testPreviousJobOutputCleanedWhenNoRecovery() throws Exception { int runCount = 0; MRApp app = new MRAppWithHistory(1, 2, false, this.getClass().getName(), true, ++runCount); Configuration conf = new Configuration(); conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, false); conf.setClass("mapred.output.committer.class", TestFileOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class); conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); conf.set(FileOutputFormat.OUTDIR, outputDir.toString()); Job job = app.submit(conf); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("No of tasks not correct", 3, job.getTasks().size()); //stop the app before the job completes. app.stop(); app.close(); //rerun app = new MRAppWithHistory(1, 2, false, this.getClass().getName(), false, ++runCount); job = app.submit(conf); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("No of tasks not correct", 3, job.getTasks().size()); TestFileOutputCommitter committer = ( TestFileOutputCommitter) app.getCommitter(); assertTrue("commiter.abortJob() has not been called", committer.isAbortJobCalled()); app.close(); }
@Test //All Task attempts are timed out, leading to Job failure public void testTimedOutTask() throws Exception { MRApp app = new TimeOutTaskMRApp(1, 0); Configuration conf = new Configuration(); int maxAttempts = 2; conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, maxAttempts); // disable uberization (requires entire job to be reattempted, so max for // subtask attempts is overridden to 1) conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); Job job = app.submit(conf); app.waitForState(job, JobState.FAILED); Map<TaskId,Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals("Task state not correct", TaskState.FAILED, task.getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts(); Assert.assertEquals("Num attempts is not correct", maxAttempts, attempts.size()); for (TaskAttempt attempt : attempts.values()) { Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, attempt.getReport().getTaskAttemptState()); } }
@Test //First attempt is failed and second attempt is passed //The job succeeds. public void testFailTask() throws Exception { MRApp app = new MockFirstFailingAttemptMRApp(1, 0); Configuration conf = new Configuration(); // this test requires two task attempts, but uberization overrides max to 1 conf.setBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false); Job job = app.submit(conf); app.waitForState(job, JobState.SUCCEEDED); Map<TaskId,Task> tasks = job.getTasks(); Assert.assertEquals("Num tasks is not correct", 1, tasks.size()); Task task = tasks.values().iterator().next(); Assert.assertEquals("Task state not correct", TaskState.SUCCEEDED, task.getReport().getTaskState()); Map<TaskAttemptId, TaskAttempt> attempts = tasks.values().iterator().next().getAttempts(); Assert.assertEquals("Num attempts is not correct", 2, attempts.size()); //one attempt must be failed //and another must have succeeded Iterator<TaskAttempt> it = attempts.values().iterator(); Assert.assertEquals("Attempt state not correct", TaskAttemptState.FAILED, it.next().getReport().getTaskAttemptState()); Assert.assertEquals("Attempt state not correct", TaskAttemptState.SUCCEEDED, it.next().getReport().getTaskAttemptState()); }
@Test public void testAbsentNotificationOnNotLastRetryUnregistrationFailure() throws Exception { HttpServer2 server = startHttpServer(); MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false, this.getClass().getName(), true, 1, false)); doNothing().when(app).sysexit(); JobConf conf = new JobConf(); conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); JobImpl job = (JobImpl)app.submit(conf); app.waitForState(job, JobState.RUNNING); app.getContext().getEventHandler() .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState(job, JobStateInternal.REBOOT); // Now shutdown. // Unregistration fails: isLastAMRetry is recalculated, this is not app.shutDownJob(); // Not the last AM attempt. So user should that the job is still running. app.waitForState(job, JobState.RUNNING); Assert.assertFalse(app.isLastAMRetry()); Assert.assertEquals(0, JobEndServlet.calledTimes); Assert.assertNull(JobEndServlet.requestUri); Assert.assertNull(JobEndServlet.foundJobState); server.stop(); }
JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); JobImpl job = (JobImpl)app.submit(conf); app.waitForState(job, JobState.RUNNING); app.getContext().getEventHandler() .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));