@SuppressWarnings("unchecked") @Override public void run() { ContainerAllocatorEvent event; while (!stopped.get() && !Thread.currentThread().isInterrupted()) { try { event = RMContainerAllocator.this.eventQueue.take(); } catch (InterruptedException e) { if (!stopped.get()) { LOG.error("Returning, interrupted : " + e); } return; } try { handleEvent(event); } catch (Throwable t) { LOG.error("Error in handling event type " + event.getType() + " to the ContainreAllocator", t); // Kill the AM eventHandler.handle(new JobEvent(getJob().getID(), JobEventType.INTERNAL_ERROR)); return; } } } };
@Override public void transition(JobImpl job, JobEvent event) { job.setupProgress = 1.0f; job.scheduleTasks(job.mapTasks, job.numReduceTasks == 0); job.scheduleTasks(job.reduceTasks, true); // If we have no tasks, just transition to job completed if (job.numReduceTasks == 0 && job.numMapTasks == 0) { job.eventHandler.handle(new JobEvent(job.jobId, JobEventType.JOB_COMPLETED)); } } }
@SuppressWarnings("unchecked") @Override public void run() { ContainerAllocatorEvent event; while (!stopped.get() && !Thread.currentThread().isInterrupted()) { try { event = RMContainerAllocator.this.eventQueue.take(); } catch (InterruptedException e) { if (!stopped.get()) { LOG.error("Returning, interrupted : " + e); } return; } try { handleEvent(event); } catch (Throwable t) { LOG.error("Error in handling event type " + event.getType() + " to the ContainreAllocator", t); // Kill the AM eventHandler.handle(new JobEvent(getJob().getID(), JobEventType.INTERNAL_ERROR)); return; } } } };
@SuppressWarnings("unchecked") @Override public void run() { ContainerAllocatorEvent event; while (!stopped.get() && !Thread.currentThread().isInterrupted()) { try { event = RMContainerAllocator.this.eventQueue.take(); } catch (InterruptedException e) { if (!stopped.get()) { LOG.error("Returning, interrupted : " + e); } return; } try { handleEvent(event); } catch (Throwable t) { LOG.error("Error in handling event type " + event.getType() + " to the ContainreAllocator", t); // Kill the AM eventHandler.handle(new JobEvent(getJob().getID(), JobEventType.INTERNAL_ERROR)); return; } } } };
@Override public void transition(JobImpl job, JobEvent event) { job.setupProgress = 1.0f; job.scheduleTasks(job.mapTasks, job.numReduceTasks == 0); job.scheduleTasks(job.reduceTasks, true); // If we have no tasks, just transition to job completed if (job.numReduceTasks == 0 && job.numMapTasks == 0) { job.eventHandler.handle(new JobEvent(job.jobId, JobEventType.JOB_COMPLETED)); } } }
@Override public void transition(JobImpl job, JobEvent event) { job.setupProgress = 1.0f; job.scheduleTasks(job.mapTasks, job.numReduceTasks == 0); job.scheduleTasks(job.reduceTasks, true); // If we have no tasks, just transition to job completed if (job.numReduceTasks == 0 && job.numMapTasks == 0) { job.eventHandler.handle(new JobEvent(job.jobId, JobEventType.JOB_COMPLETED)); } } }
protected void internalError(TaskEventType type) { LOG.error("Invalid event " + type + " on Task " + this.taskId); eventHandler.handle(new JobDiagnosticsUpdateEvent( this.taskId.getJobId(), "Invalid event " + type + " on Task " + this.taskId)); eventHandler.handle(new JobEvent(this.taskId.getJobId(), JobEventType.INTERNAL_ERROR)); }
protected void internalError(TaskEventType type) { LOG.error("Invalid event " + type + " on Task " + this.taskId); eventHandler.handle(new JobDiagnosticsUpdateEvent( this.taskId.getJobId(), "Invalid event " + type + " on Task " + this.taskId)); eventHandler.handle(new JobEvent(this.taskId.getJobId(), JobEventType.INTERNAL_ERROR)); }
protected void internalError(TaskEventType type) { LOG.error("Invalid event " + type + " on Task " + this.taskId); eventHandler.handle(new JobDiagnosticsUpdateEvent( this.taskId.getJobId(), "Invalid event " + type + " on Task " + this.taskId)); eventHandler.handle(new JobEvent(this.taskId.getJobId(), JobEventType.INTERNAL_ERROR)); }
@SuppressWarnings("unchecked") @Override public KillJobResponse killJob(KillJobRequest request) throws IOException { JobId jobId = request.getJobId(); UserGroupInformation callerUGI = UserGroupInformation.getCurrentUser(); String message = "Kill job " + jobId + " received from " + callerUGI + " at " + Server.getRemoteAddress(); LOG.info(message); verifyAndGetJob(jobId, JobACL.MODIFY_JOB, false); appContext.getEventHandler().handle( new JobDiagnosticsUpdateEvent(jobId, message)); appContext.getEventHandler().handle( new JobEvent(jobId, JobEventType.JOB_KILL)); KillJobResponse response = recordFactory.newRecordInstance(KillJobResponse.class); return response; }
@SuppressWarnings("unchecked") @Override public KillJobResponse killJob(KillJobRequest request) throws IOException { JobId jobId = request.getJobId(); UserGroupInformation callerUGI = UserGroupInformation.getCurrentUser(); String message = "Kill job " + jobId + " received from " + callerUGI + " at " + Server.getRemoteAddress(); LOG.info(message); verifyAndGetJob(jobId, JobACL.MODIFY_JOB, false); appContext.getEventHandler().handle( new JobDiagnosticsUpdateEvent(jobId, message)); appContext.getEventHandler().handle( new JobEvent(jobId, JobEventType.JOB_KILL)); KillJobResponse response = recordFactory.newRecordInstance(KillJobResponse.class); return response; }
@SuppressWarnings("unchecked") @Override public KillJobResponse killJob(KillJobRequest request) throws IOException { JobId jobId = request.getJobId(); UserGroupInformation callerUGI = UserGroupInformation.getCurrentUser(); String message = "Kill job " + jobId + " received from " + callerUGI + " at " + Server.getRemoteAddress(); LOG.info(message); verifyAndGetJob(jobId, JobACL.MODIFY_JOB, false); appContext.getEventHandler().handle( new JobDiagnosticsUpdateEvent(jobId, message)); appContext.getEventHandler().handle( new JobEvent(jobId, JobEventType.JOB_KILL)); KillJobResponse response = recordFactory.newRecordInstance(KillJobResponse.class); return response; }
@SuppressWarnings("unchecked") @Override public void handle(TaskAttemptEvent event) { if (LOG.isDebugEnabled()) { LOG.debug("Processing " + event.getTaskAttemptID() + " of type " + event.getType()); } writeLock.lock(); try { final TaskAttemptStateInternal oldState = getInternalState() ; try { stateMachine.doTransition(event.getType(), event); } catch (InvalidStateTransitonException e) { LOG.error("Can't handle this event at current state for " + this.attemptId, e); eventHandler.handle(new JobDiagnosticsUpdateEvent( this.attemptId.getTaskId().getJobId(), "Invalid event " + event.getType() + " on TaskAttempt " + this.attemptId)); eventHandler.handle(new JobEvent(this.attemptId.getTaskId().getJobId(), JobEventType.INTERNAL_ERROR)); } if (oldState != getInternalState()) { LOG.info(attemptId + " TaskAttempt Transitioned from " + oldState + " to " + getInternalState()); } } finally { writeLock.unlock(); } }
@Test public void testJobRebootNotLastRetryOnUnregistrationFailure() throws Exception { MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true); Job job = app.submit(new Configuration()); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an reboot event app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); // return exteranl state as RUNNING since otherwise the JobClient will // prematurely exit. app.waitForState(job, JobState.RUNNING); }
@Test public void testJobRebootOnLastRetryOnUnregistrationFailure() throws Exception { // make startCount as 2 since this is last retry which equals to // DEFAULT_MAX_AM_RETRY // The last param mocks the unregistration failure MRApp app = new MRApp(1, 0, false, this.getClass().getName(), true, 2, false); Configuration conf = new Configuration(); Job job = app.submit(conf); app.waitForState(job, JobState.RUNNING); Assert.assertEquals("Num tasks not correct", 1, job.getTasks().size()); Iterator<Task> it = job.getTasks().values().iterator(); Task task = it.next(); app.waitForState(task, TaskState.RUNNING); //send an reboot event app.getContext().getEventHandler().handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState((JobImpl) job, JobStateInternal.REBOOT); // return exteranl state as RUNNING if this is the last retry while // unregistration fails app.waitForState(job, JobState.RUNNING); }
@Test(timeout=20000) public void testRebootedDuringCommit() throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); conf.setInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, 2); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); CyclicBarrier syncBarrier = new CyclicBarrier(2); OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); AppContext mockContext = mock(AppContext.class); when(mockContext.isLastAMRetry()).thenReturn(true); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false); JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, mockContext); completeJobTasks(job); assertJobState(job, JobStateInternal.COMMITTING); syncBarrier.await(); job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); assertJobState(job, JobStateInternal.REBOOT); // return the external state as ERROR since this is last retry. Assert.assertEquals(JobState.RUNNING, job.getState()); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true); Assert.assertEquals(JobState.ERROR, job.getState()); dispatcher.stop(); commitHandler.stop(); }
@Test public void testAbsentNotificationOnNotLastRetryUnregistrationFailure() throws Exception { HttpServer2 server = startHttpServer(); MRApp app = spy(new MRAppWithCustomContainerAllocator(2, 2, false, this.getClass().getName(), true, 1, false)); doNothing().when(app).sysexit(); JobConf conf = new JobConf(); conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL, JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus"); JobImpl job = (JobImpl)app.submit(conf); app.waitForState(job, JobState.RUNNING); app.getContext().getEventHandler() .handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT)); app.waitForInternalState(job, JobStateInternal.REBOOT); // Now shutdown. // Unregistration fails: isLastAMRetry is recalculated, this is not app.shutDownJob(); // Not the last AM attempt. So user should that the job is still running. app.waitForState(job, JobState.RUNNING); Assert.assertFalse(app.isLastAMRetry()); Assert.assertEquals(0, JobEndServlet.calledTimes); Assert.assertNull(JobEndServlet.requestUri); Assert.assertNull(JobEndServlet.foundJobState); server.stop(); }
@Test(timeout=20000) public void testKilledDuringCommit() throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); AsyncDispatcher dispatcher = new AsyncDispatcher(); dispatcher.init(conf); dispatcher.start(); CyclicBarrier syncBarrier = new CyclicBarrier(2); OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true); CommitterEventHandler commitHandler = createCommitterEventHandler(dispatcher, committer); commitHandler.init(conf); commitHandler.start(); JobImpl job = createRunningStubbedJob(conf, dispatcher, 2, null); completeJobTasks(job); assertJobState(job, JobStateInternal.COMMITTING); syncBarrier.await(); job.handle(new JobEvent(job.getID(), JobEventType.JOB_KILL)); assertJobState(job, JobStateInternal.KILLED); dispatcher.stop(); commitHandler.stop(); }
private static StubbedJob createRunningStubbedJob(Configuration conf, Dispatcher dispatcher, int numSplits, AppContext appContext) { StubbedJob job = createStubbedJob(conf, dispatcher, numSplits, appContext); job.handle(new JobEvent(job.getID(), JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); job.handle(new JobStartEvent(job.getID())); assertJobState(job, JobStateInternal.RUNNING); return job; }
@Test public void testJobPriorityUpdate() throws Exception { Configuration conf = new Configuration(); AsyncDispatcher dispatcher = new AsyncDispatcher(); Priority submittedPriority = Priority.newInstance(5); AppContext mockContext = mock(AppContext.class); when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false); JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext); JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.SETUP); // Update priority of job to 5, and it will be updated job.setJobPriority(submittedPriority); Assert.assertEquals(submittedPriority, job.getReport().getJobPriority()); job.handle(new JobSetupCompletedEvent(jobId)); assertJobState(job, JobStateInternal.RUNNING); // Update priority of job to 8, and see whether its updated Priority updatedPriority = Priority.newInstance(8); job.setJobPriority(updatedPriority); assertJobState(job, JobStateInternal.RUNNING); Priority jobPriority = job.getReport().getJobPriority(); Assert.assertNotNull(jobPriority); // Verify whether changed priority is same as what is set in Job. Assert.assertEquals(updatedPriority, jobPriority); }