/**
 * Verify task timeout is set as expected in TaskHeartBeatHandler with given
 * configuration.
 *
 * <p>A fresh {@code TaskHeartbeatHandler} is created with a {@code null}
 * event handler, the system clock and a single thread, initialized with
 * {@code conf}, and its reported task timeout is compared against
 * {@code expectedTimeout}.
 *
 * @param conf the configuration
 * @param expectedTimeout expected timeout value
 */
private static void verifyTaskTimeoutConfig(final Configuration conf,
    final long expectedTimeout) {
  final TaskHeartbeatHandler hb =
      new TaskHeartbeatHandler(null, SystemClock.getInstance(), 1);
  hb.init(conf);

  // assertEquals (rather than assertTrue on ==) makes a failure report both
  // the expected and the actual timeout value.
  Assert.assertEquals("The value of the task timeout is incorrect.",
      expectedTimeout, hb.getTaskTimeOut());
}
}
/**
 * Registers a launched task attempt: adds {@code jvmId} to
 * {@code launchedJVMs} and then registers {@code attemptID} with the task
 * heartbeat handler.
 *
 * @param attemptID the task attempt to register with the heartbeat handler
 * @param jvmId the ID of the JVM to record in {@code launchedJVMs}
 */
@Override
public void registerLaunchedTask(
    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID,
    WrappedJvmID jvmId) {
  // The AM considers the task to be launched (Has asked the NM to launch it)
  // The JVM will only be given a task after this registration.
  launchedJVMs.add(jvmId);

  taskHeartbeatHandler.register(attemptID);
}
/**
 * Unregisters a task attempt: removes {@code jvmID} from
 * {@code launchedJVMs} and then from {@code jvmIDToActiveAttemptMap}, and
 * finally unregisters {@code attemptID} from the task heartbeat handler.
 *
 * @param attemptID the task attempt to unregister from the heartbeat handler
 * @param jvmID the ID of the JVM whose mappings are removed
 */
@Override
public void unregister(
    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID,
    WrappedJvmID jvmID) {
  // Unregistration also comes from the same TaskAttempt which does the
  // registration. Events are ordered at TaskAttempt, so unregistration will
  // always come after registration.

  // Remove from launchedJVMs before jvmIDToActiveAttemptMap to avoid
  // synchronization issue with getTask(). getTask should be checking
  // jvmIDToActiveAttemptMap before it checks launchedJVMs.

  // remove the mappings if not already removed
  launchedJVMs.remove(jvmID);
  jvmIDToActiveAttemptMap.remove(jvmID);

  // unregister this attempt
  taskHeartbeatHandler.unregister(attemptID);
}
clock.setTime(0); final TaskHeartbeatHandler hb = new TaskHeartbeatHandler(mockHandler, clock, 1); Configuration conf = new Configuration(); conf.setInt(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 1); hb.init(conf); hb.start(); try { ApplicationId appId = ApplicationId.newInstance(0l, 5); TaskId tid = MRBuilderUtils.newTaskId(jobId, 3, TaskType.MAP); final TaskAttemptId taid = MRBuilderUtils.newTaskAttemptId(tid, 2); Assert.assertFalse(hb.hasRecentlyUnregistered(taid)); hb.register(taid); Assert.assertFalse(hb.hasRecentlyUnregistered(taid)); hb.unregister(taid); Assert.assertTrue(hb.hasRecentlyUnregistered(taid)); long unregisterTimeout = conf.getLong(MRJobConfig.TASK_EXIT_TIMEOUT, MRJobConfig.TASK_EXIT_TIMEOUT_DEFAULT); hb.stop();
/**
 * Handles the "done" acknowledgement of a task attempt.
 *
 * <p>The attempt ID is converted to its YARN form, the heartbeat handler is
 * told the attempt is progressing, and a {@code TA_DONE} event for the
 * attempt is handed to the context's event handler.
 *
 * @param taskAttemptID the attempt acknowledging completion
 * @throws IOException declared by the overridden method
 */
@Override
public void done(TaskAttemptID taskAttemptID) throws IOException {
  LOG.info("Done acknowledgement from " + taskAttemptID.toString());

  final org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptId =
      TypeConverter.toYarn(taskAttemptID);

  // Report progress first, then announce completion.
  taskHeartbeatHandler.progressing(yarnAttemptId);

  final TaskAttemptEvent doneEvent =
      new TaskAttemptEvent(yarnAttemptId, TaskAttemptEventType.TA_DONE);
  context.getEventHandler().handle(doneEvent);
}
@SuppressWarnings({ "rawtypes", "unchecked" }) @Test public void testTimeout() throws InterruptedException { EventHandler mockHandler = mock(EventHandler.class); Clock clock = SystemClock.getInstance(); TaskHeartbeatHandler hb = new TaskHeartbeatHandler(mockHandler, clock, 1); Configuration conf = new Configuration(); conf.setInt(MRJobConfig.TASK_TIMEOUT, 10); //10 ms // set TASK_PROGRESS_REPORT_INTERVAL to a value smaller than TASK_TIMEOUT // so that TASK_TIMEOUT is not overridden conf.setLong(MRJobConfig.TASK_PROGRESS_REPORT_INTERVAL, 5); conf.setInt(MRJobConfig.TASK_TIMEOUT_CHECK_INTERVAL_MS, 10); //10 ms hb.init(conf); hb.start(); try { ApplicationId appId = ApplicationId.newInstance(0l, 5); JobId jobId = MRBuilderUtils.newJobId(appId, 4); TaskId tid = MRBuilderUtils.newTaskId(jobId, 3, TaskType.MAP); TaskAttemptId taid = MRBuilderUtils.newTaskAttemptId(tid, 2); hb.register(taid); Thread.sleep(100); //Events only happen when the task is canceled verify(mockHandler, times(2)).handle(any(Event.class)); } finally { hb.stop(); } }
@Test public void testStatusUpdateProgress() throws IOException, InterruptedException { configureMocks(); startListener(true); verify(hbHandler).register(attemptId); // make sure a ping doesn't report progress AMFeedback feedback = listener.statusUpdate(attemptID, null); assertTrue(feedback.getTaskFound()); verify(hbHandler, never()).progressing(eq(attemptId)); // make sure a status update does report progress MapTaskStatus mockStatus = new MapTaskStatus(attemptID, 0.0f, 1, TaskStatus.State.RUNNING, "", "RUNNING", "", TaskStatus.Phase.MAP, new Counters()); feedback = listener.statusUpdate(attemptID, mockStatus); assertTrue(feedback.getTaskFound()); verify(hbHandler).progressing(eq(attemptId)); }
/**
 * Creates the task heartbeat handler and adds it as a service.
 *
 * <p>The handler is built from the context's event handler and clock; its
 * thread count is read from {@code MR_AM_TASK_LISTENER_THREAD_COUNT}, falling
 * back to {@code DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT}.
 *
 * @param conf configuration supplying the listener thread count
 */
protected void registerHeartbeatHandler(Configuration conf) {
  final int listenerThreadCount = conf.getInt(
      MRJobConfig.MR_AM_TASK_LISTENER_THREAD_COUNT,
      MRJobConfig.DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT);

  taskHeartbeatHandler = new TaskHeartbeatHandler(
      context.getEventHandler(), context.getClock(), listenerThreadCount);
  addService(taskHeartbeatHandler);
}
if (!taskHeartbeatHandler.hasRecentlyUnregistered(yarnAttemptID)) { LOG.error("Status update was called with illegal TaskAttemptId: " + yarnAttemptID); taskHeartbeatHandler.progressing(yarnAttemptID); TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus();
@Override public Boolean get() { return !hb.hasRecentlyUnregistered(taid); } }, 10, 10000);
/**
 * Handles the "done" acknowledgment of a task attempt.
 *
 * <p>The attempt ID is converted to its YARN form, the heartbeat handler is
 * told the attempt is progressing, and a {@code TA_DONE} event for the
 * attempt is handed to the context's event handler.
 *
 * @param taskAttemptID the attempt acknowledging completion
 * @throws IOException declared by the overridden method
 */
@Override
public void done(TaskAttemptID taskAttemptID) throws IOException {
  LOG.info("Done acknowledgment from " + taskAttemptID.toString());

  final org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptId =
      TypeConverter.toYarn(taskAttemptID);

  // Report progress first, then announce completion.
  taskHeartbeatHandler.progressing(yarnAttemptId);

  final TaskAttemptEvent doneEvent =
      new TaskAttemptEvent(yarnAttemptId, TaskAttemptEventType.TA_DONE);
  context.getEventHandler().handle(doneEvent);
}
/**
 * Creates the task heartbeat handler and adds it as a service.
 *
 * <p>The handler is built from the context's event handler and clock; its
 * thread count is read from {@code MR_AM_TASK_LISTENER_THREAD_COUNT}, falling
 * back to {@code DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT}.
 *
 * @param conf configuration supplying the listener thread count
 */
protected void registerHeartbeatHandler(Configuration conf) {
  final int listenerThreadCount = conf.getInt(
      MRJobConfig.MR_AM_TASK_LISTENER_THREAD_COUNT,
      MRJobConfig.DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT);

  taskHeartbeatHandler = new TaskHeartbeatHandler(
      context.getEventHandler(), context.getClock(), listenerThreadCount);
  addService(taskHeartbeatHandler);
}
/**
 * Handles the "done" acknowledgement of a task attempt.
 *
 * <p>The attempt ID is converted to its YARN form, the heartbeat handler is
 * told the attempt is progressing, and a {@code TA_DONE} event for the
 * attempt is handed to the context's event handler.
 *
 * @param taskAttemptID the attempt acknowledging completion
 * @throws IOException declared by the overridden method
 */
@Override
public void done(TaskAttemptID taskAttemptID) throws IOException {
  LOG.info("Done acknowledgement from " + taskAttemptID.toString());

  final org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptId =
      TypeConverter.toYarn(taskAttemptID);

  // Report progress first, then announce completion.
  taskHeartbeatHandler.progressing(yarnAttemptId);

  final TaskAttemptEvent doneEvent =
      new TaskAttemptEvent(yarnAttemptId, TaskAttemptEventType.TA_DONE);
  context.getEventHandler().handle(doneEvent);
}
/**
 * Registers a launched task attempt: adds {@code jvmId} to
 * {@code launchedJVMs} and then registers {@code attemptID} with the task
 * heartbeat handler.
 *
 * @param attemptID the task attempt to register with the heartbeat handler
 * @param jvmId the ID of the JVM to record in {@code launchedJVMs}
 */
@Override
public void registerLaunchedTask(
    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID,
    WrappedJvmID jvmId) {
  // The AM considers the task to be launched (Has asked the NM to launch it)
  // The JVM will only be given a task after this registration.
  launchedJVMs.add(jvmId);

  taskHeartbeatHandler.register(attemptID);
}
/**
 * Creates the task heartbeat handler and adds it as a service.
 *
 * <p>The handler is built from the context's event handler and clock; its
 * thread count is read from {@code MR_AM_TASK_LISTENER_THREAD_COUNT}, falling
 * back to {@code DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT}.
 *
 * @param conf configuration supplying the listener thread count
 */
protected void registerHeartbeatHandler(Configuration conf) {
  final int listenerThreadCount = conf.getInt(
      MRJobConfig.MR_AM_TASK_LISTENER_THREAD_COUNT,
      MRJobConfig.DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT);

  taskHeartbeatHandler = new TaskHeartbeatHandler(
      context.getEventHandler(), context.getClock(), listenerThreadCount);
  addService(taskHeartbeatHandler);
}
/**
 * Unregisters a task attempt: removes {@code jvmID} from
 * {@code launchedJVMs} and then from {@code jvmIDToActiveAttemptMap}, and
 * finally unregisters {@code attemptID} from the task heartbeat handler.
 *
 * @param attemptID the task attempt to unregister from the heartbeat handler
 * @param jvmID the ID of the JVM whose mappings are removed
 */
@Override
public void unregister(
    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID,
    WrappedJvmID jvmID) {
  // Unregistration also comes from the same TaskAttempt which does the
  // registration. Events are ordered at TaskAttempt, so unregistration will
  // always come after registration.

  // Remove from launchedJVMs before jvmIDToActiveAttemptMap to avoid
  // synchronization issue with getTask(). getTask should be checking
  // jvmIDToActiveAttemptMap before it checks launchedJVMs.

  // remove the mappings if not already removed
  launchedJVMs.remove(jvmID);
  jvmIDToActiveAttemptMap.remove(jvmID);

  // unregister this attempt
  taskHeartbeatHandler.unregister(attemptID);
}
@Override public void preempted(TaskAttemptID taskAttemptID, TaskStatus taskStatus) throws IOException, InterruptedException { LOG.info("Preempted state update from " + taskAttemptID.toString()); // An attempt is telling us that it got preempted. org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter.toYarn(taskAttemptID); preemptionPolicy.reportSuccessfulPreemption(attemptID); taskHeartbeatHandler.progressing(attemptID); context.getEventHandler().handle( new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_PREEMPTED)); }
/**
 * Registers a launched task attempt: adds {@code jvmId} to
 * {@code launchedJVMs}, registers {@code attemptID} with the task heartbeat
 * handler, and stores a new, empty {@code AtomicReference} for the attempt
 * in {@code attemptIdToStatus}.
 *
 * @param attemptID the task attempt to register
 * @param jvmId the ID of the JVM to record in {@code launchedJVMs}
 */
@Override
public void registerLaunchedTask(
    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID,
    WrappedJvmID jvmId) {
  // The AM considers the task to be launched (Has asked the NM to launch it)
  // The JVM will only be given a task after this registration.
  launchedJVMs.add(jvmId);

  taskHeartbeatHandler.register(attemptID);

  // Start the attempt off with an empty status holder.
  attemptIdToStatus.put(attemptID, new AtomicReference<>());
}
/**
 * Unregisters a task attempt: removes {@code jvmID} from
 * {@code launchedJVMs} and then from {@code jvmIDToActiveAttemptMap},
 * unregisters {@code attemptID} from the task heartbeat handler, and finally
 * removes the attempt's entry from {@code attemptIdToStatus}.
 *
 * @param attemptID the task attempt to unregister
 * @param jvmID the ID of the JVM whose mappings are removed
 */
@Override
public void unregister(
    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID,
    WrappedJvmID jvmID) {
  // Unregistration also comes from the same TaskAttempt which does the
  // registration. Events are ordered at TaskAttempt, so unregistration will
  // always come after registration.

  // Remove from launchedJVMs before jvmIDToActiveAttemptMap to avoid
  // synchronization issue with getTask(). getTask should be checking
  // jvmIDToActiveAttemptMap before it checks launchedJVMs.

  // remove the mappings if not already removed
  launchedJVMs.remove(jvmID);
  jvmIDToActiveAttemptMap.remove(jvmID);

  // unregister this attempt
  taskHeartbeatHandler.unregister(attemptID);

  // drop the status holder kept for this attempt
  attemptIdToStatus.remove(attemptID);
}
@Override public void reportDiagnosticInfo(TaskAttemptID taskAttemptID, String diagnosticInfo) throws IOException { diagnosticInfo = StringInterner.weakIntern(diagnosticInfo); LOG.info("Diagnostics report from " + taskAttemptID.toString() + ": " + diagnosticInfo); org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter.toYarn(taskAttemptID); taskHeartbeatHandler.progressing(attemptID); // This is mainly used for cases where we want to propagate exception traces // of tasks that fail. // This call exists as a hadoop mapreduce legacy wherein all changes in // counters/progress/phase/output-size are reported through statusUpdate() // call but not diagnosticInformation. context.getEventHandler().handle( new TaskAttemptDiagnosticsUpdateEvent(attemptID, diagnosticInfo)); }