/**
 * Verifies that, for a freshly constructed {@code JobConf}, the value of
 * JobContext.MAX_TASK_FAILURES_PER_TRACKER is strictly less than both
 * JobContext.MAP_MAX_ATTEMPTS and JobContext.REDUCE_MAX_ATTEMPTS, so that
 * failed tasks will be retried on other nodes.
 */
@Test
public void testMaxTaskFailuresPerTracker() {
  JobConf defaults = new JobConf(true);

  // Check the map-side limit first; the reduce-side limit is only consulted
  // when the map-side comparison already holds.
  boolean belowMapLimit =
      defaults.getMaxTaskFailuresPerTracker() < defaults.getMaxMapAttempts();
  boolean belowBothLimits = belowMapLimit
      && defaults.getMaxTaskFailuresPerTracker() < defaults.getMaxReduceAttempts();

  Assert.assertTrue(
      "By default JobContext.MAX_TASK_FAILURES_PER_TRACKER was "
          + "not less than JobContext.MAP_MAX_ATTEMPTS and REDUCE_MAX_ATTEMPTS",
      belowBothLimits);
}
/**
 * Collects the host names of all trackers whose recorded failure count has
 * reached the job's per-tracker failure limit.
 *
 * @return a new list with one host name per tracker entry whose failure
 *         count is greater than or equal to
 *         {@code getJobConf().getMaxTaskFailuresPerTracker()}; empty when no
 *         entry qualifies
 */
public List<String> getBlackListedTrackers() {
  List<String> blackListedHosts = new ArrayList<String>();
  for (Entry<String, Integer> trackerEntry : trackerToFailureMap.entrySet()) {
    int failureCount = trackerEntry.getValue().intValue();
    // Trackers still under the limit are not reported.
    if (failureCount < this.getJobConf().getMaxTaskFailuresPerTracker()) {
      continue;
    }
    String trackerName = trackerEntry.getKey();
    blackListedHosts.add(
        JobInProgress.convertTrackerNameToHostName(trackerName));
  }
  return blackListedHosts;
}
}
/** * Note that a task has failed on a given tracker and add the tracker * to the blacklist iff too many trackers in the cluster i.e. * (clusterSize * CLUSTER_BLACKLIST_PERCENT) haven't turned 'flaky' already. * * @param trackerName task-tracker on which a task failed */ void addTrackerTaskFailure(String trackerName) { if (flakyTaskTrackers < (clusterSize * CLUSTER_BLACKLIST_PERCENT)) { String trackerHostName = convertTrackerNameToHostName(trackerName); Integer trackerFailures = trackerToFailuresMap.get(trackerHostName); if (trackerFailures == null) { trackerFailures = 0; } trackerToFailuresMap.put(trackerHostName, ++trackerFailures); // Check if this tasktracker has turned 'flaky' if (trackerFailures.intValue() == conf.getMaxTaskFailuresPerTracker()) { ++flakyTaskTrackers; LOG.info("TaskTracker at '" + trackerHostName + "' turned 'flaky'"); } } }
/**
 * Package-private constructor that initialises only the restart counter,
 * the setup/cleanup flag, and the limits read from the supplied
 * configuration.
 *
 * @param conf job configuration supplying the per-task memory settings and
 *             the per-tracker task failure limit
 */
JobInProgress(JobConf conf) {
  // Fixed initial state, independent of the configuration.
  this.jobSetupCleanupNeeded = false;
  this.restartCount = 0;

  // Values copied from the job configuration.
  this.memoryPerMap = conf.getMemoryForMapTask();
  this.memoryPerReduce = conf.getMemoryForReduceTask();
  this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();
}
private boolean shouldRunOnTaskTracker(String taskTracker) { // // Check if too many tasks of this job have failed on this // tasktracker prior to assigning it a new one. // int taskTrackerFailedTasks = getTrackerTaskFailures(taskTracker); if ((flakyTaskTrackers < (clusterSize * CLUSTER_BLACKLIST_PERCENT)) && taskTrackerFailedTasks >= conf.getMaxTaskFailuresPerTracker()) { if (LOG.isDebugEnabled()) { String flakyTracker = convertTrackerNameToHostName(taskTracker); LOG.debug("Ignoring the black-listed tasktracker: '" + flakyTracker + "' for assigning a new task"); } return false; } return true; }
/**
 * Writes an HTML table listing every tracker whose error count for the
 * given job has reached the job's per-tracker failure limit.
 *
 * Tracker names are passed through {@code HtmlQuoting.quoteHtmlChars}
 * before being written.
 *
 * @param out writer receiving the generated HTML
 * @param job job whose tracker error counts are reported
 * @throws IOException if writing to {@code out} fails
 */
private void printBlackListedTrackers(JspWriter out, JobInProgress job)
    throws IOException {
  Map<String, Integer> errorsByTracker = job.getTaskTrackerErrors();

  out.print("<table border=2 cellpadding=\"5\" cellspacing=\"2\">");
  out.print("<tr><th>TaskTracker</th><th>No. of Failures</th></tr>\n");

  int failureLimit = job.getJobConf().getMaxTaskFailuresPerTracker();
  for (Map.Entry<String, Integer> row : errorsByTracker.entrySet()) {
    // Only trackers that reached the limit get a row.
    if (row.getValue().intValue() < failureLimit) {
      continue;
    }
    String quotedTracker = HtmlQuoting.quoteHtmlChars(row.getKey());
    out.print("<tr><td>" + quotedTracker + "</td><td>" + row.getValue()
        + "</td></tr>\n");
  }

  out.print("</table>\n");
}
/**
 * Verifies that, for a freshly constructed {@code JobConf}, the value of
 * JobContext.MAX_TASK_FAILURES_PER_TRACKER is strictly less than both
 * JobContext.MAP_MAX_ATTEMPTS and JobContext.REDUCE_MAX_ATTEMPTS, so that
 * failed tasks will be retried on other nodes.
 */
@Test
public void testMaxTaskFailuresPerTracker() {
  JobConf defaults = new JobConf(true);

  // Check the map-side limit first; the reduce-side limit is only consulted
  // when the map-side comparison already holds.
  boolean belowMapLimit =
      defaults.getMaxTaskFailuresPerTracker() < defaults.getMaxMapAttempts();
  boolean belowBothLimits = belowMapLimit
      && defaults.getMaxTaskFailuresPerTracker() < defaults.getMaxReduceAttempts();

  Assert.assertTrue(
      "By default JobContext.MAX_TASK_FAILURES_PER_TRACKER was "
          + "not less than JobContext.MAP_MAX_ATTEMPTS and REDUCE_MAX_ATTEMPTS",
      belowBothLimits);
}
}
this.memoryPerMap = conf.getMemoryForMapTask(); this.memoryPerReduce = conf.getMemoryForReduceTask(); this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker(); this.nonLocalMaps = new LinkedList<TaskInProgress>(); this.nonLocalRunningMaps = new LinkedHashSet<TaskInProgress>();
assertEquals("ses", conf.getSessionId()); assertEquals(3, conf.getMaxTaskFailuresPerTracker()); conf.setMaxTaskFailuresPerTracker(2); assertEquals(2, conf.getMaxTaskFailuresPerTracker());
this.memoryPerReduce = conf.getMemoryForReduceTask(); this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();
this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();
this.reduceFailuresPercent = conf.getMaxReduceTaskFailuresPercent(); this.maxTaskFailuresPerTracker = conf.getMaxTaskFailuresPerTracker();