/**
 * Adds together the HDFS read-operation and write-operation counters.
 *
 * @param counter counter set to read {@code HDFS_READ_OPS} and {@code HDFS_WRITE_OPS} from
 * @return the sum of the two counter values
 */
private long getTotalHdfsOps(JobCounters counter) {
    return counter.getCounterValue(JobCounters.CounterName.HDFS_READ_OPS)
        + counter.getCounterValue(JobCounters.CounterName.HDFS_WRITE_OPS);
}
/**
 * Computes the arithmetic mean of the {@code SPLIT_RAW_BYTES} counter over the given tasks.
 *
 * @param tasks tasks whose counters are averaged
 * @return the mean counter value, or 0 when {@code tasks} is empty
 */
private double getAverageSpillBytes(List<TaskExecutionAPIEntity> tasks) {
    if (tasks.isEmpty()) {
        return 0;
    }
    long totalSpillBytes = 0;
    for (TaskExecutionAPIEntity task : tasks) {
        // NOTE(review): the method name says "spill" but the counter read is SPLIT_RAW_BYTES;
        // confirm this is the intended counter.
        totalSpillBytes += task.getJobCounters().getCounterValue(JobCounters.CounterName.SPLIT_RAW_BYTES);
    }
    // Promote to double before dividing: long / int would truncate the fractional part
    // even though the method's return type is double.
    return (double) totalSpillBytes / tasks.size();
}
/**
 * Estimates the minimal sort memory for a task attempt as its map output bytes
 * plus 16 for every map output record (16 being the default index size).
 *
 * @param attempt task attempt whose {@code MAP_OUTPUT_RECORDS} and {@code MAP_OUTPUT_BYTES} counters are read
 * @return minimal sort memory
 */
private long getMinimumIOSortMemory(TaskAttemptExecutionAPIEntity attempt) {
    final int defaultIndexSize = 16;
    long outputRecordCount = attempt.getJobCounters().getCounterValue(MAP_OUTPUT_RECORDS);
    long outputByteCount = attempt.getJobCounters().getCounterValue(MAP_OUTPUT_BYTES);
    long indexOverhead = outputRecordCount * defaultIndexSize;
    return outputByteCount + indexOverhead;
}
/**
 * Reads one counter from every task and returns the values as doubles.
 *
 * @param tasks       tasks to read the counter from, in iteration order
 * @param counterName counter to read from each task
 * @return the result of passing the collected values to {@code toArray}
 */
public static double[] getCounterValues(List<TaskExecutionAPIEntity> tasks, JobCounters.CounterName counterName) {
    List<Double> collected = new ArrayList<>();
    for (TaskExecutionAPIEntity entity : tasks) {
        // Widen the counter to double first; it is boxed when added to the list.
        double counterAsDouble = entity.getJobCounters().getCounterValue(counterName);
        collected.add(counterAsDouble);
    }
    return toArray(collected);
}
/**
 * Builds best-practice advice about the number of map tasks, based on the average
 * HDFS bytes read per map and the average map time taken from {@code context}.
 *
 * @param optSettings not used by this method
 * @return the advice text (one line per triggered rule), or an empty string when
 *         no rule triggers or the job has no map tasks
 */
private String analyzeMapTaskNum(List<String> optSettings) {
    long numMaps = context.getNumMaps();
    if (numMaps <= 0) {
        // No map tasks: nothing to average, and dividing by numMaps below would throw.
        return "";
    }

    StringBuilder sb = new StringBuilder();
    long avgMapTime = context.getAvgMapTimeInSec();
    long avgMapInput = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.HDFS_BYTES_READ) / numMaps;
    String avgMapInputDisplaySize = bytesToHumanReadable(avgMapInput);

    // Rule 1: average input size per mapper.
    if (avgMapInput < 5 * FileUtils.ONE_MB && avgMapTime < 30 && numMaps > 1) {
        sb.append("Best practice: average map input bytes only have ").append(avgMapInputDisplaySize);
        sb.append(". Please reduce the number of mappers by merging input files.\n");
    } else if (avgMapInput > FileUtils.ONE_GB) {
        sb.append("Best practice: average map input bytes have ").append(avgMapInputDisplaySize);
        sb.append(". Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
    }

    // Rule 2: average run time per mapper.
    if (avgMapTime < 10 && numMaps > 1) {
        sb.append("Best practice: average map time only have ").append(avgMapTime);
        sb.append(" seconds. Please reduce the number of mappers by merging input files or by using a larger block size.\n");
    } else if (avgMapTime > 600 && avgMapInput < FileUtils.ONE_GB) {
        // Report the time in seconds here; the previous code appended the input byte count.
        sb.append("Best practice: average map time is ").append(avgMapTime);
        sb.append(" seconds. Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
    }
    return sb.toString();
}
/**
 * Counts how many history tasks fall into each bucket of a distribution for one counter.
 *
 * <p>One bucket is created per entry parsed from {@code distRange}; each task then
 * increments the bucket whose position is returned by
 * {@code ResourceUtils.getDistributionPosition} for that task's counter value.
 *
 * @param tasks       tasks to distribute
 * @param counterName name of a {@code JobCounters.CounterName} constant, matched case-insensitively
 * @param distRange   distribution boundaries, in the format accepted by
 *                    {@code ResourceUtils.parseDistributionList}
 * @return the response holding the counter name and the populated buckets
 * @throws IllegalArgumentException if {@code counterName} does not name a known counter
 */
public MRTaskExecutionResponse.TaskDistributionResponse getHistoryTaskDistribution(List<org.apache.eagle.jpm.mr.historyentity.TaskExecutionAPIEntity> tasks, String counterName, String distRange) {
    MRTaskExecutionResponse.TaskDistributionResponse response = new MRTaskExecutionResponse.TaskDistributionResponse();
    response.counterName = counterName;

    List<Long> distRangeList = ResourceUtils.parseDistributionList(distRange);
    for (Long bucketStart : distRangeList) {
        response.taskBuckets.add(new MRTaskExecutionResponse.CountUnit(bucketStart));
    }

    // Upper-case with a fixed locale: under e.g. a Turkish default locale "i" maps to a dotted
    // capital and the enum lookup would fail for names such as "hdfs_bytes_written".
    JobCounters.CounterName jobCounterName = JobCounters.CounterName.valueOf(counterName.toUpperCase(java.util.Locale.ROOT));

    for (org.apache.eagle.jpm.mr.historyentity.TaskExecutionAPIEntity task : tasks) {
        Long counterValue = task.getJobCounters().getCounterValue(jobCounterName);
        int pos = ResourceUtils.getDistributionPosition(distRangeList, counterValue);
        response.taskBuckets.get(pos).countVal++;
    }
    return response;
}
long mapGCTime = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.GC_MILLISECONDS); long mapCPUTime = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.CPU_MILLISECONDS); long reduceGCTime = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.GC_MILLISECONDS); long reduceCPUTime = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.CPU_MILLISECONDS); if (reduceGCTime > reduceCPUTime * 0.1) { setting = String.format("-D%s", REDUCE_JAVA_OPTS);
long spillRecords = 0L; // Spilled Records try { outputRecords = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.MAP_OUTPUT_RECORDS); spillRecords = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.SPILLED_RECORDS); long reduceInputRecords = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS); spillRecords = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.SPILLED_RECORDS); if (reduceInputRecords < spillRecords) { sb.append("Please add more memory (mapreduce.reduce.java.opts) to avoid spilled records.");
long avgReduceTime = context.getAvgReduceTimeInSec(); long avgShuffleTime = context.getAvgShuffleTimeInSec(); long avgShuffleBytes = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_SHUFFLE_BYTES) / numReduces; long avgReduceOutput = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.HDFS_BYTES_WRITTEN) / numReduces; long avgReduceTotalTime = avgShuffleTime + avgReduceTime;
@Override public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) { TaskAttemptExecutionAPIEntity worstReduce = context.getWorstReduce(); if (context.getNumReduces() == 0 || worstReduce == null) { return null; } StringBuilder sb = new StringBuilder(); try { long worstTimeInSec = (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) / DateTimeUtil.ONESECOND; if (worstTimeInSec - context.getAvgReduceTimeInSec() > 30 * 60 ) { long avgInputs = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS) / context.getNumReduces(); long worstInputs = worstReduce.getJobCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS); if (worstInputs > avgInputs * 5) { sb.append("Data skew detected in reducers. The average reduce time is ").append(context.getAvgReduceTimeInSec()); sb.append(" seconds, the worst reduce time is ").append(worstTimeInSec); sb.append(" seconds. Please investigate this problem to improve your job performance.\n"); } } if (sb.length() > 0) { return new Result.ProcessorResult(Result.RuleType.DATA_SKEW, Result.ResultLevel.INFO, sb.toString()); } } catch (NullPointerException e) { // When job failed there may not have counters, so just ignore it } return null; } }