private long getTotalHdfsOps(JobCounters counter) {
    long mapHdfsReadOps = counter.getCounterValue(JobCounters.CounterName.HDFS_READ_OPS);
    long mapHdfsWriteOps = counter.getCounterValue(JobCounters.CounterName.HDFS_WRITE_OPS);
    return mapHdfsReadOps + mapHdfsWriteOps;
}
jobCounters = new JobCounters();
Map<String, Map<String, Long>> counters = jobCounters.getCounters();
counters.put(Constants.REDUCE_TASK_ATTEMPT_FILE_SYSTEM_COUNTER, this.reduceFileSystemTaskCounterAgg.result());
jobCounters.setCounters(counters);
@Override
public byte[] serialize(JobCounters counters) {
    final Map<String, Map<String, Long>> groupMap = counters.getCounters();
    int totalSize = 4; // 4 bytes reserved for the group-count header
    for (Map<String, Long> counterMap : groupMap.values()) {
        // per-group size computation and byte packing elided in this excerpt
@Override
public JobCounters deserialize(byte[] bytes) {
    JobCounters counters = new JobCounters();
    final int length = bytes.length;
    if (length < 4) {
        // too short to hold even the group-count header; return empty counters
        return counters;
    }
    final Map<String, Map<String, Long>> groupMap = counters.getCounters();
    int pos = 0;
    final int totalGroups = Bytes.toInt(bytes, pos);
    // per-group decoding elided in this excerpt
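// A minimal round-trip sketch for the serialize/deserialize pair above. It
// assumes the elided bodies encode a 4-byte group count followed by the group
// payloads (the layout implied by "int totalSize = 4" and Bytes.toInt(bytes, 0)),
// and that a class such as JobCountersSerDeser (name assumed here) hosts both methods.
JobCountersSerDeser serDeser = new JobCountersSerDeser();

JobCounters original = new JobCounters();
Map<String, Long> fsGroup = new HashMap<>();
fsGroup.put("HDFS_READ_OPS", 42L);
original.getCounters().put("FileSystemCounters", fsGroup);

byte[] encoded = serDeser.serialize(original);
JobCounters decoded = serDeser.deserialize(encoded);
// decoded.getCounters() should equal original.getCounters()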
// the stray "return null;" opening this excerpt presumably belongs to a null
// guard on the raw value; reconstructed as such here (an assumption)
if (value == null) {
    return null;
}
JobCounters jc = new JobCounters();
Map<String, Map<String, Long>> groups = new HashMap<>();
JhCounters counters = (JhCounters) value;
// population of groups from the JhCounters groups elided in this excerpt
jc.setCounters(groups);
return jc;
private void taskAttemptEntityCreated(TaskAttemptExecutionAPIEntity entity) {
    JobCounters jobCounters = entity.getJobCounters();
    String taskType = entity.getTags().get(TASK_TYPE.toString());
    if (taskType != null && jobCounters != null && jobCounters.getCounters() != null) {
        if (Constants.TaskType.MAP.toString().equals(taskType.toUpperCase())) {
            mapAttemptDuration += entity.getDuration();
            this.mapTaskAttemptCounterAgg.accumulate(jobCounters.getCounters().get(Constants.TASK_COUNTER));
            this.mapFileSystemCounterAgg.accumulate(jobCounters.getCounters().get(Constants.FILE_SYSTEM_COUNTER));
            return;
        } else if (Constants.TaskType.REDUCE.toString().equals(taskType.toUpperCase())) {
            reduceAttemptDuration += entity.getDuration();
            this.reduceTaskAttemptCounterAgg.accumulate(jobCounters.getCounters().get(Constants.TASK_COUNTER));
            this.reduceFileSystemTaskCounterAgg.accumulate(jobCounters.getCounters().get(Constants.FILE_SYSTEM_COUNTER));
            return;
        }
    }
    ObjectMapper objectMapper = new ObjectMapper();
    try {
        LOG.warn("Unknown task type of task attempt execution entity: " + objectMapper.writeValueAsString(entity));
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
}
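// The accumulate() calls above and the result() call in the earlier snippet
// imply a per-group counter aggregator. A minimal sketch of such an aggregator,
// assuming it simply sums each counter across task attempts; the class name and
// exact semantics are assumptions, not the project's actual type.
import java.util.HashMap;
import java.util.Map;

public class CounterGroupAggregator {
    private final Map<String, Long> sums = new HashMap<>();

    // add one attempt's counter group; attempts missing the group are tolerated
    public void accumulate(Map<String, Long> counterGroup) {
        if (counterGroup == null) {
            return;
        }
        for (Map.Entry<String, Long> e : counterGroup.entrySet()) {
            sums.merge(e.getKey(), e.getValue(), Long::sum);
        }
    }

    // aggregated totals, suitable for jobCounters.getCounters().put(...)
    public Map<String, Long> result() {
        return sums;
    }
}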
public void taskExecutionEntityCreated(TaskExecutionAPIEntity taskExecutionAPIEntity) {
    JobCounters jobCounters = taskExecutionAPIEntity.getJobCounters();
    if (jobCounters == null || jobCounters.getCounters() == null) {
        LOG.warn("found null job counters, task {}", taskExecutionAPIEntity.getTags().get(MRJobTagName.TASK_ID.toString()));
        return;
    }
    long endTime = taskExecutionAPIEntity.getEndTime();
    Map<String, Map<String, Long>> counters = jobCounters.getCounters();
    for (String groupName : counters.keySet()) {
        Map<String, Long> metricValues = counters.get(groupName);
        // per-group metric emission elided in this excerpt
private double getAverageSpillBytes(List<TaskExecutionAPIEntity> tasks) {
    if (tasks.isEmpty()) {
        return 0;
    }
    long totalSpillBytes = 0;
    for (TaskExecutionAPIEntity task : tasks) {
        // note: SPLIT_RAW_BYTES counts input-split metadata bytes, which may
        // not match this method's spill-bytes intent
        totalSpillBytes += task.getJobCounters().getCounterValue(JobCounters.CounterName.SPLIT_RAW_BYTES);
    }
    // cast avoids truncating long/int division before the double return
    return (double) totalSpillBytes / tasks.size();
}
@Override
public List<GenericMetricEntity> generateMetrics(JobExecutionAPIEntity entity) {
    List<GenericMetricEntity> metrics = new ArrayList<>();
    if (entity != null) {
        Long currentTime = System.currentTimeMillis();
        Map<String, String> tags = entity.getTags();
        metrics.add(metricWrapper(currentTime, Constants.ALLOCATED_MB, new double[]{entity.getAllocatedMB()}, tags));
        metrics.add(metricWrapper(currentTime, Constants.ALLOCATED_VCORES, new double[]{entity.getAllocatedVCores()}, tags));
        metrics.add(metricWrapper(currentTime, Constants.RUNNING_CONTAINERS, new double[]{entity.getRunningContainers()}, tags));
        org.apache.eagle.jpm.util.jobcounter.JobCounters jobCounters = entity.getJobCounters();
        if (jobCounters != null && jobCounters.getCounters() != null) {
            for (Map<String, Long> metricGroup : jobCounters.getCounters().values()) {
                for (Map.Entry<String, Long> entry : metricGroup.entrySet()) {
                    String metricName = entry.getKey().toLowerCase();
                    metrics.add(metricWrapper(currentTime, metricName, new double[]{entry.getValue()}, tags));
                }
            }
        }
    }
    return metrics;
}
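// metricWrapper(...) is not shown in this excerpt; a plausible sketch, assuming
// it wraps a single sample into a GenericMetricEntity that carries the job's
// tags. The setter names follow eagle entity conventions but are assumptions here.
private GenericMetricEntity metricWrapper(Long timestamp, String metricName, double[] values, Map<String, String> tags) {
    GenericMetricEntity metricEntity = new GenericMetricEntity();
    metricEntity.setTimestamp(timestamp);
    metricEntity.setPrefix(metricName);
    metricEntity.setValue(values);
    metricEntity.setTags(tags);
    return metricEntity;
}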
/**
 * Each map output record needs a 16-byte index entry in the sort buffer
 * (the default index size is 16 bytes per record).
 *
 * @param attempt the map task attempt to size for
 * @return the minimal sort memory in bytes
 */
private long getMinimumIOSortMemory(TaskAttemptExecutionAPIEntity attempt) {
    long records = attempt.getJobCounters().getCounterValue(MAP_OUTPUT_RECORDS);
    long outputBytes = attempt.getJobCounters().getCounterValue(MAP_OUTPUT_BYTES);
    return outputBytes + records * 16;
}
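// A worked instance of the rule above: a map attempt emitting 1,000,000 output
// records and 64 MiB of output bytes needs roughly 64 MiB + 16 B * 1,000,000
// of sort buffer (io.sort.mb) to hold both the data and its index.
long records = 1_000_000L;
long outputBytes = 64L * 1024 * 1024;            // 67,108,864 bytes
long minSortMemory = outputBytes + records * 16; // 83,108,864 bytes, about 79.3 MiB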
public static double[] getCounterValues(List<TaskExecutionAPIEntity> tasks, JobCounters.CounterName counterName) {
    List<Double> values = new ArrayList<>();
    for (TaskExecutionAPIEntity task : tasks) {
        values.add(Double.valueOf(task.getJobCounters().getCounterValue(counterName)));
    }
    return toArray(values);
}
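// Example use of getCounterValues(...): pull one counter across all tasks and
// derive a simple statistic from it (CPU_MILLISECONDS is just an illustrative choice).
double[] cpuTimes = getCounterValues(tasks, JobCounters.CounterName.CPU_MILLISECONDS);
double total = 0;
for (double v : cpuTimes) {
    total += v;
}
double avgCpuMs = cpuTimes.length == 0 ? 0 : total / cpuTimes.length;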
JobCounters jobCounters = parseCounters(totalCounters);
jobExecutionEntity.setJobCounters(jobCounters);
if (jobCounters.getCounters().containsKey(Constants.JOB_COUNTER)) {
    Map<String, Long> counters = jobCounters.getCounters().get(Constants.JOB_COUNTER);
    if (counters.containsKey(Constants.JobCounter.DATA_LOCAL_MAPS.toString())) {
        jobExecutionEntity.setDataLocalMaps(counters.get(Constants.JobCounter.DATA_LOCAL_MAPS.toString()).intValue());
    }
    // further job counters are presumably copied the same way; elided in this excerpt
}
private String analyzeMapTaskNum(List<String> optSettings) {
    StringBuilder sb = new StringBuilder();
    long numMaps = context.getNumMaps();
    long avgMapTime = context.getAvgMapTimeInSec();
    long avgMapInput = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.HDFS_BYTES_READ) / numMaps;
    String avgMapInputDisplaySize = bytesToHumanReadable(avgMapInput);

    if (avgMapInput < 5 * FileUtils.ONE_MB && avgMapTime < 30 && numMaps > 1) {
        sb.append("Best practice: average map input bytes are only ").append(avgMapInputDisplaySize);
        sb.append(". Please reduce the number of mappers by merging input files.\n");
    } else if (avgMapInput > FileUtils.ONE_GB) {
        sb.append("Best practice: average map input bytes are ").append(avgMapInputDisplaySize);
        sb.append(". Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
    }

    if (avgMapTime < 10 && numMaps > 1) {
        sb.append("Best practice: average map time is only ").append(avgMapTime);
        sb.append(" seconds. Please reduce the number of mappers by merging input files or by using a larger block size.\n");
    } else if (avgMapTime > 600 && avgMapInput < FileUtils.ONE_GB) {
        sb.append("Best practice: average map time is ").append(avgMapTime);
        sb.append(" seconds. Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
    }
    return sb.toString();
}
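// A worked instance of the thresholds above (numbers are illustrative): a job
// with 2,000 maps, each reading about 2 MB and finishing in about 8 seconds,
// trips both the small-input branch and the short-map-time branch, so both
// "merge input files" suggestions are emitted.
long numMaps = 2000;
long avgMapTime = 8;                 // seconds
long avgMapInput = 2L * 1024 * 1024; // ~2 MB per map
boolean smallInputs = avgMapInput < 5 * FileUtils.ONE_MB && avgMapTime < 30 && numMaps > 1; // true
boolean shortMaps = avgMapTime < 10 && numMaps > 1;                                         // true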
public MRTaskExecutionResponse.TaskDistributionResponse getHistoryTaskDistribution(
        List<org.apache.eagle.jpm.mr.historyentity.TaskExecutionAPIEntity> tasks, String counterName, String distRange) {
    MRTaskExecutionResponse.TaskDistributionResponse response = new MRTaskExecutionResponse.TaskDistributionResponse();
    response.counterName = counterName;
    List<Long> distRangeList = ResourceUtils.parseDistributionList(distRange);
    for (int i = 0; i < distRangeList.size(); i++) {
        response.taskBuckets.add(new MRTaskExecutionResponse.CountUnit(distRangeList.get(i)));
    }
    JobCounters.CounterName jobCounterName = JobCounters.CounterName.valueOf(counterName.toUpperCase());
    for (org.apache.eagle.jpm.mr.historyentity.TaskExecutionAPIEntity task : tasks) {
        Long counterValue = task.getJobCounters().getCounterValue(jobCounterName);
        int pos = ResourceUtils.getDistributionPosition(distRangeList, counterValue);
        response.taskBuckets.get(pos).countVal++;
    }
    return response;
}
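// Example of the bucketing above, assuming parseDistributionList("0,1000,10000")
// yields boundaries [0, 1000, 10000] and getDistributionPosition returns the
// index of the last boundary <= the value (neither ResourceUtils helper is
// shown here, so their exact semantics are assumptions).
List<Long> boundaries = ResourceUtils.parseDistributionList("0,1000,10000");
int b1 = ResourceUtils.getDistributionPosition(boundaries, 500L);    // expected 0 -> [0, 1000)
int b2 = ResourceUtils.getDistributionPosition(boundaries, 2_500L);  // expected 1 -> [1000, 10000)
int b3 = ResourceUtils.getDistributionPosition(boundaries, 20_000L); // expected 2 -> [10000, ...)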
long mapGCTime = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.GC_MILLISECONDS);
long mapCPUTime = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.CPU_MILLISECONDS);
long reduceGCTime = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.GC_MILLISECONDS);
long reduceCPUTime = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.CPU_MILLISECONDS);

// flag reducers spending more than 10% of their CPU time in garbage collection
if (reduceGCTime > reduceCPUTime * 0.1) {
    setting = String.format("-D%s", REDUCE_JAVA_OPTS);
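// Plugging illustrative numbers into the ratio test above: 120,000 ms of reduce
// GC against 600,000 ms of reduce CPU gives 120000 > 60000, so the rule fires
// and recommends a larger reduce JVM heap via mapreduce.reduce.java.opts.
long sampleReduceGC = 120_000L;
long sampleReduceCPU = 600_000L;
boolean gcHeavy = sampleReduceGC > sampleReduceCPU * 0.1; // true -> tune REDUCE_JAVA_OPTS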
long outputRecords = 0L; // Map output records
long spillRecords = 0L;  // Spilled Records
try {
    outputRecords = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.MAP_OUTPUT_RECORDS);
    spillRecords = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.SPILLED_RECORDS);
    // map-side spill check elided in this excerpt; spillRecords is then reused for the reduce side
    long reduceInputRecords = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS);
    spillRecords = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.SPILLED_RECORDS);
    if (reduceInputRecords < spillRecords) {
        sb.append("Please add more memory (mapreduce.reduce.java.opts) to avoid spilled records.");
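// Illustrative numbers for the reduce-side test above: 1,000,000 reduce input
// records against 1,500,000 spilled records means records hit disk more than
// once, so the rule asks for a larger reduce heap.
long sampleReduceInputs = 1_000_000L;
long sampleSpilled = 1_500_000L;
boolean spilling = sampleReduceInputs < sampleSpilled; // true -> suggest more memory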
long avgReduceTime = context.getAvgReduceTimeInSec();
long avgShuffleTime = context.getAvgShuffleTimeInSec();
long avgShuffleBytes = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_SHUFFLE_BYTES) / numReduces;
long avgReduceOutput = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.HDFS_BYTES_WRITTEN) / numReduces;
long avgReduceTotalTime = avgShuffleTime + avgReduceTime;
@Override
public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
    TaskAttemptExecutionAPIEntity worstReduce = context.getWorstReduce();
    if (context.getNumReduces() == 0 || worstReduce == null) {
        return null;
    }
    StringBuilder sb = new StringBuilder();
    try {
        long worstTimeInSec = (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) / DateTimeUtil.ONESECOND;
        if (worstTimeInSec - context.getAvgReduceTimeInSec() > 30 * 60) {
            long avgInputs = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS) / context.getNumReduces();
            long worstInputs = worstReduce.getJobCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS);
            if (worstInputs > avgInputs * 5) {
                sb.append("Data skew detected in reducers. The average reduce time is ").append(context.getAvgReduceTimeInSec());
                sb.append(" seconds, the worst reduce time is ").append(worstTimeInSec);
                sb.append(" seconds. Please investigate this problem to improve your job performance.\n");
            }
        }
        if (sb.length() > 0) {
            return new Result.ProcessorResult(Result.RuleType.DATA_SKEW, Result.ResultLevel.INFO, sb.toString());
        }
    } catch (NullPointerException e) {
        // a failed job may have no counters, so missing values are ignored here
    }
    return null;
}
}
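// Illustrative numbers for the two-part skew test above: with an average reduce
// time of 600 s, a worst reduce taking 2,500 s (gap 1,900 s > 1,800 s) whose
// input is 6x the average record count triggers the DATA_SKEW result; a slow
// reduce with average-sized input does not.
long avgReduceSec = 600, worstReduceSec = 2_500;
long avgInputRecords = 1_000_000, worstInputRecords = 6_000_000;
boolean skew = (worstReduceSec - avgReduceSec > 30 * 60) && (worstInputRecords > avgInputRecords * 5); // true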