@Override public SparkStatistics getSparkStatistics() { SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. sparkStatisticsBuilder.add(sparkCounters); // add spark job metrics. Map<Integer, List<Map.Entry<TaskMetrics, TaskInfo>>> jobMetric = jobMetricsListener.getJobMetric(jobId); if (jobMetric == null) { return null; } MetricsCollection metricsCollection = new MetricsCollection(); Set<Integer> stageIds = jobMetric.keySet(); for (int stageId : stageIds) { List<Map.Entry<TaskMetrics, TaskInfo>> taskMetrics = jobMetric.get(stageId); for (Map.Entry<TaskMetrics, TaskInfo> taskMetric : taskMetrics) { Metrics metrics = new Metrics(taskMetric.getKey(), taskMetric.getValue()); metricsCollection.addMetrics(jobId, stageId, 0, metrics); } } Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(metricsCollection .getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
public Metrics(TaskMetrics metrics, TaskInfo taskInfo) { this( metrics.executorDeserializeTime(), TimeUnit.NANOSECONDS.toMillis(metrics.executorDeserializeCpuTime()), metrics.executorRunTime(), TimeUnit.NANOSECONDS.toMillis(metrics.executorCpuTime()), metrics.resultSize(), metrics.jvmGCTime(), metrics.resultSerializationTime(), metrics.memoryBytesSpilled(), metrics.diskBytesSpilled(), taskInfo.duration(), optionalInputMetric(metrics), optionalShuffleReadMetric(metrics), optionalShuffleWriteMetrics(metrics), optionalOutputMetrics(metrics)); }
public Metrics(TaskMetrics metrics) { this( metrics.executorDeserializeTime(), metrics.executorRunTime(), metrics.resultSize(), metrics.jvmGCTime(), metrics.resultSerializationTime(), metrics.memoryBytesSpilled(), metrics.diskBytesSpilled(), optionalInputMetric(metrics), optionalShuffleReadMetric(metrics), optionalShuffleWriteMetrics(metrics)); }
public Metrics(TaskMetrics metrics) { this( metrics.executorDeserializeTime(), metrics.executorRunTime(), metrics.resultSize(), metrics.jvmGCTime(), metrics.resultSerializationTime(), metrics.memoryBytesSpilled(), metrics.diskBytesSpilled(), optionalInputMetric(metrics), optionalShuffleReadMetric(metrics), optionalShuffleWriteMetrics(metrics)); }
return new Metrics( executorDeserializeTime, executorDeserializeCpuTime,
@Override public SparkStatistics getSparkStatistics() { SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. sparkStatisticsBuilder.add(sparkCounters); // add spark job metrics. String jobIdentifier = "Spark Job[" + jobId + "] Metrics"; Map<Integer, List<TaskMetrics>> jobMetric = jobMetricsListener.getJobMetric(jobId); if (jobMetric == null) { return null; } MetricsCollection metricsCollection = new MetricsCollection(); Set<Integer> stageIds = jobMetric.keySet(); for (int stageId : stageIds) { List<TaskMetrics> taskMetrics = jobMetric.get(stageId); for (TaskMetrics taskMetric : taskMetrics) { Metrics metrics = new Metrics(taskMetric); metricsCollection.addMetrics(jobId, stageId, 0, metrics); } } Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(metricsCollection .getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
@Override public void onTaskEnd(SparkListenerTaskEnd taskEnd) { if (taskEnd.reason() instanceof org.apache.spark.Success$ && !taskEnd.taskInfo().speculative()) { Metrics metrics = new Metrics(taskEnd.taskMetrics(), taskEnd.taskInfo()); Integer jobId; synchronized (stageToJobId) { jobId = stageToJobId.get(taskEnd.stageId()); } // TODO: implement implicit AsyncRDDActions conversion instead of jc.monitor()? // TODO: how to handle stage failures? String clientId = getClientId(jobId); if (clientId != null) { protocol.sendMetrics(clientId, jobId, taskEnd.stageId(), taskEnd.taskInfo().taskId(), metrics); } } }
return new Metrics( executorDeserializeTime, executorRunTime,
return new Metrics( executorDeserializeTime, executorRunTime,
@Override public void onTaskEnd(SparkListenerTaskEnd taskEnd) { if (taskEnd.reason() instanceof org.apache.spark.Success$ && !taskEnd.taskInfo().speculative()) { Metrics metrics = new Metrics(taskEnd.taskMetrics()); Integer jobId; synchronized (stageToJobId) { jobId = stageToJobId.get(taskEnd.stageId()); } // TODO: implement implicit AsyncRDDActions conversion instead of jc.monitor()? // TODO: how to handle stage failures? String clientId = getClientId(jobId); if (clientId != null) { protocol.sendMetrics(clientId, jobId, taskEnd.stageId(), taskEnd.taskInfo().taskId(), metrics); } } }
@Override public void onTaskEnd(SparkListenerTaskEnd taskEnd) { if (taskEnd.reason() instanceof org.apache.spark.Success$ && !taskEnd.taskInfo().speculative()) { Metrics metrics = new Metrics(taskEnd.taskMetrics()); Integer jobId; synchronized (stageToJobId) { jobId = stageToJobId.get(taskEnd.stageId()); } // TODO: implement implicit AsyncRDDActions conversion instead of jc.monitor()? // TODO: how to handle stage failures? String clientId = getClientId(jobId); if (clientId != null) { protocol.sendMetrics(clientId, jobId, taskEnd.stageId(), taskEnd.taskInfo().taskId(), metrics); } } }