/**
 * Looks up the named statistic in the given group and parses its value as a long.
 */
public static long getSparkStatisticAsLong(SparkStatisticGroup group, String name) {
  String rawValue = group.getSparkStatistic(name).getValue();
  return Long.parseLong(rawValue);
}
}
/**
 * Writes every statistic group name, followed by its tab-indented name/value pairs,
 * to the info log.
 */
private void logSparkStatistic(SparkStatistics sparkStatistic) {
  Iterator<SparkStatisticGroup> groups = sparkStatistic.getStatisticGroups();
  while (groups.hasNext()) {
    SparkStatisticGroup group = groups.next();
    LOG.info(group.getGroupName());
    Iterator<SparkStatistic> stats = group.getStatistics();
    while (stats.hasNext()) {
      SparkStatistic stat = stats.next();
      LOG.info("\t" + stat.getName() + ": " + stat.getValue());
    }
  }
}
/**
 * Creates a group with the given name, indexing each supplied statistic by its name.
 */
SparkStatisticGroup(String groupName, List<SparkStatistic> statisticList) {
  this.groupName = groupName;
  for (SparkStatistic stat : statisticList) {
    this.statistics.put(stat.getName(), stat);
  }
}
@Override public SparkStatistics getSparkStatistics() { MetricsCollection metricsCollection = jobHandle.getMetrics(); if (metricsCollection == null || getCounter() == null) { return null; } SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. - e.g. RECORDS_IN, RECORDS_OUT sparkStatisticsBuilder.add(getCounter()); // add spark job metrics. - e.g. metrics collected by Spark itself (JvmGCTime, // ExecutorRunTime, etc.) Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics( metricsCollection.getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
/**
 * Assembles everything added so far into a SparkStatistics, one group per distinct
 * group name.
 */
public SparkStatistics build() {
  List<SparkStatisticGroup> groups = new LinkedList<SparkStatisticGroup>();
  for (Map.Entry<String, List<SparkStatistic>> entry : statisticMap.entrySet()) {
    groups.add(new SparkStatisticGroup(entry.getKey(), entry.getValue()));
  }
  return new SparkStatistics(groups);
}
/**
 * Records a single statistic under the given group, creating the group's list on
 * first use.
 */
public SparkStatisticsBuilder add(String groupName, String name, String value) {
  List<SparkStatistic> groupStats = statisticMap.get(groupName);
  if (groupStats == null) {
    groupStats = new LinkedList<SparkStatistic>();
    statisticMap.put(groupName, groupStats);
  }
  groupStats.add(new SparkStatistic(name, value));
  return this;
}
}
/**
 * Indexes the given statistic groups by their group names.
 */
SparkStatistics(List<SparkStatisticGroup> statisticGroupsList) {
  for (SparkStatisticGroup group : statisticGroupsList) {
    this.statisticGroups.put(group.getGroupName(), group);
  }
}
// NOTE(review): this method appears truncated in the extracted source -- several of the
// if-statements below end mid-argument-list (the getSparkStatisticAsLong calls are cut off)
// and the method body never closes. Recover the complete method from version control
// before making any code change here. What is visible: it fetches the SPARK_GROUP_NAME
// statistic group and conditionally appends each metric (TASK_DURATION_TIME,
// EXECUTOR_CPU_TIME, JVM_GC_TIME, BYTES_READ/RECORDS_READ, shuffle read/write pairs)
// to metricsString, guarding each with containsSparkStatistic.
private void printConsoleMetrics() { SparkStatisticGroup sparkStatisticGroup = sparkStatistics.getStatisticGroup( SparkStatisticsNames.SPARK_GROUP_NAME); if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.TASK_DURATION_TIME)) { metricsString += SparkStatisticsNames.TASK_DURATION_TIME + colon + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup, if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.EXECUTOR_CPU_TIME)) { metricsString += SparkStatisticsNames.EXECUTOR_CPU_TIME + colon + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup, if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.JVM_GC_TIME)) { metricsString += SparkStatisticsNames.JVM_GC_TIME + colon + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup, if (sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.BYTES_READ) && sparkStatisticGroup.containsSparkStatistic(SparkStatisticsNames.RECORDS_READ)) { metricsString += SparkStatisticsNames.BYTES_READ + forwardSlash + SparkStatisticsNames.RECORDS_READ + colon + if (sparkStatisticGroup.containsSparkStatistic( SparkStatisticsNames.SHUFFLE_TOTAL_BYTES_READ) && sparkStatisticGroup.containsSparkStatistic( SparkStatisticsNames.SHUFFLE_RECORDS_READ)) { metricsString += SparkStatisticsNames.SHUFFLE_TOTAL_BYTES_READ + forwardSlash + if (sparkStatisticGroup.containsSparkStatistic( SparkStatisticsNames.SHUFFLE_BYTES_WRITTEN) && sparkStatisticGroup.containsSparkStatistic(
/** * Use the Spark metrics and calculate how much task executione time was spent performing GC * operations. If more than a defined threshold of time is spent, print out a warning on the * console. */ private void printExcessiveGCWarning() { SparkStatisticGroup sparkStatisticGroup = sparkStatistics.getStatisticGroup( SparkStatisticsNames.SPARK_GROUP_NAME); if (sparkStatisticGroup != null) { long taskDurationTime = SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup, SparkStatisticsNames.TASK_DURATION_TIME); long jvmGCTime = SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup, SparkStatisticsNames.JVM_GC_TIME); // Threshold percentage to trigger the GC warning double threshold = 0.1; if (jvmGCTime > taskDurationTime * threshold) { long percentGcTime = Math.round((double) jvmGCTime / taskDurationTime * 100); String gcWarning = String.format("WARNING: Spark Job[%s] Spent %s%% (%s ms / %s ms) of " + "task time in GC", sparkJobID, percentGcTime, jvmGCTime, taskDurationTime); console.printInfo(gcWarning); } } }
@Override public SparkStatistics getSparkStatistics() { MetricsCollection metricsCollection = jobHandle.getMetrics(); if (metricsCollection == null || getCounter() == null) { return null; } SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. sparkStatisticsBuilder.add(getCounter()); // add spark job metrics. String jobIdentifier = "Spark Job[" + jobHandle.getClientJobId() + "] Metrics"; Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics( metricsCollection.getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
/**
 * Renders the given statistics as a human-readable multi-line string: a job header
 * followed by each group name and its tab-indented name/value pairs.
 */
@VisibleForTesting
static String sparkStatisticsToString(SparkStatistics sparkStatistic, int sparkJobID) {
  StringBuilder sb = new StringBuilder("\n\n");
  sb.append(String.format("=====Spark Job[%d] Statistics=====", sparkJobID));
  sb.append("\n\n");
  Iterator<SparkStatisticGroup> groups = sparkStatistic.getStatisticGroups();
  while (groups.hasNext()) {
    SparkStatisticGroup group = groups.next();
    sb.append(group.getGroupName()).append("\n");
    Iterator<SparkStatistic> stats = group.getStatistics();
    while (stats.hasNext()) {
      SparkStatistic stat = stats.next();
      sb.append("\t").append(stat.getName()).append(": ")
          .append(stat.getValue()).append("\n");
    }
  }
  return sb.toString();
}
/**
 * Assembles everything added so far into a SparkStatistics, one group per distinct
 * group name.
 */
public SparkStatistics build() {
  List<SparkStatisticGroup> statisticGroups = new LinkedList<SparkStatisticGroup>();
  for (Map.Entry<String, List<SparkStatistic>> entry : statisticMap.entrySet()) {
    String groupName = entry.getKey();
    // Renamed misspelled local ("statisitcList" -> "statisticList").
    List<SparkStatistic> statisticList = entry.getValue();
    statisticGroups.add(new SparkStatisticGroup(groupName, statisticList));
  }
  return new SparkStatistics(statisticGroups);
}
/**
 * Adds one named statistic value under the given group name, lazily creating the
 * group's backing list.
 */
public SparkStatisticsBuilder add(String groupName, String name, String value) {
  List<SparkStatistic> existing = statisticMap.get(groupName);
  if (existing == null) {
    existing = new LinkedList<SparkStatistic>();
    statisticMap.put(groupName, existing);
  }
  existing.add(new SparkStatistic(name, value));
  return this;
}
}
@Override public SparkStatistics getSparkStatistics() { SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. sparkStatisticsBuilder.add(sparkCounters); // add spark job metrics. Map<Integer, List<Map.Entry<TaskMetrics, TaskInfo>>> jobMetric = jobMetricsListener.getJobMetric(jobId); if (jobMetric == null) { return null; } MetricsCollection metricsCollection = new MetricsCollection(); Set<Integer> stageIds = jobMetric.keySet(); for (int stageId : stageIds) { List<Map.Entry<TaskMetrics, TaskInfo>> taskMetrics = jobMetric.get(stageId); for (Map.Entry<TaskMetrics, TaskInfo> taskMetric : taskMetrics) { Metrics metrics = new Metrics(taskMetric.getKey(), taskMetric.getValue()); metricsCollection.addMetrics(jobId, stageId, 0, metrics); } } Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(metricsCollection .getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
/**
 * Logs every statistic group name and, beneath it, each statistic as a tab-indented
 * "name: value" line.
 */
private void logSparkStatistic(SparkStatistics sparkStatistic) {
  for (Iterator<SparkStatisticGroup> it = sparkStatistic.getStatisticGroups(); it.hasNext(); ) {
    SparkStatisticGroup group = it.next();
    LOG.info(group.getGroupName());
    for (Iterator<SparkStatistic> sit = group.getStatistics(); sit.hasNext(); ) {
      SparkStatistic stat = sit.next();
      LOG.info("\t" + stat.getName() + ": " + stat.getValue());
    }
  }
}
/**
 * Builds a SparkStatistics from all statistics added so far, one group per distinct
 * group name.
 */
public SparkStatistics build() {
  List<SparkStatisticGroup> statisticGroups = new LinkedList<SparkStatisticGroup>();
  for (Map.Entry<String, List<SparkStatistic>> entry : statisticMap.entrySet()) {
    String groupName = entry.getKey();
    // Renamed misspelled local ("statisitcList" -> "statisticList").
    List<SparkStatistic> statisticList = entry.getValue();
    statisticGroups.add(new SparkStatisticGroup(groupName, statisticList));
  }
  return new SparkStatistics(statisticGroups);
}
/**
 * Copies every counter value from the given SparkCounters into this builder, grouped by
 * each counter group's display name.
 */
public SparkStatisticsBuilder add(SparkCounters sparkCounters) {
  for (SparkCounterGroup counterGroup : sparkCounters.getSparkCounterGroups().values()) {
    String groupDisplayName = counterGroup.getGroupDisplayName();
    List<SparkStatistic> target = statisticMap.get(groupDisplayName);
    if (target == null) {
      target = new LinkedList<SparkStatistic>();
      statisticMap.put(groupDisplayName, target);
    }
    for (SparkCounter counter : counterGroup.getSparkCounters().values()) {
      target.add(new SparkStatistic(counter.getDisplayName(),
          Long.toString(counter.getValue())));
    }
  }
  return this;
}
@Override public SparkStatistics getSparkStatistics() { SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. sparkStatisticsBuilder.add(sparkCounters); // add spark job metrics. String jobIdentifier = "Spark Job[" + jobId + "] Metrics"; Map<Integer, List<TaskMetrics>> jobMetric = jobMetricsListener.getJobMetric(jobId); if (jobMetric == null) { return null; } MetricsCollection metricsCollection = new MetricsCollection(); Set<Integer> stageIds = jobMetric.keySet(); for (int stageId : stageIds) { List<TaskMetrics> taskMetrics = jobMetric.get(stageId); for (TaskMetrics taskMetric : taskMetrics) { Metrics metrics = new Metrics(taskMetric); metricsCollection.addMetrics(jobId, stageId, 0, metrics); } } Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(metricsCollection .getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
/**
 * Imports all counters from the given SparkCounters, one statistic per counter, grouped
 * under the owning counter group's display name.
 */
public SparkStatisticsBuilder add(SparkCounters sparkCounters) {
  for (SparkCounterGroup counterGroup : sparkCounters.getSparkCounterGroups().values()) {
    String groupDisplayName = counterGroup.getGroupDisplayName();
    List<SparkStatistic> groupStats = statisticMap.get(groupDisplayName);
    if (groupStats == null) {
      groupStats = new LinkedList<SparkStatistic>();
      statisticMap.put(groupDisplayName, groupStats);
    }
    for (SparkCounter counter : counterGroup.getSparkCounters().values()) {
      String displayName = counter.getDisplayName();
      groupStats.add(new SparkStatistic(displayName, Long.toString(counter.getValue())));
    }
  }
  return this;
}
/**
 * Verifies that the rendered statistics string contains the job id, the group name,
 * and every statistic name/value that was added.
 */
@Test
public void testSparkStatisticsToString() {
  SparkStatisticsBuilder statsBuilder = new SparkStatisticsBuilder();
  statsBuilder.add("TEST", "stat1", "1");
  statsBuilder.add("TEST", "stat2", "1");
  String statsString = SparkTask.sparkStatisticsToString(statsBuilder.build(), 10);
  for (String expected : new String[] {"10", "TEST", "stat1", "stat2", "1"}) {
    Assert.assertTrue(statsString.contains(expected));
  }
}