/**
 * Assembles an aggregated {@link SparkStatistics} from the collected statistic map,
 * wrapping each (group name, statistic list) entry in a {@link SparkStatisticGroup}.
 *
 * @return the aggregated {@link SparkStatistics} with one group per map entry
 */
public SparkStatistics build() {
  List<SparkStatisticGroup> groups = new LinkedList<SparkStatisticGroup>();
  for (Map.Entry<String, List<SparkStatistic>> mapEntry : statisticMap.entrySet()) {
    groups.add(new SparkStatisticGroup(mapEntry.getKey(), mapEntry.getValue()));
  }
  return new SparkStatistics(groups);
}
/**
 * Renders the given {@link SparkStatistics} as a human-readable, multi-line report
 * headed by the Spark job id. Each statistic group is printed on its own line,
 * followed by one tab-indented {@code name: value} line per statistic.
 *
 * @param sparkStatistic the statistics to render
 * @param sparkJobID the Spark job id shown in the report header
 * @return the formatted statistics report
 */
@VisibleForTesting
static String sparkStatisticsToString(SparkStatistics sparkStatistic, int sparkJobID) {
  StringBuilder report = new StringBuilder();
  report.append("\n\n");
  report.append(String.format("=====Spark Job[%d] Statistics=====", sparkJobID));
  report.append("\n\n");
  for (Iterator<SparkStatisticGroup> groups = sparkStatistic.getStatisticGroups();
      groups.hasNext(); ) {
    SparkStatisticGroup currentGroup = groups.next();
    report.append(currentGroup.getGroupName()).append("\n");
    for (Iterator<SparkStatistic> stats = currentGroup.getStatistics(); stats.hasNext(); ) {
      SparkStatistic currentStat = stats.next();
      report.append("\t").append(currentStat.getName()).append(": ")
          .append(currentStat.getValue()).append("\n");
    }
  }
  return report.toString();
}
/** * Use the Spark metrics and calculate how much task executione time was spent performing GC * operations. If more than a defined threshold of time is spent, print out a warning on the * console. */ private void printExcessiveGCWarning() { SparkStatisticGroup sparkStatisticGroup = sparkStatistics.getStatisticGroup( SparkStatisticsNames.SPARK_GROUP_NAME); if (sparkStatisticGroup != null) { long taskDurationTime = SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup, SparkStatisticsNames.TASK_DURATION_TIME); long jvmGCTime = SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup, SparkStatisticsNames.JVM_GC_TIME); // Threshold percentage to trigger the GC warning double threshold = 0.1; if (jvmGCTime > taskDurationTime * threshold) { long percentGcTime = Math.round((double) jvmGCTime / taskDurationTime * 100); String gcWarning = String.format("WARNING: Spark Job[%s] Spent %s%% (%s ms / %s ms) of " + "task time in GC", sparkJobID, percentGcTime, jvmGCTime, taskDurationTime); console.printInfo(gcWarning); } } }
/**
 * Builds an immutable {@link SparkStatistics} snapshot from the accumulated
 * {@code statisticMap}, converting each map entry into a {@link SparkStatisticGroup}.
 *
 * @return a {@link SparkStatistics} containing one group per entry in {@code statisticMap}
 */
public SparkStatistics build() {
  List<SparkStatisticGroup> statisticGroups = new LinkedList<SparkStatisticGroup>();
  for (Map.Entry<String, List<SparkStatistic>> entry : statisticMap.entrySet()) {
    String groupName = entry.getKey();
    // Fixed misspelled local "statisitcList" -> "statisticList".
    List<SparkStatistic> statisticList = entry.getValue();
    statisticGroups.add(new SparkStatisticGroup(groupName, statisticList));
  }
  return new SparkStatistics(statisticGroups);
}
/**
 * Writes the given {@link SparkStatistics} to the application log at INFO level:
 * one line per group name, then one tab-indented {@code name: value} line per statistic.
 *
 * @param sparkStatistic the statistics to log
 */
private void logSparkStatistic(SparkStatistics sparkStatistic) {
  Iterator<SparkStatisticGroup> groups = sparkStatistic.getStatisticGroups();
  while (groups.hasNext()) {
    SparkStatisticGroup currentGroup = groups.next();
    LOG.info(currentGroup.getGroupName());
    Iterator<SparkStatistic> stats = currentGroup.getStatistics();
    while (stats.hasNext()) {
      SparkStatistic stat = stats.next();
      LOG.info("\t" + stat.getName() + ": " + stat.getValue());
    }
  }
}
// Prints Spark job metrics to the console, starting from the statistic group named by
// SparkStatisticsNames.SPARK_GROUP_NAME.
// NOTE(review): this method is truncated in the visible chunk — the remainder of its body
// (and its closing brace) lies outside this view, so it is left byte-identical here.
private void printConsoleMetrics() { SparkStatisticGroup sparkStatisticGroup = sparkStatistics.getStatisticGroup( SparkStatisticsNames.SPARK_GROUP_NAME);
/**
 * Builds an immutable {@link SparkStatistics} snapshot from the accumulated
 * {@code statisticMap}, converting each map entry into a {@link SparkStatisticGroup}.
 *
 * @return a {@link SparkStatistics} containing one group per entry in {@code statisticMap}
 */
public SparkStatistics build() {
  List<SparkStatisticGroup> statisticGroups = new LinkedList<SparkStatisticGroup>();
  for (Map.Entry<String, List<SparkStatistic>> entry : statisticMap.entrySet()) {
    String groupName = entry.getKey();
    // Fixed misspelled local "statisitcList" -> "statisticList".
    List<SparkStatistic> statisticList = entry.getValue();
    statisticGroups.add(new SparkStatisticGroup(groupName, statisticList));
  }
  return new SparkStatistics(statisticGroups);
}
/**
 * Logs every statistic in the supplied {@link SparkStatistics} at INFO level.
 * Group names are logged unindented; each statistic is logged beneath its group
 * as a tab-indented {@code name: value} pair.
 *
 * @param sparkStatistic the statistics to log
 */
private void logSparkStatistic(SparkStatistics sparkStatistic) {
  for (Iterator<SparkStatisticGroup> groupIt = sparkStatistic.getStatisticGroups();
      groupIt.hasNext(); ) {
    SparkStatisticGroup group = groupIt.next();
    LOG.info(group.getGroupName());
    for (Iterator<SparkStatistic> statIt = group.getStatistics(); statIt.hasNext(); ) {
      SparkStatistic statistic = statIt.next();
      LOG.info("\t" + statistic.getName() + ": " + statistic.getValue());
    }
  }
}