/**
 * Use the Spark metrics to calculate how much task execution time was spent performing GC
 * operations. If more than a defined threshold of time is spent, print out a warning on the
 * console.
 */
private void printExcessiveGCWarning() {
  SparkStatisticGroup sparkStatisticGroup = sparkStatistics.getStatisticGroup(
          SparkStatisticsNames.SPARK_GROUP_NAME);
  if (sparkStatisticGroup != null) {
    long taskDurationTime = SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
            SparkStatisticsNames.TASK_DURATION_TIME);
    long jvmGCTime = SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
            SparkStatisticsNames.JVM_GC_TIME);

    // Threshold percentage to trigger the GC warning
    double threshold = 0.1;

    if (jvmGCTime > taskDurationTime * threshold) {
      long percentGcTime = Math.round((double) jvmGCTime / taskDurationTime * 100);
      String gcWarning = String.format("WARNING: Spark Job[%s] Spent %s%% (%s ms / %s ms) of "
              + "task time in GC", sparkJobID, percentGcTime, jvmGCTime, taskDurationTime);
      console.printInfo(gcWarning);
    }
  }
}
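For reference, a minimal sketch of the SparkMetricsUtils.getSparkStatisticAsLong helper that the snippets in this section rely on. It assumes statistic values are stored as numeric strings and that SparkStatisticGroup exposes a lookup by name; the actual Hive implementation may differ.

// Minimal sketch, assuming getSparkStatistic(name) returns a SparkStatistic whose
// value is a numeric string; not the actual Hive implementation.
static long getSparkStatisticAsLong(SparkStatisticGroup group, String name) {
  return Long.parseLong(group.getSparkStatistic(name).getValue());
}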
@Override
public SparkStatistics getSparkStatistics() {
  SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
  // add Hive operator level statistics.
  sparkStatisticsBuilder.add(sparkCounters);
  // add spark job metrics.
  Map<Integer, List<Map.Entry<TaskMetrics, TaskInfo>>> jobMetric =
          jobMetricsListener.getJobMetric(jobId);
  if (jobMetric == null) {
    return null;
  }

  MetricsCollection metricsCollection = new MetricsCollection();
  Set<Integer> stageIds = jobMetric.keySet();
  for (int stageId : stageIds) {
    List<Map.Entry<TaskMetrics, TaskInfo>> taskMetrics = jobMetric.get(stageId);
    for (Map.Entry<TaskMetrics, TaskInfo> taskMetric : taskMetrics) {
      Metrics metrics = new Metrics(taskMetric.getKey(), taskMetric.getValue());
      metricsCollection.addMetrics(jobId, stageId, 0, metrics);
    }
  }
  Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(
          metricsCollection.getAllMetrics());
  for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
    sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME, entry.getKey(),
            Long.toString(entry.getValue()));
  }

  return sparkStatisticsBuilder.build();
}
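A hypothetical consumer of the returned object, to show the shape of the data this method builds. The jobStatus variable and the iterator-style accessors (getStatisticGroups, getStatistics, getName, getValue) are assumptions based on how these classes are used elsewhere in this section, not a confirmed API.

// Hypothetical consumer sketch: walk every group and print each statistic.
SparkStatistics stats = jobStatus.getSparkStatistics();
if (stats != null) {
  Iterator<SparkStatisticGroup> groups = stats.getStatisticGroups();
  while (groups.hasNext()) {
    SparkStatisticGroup group = groups.next();
    Iterator<SparkStatistic> statistics = group.getStatistics();
    while (statistics.hasNext()) {
      SparkStatistic statistic = statistics.next();
      System.out.println(group.getGroupName() + " / " + statistic.getName()
          + " = " + statistic.getValue());
    }
  }
}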
@Override
public SparkStatistics getSparkStatistics() {
  MetricsCollection metricsCollection = jobHandle.getMetrics();
  if (metricsCollection == null || getCounter() == null) {
    return null;
  }
  SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
  // add Hive operator level statistics - e.g. RECORDS_IN, RECORDS_OUT
  sparkStatisticsBuilder.add(getCounter());
  // add Spark job metrics - e.g. metrics collected by Spark itself (JvmGCTime,
  // ExecutorRunTime, etc.)
  Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(
          metricsCollection.getAllMetrics());
  for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
    sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME, entry.getKey(),
            Long.toString(entry.getValue()));
  }
  return sparkStatisticsBuilder.build();
}
@Override
public SparkStatistics getSparkStatistics() {
  MetricsCollection metricsCollection = jobHandle.getMetrics();
  if (metricsCollection == null || getCounter() == null) {
    return null;
  }
  SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
  // add Hive operator level statistics.
  sparkStatisticsBuilder.add(getCounter());
  // add spark job metrics.
  String jobIdentifier = "Spark Job[" + jobHandle.getClientJobId() + "] Metrics";

  Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(
          metricsCollection.getAllMetrics());
  for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
    sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue()));
  }

  return sparkStatisticsBuilder.build();
}
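Note the design change relative to the previous variant: metrics are grouped under a per-job name rather than the shared SparkStatisticsNames.SPARK_GROUP_NAME, so statistics from several jobs can coexist in one SparkStatistics object. Any reader must rebuild the same key. A sketch of the lookup; getStatisticGroup comes from the GC-warning snippet above, the rest is illustrative:

// Rebuild the per-job group name to look the metrics back up; assumes jobHandle is
// the same handle used when the statistics were built.
String groupName = "Spark Job[" + jobHandle.getClientJobId() + "] Metrics";
SparkStatisticGroup group = sparkStatistics.getStatisticGroup(groupName);
if (group != null) {
  long gcTime = SparkMetricsUtils.getSparkStatisticAsLong(group,
      SparkStatisticsNames.JVM_GC_TIME);
}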
// Build a one-line summary of the job's core metrics: single <name>:<value> entries
// first, then <bytes>/<records> pairs for reads, shuffle reads, and shuffle writes.
metricsString += SparkStatisticsNames.TASK_DURATION_TIME + colon
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.TASK_DURATION_TIME) + separator;
metricsString += SparkStatisticsNames.EXECUTOR_CPU_TIME + colon
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.EXECUTOR_CPU_TIME) + separator;
metricsString += SparkStatisticsNames.JVM_GC_TIME + colon
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.JVM_GC_TIME) + separator;
metricsString += SparkStatisticsNames.BYTES_READ + forwardSlash
        + SparkStatisticsNames.RECORDS_READ + colon
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.BYTES_READ) + forwardSlash
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.RECORDS_READ) + separator;
metricsString += SparkStatisticsNames.SHUFFLE_TOTAL_BYTES_READ + forwardSlash
        + SparkStatisticsNames.SHUFFLE_RECORDS_READ + colon
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.SHUFFLE_TOTAL_BYTES_READ) + forwardSlash
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.SHUFFLE_RECORDS_READ) + separator;
metricsString += SparkStatisticsNames.SHUFFLE_BYTES_WRITTEN + forwardSlash
        + SparkStatisticsNames.SHUFFLE_RECORDS_WRITTEN + colon
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.SHUFFLE_BYTES_WRITTEN) + forwardSlash
        + SparkMetricsUtils.getSparkStatisticAsLong(sparkStatisticGroup,
        SparkStatisticsNames.SHUFFLE_RECORDS_WRITTEN);
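The three bytes/records pairs above repeat the same formatting pattern; a small helper would keep it in one place. A sketch of that refactoring; the helper name and the literal separators are illustrative only, not part of the original code:

// Illustrative helper: formats "<bytesName>/<recordsName>:<bytesValue>/<recordsValue>"
// for one read or write pair.
private static String formatBytesRecordsPair(SparkStatisticGroup group, String bytesName,
    String recordsName) {
  return bytesName + "/" + recordsName + ": "
      + SparkMetricsUtils.getSparkStatisticAsLong(group, bytesName) + "/"
      + SparkMetricsUtils.getSparkStatisticAsLong(group, recordsName);
}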
@Override
public SparkStatistics getSparkStatistics() {
  SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
  // add Hive operator level statistics.
  sparkStatisticsBuilder.add(sparkCounters);
  // add spark job metrics.
  String jobIdentifier = "Spark Job[" + jobId + "] Metrics";
  Map<Integer, List<TaskMetrics>> jobMetric = jobMetricsListener.getJobMetric(jobId);
  if (jobMetric == null) {
    return null;
  }

  MetricsCollection metricsCollection = new MetricsCollection();
  Set<Integer> stageIds = jobMetric.keySet();
  for (int stageId : stageIds) {
    List<TaskMetrics> taskMetrics = jobMetric.get(stageId);
    for (TaskMetrics taskMetric : taskMetrics) {
      Metrics metrics = new Metrics(taskMetric);
      metricsCollection.addMetrics(jobId, stageId, 0, metrics);
    }
  }
  Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(
          metricsCollection.getAllMetrics());
  for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
    sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue()));
  }

  return sparkStatisticsBuilder.build();
}
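Both local variants above depend on JobMetricsListener.getJobMetric(jobId) returning a stage-to-task-metrics map, or null when the job reported nothing. A minimal sketch of the bookkeeping that usage implies; the real JobMetricsListener is a Spark listener and certainly differs in detail, so treat the field and method names here as assumptions:

// Minimal sketch of the implied bookkeeping: jobId -> (stageId -> task metrics).
// Not the actual JobMetricsListener implementation.
private final Map<Integer, Map<Integer, List<TaskMetrics>>> allJobMetrics = new HashMap<>();

synchronized void recordTaskMetrics(int jobId, int stageId, TaskMetrics metrics) {
  allJobMetrics.computeIfAbsent(jobId, k -> new HashMap<>())
      .computeIfAbsent(stageId, k -> new ArrayList<>())
      .add(metrics);
}

synchronized Map<Integer, List<TaskMetrics>> getJobMetric(int jobId) {
  return allJobMetrics.get(jobId); // null when no metrics were recorded for this job
}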