@Override
public int[] getStageIds() {
  SparkJobInfo sparkJobInfo = getJobInfo();
  return sparkJobInfo == null ? new int[0] : sparkJobInfo.stageIds();
}
@Override
public JobExecutionStatus getState() throws HiveException {
  SparkJobInfo sparkJobInfo = getSparkJobInfo();
  return sparkJobInfo != null ? sparkJobInfo.status() : null;
}
@Override
public String getAppID() {
  Future<String> getAppID = sparkClient.run(new GetAppIDJob());
  try {
    // Block for at most the configured client timeout so the caller never hangs.
    return getAppID.get(sparkClientTimeoutInSeconds, TimeUnit.SECONDS);
  } catch (Exception e) {
    LOG.warn("Failed to get APP ID.", e);
    return null;
  }
}
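// The bounded Future.get(...) pattern above recurs throughout these classes. Below is a
// minimal, hedged sketch of the same pattern as a reusable helper; the name getWithTimeout
// and the fallback parameter are hypothetical, not from the original sources. Requires
// java.util.concurrent.{Future, TimeUnit, ExecutionException, TimeoutException}.
private static <T> T getWithTimeout(Future<T> future, long timeoutSeconds, T fallback) {
  try {
    return future.get(timeoutSeconds, TimeUnit.SECONDS);
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt(); // preserve the interrupt flag for the caller
    return fallback;
  } catch (ExecutionException | TimeoutException e) {
    return fallback; // treat remote failure or timeout the same way: return the fallback
  }
}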
@Override
public SparkStatistics getSparkStatistics() {
  MetricsCollection metricsCollection = jobHandle.getMetrics();
  if (metricsCollection == null || getCounter() == null) {
    return null;
  }
  SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
  // Add Hive operator level statistics, e.g. RECORDS_IN, RECORDS_OUT.
  sparkStatisticsBuilder.add(getCounter());
  // Add Spark job metrics, e.g. metrics collected by Spark itself
  // (JvmGCTime, ExecutorRunTime, etc.).
  Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(
      metricsCollection.getAllMetrics());
  for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
    sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME,
        entry.getKey(), Long.toString(entry.getValue()));
  }
  return sparkStatisticsBuilder.build();
}
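// How a caller might dump the statistics built above: a minimal sketch assuming
// SparkStatistics exposes getStatisticGroups(), and that SparkStatisticGroup exposes
// getGroupName() and getStatistics() as iterators (as in Hive's
// o.a.h.hive.ql.exec.spark.Statistic package); verify the accessors against your Hive
// version. The method name logSparkStatistics is hypothetical.
private void logSparkStatistics(SparkStatistics statistics) {
  Iterator<SparkStatisticGroup> groups = statistics.getStatisticGroups();
  while (groups.hasNext()) {
    SparkStatisticGroup group = groups.next();
    Iterator<SparkStatistic> stats = group.getStatistics();
    while (stats.hasNext()) {
      SparkStatistic stat = stats.next();
      LOG.info(group.getGroupName() + "." + stat.getName() + " = " + stat.getValue());
    }
  }
}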
@Override
public SparkStatistics getSparkStatistics() {
  SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
  // Add Hive operator level statistics.
  sparkStatisticsBuilder.add(sparkCounters);
  // Add Spark job metrics, rebuilt from the per-stage data captured by the listener.
  Map<Integer, List<Map.Entry<TaskMetrics, TaskInfo>>> jobMetric =
      jobMetricsListener.getJobMetric(jobId);
  if (jobMetric == null) {
    return null;
  }
  MetricsCollection metricsCollection = new MetricsCollection();
  Set<Integer> stageIds = jobMetric.keySet();
  for (int stageId : stageIds) {
    List<Map.Entry<TaskMetrics, TaskInfo>> taskMetrics = jobMetric.get(stageId);
    for (Map.Entry<TaskMetrics, TaskInfo> taskMetric : taskMetrics) {
      Metrics metrics = new Metrics(taskMetric.getKey(), taskMetric.getValue());
      metricsCollection.addMetrics(jobId, stageId, 0, metrics);
    }
  }
  Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(
      metricsCollection.getAllMetrics());
  for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
    sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME,
        entry.getKey(), Long.toString(entry.getValue()));
  }
  return sparkStatisticsBuilder.build();
}
@Override
public void cleanup() {
  jobMetricsListener.cleanup(jobId);
  if (cachedRDDIds != null) {
    for (Integer cachedRDDId : cachedRDDIds) {
      // Unpersist asynchronously (blocking = false) so cleanup does not stall.
      sparkContext.sc().unpersistRDD(cachedRDDId, false);
    }
  }
}
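// Where cachedRDDIds might come from: a minimal sketch, not the original implementation.
// The field and the helper name cacheAndTrack are hypothetical; the assumption is that
// RDDs cached during the job are tracked by JavaRDD.id() so cleanup() can unpersist them.
// Requires org.apache.spark.api.java.JavaRDD and org.apache.spark.storage.StorageLevel.
private final List<Integer> cachedRDDIds = new ArrayList<>();

private <T> JavaRDD<T> cacheAndTrack(JavaRDD<T> rdd) {
  rdd.persist(StorageLevel.MEMORY_AND_DISK()); // cache for reuse across actions
  cachedRDDIds.add(rdd.id());                  // remember the id for cleanup()
  return rdd;
}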
private LocalHiveSparkClient(SparkConf sparkConf) {
  sc = new JavaSparkContext(sparkConf);
  jobMetricsListener = new JobMetricsListener();
  // Register the listener so per-task metrics are captured as stages complete.
  sc.sc().listenerBus().addListener(jobMetricsListener);
}
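// Roughly what a metrics-collecting listener like JobMetricsListener does: a hedged,
// minimal sketch (class and field names here are illustrative, not the original code),
// assuming Spark 2.x where org.apache.spark.scheduler.SparkListener is an abstract class.
// Requires org.apache.spark.scheduler.SparkListenerTaskEnd and
// org.apache.spark.executor.TaskMetrics.
public class SimpleTaskMetricsListener extends SparkListener {
  private final Map<Integer, List<TaskMetrics>> metricsByStage = new HashMap<>();

  @Override
  public synchronized void onTaskEnd(SparkListenerTaskEnd taskEnd) {
    // taskMetrics() can be null for failed tasks, so guard before recording.
    if (taskEnd.taskMetrics() != null) {
      metricsByStage
          .computeIfAbsent(taskEnd.stageId(), k -> new ArrayList<>())
          .add(taskEnd.taskMetrics());
    }
  }

  public synchronized List<TaskMetrics> getStageMetrics(int stageId) {
    return metricsByStage.getOrDefault(stageId, Collections.emptyList());
  }
}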
private void printAppInfo() {
  String sparkMaster = hiveConf.get("spark.master");
  if (sparkMaster != null && sparkMaster.startsWith("yarn")) {
    String appID = sparkJobStatus.getAppID();
    if (appID != null) {
      console.printInfo("Running with YARN Application = " + appID);
      console.printInfo("Kill Command = " + HiveConf.getVar(hiveConf, HiveConf.ConfVars.YARNBIN)
          + " application -kill " + appID);
    }
  }
}
private SparkStageInfo getSparkStageInfo(int stageId) {
  Future<SparkStageInfo> getStageInfo = sparkClient.run(new GetStageInfoJob(stageId));
  try {
    return getStageInfo.get(sparkClientTimeoutInSeconds, TimeUnit.SECONDS);
  } catch (Throwable t) {
    LOG.warn("Error getting stage info", t);
    return null;
  }
}
@Override
public String getWebUIURL() {
  Future<String> getWebUIURL = sparkClient.run(new GetWebUIURLJob());
  try {
    return getWebUIURL.get(sparkClientTimeoutInSeconds, TimeUnit.SECONDS);
  } catch (Exception e) {
    LOG.warn("Failed to get web UI URL.", e);
    // If the wait was interrupted, remember the cause so the caller can surface it.
    if (Thread.interrupted()) {
      error = e;
    }
    return "UNKNOWN";
  }
}
@Override
public SparkStatistics getSparkStatistics() {
  MetricsCollection metricsCollection = jobHandle.getMetrics();
  if (metricsCollection == null || getCounter() == null) {
    return null;
  }
  SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder();
  // Add Hive operator level statistics.
  sparkStatisticsBuilder.add(getCounter());
  // Add Spark job metrics under a per-job group name.
  String jobIdentifier = "Spark Job[" + jobHandle.getClientJobId() + "] Metrics";
  Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics(
      metricsCollection.getAllMetrics());
  for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) {
    sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(),
        Long.toString(entry.getValue()));
  }
  return sparkStatisticsBuilder.build();
}
@Override
public int[] getStageIds() throws HiveException {
  SparkJobInfo sparkJobInfo = getSparkJobInfo();
  return sparkJobInfo != null ? sparkJobInfo.stageIds() : new int[0];
}
@Override
public String getAppID() {
  Future<String> getAppID = sparkClient.run(new GetAppIDJob());
  try {
    return getAppID.get(sparkClientTimeoutInSeconds, TimeUnit.SECONDS);
  } catch (Exception e) {
    LOG.warn("Failed to get APP ID.", e);
    // If the wait was interrupted, remember the cause so the caller can surface it.
    if (Thread.interrupted()) {
      error = e;
    }
    return null;
  }
}
@Override
public JobExecutionStatus getState() {
  SparkJobInfo sparkJobInfo = getJobInfo();
  // A Spark job with empty source data is never actually submitted, so no
  // JobStart/JobEnd events reach the JobStateListener; fall back to the
  // JavaFutureAction to determine the current job state.
  if (sparkJobInfo == null && future.isDone()) {
    try {
      future.get();
    } catch (Exception e) {
      LOG.error("Failed to run job " + jobId, e);
      return JobExecutionStatus.FAILED;
    }
    return JobExecutionStatus.SUCCEEDED;
  }
  return sparkJobInfo == null ? null : sparkJobInfo.status();
}
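// A hedged usage sketch, not from the original sources: polling getState() until the job
// reaches a terminal status. The method name waitForCompletion and the one-second poll
// interval are assumptions; JobExecutionStatus is org.apache.spark.JobExecutionStatus.
private static void waitForCompletion(SparkJobStatus jobStatus) throws Exception {
  JobExecutionStatus status = jobStatus.getState();
  while (status == null || status == JobExecutionStatus.RUNNING
      || status == JobExecutionStatus.UNKNOWN) {
    Thread.sleep(1000); // poll once per second until a terminal state is reached
    status = jobStatus.getState();
  }
  if (status == JobExecutionStatus.FAILED) {
    throw new Exception("Spark job failed");
  }
}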