// NOTE(review): this span appears to be a corrupted merge of several versions of
// RemoteSparkJobMonitor.startMonitor(): `progressMap` is declared twice in the same
// scope (would not compile), and timeout-error, RUNNING-transition, and success-report
// fragments are fused together without their enclosing switch/loop. Recover the
// intended method body from version control rather than editing in place.
state = sparkJobStatus.getRemoteJobState(); Preconditions.checkState(sparkJobStatus.isRemoteActive(), "Connection to remote Spark driver was lost"); HiveException he = new HiveException(ErrorMsg.SPARK_JOB_MONITOR_TIMEOUT, Long.toString(timeCount)); sparkJobStatus.setMonitorError(he); running = false; done = true; JobExecutionStatus sparkJobState = sparkJobStatus.getState(); if (sparkJobState == JobExecutionStatus.RUNNING) { Map<SparkStage, SparkStageProgress> progressMap = sparkJobStatus.getSparkStageProgress(); if (!running) { perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_TO_RUNNING); printAppInfo(); console.printInfo("Hive on Spark Session Web UI URL: " + sparkJobStatus.getWebUIURL()); console.printInfo("\nQuery Hive on Spark job[" + sparkJobStatus.getJobId() + "] stages: " + Arrays.toString(sparkJobStatus.getStageIds())); console.printInfo("Spark job[" + sparkJobStatus.getJobId() + "] status = RUNNING"); running = true; Map<SparkStage, SparkStageProgress> progressMap = sparkJobStatus.getSparkStageProgress(); updateFunction.printStatus(progressMap, lastProgressMap); lastProgressMap = progressMap; double duration = (System.currentTimeMillis() - startTime) / 1000.0; console.printInfo("Spark job[" + sparkJobStatus.getJobId() + "] finished successfully in " + String.format("%.2f second(s)", duration));
@Override public SparkStatistics getSparkStatistics() { MetricsCollection metricsCollection = jobHandle.getMetrics(); if (metricsCollection == null || getCounter() == null) { return null; } SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. - e.g. RECORDS_IN, RECORDS_OUT sparkStatisticsBuilder.add(getCounter()); // add spark job metrics. - e.g. metrics collected by Spark itself (JvmGCTime, // ExecutorRunTime, etc.) Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics( metricsCollection.getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(SparkStatisticsNames.SPARK_GROUP_NAME, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
/**
 * Returns the current Spark job execution status, or {@code null} when the
 * job info is not (yet) available from the remote context.
 */
@Override
public JobExecutionStatus getState() throws HiveException {
  SparkJobInfo info = getSparkJobInfo();
  if (info == null) {
    return null;
  }
  return info.status();
}
/**
 * Collects per-stage task progress for every known stage of this job.
 *
 * @return map keyed by "&lt;stageId&gt;_&lt;attemptId&gt;"; stages whose info or
 *         name is unavailable are skipped.
 */
@Override
public Map<String, SparkStageProgress> getSparkStageProgress() throws HiveException {
  Map<String, SparkStageProgress> progressByStage = new HashMap<String, SparkStageProgress>();
  for (int stageId : getStageIds()) {
    SparkStageInfo stageInfo = getSparkStageInfo(stageId);
    // A stage with no info or no name has not materialized yet; skip it.
    if (stageInfo == null || stageInfo.name() == null) {
      continue;
    }
    SparkStageProgress progress = new SparkStageProgress(
        stageInfo.numTasks(),
        stageInfo.numCompletedTasks(),
        stageInfo.numActiveTasks(),
        stageInfo.numFailedTasks());
    String key = String.valueOf(stageInfo.stageId()) + "_" + stageInfo.currentAttemptId();
    progressByStage.put(key, progress);
  }
  return progressByStage;
}
// NOTE(review): corrupted span — orphan string continuations (`+ sparkJobStatus.getJobId()
// + "])");`) and a detached `break; case FAILED:` indicate this is a partial diff/merge of
// RemoteSparkJobMonitor.startMonitor() rather than compilable code. Restore the full
// method from version control; do not edit this fragment in place.
JobHandle.State state = sparkJobStatus.getRemoteJobState(); JobExecutionStatus sparkJobState = sparkJobStatus.getState(); if (sparkJobState == JobExecutionStatus.RUNNING) { Map<String, SparkStageProgress> progressMap = sparkJobStatus.getSparkStageProgress(); if (!running) { perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_TO_RUNNING); printAppInfo(); console.printInfo("\nQuery Hive on Spark job[" + sparkJobStatus.getJobId() + "] stages: " + Arrays.toString(sparkJobStatus.getStageIds())); + sparkJobStatus.getJobId() + "])"); running = true; Preconditions.checkState(sparkJobStatus.isRemoteActive(), "Remote context becomes inactive."); Map<String, SparkStageProgress> progressMap = sparkJobStatus.getSparkStageProgress(); printStatus(progressMap, lastProgressMap); lastProgressMap = progressMap; break; case FAILED: String detail = sparkJobStatus.getError().getMessage(); StringBuilder errBuilder = new StringBuilder(); errBuilder.append("Job failed with "); msg = "Failed to monitor Job[" + sparkJobStatus.getJobId() + "]" + msg;
// NOTE(review): corrupted span — unbalanced braces around the LOG.isDebugEnabled() check
// and orphan string continuations (`+ sparkJobStatus.getJobId() + "] stages:");`) show
// this is another partial merge of RemoteSparkJobMonitor.startMonitor(). Also note the
// stray space in "Job[ " versus "Job[" in the sibling fragment — reconcile when the
// method is restored from version control.
JobHandle.State state = sparkJobStatus.getRemoteJobState(); if (LOG.isDebugEnabled()) { console.printInfo("state = " + state); JobExecutionStatus sparkJobState = sparkJobStatus.getState(); if (sparkJobState == JobExecutionStatus.RUNNING) { Map<String, SparkStageProgress> progressMap = sparkJobStatus.getSparkStageProgress(); if (!running) { perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_TO_RUNNING); + sparkJobStatus.getJobId() + "] stages:"); for (int stageId : sparkJobStatus.getStageIds()) { console.printInfo(Integer.toString(stageId)); + sparkJobStatus.getJobId() + "])"); running = true; Map<String, SparkStageProgress> progressMap = sparkJobStatus.getSparkStageProgress(); printStatus(progressMap, lastProgressMap); lastProgressMap = progressMap; msg = "Failed to monitor Job[ " + sparkJobStatus.getJobId() + "]" + msg;
/**
 * Verifies that the monitor reports return code 3 when the remote Spark job
 * is in CANCELLED state while the remote context is still active.
 */
@Test
public void testRemoteSparkCancel() {
  RemoteSparkJobStatus jobSts = mock(RemoteSparkJobStatus.class);
  when(jobSts.getRemoteJobState()).thenReturn(State.CANCELLED);
  when(jobSts.isRemoteActive()).thenReturn(true);
  HiveConf hiveConf = new HiveConf();
  SessionState.start(hiveConf);
  RemoteSparkJobMonitor remoteSparkJobMonitor = new RemoteSparkJobMonitor(hiveConf, jobSts);
  // Fix: JUnit's assertEquals takes (expected, actual); the original call had them
  // reversed, which makes failure messages read backwards.
  Assert.assertEquals(3, remoteSparkJobMonitor.startMonitor());
}
private SparkJobRef submit(final DriverContext driverContext, final SparkWork sparkWork) throws Exception { final Context ctx = driverContext.getCtx(); final HiveConf hiveConf = (HiveConf) ctx.getConf(); refreshLocalResources(sparkWork, hiveConf); final JobConf jobConf = new JobConf(hiveConf); //update the credential provider location in the jobConf HiveConfUtil.updateJobCredentialProviders(jobConf); // Create temporary scratch dir final Path emptyScratchDir = ctx.getMRTmpPath(); FileSystem fs = emptyScratchDir.getFileSystem(jobConf); fs.mkdirs(emptyScratchDir); // make sure NullScanFileSystem can be loaded - HIVE-18442 jobConf.set("fs." + NullScanFileSystem.getBaseScheme() + ".impl", NullScanFileSystem.class.getCanonicalName()); byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf); byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir); byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork); JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes); if (driverContext.isShutdown()) { throw new HiveException("Operation is cancelled."); } JobHandle<Serializable> jobHandle = remoteClient.submit(job); RemoteSparkJobStatus sparkJobStatus = new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimtout); return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus); }
@Override public SparkStatistics getSparkStatistics() { MetricsCollection metricsCollection = jobHandle.getMetrics(); if (metricsCollection == null || getCounter() == null) { return null; } SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. sparkStatisticsBuilder.add(getCounter()); // add spark job metrics. String jobIdentifier = "Spark Job[" + jobHandle.getClientJobId() + "] Metrics"; Map<String, Long> flatJobMetric = extractMetrics(metricsCollection); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
/**
 * When running on YARN, prints the application id and the command a user can
 * run to kill it. Does nothing for other masters or when no app id is known.
 */
private void printAppInfo() {
  String master = hiveConf.get("spark.master");
  if (master == null || !master.startsWith("yarn")) {
    return;
  }
  String appID = sparkJobStatus.getAppID();
  if (appID != null) {
    console.printInfo("Running with YARN Application = " + appID);
    console.printInfo("Kill Command = "
        + HiveConf.getVar(hiveConf, HiveConf.ConfVars.YARNBIN)
        + " application -kill " + appID);
  }
}
} // closing brace of the enclosing class, carried over from the original span
/**
 * Gathers task progress for each stage of this job.
 *
 * @return map from "&lt;stageId&gt;_&lt;attemptId&gt;" to that stage's progress;
 *         stages without info or a name are omitted.
 */
@Override
public Map<String, SparkStageProgress> getSparkStageProgress() throws HiveException {
  Map<String, SparkStageProgress> result = new HashMap<String, SparkStageProgress>();
  for (int id : getStageIds()) {
    SparkStageInfo info = getSparkStageInfo(id);
    if (info == null || info.name() == null) {
      continue; // stage not materialized yet
    }
    SparkStageProgress progress = new SparkStageProgress(
        info.numTasks(),
        info.numCompletedTasks(),
        info.numActiveTasks(),
        info.numFailedTasks());
    result.put(String.valueOf(info.stageId()) + "_" + info.currentAttemptId(), progress);
  }
  return result;
}
private SparkJobRef submit(final DriverContext driverContext, final SparkWork sparkWork) throws Exception { final Context ctx = driverContext.getCtx(); final HiveConf hiveConf = (HiveConf) ctx.getConf(); refreshLocalResources(sparkWork, hiveConf); final JobConf jobConf = new JobConf(hiveConf); //update the credential provider location in the jobConf HiveConfUtil.updateJobCredentialProviders(jobConf); // Create temporary scratch dir final Path emptyScratchDir = ctx.getMRTmpPath(); FileSystem fs = emptyScratchDir.getFileSystem(jobConf); fs.mkdirs(emptyScratchDir); byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf); byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir); byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork); JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes); if (driverContext.isShutdown()) { throw new HiveException("Operation is cancelled."); } JobHandle<Serializable> jobHandle = remoteClient.submit(job); RemoteSparkJobStatus sparkJobStatus = new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimtout); return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus); }
/**
 * If this job runs on YARN, reports the YARN application id and the matching
 * kill command. No-op for non-YARN masters or when no app id is available.
 */
private void printAppInfo() {
  String master = hiveConf.get("spark.master");
  if (master != null && master.startsWith("yarn")) {
    String appID = sparkJobStatus.getAppID();
    if (appID == null) {
      return;
    }
    console.printInfo("Running with YARN Application = " + appID);
    String yarnBin = HiveConf.getVar(hiveConf, HiveConf.ConfVars.YARNBIN);
    console.printInfo("Kill Command = " + yarnBin + " application -kill " + appID);
  }
}
} // closing brace of the enclosing class, carried over from the original span
/**
 * Returns the ids of all stages belonging to this job, or an empty array
 * when job info is unavailable.
 */
@Override
public int[] getStageIds() throws HiveException {
  SparkJobInfo info = getSparkJobInfo();
  if (info == null) {
    return new int[0];
  }
  return info.stageIds();
}
@Override public SparkJobRef execute(final DriverContext driverContext, final SparkWork sparkWork) throws Exception { final Context ctx = driverContext.getCtx(); final HiveConf hiveConf = (HiveConf) ctx.getConf(); refreshLocalResources(sparkWork, hiveConf); final JobConf jobConf = new JobConf(hiveConf); // Create temporary scratch dir final Path emptyScratchDir = ctx.getMRTmpPath(); FileSystem fs = emptyScratchDir.getFileSystem(jobConf); fs.mkdirs(emptyScratchDir); byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf); byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir); byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork); JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes); JobHandle<Serializable> jobHandle = remoteClient.submit(job); RemoteSparkJobStatus sparkJobStatus = new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimtout); return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus); }
@Override public SparkStatistics getSparkStatistics() { MetricsCollection metricsCollection = jobHandle.getMetrics(); if (metricsCollection == null || getCounter() == null) { return null; } SparkStatisticsBuilder sparkStatisticsBuilder = new SparkStatisticsBuilder(); // add Hive operator level statistics. sparkStatisticsBuilder.add(getCounter()); // add spark job metrics. String jobIdentifier = "Spark Job[" + jobHandle.getClientJobId() + "] Metrics"; Map<String, Long> flatJobMetric = SparkMetricsUtils.collectMetrics( metricsCollection.getAllMetrics()); for (Map.Entry<String, Long> entry : flatJobMetric.entrySet()) { sparkStatisticsBuilder.add(jobIdentifier, entry.getKey(), Long.toString(entry.getValue())); } return sparkStatisticsBuilder.build(); }
/**
 * Reports the job's execution status, or {@code null} if job info cannot be
 * obtained from the remote context.
 */
@Override
public JobExecutionStatus getState() throws HiveException {
  SparkJobInfo jobInfo = getSparkJobInfo();
  return (jobInfo == null) ? null : jobInfo.status();
}
/**
 * Reports the ids of this job's stages; an empty array is returned when job
 * info cannot be obtained.
 */
@Override
public int[] getStageIds() throws HiveException {
  SparkJobInfo jobInfo = getSparkJobInfo();
  return (jobInfo == null) ? new int[0] : jobInfo.stageIds();
}
/**
 * Returns the current execution status of the job, or {@code null} when the
 * remote job info is unavailable.
 */
@Override
public JobExecutionStatus getState() throws HiveException {
  SparkJobInfo info = getSparkJobInfo();
  if (info == null) {
    return null;
  }
  return info.status();
}
/**
 * Returns all stage ids for this job; yields an empty array when the remote
 * job info is unavailable.
 */
@Override
public int[] getStageIds() throws HiveException {
  SparkJobInfo info = getSparkJobInfo();
  if (info == null) {
    return new int[0];
  }
  return info.stageIds();
}