public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf, String sparkSessionId,
    String hiveSessionId) throws Exception {
  Map<String, String> sparkConf = initiateSparkConf(hiveconf, hiveSessionId);
  // Submit the Spark job through the local Spark context when the Spark master is in local mode;
  // otherwise submit it through the remote Spark context.
  String master = sparkConf.get("spark.master");
  if (master.equals("local") || master.startsWith("local[")) {
    // With a local Spark context, all user sessions share the same Spark context.
    return LocalHiveSparkClient.getInstance(generateSparkConf(sparkConf), hiveconf);
  } else {
    return new RemoteHiveSparkClient(hiveconf, sparkConf, sparkSessionId);
  }
}
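// Hypothetical usage sketch (not part of this excerpt): how a session-level caller might obtain
// a client, assuming the factory methods above live on HiveSparkClientFactory. The configuration
// values and session ids below are illustrative.
HiveConf conf = new HiveConf();
conf.set("spark.master", "yarn");
HiveSparkClient client =
    HiveSparkClientFactory.createHiveSparkClient(conf, "spark-session-0", "hive-session-0");
// A "local" or "local[*]" master would instead have returned the shared LocalHiveSparkClient.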
@Override
public SparkJobRef execute(final DriverContext driverContext, final SparkWork sparkWork)
    throws Exception {
  if (SparkClientUtilities.isYarnMaster(hiveConf.get("spark.master"))
      && !remoteClient.isActive()) {
    // Re-create the remote client if it is no longer active
    close();
    createRemoteClient();
  }
  try {
    return submit(driverContext, sparkWork);
  } catch (Throwable cause) {
    throw new Exception("Failed to submit Spark work, please retry later", cause);
  }
}
@Override
public int getExecutorCount() throws Exception {
  return getExecutorCount(sparkClientTimeout, TimeUnit.SECONDS);
}
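// A minimal sketch of the timed overload the method above delegates to, assuming the remote
// client exposes a Future-returning executor-count call (the prewarm loop below relies on the
// same timed variant).
private int getExecutorCount(long timeout, TimeUnit unit) throws Exception {
  Future<Integer> handler = remoteClient.getExecutorCount();
  return handler.get(timeout, unit);
}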
private synchronized void refreshLocalResources(SparkWork sparkWork, HiveConf conf)
    throws IOException {
  // add the hive-exec jar
  addJars((new JobConf(this.getClass())).getJar());

  // add aux jars
  addJars(conf.getAuxJars());
  addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars());

  // add added jars
  String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
  addJars(addedJars);

  // add plugin module jars on demand;
  // jobConf will hold all the configuration for Hadoop, Tez, and Hive
  JobConf jobConf = new JobConf(conf);
  jobConf.set(MR_JAR_PROPERTY, "");
  for (BaseWork work : sparkWork.getAllWork()) {
    work.configureJobConf(jobConf);
  }
  addJars(jobConf.get(MR_JAR_PROPERTY));

  // remove the location of container tokens
  conf.unset(MR_CREDENTIALS_LOCATION_PROPERTY);

  // add added files
  String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
  addResources(addedFiles);

  // add added archives
  String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
  addResources(addedArchives);
}
private void createRemoteClient() throws Exception {
  remoteClient = SparkClientFactory.createClient(conf, hiveConf);

  if (HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_PREWARM_ENABLED)
      && SparkClientUtilities.isYarnMaster(hiveConf.get("spark.master"))) {
    int minExecutors = getExecutorsToWarm();
    if (minExecutors <= 0) {
      return;
    }

    LOG.info("Prewarm Spark executors. The minimum number of executors to warm is "
        + minExecutors);

    // Spend at most MAX_PREWARM_TIME waiting for executors to come up.
    int curExecutors = 0;
    long ts = System.currentTimeMillis();
    do {
      try {
        curExecutors = getExecutorCount(MAX_PREWARM_TIME, TimeUnit.MILLISECONDS);
      } catch (TimeoutException e) {
        // Don't fail on a future timeout, since the prewarm loop enforces its own timeout.
        LOG.warn("Timed out getting executor count.", e);
      }
      if (curExecutors >= minExecutors) {
        LOG.info("Finished prewarming Spark executors. The current number of executors is "
            + curExecutors);
        return;
      }
      Thread.sleep(500); // sleep half a second
    } while (System.currentTimeMillis() - ts < MAX_PREWARM_TIME);

    LOG.info("Timeout (" + MAX_PREWARM_TIME / 1000 + "s) occurred while prewarming executors. "
        + "The current number of executors is " + curExecutors);
  }
}
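// A plausible sketch of getExecutorsToWarm(), which the prewarm path above calls but which is
// not shown in this excerpt: take the configured prewarm container count and cap it by how many
// executors the application can actually reach. The Spark config keys and defaults below are
// assumptions.
private int getExecutorsToWarm() {
  int minExecutors =
      HiveConf.getIntVar(hiveConf, HiveConf.ConfVars.HIVE_PREWARM_NUM_CONTAINERS);
  boolean dynamicAllocation = hiveConf.getBoolean("spark.dynamicAllocation.enabled", false);
  if (dynamicAllocation) {
    // With dynamic allocation, only the minimum executor count is guaranteed to come up.
    int min = sparkConf.getInt("spark.dynamicAllocation.minExecutors", 0);
    minExecutors = Math.min(minExecutors, min);
  } else {
    int executorInstances = sparkConf.getInt("spark.executor.instances", 2);
    minExecutors = Math.min(minExecutors, executorInstances);
  }
  return minExecutors;
}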
private SparkJobRef submit(final DriverContext driverContext, final SparkWork sparkWork)
    throws Exception {
  final Context ctx = driverContext.getCtx();
  final HiveConf hiveConf = (HiveConf) ctx.getConf();
  refreshLocalResources(sparkWork, hiveConf);
  final JobConf jobConf = new JobConf(hiveConf);

  // update the credential provider location in the jobConf
  HiveConfUtil.updateJobCredentialProviders(jobConf);

  // create a temporary scratch dir
  final Path emptyScratchDir = ctx.getMRTmpPath();
  FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
  fs.mkdirs(emptyScratchDir);

  // make sure NullScanFileSystem can be loaded - HIVE-18442
  jobConf.set("fs." + NullScanFileSystem.getBaseScheme() + ".impl",
      NullScanFileSystem.class.getCanonicalName());

  byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf);
  byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir);
  byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork);

  JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes);
  if (driverContext.isShutdown()) {
    throw new HiveException("Operation is cancelled.");
  }

  JobHandle<Serializable> jobHandle = remoteClient.submit(job);
  RemoteSparkJobStatus sparkJobStatus =
      new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimeout);
  return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus);
}
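// Hypothetical caller-side sketch (not part of this excerpt): how the returned job reference
// might be consumed. The monitorJob/cancelJob method names are assumptions about the
// SparkJobRef interface.
SparkJobRef jobRef = client.execute(driverContext, sparkWork);
int rc = jobRef.monitorJob();   // block until the job finishes, reporting progress
if (rc != 0) {
  jobRef.cancelJob();           // best-effort cleanup on failure
}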
RemoteHiveSparkClient(HiveConf hiveConf, Map<String, String> conf, String sessionId)
    throws Exception {
  this.hiveConf = hiveConf;
  sparkClientTimeout = hiveConf.getTimeVar(HiveConf.ConfVars.SPARK_CLIENT_FUTURE_TIMEOUT,
      TimeUnit.SECONDS);
  sparkConf = HiveSparkClientFactory.generateSparkConf(conf);
  this.conf = conf;
  this.sessionId = sessionId;
  createRemoteClient();
}
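// Sketch of the instance state the constructor above initializes. The declarations are not
// shown in this excerpt; types are inferred from usage and should be treated as assumptions.
private final HiveConf hiveConf;
private final Map<String, String> conf;
private final SparkConf sparkConf;
private final String sessionId;
private final long sparkClientTimeout;   // seconds, from SPARK_CLIENT_FUTURE_TIMEOUT
private SparkClient remoteClient;        // re-created by execute() when no longer active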
private void refreshLocalResources(SparkWork sparkWork, HiveConf conf) throws IOException {
  // add the hive-exec jar
  addJars((new JobConf(this.getClass())).getJar());

  // add aux jars
  addJars(conf.getAuxJars());
  addJars(SessionState.get() == null ? null : SessionState.get().getReloadableAuxJars());

  // add added jars
  String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
  addJars(addedJars);

  // add plugin module jars on demand;
  // jobConf will hold all the configuration for Hadoop, Tez, and Hive
  JobConf jobConf = new JobConf(conf);
  jobConf.set(MR_JAR_PROPERTY, "");
  for (BaseWork work : sparkWork.getAllWork()) {
    work.configureJobConf(jobConf);
  }
  // read the plugin jars from the jobConf the works configured, not from the original conf
  addJars(jobConf.get(MR_JAR_PROPERTY));

  // add added files
  String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
  addResources(addedFiles);

  // add added archives
  String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
  addResources(addedArchives);
}
// Variant of the createRemoteClient() prewarm guard above: prewarming also applies to local
// masters, and the prewarm window is the configurable maxPrewarmTime rather than the
// MAX_PREWARM_TIME constant.
if (HiveConf.getBoolVar(hiveConf, ConfVars.HIVE_PREWARM_ENABLED)
    && (SparkClientUtilities.isYarnMaster(hiveConf.get("spark.master"))
        || SparkClientUtilities.isLocalMaster(hiveConf.get("spark.master")))) {
  int minExecutors = getExecutorsToWarm();
  if (minExecutors <= 0) {
    return;
  }

  int curExecutors = 0;
  long ts = System.currentTimeMillis();
  do {
    try {
      curExecutors = getExecutorCount(maxPrewarmTime, TimeUnit.MILLISECONDS);
    } catch (TimeoutException e) {
      // Don't fail on a future timeout, since the prewarm loop enforces its own timeout.
      LOG.warn("Timed out getting executor count.", e);
    }
    if (curExecutors >= minExecutors) {
      return;
    }
    Thread.sleep(500); // sleep half a second
  } while (System.currentTimeMillis() - ts < maxPrewarmTime);
}
private SparkJobRef submit(final DriverContext driverContext, final SparkWork sparkWork)
    throws Exception {
  final Context ctx = driverContext.getCtx();
  final HiveConf hiveConf = (HiveConf) ctx.getConf();
  refreshLocalResources(sparkWork, hiveConf);
  final JobConf jobConf = new JobConf(hiveConf);

  // update the credential provider location in the jobConf
  HiveConfUtil.updateJobCredentialProviders(jobConf);

  // create a temporary scratch dir
  final Path emptyScratchDir = ctx.getMRTmpPath();
  FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
  fs.mkdirs(emptyScratchDir);

  byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf);
  byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir);
  byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork);

  JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes);
  if (driverContext.isShutdown()) {
    throw new HiveException("Operation is cancelled.");
  }

  JobHandle<Serializable> jobHandle = remoteClient.submit(job);
  RemoteSparkJobStatus sparkJobStatus =
      new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimeout);
  return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus);
}
RemoteHiveSparkClient(HiveConf hiveConf, Map<String, String> conf) throws Exception {
  this.hiveConf = hiveConf;
  sparkClientTimeout = hiveConf.getTimeVar(HiveConf.ConfVars.SPARK_CLIENT_FUTURE_TIMEOUT,
      TimeUnit.SECONDS);
  sparkConf = HiveSparkClientFactory.generateSparkConf(conf);
  this.conf = conf;
  createRemoteClient();
}
private void refreshLocalResources(SparkWork sparkWork, HiveConf conf) throws IOException {
  // add the hive-exec jar
  addJars((new JobConf(this.getClass())).getJar());

  // add aux jars
  addJars(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS));

  // add added jars
  String addedJars = Utilities.getResourceFiles(conf, SessionState.ResourceType.JAR);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDJARS, addedJars);
  addJars(addedJars);

  // add plugin module jars on demand;
  // jobConf will hold all the configuration for Hadoop, Tez, and Hive
  JobConf jobConf = new JobConf(conf);
  jobConf.set(MR_JAR_PROPERTY, "");
  for (BaseWork work : sparkWork.getAllWork()) {
    work.configureJobConf(jobConf);
  }
  // read the plugin jars from the jobConf the works configured, not from the original conf
  addJars(jobConf.get(MR_JAR_PROPERTY));

  // add added files
  String addedFiles = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDFILES, addedFiles);
  addResources(addedFiles);

  // add added archives
  String addedArchives = Utilities.getResourceFiles(conf, SessionState.ResourceType.ARCHIVE);
  HiveConf.setVar(conf, HiveConf.ConfVars.HIVEADDEDARCHIVES, addedArchives);
  addResources(addedArchives);
}
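// A minimal sketch of the addJars helper used throughout refreshLocalResources, assuming the
// remote client exposes an addJar(URI) call, that jar paths arrive as a comma-separated list,
// and that a localJars set tracks what has already been shipped; all three are assumptions, and
// an addResources counterpart would follow the same shape with addFile(URI).
private void addJars(String addedJars) throws IOException {
  if (addedJars == null || addedJars.isEmpty()) {
    return;
  }
  for (String jar : addedJars.split(",")) {
    try {
      URI jarUri = new URI(jar.trim());
      if (localJars.add(jarUri)) {   // skip jars we already shipped to the remote context
        remoteClient.addJar(jarUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Skipping jar with malformed URI: " + jar, e);
    }
  }
}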
@Override
public SparkJobRef execute(final DriverContext driverContext, final SparkWork sparkWork)
    throws Exception {
  if (SparkClientUtilities.isYarnMaster(hiveConf.get("spark.master"))
      && !remoteClient.isActive()) {
    // Re-create the remote client if it is no longer active
    close();
    createRemoteClient();
  }
  try {
    return submit(driverContext, sparkWork);
  } catch (Throwable cause) {
    throw new Exception("Failed to submit Spark work, please retry later", cause);
  }
}
public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf) throws Exception {
  Map<String, String> sparkConf = initiateSparkConf(hiveconf);
  // Submit the Spark job through the local Spark context when the Spark master is in local mode;
  // otherwise submit it through the remote Spark context.
  String master = sparkConf.get("spark.master");
  if (master.equals("local") || master.startsWith("local[")) {
    // With a local Spark context, all user sessions share the same Spark context.
    return LocalHiveSparkClient.getInstance(generateSparkConf(sparkConf));
  } else {
    return new RemoteHiveSparkClient(hiveconf, sparkConf);
  }
}
@Override
public SparkJobRef execute(final DriverContext driverContext, final SparkWork sparkWork)
    throws Exception {
  final Context ctx = driverContext.getCtx();
  final HiveConf hiveConf = (HiveConf) ctx.getConf();
  refreshLocalResources(sparkWork, hiveConf);
  final JobConf jobConf = new JobConf(hiveConf);

  // create a temporary scratch dir
  final Path emptyScratchDir = ctx.getMRTmpPath();
  FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
  fs.mkdirs(emptyScratchDir);

  byte[] jobConfBytes = KryoSerializer.serializeJobConf(jobConf);
  byte[] scratchDirBytes = KryoSerializer.serialize(emptyScratchDir);
  byte[] sparkWorkBytes = KryoSerializer.serialize(sparkWork);

  JobStatusJob job = new JobStatusJob(jobConfBytes, scratchDirBytes, sparkWorkBytes);
  JobHandle<Serializable> jobHandle = remoteClient.submit(job);
  RemoteSparkJobStatus sparkJobStatus =
      new RemoteSparkJobStatus(remoteClient, jobHandle, sparkClientTimeout);
  return new RemoteSparkJobRef(hiveConf, jobHandle, sparkJobStatus);
}
@Override
public int getExecutorCount() throws Exception {
  return getExecutorCount(sparkClientTimeout, TimeUnit.SECONDS);
}
public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf)
    throws IOException, SparkException {
  Map<String, String> sparkConf = initiateSparkConf(hiveconf);
  // Submit the Spark job through the local Spark context when the Spark master is in local mode;
  // otherwise submit it through the remote Spark context.
  String master = sparkConf.get("spark.master");
  if (master.equals("local") || master.startsWith("local[")) {
    // With a local Spark context, all user sessions share the same Spark context.
    return LocalHiveSparkClient.getInstance(generateSparkConf(sparkConf));
  } else {
    return new RemoteHiveSparkClient(hiveconf, sparkConf);
  }
}