@Override
public SparkSession call() throws Exception {
  SessionState.setCurrentSessionState(sessionState);
  return SparkUtilities.getSparkSession(hiveConf, sparkSessionManager);
}
}
@Override
public void collect(HiveKey key, BytesWritable value) throws IOException {
  lastRecordOutput.add(SparkUtilities.copyHiveKey(key),
      SparkUtilities.copyBytesWritable(value));
}
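// copyHiveKey and copyBytesWritable are needed because Hadoop reuses Writable
// instances between records, so any key/value pair that is buffered must be
// deep-copied first. Neither helper is shown in this snippet; a hypothetical
// sketch of what they presumably do (not the verbatim Hive implementation):
public static HiveKey copyHiveKey(HiveKey key) {
  HiveKey copy = new HiveKey();
  copy.setDistKeyLength(key.getDistKeyLength());
  copy.setHashCode(key.hashCode());
  copy.set(key); // deep-copies the underlying byte buffer
  return copy;
}

public static BytesWritable copyBytesWritable(BytesWritable bw) {
  BytesWritable copy = new BytesWritable();
  copy.set(bw); // deep-copies the underlying byte buffer
  return copy;
}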
/**
 * Uploads a local file to HDFS.
 * This method is not thread safe.
 *
 * @param source the source file URI on the local file system
 * @param conf the Hive configuration used to resolve the destination FileSystem
 * @return the fully qualified URI of the uploaded file on HDFS
 * @throws IOException if the copy fails
 */
public static URI uploadToHDFS(URI source, HiveConf conf) throws IOException {
  Path localFile = new Path(source.getPath());
  Path remoteFile = new Path(SessionState.get().getSparkSession().getHDFSSessionDir(),
      getFileName(source));
  FileSystem fileSystem = FileSystem.get(remoteFile.toUri(), conf);
  // Overwrite if the remote file already exists. Whether the file can be added
  // on executor is up to spark, i.e. spark.files.overwrite
  fileSystem.copyFromLocalFile(false, true, localFile, remoteFile);
  Path fullPath = fileSystem.getFileStatus(remoteFile).getPath();
  return fullPath.toUri();
}
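// getFileName(URI) is referenced above but not shown in this snippet. A minimal
// sketch of what it presumably does -- take the last path segment of the URI as
// the destination file name (hypothetical reconstruction, not the verbatim
// Hive implementation):
private static String getFileName(URI uri) {
  if (uri == null) {
    return null;
  }
  String path = uri.getPath();
  return path.substring(path.lastIndexOf('/') + 1);
}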
private void addJars(String addedJars) throws IOException {
  for (String addedJar : CSV_SPLITTER.split(Strings.nullToEmpty(addedJars))) {
    try {
      URI jarUri = FileUtils.getURI(addedJar);
      if (jarUri != null && !localJars.contains(jarUri)) {
        localJars.add(jarUri);
        if (SparkUtilities.needUploadToHDFS(jarUri, sparkConf)) {
          jarUri = SparkUtilities.uploadToHDFS(jarUri, hiveConf);
        }
        remoteClient.addJar(jarUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Failed to add jar: " + addedJar, e);
    }
  }
}
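// needUploadToHDFS is referenced above but not shown. A plausible sketch of the
// check, under the assumption that local files must be pushed to HDFS when
// Spark runs on YARN (executors cannot read the client's local file system);
// hypothetical, not the verbatim Hive implementation:
public static boolean needUploadToHDFS(URI source, SparkConf sparkConf) {
  String master = sparkConf.get("spark.master", "");
  String scheme = source.getScheme();
  boolean isLocalFile = scheme == null || "file".equals(scheme);
  return isLocalFile && master.startsWith("yarn");
}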
SparkUtilities.collectOp(root, SparkPartitionPruningSinkOperator.class, allDPPs, seen);
Set<SparkPartitionPruningSinkOperator> nestedDPPs = new HashSet<>();
for (Operator<?> branch : branchingOp.getChildOperators()) {
  if (!isDirectDPPBranch(branch)) {
    SparkUtilities.collectOp(branch, SparkPartitionPruningSinkOperator.class, nestedDPPs, seen);
private void addJars(String addedJars) throws IOException {
  for (String addedJar : CSV_SPLITTER.split(Strings.nullToEmpty(addedJars))) {
    try {
      URI jarUri = SparkUtilities.getURI(addedJar);
      if (jarUri != null && !localJars.contains(jarUri)) {
        if (SparkUtilities.needUploadToHDFS(jarUri, sparkConf)) {
          jarUri = SparkUtilities.uploadToHDFS(jarUri, hiveConf);
        }
        localJars.add(jarUri);
        remoteClient.addJar(jarUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Failed to add jar: " + addedJar, e);
    }
  }
}
/**
 * Recursively finds all operators under root that are of class clazz, or are a
 * subclass of clazz, and puts them in result.
 *
 * @param result all operators under root that are of class clazz
 * @param root the root operator under which all operators will be examined
 * @param clazz the class to collect. Must NOT be null.
 */
public static void collectOp(Collection<Operator<?>> result, Operator<?> root, Class<?> clazz) {
  Preconditions.checkArgument(clazz != null, "AssertionError: clazz should not be null");
  if (root == null) {
    return;
  }
  if (clazz.isAssignableFrom(root.getClass())) {
    result.add(root);
  }
  for (Operator<?> child : root.getChildOperators()) {
    collectOp(result, child, clazz);
  }
}
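// The DPP snippets elsewhere in this section call a four-argument overload,
// collectOp(root, clazz, result, seen), which is not shown here. Hive operator
// graphs are DAGs, so a node reachable through several parents would otherwise
// be visited once per parent; the seen set guards against that. A hypothetical
// sketch inferred from the call sites (generic signature assumed):
public static <T> void collectOp(Operator<?> root, Class<T> clazz,
    Collection<T> result, Set<Operator<?>> seen) {
  if (root == null || !seen.add(root)) {
    return; // null, or already visited via another parent
  }
  if (clazz.isAssignableFrom(root.getClass())) {
    result.add(clazz.cast(root));
  }
  for (Operator<?> child : root.getChildOperators()) {
    collectOp(child, clazz, result, seen);
  }
}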
private MapJoinTableContainer load(FileSystem fs, Path path,
    MapJoinTableContainerSerDe mapJoinTableSerde) throws HiveException {
  LOG.info("\tLoad back all hashtable files from tmp folder uri: " + path);
  if (!SparkUtilities.isDedicatedCluster(hconf)) {
    return loadMapJoinTableContainer(fs, path, mapJoinTableSerde);
  }
  try {
    return SmallTableCache.get(path.toString(),
        () -> loadMapJoinTableContainer(fs, path, mapJoinTableSerde));
  } catch (ExecutionException e) {
    throw new HiveException(e);
  }
}
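// SmallTableCache.get(String, Callable) is not shown in this snippet. The
// checked ExecutionException suggests it delegates to a Guava Cache, whose
// get(key, loader) throws that exception when the loader fails. A minimal
// sketch under that assumption (class name and internals hypothetical):
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;

public final class SmallTableCacheSketch {
  private static final Cache<String, MapJoinTableContainer> CACHE =
      CacheBuilder.newBuilder().softValues().build();

  // Returns the cached container for key, loading and caching it on a miss;
  // concurrent callers for the same key block on a single load.
  public static MapJoinTableContainer get(String key,
      Callable<MapJoinTableContainer> loader) throws ExecutionException {
    return CACHE.get(key, loader);
  }
}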
this.outputs = outputs;
this.topOps = topOps;
this.currentTask = SparkUtilities.createSparkTask(conf);
this.rootTasks.add(currentTask);
this.leafOpToFollowingWorkInfo =
"No targetWork found for tablescan " + ts); String targetId = SparkUtilities.getWorkId(targetWork); String sourceId = SparkUtilities.getWorkId(sourceWork); if (tmpPath == null) { Path baseTmpPath = context.parseContext.getContext().getMRTmpPath(); tmpPath = SparkUtilities.generateTmpPathForPartitionPruning(baseTmpPath, targetId); targetWork.setTmpPathForPartitionPruning(tmpPath); LOG.info("Setting tmp path between source work and target work:\n" + tmpPath);
public static String colNameWithTargetId(MapWork target, String colName) {
  return SparkUtilities.getWorkId(target) + ":" + colName;
}
private void addResources(String addedFiles) throws IOException {
  for (String addedFile : CSV_SPLITTER.split(Strings.nullToEmpty(addedFiles))) {
    try {
      URI fileUri = FileUtils.getURI(addedFile);
      if (fileUri != null && !localFiles.contains(fileUri)) {
        localFiles.add(fileUri);
        if (SparkUtilities.needUploadToHDFS(fileUri, sparkConf)) {
          fileUri = SparkUtilities.uploadToHDFS(fileUri, hiveConf);
        }
        remoteClient.addFile(fileUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Failed to add file: " + addedFile, e);
    }
  }
}
private void addResources(String addedFiles) throws IOException {
  for (String addedFile : CSV_SPLITTER.split(Strings.nullToEmpty(addedFiles))) {
    try {
      URI fileUri = SparkUtilities.getURI(addedFile);
      if (fileUri != null && !localFiles.contains(fileUri)) {
        if (SparkUtilities.needUploadToHDFS(fileUri, sparkConf)) {
          fileUri = SparkUtilities.uploadToHDFS(fileUri, hiveConf);
        }
        localFiles.add(fileUri);
        remoteClient.addFile(fileUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Failed to add file: " + addedFile, e);
    }
  }
}
private void collectDPPInfos(SparkWork sparkWork) {
  for (BaseWork work : sparkWork.getAllWork()) {
    Set<Operator<?>> seen = new HashSet<>();
    for (Operator<?> root : work.getAllRootOperators()) {
      List<SparkPartitionPruningSinkOperator> sinks = new ArrayList<>();
      SparkUtilities.collectOp(root, SparkPartitionPruningSinkOperator.class, sinks, seen);
      for (SparkPartitionPruningSinkOperator sink : sinks) {
        idToDpps.put(sink.getUniqueId(), sink);
      }
    }
  }
}
private MapJoinTableContainer load(FileSystem fs, Path path,
    MapJoinTableContainerSerDe mapJoinTableSerde) throws HiveException {
  LOG.info("\tLoad back all hashtable files from tmp folder uri: " + path);
  if (!SparkUtilities.isDedicatedCluster(hconf)) {
    return useFastContainer
        ? mapJoinTableSerde.loadFastContainer(desc, fs, path, hconf)
        : mapJoinTableSerde.load(fs, path, hconf);
  }
  MapJoinTableContainer mapJoinTable = SmallTableCache.get(path);
  if (mapJoinTable == null) {
    // Double-checked locking: the interned path string serves as a per-path
    // lock, so only one thread loads a given small table while others wait.
    synchronized (path.toString().intern()) {
      mapJoinTable = SmallTableCache.get(path);
      if (mapJoinTable == null) {
        mapJoinTable = useFastContainer
            ? mapJoinTableSerde.loadFastContainer(desc, fs, path, hconf)
            : mapJoinTableSerde.load(fs, path, hconf);
        SmallTableCache.cache(path, mapJoinTable);
      }
    }
  }
  return mapJoinTable;
}
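// This older variant uses a null-returning SmallTableCache.get(Path) plus an
// explicit cache(Path, container) call, with the double-checked locking done by
// the caller; contrast with the Callable-based variant above, where the cache
// itself serializes the load. A hypothetical sketch inferred from the calls
// (class name and internals assumed, not the verbatim Hive implementation):
import java.util.concurrent.ConcurrentHashMap;

public final class SmallTableCacheLegacySketch {
  private static final ConcurrentHashMap<String, MapJoinTableContainer> MAP =
      new ConcurrentHashMap<>();

  // Returns the cached container, or null on a miss.
  public static MapJoinTableContainer get(Path path) {
    return MAP.get(path.toString());
  }

  public static void cache(Path path, MapJoinTableContainer table) {
    MAP.put(path.toString(), table);
  }
}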
try {
  sparkSessionManager = SparkSessionManagerImpl.getInstance();
  sparkSession = SparkUtilities.getSparkSession(
      context.getConf(), sparkSessionManager);
  sparkMemoryAndCores = sparkSession.getMemoryAndCores();
/**
 * Uploads a local file to HDFS.
 *
 * @param source the source file URI on the local file system
 * @param conf the Hive configuration used to resolve the destination FileSystem
 * @return the fully qualified URI of the uploaded file on HDFS
 * @throws IOException if the copy fails
 */
public static URI uploadToHDFS(URI source, HiveConf conf) throws IOException {
  Path localFile = new Path(source.getPath());
  Path remoteFile = new Path(SessionState.get().getSparkSession().getHDFSSessionDir(),
      getFileName(source));
  // Unlike the variant earlier in this section, this resolves the default
  // FileSystem from conf rather than from remoteFile's own URI.
  FileSystem fileSystem = FileSystem.get(conf);
  // Overwrite if the remote file already exists. Whether the file can be added
  // on executor is up to spark, i.e. spark.files.overwrite
  fileSystem.copyFromLocalFile(false, true, localFile, remoteFile);
  Path fullPath = fileSystem.getFileStatus(remoteFile).getPath();
  return fullPath.toUri();
}