@Override
public SparkSession call() throws Exception {
  SessionState.setCurrentSessionState(sessionState);
  return SparkUtilities.getSparkSession(hiveConf, sparkSessionManager);
}
}
@Override
public void collect(HiveKey key, BytesWritable value) throws IOException {
  lastRecordOutput.add(SparkUtilities.copyHiveKey(key),
      SparkUtilities.copyBytesWritable(value));
}
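// copyHiveKey and copyBytesWritable are needed because Hadoop reuses Writable
// instances between records, so any key/value pair that is buffered must be
// deep-copied first. Neither helper is shown in this snippet; a hypothetical
// sketch of what they presumably do (not the verbatim Hive implementation):
public static HiveKey copyHiveKey(HiveKey key) {
  HiveKey copy = new HiveKey();
  copy.setDistKeyLength(key.getDistKeyLength());
  copy.setHashCode(key.hashCode());
  copy.set(key); // deep-copies the underlying byte buffer
  return copy;
}

public static BytesWritable copyBytesWritable(BytesWritable bw) {
  BytesWritable copy = new BytesWritable();
  copy.set(bw); // deep-copies the underlying byte buffer
  return copy;
}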
/**
 * Uploads a local file to HDFS.
 * This method is not thread safe.
 *
 * @param source the source file URI on the local file system
 * @param conf the Hive configuration used to resolve the destination FileSystem
 * @return the fully qualified URI of the uploaded file on HDFS
 * @throws IOException if the copy fails
 */
public static URI uploadToHDFS(URI source, HiveConf conf) throws IOException {
  Path localFile = new Path(source.getPath());
  Path remoteFile = new Path(SessionState.get().getSparkSession().getHDFSSessionDir(),
      getFileName(source));
  FileSystem fileSystem = FileSystem.get(remoteFile.toUri(), conf);
  // Overwrite if the remote file already exists. Whether the file can be added
  // on executor is up to spark, i.e. spark.files.overwrite
  fileSystem.copyFromLocalFile(false, true, localFile, remoteFile);
  Path fullPath = fileSystem.getFileStatus(remoteFile).getPath();
  return fullPath.toUri();
}
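// getFileName(URI) is referenced above but not shown in this snippet. A minimal
// sketch of what it presumably does -- take the last path segment of the URI as
// the destination file name (hypothetical reconstruction, not the verbatim
// Hive implementation):
private static String getFileName(URI uri) {
  if (uri == null) {
    return null;
  }
  String path = uri.getPath();
  return path.substring(path.lastIndexOf('/') + 1);
}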
private void addJars(String addedJars) throws IOException {
  for (String addedJar : CSV_SPLITTER.split(Strings.nullToEmpty(addedJars))) {
    try {
      URI jarUri = FileUtils.getURI(addedJar);
      if (jarUri != null && !localJars.contains(jarUri)) {
        localJars.add(jarUri);
        if (SparkUtilities.needUploadToHDFS(jarUri, sparkConf)) {
          jarUri = SparkUtilities.uploadToHDFS(jarUri, hiveConf);
        }
        remoteClient.addJar(jarUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Failed to add jar: " + addedJar, e);
    }
  }
}
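// needUploadToHDFS is referenced above but not shown. A plausible sketch of the
// check, under the assumption that local files must be pushed to HDFS when
// Spark runs on YARN (executors cannot read the client's local file system);
// hypothetical, not the verbatim Hive implementation:
public static boolean needUploadToHDFS(URI source, SparkConf sparkConf) {
  String master = sparkConf.get("spark.master", "");
  String scheme = source.getScheme();
  boolean isLocalFile = scheme == null || "file".equals(scheme);
  return isLocalFile && master.startsWith("yarn");
}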
SparkUtilities.collectOp(root, SparkPartitionPruningSinkOperator.class, allDPPs, seen);
Set<SparkPartitionPruningSinkOperator> nestedDPPs = new HashSet<>();
for (Operator<?> branch : branchingOp.getChildOperators()) {
  if (!isDirectDPPBranch(branch)) {
    SparkUtilities.collectOp(branch, SparkPartitionPruningSinkOperator.class, nestedDPPs, seen);
private void addJars(String addedJars) throws IOException {
  for (String addedJar : CSV_SPLITTER.split(Strings.nullToEmpty(addedJars))) {
    try {
      URI jarUri = SparkUtilities.getURI(addedJar);
      if (jarUri != null && !localJars.contains(jarUri)) {
        if (SparkUtilities.needUploadToHDFS(jarUri, sparkConf)) {
          jarUri = SparkUtilities.uploadToHDFS(jarUri, hiveConf);
        }
        localJars.add(jarUri);
        remoteClient.addJar(jarUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Failed to add jar: " + addedJar, e);
    }
  }
}
/**
 * Recursively finds all operators under root that are of class clazz, or are a
 * subclass of clazz, and puts them in result.
 *
 * @param result all operators under root that are of class clazz
 * @param root the root operator under which all operators will be examined
 * @param clazz the class to collect. Must NOT be null.
 */
public static void collectOp(Collection<Operator<?>> result, Operator<?> root, Class<?> clazz) {
  Preconditions.checkArgument(clazz != null, "AssertionError: clazz should not be null");
  if (root == null) {
    return;
  }
  if (clazz.isAssignableFrom(root.getClass())) {
    result.add(root);
  }
  for (Operator<?> child : root.getChildOperators()) {
    collectOp(result, child, clazz);
  }
}
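// The DPP snippets elsewhere in this section call a four-argument overload,
// collectOp(root, clazz, result, seen), which is not shown here. Hive operator
// graphs are DAGs, so a node reachable through several parents would otherwise
// be visited once per parent; the seen set guards against that. A hypothetical
// sketch inferred from the call sites (generic signature assumed):
public static <T> void collectOp(Operator<?> root, Class<T> clazz,
    Collection<T> result, Set<Operator<?>> seen) {
  if (root == null || !seen.add(root)) {
    return; // null, or already visited via another parent
  }
  if (clazz.isAssignableFrom(root.getClass())) {
    result.add(clazz.cast(root));
  }
  for (Operator<?> child : root.getChildOperators()) {
    collectOp(child, clazz, result, seen);
  }
}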
private MapJoinTableContainer load(FileSystem fs, Path path,
    MapJoinTableContainerSerDe mapJoinTableSerde) throws HiveException {
  LOG.info("\tLoad back all hashtable files from tmp folder uri: " + path);
  if (!SparkUtilities.isDedicatedCluster(hconf)) {
    return loadMapJoinTableContainer(fs, path, mapJoinTableSerde);
  }
  try {
    return SmallTableCache.get(path.toString(),
        () -> loadMapJoinTableContainer(fs, path, mapJoinTableSerde));
  } catch (ExecutionException e) {
    throw new HiveException(e);
  }
}
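// SmallTableCache.get(String, Callable) is not shown in this snippet. The
// checked ExecutionException suggests it delegates to a Guava Cache, whose
// get(key, loader) throws that exception when the loader fails. A minimal
// sketch under that assumption (class name and internals hypothetical):
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;

public final class SmallTableCacheSketch {
  private static final Cache<String, MapJoinTableContainer> CACHE =
      CacheBuilder.newBuilder().softValues().build();

  // Returns the cached container for key, loading and caching it on a miss;
  // concurrent callers for the same key block on a single load.
  public static MapJoinTableContainer get(String key,
      Callable<MapJoinTableContainer> loader) throws ExecutionException {
    return CACHE.get(key, loader);
  }
}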
this.outputs = outputs;
this.topOps = topOps;
this.currentTask = SparkUtilities.createSparkTask(conf);
this.rootTasks.add(currentTask);
this.leafOpToFollowingWorkInfo =
"No targetWork found for tablescan " + ts); String targetId = SparkUtilities.getWorkId(targetWork); String sourceId = SparkUtilities.getWorkId(sourceWork); if (tmpPath == null) { Path baseTmpPath = context.parseContext.getContext().getMRTmpPath(); tmpPath = SparkUtilities.generateTmpPathForPartitionPruning(baseTmpPath, targetId); targetWork.setTmpPathForPartitionPruning(tmpPath); LOG.info("Setting tmp path between source work and target work:\n" + tmpPath);
public static String colNameWithTargetId(MapWork target, String colName) {
  return SparkUtilities.getWorkId(target) + ":" + colName;
}
private void addResources(String addedFiles) throws IOException {
  for (String addedFile : CSV_SPLITTER.split(Strings.nullToEmpty(addedFiles))) {
    try {
      URI fileUri = FileUtils.getURI(addedFile);
      if (fileUri != null && !localFiles.contains(fileUri)) {
        localFiles.add(fileUri);
        if (SparkUtilities.needUploadToHDFS(fileUri, sparkConf)) {
          fileUri = SparkUtilities.uploadToHDFS(fileUri, hiveConf);
        }
        remoteClient.addFile(fileUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Failed to add file: " + addedFile, e);
    }
  }
}
private void addResources(String addedFiles) throws IOException {
  for (String addedFile : CSV_SPLITTER.split(Strings.nullToEmpty(addedFiles))) {
    try {
      URI fileUri = SparkUtilities.getURI(addedFile);
      if (fileUri != null && !localFiles.contains(fileUri)) {
        if (SparkUtilities.needUploadToHDFS(fileUri, sparkConf)) {
          fileUri = SparkUtilities.uploadToHDFS(fileUri, hiveConf);
        }
        localFiles.add(fileUri);
        remoteClient.addFile(fileUri);
      }
    } catch (URISyntaxException e) {
      LOG.warn("Failed to add file: " + addedFile, e);
    }
  }
}
private void collectDPPInfos(SparkWork sparkWork) {
  for (BaseWork work : sparkWork.getAllWork()) {
    Set<Operator<?>> seen = new HashSet<>();
    for (Operator<?> root : work.getAllRootOperators()) {
      List<SparkPartitionPruningSinkOperator> sinks = new ArrayList<>();
      SparkUtilities.collectOp(root, SparkPartitionPruningSinkOperator.class, sinks, seen);
      for (SparkPartitionPruningSinkOperator sink : sinks) {
        idToDpps.put(sink.getUniqueId(), sink);
      }
    }
  }
}
private MapJoinTableContainer load(FileSystem fs, Path path,
    MapJoinTableContainerSerDe mapJoinTableSerde) throws HiveException {
  LOG.info("\tLoad back all hashtable files from tmp folder uri: " + path);
  if (!SparkUtilities.isDedicatedCluster(hconf)) {
    return useFastContainer
        ? mapJoinTableSerde.loadFastContainer(desc, fs, path, hconf)
        : mapJoinTableSerde.load(fs, path, hconf);
  }
  MapJoinTableContainer mapJoinTable = SmallTableCache.get(path);
  if (mapJoinTable == null) {
    // Double-checked locking: the interned path string serves as a per-path
    // lock, so only one thread loads a given small table while others wait.
    synchronized (path.toString().intern()) {
      mapJoinTable = SmallTableCache.get(path);
      if (mapJoinTable == null) {
        mapJoinTable = useFastContainer
            ? mapJoinTableSerde.loadFastContainer(desc, fs, path, hconf)
            : mapJoinTableSerde.load(fs, path, hconf);
        SmallTableCache.cache(path, mapJoinTable);
      }
    }
  }
  return mapJoinTable;
}
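// This older variant uses a null-returning SmallTableCache.get(Path) plus an
// explicit cache(Path, container) call, with the double-checked locking done by
// the caller; contrast with the Callable-based variant above, where the cache
// itself serializes the load. A hypothetical sketch inferred from the calls
// (class name and internals assumed, not the verbatim Hive implementation):
import java.util.concurrent.ConcurrentHashMap;

public final class SmallTableCacheLegacySketch {
  private static final ConcurrentHashMap<String, MapJoinTableContainer> MAP =
      new ConcurrentHashMap<>();

  // Returns the cached container, or null on a miss.
  public static MapJoinTableContainer get(Path path) {
    return MAP.get(path.toString());
  }

  public static void cache(Path path, MapJoinTableContainer table) {
    MAP.put(path.toString(), table);
  }
}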
try {
  sparkSessionManager = SparkSessionManagerImpl.getInstance();
  sparkSession = SparkUtilities.getSparkSession(
      context.getConf(), sparkSessionManager);
  sparkMemoryAndCores = sparkSession.getMemoryAndCores();
/**
 * Uploads a local file to HDFS.
 *
 * @param source the source file URI on the local file system
 * @param conf the Hive configuration used to resolve the destination FileSystem
 * @return the fully qualified URI of the uploaded file on HDFS
 * @throws IOException if the copy fails
 */
public static URI uploadToHDFS(URI source, HiveConf conf) throws IOException {
  Path localFile = new Path(source.getPath());
  Path remoteFile = new Path(SessionState.get().getSparkSession().getHDFSSessionDir(),
      getFileName(source));
  // Unlike the variant earlier in this section, this resolves the default
  // FileSystem from conf rather than from remoteFile's own URI.
  FileSystem fileSystem = FileSystem.get(conf);
  // Overwrite if the remote file already exists. Whether the file can be added
  // on executor is up to spark, i.e. spark.files.overwrite
  fileSystem.copyFromLocalFile(false, true, localFile, remoteFile);
  Path fullPath = fileSystem.getFileStatus(remoteFile).getPath();
  return fullPath.toUri();
}