SparkPlanGenerator gen =
    new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
SparkPlan plan = gen.generate(sparkWork);
private SparkTran generate(BaseWork work, SparkWork sparkWork) throws Exception {
  initStatsPublisher(work);
  JobConf newJobConf = cloneJobConf(work);
  checkSpecs(work, newJobConf);
  byte[] confBytes = KryoSerializer.serializeJobConf(newJobConf);
  boolean caching = isCachingWork(work, sparkWork);
  if (work instanceof MapWork) {
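This variant gates RDD caching on isCachingWork(work, sparkWork), whose body is not shown in these snippets. A minimal sketch of such a predicate, assuming the rule is "cache only a work consumed by several children, none of which is itself a cached clone":

// Hypothetical sketch of the caching predicate referenced above. Assumes
// cloneToWork maps a cloned work back to its original, so a child found in it
// will have its own cached RDD and caching the parent would be redundant.
private boolean isCachingWork(BaseWork work, SparkWork sparkWork) {
  List<BaseWork> children = sparkWork.getChildren(work);
  if (children.size() < 2) {
    return false; // a single consumer reads the RDD once; caching buys nothing
  }
  for (BaseWork child : children) {
    if (cloneToWork.containsKey(child)) {
      return false; // the child's own RDD is cached instead
    }
  }
  return true;
}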
private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, BaseWork work)
    throws Exception {
  if (cloneToWork.containsKey(work)) {
    BaseWork originalWork = cloneToWork.get(work);
    if (workToParentWorkTranMap.containsKey(originalWork)) {
      return workToParentWorkTranMap.get(originalWork);
    }
  }

  SparkTran result;
  if (work instanceof MapWork) {
    result = generateMapInput(sparkPlan, (MapWork) work);
    sparkPlan.addTran(result);
  } else if (work instanceof ReduceWork) {
    boolean toCache = cloneToWork.containsKey(work);
    List<BaseWork> parentWorks = sparkWork.getParents(work);
    SparkEdgeProperty sparkEdgeProperty = sparkWork.getEdgeProperty(parentWorks.get(0), work);
    result = generate(sparkPlan, sparkEdgeProperty, toCache, work.getName(), work);
    sparkPlan.addTran(result);
    for (BaseWork parentWork : parentWorks) {
      sparkPlan.connect(workToTranMap.get(parentWork), result);
    }
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, "
        + "but found " + work.getClass().getName());
  }

  if (cloneToWork.containsKey(work)) {
    workToParentWorkTranMap.put(cloneToWork.get(work), result);
  }
  return result;
}
@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
  JobConf jobConf = cloneJobConf(mapWork);
  Class ifClass = getInputFormat(jobConf, mapWork);

  JavaPairRDD<WritableComparable, Writable> hadoopRDD;
  if (mapWork.getNumMapTasks() != null) {
    jobConf.setNumMapTasks(mapWork.getNumMapTasks());
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class,
        mapWork.getNumMapTasks());
  } else {
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
  }

  // Caching is disabled for MapInput due to HIVE-8920
  MapInput result = new MapInput(sparkPlan, hadoopRDD, false /*cloneToWork.containsKey(mapWork)*/);
  return result;
}
public SparkPlan generate(SparkWork sparkWork) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  SparkPlan sparkPlan = new SparkPlan(this.jobConf, this.sc.sc());
  cloneToWork = sparkWork.getCloneToWork();
  workToTranMap.clear();
  workToParentWorkTranMap.clear();

  try {
    for (BaseWork work : sparkWork.getAllWork()) {
      // Run the SparkDynamicPartitionPruner here instead of inside the InputFormat,
      // so that pruning does not have to run when creating a RecordReader.
      runDynamicPartitionPruner(work);
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
      SparkTran tran = generate(work, sparkWork);
      SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
      sparkPlan.addTran(tran);
      sparkPlan.connect(parentTran, tran);
      workToTranMap.put(work, tran);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    }
  } finally {
    // Clear all ThreadLocal-cached MapWork/ReduceWork after plan generation,
    // as this may execute in a pool thread.
    Utilities.clearWorkMap(jobConf);
  }

  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  return sparkPlan;
}
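The pruning call above is only a hook; a hedged sketch of what it could look like, assuming a SparkDynamicPartitionPruner helper with a prune(MapWork, JobConf) entry point (assumed name and signature, not taken from these snippets):

// Hypothetical sketch: prune partitions of a MapWork before its HadoopRDD is
// built, so partitions filtered out at runtime never reach a RecordReader.
private void runDynamicPartitionPruner(BaseWork work) {
  if (work instanceof MapWork) {
    SparkDynamicPartitionPruner pruner = new SparkDynamicPartitionPruner();
    try {
      pruner.prune((MapWork) work, jobConf);
    } catch (Exception e) {
      throw new RuntimeException("Failed to run dynamic partition pruning", e);
    }
  }
}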
public SparkPlan generate(SparkWork sparkWork) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  SparkPlan sparkPlan = new SparkPlan();
  cloneToWork = sparkWork.getCloneToWork();
  workToTranMap.clear();
  workToParentWorkTranMap.clear();

  try {
    for (BaseWork work : sparkWork.getAllWork()) {
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
      SparkTran tran = generate(work, sparkWork);
      SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
      sparkPlan.addTran(tran);
      sparkPlan.connect(parentTran, tran);
      workToTranMap.put(work, tran);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    }
  } finally {
    // Clear all ThreadLocal-cached MapWork/ReduceWork after plan generation,
    // as this may execute in a pool thread.
    Utilities.clearWorkMap(jobConf);
  }

  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  return sparkPlan;
}
private SparkTran generate(BaseWork work) throws Exception {
  initStatsPublisher(work);
  JobConf newJobConf = cloneJobConf(work);
  checkSpecs(work, newJobConf);
  byte[] confBytes = KryoSerializer.serializeJobConf(newJobConf);

  if (work instanceof MapWork) {
    MapTran mapTran = new MapTran();
    HiveMapFunction mapFunc = new HiveMapFunction(confBytes, sparkReporter);
    mapTran.setMapFunction(mapFunc);
    return mapTran;
  } else if (work instanceof ReduceWork) {
    ReduceTran reduceTran = new ReduceTran();
    HiveReduceFunction reduceFunc = new HiveReduceFunction(confBytes, sparkReporter);
    reduceTran.setReduceFunction(reduceFunc);
    return reduceTran;
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, "
        + "but found " + work.getClass().getName());
  }
}
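For context, a minimal sketch of the SparkTran abstraction these factory methods return (the interface shape is an assumption, not copied from the snippets): each tran turns one pair RDD into another, and SparkPlan chains trans along the edges wired via connect().

// Assumed shape of the SparkTran contract used throughout these snippets:
// MapTran wraps a HiveMapFunction, ReduceTran wraps a HiveReduceFunction,
// and each contributes exactly one transformation to the RDD lineage.
interface SparkTran<KI extends WritableComparable, VI, KO extends WritableComparable, VO> {
  JavaPairRDD<KO, VO> transform(JavaPairRDD<KI, VI> input);
}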
@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
  JobConf jobConf = cloneJobConf(mapWork);
  Class ifClass = getInputFormat(jobConf, mapWork);
  sc.sc().setCallSite(CallSite.apply(mapWork.getName(), ""));

  JavaPairRDD<WritableComparable, Writable> hadoopRDD;
  if (mapWork.getNumMapTasks() != null) {
    jobConf.setNumMapTasks(mapWork.getNumMapTasks());
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class,
        mapWork.getNumMapTasks());
  } else {
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
  }

  // Caching is disabled for MapInput due to HIVE-8920
  boolean toCache = false /*cloneToWork.containsKey(mapWork)*/;
  String tables = mapWork.getAllRootOperators().stream()
      .filter(op -> op instanceof TableScanOperator)
      .map(ts -> ((TableScanDesc) ts.getConf()).getAlias())
      .collect(Collectors.joining(", "));
  String rddName = mapWork.getName() + " (" + tables + ", " + hadoopRDD.getNumPartitions()
      + (toCache ? ", cached)" : ")");
  MapInput result = new MapInput(sparkPlan, hadoopRDD, toCache, rddName, mapWork);
  return result;
}
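For illustration only (all values assumed), tracing the string building above for a MapWork named "Map 1" scanning a single table alias "src" across 4 input partitions with caching off:

// Assumed example values:
//   mapWork.getName()            -> "Map 1"
//   tables                       -> "src"
//   hadoopRDD.getNumPartitions() -> 4
//   toCache                      -> false
// => rddName == "Map 1 (src, 4)"   (or "Map 1 (src, 4, cached)" if toCache were true)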
public SparkPlan generate(SparkWork sparkWork) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  SparkPlan sparkPlan = new SparkPlan();
  cloneToWork = sparkWork.getCloneToWork();
  workToTranMap.clear();
  workToParentWorkTranMap.clear();

  try {
    for (BaseWork work : sparkWork.getAllWork()) {
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
      SparkTran tran = generate(work);
      SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
      sparkPlan.addTran(tran);
      sparkPlan.connect(parentTran, tran);
      workToTranMap.put(work, tran);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    }
  } finally {
    // Clear all ThreadLocal-cached MapWork/ReduceWork after plan generation,
    // as this may execute in a pool thread.
    Utilities.clearWorkMap();
  }

  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  return sparkPlan;
}
private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, BaseWork work)
    throws Exception {
  if (cloneToWork.containsKey(work)) {
    BaseWork originalWork = cloneToWork.get(work);
    if (workToParentWorkTranMap.containsKey(originalWork)) {
      return workToParentWorkTranMap.get(originalWork);
    }
  }

  SparkTran result;
  if (work instanceof MapWork) {
    result = generateMapInput(sparkPlan, (MapWork) work);
    sparkPlan.addTran(result);
  } else if (work instanceof ReduceWork) {
    List<BaseWork> parentWorks = sparkWork.getParents(work);
    result = generate(sparkPlan, sparkWork.getEdgeProperty(parentWorks.get(0), work),
        cloneToWork.containsKey(work));
    sparkPlan.addTran(result);
    for (BaseWork parentWork : parentWorks) {
      sparkPlan.connect(workToTranMap.get(parentWork), result);
    }
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, "
        + "but found " + work.getClass().getName());
  }

  if (cloneToWork.containsKey(work)) {
    workToParentWorkTranMap.put(cloneToWork.get(work), result);
  }
  return result;
}
@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
  JobConf jobConf = cloneJobConf(mapWork);
  Class ifClass = getInputFormat(jobConf, mapWork);
  JavaPairRDD<WritableComparable, Writable> hadoopRDD =
      sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
  // Caching is disabled for MapInput due to HIVE-8920
  MapInput result = new MapInput(sparkPlan, hadoopRDD, false /*cloneToWork.containsKey(mapWork)*/);
  return result;
}
SparkPlanGenerator gen =
    new SparkPlanGenerator(jc.sc(), null, localJobConf, localScratchDir, sparkReporter);
SparkPlan plan = gen.generate(localSparkWork);
sc = new JavaSparkContext(sparkConf);
SparkPlanGenerator sparkPlanGenerator = new SparkPlanGenerator(sc, null, jobConf, tmpDir, null);
SparkPlan sparkPlan = sparkPlanGenerator.generate(sparkTask.getWork());
RDD<Tuple2<HiveKey, BytesWritable>> reducerRdd = sparkPlan.generateGraph().rdd();
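Nothing in the snippet above actually executes the pipeline; generateGraph() only wires the RDD lineage. A minimal usage sketch, assuming the reducerRdd variable from above, that forces execution:

// An RDD action triggers the whole Hive-on-Spark pipeline: input splits,
// HiveMapFunction, shuffle, and HiveReduceFunction all run here.
long numOutputRecords = reducerRdd.count();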