SparkPlanGenerator gen =
    new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
SparkPlan plan = gen.generate(sparkWork);
private SparkTran generate(BaseWork work, SparkWork sparkWork) throws Exception {
  initStatsPublisher(work);
  JobConf newJobConf = cloneJobConf(work);
  checkSpecs(work, newJobConf);
  byte[] confBytes = KryoSerializer.serializeJobConf(newJobConf);
  boolean caching = isCachingWork(work, sparkWork);
  if (work instanceof MapWork) {
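This variant gates RDD caching on isCachingWork(work, sparkWork), whose body is not shown in these snippets. A minimal sketch of such a predicate, assuming the rule is "cache only a work consumed by several children, none of which is itself a cached clone":

// Hypothetical sketch of the caching predicate referenced above. Assumes
// cloneToWork maps a cloned work back to its original, so a child found in it
// will have its own cached RDD and caching the parent would be redundant.
private boolean isCachingWork(BaseWork work, SparkWork sparkWork) {
  List<BaseWork> children = sparkWork.getChildren(work);
  if (children.size() < 2) {
    return false; // a single consumer reads the RDD once; caching buys nothing
  }
  for (BaseWork child : children) {
    if (cloneToWork.containsKey(child)) {
      return false; // the child's own RDD is cached instead
    }
  }
  return true;
}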
private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, BaseWork work)
    throws Exception {
  if (cloneToWork.containsKey(work)) {
    BaseWork originalWork = cloneToWork.get(work);
    if (workToParentWorkTranMap.containsKey(originalWork)) {
      return workToParentWorkTranMap.get(originalWork);
    }
  }

  SparkTran result;
  if (work instanceof MapWork) {
    result = generateMapInput(sparkPlan, (MapWork) work);
    sparkPlan.addTran(result);
  } else if (work instanceof ReduceWork) {
    boolean toCache = cloneToWork.containsKey(work);
    List<BaseWork> parentWorks = sparkWork.getParents(work);
    SparkEdgeProperty sparkEdgeProperty = sparkWork.getEdgeProperty(parentWorks.get(0), work);
    result = generate(sparkPlan, sparkEdgeProperty, toCache, work.getName(), work);
    sparkPlan.addTran(result);
    for (BaseWork parentWork : parentWorks) {
      sparkPlan.connect(workToTranMap.get(parentWork), result);
    }
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, "
        + "but found " + work.getClass().getName());
  }

  if (cloneToWork.containsKey(work)) {
    workToParentWorkTranMap.put(cloneToWork.get(work), result);
  }
  return result;
}
@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
  JobConf jobConf = cloneJobConf(mapWork);
  Class ifClass = getInputFormat(jobConf, mapWork);

  JavaPairRDD<WritableComparable, Writable> hadoopRDD;
  if (mapWork.getNumMapTasks() != null) {
    jobConf.setNumMapTasks(mapWork.getNumMapTasks());
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class,
        mapWork.getNumMapTasks());
  } else {
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
  }

  // Caching is disabled for MapInput due to HIVE-8920
  MapInput result = new MapInput(sparkPlan, hadoopRDD, false /*cloneToWork.containsKey(mapWork)*/);
  return result;
}
public SparkPlan generate(SparkWork sparkWork) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  SparkPlan sparkPlan = new SparkPlan(this.jobConf, this.sc.sc());
  cloneToWork = sparkWork.getCloneToWork();
  workToTranMap.clear();
  workToParentWorkTranMap.clear();

  try {
    for (BaseWork work : sparkWork.getAllWork()) {
      // Run the SparkDynamicPartitionPruner here instead of inside the InputFormat,
      // so that pruning does not have to run when creating a RecordReader.
      runDynamicPartitionPruner(work);
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
      SparkTran tran = generate(work, sparkWork);
      SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
      sparkPlan.addTran(tran);
      sparkPlan.connect(parentTran, tran);
      workToTranMap.put(work, tran);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    }
  } finally {
    // Clear all ThreadLocal-cached MapWork/ReduceWork after plan generation,
    // as this may execute in a pool thread.
    Utilities.clearWorkMap(jobConf);
  }

  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  return sparkPlan;
}
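The pruning call above is only a hook; a hedged sketch of what it could look like, assuming a SparkDynamicPartitionPruner helper with a prune(MapWork, JobConf) entry point (assumed name and signature, not taken from these snippets):

// Hypothetical sketch: prune partitions of a MapWork before its HadoopRDD is
// built, so partitions filtered out at runtime never reach a RecordReader.
private void runDynamicPartitionPruner(BaseWork work) {
  if (work instanceof MapWork) {
    SparkDynamicPartitionPruner pruner = new SparkDynamicPartitionPruner();
    try {
      pruner.prune((MapWork) work, jobConf);
    } catch (Exception e) {
      throw new RuntimeException("Failed to run dynamic partition pruning", e);
    }
  }
}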
public SparkPlan generate(SparkWork sparkWork) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  SparkPlan sparkPlan = new SparkPlan();
  cloneToWork = sparkWork.getCloneToWork();
  workToTranMap.clear();
  workToParentWorkTranMap.clear();

  try {
    for (BaseWork work : sparkWork.getAllWork()) {
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
      SparkTran tran = generate(work, sparkWork);
      SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
      sparkPlan.addTran(tran);
      sparkPlan.connect(parentTran, tran);
      workToTranMap.put(work, tran);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    }
  } finally {
    // Clear all ThreadLocal-cached MapWork/ReduceWork after plan generation,
    // as this may execute in a pool thread.
    Utilities.clearWorkMap(jobConf);
  }

  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  return sparkPlan;
}
private SparkTran generate(BaseWork work) throws Exception {
  initStatsPublisher(work);
  JobConf newJobConf = cloneJobConf(work);
  checkSpecs(work, newJobConf);
  byte[] confBytes = KryoSerializer.serializeJobConf(newJobConf);

  if (work instanceof MapWork) {
    MapTran mapTran = new MapTran();
    HiveMapFunction mapFunc = new HiveMapFunction(confBytes, sparkReporter);
    mapTran.setMapFunction(mapFunc);
    return mapTran;
  } else if (work instanceof ReduceWork) {
    ReduceTran reduceTran = new ReduceTran();
    HiveReduceFunction reduceFunc = new HiveReduceFunction(confBytes, sparkReporter);
    reduceTran.setReduceFunction(reduceFunc);
    return reduceTran;
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, "
        + "but found " + work.getClass().getName());
  }
}
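For context, a minimal sketch of the SparkTran abstraction these factory methods return (the interface shape is an assumption, not copied from the snippets): each tran turns one pair RDD into another, and SparkPlan chains trans along the edges wired via connect().

// Assumed shape of the SparkTran contract used throughout these snippets:
// MapTran wraps a HiveMapFunction, ReduceTran wraps a HiveReduceFunction,
// and each contributes exactly one transformation to the RDD lineage.
interface SparkTran<KI extends WritableComparable, VI, KO extends WritableComparable, VO> {
  JavaPairRDD<KO, VO> transform(JavaPairRDD<KI, VI> input);
}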
@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
  JobConf jobConf = cloneJobConf(mapWork);
  Class ifClass = getInputFormat(jobConf, mapWork);
  sc.sc().setCallSite(CallSite.apply(mapWork.getName(), ""));

  JavaPairRDD<WritableComparable, Writable> hadoopRDD;
  if (mapWork.getNumMapTasks() != null) {
    jobConf.setNumMapTasks(mapWork.getNumMapTasks());
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class,
        mapWork.getNumMapTasks());
  } else {
    hadoopRDD = sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
  }

  // Caching is disabled for MapInput due to HIVE-8920
  boolean toCache = false /*cloneToWork.containsKey(mapWork)*/;
  String tables = mapWork.getAllRootOperators().stream()
      .filter(op -> op instanceof TableScanOperator)
      .map(ts -> ((TableScanDesc) ts.getConf()).getAlias())
      .collect(Collectors.joining(", "));
  String rddName = mapWork.getName() + " (" + tables + ", " + hadoopRDD.getNumPartitions()
      + (toCache ? ", cached)" : ")");
  MapInput result = new MapInput(sparkPlan, hadoopRDD, toCache, rddName, mapWork);
  return result;
}
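For illustration only (all values assumed), tracing the string building above for a MapWork named "Map 1" scanning a single table alias "src" across 4 input partitions with caching off:

// Assumed example values:
//   mapWork.getName()            -> "Map 1"
//   tables                       -> "src"
//   hadoopRDD.getNumPartitions() -> 4
//   toCache                      -> false
// => rddName == "Map 1 (src, 4)"   (or "Map 1 (src, 4, cached)" if toCache were true)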
public SparkPlan generate(SparkWork sparkWork) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  SparkPlan sparkPlan = new SparkPlan();
  cloneToWork = sparkWork.getCloneToWork();
  workToTranMap.clear();
  workToParentWorkTranMap.clear();

  try {
    for (BaseWork work : sparkWork.getAllWork()) {
      perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
      SparkTran tran = generate(work);
      SparkTran parentTran = generateParentTran(sparkPlan, sparkWork, work);
      sparkPlan.addTran(tran);
      sparkPlan.connect(parentTran, tran);
      workToTranMap.put(work, tran);
      perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_CREATE_TRAN + work.getName());
    }
  } finally {
    // Clear all ThreadLocal-cached MapWork/ReduceWork after plan generation,
    // as this may execute in a pool thread.
    Utilities.clearWorkMap();
  }

  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_BUILD_PLAN);
  return sparkPlan;
}
private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, BaseWork work)
    throws Exception {
  if (cloneToWork.containsKey(work)) {
    BaseWork originalWork = cloneToWork.get(work);
    if (workToParentWorkTranMap.containsKey(originalWork)) {
      return workToParentWorkTranMap.get(originalWork);
    }
  }

  SparkTran result;
  if (work instanceof MapWork) {
    result = generateMapInput(sparkPlan, (MapWork) work);
    sparkPlan.addTran(result);
  } else if (work instanceof ReduceWork) {
    List<BaseWork> parentWorks = sparkWork.getParents(work);
    result = generate(sparkPlan, sparkWork.getEdgeProperty(parentWorks.get(0), work),
        cloneToWork.containsKey(work));
    sparkPlan.addTran(result);
    for (BaseWork parentWork : parentWorks) {
      sparkPlan.connect(workToTranMap.get(parentWork), result);
    }
  } else {
    throw new IllegalStateException("AssertionError: expected either MapWork or ReduceWork, "
        + "but found " + work.getClass().getName());
  }

  if (cloneToWork.containsKey(work)) {
    workToParentWorkTranMap.put(cloneToWork.get(work), result);
  }
  return result;
}
@SuppressWarnings("unchecked")
private MapInput generateMapInput(SparkPlan sparkPlan, MapWork mapWork) throws Exception {
  JobConf jobConf = cloneJobConf(mapWork);
  Class ifClass = getInputFormat(jobConf, mapWork);
  JavaPairRDD<WritableComparable, Writable> hadoopRDD =
      sc.hadoopRDD(jobConf, ifClass, WritableComparable.class, Writable.class);
  // Caching is disabled for MapInput due to HIVE-8920
  MapInput result = new MapInput(sparkPlan, hadoopRDD, false /*cloneToWork.containsKey(mapWork)*/);
  return result;
}
SparkPlanGenerator gen =
    new SparkPlanGenerator(jc.sc(), null, localJobConf, localScratchDir, sparkReporter);
SparkPlan plan = gen.generate(localSparkWork);
sc = new JavaSparkContext(sparkConf);
SparkPlanGenerator sparkPlanGenerator = new SparkPlanGenerator(sc, null, jobConf, tmpDir, null);
SparkPlan sparkPlan = sparkPlanGenerator.generate(sparkTask.getWork());
RDD<Tuple2<HiveKey, BytesWritable>> reducerRdd = sparkPlan.generateGraph().rdd();
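Nothing in the snippet above actually executes the pipeline; generateGraph() only wires the RDD lineage. A minimal usage sketch, assuming the reducerRdd variable from above, that forces execution:

// An RDD action triggers the whole Hive-on-Spark pipeline: input splits,
// HiveMapFunction, shuffle, and HiveReduceFunction all run here.
long numOutputRecords = reducerRdd.count();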