// Excerpt from a recursive createSparkTask(...) helper; elided context is
// reconstructed in comments and marked as assumptions.
if (createdTaskMap.containsKey(sparkWork)) {  // assumed guard for this early return
  return createdTaskMap.get(sparkWork);
}
SparkTask resultTask = originalTask.getWork() == sparkWork
    ? originalTask : (SparkTask) TaskFactory.get(sparkWork);
if (!dependencyGraph.get(sparkWork).isEmpty()) {
  // Recursively create the task for each parent work and wire the dependency.
  for (SparkWork parentWork : dependencyGraph.get(sparkWork)) {  // assumed loop binding parentWork
    SparkTask parentTask = createSparkTask(originalTask, parentWork, createdTaskMap, conditionalTask);
    parentTask.addDependentTask(resultTask);
  }
} else {
  // resultTask replaces originalTask: re-point originalTask's parents at it.
  List<Task<? extends Serializable>> parentTasks = originalTask.getParentTasks();
  if (parentTasks != null && parentTasks.size() > 0) {
    // Clear first so the edits below do not mutate the list being iterated.
    originalTask.setParentTasks(new ArrayList<Task<? extends Serializable>>());
    for (Task<? extends Serializable> parentTask : parentTasks) {
      parentTask.addDependentTask(resultTask);
      // ... rest of the loop body elided ...
    }
  }
}

@Test
public void removeEmptySparkTask() {
  SparkTask grandpa = new SparkTask();
  SparkWork grandpaWork = new SparkWork("grandpa");
  grandpaWork.add(new MapWork());
  grandpa.setWork(grandpaWork);

  SparkTask parent = new SparkTask();
  SparkWork parentWork = new SparkWork("parent");
  parentWork.add(new MapWork());
  parent.setWork(parentWork);

  SparkTask child1 = new SparkTask();
  SparkWork childWork1 = new SparkWork("child1");
  childWork1.add(new MapWork());
  child1.setWork(childWork1);

  // Build the chain grandpa -> parent -> child1.
  grandpa.addDependentTask(parent);
  parent.addDependentTask(child1);

  Assert.assertEquals(grandpa.getChildTasks().size(), 1);
  Assert.assertEquals(child1.getParentTasks().size(), 1);

  // parent's MapWork holds no operators, so it counts as empty and is unlinked.
  if (isEmptySparkWork(parent.getWork())) {
    SparkUtilities.removeEmptySparkTask(parent);
  }

  Assert.assertEquals(grandpa.getChildTasks().size(), 0);
  Assert.assertEquals(child1.getParentTasks().size(), 0);
}

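// The test above relies on a private isEmptySparkWork helper that is not
// shown here. A minimal sketch, assuming a SparkWork counts as empty when
// none of its BaseWorks contains any operators:
private boolean isEmptySparkWork(SparkWork sparkWork) {
  for (BaseWork work : sparkWork.getAllWork()) {
    if (work.getAllOperators().size() > 0) {
      return false;
    }
  }
  return true;
}
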
// Excerpt from SparkTask.execute(...); elided steps are marked, and
// reconstructed guards are labeled as assumptions.
SparkSessionManager sparkSessionManager = null;
try {
  printConfigInfo();
  sparkSessionManager = SparkSessionManagerImpl.getInstance();
  sparkSession = SparkUtilities.getSparkSession(conf, sparkSessionManager);

  SparkWork sparkWork = getWork();
  sparkWork.setRequiredCounterPrefix(getOperatorCounters());

  // ... job submission elided ...
  if (driverContext.isShutdown()) {  // assumed guard: kill only on query cancellation
    killJob();
    throw new HiveException(String.format("Spark task %s cancelled for query %s",
        getId(), sparkWork.getQueryId()));
  }

  // Record the job handle id and the Spark job id in the Hive history.
  addToHistory(Keys.SPARK_JOB_HANDLE_ID, jobRef.getJobId());
  addToHistory(Keys.SPARK_JOB_ID, Integer.toString(sparkJobID));

  getSparkJobInfo(sparkJobStatus);
  setSparkException(sparkJobStatus, rc);

  if (rc == 0) {  // assumed branch: success path
    printConsoleMetrics();
    printExcessiveGCWarning();
    if (LOG.isInfoEnabled() && sparkStatistics != null) {
      LOG.info(sparkStatisticsToString(sparkStatistics, sparkJobID));
    }
    LOG.info("Successfully completed Spark job[" + sparkJobID + "] with application ID "
        + jobID + " and task ID " + getId());  // assumed first half of this log line
  } else if (rc == 2) {
    // Cancel job if the monitor found job submission timeout.
    LOG.info("Failed to submit Spark job for application id "
        + (Strings.isNullOrEmpty(jobID) ? "UNKNOWN" : jobID));
  }
  // ... remainder of the method (catch/finally, closing braces) elided ...

/**
 * Removes currTask from the child lists of its parent tasks, and from the
 * parent lists of its child tasks.
 * @param currTask the empty SparkTask to unlink
 */
public static void removeEmptySparkTask(SparkTask currTask) {
  // Unlink currTask from its parents; iterate over a copy because
  // removeDependentTask mutates the parent/child lists as it goes.
  List<Task> parTasks = new ArrayList<Task>(currTask.getParentTasks());
  for (Task parTask : parTasks) {
    parTask.removeDependentTask(currTask);
  }
  // Unlink currTask from its children.
  currTask.removeFromChildrenTasks();
}

private void checkMapJoin(SparkTask sparkTask) throws SemanticException {
  SparkWork sparkWork = sparkTask.getWork();
  for (BaseWork baseWork : sparkWork.getAllWork()) {
    List<String> warnings =
        new CrossProductHandler.MapJoinCheck(sparkTask.toString()).analyze(baseWork);
    for (String w : warnings) {
      warn(w);
    }
  }
}

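// A minimal sketch of the warn helper used above, assuming cross-product
// warnings are printed to the session console rather than failing the query:
private void warn(String msg) {
  SessionState.getConsole().printInfo("Warning: " + msg);
}
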
// Excerpt from a physical-resolver dispatch; the surrounding traversal is elided.
SparkWork sparkWork = sparkTask.getWork();
if (!visitedTasks.contains(sparkTask)) {
  dependencyGraph.clear();
  if (sparkTask.getParentTasks() != null
      && sparkTask.getParentTasks().size() == 1
      && sparkTask.getParentTasks().get(0) instanceof SparkTask) {
    SparkTask parent = (SparkTask) sparkTask.getParentTasks().get(0);
    // A map-join task whose sole parent produces its hash tables is folded
    // into the enclosing conditional task.
    if (containsOp(sparkWork, MapJoinOperator.class)
        && containsOp(parent.getWork(), SparkHashTableSinkOperator.class)) {
      updateConditionalTask(conditionalTask, sparkTask, parent);
    }
  }
  // ... remainder elided ...
}

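// A plausible shape for the containsOp helper used above, assuming it asks
// whether any BaseWork in the SparkWork contains an operator of the given class:
private boolean containsOp(SparkWork sparkWork, Class<?> clazz) {
  for (BaseWork work : sparkWork.getAllWorkUnsorted()) {
    for (Operator<?> op : work.getAllOperators()) {
      if (clazz.isInstance(op)) {
        return true;
      }
    }
  }
  return false;
}
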
// Excerpt from splitting a SparkTask at a ReduceWork boundary; the creation
// of newTask is elided, and the reconstructed loop is labeled an assumption.
SparkWork currentWork = currentTask.getWork();
Set<Operator<?>> reduceSinkSet = OperatorUtils.getOp(reduceWork, ReduceSinkOperator.class);
// ... newTask is created for the split-off work here ...
List<Task<? extends Serializable>> childTasks = currentTask.getChildTasks();
// Re-parent currentTask's children under newTask (assumed loop binding childTask).
if (childTasks != null) {
  for (Task<? extends Serializable> childTask : new ArrayList<>(childTasks)) {
    currentTask.removeDependentTask(childTask);
    newTask.addDependentTask(childTask);
  }
}
// Chain newTask after currentTask and preserve the fetch-source flag.
currentTask.addDependentTask(newTask);
newTask.setFetchSource(currentTask.isFetchSource());

@Override
public Collection<MapWork> getMapWork() {
  List<MapWork> result = Lists.newArrayList();
  // The roots of a SparkWork graph are MapWork instances.
  for (BaseWork w : getWork().getRoots()) {
    result.add((MapWork) w);
  }
  return result;
}

// Another excerpt from SparkTask.execute(...); elided steps are marked.
SparkSessionManager sparkSessionManager = null;
try {
  printConfigInfo();
  sparkSessionManager = SparkSessionManagerImpl.getInstance();
  sparkSession = SparkUtilities.getSparkSession(conf, sparkSessionManager);

  SparkWork sparkWork = getWork();
  sparkWork.setRequiredCounterPrefix(getOperatorCounters());

  // ... job submission elided ...
  addToHistory(jobRef);
  sparkJobID = jobRef.getJobId();
  this.jobID = jobRef.getSparkJobStatus().getAppID();
  rc = jobRef.monitorJob();
  SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus();
  getSparkJobInfo(sparkJobStatus, rc);
  if (rc == 0) {
    sparkStatistics = sparkJobStatus.getSparkStatistics();
    if (LOG.isInfoEnabled() && sparkStatistics != null) {
      LOG.info(String.format("=====Spark Job[%s] statistics=====", jobRef.getJobId()));
      logSparkStatistic(sparkStatistics);
    }
  }
  // ... other result handling elided ...
} catch (Exception e) {  // assumed catch around the error handling below
  setException(e);
  rc = 1;
} finally {
  Utilities.clearWork(conf);
  if (sparkSession != null && sparkSessionManager != null) {
    rc = close(rc);
    try {
      sparkSessionManager.returnSession(sparkSession);  // completed from the variant below
    } catch (HiveException ex) {  // assumed handler
      LOG.error("Failed to return the session to SessionManager", ex);
    }
  }
}

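// A sketch of the logSparkStatistic helper used above, assuming the
// SparkStatistics API exposes named groups of name/value statistics:
private void logSparkStatistic(SparkStatistics sparkStatistic) {
  Iterator<SparkStatisticGroup> groupIterator = sparkStatistic.getStatisticGroups();
  while (groupIterator.hasNext()) {
    SparkStatisticGroup group = groupIterator.next();
    LOG.info(group.getGroupName());
    Iterator<SparkStatistic> statisticIterator = group.getStatistics();
    while (statisticIterator.hasNext()) {
      SparkStatistic statistic = statisticIterator.next();
      LOG.info(statistic.getName() + ": " + statistic.getValue());
    }
  }
}
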
// A further variant of the same execute(...) flow, using getCounterPrefixes()
// (defined below); elided steps are marked.
SparkSessionManager sparkSessionManager = null;
try {
  printConfigInfo();
  sparkSessionManager = SparkSessionManagerImpl.getInstance();
  sparkSession = SparkUtilities.getSparkSession(conf, sparkSessionManager);

  SparkWork sparkWork = getWork();
  sparkWork.setRequiredCounterPrefix(getCounterPrefixes());

  // ... job submission elided ...
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_SUBMIT_JOB);
  addToHistory(jobRef);
  rc = jobRef.monitorJob();
  SparkJobStatus sparkJobStatus = jobRef.getSparkJobStatus();
  // ... success check elided ...
  if (LOG.isInfoEnabled() && sparkStatistics != null) {
    LOG.info(String.format("=====Spark Job[%s] statistics=====", jobRef.getJobId()));
    logSparkStatistic(sparkStatistics);
  }
  // ... catch blocks elided ...
} finally {
  Utilities.clearWork(conf);
  if (sparkSession != null && sparkSessionManager != null) {
    rc = close(rc);
    try {
      sparkSessionManager.returnSession(sparkSession);
    } catch (HiveException ex) {  // assumed handler
      LOG.error("Failed to return the session to SessionManager", ex);
    }
  }
}

// Excerpt from stats-task generation for ANALYZE TABLE on Spark; the branch
// structure is reconstructed and the construction of columnStatsWork is elided.
SparkWork sparkWork = context.currentTask.getWork();
if (BasicStatsNoJobTask.canUseFooterScan(table, inputFormat)) {
  // ... footer-scan path: stats come from file footers, no Spark job needed ...
} else {  // assumed branch
  // ... columnStatsWork is constructed here ...
  columnStatsWork.setSourceTask(context.currentTask);
  Task<StatsWork> statsTask = TaskFactory.get(columnStatsWork);
  context.currentTask.addDependentTask(statsTask);
}

private void generateLocalWork(SparkTask originalTask) {
  SparkWork originalWork = originalTask.getWork();
  Collection<BaseWork> allBaseWorks = originalWork.getAllWork();
  Context ctx = physicalContext.getContext();

  for (BaseWork work : allBaseWorks) {  // assumed loop binding work
    // Skip works that contain no map join (assumed guard; the original
    // fragment kept only the bare continue).
    Set<Operator<?>> mapJoinOps = getOp(work, MapJoinOperator.class);
    if (mapJoinOps == null || mapJoinOps.isEmpty()) {
      continue;
    }
    Path tmpPath = Utilities.generateTmpPath(ctx.getMRTmpPath(), originalTask.getId());
    MapredLocalWork bigTableLocalWork = work.getMapRedLocalWork();
    List<Operator<? extends OperatorDesc>> dummyOps =
        new ArrayList<Operator<? extends OperatorDesc>>();  // initializer cut off in the original
    // ... remainder of the loop body elided ...
  }
}

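// A simple sketch of the getOp helper assumed in the guard above: collect
// every operator in a BaseWork that is an instance of the given class.
private static Set<Operator<?>> getOp(BaseWork work, Class<?> clazz) {
  Set<Operator<?>> matched = new HashSet<Operator<?>>();
  for (Operator<?> op : work.getAllOperators()) {
    if (clazz.isInstance(op)) {
      matched.add(op);
    }
  }
  return matched;
}
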
// A terminal SparkTask that feeds the query's FetchTask becomes the fetch source.
if (fetchTask != null && context.currentTask.getNumChild() == 0) {
  if (fetchTask.isFetchFrom(fileSink.getConf())) {
    context.currentTask.setFetchSource(true);
  }
}

// Insert the partition-pruning task ahead of the main task in the task graph.
SparkTask pruningTask = SparkUtilities.createSparkTask(conf);
SparkTask mainTask = procCtx.currentTask;
pruningTask.addDependentTask(procCtx.currentTask);
procCtx.rootTasks.remove(procCtx.currentTask);
procCtx.rootTasks.add(pruningTask);

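// A plausible shape for SparkUtilities.createSparkTask as called above:
// wrap a fresh SparkWork, named after the current query id, in a new task.
public static SparkTask createSparkTask(HiveConf conf) {
  return (SparkTask) TaskFactory.get(
      new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)));
}
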
private Map<String, List<String>> getCounterPrefixes() throws HiveException, MetaException {
  Map<String, List<String>> counters = getOperatorCounters();
  StatsTask statsTask = getStatsTaskInChildTasks(this);
  String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS);
  // Fetch table prefixes if this SparkTask gathers table statistics via counters.
  if (statsImpl.equalsIgnoreCase("counter") && statsTask != null) {
    List<String> prefixes = getRequiredCounterPrefix(statsTask);
    for (String prefix : prefixes) {
      List<String> counterGroup = counters.get(prefix);
      if (counterGroup == null) {
        counterGroup = new LinkedList<String>();
        counters.put(prefix, counterGroup);
      }
      counterGroup.add(StatsSetupConst.ROW_COUNT);
      counterGroup.add(StatsSetupConst.RAW_DATA_SIZE);
    }
  }
  return counters;
}

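// A plausible implementation of the getStatsTaskInChildTasks helper used
// above, assuming a depth-first search for the first StatsTask in the
// child-task tree:
private static StatsTask getStatsTaskInChildTasks(Task<? extends Serializable> rootTask) {
  List<Task<? extends Serializable>> childTasks = rootTask.getChildTasks();
  if (childTasks == null) {
    return null;
  }
  for (Task<? extends Serializable> task : childTasks) {
    if (task instanceof StatsTask) {
      return (StatsTask) task;
    }
    StatsTask childStatsTask = getStatsTaskInChildTasks(task);
    if (childStatsTask != null) {
      return childStatsTask;
    }
  }
  return null;
}
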
// A variant of the task-splitting excerpt above that resolves getOp through
// SparkMapJoinResolver; the same elisions and assumed loop apply.
SparkWork currentWork = currentTask.getWork();
Set<Operator<?>> reduceSinkSet =
    SparkMapJoinResolver.getOp(reduceWork, ReduceSinkOperator.class);
// ... newTask is created for the split-off work here ...
List<Task<? extends Serializable>> childTasks = currentTask.getChildTasks();
if (childTasks != null) {  // assumed loop binding childTask
  for (Task<? extends Serializable> childTask : new ArrayList<>(childTasks)) {
    currentTask.removeDependentTask(childTask);
    newTask.addDependentTask(childTask);
  }
}
currentTask.addDependentTask(newTask);
newTask.setFetchSource(currentTask.isFetchSource());

@Override
public Operator<? extends OperatorDesc> getReducer(MapWork mapWork) {
  // There is a reducer only if mapWork has exactly one child and that child
  // is a ReduceWork.
  List<BaseWork> children = getWork().getChildren(mapWork);
  if (children.size() != 1) {
    return null;
  }
  if (!(children.get(0) instanceof ReduceWork)) {
    return null;
  }
  return ((ReduceWork) children.get(0)).getReducer();
}

// Excerpt from ANALYZE TABLE handling; construction of statsWork is elided.
SparkWork sparkWork = context.currentTask.getWork();
boolean partialScan = parseContext.getQueryProperties().isPartialScanAnalyzeCommand();
boolean noScan = parseContext.getQueryProperties().isNoScanAnalyzeCommand();
// ... statsWork is constructed here from the scan flags ...
statsWork.setStatsReliable(
    parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf());
context.currentTask.addDependentTask(statsTask);

// A variant of checkMapJoin above that iterates the works unsorted and uses
// the CrossProductCheck naming.
private void checkMapJoin(SparkTask sparkTask) throws SemanticException {
  SparkWork sparkWork = sparkTask.getWork();
  for (BaseWork baseWork : sparkWork.getAllWorkUnsorted()) {
    List<String> warnings =
        new CrossProductCheck.MapJoinCheck(sparkTask.toString()).analyze(baseWork);
    for (String w : warnings) {
      warn(w);
    }
  }
}