/**
 * Adds a dependent task to every task in this conditional task's list. The dependent will
 * not become a direct child of the conditional task itself; it is attached to each of the
 * associated branch tasks instead.
 *
 * @param dependent the task to add as a dependent of every listed task
 * @return true if the dependent was added to every listed task; false if the list is
 *         null or empty, or if any listed task already contained the dependent
 */
@Override
public boolean addDependentTask(Task<? extends Serializable> dependent) {
  List<Task<? extends Serializable>> tasks = getListTasks();
  // Nothing to attach to: report false. (The previous code reported true for a
  // non-null empty list even though nothing was added.)
  if (tasks == null || tasks.isEmpty()) {
    return false;
  }
  boolean ret = true;
  for (Task<? extends Serializable> tsk : tasks) {
    // Deliberately non-short-circuit: every branch task must receive the dependent
    // even if an earlier add reported "already existed".
    ret &= tsk.addDependentTask(dependent);
  }
  return ret;
}
/**
 * Adds a dependent task to every task in this conditional task's list. The dependent will
 * not become a direct child of the conditional task itself; it is attached to each of the
 * associated branch tasks instead.
 *
 * @param dependent the task to add as a dependent of every listed task
 * @return true if the dependent was added to every listed task; false if the list is
 *         null or empty, or if any listed task already contained the dependent
 */
@Override
public boolean addDependentTask(Task<? extends Serializable> dependent) {
  List<Task<? extends Serializable>> tasks = getListTasks();
  // Nothing to attach to: report false. (The previous code reported true for a
  // non-null empty list even though nothing was added.)
  if (tasks == null || tasks.isEmpty()) {
    return false;
  }
  boolean ret = true;
  for (Task<? extends Serializable> tsk : tasks) {
    // Deliberately non-short-circuit: every branch task must receive the dependent
    // even if an earlier add reported "already existed".
    ret &= tsk.addDependentTask(dependent);
  }
  return ret;
}
/**
 * Returns the tasks reachable directly below the given task: the branch list for a
 * ConditionalTask, otherwise the task's regular child tasks.
 */
protected List<Task<?>> getChildTasks(Task<?> task) {
  return task instanceof ConditionalTask
      ? ((ConditionalTask) task).getListTasks()
      : task.getChildTasks();
}
}
/**
 * Returns the tasks reachable directly below the given task: the branch list for a
 * ConditionalTask, otherwise the task's regular child tasks.
 */
protected List<Task<?>> getChildTasks(Task<?> task) {
  return task instanceof ConditionalTask
      ? ((ConditionalTask) task).getListTasks()
      : task.getChildTasks();
}
}
public static void traverse(List<Task<? extends Serializable>> tasks, Function function) { List<Task<? extends Serializable>> listOfTasks = new ArrayList<>(tasks); while (!listOfTasks.isEmpty()) { List<Task<? extends Serializable>> children = new ArrayList<>(); for (Task<? extends Serializable> task : listOfTasks) { // skip processing has to be done first before continuing if (function.skipProcessing(task)) { continue; } // Add list tasks from conditional tasks if (task instanceof ConditionalTask) { children.addAll(((ConditionalTask) task).getListTasks()); } if (task.getDependentTasks() != null) { children.addAll(task.getDependentTasks()); } function.process(task); } listOfTasks = children; } }
/**
 * Dispatches a task node: for conditional tasks, processes every Spark branch; for a plain
 * Spark task, processes it directly. Non-map-reduce tasks are ignored.
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nos) throws SemanticException {
  Task<? extends Serializable> currentTask = (Task<? extends Serializable>) nd;
  if (!currentTask.isMapRedTask()) {
    return null;
  }
  if (currentTask instanceof ConditionalTask) {
    // Visit each Spark branch of the conditional task.
    for (Task<? extends Serializable> branch : ((ConditionalTask) currentTask).getListTasks()) {
      if (branch instanceof SparkTask) {
        processCurrentTask((SparkTask) branch, (ConditionalTask) currentTask);
        visitedTasks.add(branch);
      }
    }
  } else if (currentTask instanceof SparkTask) {
    processCurrentTask((SparkTask) currentTask, null);
    visitedTasks.add(currentTask);
  }
  return null;
}
/**
 * Makes the move task in the GenMRProcContext that follows the FileSinkOperator a dependent
 * of every subtree branching from the conditional task, so the move runs regardless of
 * which branch is chosen.
 *
 * @param newOutput the file sink whose move task is being linked
 * @param cndTsk the conditional task whose branches the move depends on
 * @param mvTasks candidate move tasks to search
 * @param hconf Hive configuration
 * @param dependencyTask optional dependency-collection task
 */
public static void linkMoveTask(FileSinkOperator newOutput, ConditionalTask cndTsk,
    List<Task<MoveWork>> mvTasks, HiveConf hconf, DependencyCollectionTask dependencyTask) {
  Task<MoveWork> mvTask = GenMapRedUtils.findMoveTask(mvTasks, newOutput);
  for (Task<? extends Serializable> branch : cndTsk.getListTasks()) {
    linkMoveTask(mvTask, branch, hconf, dependencyTask);
  }
}
/**
 * Dispatches a task node: for conditional tasks, processes every Spark branch; for a plain
 * Spark task, processes it directly. Non-map-reduce tasks are ignored.
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nos) throws SemanticException {
  Task<? extends Serializable> currentTask = (Task<? extends Serializable>) nd;
  if (!currentTask.isMapRedTask()) {
    return null;
  }
  if (currentTask instanceof ConditionalTask) {
    // Visit each Spark branch of the conditional task.
    for (Task<? extends Serializable> branch : ((ConditionalTask) currentTask).getListTasks()) {
      if (branch instanceof SparkTask) {
        processCurrentTask((SparkTask) branch, (ConditionalTask) currentTask);
        visitedTasks.add(branch);
      }
    }
  } else if (currentTask instanceof SparkTask) {
    processCurrentTask((SparkTask) currentTask, null);
    visitedTasks.add(currentTask);
  }
  return null;
}
/**
 * A conditional task is done once all of its parent tasks are done and all of its relevant
 * tasks are done. The relevant tasks are the resolved subset when resolution has happened,
 * otherwise the full branch list.
 */
@Override
public boolean done() {
  List<Task<? extends Serializable>> parents = getParentTasks();
  if (parents != null) {
    for (Task<? extends Serializable> parent : parents) {
      if (!parent.done()) {
        return false;
      }
    }
  }
  List<Task<? extends Serializable>> relevantTasks = resolved ? resTasks : getListTasks();
  if (relevantTasks != null) {
    for (Task<? extends Serializable> tsk : relevantTasks) {
      if (!tsk.done()) {
        return false;
      }
    }
  }
  return true;
}
/**
 * Dispatches a task node: validates shuffle joins and map joins of a Spark task, and
 * recurses into every branch of a conditional task.
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
    throws SemanticException {
  @SuppressWarnings("unchecked")
  Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
  if (currTask instanceof SparkTask) {
    SparkTask sparkTask = (SparkTask) currTask;
    checkShuffleJoin(sparkTask.getWork());
    checkMapJoin(sparkTask);
  } else if (currTask instanceof ConditionalTask) {
    // Each branch of the conditional task gets the same checks.
    for (Task<? extends Serializable> branch : ((ConditionalTask) currTask).getListTasks()) {
      dispatch(branch, stack, nodeOutputs);
    }
  }
  return null;
}
/** * Walk the current operator and its descendants. * * @param nd * current operator in the graph * @throws SemanticException */ @Override protected void walk(Node nd) throws SemanticException { opStack.push(nd); dispatch(nd, opStack); // move all the children to the front of queue if (nd.getChildren() != null) { for (Node n : nd.getChildren()) { walk(n); } } else if (nd instanceof ConditionalTask) { for (Task n : ((ConditionalTask) nd).getListTasks()) { if (n.getParentTasks() == null || n.getParentTasks().isEmpty()) { walk(n); } } } opStack.pop(); } }
@Override public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException { Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd; // not map reduce task or not conditional task, just skip if (currTask.isMapRedTask()) { if (currTask instanceof ConditionalTask) { // get the list of task List<Task<? extends Serializable>> taskList = ((ConditionalTask) currTask).getListTasks(); for (Task<? extends Serializable> tsk : taskList) { if (tsk.isMapRedTask()) { this.processCurrentTask(tsk, ((ConditionalTask) currTask)); } } } else { this.processCurrentTask(currTask, null); } } return null; }
/**
 * Dispatches a task node: validates shuffle joins and map joins of a Spark task, and
 * recurses into every branch of a conditional task.
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
    throws SemanticException {
  @SuppressWarnings("unchecked")
  Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
  if (currTask instanceof SparkTask) {
    SparkTask sparkTask = (SparkTask) currTask;
    checkShuffleJoin(sparkTask.getWork());
    checkMapJoin(sparkTask);
  } else if (currTask instanceof ConditionalTask) {
    // Each branch of the conditional task gets the same checks.
    for (Task<? extends Serializable> branch : ((ConditionalTask) currTask).getListTasks()) {
      dispatch(branch, stack, nodeOutputs);
    }
  }
  return null;
}
@Override public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException { Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd; // not map reduce task or not conditional task, just skip if (currTask.isMapRedTask()) { if (currTask instanceof ConditionalTask) { // get the list of task List<Task<? extends Serializable>> taskList = ((ConditionalTask) currTask).getListTasks(); for (Task<? extends Serializable> tsk : taskList) { if (tsk.isMapRedTask()) { this.processCurrentTask(tsk, ((ConditionalTask) currTask)); } } } else { this.processCurrentTask(currTask, null); } } return null; }
/**
 * Recursively sets the input format for the given task: configures the map-work operators
 * of an ExecDriver, descends into conditional branch tasks, and always recurses into
 * child tasks.
 */
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof ExecDriver) {
    MapWork work = ((MapredWork) task.getWork()).getMapWork();
    HashMap<String, Operator<? extends OperatorDesc>> aliasToWork = work.getAliasToWork();
    if (!aliasToWork.isEmpty()) {
      for (Operator<? extends OperatorDesc> operator : aliasToWork.values()) {
        setInputFormat(work, operator);
      }
    }
  } else if (task instanceof ConditionalTask) {
    for (Task<? extends Serializable> branch : ((ConditionalTask) task).getListTasks()) {
      setInputFormat(branch);
    }
  }
  // Children are visited regardless of the task's own type.
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> child : task.getChildTasks()) {
      setInputFormat(child);
    }
  }
}
/**
 * Recursively breaks the operator trees referenced by the given task: operators of an
 * ExecDriver's map work, branch tasks of a conditional task, and then all child tasks.
 */
private void breakTaskTree(Task<? extends Serializable> task) {
  if (task instanceof ExecDriver) {
    HashMap<String, Operator<? extends OperatorDesc>> aliasToWork =
        ((MapredWork) task.getWork()).getMapWork().getAliasToWork();
    if (!aliasToWork.isEmpty()) {
      for (Operator<? extends OperatorDesc> operator : aliasToWork.values()) {
        breakOperatorTree(operator);
      }
    }
  } else if (task instanceof ConditionalTask) {
    for (Task<? extends Serializable> branch : ((ConditionalTask) task).getListTasks()) {
      breakTaskTree(branch);
    }
  }
  // Children are visited regardless of the task's own type.
  if (task.getChildTasks() == null) {
    return;
  }
  for (Task<? extends Serializable> child : task.getChildTasks()) {
    breakTaskTree(child);
  }
}
/**
 * Recursively sets the input format for the given task: configures every MapWork inside a
 * TezTask, descends into conditional branch tasks, and always recurses into child tasks.
 */
@Override
protected void setInputFormat(Task<? extends Serializable> task) {
  if (task instanceof TezTask) {
    // A Tez task may contain several work units; only MapWork needs configuring.
    for (BaseWork baseWork : ((TezTask) task).getWork().getAllWork()) {
      if (!(baseWork instanceof MapWork)) {
        continue;
      }
      MapWork mapWork = (MapWork) baseWork;
      HashMap<String, Operator<? extends OperatorDesc>> aliasToWork = mapWork.getAliasToWork();
      if (!aliasToWork.isEmpty()) {
        for (Operator<? extends OperatorDesc> operator : aliasToWork.values()) {
          setInputFormat(mapWork, operator);
        }
      }
    }
  } else if (task instanceof ConditionalTask) {
    for (Task<? extends Serializable> branch : ((ConditionalTask) task).getListTasks()) {
      setInputFormat(branch);
    }
  }
  // Children are visited regardless of the task's own type.
  if (task.getChildTasks() != null) {
    for (Task<? extends Serializable> child : task.getChildTasks()) {
      setInputFormat(child);
    }
  }
}
/**
 * Verifies that the blobstore conditional-move optimization is NOT applied when the
 * scratch and table directories live on HDFS, even with the optimization flag enabled:
 * every branch of the generated conditional task must keep its follow-up move to the
 * final table location.
 */
@Test
public void testConditionalMoveOnHdfsIsNotOptimized() throws SemanticException {
  // Enable the optimization flag; HDFS paths below should still prevent it from firing.
  hiveConf.set(HiveConf.ConfVars.HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED.varname, "true");

  // A file sink writing to an HDFS scratch dir, plus the move task that follows it.
  Path sinkDirName = new Path("hdfs://bucket/scratch/-ext-10002");
  FileSinkOperator fileSinkOperator = createFileSinkOperator(sinkDirName);
  Path finalDirName = new Path("hdfs://bucket/scratch/-ext-10000");
  Path tableLocation = new Path("hdfs://bucket/warehouse/table");
  Task<MoveWork> moveTask = createMoveTask(finalDirName, tableLocation);
  List<Task<MoveWork>> moveTaskList = Collections.singletonList(moveTask);

  // Generates the conditional merge/move plan under dummyMRTask.
  GenMapRedUtils.createMRWorkForMergingFiles(fileSinkOperator, finalDirName, null,
      moveTaskList, hiveConf, dummyMRTask, new LineageState());
  ConditionalTask conditionalTask = (ConditionalTask)dummyMRTask.getChildTasks().get(0);
  // The three branches: move only, merge only, merge then move.
  Task<? extends Serializable> moveOnlyTask = conditionalTask.getListTasks().get(0);
  Task<? extends Serializable> mergeOnlyTask = conditionalTask.getListTasks().get(1);
  Task<? extends Serializable> mergeAndMoveTask = conditionalTask.getListTasks().get(2);

  // Verify moveOnlyTask is NOT optimized: sink->final move keeps its final->table child.
  assertEquals(1, moveOnlyTask.getChildTasks().size());
  verifyMoveTask(moveOnlyTask, sinkDirName, finalDirName);
  verifyMoveTask(moveOnlyTask.getChildTasks().get(0), finalDirName, tableLocation);

  // Verify mergeOnlyTask is NOT optimized: merge keeps its final->table move child.
  assertEquals(1, mergeOnlyTask.getChildTasks().size());
  verifyMoveTask(mergeOnlyTask.getChildTasks().get(0), finalDirName, tableLocation);

  // Verify mergeAndMoveTask is NOT optimized: merge -> sink->final move -> final->table move.
  assertEquals(1, mergeAndMoveTask.getChildTasks().size());
  assertEquals(1, mergeAndMoveTask.getChildTasks().get(0).getChildTasks().size());
  verifyMoveTask(mergeAndMoveTask.getChildTasks().get(0), sinkDirName, finalDirName);
  verifyMoveTask(mergeAndMoveTask.getChildTasks().get(0).getChildTasks().get(0),
      finalDirName, tableLocation);
}
/**
 * Dispatches a task node: checks map joins and the reducer for MapRed tasks, checks map
 * joins and reducers for Tez tasks, and recurses into every branch of a conditional task.
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
    throws SemanticException {
  @SuppressWarnings("unchecked")
  Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
  if (currTask instanceof MapRedTask) {
    MapRedTask mapRedTask = (MapRedTask) currTask;
    checkMapJoins(mapRedTask);
    checkMRReducer(currTask.toString(), mapRedTask.getWork());
  } else if (currTask instanceof ConditionalTask) {
    // Apply the same checks to each branch of the conditional task.
    for (Task<? extends Serializable> branch : ((ConditionalTask) currTask).getListTasks()) {
      dispatch(branch, stack, nodeOutputs);
    }
  } else if (currTask instanceof TezTask) {
    TezWork tezWork = ((TezTask) currTask).getWork();
    checkMapJoins(tezWork);
    checkTezReducer(tezWork);
  }
  return null;
}
private void resolveTask(DriverContext driverContext) throws HiveException { for (Task<? extends Serializable> tsk : getListTasks()) { if (!resTasks.contains(tsk)) { driverContext.remove(tsk); console.printInfo(tsk.getId() + " is filtered out by condition resolver."); if (tsk.isMapRedTask()) { driverContext.incCurJobNo(1); } //recursively remove this task from its children's parent task tsk.removeFromChildrenTasks(); } else { if (getParentTasks() != null) { // This makes it so that we can go back up the tree later for (Task<? extends Serializable> task : getParentTasks()) { task.addDependentTask(tsk); } } // resolved task if (driverContext.addToRunnable(tsk)) { console.printInfo(tsk.getId() + " is selected by condition resolver."); } } } }