/**
 * Validates the job's output specification by delegating to every
 * {@link FileSinkOperator} found in the map-reduce plan.
 *
 * @param ignored file system handle (unused here; passed through to the sinks)
 * @param job     job configuration from which the plan is recovered
 * @throws IOException if any file sink rejects its output specification
 */
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
  MapredWork plan = Utilities.getMapRedWork(job);
  for (Operator<?> operator : plan.getAllOperators()) {
    if (operator instanceof FileSinkOperator) {
      ((FileSinkOperator) operator).checkOutputSpecs(ignored, job);
    }
  }
}
}
/**
 * Checks output specifications for the job by asking each file sink in the
 * recovered map-reduce plan to verify its own output target.
 *
 * @param ignored the file system (not used directly; forwarded to each sink)
 * @param job     the job configuration holding the serialized plan
 * @throws IOException when a sink's output specification is invalid
 */
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
  List<Operator<?>> operators = Utilities.getMapRedWork(job).getAllOperators();
  for (Operator<?> candidate : operators) {
    if (!(candidate instanceof FileSinkOperator)) {
      continue;
    }
    ((FileSinkOperator) candidate).checkOutputSpecs(ignored, job);
  }
}
}
/**
 * Collects every operator from the current task's work — map-reduce, Tez, or
 * Spark — and annotates the set with statistics.
 *
 * @param nd          the task node being visited
 * @param stack       the traversal stack (unused)
 * @param nodeOutputs outputs of previously visited nodes (unused)
 * @return always {@code null}; the result is produced via side effects
 * @throws SemanticException propagated from the stats annotation step
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
    throws SemanticException {
  @SuppressWarnings("unchecked")
  Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
  Set<Operator<? extends OperatorDesc>> ops = new HashSet<>();
  if (currTask instanceof MapRedTask) {
    MapRedTask mr = (MapRedTask) currTask;
    ops.addAll(mr.getWork().getAllOperators());
  } else if (currTask instanceof TezTask) {
    TezWork work = ((TezTask) currTask).getWork();
    for (BaseWork w : work.getAllWork()) {
      ops.addAll(w.getAllOperators());
    }
  } else if (currTask instanceof SparkTask) {
    // Cast the task (consistent with the Tez branch) instead of casting the
    // returned work object; SparkTask.getWork() is already typed as SparkWork.
    SparkWork sparkWork = ((SparkTask) currTask).getWork();
    for (BaseWork w : sparkWork.getAllWork()) {
      ops.addAll(w.getAllOperators());
    }
  }
  setOrAnnotateStats(ops, physicalContext.getParseContext());
  return null;
}
/** * Clones using the powers of XML. Do not use unless necessary. * @param plan The plan. * @return The clone. */ public static MapredWork clonePlan(MapredWork plan) { // TODO: need proper clone. Meanwhile, let's at least keep this horror in one place PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN); Operator<?> op = plan.getAnyOperator(); CompilationOpContext ctx = (op == null) ? null : op.getCompilationOpContext(); ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); serializePlan(plan, baos, true); MapredWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), MapredWork.class, true); // Restore the context. for (Operator<?> newOp : newPlan.getAllOperators()) { newOp.setCompilationOpContext(ctx); } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN); return newPlan; }
/**
 * Visits a task node, gathers all operators from its underlying work
 * (MapRed, Tez, or Spark), and runs statistics annotation over them.
 *
 * @param nd          the visited node, expected to be a {@code Task}
 * @param stack       traversal stack (not consulted)
 * @param nodeOutputs prior node outputs (not consulted)
 * @return {@code null} in all cases; annotation happens as a side effect
 * @throws SemanticException if statistics annotation fails
 */
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
    throws SemanticException {
  @SuppressWarnings("unchecked")
  Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd;
  Set<Operator<? extends OperatorDesc>> ops = new HashSet<>();
  if (currTask instanceof MapRedTask) {
    ops.addAll(((MapRedTask) currTask).getWork().getAllOperators());
  } else if (currTask instanceof TezTask) {
    TezWork work = ((TezTask) currTask).getWork();
    for (BaseWork w : work.getAllWork()) {
      ops.addAll(w.getAllOperators());
    }
  } else if (currTask instanceof SparkTask) {
    // Mirror the Tez branch: cast the task rather than the work object,
    // since SparkTask.getWork() already returns SparkWork.
    SparkWork sparkWork = ((SparkTask) currTask).getWork();
    for (BaseWork w : sparkWork.getAllWork()) {
      ops.addAll(w.getAllOperators());
    }
  }
  setOrAnnotateStats(ops, physicalContext.getParseContext());
  return null;
}
/** * Clones using the powers of XML. Do not use unless necessary. * @param plan The plan. * @return The clone. */ public static MapredWork clonePlan(MapredWork plan) { // TODO: need proper clone. Meanwhile, let's at least keep this horror in one place PerfLogger perfLogger = SessionState.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.CLONE_PLAN); Operator<?> op = plan.getAnyOperator(); CompilationOpContext ctx = (op == null) ? null : op.getCompilationOpContext(); ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); serializePlan(plan, baos, true); MapredWork newPlan = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), MapredWork.class, true); // Restore the context. for (Operator<?> newOp : newPlan.getAllOperators()) { newOp.setCompilationOpContext(ctx); } perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.CLONE_PLAN); return newPlan; }
/** * Converts job properties to a DAGNode representation * * @param task * @return */ private DAGNode<Job> asDAGNode(Task<? extends Serializable> task) { MapredWork mrWork = (MapredWork) task.getWork(); List<String> indexTableAliases = getAllJobAliases(getPathToAliases(mrWork)); String[] features = getFeatures(mrWork.getAllOperators(), task.getTaskTag()); String[] displayAliases = getDisplayAliases(indexTableAliases); // DAGNode's name of a workflow is unique among all workflows DAGNode<Job> dagNode = new DAGNode<Job>(AmbroseHiveUtil.getNodeIdFromNodeName(conf, task.getId()), new HiveJob(displayAliases, features)); // init empty successors dagNode.setSuccessors(new ArrayList<DAGNode<? extends Job>>()); return dagNode; }
/**
 * Verifies the output specification of the job: each {@link FileSinkOperator}
 * in the deserialized plan checks its own output target.
 *
 * @param ignored file system argument, forwarded untouched to the sinks
 * @param job     configuration carrying the serialized map-reduce plan
 * @throws IOException if a sink finds its output specification invalid
 */
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
  MapredWork mrWork = Utilities.getMapRedWork(job);
  List<Operator<?>> allOps = mrWork.getAllOperators();
  for (Operator<?> current : allOps) {
    if (current instanceof FileSinkOperator) {
      FileSinkOperator sink = (FileSinkOperator) current;
      sink.checkOutputSpecs(ignored, job);
    }
  }
}
}