/** Returns a compact description of this operator: name followed by its identifier in brackets. */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder(getName());
  sb.append('[').append(getIdentifier()).append(']');
  return sb.toString();
}
/** Returns a compact description, e.g. {@code TS[3]} — operator name plus bracketed identifier. */
@Override
public String toString() {
  return new StringBuilder(getName())
      .append("[")
      .append(getIdentifier())
      .append("]")
      .toString();
}
/**
 * Iterates until the operator plan is cycle free: each pass groups operators into
 * components via {@link #getComponents}, and any component with more than one
 * operator is treated as a cycle, which is broken by removing a dynamic-partition-pruning
 * operator from it. The loop restarts after every removal because removing one
 * operator can change the remaining components.
 */
private void runCycleAnalysisForPartitionPruning(OptimizeSparkProcContext procCtx) {
  boolean cycleFree = false;
  while (!cycleFree) {
    cycleFree = true;
    // Components of the operator graph; a multi-operator component indicates a
    // cycle -- presumably strongly-connected components, confirm in getComponents.
    Set<Set<Operator<?>>> components = getComponents(procCtx);
    for (Set<Operator<?>> component : components) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Component: ");
        for (Operator<?> co : component) {
          LOG.debug("Operator: " + co.getName() + ", " + co.getIdentifier());
        }
      }
      if (component.size() != 1) {
        LOG.info("Found cycle in operator plan...");
        cycleFree = false;
        // Break the cycle by dropping a DPP operator, then recompute components.
        removeDPPOperator(component, procCtx);
        break;
      }
    }
    LOG.info("Cycle free: " + cycleFree);
  }
}
/**
 * PPD handling for a PTF operator: attempt to push a rank limit into the PTF,
 * then fall through to the default predicate-pushdown processing.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  LOG.info("Processing for " + nd.getName() + "("
      + ((Operator) nd).getIdentifier() + ")");
  PTFOperator ptfOp = (PTFOperator) nd;
  OpWalkerInfo owi = (OpWalkerInfo) procCtx;
  pushRankLimit(ptfOp, owi);
  return super.process(nd, stack, procCtx, nodeOutputs);
}
/**
 * Iterates until the operator plan is cycle free. Semijoin optimization may have
 * introduced task-level cycles, so terminal operators are connected first; each
 * pass then groups operators into components via {@link #getComponents}, and any
 * component with more than one operator is treated as a cycle and broken by
 * removing an operator from it. The loop restarts after every removal because
 * removing one operator can change the remaining components.
 * NOTE(review): the inputs/outputs parameters are not used in this method body --
 * presumably required by the caller's signature; confirm.
 */
private void runCycleAnalysisForPartitionPruning(OptimizeTezProcContext procCtx,
    Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
  // Semijoins may have created task level cycles, examine those
  connectTerminalOps(procCtx.parseContext);
  boolean cycleFree = false;
  while (!cycleFree) {
    cycleFree = true;
    // Components of the operator graph; a multi-operator component indicates a cycle.
    Set<Set<Operator<?>>> components = getComponents(procCtx);
    for (Set<Operator<?>> component : components) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Component: ");
        for (Operator<?> co : component) {
          LOG.debug("Operator: " + co.getName() + ", " + co.getIdentifier());
        }
      }
      if (component.size() != 1) {
        LOG.info("Found cycle in operator plan...");
        cycleFree = false;
        // Break the cycle by removing one of its operators, then recompute.
        removeCycleOperator(component, procCtx);
        break;
      }
    }
    LOG.info("Cycle free: " + cycleFree);
  }
}
/**
 * Returns the single child of the given operator.
 *
 * @param operator the input operator
 * @param throwException whether to throw an exception when the input operator
 *        does not have exactly one child
 * @return the single child, or null when the operator does not have exactly one
 *         child and throwException is false
 * @throws SemanticException when throwException is true and the operator does
 *         not have exactly one child
 */
protected static Operator<?> getSingleChild(Operator<?> operator, boolean throwException)
    throws SemanticException {
  List<Operator<?>> children = operator.getChildOperators();
  if (children != null && children.size() == 1) {
    return children.get(0);
  }
  if (throwException) {
    // BUG FIX: messages previously said "parent(s)" (copy-paste from
    // getSingleParent) although this method inspects children; also, an empty
    // (non-null) child list used to fall through and return null silently.
    if (children == null || children.isEmpty()) {
      throw new SemanticException("Operator " + operator.getName() + " (ID: " +
          operator.getIdentifier() + ") does not have any child, but we expect 1 child.");
    } else if (children.size() > 1) {
      throw new SemanticException("Operator " + operator.getName() + " (ID: " +
          operator.getIdentifier() + ") has " + children.size() +
          " children, but we expect 1 child.");
    }
  }
  return null;
}
/**
 * PPD handling for a PTF operator: push a rank limit into the PTF where possible
 * before delegating to the superclass's default predicate-pushdown processing.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  LOG.info("Processing for " + nd.getName() + "("
      + ((Operator) nd).getIdentifier() + ")");
  OpWalkerInfo walkerInfo = (OpWalkerInfo) procCtx;
  pushRankLimit((PTFOperator) nd, walkerInfo);
  return super.process(nd, stack, procCtx, nodeOutputs);
}
/**
 * Returns the single parent of the given operator.
 *
 * @param operator the input operator
 * @param throwException whether to throw an exception when the input operator
 *        does not have a single parent
 * @return the single parent, or null when the input operator has multiple
 *         parents and throwException is false
 * @throws SemanticException when throwException is true and the operator has
 *         no parent list or more than one parent
 */
protected static Operator<?> getSingleParent(Operator<?> operator, boolean throwException)
    throws SemanticException {
  List<Operator<?>> parents = operator.getParentOperators();
  if (parents != null && parents.size() == 1) {
    return parents.get(0);
  }
  if (throwException) {
    if (parents == null) {
      throw new SemanticException("Operator " + operator.getName() + " (ID: " +
          operator.getIdentifier() + ") does not have any parent, but we expect 1 parent.");
    } else if (parents.size() > 1) {
      throw new SemanticException("Operator " + operator.getName() + " (ID: " +
          operator.getIdentifier() + ") has " + parents.size() +
          " parents, but we expect 1 parent.");
    }
  }
  // NOTE(review): an empty (non-null) parent list returns null even when
  // throwException is true -- confirm this is intentional at the call sites.
  return null;
}
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { LOG.info("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); OpWalkerInfo owi = (OpWalkerInfo) procCtx; // The lateral view forward operator has 2 children, a SELECT(*) and // a SELECT(cols) (for the UDTF operator) The child at index 0 is the // SELECT(*) because that's the way that the DAG was constructed. We // only want to get the predicates from the SELECT(*). ExprWalkerInfo childPreds = owi .getPrunedPreds((Operator<? extends OperatorDesc>) nd.getChildren() .get(0)); owi.putPrunedPreds((Operator<? extends OperatorDesc>) nd, childPreds); return null; }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { LOG.info("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); OpWalkerInfo owi = (OpWalkerInfo) procCtx; // The lateral view forward operator has 2 children, a SELECT(*) and // a SELECT(cols) (for the UDTF operator) The child at index 0 is the // SELECT(*) because that's the way that the DAG was constructed. We // only want to get the predicates from the SELECT(*). ExprWalkerInfo childPreds = owi .getPrunedPreds((Operator<? extends OperatorDesc>) nd.getChildren() .get(0)); owi.putPrunedPreds((Operator<? extends OperatorDesc>) nd, childPreds); return null; }
/**
 * Orders operators by operator name first, then numerically by identifier.
 * Assumes identifiers are numeric strings -- a non-numeric identifier would
 * throw NumberFormatException, as in the original.
 */
@Override
public int compare(Operator<?> o1, Operator<?> o2) {
  // parseLong instead of Long.valueOf: avoids boxing; comparison below is on
  // primitives via Long.compare.
  long id1 = Long.parseLong(o1.getIdentifier());
  long id2 = Long.parseLong(o2.getIdentifier());
  int c0 = Objects.compare(o1.getOperatorName(), o2.getOperatorName(),
      Comparator.naturalOrder());
  if (c0 != 0) {
    return c0;
  }
  return Long.compare(id1, id2);
}
};
/**
 * Default walker callback for the correlation optimizer: performs no rewrite,
 * only records facts about the operator being visited (aborts on MAPJOIN,
 * counts file sinks).
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  Operator<? extends OperatorDesc> walkedOp = (Operator<? extends OperatorDesc>) nd;
  LOG.info("Walk to operator " + walkedOp.getIdentifier() + " " + walkedOp.getName()
      + ". No actual work to do");
  CorrelationNodeProcCtx corrCtx = (CorrelationNodeProcCtx) ctx;
  String opName = walkedOp.getName();
  if (opName.equals(MapJoinOperator.getOperatorName())) {
    // Map joins are not handled by this optimization; give up on the query.
    corrCtx.setAbort(true);
    corrCtx.getAbortReasons().add("Found MAPJOIN");
  }
  if (opName.equals(FileSinkOperator.getOperatorName())) {
    corrCtx.incrementFileSinkOperatorCount();
  }
  return null;
}
};
/**
 * No-op walker callback for the correlation optimizer; it only gathers
 * bookkeeping as it passes over operators: seeing a MAPJOIN aborts the
 * optimization, and every FileSink is counted.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  Operator<? extends OperatorDesc> current = (Operator<? extends OperatorDesc>) nd;
  LOG.info("Walk to operator " + current.getIdentifier() + " " + current.getName()
      + ". No actual work to do");
  CorrelationNodeProcCtx corrInfo = (CorrelationNodeProcCtx) ctx;
  if (current.getName().equals(MapJoinOperator.getOperatorName())) {
    corrInfo.setAbort(true);
    corrInfo.getAbortReasons().add("Found MAPJOIN");
  }
  if (current.getName().equals(FileSinkOperator.getOperatorName())) {
    corrInfo.incrementFileSinkOperatorCount();
  }
  return null;
}
};
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { LOG.debug("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); // script operator is a black-box to hive so no optimization here // assuming that nothing can be pushed above the script op // same with LIMIT op // create a filter with all children predicates OpWalkerInfo owi = (OpWalkerInfo) procCtx; ExprWalkerInfo childInfo = getChildWalkerInfo((Operator<?>) nd, owi); if (childInfo != null && HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { ExprWalkerInfo unpushedPreds = mergeChildrenPred(nd, owi, null, false); return createFilter((Operator)nd, unpushedPreds, owi); } return null; }
/**
 * PPD handling: merges the children's predicates (restricted to the qualified
 * aliases) into this operator, and — when duplicate-filter removal is enabled —
 * either clears the candidate filter ops or materializes the residual
 * predicates as a filter at this operator.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  LOG.info("Processing for " + nd.getName() + "("
      + ((Operator) nd).getIdentifier() + ")");
  OpWalkerInfo owi = (OpWalkerInfo) procCtx;
  // Aliases whose predicates may be pushed through this operator; null
  // presumably means "no restriction" -- confirm getQualifiedAliases contract.
  Set<String> includes = getQualifiedAliases((Operator<?>) nd, owi);
  boolean hasUnpushedPredicates = mergeWithChildrenPred(nd, owi, null, includes);
  if (hasUnpushedPredicates && HiveConf.getBoolVar(owi.getParseContext().getConf(),
      HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
    if (includes != null || nd instanceof ReduceSinkOperator) {
      // Restricted aliases or a reduce sink: drop the candidates rather than
      // creating a filter here.
      owi.getCandidateFilterOps().clear();
    } else {
      // Unrestricted: turn the residual (unpushable) predicates into a filter
      // at this operator and clear the non-final candidates.
      ExprWalkerInfo pruned = owi.getPrunedPreds((Operator<? extends OperatorDesc>) nd);
      Map<String, List<ExprNodeDesc>> residual = pruned.getResidualPredicates(true);
      if (residual != null && !residual.isEmpty()) {
        createFilter((Operator) nd, residual, owi);
        pruned.getNonFinalCandidates().clear();
      }
    }
  }
  return null;
}
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { LOG.debug("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); // script operator is a black-box to hive so no optimization here // assuming that nothing can be pushed above the script op // same with LIMIT op // create a filter with all children predicates OpWalkerInfo owi = (OpWalkerInfo) procCtx; ExprWalkerInfo childInfo = getChildWalkerInfo((Operator<?>) nd, owi); if (childInfo != null && HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { ExprWalkerInfo unpushedPreds = mergeChildrenPred(nd, owi, null, false); return createFilter((Operator)nd, unpushedPreds, owi); } return null; }
/**
 * PPD handling: merges the children's predicates (restricted to the qualified
 * aliases) into this operator; when duplicate-filter removal is enabled and
 * unpushed predicates remain, either clears the candidate filter ops (alias
 * restriction or reduce sink) or creates a filter from the residual predicates.
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  LOG.info("Processing for " + nd.getName() + "("
      + ((Operator) nd).getIdentifier() + ")");
  OpWalkerInfo walkerInfo = (OpWalkerInfo) procCtx;
  Set<String> aliases = getQualifiedAliases((Operator<?>) nd, walkerInfo);
  boolean unpushedRemain = mergeWithChildrenPred(nd, walkerInfo, null, aliases);
  if (!unpushedRemain) {
    return null;
  }
  if (!HiveConf.getBoolVar(walkerInfo.getParseContext().getConf(),
      HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) {
    return null;
  }
  if (aliases != null || nd instanceof ReduceSinkOperator) {
    walkerInfo.getCandidateFilterOps().clear();
    return null;
  }
  ExprWalkerInfo prunedPreds =
      walkerInfo.getPrunedPreds((Operator<? extends OperatorDesc>) nd);
  Map<String, List<ExprNodeDesc>> residualPreds = prunedPreds.getResidualPredicates(true);
  if (residualPreds != null && !residualPreds.isEmpty()) {
    createFilter((Operator) nd, residualPreds, walkerInfo);
    prunedPreds.getNonFinalCandidates().clear();
  }
  return null;
}
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { LOG.info("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); OpWalkerInfo owi = (OpWalkerInfo) procCtx; TableScanOperator tsOp = (TableScanOperator) nd; mergeWithChildrenPred(tsOp, owi, null, null); if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { // remove all the candidate filter operators // when we get to the TS removeAllCandidates(owi); } ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp); // nonFinalCandidates predicates should be empty assert pushDownPreds == null || !pushDownPreds.hasNonFinalCandidates(); return createFilter(tsOp, pushDownPreds, owi); }
/**
 * Inserts {@code newOperator} between {@code parent} and {@code child}, which
 * must currently form an exclusive single-parent/single-child pair.
 *
 * @param newOperator the operator to be inserted between parent and child
 * @param parent the operator that must currently be the only parent of child
 * @param child the operator that must currently be the only child of parent
 * @throws SemanticException if any argument is null, if parent is not the only
 *         parent of child, or if child is not the only child of parent
 */
protected static void insertOperatorBetween(
    Operator<?> newOperator, Operator<?> parent, Operator<?> child)
    throws SemanticException {
  isNullOperator(newOperator);
  isNullOperator(parent);
  isNullOperator(child);
  // Reference equality is intentional: we require these exact operator
  // instances to be linked.
  if (parent != getSingleParent(child)) {
    throw new SemanticException("Operator " + parent.getName() + " (ID: " +
        parent.getIdentifier() + ") is not the only parent of Operator " +
        child.getName() + " (ID: " + child.getIdentifier() + ")");
  }
  if (child != getSingleChild(parent)) {
    throw new SemanticException("Operator " + child.getName() + " (ID: " +
        child.getIdentifier() + ") is not the only child of Operator " +
        parent.getName() + " (ID: " + parent.getIdentifier() + ")");
  }
  // Rewire: parent -> newOperator -> child.
  newOperator.setParentOperators(Utilities.makeList(parent));
  newOperator.setChildOperators(Utilities.makeList(child));
  child.setParentOperators(Utilities.makeList(newOperator));
  parent.setChildOperators(Utilities.makeList(newOperator));
}
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { LOG.info("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); OpWalkerInfo owi = (OpWalkerInfo) procCtx; TableScanOperator tsOp = (TableScanOperator) nd; mergeWithChildrenPred(tsOp, owi, null, null); if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { // remove all the candidate filter operators // when we get to the TS removeAllCandidates(owi); } ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp); // nonFinalCandidates predicates should be empty assert pushDownPreds == null || !pushDownPreds.hasNonFinalCandidates(); return createFilter(tsOp, pushDownPreds, owi); }