/**
 * Tells whether the given group-by operator feeds a semijoin branch.
 *
 * @param gOp the group-by operator whose direct children are inspected
 * @param dedupCtx context giving access to the parse context and its
 *        reduce-sink to semijoin-branch-info map
 * @return {@code true} if at least one direct child of {@code gOp} is a
 *         {@link ReduceSinkOperator} that is a key of the semijoin branch
 *         info map; {@code false} otherwise
 */
private boolean isSemiJoinBranch(final GroupByOperator gOp, ReduceSinkDeduplicateProcCtx dedupCtx) {
  for (Object child : gOp.getChildren()) {
    if (!(child instanceof ReduceSinkOperator)) {
      continue;
    }
    // The map is only consulted once a reduce-sink child has been found.
    if (dedupCtx.getPctx().getRsToSemiJoinBranchInfo().containsKey(child)) {
      return true;
    }
  }
  return false;
}
/**
 * Recursively walks the operator tree rooted at {@code op} and collects
 * every childless reduce-sink that has an entry in the semijoin branch
 * info map, together with the table scan recorded in that entry.
 *
 * @param op root of the (sub)tree to visit
 * @param procCtx context whose parse context holds the semijoin branch info map
 * @param sjToRemove output map, filled with reduce-sink to table-scan pairs
 * @throws SemanticException declared for callers; propagated from recursive calls
 */
private void removeSemiJoinEdges(Operator<?> op, OptimizeTezProcContext procCtx,
    Map<ReduceSinkOperator, TableScanOperator> sjToRemove) throws SemanticException {
  final boolean isLeafReduceSink = op instanceof ReduceSinkOperator && op.getNumChild() == 0;
  if (isLeafReduceSink) {
    SemiJoinBranchInfo branchInfo =
        procCtx.parseContext.getRsToSemiJoinBranchInfo().get(op);
    if (branchInfo != null) {
      sjToRemove.put((ReduceSinkOperator) op, branchInfo.getTsOp());
    }
  }

  // Descend into every child, whether or not this node was recorded.
  for (Operator<?> next : op.getChildOperators()) {
    removeSemiJoinEdges(next, procCtx, sjToRemove);
  }
}
for (ReduceSinkOperator rs : pCtx.getRsToSemiJoinBranchInfo().keySet()) { TerminalOpsInfo terminalOpsInfo = rsToTerminalOpsInfo.get(rs); if (terminalOpsInfo != null) { .getParentOperators().get(0); OperatorUtils.findWorkOperatorsAndSemiJoinEdges(selOp, pCtx.getRsToSemiJoinBranchInfo(), workRSOps, workTerminalOps);
/** * This method gathers the TS operators with DPP from the context and * stores them into the input optimization cache. */ private static void gatherDPPTableScanOps( ParseContext pctx, SharedWorkOptimizerCache optimizerCache) throws SemanticException { // Find TS operators with partition pruning enabled in plan // because these TS may potentially read different data for // different pipeline. // These can be: // 1) TS with DPP. // 2) TS with semijoin DPP. Map<String, TableScanOperator> topOps = pctx.getTopOps(); Collection<Operator<? extends OperatorDesc>> tableScanOps = Lists.<Operator<?>>newArrayList(topOps.values()); Set<AppMasterEventOperator> s = OperatorUtils.findOperators(tableScanOps, AppMasterEventOperator.class); for (AppMasterEventOperator a : s) { if (a.getConf() instanceof DynamicPruningEventDesc) { DynamicPruningEventDesc dped = (DynamicPruningEventDesc) a.getConf(); optimizerCache.tableScanToDPPSource.put(dped.getTableScan(), a); } } for (Entry<ReduceSinkOperator, SemiJoinBranchInfo> e : pctx.getRsToSemiJoinBranchInfo().entrySet()) { optimizerCache.tableScanToDPPSource.put(e.getValue().getTsOp(), e.getKey()); } LOG.debug("DPP information stored in the cache: {}", optimizerCache.tableScanToDPPSource); }
SemiJoinBranchInfo sjInfo = parseContext.getRsToSemiJoinBranchInfo().get(o); if (sjInfo != null) { TableScanOperator ts = sjInfo.getTsOp();
SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op); if (sjbi != null) { set.addAll(findWorkOperators(optimizerCache, sjbi.getTsOp()));
TableScanOperator ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp(); if (ts == null) { ts = parseContext.getRsToSemiJoinBranchInfo().get(rs).getTsOp(); assert ts != null; for (Operator<?> parent : mapjoinOp.getParentOperators()) {
final boolean semiJoinReductionEnabled = dynamicPartitionPruningEnabled && procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0; final boolean extendedReductionEnabled = dynamicPartitionPruningEnabled && procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED);
parseContext.getRsToSemiJoinBranchInfo().put(rsOpFinal, sjInfo);
Set<ReduceSinkOperator> rsSet = new HashSet<>(pctx.getRsToSemiJoinBranchInfo().keySet()); for (TableScanOperator ts : tsOps) { for (ReduceSinkOperator rs : rsSet) { SemiJoinBranchInfo sjInfo = pctx.getRsToSemiJoinBranchInfo().get(rs); if (sjInfo != null && ts == sjInfo.getTsOp()) {
private static Set<Operator<?>> findChildWorkOperators(ParseContext pctx, SharedWorkOptimizerCache optimizerCache, Operator<?> start) { // Find operators in work Set<Operator<?>> workOps = findWorkOperators(optimizerCache, start); // Gather output works operators Set<Operator<?>> set = new HashSet<Operator<?>>(); for (Operator<?> op : workOps) { if (op instanceof ReduceSinkOperator) { if (op.getChildOperators() != null) { // All children of RS are descendants for (Operator<?> child : op.getChildOperators()) { set.addAll(findWorkOperators(optimizerCache, child)); } } // Semijoin DPP work is considered a child because work needs // to finish for it to execute SemiJoinBranchInfo sjbi = pctx.getRsToSemiJoinBranchInfo().get(op); if (sjbi != null) { set.addAll(findWorkOperators(optimizerCache, sjbi.getTsOp())); } } else if(op.getConf() instanceof DynamicPruningEventDesc) { // DPP work is considered a child because work needs // to finish for it to execute set.addAll(findWorkOperators( optimizerCache, ((DynamicPruningEventDesc) op.getConf()).getTableScan())); } } return set; }
ReduceSinkOperator rs) throws SemanticException { SemiJoinBranchInfo sjInfo = procCtx.parseContext.getRsToSemiJoinBranchInfo().get(rs);
/**
 * Handles one visited node.
 *
 * <p>Stores the plan mapper of the current context in {@code planMapper}.
 * For a reduce sink with a semijoin branch entry, calls
 * {@code walkSubtree} on the entry's table scan; a reduce sink without an
 * entry ends processing immediately. For an app-master event operator whose
 * configuration is a {@link DynamicPruningEventDesc}, calls {@code mark} on
 * the descriptor's table scan.
 *
 * @param nd the node being visited
 * @param stack the walker's node stack (not used here)
 * @param procCtx processor context; must be an {@code OptimizeTezProcContext}
 * @param nodeOutputs outputs of previously processed nodes (not used here)
 * @return always {@code null}
 * @throws SemanticException declared by the processor contract
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  OptimizeTezProcContext tezCtx = (OptimizeTezProcContext) procCtx;
  ParseContext pCtx = tezCtx.parseContext;
  planMapper = pCtx.getContext().getPlanMapper();

  if (nd instanceof ReduceSinkOperator) {
    SemiJoinBranchInfo branchInfo =
        pCtx.getRsToSemiJoinBranchInfo().get((ReduceSinkOperator) nd);
    if (branchInfo == null) {
      // Not a semijoin branch: nothing further to do for this node.
      return null;
    }
    walkSubtree(branchInfo.getTsOp());
  }

  if (nd instanceof AppMasterEventOperator) {
    AppMasterEventDesc eventDesc = ((AppMasterEventOperator) nd).getConf();
    if (eventDesc instanceof DynamicPruningEventDesc) {
      mark(((DynamicPruningEventDesc) eventDesc).getTableScan());
    }
  }

  return null;
}
private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) throws SemanticException { Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo();
if (context.parseContext.getRsToSemiJoinBranchInfo().size() > 0 && removeReduceSink) { removeCycleCreatingSemiJoinOps(mapJoinOp, parentSelectOpOfBigTableOp,
SemiJoinBranchInfo sjInfo = parseContext.getRsToSemiJoinBranchInfo().get(rs); if (sjInfo == null) { continue;
context.getRsToSemiJoinBranchInfo().remove(rs);
SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx; ParseContext pCtx = rCtx.parseContext; SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs); if (sjInfo == null) { TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo(). get(rsFinal).getTsOp(); if (LOG.isDebugEnabled()) { ReduceSinkOperator.class); for (Operator<?> otherRSOp : rsOps) { SemiJoinBranchInfo otherSjInfo = pCtx.getRsToSemiJoinBranchInfo().get(otherRSOp);
context.parseContext.getRsToSemiJoinBranchInfo().get(o); if (sjInfo == null ) { continue;
/**
 * Builds a new {@link ParseContext} populated from an existing one.
 *
 * <p>The constructor arguments and the subsequently set properties are the
 * values returned by {@code pCtx}'s own getters, with two exceptions: the
 * query state comes from this object's {@code queryState}, and the root
 * tasks are the {@code rootTasks} argument.
 *
 * @param pCtx the parse context to copy from
 * @param rootTasks the root tasks to install in the new context
 * @return the newly created parse context
 */
public ParseContext getParseContext(ParseContext pCtx,
    List<Task<? extends Serializable>> rootTasks) {
  ParseContext copy = new ParseContext(
      queryState,
      pCtx.getOpToPartPruner(),
      pCtx.getOpToPartList(),
      pCtx.getTopOps(),
      pCtx.getJoinOps(),
      pCtx.getSmbMapJoinOps(),
      pCtx.getLoadTableWork(),
      pCtx.getLoadFileWork(),
      pCtx.getColumnStatsAutoGatherContexts(),
      pCtx.getContext(),
      pCtx.getIdToTableNameMap(),
      pCtx.getDestTableId(),
      pCtx.getUCtx(),
      pCtx.getListMapJoinOpsNoReducer(),
      pCtx.getPrunedPartitions(),
      pCtx.getTabNameToTabObject(),
      pCtx.getOpToSamplePruner(),
      pCtx.getGlobalLimitCtx(),
      pCtx.getNameToSplitSample(),
      pCtx.getSemanticInputs(),
      rootTasks,
      pCtx.getOpToPartToSkewedPruner(),
      pCtx.getViewAliasToInput(),
      pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(),
      pCtx.getAnalyzeRewrite(),
      pCtx.getCreateTable(),
      pCtx.getCreateViewDesc(),
      pCtx.getMaterializedViewUpdateDesc(),
      pCtx.getQueryProperties(),
      pCtx.getViewProjectToTableSchema(),
      pCtx.getAcidSinks());

  // State that is not part of the constructor is carried over via setters.
  copy.setFetchTask(pCtx.getFetchTask());
  copy.setLineageInfo(pCtx.getLineageInfo());
  copy.setMapJoinOps(pCtx.getMapJoinOps());
  copy.setRsToRuntimeValuesInfoMap(pCtx.getRsToRuntimeValuesInfoMap());
  copy.setRsToSemiJoinBranchInfo(pCtx.getRsToSemiJoinBranchInfo());
  copy.setColExprToGBMap(pCtx.getColExprToGBMap());
  copy.setSemiJoinHints(pCtx.getSemiJoinHints());

  return copy;
}