public static void setOrAnnotateStats(Set<Operator<? extends OperatorDesc>> ops, ParseContext pctx) throws SemanticException { for (Operator<? extends OperatorDesc> op : ops) { if (pctx.getContext().getExplainAnalyze() == AnalyzeState.RUNNING) { setRuntimeStatsDir(op, pctx); } else if (pctx.getContext().getExplainAnalyze() == AnalyzeState.ANALYZING) { annotateRuntimeStats(op, pctx); } else { throw new SemanticException("Unexpected stats in AnnotateWithRunTimeStatistics."); } } }
public static void setOrAnnotateStats(Set<Operator<? extends OperatorDesc>> ops, ParseContext pctx) throws SemanticException { for (Operator<? extends OperatorDesc> op : ops) { if (pctx.getContext().getExplainAnalyze() == AnalyzeState.RUNNING) { setRuntimeStatsDir(op, pctx); } else if (pctx.getContext().getExplainAnalyze() == AnalyzeState.ANALYZING) { annotateRuntimeStats(op, pctx); } else { throw new SemanticException("Unexpected stats in AnnotateWithRunTimeStatistics."); } } }
/** * Preserves additional informations about the operator. * * When an operator is replaced by a new one; some of the information of the old have to be retained. */ private void preserveOperatorInfos(Operator<?> newOp, Operator<?> oldOp, OptimizeTezProcContext context) { newOp.setStatistics(oldOp.getStatistics()); // linking these two operator declares that they are representing the same thing // currently important because statistincs are actually gather for newOp; but the lookup is done using oldOp context.parseContext.getContext().getPlanMapper().link(oldOp, newOp); }
private static void annotateRuntimeStats(Operator<? extends OperatorDesc> op, ParseContext pctx) { Long runTimeNumRows = pctx.getContext().getExplainConfig().getOpIdToRuntimeNumRows() .get(op.getOperatorId()); if (op.getConf() != null && op.getConf().getStatistics() != null && runTimeNumRows != null) { LOG.info("annotateRuntimeStats for " + op.getOperatorId()); op.getConf().getStatistics().setRunTimeNumRows(runTimeNumRows); } else { LOG.debug("skip annotateRuntimeStats for " + op.getOperatorId()); } }
private static void annotateRuntimeStats(Operator<? extends OperatorDesc> op, ParseContext pctx) { Long runTimeNumRows = pctx.getContext().getExplainConfig().getOpIdToRuntimeNumRows() .get(op.getOperatorId()); if (op.getConf() != null && op.getConf().getStatistics() != null && runTimeNumRows != null) { LOG.info("annotateRuntimeStats for " + op.getOperatorId()); op.getConf().getStatistics().setRunTimeNumRows(runTimeNumRows); } else { LOG.debug("skip annotateRuntimeStats for " + op.getOperatorId()); } }
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { // 0. We check the conditions to apply this transformation, // if we do not meet them we bail out final boolean cboEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED); final boolean returnPathEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP); final boolean cboSucceeded = pctx.getContext().isCboSucceeded(); if(!(cboEnabled && returnPathEnabled && cboSucceeded)) { return pctx; } // 1. Initialize aux data structures this.pctx = pctx; this.aliasToOpInfo = new HashMap<String, Operator<? extends OperatorDesc>>(); // 2. Trigger transformation Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put(new RuleRegExp("R1", JoinOperator.getOperatorName() + "%"), new JoinAnnotate()); opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + "%"), new TableScanAnnotate()); Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); GraphWalker ogw = new ForwardWalker(disp); List<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pctx.getTopOps().values()); ogw.startWalking(topNodes, null); return pctx; }
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { // 0. We check the conditions to apply this transformation, // if we do not meet them we bail out final boolean cboEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED); final boolean returnPathEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP); final boolean cboSucceeded = pctx.getContext().isCboSucceeded(); if(cboEnabled && returnPathEnabled && cboSucceeded) { return pctx; } // 1. We apply the transformation Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put(new RuleRegExp("R1", "(" + SelectOperator.getOperatorName() + "%)"), new ProjectRemover()); GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null)); ArrayList<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pctx.getTopOps().values()); ogw.startWalking(topNodes, null); return pctx; }
SparkPartitionPruningSinkOperator pruningSink) { SparkPartitionPruningSinkDesc desc = pruningSink.getConf(); final Path outputBase = getDPPOutputPath(context.parseContext.getContext()); final String sourceId = pruningSink.getUniqueId(); desc.setPath(new Path(outputBase, sourceId));
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { // 0. We check the conditions to apply this transformation, // if we do not meet them we bail out final boolean cboEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED); final boolean returnPathEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP); final boolean cboSucceeded = pctx.getContext().isCboSucceeded(); if(cboEnabled && returnPathEnabled && cboSucceeded) { return pctx; } // 1. We apply the transformation Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put(new RuleRegExp("R1", "(" + SelectOperator.getOperatorName() + "%)"), new ProjectRemover()); GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null)); ArrayList<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pctx.getTopOps().values()); ogw.startWalking(topNodes, null); return pctx; }
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { // 0. We check the conditions to apply this transformation, // if we do not meet them we bail out final boolean cboEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_ENABLED); final boolean returnPathEnabled = HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP); final boolean cboSucceeded = pctx.getContext().isCboSucceeded(); if(!(cboEnabled && returnPathEnabled && cboSucceeded)) { return pctx; } // 1. Initialize aux data structures this.pctx = pctx; this.aliasToOpInfo = new HashMap<String, Operator<? extends OperatorDesc>>(); // 2. Trigger transformation Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put(new RuleRegExp("R1", JoinOperator.getOperatorName() + "%"), new JoinAnnotate()); opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + "%"), new TableScanAnnotate()); Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null); GraphWalker ogw = new ForwardWalker(disp); List<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pctx.getTopOps().values()); ogw.startWalking(topNodes, null); return pctx; }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext; planMapper = pCtx.getContext().getPlanMapper(); if (nd instanceof ReduceSinkOperator) { ReduceSinkOperator rs = (ReduceSinkOperator) nd; SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs); if (sjInfo == null) { return null; } walkSubtree(sjInfo.getTsOp()); } if (nd instanceof AppMasterEventOperator) { AppMasterEventOperator ame = (AppMasterEventOperator) nd; AppMasterEventDesc c = ame.getConf(); if (c instanceof DynamicPruningEventDesc) { DynamicPruningEventDesc dped = (DynamicPruningEventDesc) c; mark(dped.getTableScan()); } } return null; }
private static void setRuntimeStatsDir(Operator<? extends OperatorDesc> op, ParseContext pctx) throws SemanticException { try { OperatorDesc conf = op.getConf(); if (conf != null) { LOG.info("setRuntimeStatsDir for " + op.getOperatorId()); String path = new Path(pctx.getContext().getExplainConfig().getExplainRootPath(), op.getOperatorId()).toString(); StatsPublisher statsPublisher = new FSStatsPublisher(); StatsCollectionContext runtimeStatsContext = new StatsCollectionContext(pctx.getConf()); runtimeStatsContext.setStatsTmpDir(path); if (!statsPublisher.init(runtimeStatsContext)) { LOG.error("StatsPublishing error: StatsPublisher is not initialized."); throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg()); } conf.setRuntimeStatsTmpDir(path); } else { LOG.debug("skip setRuntimeStatsDir for " + op.getOperatorId() + " because OperatorDesc is null"); } } catch (HiveException e) { throw new SemanticException(e); } }
private static void setRuntimeStatsDir(Operator<? extends OperatorDesc> op, ParseContext pctx) throws SemanticException { try { OperatorDesc conf = op.getConf(); if (conf != null) { LOG.info("setRuntimeStatsDir for " + op.getOperatorId()); String path = new Path(pctx.getContext().getExplainConfig().getExplainRootPath(), op.getOperatorId()).toString(); StatsPublisher statsPublisher = new FSStatsPublisher(); StatsCollectionContext runtimeStatsContext = new StatsCollectionContext(pctx.getConf()); runtimeStatsContext.setStatsTmpDir(path); if (!statsPublisher.init(runtimeStatsContext)) { LOG.error("StatsPublishing error: StatsPublisher is not initialized."); throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg()); } conf.setRuntimeStatsTmpDir(path); } else { LOG.debug("skip setRuntimeStatsDir for " + op.getOperatorId() + " because OperatorDesc is null"); } } catch (HiveException e) { throw new SemanticException(e); } }
@Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { TableScanOperator tsop = (TableScanOperator) nd; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; PrunedPartitionList partList = aspCtx.getParseContext().getPrunedPartitions(tsop); ColumnStatsList colStatsCached = aspCtx.getParseContext().getColStatsCached(partList); Table table = tsop.getConf().getTableMetadata(); try { // gather statistics for the first time and the attach it to table scan operator Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, colStatsCached, table, tsop); stats = applyRuntimeStats(aspCtx.getParseContext().getContext(), stats, tsop); tsop.setStatistics(stats); if (LOG.isDebugEnabled()) { LOG.debug("[0] STATS-" + tsop.toString() + " (" + table.getTableName() + "): " + stats.extendedToString()); } } catch (HiveException e) { LOG.debug("Failed to retrieve stats ", e); throw new SemanticException(e); } return null; }
stats = applyRuntimeStats(aspCtx.getParseContext().getContext(), stats, op); op.getConf().setStatistics(stats);
Context baseCtx = parseCtx.getContext();
public void initParseCtx(ParseContext pctx) { opToPartPruner = pctx.getOpToPartPruner(); opToPartList = pctx.getOpToPartList(); opToSamplePruner = pctx.getOpToSamplePruner(); topOps = pctx.getTopOps(); loadTableWork = pctx.getLoadTableWork(); loadFileWork = pctx.getLoadFileWork(); ctx = pctx.getContext(); destTableId = pctx.getDestTableId(); idToTableNameMap = pctx.getIdToTableNameMap(); uCtx = pctx.getUCtx(); listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); prunedPartitions = pctx.getPrunedPartitions(); tabNameToTabObject = pctx.getTabNameToTabObject(); fetchTask = pctx.getFetchTask(); setLineageInfo(pctx.getLineageInfo()); }
public void initParseCtx(ParseContext pctx) { opToPartPruner = pctx.getOpToPartPruner(); opToPartList = pctx.getOpToPartList(); opToSamplePruner = pctx.getOpToSamplePruner(); topOps = pctx.getTopOps(); loadTableWork = pctx.getLoadTableWork(); loadFileWork = pctx.getLoadFileWork(); ctx = pctx.getContext(); destTableId = pctx.getDestTableId(); idToTableNameMap = pctx.getIdToTableNameMap(); uCtx = pctx.getUCtx(); listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer(); prunedPartitions = pctx.getPrunedPartitions(); tabNameToTabObject = pctx.getTabNameToTabObject(); fetchTask = pctx.getFetchTask(); setLineageInfo(pctx.getLineageInfo()); }
/** * Create a clone of the parse context */ public ParseContext getParseContext(ParseContext pCtx, List<Task<? extends Serializable>> rootTasks) { ParseContext clone = new ParseContext(queryState, pCtx.getOpToPartPruner(), pCtx.getOpToPartList(), pCtx.getTopOps(), pCtx.getJoinOps(), pCtx.getSmbMapJoinOps(), pCtx.getLoadTableWork(), pCtx.getLoadFileWork(), pCtx.getColumnStatsAutoGatherContexts(), pCtx.getContext(), pCtx.getIdToTableNameMap(), pCtx.getDestTableId(), pCtx.getUCtx(), pCtx.getListMapJoinOpsNoReducer(), pCtx.getPrunedPartitions(), pCtx.getTabNameToTabObject(), pCtx.getOpToSamplePruner(), pCtx.getGlobalLimitCtx(), pCtx.getNameToSplitSample(), pCtx.getSemanticInputs(), rootTasks, pCtx.getOpToPartToSkewedPruner(), pCtx.getViewAliasToInput(), pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(), pCtx.getAnalyzeRewrite(), pCtx.getCreateTable(), pCtx.getCreateViewDesc(), pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema(), pCtx.getAcidSinks()); clone.setFetchTask(pCtx.getFetchTask()); clone.setLineageInfo(pCtx.getLineageInfo()); clone.setMapJoinOps(pCtx.getMapJoinOps()); clone.setRsToRuntimeValuesInfoMap(pCtx.getRsToRuntimeValuesInfoMap()); clone.setRsOpToTsOpMap(pCtx.getRsOpToTsOpMap()); return clone; }
/** * Create a clone of the parse context */ public ParseContext getParseContext(ParseContext pCtx, List<Task<? extends Serializable>> rootTasks) { ParseContext clone = new ParseContext(queryState, pCtx.getOpToPartPruner(), pCtx.getOpToPartList(), pCtx.getTopOps(), pCtx.getJoinOps(), pCtx.getSmbMapJoinOps(), pCtx.getLoadTableWork(), pCtx.getLoadFileWork(), pCtx.getColumnStatsAutoGatherContexts(), pCtx.getContext(), pCtx.getIdToTableNameMap(), pCtx.getDestTableId(), pCtx.getUCtx(), pCtx.getListMapJoinOpsNoReducer(), pCtx.getPrunedPartitions(), pCtx.getTabNameToTabObject(), pCtx.getOpToSamplePruner(), pCtx.getGlobalLimitCtx(), pCtx.getNameToSplitSample(), pCtx.getSemanticInputs(), rootTasks, pCtx.getOpToPartToSkewedPruner(), pCtx.getViewAliasToInput(), pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(), pCtx.getAnalyzeRewrite(), pCtx.getCreateTable(), pCtx.getCreateViewDesc(), pCtx.getMaterializedViewUpdateDesc(), pCtx.getQueryProperties(), pCtx.getViewProjectToTableSchema(), pCtx.getAcidSinks()); clone.setFetchTask(pCtx.getFetchTask()); clone.setLineageInfo(pCtx.getLineageInfo()); clone.setMapJoinOps(pCtx.getMapJoinOps()); clone.setRsToRuntimeValuesInfoMap(pCtx.getRsToRuntimeValuesInfoMap()); clone.setRsToSemiJoinBranchInfo(pCtx.getRsToSemiJoinBranchInfo()); clone.setColExprToGBMap(pCtx.getColExprToGBMap()); clone.setSemiJoinHints(pCtx.getSemiJoinHints()); return clone; }