@Override public ParseContext transform(ParseContext pctx) throws SemanticException { Map<String, TableScanOperator> topOps = pctx.getTopOps(); if (pctx.getQueryProperties().isQuery() && !pctx.getQueryProperties().isAnalyzeCommand() && topOps.size() == 1) { // no join, no groupby, no distinct, no lateral view, no subq, // no CTAS or insert, not analyze command, and single sourced. String alias = (String) pctx.getTopOps().keySet().toArray()[0]; TableScanOperator topOp = pctx.getTopOps().values().iterator().next(); try { FetchTask fetchTask = optimize(pctx, alias, topOp); if (fetchTask != null) { pctx.setFetchTask(fetchTask); } } catch (Exception e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); if (e instanceof SemanticException) { throw (SemanticException) e; } throw new SemanticException(e.getMessage(), e); } } return pctx; }
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { Map<String, TableScanOperator> topOps = pctx.getTopOps(); if (pctx.getQueryProperties().isQuery() && !pctx.getQueryProperties().isAnalyzeCommand() && topOps.size() == 1) { // no join, no groupby, no distinct, no lateral view, no subq, // no CTAS or insert, not analyze command, and single sourced. String alias = (String) pctx.getTopOps().keySet().toArray()[0]; TableScanOperator topOp = pctx.getTopOps().values().iterator().next(); try { FetchTask fetchTask = optimize(pctx, alias, topOp); if (fetchTask != null) { pctx.setFetchTask(fetchTask); } } catch (Exception e) { // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); if (e instanceof SemanticException) { throw (SemanticException) e; } throw new SemanticException(e.getMessage(), e); } } return pctx; }
/**
 * Walks the operator tree and hands every GBY-RS-GBY[-SEL]-FS chain to a
 * {@code SingleGBYProcessor}.
 *
 * <p>Nothing is done (the context is returned as-is) when a fetch task is already set,
 * when the statement is not a query, is an analyze rewrite or a CTAS, when more than one
 * load-file work exists, or when any load-table work exists.
 *
 * @param pctx the parse context whose operator tree is walked
 * @return the same {@code pctx} instance
 * @throws SemanticException propagated from the graph walk
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  if (pctx.getFetchTask() != null) {
    return pctx;
  }
  if (!pctx.getQueryProperties().isQuery()
      || pctx.getQueryProperties().isAnalyzeRewrite()
      || pctx.getQueryProperties().isCTAS()) {
    return pctx;
  }
  if (pctx.getLoadFileWork().size() > 1 || !pctx.getLoadTableWork().isEmpty()) {
    return pctx;
  }

  // Rule patterns are operator names, each terminated by '%'.
  final String gby = GroupByOperator.getOperatorName() + "%";
  final String rs = ReduceSinkOperator.getOperatorName() + "%";
  final String sel = SelectOperator.getOperatorName() + "%";
  final String fs = FileSinkOperator.getOperatorName() + "%";
  final String gbyRsGby = gby + rs + gby;

  // Insertion order of the rules is kept by the LinkedHashMap.
  Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
  rules.put(new RuleRegExp("R1", gbyRsGby + sel + fs), new SingleGBYProcessor(pctx));
  rules.put(new RuleRegExp("R2", gbyRsGby + fs), new SingleGBYProcessor(pctx));

  GraphWalker walker = new DefaultGraphWalker(new DefaultRuleDispatcher(null, rules, null));

  // Start the walk from every top operator of the plan.
  ArrayList<Node> startNodes = new ArrayList<Node>(pctx.getTopOps().values());
  walker.startWalking(startNodes, null);
  return pctx;
}
/**
 * Walks the operator tree and hands every GBY-RS-GBY[-SEL]-FS chain to a
 * {@code SingleGBYProcessor}.
 *
 * <p>Nothing is done (the context is returned as-is) when a fetch task is already set,
 * when the statement is not a query, is an analyze rewrite or a CTAS, when more than one
 * load-file work exists, or when any load-table work exists.
 *
 * @param pctx the parse context whose operator tree is walked
 * @return the same {@code pctx} instance
 * @throws SemanticException propagated from the graph walk
 */
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  if (pctx.getFetchTask() != null) {
    return pctx;
  }
  if (!pctx.getQueryProperties().isQuery()
      || pctx.getQueryProperties().isAnalyzeRewrite()
      || pctx.getQueryProperties().isCTAS()) {
    return pctx;
  }
  if (pctx.getLoadFileWork().size() > 1 || !pctx.getLoadTableWork().isEmpty()) {
    return pctx;
  }

  // Rule patterns are operator names, each terminated by '%'.
  final String gby = GroupByOperator.getOperatorName() + "%";
  final String rs = ReduceSinkOperator.getOperatorName() + "%";
  final String sel = SelectOperator.getOperatorName() + "%";
  final String fs = FileSinkOperator.getOperatorName() + "%";
  final String gbyRsGby = gby + rs + gby;

  // Insertion order of the rules is kept by the LinkedHashMap.
  Map<Rule, NodeProcessor> rules = new LinkedHashMap<Rule, NodeProcessor>();
  rules.put(new RuleRegExp("R1", gbyRsGby + sel + fs), new SingleGBYProcessor(pctx));
  rules.put(new RuleRegExp("R2", gbyRsGby + fs), new SingleGBYProcessor(pctx));

  GraphWalker walker = new DefaultGraphWalker(new DefaultRuleDispatcher(null, rules, null));

  // Start the walk from every top operator of the plan.
  ArrayList<Node> startNodes = new ArrayList<Node>(pctx.getTopOps().values());
  walker.startWalking(startNodes, null);
  return pctx;
}
protected void convertGroupByMapSideSortedGroupBy( HiveConf conf, GroupByOperator groupByOp, int depth) { pGraphContext.getQueryProperties().setHasMapGroupBy(true); if (removeChildren(groupByOp, depth)) { // Use bucketized hive input format - that makes sure that one mapper reads the entire file groupByOp.setUseBucketizedHiveInputFormat(true); groupByOp.getConf().setMode(GroupByDesc.Mode.FINAL); } }
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { if (pctx.getFetchTask() != null || !pctx.getQueryProperties().isQuery() || pctx.getQueryProperties().isAnalyzeRewrite() || pctx.getQueryProperties().isCTAS() || pctx.getLoadFileWork().size() > 1 || !pctx.getLoadTableWork().isEmpty() // If getNameToSplitSample is not empty, at least one of the source // tables is being sampled and we can not optimize. || !pctx.getNameToSplitSample().isEmpty()) { return pctx; } String TS = TableScanOperator.getOperatorName() + "%"; String GBY = GroupByOperator.getOperatorName() + "%"; String RS = ReduceSinkOperator.getOperatorName() + "%"; String SEL = SelectOperator.getOperatorName() + "%"; String FS = FileSinkOperator.getOperatorName() + "%"; Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put(new RuleRegExp("R1", TS + SEL + GBY + RS + GBY + SEL + FS), new MetaDataProcessor(pctx)); opRules.put(new RuleRegExp("R2", TS + SEL + GBY + RS + GBY + FS), new MetaDataProcessor(pctx)); NodeProcessorCtx soProcCtx = new StatsOptimizerProcContext(); Dispatcher disp = new DefaultRuleDispatcher(null, opRules, soProcCtx); GraphWalker ogw = new DefaultGraphWalker(disp); ArrayList<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pctx.getTopOps().values()); ogw.startWalking(topNodes, null); return pctx; }
protected void convertGroupByMapSideSortedGroupBy( HiveConf conf, GroupByOperator groupByOp, int depth) { pGraphContext.getQueryProperties().setHasMapGroupBy(true); if (removeChildren(groupByOp, depth)) { // Use bucketized hive input format - that makes sure that one mapper reads the entire file groupByOp.setUseBucketizedHiveInputFormat(true); groupByOp.getConf().setMode(GroupByDesc.Mode.FINAL); } }
@Override public ParseContext transform(ParseContext pctx) throws SemanticException { if (pctx.getFetchTask() != null || !pctx.getQueryProperties().isQuery() || pctx.getQueryProperties().isAnalyzeRewrite() || pctx.getQueryProperties().isCTAS() || pctx.getLoadFileWork().size() > 1 || !pctx.getLoadTableWork().isEmpty() // If getNameToSplitSample is not empty, at least one of the source // tables is being sampled and we can not optimize. || !pctx.getNameToSplitSample().isEmpty()) { return pctx; } String TS = TableScanOperator.getOperatorName() + "%"; String GBY = GroupByOperator.getOperatorName() + "%"; String RS = ReduceSinkOperator.getOperatorName() + "%"; String SEL = SelectOperator.getOperatorName() + "%"; String FS = FileSinkOperator.getOperatorName() + "%"; Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); opRules.put(new RuleRegExp("R1", TS + SEL + GBY + RS + GBY + SEL + FS), new MetaDataProcessor(pctx)); opRules.put(new RuleRegExp("R2", TS + SEL + GBY + RS + GBY + FS), new MetaDataProcessor(pctx)); NodeProcessorCtx soProcCtx = new StatsOptimizerProcContext(); Dispatcher disp = new DefaultRuleDispatcher(null, opRules, soProcCtx); GraphWalker ogw = new DefaultGraphWalker(disp); ArrayList<Node> topNodes = new ArrayList<Node>(); topNodes.addAll(pctx.getTopOps().values()); ogw.startWalking(topNodes, null); return pctx; }
private void setLoadFileLocation( final ParseContext pCtx, LoadFileDesc lfd) throws SemanticException { // CTAS; make the movetask's destination directory the table's destination. Long txnIdForCtas = null; int stmtId = 0; // CTAS cannot be part of multi-txn stmt FileSinkDesc dataSinkForCtas = null; String loc = null; if (pCtx.getQueryProperties().isCTAS()) { CreateTableDesc ctd = pCtx.getCreateTable(); dataSinkForCtas = ctd.getAndUnsetWriter(); txnIdForCtas = ctd.getInitialMmWriteId(); loc = ctd.getLocation(); } else { loc = pCtx.getCreateViewDesc().getLocation(); } Path location = (loc == null) ? getDefaultCtasLocation(pCtx) : new Path(loc); if (txnIdForCtas != null) { dataSinkForCtas.setDirName(location); location = new Path(location, AcidUtils.deltaSubdir(txnIdForCtas, txnIdForCtas, stmtId)); lfd.setSourcePath(location); if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("Setting MM CTAS to " + location); } } if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("Location for LFD is being set to " + location + "; moving from " + lfd.getSourcePath()); } lfd.setTargetDir(location); }
if (!pctx.getQueryProperties().isCTAS() && !pctx.getQueryProperties().isMaterializedView() && pctx.getQueryProperties().isQuery() && pctx.getCreateTable() == null && pctx.getCreateViewDesc() == null
@SuppressWarnings("unchecked") private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator source) throws Exception { String mode = HiveConf.getVar( pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION); boolean aggressive = "more".equals(mode); final int limit = pctx.getQueryProperties().getOuterQueryLimit(); // limit = 0 means that we do not need any task. if (limit == 0) { return null; } FetchData fetch = checkTree(aggressive, pctx, alias, source); if (fetch != null && checkThreshold(fetch, limit, pctx)) { FetchWork fetchWork = fetch.convertToWork(); FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork); fetchWork.setSink(fetch.completed(pctx, fetchWork)); fetchWork.setSource(source); fetchWork.setLimit(limit); return fetchTask; } return null; }
@SuppressWarnings("unchecked") private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator source) throws Exception { String mode = HiveConf.getVar( pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION); boolean aggressive = "more".equals(mode); final int limit = pctx.getQueryProperties().getOuterQueryLimit(); // limit = 0 means that we do not need any task. if (limit == 0) { return null; } FetchData fetch = checkTree(aggressive, pctx, alias, source); if (fetch != null && checkThreshold(fetch, limit, pctx)) { FetchWork fetchWork = fetch.convertToWork(); FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork, pctx.getConf()); fetchWork.setSink(fetch.completed(pctx, fetchWork)); fetchWork.setSource(source); fetchWork.setLimit(limit); return fetchTask; } return null; }
Class<? extends InputFormat> inputFormat = table.getInputFormatClass(); if (parseContext.getQueryProperties().isAnalyzeCommand()) { Preconditions.checkArgument(tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0, basicStatsWork.setNoScanAnalyzeCommand(parseContext.getQueryProperties().isNoScanAnalyzeCommand()); StatsWork columnStatsWork = new StatsWork(table, basicStatsWork, parseContext.getConf()); columnStatsWork.collectStatsFromAggregator(tableScan.getConf()); if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { statsTask.setParentTasks(null); context.rootTasks.remove(context.currentTask);
/**
 * Computes the default warehouse path for the table or materialized view being created.
 *
 * <p>The target name comes from the create-table descriptor for a CTAS and from the
 * create-view descriptor for a materialized view; the external flag is only read for a
 * CTAS. The database part of the name must exist.
 *
 * @param pCtx parse context carrying the create-table / create-view descriptor
 * @return the default table path reported by the {@code Warehouse}
 * @throws SemanticException if the database does not exist (thrown unchanged, not
 *         re-wrapped), or wrapping any {@code HiveException} / {@code MetaException}
 *         raised while resolving the path
 */
private Path getDefaultCtasLocation(final ParseContext pCtx) throws SemanticException {
  try {
    String protoName = null;
    boolean isExternal = false;
    if (pCtx.getQueryProperties().isCTAS()) {
      protoName = pCtx.getCreateTable().getTableName();
      isExternal = pCtx.getCreateTable().isExternal();
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
      protoName = pCtx.getCreateViewDesc().getViewName();
    }
    // names[0] is used as the database name, names[1] as the table name.
    String[] names = Utilities.getDbTableName(protoName);
    if (!db.databaseExists(names[0])) {
      throw new SemanticException("ERROR: The database " + names[0] + " does not exist.");
    }
    Warehouse wh = new Warehouse(conf);
    return wh.getDefaultTablePath(db.getDatabase(names[0]), names[1], isExternal);
  } catch (SemanticException e) {
    // SemanticException is a HiveException subtype; without this clause the error raised
    // above would be caught by the handler below and wrapped in a second
    // SemanticException, burying the original message.
    throw e;
  } catch (HiveException e) {
    throw new SemanticException(e);
  } catch (MetaException e) {
    throw new SemanticException(e);
  }
}
pGraphContext.getQueryProperties().setHasMapGroupBy(true); ReduceSinkOperator reduceSinkOp = (ReduceSinkOperator)groupByOp.getChildOperators().get(0);
Class<? extends InputFormat> inputFormat = table.getInputFormatClass(); if (parseContext.getQueryProperties().isAnalyzeCommand()) { basicStatsWork.setNoScanAnalyzeCommand(parseContext.getQueryProperties().isNoScanAnalyzeCommand()); StatsWork columnStatsWork = new StatsWork(table, basicStatsWork, parseContext.getConf()); columnStatsWork.collectStatsFromAggregator(tableScan.getConf()); if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { statsTask.setParentTasks(null); context.rootTasks.remove(context.currentTask);
.getInputFormatClass(); if (parseContext.getQueryProperties().isAnalyzeCommand()) { Preconditions.checkArgument(tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0, boolean partialScan = parseContext.getQueryProperties().isPartialScanAnalyzeCommand(); boolean noScan = parseContext.getQueryProperties().isNoScanAnalyzeCommand(); if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) { if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { statsTask.setParentTasks(null); statsWork.setNoScanAnalyzeCommand(true); if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) { handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask);
mapCurrCtx.put(op, new GenMapRedCtx(currTask, currAliasId)); if (parseCtx.getQueryProperties().isAnalyzeCommand()) { boolean noScan = parseCtx.getQueryProperties().isNoScanAnalyzeCommand(); if (BasicStatsNoJobTask.canUseFooterScan(table, inputFormat)) {
/**
 * Builds a new {@link ParseContext} from an existing one.
 *
 * <p>Every constructor argument except the query state and {@code rootTasks} is taken
 * straight from the corresponding getter of {@code pCtx}; this method itself copies
 * nothing. State that is not part of the constructor is carried over through setters.
 *
 * @param pCtx the parse context to clone
 * @param rootTasks the root tasks to install in the new context
 * @return the newly created parse context
 */
public ParseContext getParseContext(ParseContext pCtx,
    List<Task<? extends Serializable>> rootTasks) {
  // Argument order must match the ParseContext constructor exactly.
  ParseContext copy = new ParseContext(
      queryState,
      pCtx.getOpToPartPruner(),
      pCtx.getOpToPartList(),
      pCtx.getTopOps(),
      pCtx.getJoinOps(),
      pCtx.getSmbMapJoinOps(),
      pCtx.getLoadTableWork(),
      pCtx.getLoadFileWork(),
      pCtx.getColumnStatsAutoGatherContexts(),
      pCtx.getContext(),
      pCtx.getIdToTableNameMap(),
      pCtx.getDestTableId(),
      pCtx.getUCtx(),
      pCtx.getListMapJoinOpsNoReducer(),
      pCtx.getPrunedPartitions(),
      pCtx.getTabNameToTabObject(),
      pCtx.getOpToSamplePruner(),
      pCtx.getGlobalLimitCtx(),
      pCtx.getNameToSplitSample(),
      pCtx.getSemanticInputs(),
      rootTasks,
      pCtx.getOpToPartToSkewedPruner(),
      pCtx.getViewAliasToInput(),
      pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(),
      pCtx.getAnalyzeRewrite(),
      pCtx.getCreateTable(),
      pCtx.getCreateViewDesc(),
      pCtx.getQueryProperties(),
      pCtx.getViewProjectToTableSchema(),
      pCtx.getAcidSinks());

  // Remaining state is not accepted by the constructor.
  copy.setFetchTask(pCtx.getFetchTask());
  copy.setLineageInfo(pCtx.getLineageInfo());
  copy.setMapJoinOps(pCtx.getMapJoinOps());
  copy.setRsToRuntimeValuesInfoMap(pCtx.getRsToRuntimeValuesInfoMap());
  copy.setRsOpToTsOpMap(pCtx.getRsOpToTsOpMap());
  return copy;
}
/**
 * Builds a new {@link ParseContext} from an existing one.
 *
 * <p>Every constructor argument except the query state and {@code rootTasks} is taken
 * straight from the corresponding getter of {@code pCtx}; this method itself copies
 * nothing. State that is not part of the constructor is carried over through setters.
 *
 * @param pCtx the parse context to clone
 * @param rootTasks the root tasks to install in the new context
 * @return the newly created parse context
 */
public ParseContext getParseContext(ParseContext pCtx,
    List<Task<? extends Serializable>> rootTasks) {
  // Argument order must match the ParseContext constructor exactly.
  ParseContext copy = new ParseContext(
      queryState,
      pCtx.getOpToPartPruner(),
      pCtx.getOpToPartList(),
      pCtx.getTopOps(),
      pCtx.getJoinOps(),
      pCtx.getSmbMapJoinOps(),
      pCtx.getLoadTableWork(),
      pCtx.getLoadFileWork(),
      pCtx.getColumnStatsAutoGatherContexts(),
      pCtx.getContext(),
      pCtx.getIdToTableNameMap(),
      pCtx.getDestTableId(),
      pCtx.getUCtx(),
      pCtx.getListMapJoinOpsNoReducer(),
      pCtx.getPrunedPartitions(),
      pCtx.getTabNameToTabObject(),
      pCtx.getOpToSamplePruner(),
      pCtx.getGlobalLimitCtx(),
      pCtx.getNameToSplitSample(),
      pCtx.getSemanticInputs(),
      rootTasks,
      pCtx.getOpToPartToSkewedPruner(),
      pCtx.getViewAliasToInput(),
      pCtx.getReduceSinkOperatorsAddedByEnforceBucketingSorting(),
      pCtx.getAnalyzeRewrite(),
      pCtx.getCreateTable(),
      pCtx.getCreateViewDesc(),
      pCtx.getMaterializedViewUpdateDesc(),
      pCtx.getQueryProperties(),
      pCtx.getViewProjectToTableSchema(),
      pCtx.getAcidSinks());

  // Remaining state is not accepted by the constructor.
  copy.setFetchTask(pCtx.getFetchTask());
  copy.setLineageInfo(pCtx.getLineageInfo());
  copy.setMapJoinOps(pCtx.getMapJoinOps());
  copy.setRsToRuntimeValuesInfoMap(pCtx.getRsToRuntimeValuesInfoMap());
  copy.setRsToSemiJoinBranchInfo(pCtx.getRsToSemiJoinBranchInfo());
  copy.setColExprToGBMap(pCtx.getColExprToGBMap());
  copy.setSemiJoinHints(pCtx.getSemiJoinHints());
  return copy;
}