@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  // 1. Trigger transformation
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
      new FilterTransformer());

  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
  GraphWalker ogw = new ForwardWalker(disp);

  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
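// FilterTransformer itself is not shown in this section. A minimal sketch of
// the shape such a NodeProcessor takes (the process(...) signature matches the
// processors shown below; the body is an illustrative assumption, not the
// actual implementation):
private static class FilterTransformer implements NodeProcessor {
  @Override
  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
      Object... nodeOutputs) throws SemanticException {
    FilterOperator filterOp = (FilterOperator) nd;
    // Rewrite the filter's predicate here, e.g. swap its ExprNodeDesc for a
    // transformed copy, then return null so the walk continues.
    return null;
  }
}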
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator tableScanOp = (TableScanOperator) nd;

  // 1. Get the alias from topOps: find the entry whose value is this exact
  // operator instance (reference equality is intentional)
  String opAlias = null;
  for (Map.Entry<String, TableScanOperator> topOpEntry : pctx.getTopOps().entrySet()) {
    if (topOpEntry.getValue() == tableScanOp) {
      opAlias = topOpEntry.getKey();
      break;
    }
  }
  assert opAlias != null;

  // 2. Record the alias for this operator in aliasToOpInfo
  aliasToOpInfo.put(opAlias, tableScanOp);
  return null;
}
/**
 * Get the indexes which can be used for rewrite.
 * @return a map from each indexed table to the list of its usable indexes
 * @throws SemanticException
 */
private Map<Table, List<Index>> getIndexesForRewrite() throws SemanticException {
  List<String> supportedIndexes = new ArrayList<String>();
  supportedIndexes.add(AggregateIndexHandler.class.getName());

  // Query the metastore to find out which columns we have indexed
  Collection<TableScanOperator> topTables = parseContext.getTopOps().values();
  Map<Table, List<Index>> indexes = new HashMap<Table, List<Index>>();
  for (TableScanOperator tsOp : topTables) {
    Table table = tsOp.getConf().getTableMetadata();
    List<Index> tblIndexes = IndexUtils.getIndexes(table, supportedIndexes);
    if (!tblIndexes.isEmpty()) {
      indexes.put(table, tblIndexes);
    }
  }
  return indexes;
}
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  // 1. Trigger transformation
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
      new StructInTransformer());

  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
  GraphWalker ogw = new ForwardWalker(disp);

  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
@Override
public ParseContext transform(ParseContext pCtx) throws SemanticException {
  // Create a walker which walks the tree in a DFS manner while maintaining the
  // operator stack. The dispatcher generates the plan from the operator tree.
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  String FS = FileSinkOperator.getOperatorName() + "%";
  opRules.put(new RuleRegExp("Sorted Dynamic Partition", FS), getSortDynPartProc(pCtx));

  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pCtx;
}
private static List<Entry<String, Long>> rankTablesByAccumulatedSize(ParseContext pctx) {
  // Accumulate the data size of every table scan, keyed by "db.table" name
  Map<String, Long> tableToTotalSize = new HashMap<>();
  for (Entry<String, TableScanOperator> e : pctx.getTopOps().entrySet()) {
    TableScanOperator tsOp = e.getValue();
    String tableName = tsOp.getConf().getTableMetadata().getDbName() + "."
        + tsOp.getConf().getTableMetadata().getTableName();
    long tableSize = tsOp.getStatistics() != null ? tsOp.getStatistics().getDataSize() : 0L;
    Long totalSize = tableToTotalSize.get(tableName);
    if (totalSize != null) {
      tableToTotalSize.put(tableName, StatsUtils.safeAdd(totalSize, tableSize));
    } else {
      tableToTotalSize.put(tableName, tableSize);
    }
  }

  // Sort tables by accumulated size, largest first
  List<Entry<String, Long>> sortedTables = new ArrayList<>(tableToTotalSize.entrySet());
  Collections.sort(sortedTables, Collections.reverseOrder(
      new Comparator<Map.Entry<String, Long>>() {
        @Override
        public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
          return (o1.getValue()).compareTo(o2.getValue());
        }
      }));
  return sortedTables;
}
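// The anonymous Comparator wrapped in Collections.reverseOrder above is
// behavior-equivalent to the following Java 8 form (a suggested simplification,
// assuming the surrounding codebase targets Java 8 or later):
List<Entry<String, Long>> sortedTables = new ArrayList<>(tableToTotalSize.entrySet());
sortedTables.sort(Map.Entry.<String, Long>comparingByValue().reversed());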
private Set<Set<Operator<?>>> getComponents(OptimizeTezProcContext procCtx) {
  Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
  deque.addAll(procCtx.parseContext.getTopOps().values());

  // Bookkeeping for Tarjan's strongly-connected-components algorithm:
  // discovery indexes, low-link values, and the stack of nodes on the
  // current search path
  AtomicInteger index = new AtomicInteger();
  Map<Operator<?>, Integer> indexes = new HashMap<Operator<?>, Integer>();
  Map<Operator<?>, Integer> lowLinks = new HashMap<Operator<?>, Integer>();
  Stack<Operator<?>> nodes = new Stack<Operator<?>>();
  Set<Set<Operator<?>>> components = new LinkedHashSet<Set<Operator<?>>>();

  for (Operator<?> o : deque) {
    if (!indexes.containsKey(o)) {
      connect(o, index, nodes, indexes, lowLinks, components, procCtx.parseContext);
    }
  }
  return components;
}
@Override
public ParseContext transform(ParseContext pCtx) throws SemanticException {
  // Create a walker which walks the tree in a DFS manner while maintaining the
  // operator stack. The dispatcher generates the plan from the operator tree.
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  String FS = FileSinkOperator.getOperatorName() + "%";
  opRules.put(new RuleRegExp("Sorted Dynamic Partition Time Granularity", FS),
      getSortDynPartProc(pCtx));

  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, null);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pCtx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pCtx;
}
private Set<Set<Operator<?>>> getComponents(OptimizeSparkProcContext procCtx) {
  // Same Tarjan bookkeeping as the Tez variant above
  AtomicInteger index = new AtomicInteger();
  Map<Operator<?>, Integer> indexes = new HashMap<Operator<?>, Integer>();
  Map<Operator<?>, Integer> lowLinks = new HashMap<Operator<?>, Integer>();
  Stack<Operator<?>> nodes = new Stack<Operator<?>>();
  Set<Set<Operator<?>>> components = new HashSet<Set<Operator<?>>>();

  for (Operator<?> o : procCtx.getParseContext().getTopOps().values()) {
    if (!indexes.containsKey(o)) {
      connect(o, index, nodes, indexes, lowLinks, components);
    }
  }
  return components;
}
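// connect(...) is not shown in this section. Since the index/lowLink/stack
// trio above is the bookkeeping of Tarjan's strongly-connected-components
// algorithm, here is a minimal sketch of what it presumably does. This is an
// assumption: children are discovered via Operator.getChildOperators(), and
// the Tez variant additionally threads the ParseContext through to follow
// extra edges.
private void connect(Operator<?> o, AtomicInteger index, Stack<Operator<?>> nodes,
    Map<Operator<?>, Integer> indexes, Map<Operator<?>, Integer> lowLinks,
    Set<Set<Operator<?>>> components) {
  // Assign the next discovery index; a node's low-link starts as its own index
  int depth = index.getAndIncrement();
  indexes.put(o, depth);
  lowLinks.put(o, depth);
  nodes.push(o);

  for (Operator<?> child : o.getChildOperators()) {
    if (!indexes.containsKey(child)) {
      // Tree edge: recurse, then propagate the child's low-link upwards
      connect(child, index, nodes, indexes, lowLinks, components);
      lowLinks.put(o, Math.min(lowLinks.get(o), lowLinks.get(child)));
    } else if (nodes.contains(child)) {
      // Back edge to a node still on the stack: update the low-link
      lowLinks.put(o, Math.min(lowLinks.get(o), indexes.get(child)));
    }
  }

  // o is the root of a strongly connected component: pop the component off
  if (lowLinks.get(o).equals(indexes.get(o))) {
    Set<Operator<?>> component = new LinkedHashSet<Operator<?>>();
    components.add(component);
    Operator<?> member;
    do {
      member = nodes.pop();
      component.add(member);
    } while (member != o);
  }
}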
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  // 1. We apply the transformation
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", "(" + ReduceSinkOperator.getOperatorName() + "%)"),
      new ReduceSinkOutputOperatorAnnotator());
  GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null));

  ArrayList<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
@Override
public ParseContext transform(ParseContext pctx) throws SemanticException {
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(new RuleRegExp("R1", "TS%.*RS%JOIN%"), getSkewJoinProc(pctx));
  SkewJoinOptProcCtx skewJoinOptProcCtx = new SkewJoinOptProcCtx(pctx);

  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, skewJoinOptProcCtx);
  GraphWalker ogw = new DefaultGraphWalker(disp);

  // Create a list of top-op nodes
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(pctx.getTopOps().values());
  ogw.startWalking(topNodes, null);
  return pctx;
}
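// Unlike the single-operator rules above, the pattern "TS%.*RS%JOIN%" is
// matched against the concatenated names of the operators on the walker's
// current stack, so the skew-join processor fires only on a join reached from
// a table scan through a reduce sink somewhere along the path.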
private Set<String> getTablesUsed(ParseContext parseCtx) throws SemanticException {
  Set<String> tablesUsed = new HashSet<>();
  for (TableScanOperator topOp : parseCtx.getTopOps().values()) {
    Table table = topOp.getConf().getTableMetadata();
    if (!table.isMaterializedTable() && !table.isView()) {
      // Add to signature
      tablesUsed.add(table.getFullyQualifiedName());
    }
  }
  return tablesUsed;
}
private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx,
    Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
  // Create a walker which walks the tree in a DFS manner while maintaining
  // the operator stack.
  Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
  opRules.put(
      new RuleRegExp("Remove dynamic pruning by size",
          AppMasterEventOperator.getOperatorName() + "%"),
      new RemoveDynamicPruningBySize());

  // The dispatcher fires the processor corresponding to the closest matching
  // rule and passes the context along
  Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
  List<Node> topNodes = new ArrayList<Node>();
  topNodes.addAll(procCtx.parseContext.getTopOps().values());
  GraphWalker ogw = new ForwardWalker(disp);
  ogw.startWalking(topNodes, null);
}