gbKeys.add(gbAttrs.gbKeys.get(i)); colOutputName = SemanticAnalyzer.getColumnInternalName(i); colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false)); outputColNames.add(colOutputName); gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i)); colExprMap.put(colOutputName, gbKeys.get(i)); boolean inclGrpID = inclGrpSetInMapSide(gbAttrs); if (inclGrpID) { groupingSetsPosition = gbKeys.size(); addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap); colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1); colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false)); outputColNames.add(colOutputName); Operator gbOp = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.HASH, outputColNames, gbKeys, aggregations, false, gbAttrs.groupByMemoryUsage, gbAttrs.memoryThreshold, gbAttrs.grpSets, inclGrpID, groupingSetsPosition, gbAttrs.containsDistinctAggr), new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
@SuppressWarnings("nls") private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit) throws SemanticException { // A map-only job can be optimized - instead of converting it to a // map-reduce job, we can have another map // job to do the same to avoid the cost of sorting in the map-reduce phase. // A better approach would be to // write into a local file and then have a map-only job. // Add the limit operator to get the value fields RowResolver inputRR = opParseCtx.get(input).getRowResolver(); LimitDesc limitDesc = new LimitDesc(offset, limit); globalLimitCtx.setLastReduceLimitDesc(limitDesc); Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild( limitDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR); if (LOG.isDebugEnabled()) { LOG.debug("Created LimitOperator Plan for clause: " + dest + " row schema: " + inputRR.toString()); } return limitMap; }
/**
 * Builds the ReduceSink operator that sits between the first reduce-side
 * GroupBy and the final reduce-side GroupBy of a two-stage aggregation.
 * Group-by keys become reduce keys; aggregation buffers travel as values.
 *
 * @param inputOpAf holds the first reduce-side GroupBy as its sole input
 * @param gbInfo    aggregation attributes gathered for this group-by
 * @return an OpAttr wrapping the new ReduceSinkOperator
 */
private static OpAttr genReduceGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> exprMap = new HashMap<String, ExprNodeDesc>();
  ArrayList<String> outColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> outColInfos = new ArrayList<ColumnInfo>();

  GroupByOperator gb1 = (GroupByOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> gb1Signature = gb1.getSchema().getSignature();

  // The group-by keys become the reduce-sink keys.
  ArrayList<ExprNodeDesc> keys = getReduceKeysForRS(gb1, 0, gbInfo.gbKeys.size() - 1,
      outColNames, false, outColInfos, exprMap, true, true);
  if (inclGrpSetInReduceSide(gbInfo)) {
    // Forward the grouping-set id column produced by the first GroupBy.
    addGrpSetCol(false, gb1Signature.get(keys.size()).getInternalName(), true, keys,
        outColNames, outColInfos, exprMap);
  }

  // Partial aggregation buffers are shipped as reduce-sink values.
  ArrayList<ExprNodeDesc> values = getValueKeysForRS(gb1, gb1.getConf().getKeys().size(),
      outColNames, outColInfos, exprMap, true, true);

  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      PlanUtils.getReduceSinkDesc(keys, values, outColNames, true, -1,
          getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo),
          AcidUtils.Operation.NOT_ACID),
      new RowSchema(outColInfos), gb1);
  rsOp.setColumnExprMap(exprMap);

  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
/** * Inserts a filter below the table scan operator. Construct the filter * from the filter expression provided. * @param tableScanOp the table scan operators * @param filterExpr the filter expression */ private void insertFilterOnTop( TableScanOperator tableScanOp, ExprNodeDesc filterExpr) { // Get the top operator and it's child, all operators have a single parent Operator<? extends OperatorDesc> currChild = tableScanOp.getChildOperators().get(0); // Create the filter Operator and update the parents and children appropriately tableScanOp.setChildOperators(null); currChild.setParentOperators(null); Operator<FilterDesc> filter = OperatorFactory.getAndMakeChild( new FilterDesc(filterExpr, false), new RowSchema(tableScanOp.getSchema().getSignature()), tableScanOp); OperatorFactory.makeChild(filter, currChild); }
columnExprMap.put(uInfo.getInternalName(), column); return OperatorFactory.getAndMakeChild(new SelectDesc( columns, colName), new RowSchema(uColumnInfo), columnExprMap, origInputOp); } else { return origInputOp;
int keyLength = reduceKeys.size(); addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap); keyLength++; if (mapGB.getConf().getKeys().size() > reduceKeys.size()) { reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys() .size(), outputValueColumnNames, colInfoLst, colExprMap, false, false); ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS( gbInfo), getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB);
for (int i = 0; i < gbInfo.gbKeys.size(); i++) { reduceKeys.add(gbInfo.gbKeys.get(i)); String colOutputName = SemanticAnalyzer.getColumnInternalName(i); outputKeyColumnNames.add(colOutputName); colInfoLst.add(new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + colOutputName, gbInfo.gbKeyTypes.get(i), "", false)); colExprMap.put(colOutputName, gbInfo.gbKeys.get(i)); int keyLength = reduceKeys.size(); String udafName = SemanticAnalyzer.getColumnInternalName(reduceKeys.size()); outputKeyColumnNames.add(udafName); for (int i = 0; i < gbInfo.distExprNodes.size(); i++) { reduceKeys.add(gbInfo.distExprNodes.get(i)); String field = Utilities.ReduceField.KEY.toString() + "." + udafName + ":" + i + "." + outputColName; ColumnInfo colInfo = new ColumnInfo(field, gbInfo.distExprNodes.get(i).getTypeInfo(), null, colExprMap.put(field, gbInfo.distExprNodes.get(i)); ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS(gbInfo), getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema( colInfoLst), inputOpAf.inputs.get(0));
OpAttr[] inputs = new OpAttr[inputsList.size()]; for (int i = 0; i < inputs.length; i++) { inputs[i] = dispatch(inputsList.get(i)); Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild( semanticAnalyzer.getOpContext(), unionDesc, new RowSchema(cinfoLst), children);
/**
 * Builds the ReduceSink operator that sits between the first reduce-side
 * GroupBy and the final reduce-side GroupBy of a two-stage aggregation.
 * Group-by keys become reduce keys; aggregation buffers travel as values.
 *
 * @param inputOpAf holds the first reduce-side GroupBy as its sole input
 * @param gbInfo    aggregation attributes gathered for this group-by
 * @return an OpAttr wrapping the new ReduceSinkOperator
 */
private static OpAttr genReduceGBRS(OpAttr inputOpAf, GBInfo gbInfo) throws SemanticException {
  Map<String, ExprNodeDesc> exprMap = new HashMap<String, ExprNodeDesc>();
  ArrayList<String> outColNames = new ArrayList<String>();
  ArrayList<ColumnInfo> outColInfos = new ArrayList<ColumnInfo>();

  GroupByOperator gb1 = (GroupByOperator) inputOpAf.inputs.get(0);
  List<ColumnInfo> gb1Signature = gb1.getSchema().getSignature();

  // The group-by keys become the reduce-sink keys.
  ArrayList<ExprNodeDesc> keys = getReduceKeysForRS(gb1, 0, gbInfo.gbKeys.size() - 1,
      outColNames, false, outColInfos, exprMap, true, true);
  if (inclGrpSetInReduceSide(gbInfo)) {
    // Forward the grouping-set id column produced by the first GroupBy.
    addGrpSetCol(false, gb1Signature.get(keys.size()).getInternalName(), true, keys,
        outColNames, outColInfos, exprMap);
  }

  // Partial aggregation buffers are shipped as reduce-sink values.
  ArrayList<ExprNodeDesc> values = getValueKeysForRS(gb1, gb1.getConf().getKeys().size(),
      outColNames, outColInfos, exprMap, true, true);

  ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
      PlanUtils.getReduceSinkDesc(keys, values, outColNames, true, -1,
          getNumPartFieldsForReduceSideRS(gbInfo), getParallelismForReduceSideRS(gbInfo),
          AcidUtils.Operation.NOT_ACID),
      new RowSchema(outColInfos), gb1);
  rsOp.setColumnExprMap(exprMap);

  return new OpAttr("", new HashSet<Integer>(), rsOp);
}
columnExprMap.put(uInfo.getInternalName(), column); return OperatorFactory.getAndMakeChild(new SelectDesc( columns, colName), new RowSchema(uColumnInfo), columnExprMap, origInputOp); } else { return origInputOp;
int keyLength = reduceKeys.size(); addGrpSetCol(false, SemanticAnalyzer.getColumnInternalName(reduceKeys.size()), true, reduceKeys, outputKeyColumnNames, colInfoLst, colExprMap); keyLength++; if (mapGB.getConf().getKeys().size() > reduceKeys.size()) { reduceKeys.addAll(getReduceKeysForRS(mapGB, reduceKeys.size(), mapGB.getConf().getKeys() .size(), outputValueColumnNames, colInfoLst, colExprMap, false, false); ReduceSinkOperator rsOp = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(PlanUtils .getReduceSinkDesc(reduceKeys, keyLength, reduceValues, gbInfo.distColIndices, outputKeyColumnNames, outputValueColumnNames, true, -1, getNumPartFieldsForMapSideRS( gbInfo), getParallelismForMapSideRS(gbInfo), AcidUtils.Operation.NOT_ACID), new RowSchema(colInfoLst), mapGB);
@Override public Operator<? extends OperatorDesc> clone() throws CloneNotSupportedException { List<Operator<? extends OperatorDesc>> parents = getParentOperators(); List<Operator<? extends OperatorDesc>> parentClones = new ArrayList<Operator<? extends OperatorDesc>>(); if (parents != null) { for (Operator<? extends OperatorDesc> parent : parents) { parentClones.add((parent.clone())); } } @SuppressWarnings("unchecked") T descClone = (T)conf.clone(); // also clone the colExprMap by default // we need a deep copy ArrayList<ColumnInfo> colInfos = new ArrayList<>(getSchema().getSignature()); Map<String, ExprNodeDesc> map = null; Map<String, ExprNodeDesc> colExprMap = getColumnExprMap(); if (colExprMap != null) { map = new HashMap<>(colExprMap); } Operator<? extends OperatorDesc> ret = OperatorFactory.getAndMakeChild( cContext, descClone, new RowSchema(colInfos), map, parentClones); return ret; }
gbInfo.gbKeys.size() - 1, false, false); for (int i = 0; i < gbInfo.gbKeys.size(); i++) { ci = rsColInfoLst.get(i); colOutputName = gbInfo.outputColNames.get(i); outputColNames.add(colOutputName); colInfoLst.add(new ColumnInfo(colOutputName, ci.getType(), "", false)); colExprMap.put(colOutputName, gbKeys.get(i)); groupingSetsPosition = gbKeys.size(); ExprNodeDesc grpSetColExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, rsColInfoLst.get(groupingSetsPosition).getInternalName(), null, false); gbKeys.add(grpSetColExpr); colOutputName = gbInfo.outputColNames.get(gbInfo.outputColNames.size() - 1); outputColNames.add(colOutputName); colInfoLst.add(new ColumnInfo(colOutputName, TypeInfoFactory.stringTypeInfo, null, true)); colExprMap.put(colOutputName, grpSetColExpr); aggregations.add(new AggregationDesc(udafAttr.udafName.toLowerCase(), udaf.genericUDAFEvaluator, udaf.convertedParameters, false, udafMode)); colInfoLst.add(new ColumnInfo(colOutputName, udaf.returnType, "", false)); Operator rsGBOp2 = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.FINAL, outputColNames, gbKeys, aggregations, false, gbInfo.groupByMemoryUsage, gbInfo.memoryThreshold, null, false, groupingSetsPosition, gbInfo.containsDistinctAggr), new RowSchema(colInfoLst), rs);
/** * Inserts a filter below the table scan operator. Construct the filter * from the filter expression provided. * @param tableScanOp the table scan operators * @param filterExpr the filter expression */ private void insertFilterOnTop( TableScanOperator tableScanOp, ExprNodeDesc filterExpr) { // Get the top operator and it's child, all operators have a single parent Operator<? extends OperatorDesc> currChild = tableScanOp.getChildOperators().get(0); // Create the filter Operator and update the parents and children appropriately tableScanOp.setChildOperators(null); currChild.setParentOperators(null); Operator<FilterDesc> filter = OperatorFactory.getAndMakeChild( new FilterDesc(filterExpr, false), new RowSchema(tableScanOp.getSchema().getSignature()), tableScanOp); OperatorFactory.makeChild(filter, currChild); }
@SuppressWarnings("nls") private Operator genLimitPlan(String dest, QB qb, Operator input, int offset, int limit) throws SemanticException { // A map-only job can be optimized - instead of converting it to a // map-reduce job, we can have another map // job to do the same to avoid the cost of sorting in the map-reduce phase. // A better approach would be to // write into a local file and then have a map-only job. // Add the limit operator to get the value fields RowResolver inputRR = opParseCtx.get(input).getRowResolver(); LimitDesc limitDesc = new LimitDesc(offset, limit); globalLimitCtx.setLastReduceLimitDesc(limitDesc); Operator limitMap = putOpInsertMap(OperatorFactory.getAndMakeChild( limitDesc, new RowSchema(inputRR.getColumnInfos()), input), inputRR); if (LOG.isDebugEnabled()) { LOG.debug("Created LimitOperator Plan for clause: " + dest + " row schema: " + inputRR.toString()); } return limitMap; }
/**
 * Builds a SelectOperator over {@code input} that realizes an INSERT
 * statement's column projection for the given destination clause.
 *
 * @param dest    destination clause name
 * @param input   operator feeding the insert
 * @param inputRR row resolver describing the input's columns
 * @param qb      query block providing the select expression list
 * @return the select operator implementing the insert projection
 */
Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb)
    throws SemanticException {
  // One column expression per input column, in schema order.
  ArrayList<ExprNodeDesc> exprList = new ArrayList<ExprNodeDesc>();
  for (ColumnInfo colInfo : inputRR.getColumnInfos()) {
    exprList.add(new ExprNodeColumnDesc(colInfo));
  }

  ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
  // Rearranges/extends exprList to match the target table's column spec.
  RowResolver outRR = handleInsertStatementSpec(exprList, dest, inputRR, qb, selExprList);

  // Assign internal names and remember which expression backs each one.
  ArrayList<String> internalNames = new ArrayList<String>();
  Map<String, ExprNodeDesc> exprMap = new HashMap<String, ExprNodeDesc>();
  for (int i = 0; i < exprList.size(); i++) {
    String internalName = getColumnInternalName(i);
    exprMap.put(internalName, exprList.get(i));
    internalNames.add(internalName);
  }

  Operator<?> selOp = putOpInsertMap(
      OperatorFactory.getAndMakeChild(new SelectDesc(exprList, internalNames),
          new RowSchema(outRR.getColumnInfos()), input),
      outRR);
  selOp.setColumnExprMap(exprMap);
  return selOp;
}
OperatorFactory.getAndMakeChild(cloneDesc, origSchema == null ? null : new RowSchema(origSchema), origColExprMap == null ? null : new HashMap(origColExprMap), parents.get(p)); parents.set(p, cloneOp); OperatorFactory.getAndMakeChild(fileSinkDesc, parent.getSchema(), parent); tmpFileSinkOp.setChildOperators(null); fileDescLists.add(fileSinkDesc);
/**
 * Translates a Calcite HiveFilter into a Hive FilterOperator placed on top
 * of the translated input.
 *
 * TODO: 1) isSamplingPred 2) sampleDesc 3) isSortedFilter
 *
 * @param filterRel the Calcite filter relational node
 * @return the input's OpAttr re-wrapped around the new FilterOperator
 */
OpAttr visit(HiveFilter filterRel) throws SemanticException {
  OpAttr inputOpAf = dispatch(filterRel.getInput());

  if (LOG.isDebugEnabled()) {
    LOG.debug("Translating operator rel#" + filterRel.getId() + ":"
        + filterRel.getRelTypeName() + " with row type: [" + filterRel.getRowType() + "]");
  }

  // Convert the Calcite condition into a Hive expression tree.
  ExprNodeDesc condition = filterRel.getCondition().accept(
      new ExprNodeConverter(inputOpAf.tabAlias, filterRel.getInput().getRowType(),
          inputOpAf.vcolsInCalcite, filterRel.getCluster().getTypeFactory(), true));
  FilterDesc desc = new FilterDesc(condition, false);

  ArrayList<ColumnInfo> schemaCols = createColInfos(inputOpAf.inputs.get(0));
  FilterOperator filterOp = (FilterOperator) OperatorFactory.getAndMakeChild(desc,
      new RowSchema(schemaCols), inputOpAf.inputs.get(0));

  if (LOG.isDebugEnabled()) {
    LOG.debug("Generated " + filterOp + " with row schema: [" + filterOp.getSchema() + "]");
  }

  return inputOpAf.clone(filterOp);
}
gbKeys.add(gbAttrs.gbKeys.get(i)); colOutputName = SemanticAnalyzer.getColumnInternalName(i); colInfoLst.add(new ColumnInfo(colOutputName, gbAttrs.gbKeyTypes.get(i), "", false)); outputColNames.add(colOutputName); gbKeyColsAsNamesFrmIn.add(gbAttrs.gbKeyColNamesInInput.get(i)); colExprMap.put(colOutputName, gbKeys.get(i)); boolean inclGrpID = inclGrpSetInMapSide(gbAttrs); if (inclGrpID) { groupingSetsPosition = gbKeys.size(); addGrpSetCol(true, null, false, gbKeys, outputColNames, colInfoLst, colExprMap); colOutputName = SemanticAnalyzer.getColumnInternalName(gbKeys.size() + aggregations.size() - 1); colInfoLst.add(new ColumnInfo(colOutputName, udafInfo.returnType, "", false)); outputColNames.add(colOutputName); Operator gbOp = OperatorFactory.getAndMakeChild(new GroupByDesc(GroupByDesc.Mode.HASH, outputColNames, gbKeys, aggregations, false, gbAttrs.groupByMemoryUsage, gbAttrs.memoryThreshold, gbAttrs.grpSets, inclGrpID, groupingSetsPosition, gbAttrs.containsDistinctAggr), new RowSchema(colInfoLst), inputOpAf.inputs.get(0));
OpAttr[] inputs = new OpAttr[inputsList.size()]; for (int i = 0; i < inputs.length; i++) { inputs[i] = dispatch(inputsList.get(i)); Operator<? extends OperatorDesc> unionOp = OperatorFactory.getAndMakeChild( semanticAnalyzer.getOpContext(), unionDesc, new RowSchema(cinfoLst), children);