/**
 * Returns the zero-based position of the column with the given internal name
 * in this operator's row schema, or -1 if no such column exists.
 *
 * @param internalName internal column name to search for
 * @return index within the schema signature, or -1 when absent
 */
public int getPosition(String internalName) {
  int index = 0;
  for (ColumnInfo columnInfo : rowSchema.getSignature()) {
    if (columnInfo.getInternalName().equals(internalName)) {
      return index;
    }
    index++;
  }
  return -1;
}
/**
 * Filters the old row schema down to the columns that survived pruning.
 * Column order from {@code oldRS} is preserved; membership is decided by
 * internal name lookup against the pruned set.
 *
 * @param prunedCols columns to retain
 * @param oldRS      schema to filter
 * @return the retained ColumnInfos, in original schema order
 */
private List<ColumnInfo> buildPrunedRS(List<FieldNode> prunedCols, RowSchema oldRS)
    throws SemanticException {
  HashSet<FieldNode> retained = new HashSet<>(prunedCols);
  ArrayList<ColumnInfo> prunedSignature = new ArrayList<>();
  for (ColumnInfo columnInfo : oldRS.getSignature()) {
    FieldNode match = lookupColumn(retained, columnInfo.getInternalName());
    if (match != null) {
      prunedSignature.add(columnInfo);
    }
  }
  return prunedSignature;
}
/**
 * Copies the columns of the input operator's schema whose internal names match
 * {@code keepColNames}, consuming the names strictly in order (an order-preserving
 * subset walk: a schema column is kept only when it equals the next unmatched name).
 *
 * @param input        operator supplying the schema
 * @param keepColNames ordered internal names to keep
 * @return fresh ColumnInfo copies for the matched columns
 */
private static ArrayList<ColumnInfo> createColInfosSubset(Operator<?> input,
    List<String> keepColNames) {
  ArrayList<ColumnInfo> kept = new ArrayList<>();
  int nextKeep = 0;
  for (ColumnInfo candidate : input.getSchema().getSignature()) {
    boolean wanted = nextKeep < keepColNames.size()
        && candidate.getInternalName().equals(keepColNames.get(nextKeep));
    if (wanted) {
      kept.add(new ColumnInfo(candidate));
      nextKeep++;
    }
  }
  return kept;
}
/**
 * This is used to construct new lists of sorted columns where the order of the columns
 * hasn't changed, only possibly the name.
 * @param sortCols - input sorted columns
 * @param colInfos - List of column infos
 * @return output sorted columns, or null if any sort-column index falls outside colInfos
 */
private static List<SortCol> getNewSortCols(List<SortCol> sortCols, List<ColumnInfo> colInfos) {
  List<SortCol> newSortCols = new ArrayList<SortCol>(sortCols.size());
  for (int i = 0; i < sortCols.size(); i++) {
    SortCol sortCol = new SortCol(sortCols.get(i).getSortOrder());
    for (Integer index : sortCols.get(i).getIndexes()) {
      // The only time this condition should be false is in the case of dynamic partitioning,
      // where the trailing dynamic-partition columns do not appear in colInfos.
      if (index < colInfos.size()) {
        sortCol.addAlias(colInfos.get(index).getInternalName(), index);
      } else {
        return null;
      }
    }
    newSortCols.add(sortCol);
  }
  return newSortCols;
}
/**
 * Column-pruning callback for a union operator: intersects the union's own
 * schema with the columns its consumers need and records the result in the
 * pruned-column map of the processor context.
 *
 * @return always null (this processor produces no node output)
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  ColumnPrunerProcCtx context = (ColumnPrunerProcCtx) ctx;
  UnionOperator unionOp = (UnionOperator) nd;
  List<FieldNode> neededByChildren = context.genColLists(unionOp);
  if (neededByChildren == null) {
    return null;
  }
  RowSchema schema = unionOp.getSchema();
  if (schema == null) {
    return null;
  }
  // Keep schema order; retain only columns some consumer asked for.
  List<FieldNode> retained = new ArrayList<>();
  for (ColumnInfo columnInfo : schema.getSignature()) {
    FieldNode needed = lookupColumn(neededByChildren, columnInfo.getInternalName());
    if (needed != null) {
      retained.add(needed);
    }
  }
  context.getPrunedColLists().put(unionOp, retained);
  return null;
}
}
/**
 * Inserts an identity SELECT * above {@code input}: every column of the input's
 * row resolver is forwarded unchanged under its internal name, and the output
 * row resolver is a duplicate of the input's.
 *
 * @param input operator to select from
 * @return the new select operator, registered in the op parse context
 */
private Operator genSelectAllDesc(Operator input) throws SemanticException {
  RowResolver inputRR = opParseCtx.get(input).getRowResolver();
  ArrayList<ExprNodeDesc> colList = new ArrayList<>();
  ArrayList<String> columnNames = new ArrayList<>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
  for (ColumnInfo col : inputRR.getColumnInfos()) {
    String internalName = col.getInternalName();
    colList.add(new ExprNodeColumnDesc(col, true));
    columnNames.add(internalName);
    // A fresh descriptor for the expr map, mirroring the original's behavior
    // of not sharing the list entry's instance.
    columnExprMap.put(internalName, new ExprNodeColumnDesc(col, true));
  }
  RowResolver outputRR = inputRR.duplicate();
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR);
  output.setColumnExprMap(columnExprMap);
  return output;
}
/**
 * Builds a column-reference AST node for the first column of the subquery's
 * row schema, qualified by the subquery alias.
 *
 * @param sqAlias alias of the subquery
 * @param sqRR    row resolver of the subquery
 * @return an AST referencing {@code sqAlias.<firstColumnAlias>}
 */
static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) {
  ColumnInfo firstColumn = sqRR.getRowSchema().getSignature().get(0);
  // reverseLookup yields {tableAlias, columnAlias}; the column alias is index 1.
  String[] qualified = sqRR.reverseLookup(firstColumn.getInternalName());
  return createColRefAST(sqAlias, qualified[1]);
}
// NOTE(review): fragment of RowResolver.addMappingOnly — the enclosing method is
// not fully visible here (the else-branch is left open), so only comments are added.
// Only the first mapping of an internal name wins the primary inverse map; later
// mappings for the same internal name are parked in the alternate inverse map.
boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName());
qualifiedAlias[1] = col_alias;
if ( !colPresent ) {
  invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
} else {
  altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
private static List<String> getPartitionColumns(List<ColumnInfo> schema, List<String> neededColumns, List<String> referencedColumns) { // extra columns is difference between referenced columns vs needed // columns. The difference could be partition columns. List<String> partitionCols = new ArrayList<>(referencedColumns.size()); List<String> extraCols = Lists.newArrayList(referencedColumns); if (referencedColumns.size() > neededColumns.size()) { extraCols.removeAll(neededColumns); for (String col : extraCols) { for (ColumnInfo ci : schema) { // conditions for being partition column if (col.equals(ci.getInternalName()) && ci.getIsVirtualCol() && !ci.isHiddenVirtualCol()) { partitionCols.add(col); } } } } return partitionCols; }
// NOTE(review): fragment — the enclosing method's signature and closing braces are
// not visible, and the early return inside the loop suggests lines were elided
// between the ROWID check and the return (colNames/colTypes are returned while
// still empty here). Comments only; confirm against the full source.
final List<String> colNames = new ArrayList<String>(rowSchema.getSignature().size());
final List<TypeInfo> colTypes = new ArrayList<TypeInfo>(rowSchema.getSignature().size());
boolean hasRowId = false;
for (ColumnInfo c : rowSchema.getSignature()) {
  String columnName = c.getInternalName();
  // Presence of the ROWID virtual column triggers this special-case construction.
  if (VirtualColumn.ROWID.getName().equals(columnName)) {
    hasRowId = true;
    virtualColumns = new VirtualColumn[0];
    return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
        colTypes.toArray(new TypeInfo[colTypes.size()]), null, null,
        partitionColumnCount, virtualColumns.length, virtualColumns,
        new String[0], null);
/**
 * Builds a standard struct ObjectInspector mirroring the row resolver's columns:
 * field names are the internal column names and field inspectors are each
 * column's own ObjectInspector.
 *
 * @param rr row resolver supplying the column list
 * @return a standard StructObjectInspector over those columns
 */
public static StructObjectInspector getStandardStructOI(RowResolver rr) {
  ArrayList<String> fieldNames = new ArrayList<>();
  ArrayList<ObjectInspector> fieldInspectors = new ArrayList<>();
  for (ColumnInfo info : rr.getColumnInfos()) {
    fieldNames.add(info.getInternalName());
    fieldInspectors.add(info.getObjectInspector());
  }
  return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
}
/**
 * Selects an ordered subset of the input schema's columns: a column is copied
 * exactly when its internal name equals the next unconsumed entry of
 * {@code keepColNames}, so relative order must match for a column to be kept.
 *
 * @param input        operator whose schema is walked
 * @param keepColNames internal names to retain, in schema order
 * @return copies of the matched ColumnInfos
 */
private static ArrayList<ColumnInfo> createColInfosSubset(Operator<?> input,
    List<String> keepColNames) {
  List<ColumnInfo> signature = input.getSchema().getSignature();
  ArrayList<ColumnInfo> result = new ArrayList<>();
  int matched = 0;
  for (int i = 0; i < signature.size(); i++) {
    ColumnInfo candidate = signature.get(i);
    if (matched < keepColNames.size()
        && keepColNames.get(matched).equals(candidate.getInternalName())) {
      result.add(new ColumnInfo(candidate));
      matched++;
    }
  }
  return result;
}
/** * This is used to construct new lists of bucketed columns where the order of the columns * hasn't changed, only possibly the name * @param bucketCols - input bucketed columns * @param colInfos - List of column infos * @return output bucketed columns */ private static List<BucketCol> getNewBucketCols(List<BucketCol> bucketCols, List<ColumnInfo> colInfos) { List<BucketCol> newBucketCols = new ArrayList<BucketCol>(bucketCols.size()); for (int i = 0; i < bucketCols.size(); i++) { BucketCol bucketCol = new BucketCol(); for (Integer index : bucketCols.get(i).getIndexes()) { // The only time this condition should be false is in the case of dynamic partitioning // where the data is bucketed on a dynamic partitioning column and the FileSinkOperator is // being processed. In this case, the dynamic partition column will not appear in // colInfos, and due to the limitations of dynamic partitioning, they will appear at the // end of the input schema. Since the order of the columns hasn't changed, and no new // columns have been added/removed, it is safe to assume that these will have indexes // greater than or equal to colInfos.size(). if (index < colInfos.size()) { bucketCol.addAlias(colInfos.get(index).getInternalName(), index); } else { return null; } } newBucketCols.add(bucketCol); } return newBucketCols; }
/**
 * Creates a SELECT on top of {@code parentOp} that forwards each of its columns
 * under the corresponding column name of {@code currOp}'s schema. The two
 * schemas must have the same number of columns, matched positionally.
 *
 * @param parentOp operator whose columns are read
 * @param currOp   operator whose schema supplies the output column names
 * @return the new select operator, child of parentOp
 */
private Operator<? extends OperatorDesc> genOutputSelectForGroupBy(
    Operator<? extends OperatorDesc> parentOp, Operator<? extends OperatorDesc> currOp) {
  List<ColumnInfo> parentSig = parentOp.getSchema().getSignature();
  List<ColumnInfo> currSig = currOp.getSchema().getSignature();
  assert (parentSig.size() == currSig.size());
  List<ExprNodeDesc> columns = new ArrayList<>();
  List<String> outputNames = new ArrayList<>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
  for (int i = 0; i < parentSig.size(); i++) {
    ColumnInfo pInfo = parentSig.get(i);
    ColumnInfo cInfo = currSig.get(i);
    // Read the parent's column, expose it under the current op's name.
    ExprNodeDesc column = new ExprNodeColumnDesc(pInfo.getType(), pInfo.getInternalName(),
        pInfo.getTabAlias(), pInfo.getIsVirtualCol(), pInfo.isSkewedCol());
    columns.add(column);
    outputNames.add(cInfo.getInternalName());
    columnExprMap.put(cInfo.getInternalName(), column);
  }
  return OperatorFactory.getAndMakeChild(new SelectDesc(columns, outputNames),
      new RowSchema(currOp.getSchema().getSignature()), columnExprMap, parentOp);
}
}
// NOTE(review): fragment — outputColName, reduceKeys, i, tabAlias, colInfoLst and
// colExprMap are declared outside the visible span; comments only.
// Key columns are exposed downstream under the "KEY.<name>" internal-name prefix.
ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + outputColName,
    reduceKeys.get(i).getTypeInfo(), tabAlias, false);
colInfoLst.add(colInfo);
colExprMap.put(colInfo.getInternalName(), reduceKeys.get(i));
/**
 * Produces the join-expression AST for a subquery: a reference to the first
 * column of the subquery's schema, qualified with {@code sqAlias}.
 *
 * @param sqAlias subquery alias used to qualify the reference
 * @param sqRR    subquery row resolver
 * @return column-reference AST for the leading schema column
 */
static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) {
  List<ColumnInfo> signature = sqRR.getRowSchema().getSignature();
  // reverseLookup returns {tableAlias, columnAlias}.
  String[] aliasPair = sqRR.reverseLookup(signature.get(0).getInternalName());
  return createColRefAST(sqAlias, aliasPair[1]);
}
/**
 * Registers {@code colInfo} under (tab_alias, col_alias) in the forward map.
 * A ColumnInfo may be mapped multiple times; only the first mapping of an
 * internal name is captured in the primary inverse map, later ones go to the
 * alternate inverse map.
 *
 * @param tab_alias table alias (lower-cased; may be null)
 * @param col_alias column alias
 * @param colInfo   column info to register
 * @return true if the internal name was already present in the inverse map
 */
public boolean addMappingOnly(String tab_alias, String col_alias, ColumnInfo colInfo) {
  if (tab_alias != null) {
    tab_alias = tab_alias.toLowerCase();
  }
  // Check before inserting, so the first mapping wins invRslvMap.
  boolean alreadyInverse = invRslvMap.containsKey(colInfo.getInternalName());
  LinkedHashMap<String, ColumnInfo> fieldMap = rslvMap.get(tab_alias);
  if (fieldMap == null) {
    fieldMap = new LinkedHashMap<String, ColumnInfo>();
    rslvMap.put(tab_alias, fieldMap);
  }
  ColumnInfo previous = fieldMap.put(col_alias, colInfo);
  if (previous != null) {
    LOG.warn("Duplicate column info for " + tab_alias + "." + col_alias
        + " was overwritten in RowResolver map: " + previous + " by " + colInfo);
  }
  String[] qualifiedAlias = { tab_alias, col_alias };
  if (alreadyInverse) {
    altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
  } else {
    invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
  }
  return alreadyInverse;
}
private static void addParitionColumnStats(HiveConf conf, List<String> neededColumns, List<String> referencedColumns, List<ColumnInfo> schema, Table table, PrunedPartitionList partList, List<ColStatistics> colStats) throws HiveException { // extra columns is difference between referenced columns vs needed // columns. The difference could be partition columns. List<String> extraCols = Lists.newArrayList(referencedColumns); if (referencedColumns.size() > neededColumns.size()) { extraCols.removeAll(neededColumns); for (String col : extraCols) { for (ColumnInfo ci : schema) { // conditions for being partition column if (col.equals(ci.getInternalName()) && ci.getIsVirtualCol() && !ci.isHiddenVirtualCol()) { colStats.add(getColStatsForPartCol(ci, new PartitionIterable(partList.getPartitions()), conf)); } } } } }
/**
 * Projects {@code oldRS} onto the pruned column set: every column whose
 * internal name resolves against the pruned set is kept, in schema order.
 *
 * @param prunedCols columns that survived pruning
 * @param oldRS      schema being projected
 * @return the surviving ColumnInfos
 */
private List<ColumnInfo> buildPrunedRS(List<FieldNode> prunedCols, RowSchema oldRS)
    throws SemanticException {
  HashSet<FieldNode> pruneSet = new HashSet<>(prunedCols);
  ArrayList<ColumnInfo> survivors = new ArrayList<>();
  for (ColumnInfo info : oldRS.getSignature()) {
    boolean keep = lookupColumn(pruneSet, info.getInternalName()) != null;
    if (keep) {
      survivors.add(info);
    }
  }
  return survivors;
}
/**
 * Constructs a standard struct ObjectInspector from a row resolver: one struct
 * field per column, named by the column's internal name and inspected by the
 * column's own ObjectInspector.
 *
 * @param rr row resolver whose columns define the struct
 * @return the resulting StructObjectInspector
 */
public static StructObjectInspector getStandardStructOI(RowResolver rr) {
  ArrayList<String> names = new ArrayList<>();
  ArrayList<ObjectInspector> inspectors = new ArrayList<>();
  for (ColumnInfo column : rr.getColumnInfos()) {
    names.add(column.getInternalName());
    inspectors.add(column.getObjectInspector());
  }
  return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}