/**
 * Returns the zero-based position of the column with the given internal name
 * in this operator's row schema, or -1 if no such column exists.
 *
 * @param internalName internal column name to search for
 * @return index within the schema signature, or -1 when absent
 */
public int getPosition(String internalName) {
  int index = 0;
  for (ColumnInfo columnInfo : rowSchema.getSignature()) {
    if (columnInfo.getInternalName().equals(internalName)) {
      return index;
    }
    index++;
  }
  return -1;
}
/**
 * Filters the old row schema down to the columns that survived pruning.
 * Column order from {@code oldRS} is preserved; membership is decided by
 * internal name lookup against the pruned set.
 *
 * @param prunedCols columns to retain
 * @param oldRS      schema to filter
 * @return the retained ColumnInfos, in original schema order
 */
private List<ColumnInfo> buildPrunedRS(List<FieldNode> prunedCols, RowSchema oldRS)
    throws SemanticException {
  HashSet<FieldNode> retained = new HashSet<>(prunedCols);
  ArrayList<ColumnInfo> prunedSignature = new ArrayList<>();
  for (ColumnInfo columnInfo : oldRS.getSignature()) {
    FieldNode match = lookupColumn(retained, columnInfo.getInternalName());
    if (match != null) {
      prunedSignature.add(columnInfo);
    }
  }
  return prunedSignature;
}
/**
 * Copies the columns of the input operator's schema whose internal names match
 * {@code keepColNames}, consuming the names strictly in order (an order-preserving
 * subset walk: a schema column is kept only when it equals the next unmatched name).
 *
 * @param input        operator supplying the schema
 * @param keepColNames ordered internal names to keep
 * @return fresh ColumnInfo copies for the matched columns
 */
private static ArrayList<ColumnInfo> createColInfosSubset(Operator<?> input,
    List<String> keepColNames) {
  ArrayList<ColumnInfo> kept = new ArrayList<>();
  int nextKeep = 0;
  for (ColumnInfo candidate : input.getSchema().getSignature()) {
    boolean wanted = nextKeep < keepColNames.size()
        && candidate.getInternalName().equals(keepColNames.get(nextKeep));
    if (wanted) {
      kept.add(new ColumnInfo(candidate));
      nextKeep++;
    }
  }
  return kept;
}
/**
 * This is used to construct new lists of sorted columns where the order of the columns
 * hasn't changed, only possibly the name.
 * @param sortCols - input sorted columns
 * @param colInfos - List of column infos
 * @return output sorted columns, or null if any sort-column index falls outside colInfos
 */
private static List<SortCol> getNewSortCols(List<SortCol> sortCols, List<ColumnInfo> colInfos) {
  List<SortCol> newSortCols = new ArrayList<SortCol>(sortCols.size());
  for (int i = 0; i < sortCols.size(); i++) {
    SortCol sortCol = new SortCol(sortCols.get(i).getSortOrder());
    for (Integer index : sortCols.get(i).getIndexes()) {
      // The only time this condition should be false is in the case of dynamic partitioning,
      // where the trailing dynamic-partition columns do not appear in colInfos.
      if (index < colInfos.size()) {
        sortCol.addAlias(colInfos.get(index).getInternalName(), index);
      } else {
        return null;
      }
    }
    newSortCols.add(sortCol);
  }
  return newSortCols;
}
/**
 * Column-pruning callback for a union operator: intersects the union's own
 * schema with the columns its consumers need and records the result in the
 * pruned-column map of the processor context.
 *
 * @return always null (this processor produces no node output)
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  ColumnPrunerProcCtx context = (ColumnPrunerProcCtx) ctx;
  UnionOperator unionOp = (UnionOperator) nd;
  List<FieldNode> neededByChildren = context.genColLists(unionOp);
  if (neededByChildren == null) {
    return null;
  }
  RowSchema schema = unionOp.getSchema();
  if (schema == null) {
    return null;
  }
  // Keep schema order; retain only columns some consumer asked for.
  List<FieldNode> retained = new ArrayList<>();
  for (ColumnInfo columnInfo : schema.getSignature()) {
    FieldNode needed = lookupColumn(neededByChildren, columnInfo.getInternalName());
    if (needed != null) {
      retained.add(needed);
    }
  }
  context.getPrunedColLists().put(unionOp, retained);
  return null;
}
}
/**
 * Inserts an identity SELECT * above {@code input}: every column of the input's
 * row resolver is forwarded unchanged under its internal name, and the output
 * row resolver is a duplicate of the input's.
 *
 * @param input operator to select from
 * @return the new select operator, registered in the op parse context
 */
private Operator genSelectAllDesc(Operator input) throws SemanticException {
  RowResolver inputRR = opParseCtx.get(input).getRowResolver();
  ArrayList<ExprNodeDesc> colList = new ArrayList<>();
  ArrayList<String> columnNames = new ArrayList<>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
  for (ColumnInfo col : inputRR.getColumnInfos()) {
    String internalName = col.getInternalName();
    colList.add(new ExprNodeColumnDesc(col, true));
    columnNames.add(internalName);
    // A fresh descriptor for the expr map, mirroring the original's behavior
    // of not sharing the list entry's instance.
    columnExprMap.put(internalName, new ExprNodeColumnDesc(col, true));
  }
  RowResolver outputRR = inputRR.duplicate();
  Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
      new SelectDesc(colList, columnNames, true), outputRR.getRowSchema(), input), outputRR);
  output.setColumnExprMap(columnExprMap);
  return output;
}
/**
 * Builds a column-reference AST node for the first column of the subquery's
 * row schema, qualified by the subquery alias.
 *
 * @param sqAlias alias of the subquery
 * @param sqRR    row resolver of the subquery
 * @return an AST referencing {@code sqAlias.<firstColumnAlias>}
 */
static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) {
  ColumnInfo firstColumn = sqRR.getRowSchema().getSignature().get(0);
  // reverseLookup yields {tableAlias, columnAlias}; the column alias is index 1.
  String[] qualified = sqRR.reverseLookup(firstColumn.getInternalName());
  return createColRefAST(sqAlias, qualified[1]);
}
// NOTE(review): fragment of RowResolver.addMappingOnly — the enclosing method is
// not fully visible here (the else-branch is left open), so only comments are added.
// Only the first mapping of an internal name wins the primary inverse map; later
// mappings for the same internal name are parked in the alternate inverse map.
boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName());
qualifiedAlias[1] = col_alias;
if ( !colPresent ) {
  invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
} else {
  altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
private static List<String> getPartitionColumns(List<ColumnInfo> schema, List<String> neededColumns, List<String> referencedColumns) { // extra columns is difference between referenced columns vs needed // columns. The difference could be partition columns. List<String> partitionCols = new ArrayList<>(referencedColumns.size()); List<String> extraCols = Lists.newArrayList(referencedColumns); if (referencedColumns.size() > neededColumns.size()) { extraCols.removeAll(neededColumns); for (String col : extraCols) { for (ColumnInfo ci : schema) { // conditions for being partition column if (col.equals(ci.getInternalName()) && ci.getIsVirtualCol() && !ci.isHiddenVirtualCol()) { partitionCols.add(col); } } } } return partitionCols; }
// NOTE(review): fragment — the enclosing method's signature and closing braces are
// not visible, and the early return inside the loop suggests lines were elided
// between the ROWID check and the return (colNames/colTypes are returned while
// still empty here). Comments only; confirm against the full source.
final List<String> colNames = new ArrayList<String>(rowSchema.getSignature().size());
final List<TypeInfo> colTypes = new ArrayList<TypeInfo>(rowSchema.getSignature().size());
boolean hasRowId = false;
for (ColumnInfo c : rowSchema.getSignature()) {
  String columnName = c.getInternalName();
  // Presence of the ROWID virtual column triggers this special-case construction.
  if (VirtualColumn.ROWID.getName().equals(columnName)) {
    hasRowId = true;
    virtualColumns = new VirtualColumn[0];
    return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
        colTypes.toArray(new TypeInfo[colTypes.size()]), null, null,
        partitionColumnCount, virtualColumns.length, virtualColumns,
        new String[0], null);
/**
 * Builds a standard struct ObjectInspector mirroring the row resolver's columns:
 * field names are the internal column names and field inspectors are each
 * column's own ObjectInspector.
 *
 * @param rr row resolver supplying the column list
 * @return a standard StructObjectInspector over those columns
 */
public static StructObjectInspector getStandardStructOI(RowResolver rr) {
  ArrayList<String> fieldNames = new ArrayList<>();
  ArrayList<ObjectInspector> fieldInspectors = new ArrayList<>();
  for (ColumnInfo info : rr.getColumnInfos()) {
    fieldNames.add(info.getInternalName());
    fieldInspectors.add(info.getObjectInspector());
  }
  return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
}
/**
 * Selects an ordered subset of the input schema's columns: a column is copied
 * exactly when its internal name equals the next unconsumed entry of
 * {@code keepColNames}, so relative order must match for a column to be kept.
 *
 * @param input        operator whose schema is walked
 * @param keepColNames internal names to retain, in schema order
 * @return copies of the matched ColumnInfos
 */
private static ArrayList<ColumnInfo> createColInfosSubset(Operator<?> input,
    List<String> keepColNames) {
  List<ColumnInfo> signature = input.getSchema().getSignature();
  ArrayList<ColumnInfo> result = new ArrayList<>();
  int matched = 0;
  for (int i = 0; i < signature.size(); i++) {
    ColumnInfo candidate = signature.get(i);
    if (matched < keepColNames.size()
        && keepColNames.get(matched).equals(candidate.getInternalName())) {
      result.add(new ColumnInfo(candidate));
      matched++;
    }
  }
  return result;
}
/** * This is used to construct new lists of bucketed columns where the order of the columns * hasn't changed, only possibly the name * @param bucketCols - input bucketed columns * @param colInfos - List of column infos * @return output bucketed columns */ private static List<BucketCol> getNewBucketCols(List<BucketCol> bucketCols, List<ColumnInfo> colInfos) { List<BucketCol> newBucketCols = new ArrayList<BucketCol>(bucketCols.size()); for (int i = 0; i < bucketCols.size(); i++) { BucketCol bucketCol = new BucketCol(); for (Integer index : bucketCols.get(i).getIndexes()) { // The only time this condition should be false is in the case of dynamic partitioning // where the data is bucketed on a dynamic partitioning column and the FileSinkOperator is // being processed. In this case, the dynamic partition column will not appear in // colInfos, and due to the limitations of dynamic partitioning, they will appear at the // end of the input schema. Since the order of the columns hasn't changed, and no new // columns have been added/removed, it is safe to assume that these will have indexes // greater than or equal to colInfos.size(). if (index < colInfos.size()) { bucketCol.addAlias(colInfos.get(index).getInternalName(), index); } else { return null; } } newBucketCols.add(bucketCol); } return newBucketCols; }
/**
 * Creates a SELECT on top of {@code parentOp} that forwards each of its columns
 * under the corresponding column name of {@code currOp}'s schema. The two
 * schemas must have the same number of columns, matched positionally.
 *
 * @param parentOp operator whose columns are read
 * @param currOp   operator whose schema supplies the output column names
 * @return the new select operator, child of parentOp
 */
private Operator<? extends OperatorDesc> genOutputSelectForGroupBy(
    Operator<? extends OperatorDesc> parentOp, Operator<? extends OperatorDesc> currOp) {
  List<ColumnInfo> parentSig = parentOp.getSchema().getSignature();
  List<ColumnInfo> currSig = currOp.getSchema().getSignature();
  assert (parentSig.size() == currSig.size());
  List<ExprNodeDesc> columns = new ArrayList<>();
  List<String> outputNames = new ArrayList<>();
  Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
  for (int i = 0; i < parentSig.size(); i++) {
    ColumnInfo pInfo = parentSig.get(i);
    ColumnInfo cInfo = currSig.get(i);
    // Read the parent's column, expose it under the current op's name.
    ExprNodeDesc column = new ExprNodeColumnDesc(pInfo.getType(), pInfo.getInternalName(),
        pInfo.getTabAlias(), pInfo.getIsVirtualCol(), pInfo.isSkewedCol());
    columns.add(column);
    outputNames.add(cInfo.getInternalName());
    columnExprMap.put(cInfo.getInternalName(), column);
  }
  return OperatorFactory.getAndMakeChild(new SelectDesc(columns, outputNames),
      new RowSchema(currOp.getSchema().getSignature()), columnExprMap, parentOp);
}
}
// NOTE(review): fragment — outputColName, reduceKeys, i, tabAlias, colInfoLst and
// colExprMap are declared outside the visible span; comments only.
// Key columns are exposed downstream under the "KEY.<name>" internal-name prefix.
ColumnInfo colInfo = new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + outputColName,
    reduceKeys.get(i).getTypeInfo(), tabAlias, false);
colInfoLst.add(colInfo);
colExprMap.put(colInfo.getInternalName(), reduceKeys.get(i));
/**
 * Produces the join-expression AST for a subquery: a reference to the first
 * column of the subquery's schema, qualified with {@code sqAlias}.
 *
 * @param sqAlias subquery alias used to qualify the reference
 * @param sqRR    subquery row resolver
 * @return column-reference AST for the leading schema column
 */
static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) {
  List<ColumnInfo> signature = sqRR.getRowSchema().getSignature();
  // reverseLookup returns {tableAlias, columnAlias}.
  String[] aliasPair = sqRR.reverseLookup(signature.get(0).getInternalName());
  return createColRefAST(sqAlias, aliasPair[1]);
}
/**
 * Registers {@code colInfo} under (tab_alias, col_alias) in the forward map.
 * A ColumnInfo may be mapped multiple times; only the first mapping of an
 * internal name is captured in the primary inverse map, later ones go to the
 * alternate inverse map.
 *
 * @param tab_alias table alias (lower-cased; may be null)
 * @param col_alias column alias
 * @param colInfo   column info to register
 * @return true if the internal name was already present in the inverse map
 */
public boolean addMappingOnly(String tab_alias, String col_alias, ColumnInfo colInfo) {
  if (tab_alias != null) {
    tab_alias = tab_alias.toLowerCase();
  }
  // Check before inserting, so the first mapping wins invRslvMap.
  boolean alreadyInverse = invRslvMap.containsKey(colInfo.getInternalName());
  LinkedHashMap<String, ColumnInfo> fieldMap = rslvMap.get(tab_alias);
  if (fieldMap == null) {
    fieldMap = new LinkedHashMap<String, ColumnInfo>();
    rslvMap.put(tab_alias, fieldMap);
  }
  ColumnInfo previous = fieldMap.put(col_alias, colInfo);
  if (previous != null) {
    LOG.warn("Duplicate column info for " + tab_alias + "." + col_alias
        + " was overwritten in RowResolver map: " + previous + " by " + colInfo);
  }
  String[] qualifiedAlias = { tab_alias, col_alias };
  if (alreadyInverse) {
    altInvRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
  } else {
    invRslvMap.put(colInfo.getInternalName(), qualifiedAlias);
  }
  return alreadyInverse;
}
private static void addParitionColumnStats(HiveConf conf, List<String> neededColumns, List<String> referencedColumns, List<ColumnInfo> schema, Table table, PrunedPartitionList partList, List<ColStatistics> colStats) throws HiveException { // extra columns is difference between referenced columns vs needed // columns. The difference could be partition columns. List<String> extraCols = Lists.newArrayList(referencedColumns); if (referencedColumns.size() > neededColumns.size()) { extraCols.removeAll(neededColumns); for (String col : extraCols) { for (ColumnInfo ci : schema) { // conditions for being partition column if (col.equals(ci.getInternalName()) && ci.getIsVirtualCol() && !ci.isHiddenVirtualCol()) { colStats.add(getColStatsForPartCol(ci, new PartitionIterable(partList.getPartitions()), conf)); } } } } }
/**
 * Projects {@code oldRS} onto the pruned column set: every column whose
 * internal name resolves against the pruned set is kept, in schema order.
 *
 * @param prunedCols columns that survived pruning
 * @param oldRS      schema being projected
 * @return the surviving ColumnInfos
 */
private List<ColumnInfo> buildPrunedRS(List<FieldNode> prunedCols, RowSchema oldRS)
    throws SemanticException {
  HashSet<FieldNode> pruneSet = new HashSet<>(prunedCols);
  ArrayList<ColumnInfo> survivors = new ArrayList<>();
  for (ColumnInfo info : oldRS.getSignature()) {
    boolean keep = lookupColumn(pruneSet, info.getInternalName()) != null;
    if (keep) {
      survivors.add(info);
    }
  }
  return survivors;
}
/**
 * Constructs a standard struct ObjectInspector from a row resolver: one struct
 * field per column, named by the column's internal name and inspected by the
 * column's own ObjectInspector.
 *
 * @param rr row resolver whose columns define the struct
 * @return the resulting StructObjectInspector
 */
public static StructObjectInspector getStandardStructOI(RowResolver rr) {
  ArrayList<String> names = new ArrayList<>();
  ArrayList<ObjectInspector> inspectors = new ArrayList<>();
  for (ColumnInfo column : rr.getColumnInfos()) {
    names.add(column.getInternalName());
    inspectors.add(column.getObjectInspector());
  }
  return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}