ColumnProjectionUtils.appendReadColumns(
    jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().isTranscationalTable(),
    ts.getConf().getAcidOperationalProperties());
AcidUtils.setValidWriteIdList(jobClone, ts.getConf());
ts.passExecContext(getExecContext());
ts.initialize(jobClone, new ObjectInspector[] {fetchOp.getOutputObjectInspector()});
fetchOp.clearFetchContext();
/**
 * Inserts a filter below the table scan operator, constructed from the
 * filter expression provided.
 * @param tableScanOp the table scan operator
 * @param filterExpr the filter expression
 */
private void insertFilterOnTop(TableScanOperator tableScanOp, ExprNodeDesc filterExpr) {
  // Get the top operator and its child; all operators here have a single parent
  Operator<? extends OperatorDesc> currChild = tableScanOp.getChildOperators().get(0);

  // Create the filter operator and update the parents and children appropriately
  tableScanOp.setChildOperators(null);
  currChild.setParentOperators(null);
  Operator<FilterDesc> filter = OperatorFactory.getAndMakeChild(
      new FilterDesc(filterExpr, false),
      new RowSchema(tableScanOp.getSchema().getSignature()), tableScanOp);
  OperatorFactory.makeChild(filter, currChild);
}
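The rewiring above follows the usual pattern for splicing a new operator between a parent and its only child: detach both sides, then relink them through the new node. A minimal stand-alone sketch of that pattern, using a simplified Node class rather than Hive's Operator hierarchy (all names here are illustrative):

import java.util.ArrayList;
import java.util.List;

/** Simplified illustration of splicing a new node between a parent and its only child. */
class Node {
  final String name;
  final List<Node> parents = new ArrayList<>();
  final List<Node> children = new ArrayList<>();

  Node(String name) {
    this.name = name;
  }

  /** Detaches parent -> child and relinks both through middle. */
  static void spliceBetween(Node parent, Node child, Node middle) {
    parent.children.remove(child);
    child.parents.remove(parent);
    parent.children.add(middle);
    middle.parents.add(parent);
    middle.children.add(child);
    child.parents.add(middle);
  }

  public static void main(String[] args) {
    Node scan = new Node("TS");
    Node select = new Node("SEL");
    scan.children.add(select);
    select.parents.add(scan);

    Node filter = new Node("FIL");
    spliceBetween(scan, select, filter);
    // The chain is now TS -> FIL -> SEL.
    System.out.println(scan.children.get(0).name + " -> " + filter.children.get(0).name);
  }
}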
/**
 * Other than gathering statistics for the ANALYZE command, the table scan operator
 * does nothing special beyond forwarding the row, since the table data is always
 * read by the mapper as part of the map-reduce framework. Should that assumption
 * stop holding, i.e. should the table data no longer be read only by the mapper,
 * this operator will be enhanced to read the table.
 **/
@Override
public void process(Object row, int tag) throws HiveException {
  if (rowLimit >= 0) {
    if (row instanceof VectorizedRowBatch) {
      VectorizedRowBatch batch = (VectorizedRowBatch) row;
      if (currCount >= rowLimit) {
        setDone(true);
        return;
      }
      // Truncate the batch so that no more than rowLimit rows are forwarded in total
      if (currCount + batch.size > rowLimit) {
        batch.size = rowLimit - currCount;
      }
      currCount += batch.size;
    } else if (currCount++ >= rowLimit) {
      setDone(true);
      return;
    }
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  forward(row, inputObjInspectors[tag]);
}
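The row-limit handling above clips the last vectorized batch instead of dropping it, so exactly rowLimit rows are forwarded. A minimal stand-alone sketch of the same accounting (class and field names are illustrative, not Hive's):

/** Simplified illustration of clipping a batch against a global row limit. */
class RowLimitClipper {
  private final int rowLimit;
  private int currCount = 0;

  RowLimitClipper(int rowLimit) {
    this.rowLimit = rowLimit;
  }

  /** Returns how many rows of a batch of the given size may still be forwarded. */
  int clip(int batchSize) {
    if (currCount >= rowLimit) {
      return 0;                        // limit already reached, emit nothing
    }
    int allowed = Math.min(batchSize, rowLimit - currCount);
    currCount += allowed;              // account only for the rows actually forwarded
    return allowed;
  }

  public static void main(String[] args) {
    RowLimitClipper clipper = new RowLimitClipper(100);
    System.out.println(clipper.clip(60));  // 60
    System.out.println(clipper.clip(60));  // 40 (the last batch is truncated)
    System.out.println(clipper.clip(60));  // 0  (the operator would call setDone(true))
  }
}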
@Override
public Operator<? extends OperatorDesc> clone() throws CloneNotSupportedException {
  TableScanOperator ts = (TableScanOperator) super.clone();
  ts.setNeededColumnIDs(new ArrayList<Integer>(getNeededColumnIDs()));
  ts.setNeededColumns(new ArrayList<String>(getNeededColumns()));
  ts.setReferencedColumns(new ArrayList<String>(getReferencedColumns()));
  return ts;
}
/**
 * Other than gathering statistics for the ANALYZE command, the table scan operator
 * does nothing special beyond forwarding the row, since the table data is always
 * read by the mapper as part of the map-reduce framework. Should that assumption
 * stop holding, i.e. should the table data no longer be read only by the mapper,
 * this operator will be enhanced to read the table.
 **/
@Override
public void process(Object row, int tag) throws HiveException {
  if (rowLimit >= 0) {
    if (checkSetDone(row, tag)) {
      return;
    }
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  if (vectorized) {
    vectorForward((VectorizedRowBatch) row);
  } else {
    forward(row, inputObjInspectors[tag]);
  }
}
@Override
public void closeOp(boolean abort) throws HiveException {
  if (getExecContext() != null && getExecContext().getFileId() == null) {
    updateFileId();
  }
  if (conf != null) {
    if (conf.isGatherStats() && stats.size() != 0) {
      publishStats();
    }
  }
  super.closeOp(abort);
}
ColumnProjectionUtils.appendReadColumns(
    jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().isTranscationalTable(),
    ts.getConf().getAcidOperationalProperties());
AcidUtils.setValidWriteIdList(jobClone, ts.getConf());
List<String> neededNestedColumnPaths = new ArrayList<>();
List<String> referencedColumnNames = new ArrayList<String>();
TableScanDesc desc = scanOp.getConf();
List<VirtualColumn> virtualCols = desc.getVirtualCols();
List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();
if (scanOp.getConf().isGatherStats()) {
  cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
}
scanOp.setNeededColumnIDs(neededColumnIds);
scanOp.setNeededColumns(neededColumnNames);
scanOp.setNeededNestedColumnPaths(neededNestedColumnPaths);
scanOp.setReferencedColumns(referencedColumnNames);
private void storeBucketPathMapping(TableScanOperator tsOp, FileStatus[] srcs) {
  Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
  for (int pos = 0; pos < srcs.length; pos++) {
    if (ShimLoader.getHadoopShims().isDirectory(srcs[pos])) {
      throw new RuntimeException(
          "Was expecting '" + srcs[pos].getPath() + "' to be bucket file.");
    }
    bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
  }
  tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping);
}
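The mapping above records each bucket file's position in the listed sources so it can later be looked up by file name. A plain-Java sketch of the same idea, detached from Hive's FileStatus and TableScanDesc types (names here are illustrative):

import java.util.HashMap;
import java.util.Map;

/** Simplified illustration of mapping bucket file names to their positional index. */
class BucketNameIndex {
  static Map<String, Integer> index(String[] sortedFileNames) {
    Map<String, Integer> mapping = new HashMap<>();
    for (int pos = 0; pos < sortedFileNames.length; pos++) {
      mapping.put(sortedFileNames[pos], pos);   // file name -> bucket position
    }
    return mapping;
  }

  public static void main(String[] args) {
    Map<String, Integer> m = index(new String[] {"000000_0", "000001_0", "000002_0"});
    System.out.println(m.get("000001_0"));      // 1
  }
}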
final TableScanOperator ts = (TableScanOperator) op;
if (ts.getNeededColumnIDs() == null) {
  allColumnsNeeded = true;
} else {
  neededColumnIDs.addAll(ts.getNeededColumnIDs());
  if (ts.getNeededNestedColumnPaths() != null) {
    neededNestedColumnPaths.addAll(ts.getNeededNestedColumnPaths());
  }
}
rowSchema = ts.getSchema();
ExprNodeGenericFuncDesc filterExpr = ts.getConf() == null ? null : ts.getConf().getFilterExpr();
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator scanOp = (TableScanOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  List<FieldNode> cols = cppCtx.genColLists((Operator<? extends OperatorDesc>) nd);
  if (cols == null && !scanOp.getConf().isGatherStats()) {
    scanOp.setNeededColumnIDs(null);
    return null;
  }
  cols = cols == null ? new ArrayList<FieldNode>() : cols;
  cppCtx.getPrunedColLists().put((Operator<? extends OperatorDesc>) nd, cols);
  RowSchema inputRS = scanOp.getSchema();
  setupNeededColumns(scanOp, inputRS, cols);
  return null;
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator tsOp = (TableScanOperator) nd;
  WalkerCtx walkerCtx = (WalkerCtx) procCtx;
  List<Integer> colIDs = tsOp.getNeededColumnIDs();
  TableScanDesc desc = tsOp.getConf();
  boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
  boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
      || (desc.getVirtualCols().isEmpty());
  boolean isSkipHF = desc.isNeedSkipHeaderFooters();
  if (noColNeeded && noVCneeded && !isSkipHF) {
    walkerCtx.setMayBeMetadataOnly(tsOp);
  }
  return nd;
}
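The check above marks a scan as a candidate for the metadata-only optimization only when nothing from the row data itself is needed: no projected columns, no virtual columns, and no header/footer skipping. A compact stand-alone restatement of that predicate (method and parameter names are illustrative, not Hive's API):

import java.util.List;

/** Simplified restatement of the metadata-only eligibility test. */
final class MetadataOnlyCheck {
  static boolean mayBeMetadataOnly(List<Integer> neededColumnIds,
      List<String> virtualColumns, boolean needSkipHeaderFooters) {
    boolean noColNeeded = neededColumnIds == null || neededColumnIds.isEmpty();
    boolean noVirtualColNeeded = virtualColumns == null || virtualColumns.isEmpty();
    // Only a scan that reads no real columns, no virtual columns, and does not
    // need to skip header/footer lines can be answered from metadata alone.
    return noColNeeded && noVirtualColNeeded && !needSkipHeaderFooters;
  }

  public static void main(String[] args) {
    System.out.println(mayBeMetadataOnly(List.of(), List.of(), false));  // true
    System.out.println(mayBeMetadataOnly(List.of(0), List.of(), false)); // false
  }
}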
private static void pushFilterToTopOfTableScan(
    SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp)
        throws UDFArgumentException {
  ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr();
  List<Operator<? extends OperatorDesc>> allChildren =
      Lists.newArrayList(tsOp.getChildOperators());
  for (Operator<? extends OperatorDesc> op : allChildren) {
    if (op instanceof FilterOperator) {
      // Merge the table scan's filter expression into the child filter's existing
      // predicate; the construction of newPred is elided in this excerpt.
      FilterOperator filterOp = (FilterOperator) op;
      filterOp.getConf().setPredicate(newPred);
    } else {
      // Interpose a new FilterOperator carrying the table scan's filter expression
      // between the table scan and this child.
      Operator<FilterDesc> newOp = OperatorFactory.get(tsOp.getCompilationOpContext(),
          new FilterDesc(tableScanExprNode.clone(), false),
          new RowSchema(tsOp.getSchema().getSignature()));
      tsOp.replaceChild(op, newOp);
      newOp.getParentOperators().add(tsOp);
      op.replaceParent(tsOp, newOp);
    .entrySet()) {
  TableScanOperator tableScanOp = topOpMap.getValue();
  if (!tableScanOp.isInsideView()) {
    Table tbl = tableScanOp.getConf().getTableMetadata();
    List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
    List<FieldSchema> columns = tbl.getCols();
    List<String> cols = new ArrayList<String>();
private void vectorizeTableScanOperatorInPlace(TableScanOperator tableScanOperator,
    VectorTaskColumnInfo vectorTaskColumnInfo) {
  TableScanDesc tableScanDesc = tableScanOperator.getConf();
  VectorTableScanDesc vectorTableScanDesc = new VectorTableScanDesc();
  tableScanDesc.setVectorDesc(vectorTableScanDesc);
  vectorTableScanDesc.setProjectedColumnDataTypePhysicalVariations(
      projectedDataColumnDataTypePhysicalVariation);
  tableScanOperator.getConf().setVectorized(true);
  List<Operator<? extends OperatorDesc>> children = tableScanOperator.getChildOperators();
  while (children.size() > 0) {
    children = dosetVectorDesc(children);
org.apache.hadoop.hive.ql.metadata.Table t = top.getConf().getTableMetadata();
Table tab = t.getTTable();
RowSchema rs = top.getSchema();
List<FieldSchema> cols = t.getAllCols();
Map<String, FieldSchema> fieldSchemaMap = new HashMap<String, FieldSchema>();
tai.setAlias(top.getConf().getAlias());
tai.setTable(tab);
for (ColumnInfo ci : rs.getSignature()) {
public static TableScanOperator createTemporaryTableScanOperator(
    CompilationOpContext ctx, RowSchema rowSchema) {
  TableScanOperator tableScanOp =
      (TableScanOperator) OperatorFactory.get(ctx, new TableScanDesc(null), rowSchema);
  // Set needed columns for this dummy TableScanOperator.
  List<Integer> neededColumnIds = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  List<ColumnInfo> parentColumnInfos = rowSchema.getSignature();
  for (int i = 0; i < parentColumnInfos.size(); i++) {
    neededColumnIds.add(i);
    neededColumnNames.add(parentColumnInfos.get(i).getInternalName());
  }
  tableScanOp.setNeededColumnIDs(neededColumnIds);
  tableScanOp.setNeededColumns(neededColumnNames);
  tableScanOp.setReferencedColumns(neededColumnNames);
  return tableScanOp;
}
/**
 * Collect table, partition and column level statistics.
 * @param conf
 *          - hive configuration
 * @param partList
 *          - partition list
 * @param colStatsCache
 *          - cache of column statistics
 * @param table
 *          - table
 * @param tableScanOperator
 *          - table scan operator
 * @return statistics object
 * @throws HiveException
 */
public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
    ColumnStatsList colStatsCache, Table table, TableScanOperator tableScanOperator)
    throws HiveException {
  // Column level statistics are required only for the columns that are needed.
  List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature();
  List<String> neededColumns = tableScanOperator.getNeededColumns();
  List<String> referencedColumns = tableScanOperator.getReferencedColumns();
  return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache,
      referencedColumns);
}
/**
 * The operator name for this operator type. This is used to construct the
 * rule for an operator.
 *
 * @return the operator name
 **/
@Override
public String getName() {
  return TableScanOperator.getOperatorName();
}
Preconditions.checkArgument(tableScan.getChildOperators().size() == 1
    && tableScan.getChildOperators().get(0) instanceof MapJoinOperator);
HashTableDummyDesc desc = new HashTableDummyDesc();
HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(
    tableScan.getCompilationOpContext(), desc);
dummyOp.getConf().setTbl(tableScan.getTableDesc());
MapJoinOperator mapJoinOp = (MapJoinOperator) tableScan.getChildOperators().get(0);
mapJoinOp.replaceParent(tableScan, dummyOp);
List<Operator<? extends OperatorDesc>> mapJoinChildren =

SparkHashTableSinkDesc hashTableSinkDesc = new SparkHashTableSinkDesc(mjDesc);
SparkHashTableSinkOperator hashTableSinkOp = (SparkHashTableSinkOperator) OperatorFactory.get(
    tableScan.getCompilationOpContext(), hashTableSinkDesc);
int[] valueIndex = mjDesc.getValueIndex(tag);
if (valueIndex != null) {
tableScan.replaceChild(mapJoinOp, hashTableSinkOp);
List<Operator<? extends OperatorDesc>> tableScanParents =
    new ArrayList<Operator<? extends OperatorDesc>>();