ColumnProjectionUtils.appendReadColumns(
    jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().isTranscationalTable(),
    ts.getConf().getAcidOperationalProperties());
AcidUtils.setValidWriteIdList(jobClone, ts.getConf());
ts.passExecContext(getExecContext());
ts.initialize(jobClone, new ObjectInspector[] {fetchOp.getOutputObjectInspector()});
fetchOp.clearFetchContext();
/**
 * Inserts a filter below the table scan operator, constructed from the
 * filter expression provided.
 * @param tableScanOp the table scan operator
 * @param filterExpr the filter expression
 */
private void insertFilterOnTop(TableScanOperator tableScanOp, ExprNodeDesc filterExpr) {
  // Get the top operator and its child; all operators here have a single parent
  Operator<? extends OperatorDesc> currChild = tableScanOp.getChildOperators().get(0);

  // Create the filter operator and update the parents and children appropriately
  tableScanOp.setChildOperators(null);
  currChild.setParentOperators(null);
  Operator<FilterDesc> filter = OperatorFactory.getAndMakeChild(
      new FilterDesc(filterExpr, false),
      new RowSchema(tableScanOp.getSchema().getSignature()), tableScanOp);
  OperatorFactory.makeChild(filter, currChild);
}
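The rewiring above follows the usual pattern for splicing a new operator between a parent and its only child: detach both sides, then relink them through the new node. A minimal stand-alone sketch of that pattern, using a simplified Node class rather than Hive's Operator hierarchy (all names here are illustrative):

import java.util.ArrayList;
import java.util.List;

/** Simplified illustration of splicing a new node between a parent and its only child. */
class Node {
  final String name;
  final List<Node> parents = new ArrayList<>();
  final List<Node> children = new ArrayList<>();

  Node(String name) {
    this.name = name;
  }

  /** Detaches parent -> child and relinks both through middle. */
  static void spliceBetween(Node parent, Node child, Node middle) {
    parent.children.remove(child);
    child.parents.remove(parent);
    parent.children.add(middle);
    middle.parents.add(parent);
    middle.children.add(child);
    child.parents.add(middle);
  }

  public static void main(String[] args) {
    Node scan = new Node("TS");
    Node select = new Node("SEL");
    scan.children.add(select);
    select.parents.add(scan);

    Node filter = new Node("FIL");
    spliceBetween(scan, select, filter);
    // The chain is now TS -> FIL -> SEL.
    System.out.println(scan.children.get(0).name + " -> " + filter.children.get(0).name);
  }
}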
/**
 * Other than gathering statistics for the ANALYZE command, the table scan operator
 * does nothing special beyond forwarding the row, since the table data is always
 * read by the mapper as part of the map-reduce framework. Should that assumption
 * stop holding, i.e. should the table data no longer be read only by the mapper,
 * this operator will be enhanced to read the table.
 **/
@Override
public void process(Object row, int tag) throws HiveException {
  if (rowLimit >= 0) {
    if (row instanceof VectorizedRowBatch) {
      VectorizedRowBatch batch = (VectorizedRowBatch) row;
      if (currCount >= rowLimit) {
        setDone(true);
        return;
      }
      // Truncate the batch so that no more than rowLimit rows are forwarded in total
      if (currCount + batch.size > rowLimit) {
        batch.size = rowLimit - currCount;
      }
      currCount += batch.size;
    } else if (currCount++ >= rowLimit) {
      setDone(true);
      return;
    }
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  forward(row, inputObjInspectors[tag]);
}
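The row-limit handling above clips the last vectorized batch instead of dropping it, so exactly rowLimit rows are forwarded. A minimal stand-alone sketch of the same accounting (class and field names are illustrative, not Hive's):

/** Simplified illustration of clipping a batch against a global row limit. */
class RowLimitClipper {
  private final int rowLimit;
  private int currCount = 0;

  RowLimitClipper(int rowLimit) {
    this.rowLimit = rowLimit;
  }

  /** Returns how many rows of a batch of the given size may still be forwarded. */
  int clip(int batchSize) {
    if (currCount >= rowLimit) {
      return 0;                        // limit already reached, emit nothing
    }
    int allowed = Math.min(batchSize, rowLimit - currCount);
    currCount += allowed;              // account only for the rows actually forwarded
    return allowed;
  }

  public static void main(String[] args) {
    RowLimitClipper clipper = new RowLimitClipper(100);
    System.out.println(clipper.clip(60));  // 60
    System.out.println(clipper.clip(60));  // 40 (the last batch is truncated)
    System.out.println(clipper.clip(60));  // 0  (the operator would call setDone(true))
  }
}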
@Override
public Operator<? extends OperatorDesc> clone() throws CloneNotSupportedException {
  TableScanOperator ts = (TableScanOperator) super.clone();
  ts.setNeededColumnIDs(new ArrayList<Integer>(getNeededColumnIDs()));
  ts.setNeededColumns(new ArrayList<String>(getNeededColumns()));
  ts.setReferencedColumns(new ArrayList<String>(getReferencedColumns()));
  return ts;
}
/**
 * Other than gathering statistics for the ANALYZE command, the table scan operator
 * does nothing special beyond forwarding the row, since the table data is always
 * read by the mapper as part of the map-reduce framework. Should that assumption
 * stop holding, i.e. should the table data no longer be read only by the mapper,
 * this operator will be enhanced to read the table.
 **/
@Override
public void process(Object row, int tag) throws HiveException {
  if (rowLimit >= 0) {
    if (checkSetDone(row, tag)) {
      return;
    }
  }
  if (conf != null && conf.isGatherStats()) {
    gatherStats(row);
  }
  if (vectorized) {
    vectorForward((VectorizedRowBatch) row);
  } else {
    forward(row, inputObjInspectors[tag]);
  }
}
@Override
public void closeOp(boolean abort) throws HiveException {
  if (getExecContext() != null && getExecContext().getFileId() == null) {
    updateFileId();
  }
  if (conf != null) {
    if (conf.isGatherStats() && stats.size() != 0) {
      publishStats();
    }
  }
  super.closeOp(abort);
}
ColumnProjectionUtils.appendReadColumns(
    jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns(), ts.getNeededNestedColumnPaths());
AcidUtils.setAcidOperationalProperties(jobClone, ts.getConf().isTranscationalTable(),
    ts.getConf().getAcidOperationalProperties());
AcidUtils.setValidWriteIdList(jobClone, ts.getConf());
List<String> neededNestedColumnPaths = new ArrayList<>();
List<String> referencedColumnNames = new ArrayList<String>();
TableScanDesc desc = scanOp.getConf();
List<VirtualColumn> virtualCols = desc.getVirtualCols();
List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();
if (scanOp.getConf().isGatherStats()) {
  cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
}
scanOp.setNeededColumnIDs(neededColumnIds);
scanOp.setNeededColumns(neededColumnNames);
scanOp.setNeededNestedColumnPaths(neededNestedColumnPaths);
scanOp.setReferencedColumns(referencedColumnNames);
private void storeBucketPathMapping(TableScanOperator tsOp, FileStatus[] srcs) {
  Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
  for (int pos = 0; pos < srcs.length; pos++) {
    if (ShimLoader.getHadoopShims().isDirectory(srcs[pos])) {
      throw new RuntimeException(
          "Was expecting '" + srcs[pos].getPath() + "' to be bucket file.");
    }
    bucketFileNameMapping.put(srcs[pos].getPath().getName(), pos);
  }
  tsOp.getConf().setBucketFileNameMapping(bucketFileNameMapping);
}
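The mapping above records each bucket file's position in the listed sources so it can later be looked up by file name. A plain-Java sketch of the same idea, detached from Hive's FileStatus and TableScanDesc types (names here are illustrative):

import java.util.HashMap;
import java.util.Map;

/** Simplified illustration of mapping bucket file names to their positional index. */
class BucketNameIndex {
  static Map<String, Integer> index(String[] sortedFileNames) {
    Map<String, Integer> mapping = new HashMap<>();
    for (int pos = 0; pos < sortedFileNames.length; pos++) {
      mapping.put(sortedFileNames[pos], pos);   // file name -> bucket position
    }
    return mapping;
  }

  public static void main(String[] args) {
    Map<String, Integer> m = index(new String[] {"000000_0", "000001_0", "000002_0"});
    System.out.println(m.get("000001_0"));      // 1
  }
}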
final TableScanOperator ts = (TableScanOperator) op;
if (ts.getNeededColumnIDs() == null) {
  allColumnsNeeded = true;
} else {
  neededColumnIDs.addAll(ts.getNeededColumnIDs());
  if (ts.getNeededNestedColumnPaths() != null) {
    neededNestedColumnPaths.addAll(ts.getNeededNestedColumnPaths());
  }
}
rowSchema = ts.getSchema();
ExprNodeGenericFuncDesc filterExpr = ts.getConf() == null ? null : ts.getConf().getFilterExpr();
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator scanOp = (TableScanOperator) nd;
  ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx;
  List<FieldNode> cols = cppCtx.genColLists((Operator<? extends OperatorDesc>) nd);
  if (cols == null && !scanOp.getConf().isGatherStats()) {
    scanOp.setNeededColumnIDs(null);
    return null;
  }
  cols = cols == null ? new ArrayList<FieldNode>() : cols;
  cppCtx.getPrunedColLists().put((Operator<? extends OperatorDesc>) nd, cols);
  RowSchema inputRS = scanOp.getSchema();
  setupNeededColumns(scanOp, inputRS, cols);
  return null;
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator tsOp = (TableScanOperator) nd;
  WalkerCtx walkerCtx = (WalkerCtx) procCtx;
  List<Integer> colIDs = tsOp.getNeededColumnIDs();
  TableScanDesc desc = tsOp.getConf();
  boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
  boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
      || (desc.getVirtualCols().isEmpty());
  boolean isSkipHF = desc.isNeedSkipHeaderFooters();
  if (noColNeeded && noVCneeded && !isSkipHF) {
    walkerCtx.setMayBeMetadataOnly(tsOp);
  }
  return nd;
}
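The check above marks a scan as a candidate for the metadata-only optimization only when nothing from the row data itself is needed: no projected columns, no virtual columns, and no header/footer skipping. A compact stand-alone restatement of that predicate (method and parameter names are illustrative, not Hive's API):

import java.util.List;

/** Simplified restatement of the metadata-only eligibility test. */
final class MetadataOnlyCheck {
  static boolean mayBeMetadataOnly(List<Integer> neededColumnIds,
      List<String> virtualColumns, boolean needSkipHeaderFooters) {
    boolean noColNeeded = neededColumnIds == null || neededColumnIds.isEmpty();
    boolean noVirtualColNeeded = virtualColumns == null || virtualColumns.isEmpty();
    // Only a scan that reads no real columns, no virtual columns, and does not
    // need to skip header/footer lines can be answered from metadata alone.
    return noColNeeded && noVirtualColNeeded && !needSkipHeaderFooters;
  }

  public static void main(String[] args) {
    System.out.println(mayBeMetadataOnly(List.of(), List.of(), false));  // true
    System.out.println(mayBeMetadataOnly(List.of(0), List.of(), false)); // false
  }
}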
private static void pushFilterToTopOfTableScan(
    SharedWorkOptimizerCache optimizerCache, TableScanOperator tsOp)
        throws UDFArgumentException {
  ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr();
  List<Operator<? extends OperatorDesc>> allChildren =
      Lists.newArrayList(tsOp.getChildOperators());
  for (Operator<? extends OperatorDesc> op : allChildren) {
    if (op instanceof FilterOperator) {
      // Merge the table scan's filter expression into the child filter's existing
      // predicate; the construction of newPred is elided in this excerpt.
      FilterOperator filterOp = (FilterOperator) op;
      filterOp.getConf().setPredicate(newPred);
    } else {
      // Interpose a new FilterOperator carrying the table scan's filter expression
      // between the table scan and this child.
      Operator<FilterDesc> newOp = OperatorFactory.get(tsOp.getCompilationOpContext(),
          new FilterDesc(tableScanExprNode.clone(), false),
          new RowSchema(tsOp.getSchema().getSignature()));
      tsOp.replaceChild(op, newOp);
      newOp.getParentOperators().add(tsOp);
      op.replaceParent(tsOp, newOp);
    .entrySet()) {
  TableScanOperator tableScanOp = topOpMap.getValue();
  if (!tableScanOp.isInsideView()) {
    Table tbl = tableScanOp.getConf().getTableMetadata();
    List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
    List<FieldSchema> columns = tbl.getCols();
    List<String> cols = new ArrayList<String>();
private void vectorizeTableScanOperatorInPlace(TableScanOperator tableScanOperator,
    VectorTaskColumnInfo vectorTaskColumnInfo) {
  TableScanDesc tableScanDesc = tableScanOperator.getConf();
  VectorTableScanDesc vectorTableScanDesc = new VectorTableScanDesc();
  tableScanDesc.setVectorDesc(vectorTableScanDesc);
  vectorTableScanDesc.setProjectedColumnDataTypePhysicalVariations(
      projectedDataColumnDataTypePhysicalVariation);
  tableScanOperator.getConf().setVectorized(true);
  List<Operator<? extends OperatorDesc>> children = tableScanOperator.getChildOperators();
  while (children.size() > 0) {
    children = dosetVectorDesc(children);
org.apache.hadoop.hive.ql.metadata.Table t = top.getConf().getTableMetadata();
Table tab = t.getTTable();
RowSchema rs = top.getSchema();
List<FieldSchema> cols = t.getAllCols();
Map<String, FieldSchema> fieldSchemaMap = new HashMap<String, FieldSchema>();
tai.setAlias(top.getConf().getAlias());
tai.setTable(tab);
for (ColumnInfo ci : rs.getSignature()) {
public static TableScanOperator createTemporaryTableScanOperator(
    CompilationOpContext ctx, RowSchema rowSchema) {
  TableScanOperator tableScanOp =
      (TableScanOperator) OperatorFactory.get(ctx, new TableScanDesc(null), rowSchema);
  // Set needed columns for this dummy TableScanOperator.
  List<Integer> neededColumnIds = new ArrayList<Integer>();
  List<String> neededColumnNames = new ArrayList<String>();
  List<ColumnInfo> parentColumnInfos = rowSchema.getSignature();
  for (int i = 0; i < parentColumnInfos.size(); i++) {
    neededColumnIds.add(i);
    neededColumnNames.add(parentColumnInfos.get(i).getInternalName());
  }
  tableScanOp.setNeededColumnIDs(neededColumnIds);
  tableScanOp.setNeededColumns(neededColumnNames);
  tableScanOp.setReferencedColumns(neededColumnNames);
  return tableScanOp;
}
/**
 * Collect table, partition and column level statistics.
 * @param conf
 *          - hive configuration
 * @param partList
 *          - partition list
 * @param colStatsCache
 *          - cache of column statistics
 * @param table
 *          - table
 * @param tableScanOperator
 *          - table scan operator
 * @return statistics object
 * @throws HiveException
 */
public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
    ColumnStatsList colStatsCache, Table table, TableScanOperator tableScanOperator)
    throws HiveException {
  // Column level statistics are required only for the columns that are needed.
  List<ColumnInfo> schema = tableScanOperator.getSchema().getSignature();
  List<String> neededColumns = tableScanOperator.getNeededColumns();
  List<String> referencedColumns = tableScanOperator.getReferencedColumns();
  return collectStatistics(conf, partList, table, schema, neededColumns, colStatsCache,
      referencedColumns);
}
/**
 * The operator name for this operator type. This is used to construct the
 * rule for an operator.
 *
 * @return the operator name
 **/
@Override
public String getName() {
  return TableScanOperator.getOperatorName();
}
Preconditions.checkArgument(tableScan.getChildOperators().size() == 1
    && tableScan.getChildOperators().get(0) instanceof MapJoinOperator);
HashTableDummyDesc desc = new HashTableDummyDesc();
HashTableDummyOperator dummyOp = (HashTableDummyOperator) OperatorFactory.get(
    tableScan.getCompilationOpContext(), desc);
dummyOp.getConf().setTbl(tableScan.getTableDesc());
MapJoinOperator mapJoinOp = (MapJoinOperator) tableScan.getChildOperators().get(0);
mapJoinOp.replaceParent(tableScan, dummyOp);
List<Operator<? extends OperatorDesc>> mapJoinChildren =

SparkHashTableSinkDesc hashTableSinkDesc = new SparkHashTableSinkDesc(mjDesc);
SparkHashTableSinkOperator hashTableSinkOp = (SparkHashTableSinkOperator) OperatorFactory.get(
    tableScan.getCompilationOpContext(), hashTableSinkDesc);
int[] valueIndex = mjDesc.getValueIndex(tag);
if (valueIndex != null) {
tableScan.replaceChild(mapJoinOp, hashTableSinkOp);
List<Operator<? extends OperatorDesc>> tableScanParents =
    new ArrayList<Operator<? extends OperatorDesc>>();