@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
    Object... nodeOutputs) throws SemanticException {
  TableScanOperator op = (TableScanOperator) nd;
  TableScanDesc conf = op.getConf();
  ConstantPropagateProcCtx cppCtx = (ConstantPropagateProcCtx) ctx;
  Map<ColumnInfo, ExprNodeDesc> constants = cppCtx.getPropagatedConstants(op);
  cppCtx.getOpToConstantExprs().put(op, constants);
  ExprNodeGenericFuncDesc pred = conf.getFilterExpr();
  if (pred == null) {
    return null;
  }
  // Fold the pushed-down filter with the propagated constants; if the whole
  // predicate collapses to a literal, drop it from the scan.
  ExprNodeDesc constant = foldExpr(pred, constants, cppCtx, op, 0, false);
  if (constant instanceof ExprNodeGenericFuncDesc) {
    conf.setFilterExpr((ExprNodeGenericFuncDesc) constant);
  } else {
    conf.setFilterExpr(null);
  }
  return null;
}
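A minimal sketch (not Hive source) of what the final branch accomplishes: once folding collapses the pushed-down predicate to a literal, it is no longer an ExprNodeGenericFuncDesc, so the scan's filter is simply cleared. The class and helper names here are hypothetical.

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

class FoldedFilterSketch {
  // Hypothetical helper mirroring the instanceof branch above.
  static void applyFolded(TableScanDesc conf, ExprNodeDesc folded) {
    if (folded instanceof ExprNodeGenericFuncDesc) {
      conf.setFilterExpr((ExprNodeGenericFuncDesc) folded); // still a function tree
    } else {
      conf.setFilterExpr(null); // folded to a literal, e.g. WHERE 1 = 1 -> true
    }
  }

  static void demo(TableScanDesc conf) {
    applyFolded(conf, new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, Boolean.TRUE));
  }
}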
@Override
public Object clone() {
  // Copy the virtual-column list; the table metadata reference is shared.
  List<VirtualColumn> vcs = new ArrayList<VirtualColumn>(getVirtualCols());
  return new TableScanDesc(getAlias(), vcs, this.tableMetadata);
}
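A small sketch of the copy semantics, assuming Hive's ql classes on the classpath: the virtual-column list is duplicated, while the Table metadata is shared by reference.

import java.util.ArrayList;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

class CloneSemanticsSketch {
  static void demo() {
    Table t = new Table("db", "t1");
    TableScanDesc original = new TableScanDesc("a", new ArrayList<VirtualColumn>(), t);
    TableScanDesc copy = (TableScanDesc) original.clone();
    // Independent virtual-column lists, shared table metadata.
    assert copy.getVirtualCols() != original.getVirtualCols();
    assert copy.getTableMetadata() == original.getTableMetadata();
  }
}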
String partitionSpecs;
inputFileChanged = false;
if (conf.getPartColumns() == null || conf.getPartColumns().size() == 0) {
  partitionSpecs = ""; // non-partitioned
} else {
  // Derive the partition spec from the current row: all rows seen by this
  // operator instance belong to the same partition.
  assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT :
      "input object inspector is not struct";
  List<Object> writable = new ArrayList<Object>(conf.getPartColumns().size());
  List<String> values = new ArrayList<String>(conf.getPartColumns().size());
  int dpStartCol = 0; // position of the first partition column in the row
  StructObjectInspector soi = (StructObjectInspector) inputObjInspectors[0];
  for (StructField sf : soi.getAllStructFieldRefs()) {
    String fn = sf.getFieldName();
    if (!conf.getPartColumns().contains(fn)) {
      dpStartCol++;
    } else {
      break;
    }
  }
  // row is the current input row being processed.
  ObjectInspectorUtils.partialCopyToStandardObject(writable, row, dpStartCol,
      conf.getPartColumns().size(), (StructObjectInspector) inputObjInspectors[0],
      ObjectInspectorCopyOption.WRITABLE);
  for (Object o : writable) {
    // null partition values map to the configured default partition name
    values.add(o == null ? defaultPartitionName : o.toString());
  }
  partitionSpecs = FileUtils.makePartName(conf.getPartColumns(), values);
  if (LOG.isInfoEnabled()) {
    LOG.info("Stats Gathering found a new partition spec = " + partitionSpecs);
  }
}
@Override
public boolean isSame(OperatorDesc other) {
  if (getClass().getName().equals(other.getClass().getName())) {
    TableScanDesc otherDesc = (TableScanDesc) other;
    return Objects.equals(getQualifiedTable(), otherDesc.getQualifiedTable())
        && ExprNodeDescUtils.isSame(getFilterExpr(), otherDesc.getFilterExpr())
        && getRowLimit() == otherDesc.getRowLimit()
        && isGatherStats() == otherDesc.isGatherStats();
  }
  return false;
}
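By way of illustration (a sketch, not a test from the codebase): two descriptors over the same table and alias, with no filters and default limits, should compare as the same under isSame.

import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;

class IsSameSketch {
  static boolean demo() {
    Table t = new Table("db", "t1");
    TableScanDesc d1 = new TableScanDesc("a", t);
    TableScanDesc d2 = new TableScanDesc("a", t);
    // Same qualified table, both filters null, default row limit and stats flag.
    return d1.isSame(d2); // expected: true
  }
}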
@Override
public boolean equals(TableScanOperator op1, TableScanOperator op2) {
  Preconditions.checkNotNull(op1);
  Preconditions.checkNotNull(op2);
  TableScanDesc op1Conf = op1.getConf();
  TableScanDesc op2Conf = op2.getConf();
  return compareString(op1Conf.getAlias(), op2Conf.getAlias())
      && compareExprNodeDesc(op1Conf.getFilterExpr(), op2Conf.getFilterExpr())
      && op1Conf.getRowLimit() == op2Conf.getRowLimit()
      && op1Conf.isGatherStats() == op2Conf.isGatherStats();
}
if (!qbp.isAnalyzeCommand()) {
  tsDesc.setGatherStats(false);
  return;
}

// ANALYZE: route interim stats to a temp dir and mark the scan for gathering.
String statsTmpLoc = ctx.getTempDirForInterimJobPath(tab.getPath()).toString();
LOG.debug("Set stats collection dir : " + statsTmpLoc);
tsDesc.setTmpStatsDir(statsTmpLoc);
tsDesc.setGatherStats(true);
tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));

// Register the virtual columns that carry statistics.
List<VirtualColumn> vcList = new ArrayList<VirtualColumn>();
for (VirtualColumn vc : VirtualColumn.getStatsRegistry(conf)) {
  vcList.add(vc);
}
tsDesc.addVirtualCols(vcList);

// Aggregation key prefix: db_name.table_name (k is the encoded table name built earlier).
tsDesc.setStatsAggPrefix(tab.getDbName() + "." + k);

if (tab.isPartitioned()) {
  List<String> cols = new ArrayList<String>();
  if (qbp.getAnalyzeRewrite() != null) {
    // Column-stats rewrite: all partition columns are needed.
    for (FieldSchema fs : tab.getPartCols()) {
      cols.add(fs.getName());
    }
    tsDesc.setPartColumns(cols);
    return;
  }
  Map<String, String> partSpec = qbp.getTableSpec(alias).getPartSpec();
  if (partSpec != null) {
    cols.addAll(partSpec.keySet());
    tsDesc.setPartColumns(cols);
  } else {
    throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
  }
}
Utilities.setColumnTypeList(jobConf, tableScan);

// Push down the scan's filter, if any.
ExprNodeGenericFuncDesc filterExpr = (ExprNodeGenericFuncDesc) scanDesc.getFilterExpr();
if (filterExpr == null) {
  return;
}

// Disable pushdown on MR when several table aliases share one jobConf,
// unless the storage handler can evaluate predicates itself.
if (mrwork != null && mrwork.getAliases() != null && mrwork.getAliases().size() > 1
    && jobConf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname).equals("mr")
    && (scanDesc.getTableMetadata() == null
        || !(scanDesc.getTableMetadata().getStorageHandler() instanceof HiveStoragePredicateHandler))) {
  return;
}

String serializedFilterObj = scanDesc.getSerializedFilterObject();
String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
if (!hasObj) {
  Serializable filterObject = scanDesc.getFilterObject();
  if (filterObject != null) {
    serializedFilterObj = SerializationUtilities.serializeObject(filterObject);
  }
}
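The hand-off that typically follows this fragment, sketched as a continuation of the same variables: the serialized forms are published under the conf keys TableScanDesc defines, so readers and storage handlers can pick them up.

// Publish the serialized filter forms to the job configuration.
if (serializedFilterObj != null) {
  jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, serializedFilterObj);
}
if (!hasExpr) {
  serializedFilterExpr = SerializationUtilities.serializeExpression(filterExpr);
}
jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr);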
if (top == null) {
  TableScanDesc tsDesc = new TableScanDesc(alias, vcList, tab);
  setupStats(tsDesc, qb.getParseInfo(), tab, alias, rwsch);

  // Apply a row-count split sample, if one was given for this alias.
  SplitSample sample = nameToSplitSample.get(alias_id);
  if (sample != null && sample.getRowCount() != null) {
    tsDesc.setRowLimit(sample.getRowCount());
    nameToSplitSample.remove(alias_id);
  }

  tsDesc.setOpProps(properties);
  // ... create the TableScanOperator for tsDesc (elided) ...
  top.setBucketingVersion(tsDesc.getTableMetadata().getBucketingVersion());
} else {
  rwsch = opParseCtx.get(top).getRowResolver();
}

// Record the TABLESAMPLE clause, if present, on the scan descriptor.
if (ts != null) {
  TableScanOperator tableScanOp = top;
  tableScanOp.getConf().setTableSample(ts);
  int num = ts.getNumerator();
  int den = ts.getDenominator();
  // ... bucket pruning based on num/den (elided) ...
}
// Collect dynamic-pruning conditions from the filter and from the scan's
// pushed-down filter expression.
collect(desc.getPredicate(), removalContext);
CollectContext tsRemovalContext = new CollectContext();
collect(ts.getConf().getFilterExpr(), tsRemovalContext);

Table table = ts.getConf().getTableMetadata();
// ... determine which collected conditions are redundant (elided) ...

if (tsChild.isSame(child)) {
  // Drop the redundant condition from the scan's pushed-down filter.
  if (tsParent == null) {
    // The condition was the whole filter.
    ts.getConf().setFilterExpr(null);
  } else {
    int i = tsParent.getChildren().indexOf(tsChild);
    // ... replace child i of tsParent with the surviving subtree (elided) ...
  }
}
TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD());
tsd.setPartColumns(partColNames);
tsd.setNeededColumnIDs(neededColumnIDs);
tsd.setNeededColumns(neededColumnNames);
// Build a descriptor over the index table and retarget the scan operator to it.
TableScanDesc indexTableScanDesc = new TableScanDesc(indexTableHandle);
indexTableScanDesc.setGatherStats(false);
indexTableScanDesc.setStatsAggPrefix(k);
scanOperator.setConf(indexTableScanDesc);

// The operator now scans the index table under its new alias.
scanOperator.getConf().setAlias(newAlias);
scanOperator.setAlias(indexTableName);
topOps.put(newAlias, scanOperator);
private Task<?> genTableStats(GenTezProcContext context, TableScanOperator tableScan)
    throws HiveException {
  Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata()
      .getInputFormatClass();
  ParseContext parseContext = context.parseContext;
  Table table = tableScan.getConf().getTableMetadata();
  List<Partition> partitions = new ArrayList<>();
  if (table.isPartitioned()) {
    partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
  }
  TableSpec tableSpec = new TableSpec(table, partitions);
  tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);

  if (inputFormat.equals(OrcInputFormat.class)) {
    // ORC can gather stats without launching a job.
    StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata()
        .getTableSpec());
    snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
        HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    // ... wrap snjWork in a task and return it (elided) ...
  } else {
    StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
    statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
    statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
    statsWork.setSourceTask(context.currentTask);
    statsWork.setStatsReliable(parseContext.getConf().getBoolVar(
        HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    // ... wrap statsWork in a task and return it (elided) ...
  }
}
if (ts.getConf() != null && ts.getConf().getFilterExpr() != null) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Serializing: " + ts.getConf().getFilterExpr().getExprString());
  }
  ts.getConf().setSerializedFilterExpr(
      SerializationUtilities.serializeExpression(ts.getConf().getFilterExpr()));
}
if (ts.getConf() != null && ts.getConf().getFilterObject() != null) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Serializing: " + ts.getConf().getFilterObject());
  }
  ts.getConf().setSerializedFilterObject(
      SerializationUtilities.serializeObject(ts.getConf().getFilterObject()));
}
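For context, a round-trip sketch assuming only SerializationUtilities: the string stored in serializedFilterExpr can be rebuilt on the task side.

import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;

class FilterExprRoundTripSketch {
  // Serialize on the planning side, deserialize where the scan executes.
  static ExprNodeGenericFuncDesc roundTrip(ExprNodeGenericFuncDesc filter) {
    String serialized = SerializationUtilities.serializeExpression(filter);
    return SerializationUtilities.deserializeExpression(serialized);
  }
}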
if (!qbp.isAnalyzeCommand()) {
  tsDesc.setGatherStats(false);
} else {
  if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
    conf.set(StatsSetupConst.STATS_TMP_LOC, statsTmpLoc);
  }
  tsDesc.setGatherStats(true);
  tsDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
  tsDesc.setMaxStatsKeyPrefixLength(StatsFactory.getMaxPrefixLength(conf));

  // Register the statistics virtual columns on the scan.
  for (VirtualColumn vc : VirtualColumn.getStatsRegistry(conf)) {
    vcList.add(vc);
  }
  tsDesc.addVirtualCols(vcList);

  // Partition columns come from the analyzed partition spec.
  List<String> cols = new ArrayList<String>();
  cols.addAll(partSpec.keySet());
  tsDesc.setPartColumns(cols);

  // Aggregation key prefix: db_name.table_name + partition spec.
  tsDesc.setStatsAggPrefix(tab.getDbName() + "." + k);
}
List<String> referencedColumnNames = new ArrayList<String>();
TableScanDesc desc = scanOp.getConf();
List<VirtualColumn> virtualCols = desc.getVirtualCols();
List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();

// Stats gathering needs RAWDATASIZE even if no query column references it.
if (scanOp.getConf().isGatherStats()) {
  cols.add(new FieldNode(VirtualColumn.RAWDATASIZE.getName()));
}
// ... prune virtualCols into newVirtualCols based on the referenced columns (elided) ...
desc.setVirtualCols(newVirtualCols);
scanOp.setNeededColumnIDs(neededColumnIds);
scanOp.setNeededColumns(neededColumnNames);
Table tbl = tableScanDesc.getTableMetadata();
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTINDEXFILTER)) {
  // Keep the original predicate on the scan so index filtering can use it later.
  tableScanDesc.setFilterExpr(originalPredicate);
}
if (!(tbl.getStorageHandler() instanceof HiveStoragePredicateHandler)) {
  // The storage handler cannot decompose predicates; leave the predicate in place.
  return originalPredicate;
}
// ... ask the storage handler to decompose the predicate (elided) ...
tableScanDesc.setFilterExpr(decomposed.pushedPredicate);
tableScanDesc.setFilterObject(decomposed.pushedPredicateObject);
private Operator<TableScanDesc> getTsOp(int i) {
  Table tblMetadata = new Table("db", "table");
  TableScanDesc desc = new TableScanDesc("alias"/*+ cCtx.nextOperatorId()*/, tblMetadata);
  List<ExprNodeDesc> as = Lists.newArrayList(
      new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, Integer.valueOf(i)),
      new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "c1", "aa", false));
  ExprNodeGenericFuncDesc f1 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, udf, as);
  desc.setFilterExpr(f1);
  Operator<TableScanDesc> ts = OperatorFactory.get(cCtx, desc);
  return ts;
}
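A sketch of how such a helper tends to be used in these operator-comparison tests; the test name is illustrative, and logicalEquals is Operator's structural comparison.

@Test
public void testDifferentFilterConstants() {
  Operator<TableScanDesc> ts0 = getTsOp(0);
  Operator<TableScanDesc> ts1 = getTsOp(1);
  // Same table and alias, but the filter constants differ, so the scans
  // should not compare as logically equal.
  assertFalse(ts0.logicalEquals(ts1));
}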
public static Set<Partition> getConfirmedPartitionsForScan(TableScanOperator tableScanOp) {
  Set<Partition> confirmedPartns = new HashSet<Partition>();
  TableSpec tblSpec = tableScanOp.getConf().getTableMetadata().getTableSpec();
  if (tblSpec.specType == TableSpec.SpecType.STATIC_PARTITION) {
    // static partition
    if (tblSpec.partHandle != null) {
      confirmedPartns.add(tblSpec.partHandle);
    } else {
      // partial partition spec has null partHandle
      confirmedPartns.addAll(tblSpec.partitions);
    }
  } else if (tblSpec.specType == TableSpec.SpecType.DYNAMIC_PARTITION) {
    // dynamic partition
    confirmedPartns.addAll(tblSpec.partitions);
  }
  return confirmedPartns;
}
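A usage sketch under the assumption that the caller tracks write entities; tableScanOp and outputs are stand-ins for the surrounding analyzer's state.

Set<Partition> confirmed = getConfirmedPartitionsForScan(tableScanOp);
for (Partition p : confirmed) {
  // Register each confirmed partition as an output of the ANALYZE statement.
  outputs.add(new WriteEntity(p, WriteEntity.WriteType.DDL_NO_LOCK));
}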
public MapWork createMapWork(GenTezProcContext context, Operator<?> root,
    TezWork tezWork, PrunedPartitionList partitions) throws SemanticException {
  assert root.getParentOperators().isEmpty();
  MapWork mapWork = new MapWork(Utilities.MAPNAME + context.nextSequenceNumber());
  LOG.debug("Adding map work (" + mapWork.getName() + ") for " + root);

  // map work starts with table scan operators
  assert root instanceof TableScanOperator;
  TableScanOperator ts = (TableScanOperator) root;
  String alias = ts.getConf().getAlias();

  setupMapWork(mapWork, context, partitions, ts, alias);

  if (ts.getConf().getTableMetadata() != null && ts.getConf().getTableMetadata().isDummyTable()) {
    mapWork.setDummyTableScan(true);
  }

  if (ts.getConf().getNumBuckets() > 0) {
    mapWork.setIncludedBuckets(ts.getConf().getIncludedBuckets());
  }

  // add new item to the tez work
  tezWork.add(mapWork);
  return mapWork;
}
private void publishStats() throws HiveException {
  boolean isStatsReliable = conf.isStatsReliable();

  // Initialize the publisher and point it at the scan's temporary stats dir.
  StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
  StatsCollectionContext sc = new StatsCollectionContext(jc);
  sc.setStatsTmpDir(conf.getTmpStatsDir());
  sc.setContextSuffix(getOperatorId());
  if (!statsPublisher.connect(sc)) {
    // Stats gathering should not block the query unless stats are marked reliable.
    if (isStatsReliable) {
      throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg());
    }
    return;
  }

  // Publish one entry per partition spec collected by this operator.
  for (String pspecs : stats.keySet()) {
    String prefix = Utilities.join(conf.getStatsAggPrefix(), pspecs);
    // ... build the key and publish the gathered counters (elided) ...
  }
}
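Inside the elided loop body, publishing a single counter might look like the following sketch; StatsSetupConst.ROW_COUNT and publishStat are existing Hive APIs, while rowCount is a stand-in for the gathered value.

Map<String, String> statsToPublish = new HashMap<String, String>();
statsToPublish.put(StatsSetupConst.ROW_COUNT, Long.toString(rowCount));
// publishStat returns false on failure; honor the reliable-stats setting.
if (!statsPublisher.publishStat(prefix, statsToPublish) && isStatsReliable) {
  throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg());
}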