/**
 * Copy constructor: seeds this bucket map-join context from a
 * {@link MapJoinDesc}, carrying over the big-table alias plus the three
 * bucket/file mappings the descriptor holds.
 *
 * @param clone the map-join descriptor whose bucket metadata is copied
 */
public BucketMapJoinContext(MapJoinDesc clone) {
  // Per-alias mapping of small-table bucket file names.
  this.aliasBucketFileNameMapping = clone.getAliasBucketFileNameMapping();
  // Bucket count keyed by big-table file name.
  this.bucketFileNameMapping = clone.getBigTableBucketNumMapping();
  // Partition-spec -> file mapping for a partitioned big table.
  this.bigTablePartSpecToFileMapping = clone.getBigTablePartSpecToFileMapping();
  // Alias under which the big (streamed) table is known.
  this.mapJoinBigTableAlias = clone.getBigTableAlias();
}
/**
 * Copy constructor: seeds this bucket map-join context from a
 * {@link MapJoinDesc}, carrying over the big-table alias plus the three
 * bucket/file mappings the descriptor holds.
 *
 * @param clone the map-join descriptor whose bucket metadata is copied
 */
public BucketMapJoinContext(MapJoinDesc clone) {
  // Per-alias mapping of small-table bucket file names.
  this.aliasBucketFileNameMapping = clone.getAliasBucketFileNameMapping();
  // Bucket count keyed by big-table file name.
  this.bucketFileNameMapping = clone.getBigTableBucketNumMapping();
  // Partition-spec -> file mapping for a partitioned big table.
  this.bigTablePartSpecToFileMapping = clone.getBigTablePartSpecToFileMapping();
  // Alias under which the big (streamed) table is known.
  this.mapJoinBigTableAlias = clone.getBigTableAlias();
}
/**
 * Flags a map join that is a cross product: when the join has no key
 * expressions every row pair matches, so a warning naming the operator,
 * its big-table alias, and the task is recorded in {@code warnings}.
 *
 * @param nd          node being visited; expected to be an
 *                    {@link AbstractMapJoinOperator}
 * @param stack       walker's operator stack (unused)
 * @param procCtx     processor context (unused)
 * @param nodeOutputs outputs of previously visited nodes (unused)
 * @return always {@code null}; findings accumulate in {@code warnings}
 * @throws SemanticException per the NodeProcessor contract
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  @SuppressWarnings("unchecked")
  AbstractMapJoinOperator<? extends MapJoinDesc> mjOp =
      (AbstractMapJoinOperator<? extends MapJoinDesc>) nd;
  MapJoinDesc mjDesc = mjOp.getConf();

  // Fixed typo: was "bigTablAlias".
  String bigTableAlias = mjDesc.getBigTableAlias();
  if (bigTableAlias == null) {
    // Fall back to the alias of a TableScanOperator parent; if several are
    // present the last one wins (preserves the original scan order choice).
    Operator<? extends OperatorDesc> parent = null;
    for (Operator<? extends OperatorDesc> op : mjOp.getParentOperators()) {
      if (op instanceof TableScanOperator) {
        parent = op;
      }
    }
    if (parent != null) {
      TableScanDesc tDesc = ((TableScanOperator) parent).getConf();
      bigTableAlias = tDesc.getAlias();
    }
  }
  if (bigTableAlias == null) {
    bigTableAlias = "?";
  }

  // Any one side's key list suffices: all join inputs share key arity.
  List<ExprNodeDesc> joinExprs = mjDesc.getKeys().values().iterator().next();
  if (joinExprs.isEmpty()) {
    warnings.add(String.format(
        "Map Join %s[bigTable=%s] in task '%s' is a cross product",
        mjOp.toString(), bigTableAlias, taskName));
  }
  return null;
}
}
/**
 * Flags a map join that is a cross product: when the join has no key
 * expressions every row pair matches, so a warning naming the operator,
 * its big-table alias, and the task is recorded in {@code warnings}.
 *
 * @param nd          node being visited; expected to be an
 *                    {@link AbstractMapJoinOperator}
 * @param stack       walker's operator stack (unused)
 * @param procCtx     processor context (unused)
 * @param nodeOutputs outputs of previously visited nodes (unused)
 * @return always {@code null}; findings accumulate in {@code warnings}
 * @throws SemanticException per the NodeProcessor contract
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  @SuppressWarnings("unchecked")
  AbstractMapJoinOperator<? extends MapJoinDesc> mjOp =
      (AbstractMapJoinOperator<? extends MapJoinDesc>) nd;
  MapJoinDesc mjDesc = mjOp.getConf();

  // Fixed typo: was "bigTablAlias".
  String bigTableAlias = mjDesc.getBigTableAlias();
  if (bigTableAlias == null) {
    // Fall back to the alias of a TableScanOperator parent; if several are
    // present the last one wins (preserves the original scan order choice).
    Operator<? extends OperatorDesc> parent = null;
    for (Operator<? extends OperatorDesc> op : mjOp.getParentOperators()) {
      if (op instanceof TableScanOperator) {
        parent = op;
      }
    }
    if (parent != null) {
      TableScanDesc tDesc = ((TableScanOperator) parent).getConf();
      bigTableAlias = tDesc.getAlias();
    }
  }
  if (bigTableAlias == null) {
    bigTableAlias = "?";
  }

  // Any one side's key list suffices: all join inputs share key arity.
  List<ExprNodeDesc> joinExprs = mjDesc.getKeys().values().iterator().next();
  if (joinExprs.isEmpty()) {
    warnings.add(String.format(
        "Map Join %s[bigTable=%s] in task '%s' is a cross product",
        mjOp.toString(), bigTableAlias, taskName));
  }
  return null;
}
}
private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException { if (!checkConvertJoinBucketMapJoin(joinOp, context, bigTablePosition, tezBucketJoinProcCtx)) { LOG.info("Check conversion to bucket map join failed."); return false; } MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition, true); if (mapJoinOp == null) { LOG.debug("Conversion to bucket map join failed."); return false; } MapJoinDesc joinDesc = mapJoinOp.getConf(); joinDesc.setBucketMapJoin(true); // we can set the traits for this join operator OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null, joinOp.getOpTraits().getNumReduceSinks()); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); setNumberOfBucketsOnChildren(mapJoinOp); // Once the conversion is done, we can set the partitioner to bucket cols on the small table Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>(); bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets()); joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping); return true; }
bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets()); joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping);
mapJoinOp.getConf().getConds()) < 0) { throw new SemanticException( ErrorMsg.INVALID_BIGTABLE_MAPJOIN.format(mapJoinOp.getConf().getBigTableAlias()));
mapJoinOp.getConf().getConds()) < 0) { throw new SemanticException( ErrorMsg.INVALID_BIGTABLE_MAPJOIN.format(mapJoinOp.getConf().getBigTableAlias()));
currMapJoinOp.getConf().getBigTableBucketNumMapping()); localPlan.setInputFileChangeSensitive(true); bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); bucketMJCxt .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
currMapJoinOp.getConf().getBigTableBucketNumMapping()); localPlan.setInputFileChangeSensitive(true); bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); bucketMJCxt .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
/**
 * Copy constructor: seeds this bucket map-join context from a
 * {@link MapJoinDesc}, carrying over the big-table alias plus the three
 * bucket/file mappings the descriptor holds.
 *
 * @param clone the map-join descriptor whose bucket metadata is copied
 */
public BucketMapJoinContext(MapJoinDesc clone) {
  // Per-alias mapping of small-table bucket file names.
  this.aliasBucketFileNameMapping = clone.getAliasBucketFileNameMapping();
  // Bucket count keyed by big-table file name.
  this.bucketFileNameMapping = clone.getBigTableBucketNumMapping();
  // Partition-spec -> file mapping for a partitioned big table.
  this.bigTablePartSpecToFileMapping = clone.getBigTablePartSpecToFileMapping();
  // Alias under which the big (streamed) table is known.
  this.mapJoinBigTableAlias = clone.getBigTableAlias();
}
/**
 * Flags a map join that is a cross product: when the join has no key
 * expressions every row pair matches, so a warning naming the operator,
 * its big-table alias, and the task is recorded in {@code warnings}.
 *
 * @param nd          node being visited; expected to be an
 *                    {@link AbstractMapJoinOperator}
 * @param stack       walker's operator stack (unused)
 * @param procCtx     processor context (unused)
 * @param nodeOutputs outputs of previously visited nodes (unused)
 * @return always {@code null}; findings accumulate in {@code warnings}
 * @throws SemanticException per the NodeProcessor contract
 */
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
    Object... nodeOutputs) throws SemanticException {
  @SuppressWarnings("unchecked")
  AbstractMapJoinOperator<? extends MapJoinDesc> mjOp =
      (AbstractMapJoinOperator<? extends MapJoinDesc>) nd;
  MapJoinDesc mjDesc = mjOp.getConf();

  // Fixed typo: was "bigTablAlias".
  String bigTableAlias = mjDesc.getBigTableAlias();
  if (bigTableAlias == null) {
    // Fall back to the alias of a TableScanOperator parent; if several are
    // present the last one wins (preserves the original scan order choice).
    Operator<? extends OperatorDesc> parent = null;
    for (Operator<? extends OperatorDesc> op : mjOp.getParentOperators()) {
      if (op instanceof TableScanOperator) {
        parent = op;
      }
    }
    if (parent != null) {
      TableScanDesc tDesc = ((TableScanOperator) parent).getConf();
      bigTableAlias = tDesc.getAlias();
    }
  }
  if (bigTableAlias == null) {
    bigTableAlias = "?";
  }

  // Any one side's key list suffices: all join inputs share key arity.
  List<ExprNodeDesc> joinExprs = mjDesc.getKeys().values().iterator().next();
  if (joinExprs.isEmpty()) {
    warnings.add(String.format(
        "Map Join %s[bigTable=%s] in task '%s' is a cross product",
        mjOp.toString(), bigTableAlias, taskName));
  }
  return null;
}
}
private boolean convertJoinBucketMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, TezBucketJoinProcCtx tezBucketJoinProcCtx) throws SemanticException { if (!checkConvertJoinBucketMapJoin(joinOp, context, bigTablePosition, tezBucketJoinProcCtx)) { LOG.info("Check conversion to bucket map join failed."); return false; } MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePosition); MapJoinDesc joinDesc = mapJoinOp.getConf(); joinDesc.setBucketMapJoin(true); // we can set the traits for this join operator OpTraits opTraits = new OpTraits(joinOp.getOpTraits().getBucketColNames(), tezBucketJoinProcCtx.getNumBuckets(), null); mapJoinOp.setOpTraits(opTraits); mapJoinOp.setStatistics(joinOp.getStatistics()); setNumberOfBucketsOnChildren(mapJoinOp); // Once the conversion is done, we can set the partitioner to bucket cols on the small table Map<String, Integer> bigTableBucketNumMapping = new HashMap<String, Integer>(); bigTableBucketNumMapping.put(joinDesc.getBigTableAlias(), tezBucketJoinProcCtx.getNumBuckets()); joinDesc.setBigTableBucketNumMapping(bigTableBucketNumMapping); return true; }
bucketMJCxt.setBucketFileNameMapping(currMapJoinOp.getConf().getBucketFileNameMapping()); localPlan.setInputFileChangeSensitive(true); bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); bucketMJCxt.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
mapJoinOp.getConf().getConds()) < 0) { throw new SemanticException( ErrorMsg.INVALID_BIGTABLE_MAPJOIN.format(mapJoinOp.getConf().getBigTableAlias()));
currMapJoinOp.getConf().getBigTableBucketNumMapping()); localPlan.setInputFileChangeSensitive(true); bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); bucketMJCxt .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
/**
 * Copy constructor: builds a hash-table-sink descriptor from an existing
 * {@link MapJoinDesc}, copying its join configuration, skew-join settings,
 * serialization descriptors, and bucket map-join metadata.
 *
 * @param clone the map-join descriptor to copy from
 */
public HashTableSinkDesc(MapJoinDesc clone) {
  // Core join configuration.
  this.conds = clone.getConds();
  this.exprs = clone.getExprs();
  this.filters = clone.getFilters();
  this.keys = clone.getKeys();
  this.noOuterJoin = clone.getNoOuterJoin();
  this.outputColumnNames = clone.getOutputColumnNames();
  this.reversedExprs = clone.getReversedExprs();
  this.tagOrder = clone.getTagOrder();

  // Skew-join handling.
  this.handleSkewJoin = clone.getHandleSkewJoin();
  this.skewKeyDefinition = clone.getSkewKeyDefinition();
  this.skewKeysValuesTables = clone.getSkewKeysValuesTables();
  this.bigKeysDirMap = clone.getBigKeysDirMap();
  this.smallKeysDirMap = clone.getSmallKeysDirMap();

  // Serialization / table descriptors.
  this.keyTableDesc = clone.getKeyTableDesc();
  this.keyTblDesc = clone.getKeyTblDesc();
  this.valueTblDescs = clone.getValueTblDescs();
  this.valueTblFilteredDescs = clone.getValueFilteredTblDescs();

  // Big-table selection and retained columns.
  this.posBigTable = clone.getPosBigTable();
  this.retainList = clone.getRetainList();
  this.bigTableAlias = clone.getBigTableAlias();

  // Bucket map-join metadata.
  this.aliasBucketFileNameMapping = clone.getAliasBucketFileNameMapping();
  this.bucketFileNameMapping = clone.getBucketFileNameMapping();
}