/**
 * Builds a hash-table-sink descriptor from the settings of an existing map-join descriptor.
 *
 * <p>The {@code exprs}, {@code filters} and {@code keys} maps are copied into new
 * {@link HashMap} instances (the map itself is copied, the per-tag lists inside it are not).
 * {@code bucketMapjoinContext} is created from {@code clone} through the
 * {@code BucketMapJoinContext} constructor. Every other field stores whatever the matching
 * getter on {@code clone} returns.
 *
 * @param clone the map-join descriptor to take the settings from
 */
public HashTableSinkDesc(MapJoinDesc clone) {
  // Join shape and output layout.
  this.conds = clone.getConds();
  this.noOuterJoin = clone.getNoOuterJoin();
  this.tagOrder = clone.getTagOrder();
  this.posBigTable = clone.getPosBigTable();
  this.outputColumnNames = clone.getOutputColumnNames();
  this.retainList = clone.getRetainList();

  // Per-tag expression maps: the three maps below are copied, not shared.
  this.exprs = new HashMap<Byte, List<ExprNodeDesc>>(clone.getExprs());
  this.filters = new HashMap<Byte, List<ExprNodeDesc>>(clone.getFilters());
  this.keys = new HashMap<Byte, List<ExprNodeDesc>>(clone.getKeys());
  this.reversedExprs = clone.getReversedExprs();
  this.filterMap = clone.getFilterMap();

  // Key / value table descriptors.
  this.keyTableDesc = clone.getKeyTableDesc();
  this.keyTblDesc = clone.getKeyTblDesc();
  this.valueTblDescs = clone.getValueTblDescs();
  this.valueTblFilteredDescs = clone.getValueFilteredTblDescs();

  // Skew-join settings.
  this.handleSkewJoin = clone.getHandleSkewJoin();
  this.skewKeyDefinition = clone.getSkewKeyDefinition();
  this.skewKeysValuesTables = clone.getSkewKeysValuesTables();
  this.bigKeysDirMap = clone.getBigKeysDirMap();
  this.smallKeysDirMap = clone.getSmallKeysDirMap();

  // Hash-table dump, bucket context and memory settings.
  this.dumpFilePrefix = clone.getDumpFilePrefix();
  this.bucketMapjoinContext = new BucketMapJoinContext(clone);
  this.hashtableMemoryUsage = clone.getHashTableMemoryUsage();
}
/**
 * Reports whether {@code other} describes the same map join as this descriptor.
 *
 * <p>The superclass check must pass first. After that {@code other} is cast to
 * {@link MapJoinDesc} and the parent-to-input mapping, the key-counts explain description,
 * the big-table position and the bucket-map-join flag are compared, in that order.
 *
 * @param other the descriptor to compare against
 * @return {@code true} only if the superclass check and all four comparisons succeed
 */
@Override
public boolean isSame(OperatorDesc other) {
  if (!super.isSame(other)) {
    return false;
  }

  final MapJoinDesc that = (MapJoinDesc) other;
  if (!Objects.equals(getParentToInput(), that.getParentToInput())) {
    return false;
  }
  if (!Objects.equals(getKeyCountsExplainDesc(), that.getKeyCountsExplainDesc())) {
    return false;
  }
  if (getPosBigTable() != that.getPosBigTable()) {
    return false;
  }
  return isBucketMapJoin() == that.isBucketMapJoin();
}
/**
 * Creates the operator from a compilation context, the incoming vectorization context and
 * the operator descriptor.
 *
 * <p>{@code conf} is cast to {@link MapJoinDesc}; the tag order, alias count (size of the
 * expression map), big-table position, filter map and no-outer-join flag are read from it.
 * A new output {@link VectorizationContext} is then created from this operator's name, the
 * descriptor's output column names and {@code vContext}.
 *
 * @param ctx compilation context handed to the superclass constructor
 * @param vContext vectorization context passed to the new output context
 * @param conf operator descriptor; must be a {@link MapJoinDesc}
 * @throws HiveException declared by this constructor; the visible body does not throw it directly
 * @throws ClassCastException if {@code conf} is not a {@link MapJoinDesc}
 */
public VectorMapJoinBaseOperator(CompilationOpContext ctx, VectorizationContext vContext,
    OperatorDesc conf) throws HiveException {
  super(ctx);

  final MapJoinDesc mapJoinDesc = (MapJoinDesc) conf;
  this.conf = mapJoinDesc;

  order = mapJoinDesc.getTagOrder();
  numAliases = mapJoinDesc.getExprs().size();
  posBigTable = (byte) mapJoinDesc.getPosBigTable();
  filterMaps = mapJoinDesc.getFilterMap();
  noOuterJoin = mapJoinDesc.isNoOuterJoin();

  // We are making a new output vectorized row batch.
  vOutContext = new VectorizationContext(
      getName(),
      mapJoinDesc.getOutputColumnNames(),
      /* vContextEnvironment */ vContext);
}
/**
 * Builds a hash-table-sink descriptor from the settings of an existing map-join descriptor.
 *
 * <p>Each field stores whatever the matching getter on {@code clone} returns; nothing is
 * copied here, so any collection the getters hand out by reference is shared with
 * {@code clone}.
 *
 * @param clone the map-join descriptor to take the settings from
 */
public HashTableSinkDesc(MapJoinDesc clone) {
  // Join shape and output layout.
  this.conds = clone.getConds();
  this.noOuterJoin = clone.getNoOuterJoin();
  this.tagOrder = clone.getTagOrder();
  this.posBigTable = clone.getPosBigTable();
  this.outputColumnNames = clone.getOutputColumnNames();
  this.retainList = clone.getRetainList();

  // Per-tag expression maps.
  this.exprs = clone.getExprs();
  this.filters = clone.getFilters();
  this.keys = clone.getKeys();
  this.reversedExprs = clone.getReversedExprs();

  // Key / value table descriptors.
  this.keyTableDesc = clone.getKeyTableDesc();
  this.keyTblDesc = clone.getKeyTblDesc();
  this.valueTblDescs = clone.getValueTblDescs();
  this.valueTblFilteredDescs = clone.getValueFilteredTblDescs();

  // Skew-join settings.
  this.handleSkewJoin = clone.getHandleSkewJoin();
  this.skewKeyDefinition = clone.getSkewKeyDefinition();
  this.skewKeysValuesTables = clone.getSkewKeysValuesTables();
  this.bigKeysDirMap = clone.getBigKeysDirMap();
  this.smallKeysDirMap = clone.getSmallKeysDirMap();

  // Bucket-map-join settings.
  this.bigTableAlias = clone.getBigTableAlias();
  this.aliasBucketFileNameMapping = clone.getAliasBucketFileNameMapping();
  this.bucketFileNameMapping = clone.getBucketFileNameMapping();
}
boolean oneMapJoinCondition = (desc.getConds().length == 1); byte posBigTable = (byte) desc.getPosBigTable(); List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable); boolean outerJoinHasNoKeys = (!desc.isNoOuterJoin() && keyDesc.size() == 0); List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable); boolean isHybridHashJoin = desc.isHybridHashJoin(); Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); boolean isOuterJoin = !desc.getNoOuterJoin(); List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable); List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); smallTableIndicesSize = smallTableIndices.length; } else { List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); int smallTableRetainSize = smallTableRetainList.size(); Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters(); VectorExpression[] bigTableFilterExpressions =
JoinCondDesc[] joinCondns = op.getConf().getConds(); MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs, valueTableDescs, valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns, filters, op.getConf().getNoOuterJoin(), dumpFilePrefix, op.getConf().getMemoryMonitorInfo(), op.getConf().getInMemoryDataSize()); mapJoinDescriptor.setStatistics(op.getConf().getStatistics()); mapJoinDescriptor.setTagOrder(tagOrder); mapJoinDescriptor.setNullSafes(desc.getNullSafes()); mapJoinDescriptor.setFilterMap(desc.getFilterMap()); mapJoinDescriptor.setResidualFilterExprs(desc.getResidualFilterExprs()); mapJoinDescriptor.setColumnExprMap(colExprMap); if (!valueIndices.isEmpty()) { mapJoinDescriptor.setValueIndices(valueIndices);
desc.setVectorDesc(vectorDesc); boolean oneMapJoinCondition = (desc.getConds().length == 1); byte posBigTable = (byte) desc.getPosBigTable(); List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable); VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); final int allBigTableKeyExpressionsLength = allBigTableKeyExpressions.length; List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable); VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs); boolean isHybridHashJoin = desc.isHybridHashJoin(); Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); boolean isOuterJoin = !desc.getNoOuterJoin(); List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable); int bigTableRetainSize = bigTableRetainList.size(); List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); smallTableIndicesSize = smallTableIndices.length; } else { List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
MapJoinDesc mapJoinDescriptor = new MapJoinDesc(newJoinKeys, keyTblDesc, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc,joinDescriptor .getOutputColumnNames(), i, joinDescriptor.getConds(), joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, joinDescriptor.getMemoryMonitorInfo(), joinDescriptor.getInMemoryDataSize()); mapJoinDescriptor.setTagOrder(tags); mapJoinDescriptor.setHandleSkewJoin(false); mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes()); mapJoinDescriptor.setColumnExprMap(joinDescriptor.getColumnExprMap());
public void generateMapMetaData() throws HiveException { // generate the meta data for key // index for key is -1 try { TableDesc keyTableDesc = conf.getKeyTblDesc(); AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance( keyTableDesc.getDeserializerClass(), null); SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null); MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false); for (int pos = 0; pos < order.length; pos++) { if (pos == posBigTable) { continue; } TableDesc valueTableDesc; if (conf.getNoOuterJoin()) { valueTableDesc = conf.getValueTblDescs().get(pos); } else { valueTableDesc = conf.getValueFilteredTblDescs().get(pos); } AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance( valueTableDesc.getDeserializerClass(), null); SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos)); mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext); } } catch (SerDeException e) { throw new HiveException(e); } }
public static TypeInfo[] getOutputTypeInfos(MapJoinDesc desc) { final byte posBigTable = (byte) desc.getPosBigTable(); List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable); List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable); Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); final int outputColumnCount = desc.getOutputColumnNames().size(); TypeInfo[] outputTypeInfos = new TypeInfo[outputColumnCount]; List<Integer> bigTableRetainList = desc.getRetainList().get(posBigTable); final int bigTableRetainSize = bigTableRetainList.size(); List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); smallTableIndicesSize = smallTableIndices.length; } else { List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); final int smallTableRetainSize = (smallTableRetainList != null ? smallTableRetainList.size() : 0);
/**
 * Checks whether the expressions of a map-join descriptor pass vectorization validation.
 *
 * <p>Validated in order: the big table's filter expressions (in FILTER mode, complex types
 * allowed), its key expressions, its value expressions, and then the value expressions of
 * the single small table (the entry of the first two in the tag order that is not the big
 * table). Finally, a descriptor with any residual filter expressions is rejected and the
 * operator issue is set to "Non-equi joins not supported".
 *
 * @param desc the map-join descriptor to validate
 * @return {@code true} if every check passes, {@code false} at the first failure
 */
private boolean validateMapJoinDesc(MapJoinDesc desc) {
  final byte bigTablePosition = (byte) desc.getPosBigTable();

  final List<ExprNodeDesc> bigTableFilters = desc.getFilters().get(bigTablePosition);
  final boolean filtersOk = validateExprNodeDesc(
      bigTableFilters, "Filter", VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true);
  if (!filtersOk) {
    return false;
  }

  final List<ExprNodeDesc> bigTableKeys = desc.getKeys().get(bigTablePosition);
  if (!validateExprNodeDesc(bigTableKeys, "Key")) {
    return false;
  }

  final List<ExprNodeDesc> bigTableValues = desc.getExprs().get(bigTablePosition);
  if (!validateExprNodeDesc(bigTableValues, "Value")) {
    return false;
  }

  // Only the first two tags are inspected: whichever one is not the big table.
  final Byte[] tagOrder = desc.getTagOrder();
  final Byte smallTablePosition = (tagOrder[0] == bigTablePosition ? tagOrder[1] : tagOrder[0]);
  final List<ExprNodeDesc> smallTableValues = desc.getExprs().get(smallTablePosition);
  if (!validateExprNodeDesc(smallTableValues, "Small Table")) {
    return false;
  }

  final List<ExprNodeDesc> residualFilters = desc.getResidualFilterExprs();
  if (residualFilters != null && !residualFilters.isEmpty()) {
    setOperatorIssue("Non-equi joins not supported");
    return false;
  }

  return true;
}
if (mapJoinOp.getConf().getAliasBucketFileNameMapping() == null || mapJoinOp.getConf().getAliasBucketFileNameMapping().size() == 0) { return false; String[] srcs = mapJoinOp.getConf().getBaseSrc(); for (int srcPos = 0; srcPos < srcs.length; srcPos++) { srcs[srcPos] = QB.getAppendedAliasFromId(mapJoinOp.getConf().getId(), srcs[srcPos]); tableEligibleForBucketedSortMergeJoin = tableEligibleForBucketedSortMergeJoin && isEligibleForBucketSortMergeJoin(smbJoinContext, mapJoinOp.getConf().getKeys().get((byte) pos), mapJoinOp.getConf().getAliasToOpInfo(), srcs, pos, if (MapJoinProcessor.checkMapJoin(mapJoinOp.getConf().getPosBigTable(), mapJoinOp.getConf().getConds()) < 0) { throw new SemanticException( ErrorMsg.INVALID_BIGTABLE_MAPJOIN.format(mapJoinOp.getConf().getBigTableAlias()));
MapJoinDesc mapJoinDesc = new MapJoinDesc(); mapJoinDesc.setPosBigTable(0); mapJoinDesc.setKeys(keyMap); mapJoinDesc.setExprs(exprMap); mapJoinDesc.setTagOrder(order); mapJoinDesc.setNoOuterJoin( testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER && testDesc.vectorMapJoinVariation != VectorMapJoinVariation.FULL_OUTER); mapJoinDesc.setFilters(filterMap); mapJoinDesc.setValueIndices(valueIndicesMap); mapJoinDesc.setRetainList(retainListMap); mapJoinDesc.setDynamicPartitionHashJoin(true); break; default: mapJoinDesc.setConds(conds); mapJoinDesc.setKeyTblDesc(keyTableDesc); mapJoinDesc.setValueTblDescs(valueTableDescsList); mapJoinDesc.setValueFilteredTblDescs(valueTableDescsList);
byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); VectorExpression[] allBigTableKeyExpressions = vContext.getVectorExpressions(mapJoinDesc.getKeys().get(posBigTable)); vectorMapJoinDesc.setAllBigTableKeyExpressions(allBigTableKeyExpressions); Map<Byte, List<ExprNodeDesc>> exprs = mapJoinDesc.getExprs(); VectorExpression[] allBigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); vectorMapJoinDesc.setAllBigTableValueExpressions(allBigTableValueExpressions); List<ExprNodeDesc> bigTableFilters = mapJoinDesc.getFilters().get(bigTablePos); boolean isOuterAndFiltered = (!mapJoinDesc.isNoOuterJoin() && bigTableFilters.size() > 0); if (!isOuterAndFiltered) { operator = new VectorMapJoinOperator(
/**
 * Compares two map-join operators through their descriptors.
 *
 * <p>The operators are considered equal when all of the following match, checked in this
 * order with short-circuiting: parent-to-input mapping, key-counts explain description,
 * keys string, big-table position, bucket-map-join flag, filters string map, output column
 * names, join condition list, skew-join flag and null-safe string.
 *
 * @param op1 first operator; must not be {@code null}
 * @param op2 second operator; must not be {@code null}
 * @return {@code true} if every compared descriptor field matches
 * @throws NullPointerException if either operator is {@code null}
 */
@Override
public boolean equals(MapJoinOperator op1, MapJoinOperator op2) {
  Preconditions.checkNotNull(op1);
  Preconditions.checkNotNull(op2);
  MapJoinDesc desc1 = op1.getConf();
  MapJoinDesc desc2 = op2.getConf();

  // The keys string is compared exactly once; repeating it cannot change the outcome.
  return compareObject(desc1.getParentToInput(), desc2.getParentToInput())
      && compareString(desc1.getKeyCountsExplainDesc(), desc2.getKeyCountsExplainDesc())
      && compareObject(desc1.getKeysString(), desc2.getKeysString())
      && desc1.getPosBigTable() == desc2.getPosBigTable()
      && desc1.isBucketMapJoin() == desc2.isBucketMapJoin()
      && compareObject(desc1.getFiltersStringMap(), desc2.getFiltersStringMap())
      && compareObject(desc1.getOutputColumnNames(), desc2.getOutputColumnNames())
      && compareObject(desc1.getCondsList(), desc2.getCondsList())
      && desc1.getHandleSkewJoin() == desc2.getHandleSkewJoin()
      && compareString(desc1.getNullSafeString(), desc2.getNullSafeString());
}
// Closes an enclosing scope whose opening lies outside this excerpt.
}
order = desc.getTagOrder(); posBigTable = (byte) desc.getPosBigTable(); posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); isOuterJoin = !desc.getNoOuterJoin(); Map<Byte, List<ExprNodeDesc>> filterExpressions = desc.getFilters(); bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), VectorExpressionDescriptor.Mode.FILTER); List<ExprNodeDesc> keyDesc = desc.getKeys().get(posBigTable); bigTableKeyExpressions = vContext.getVectorExpressions(keyDesc); List<ExprNodeDesc> bigTableExprs = desc.getExprs().get(posBigTable); bigTableValueExpressions = vContext.getVectorExpressions(bigTableExprs);
List<String> valueContextStringList = new ArrayList<String>(); try { boolean noOuterJoin = getNoOuterJoin(); Byte[] order = getTagOrder(); int[][] filterMaps = getFilterMap(); valueTableDesc = getValueTblDescs().get(pos); } else { valueTableDesc = getValueFilteredTblDescs().get(pos); SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos, filterMaps)); valueContextStringList.add(pos + ":" + valueContext.stringify());
conf.getKeysString()); if (conf.getValueIndices() != null) { for (Entry<Byte, int[]> entry : conf.getValueIndices().entrySet()) { LOG.info(getLoggingPrefix() + " mapJoinDesc.getValueIndices +" + (int) entry.getKey() + " " + Arrays.toString(entry.getValue())); conf.getExprs().toString()); LOG.info(getLoggingPrefix() + " mapJoinDesc.getRetainList " + conf.getRetainList().toString()); setupVOutContext(conf.getOutputColumnNames());
public static void removeFilterMap(MapJoinDesc mapJoinDesc) throws SemanticException { int[][] filterMaps = mapJoinDesc.getFilterMap(); if (filterMaps == null) { return; final byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); final int numAliases = mapJoinDesc.getExprs().size(); List<TableDesc> valueFilteredTblDescs = mapJoinDesc.getValueFilteredTblDescs(); for (byte pos = 0; pos < numAliases; pos++) { if (pos != posBigTable) { mapJoinDesc.setFilterMap(null);
JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(),order,NOTSKIPBIGTABLE); joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys, inputObjInspectors,NOTSKIPBIGTABLE); posBigTable = conf.getPosBigTable(); if (conf.getOutputColumnNames().size() < structFields.size()) { List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(); for (Byte alias : order) { int sz = conf.getExprs().get(alias).size(); List<Integer> retained = conf.getRetainList().get(alias); for (int i = 0; i < sz; i++) { int pos = retained.get(i); .getStandardStructObjectInspector(conf.getOutputColumnNames(), structFieldObjectInspectors);