/**
 * On close, make sure a partially filled overflow batch gets forwarded.
 *
 * @param aborted true when the pipeline is shutting down due to an error; in that
 *                case any pending overflow rows are dropped rather than forwarded.
 * @throws HiveException propagated from the parent close or from forwarding the batch.
 */
@Override
public void closeOp(boolean aborted) throws HiveException {
  super.closeOp(aborted);

  // Flush pending rows only on a clean close; an aborted close must not emit data.
  if (!aborted && overflowBatch.size > 0) {
    forwardOverflow();
  }

  // Consistency fix: check LOG.isDebugEnabled() directly, as reloadHashTable and
  // the other commonSetup in this file do, instead of the cached isLogDebugEnabled field.
  if (LOG.isDebugEnabled()) {
    LOG.debug("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed");
  }
}
@Override public void process(Object row, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) row; alias = (byte) tag; if (needCommonSetup) { // Our one time process method initialization. commonSetup(); needCommonSetup = false; } if (needFirstBatchSetup) { // Our one time first-batch method initialization. firstBatchSetup(batch); needFirstBatchSetup = false; } if (needHashTableSetup) { // Setup our hash table specialization. It will be the first time the process // method is called, or after a Hybrid Grace reload. hashTableSetup(); needHashTableSetup = false; } batchCounter++; if (batch.size == 0) { return; } rowCounter += batch.size; processBatch(batch); }
bigTableByteColumnVectorColumns = getByteColumnVectorColumns(bigTableOuterKeyMapping); smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + Arrays.toString(bigTableValueTypeInfos)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); LOG.debug(getLoggingPrefix() + " 
VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); setupVOutContext(conf.getOutputColumnNames());
vOutContext = new VectorizationContext(getName(), this.vContext); determineCommonInfo(isOuterJoin);
bigTableByteColumnVectorColumns = getByteColumnVectorColumns(bigTableOuterKeyMapping); smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeNames " + bigTableKeyTypeNames); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + bigTableValueTypeNames); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableIndices " + Arrays.toString(smallTableIndices)); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableRetainList " + smallTableRetainList); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor firstSmallTableOutputColumn " + firstSmallTableOutputColumn); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableOutputCount " + smallTableOutputCount); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor 
bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns));
initializeFullOuterObjects(); overflowBatch = setupOverflowBatch(); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns)); int i = 0; for (StructField field : fields) { LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName()); i++;
overflowBatch = setupOverflowBatch(); LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns)); int i = 0; for (StructField field : fields) { LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator initializeOp " + i + " field " + field.getFieldName() + " type " + field.getFieldObjectInspector().getTypeName()); i++;
@Override protected void reloadHashTable(byte pos, int partitionId) throws IOException, HiveException, SerDeException, ClassNotFoundException { this.vectorMapJoinHashTable = null; // The super method will reload a hash table partition of one of the small tables. // Currently, for native vector map join it will only be one small table. super.reloadHashTable(pos, partitionId); MapJoinTableContainer smallTable = spilledMapJoinTables[pos]; vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf, smallTable); needHashTableSetup = true; LOG.info("Created " + vectorMapJoinHashTable.getClass().getSimpleName() + " from " + this.getClass().getSimpleName()); if (LOG.isDebugEnabled()) { LOG.debug(CLASS_NAME + " reloadHashTable!"); } }
protected VectorizedRowBatch setupOverflowBatch() throws HiveException { int initialColumnCount = vContext.firstOutputColumnIndex(); VectorizedRowBatch overflowBatch; int totalNumColumns = initialColumnCount + vOutContext.getScratchColumnTypeNames().length; overflowBatch = new VectorizedRowBatch(totalNumColumns); // First, just allocate just the projection columns we will be using. for (int i = 0; i < outputProjection.length; i++) { int outputColumn = outputProjection[i]; String typeName = outputTypeInfos[i].getTypeName(); allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName); } // Now, add any scratch columns needed for children operators. int outputColumn = initialColumnCount; for (String typeName : vOutContext.getScratchColumnTypeNames()) { allocateOverflowBatchColumnVector(overflowBatch, outputColumn++, typeName); } overflowBatch.projectedColumns = outputProjection; overflowBatch.projectionSize = outputProjection.length; overflowBatch.reset(); return overflowBatch; }
/**
 * One-time per-operator setup: delegate to the parent, then zero the batch counter.
 */
protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
  super.commonSetup(batch);

  // Reset the per-operator batch statistic.
  batchCounter = 0;
}
overflowBatch = setupOverflowBatch(); LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp currentScratchColumns " + Arrays.toString(currentScratchColumns));
/**
 * Determine from a mapping which output columns are BytesColumnVector columns.
 *
 * @param mapping the column mapping whose output columns and type infos are inspected.
 * @return the output column numbers whose type maps to a BytesColumnVector.
 */
private int[] getByteColumnVectorColumns(VectorColumnMapping mapping) {
  int[] outputColumns = mapping.getOutputColumns();
  TypeInfo[] typeInfos = mapping.getTypeInfos();
  return getByteColumnVectorColumns(outputColumns, typeInfos);
}
protected void commonSetup(VectorizedRowBatch batch) throws HiveException { if (isLogDebugEnabled) { LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin..."); displayBatchColumns(batch, "batch"); displayBatchColumns(overflowBatch, "overflowBatch"); } // Make sure big table BytesColumnVectors have room for string values in the overflow batch... for (int column: bigTableByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; bytesColumnVector.initBuffer(); } // Make sure small table BytesColumnVectors have room for string values in the big table and // overflow batchs... for (int column: smallTableByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[column]; bytesColumnVector.initBuffer(); bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; bytesColumnVector.initBuffer(); } // Setup a scratch batch that will be used to play back big table rows that were spilled // to disk for the Hybrid Grace hash partitioning. spillReplayBatch = VectorizedBatchUtil.makeLike(batch); }
vOutContext = new VectorizationContext(getName(), this.vContext); determineCommonInfo(isOuterJoin);
@Override protected void reloadHashTable(byte pos, int partitionId) throws IOException, HiveException, SerDeException, ClassNotFoundException { // The super method will reload a hash table partition of one of the small tables. // Currently, for native vector map join it will only be one small table. super.reloadHashTable(pos, partitionId); MapJoinTableContainer smallTable = spilledMapJoinTables[pos]; vectorMapJoinHashTable = VectorMapJoinOptimizedCreateHashTable.createHashTable(conf, smallTable); needHashTableSetup = true; LOG.info("Created " + vectorMapJoinHashTable.getClass().getSimpleName() + " from " + this.getClass().getSimpleName()); if (isLogDebugEnabled) { LOG.debug(CLASS_NAME + " reloadHashTable!"); } }
protected VectorizedRowBatch setupOverflowBatch() throws HiveException { int initialColumnCount = vContext.firstOutputColumnIndex(); VectorizedRowBatch overflowBatch; int totalNumColumns = initialColumnCount + vOutContext.getScratchColumnTypeNames().length; overflowBatch = new VectorizedRowBatch(totalNumColumns); // First, just allocate just the projection columns we will be using. for (int i = 0; i < outputProjection.length; i++) { int outputColumn = outputProjection[i]; String typeName = outputTypeInfos[i].getTypeName(); allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName); } // Now, add any scratch columns needed for children operators. int outputColumn = initialColumnCount; for (String typeName : vOutContext.getScratchColumnTypeNames()) { allocateOverflowBatchColumnVector(overflowBatch, outputColumn++, typeName); } overflowBatch.projectedColumns = outputProjection; overflowBatch.projectionSize = outputProjection.length; overflowBatch.reset(); return overflowBatch; }
/**
 * Common one-time setup: let the parent initialize first, then reset this
 * operator's batch statistic.
 */
protected void commonSetup(VectorizedRowBatch batch) throws HiveException {
  super.commonSetup(batch);
  batchCounter = 0;  // per-operator batch count starts fresh
}
protected void commonSetup(VectorizedRowBatch batch) throws HiveException { if (LOG.isDebugEnabled()) { LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin..."); displayBatchColumns(batch, "batch"); displayBatchColumns(overflowBatch, "overflowBatch"); } // Make sure big table BytesColumnVectors have room for string values in the overflow batch... for (int column: bigTableByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; bytesColumnVector.initBuffer(); } // Make sure small table BytesColumnVectors have room for string values in the big table and // overflow batchs... for (int column: smallTableByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[column]; bytesColumnVector.initBuffer(); bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; bytesColumnVector.initBuffer(); } // Setup a scratch batch that will be used to play back big table rows that were spilled // to disk for the Hybrid Grace hash partitioning. spillReplayBatch = VectorizedBatchUtil.makeLike(batch); }
getByteColumnVectorColumns(bigTableRetainColumnMap, bigTableRetainTypeInfos); getByteColumnVectorColumns(nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); getByteColumnVectorColumns(outerSmallTableKeyMapping); getByteColumnVectorColumns(smallTableValueMapping); LOG.info(getLoggingPrefix() + " order " + Arrays.toString(orderDisplayable)); LOG.info(getLoggingPrefix() + " posBigTable " + (int) posBigTable); LOG.info(getLoggingPrefix() + " posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); LOG.info(getLoggingPrefix() + " bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); LOG.info(getLoggingPrefix() + " bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); LOG.info(getLoggingPrefix() + " bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); LOG.info(getLoggingPrefix() + " bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); LOG.info(getLoggingPrefix() + " bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); LOG.info(getLoggingPrefix() + " bigTableValueTypeNames " + Arrays.toString(bigTableValueTypeInfos));
vOutContext = new VectorizationContext(getName(), this.vContext); determineCommonInfo(isOuterJoin);