/**
 * Postpone processing the big table row temporarily by spilling it to a row container.
 *
 * @param hybridHtContainer Hybrid hashtable container; must be a HybridHashTableContainer
 * @param row big table row to park until the matching partition is reloaded
 * @throws HiveException on spill failure
 */
protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row)
    throws HiveException {
  HybridHashTableContainer hybridContainer = (HybridHashTableContainer) hybridHtContainer;
  // The container tracks which partition is currently designated for spilling.
  HashPartition targetPartition =
      hybridContainer.getHashPartitions()[hybridContainer.getToSpillPartitionId()];
  targetPartition.getMatchfileObjContainer().add(row);
}
/**
 * Postpone processing the big table row temporarily by spilling it to a row container.
 *
 * @param hybridHtContainer Hybrid hashtable container; cast to HybridHashTableContainer below
 * @param row big table row to spill
 * @throws HiveException on spill failure
 */
protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException {
  HybridHashTableContainer ht = (HybridHashTableContainer) hybridHtContainer;
  // The container decides which partition is currently marked for spilling.
  int partitionId = ht.getToSpillPartitionId();
  HashPartition hp = ht.getHashPartitions()[partitionId];
  // Rows go into the partition's "matchfile" object container for later re-processing.
  ObjectContainer bigTable = hp.getMatchfileObjContainer();
  bigTable.add(row);
}
/**
 * Serialize one big-table row from a vectorized batch into the row-bytes spill
 * container of the given hash partition.
 *
 * @param batch the vectorized row batch holding the row
 * @param batchIndex index of the row within the batch
 * @param partitionId target spill partition
 * @throws IOException if serialization into the spill container fails
 */
private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, int partitionId)
    throws IOException {
  HybridHashTableContainer smallTable =
      (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable];
  VectorRowBytesContainer spillContainer =
      smallTable.getHashPartitions()[partitionId].getMatchfileRowBytesContainer();
  Output out = spillContainer.getOuputForRowBytes();
  bigTableVectorSerializeRow.setOutputAppend(out);
  bigTableVectorSerializeRow.serializeWrite(batch, batchIndex);
  spillContainer.finishRow();
}
/** * Iterate over the big table row container and feed process() with leftover rows * @param partitionId the partition from which to take out spilled big table rows * @throws HiveException */ protected void reProcessBigTable(int partitionId) throws HiveException { // For binary join, firstSmallTable is the only small table; it has reference to spilled big // table rows; // For n-way join, since we only spill once, when processing the first small table, so only the // firstSmallTable has reference to the spilled big table rows. HashPartition partition = firstSmallTable.getHashPartitions()[partitionId]; ObjectContainer bigTable = partition.getMatchfileObjContainer(); LOG.info("Hybrid Grace Hash Join: Going to process spilled big table rows in partition " + partitionId + ". Number of rows: " + bigTable.size()); while (bigTable.hasNext()) { Object row = bigTable.next(); process(row, conf.getPosBigTable()); } bigTable.clear(); }
/**
 * Iterate over the big table row container and feed process() with leftover rows.
 *
 * @param partitionId the partition from which to take out spilled big table rows
 * @throws HiveException if re-processing a row fails
 */
protected void reProcessBigTable(int partitionId) throws HiveException {
  // For binary join, firstSmallTable is the only small table; it has reference to spilled big
  // table rows;
  // For n-way join, since we only spill once, when processing the first small table, so only the
  // firstSmallTable has reference to the spilled big table rows.
  HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
  ObjectContainer bigTable = partition.getMatchfileObjContainer();
  LOG.info("Hybrid Grace Hash Join: Going to process spilled big table rows in partition "
      + partitionId + ". Number of rows: " + bigTable.size());
  // Drain every spilled row back through the normal join path, then release the container.
  while (bigTable.hasNext()) {
    Object row = bigTable.next();
    process(row, conf.getPosBigTable());
  }
  bigTable.clear();
}
private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, VectorMapJoinHashTableResult hashTableResult) throws IOException { int partitionId = hashTableResult.spillPartitionId(); HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable]; HashPartition hp = ht.getHashPartitions()[partitionId]; VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); // int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); bigTableVectorSerializeRow.serializeWrite(batch, batchIndex); // int length = output.getLength() - offset; rowBytesContainer.finishRow(); // LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); }
// NOTE(review): fragment — enclosing method not visible here. Looks up the hash
// partition that holds the spilled rows for this partition id; confirm against full file.
HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
// NOTE(review): fragment — this is the tail of a method signature plus the first
// statements of its body; the declaration itself is outside this view.
    throws IOException, HiveException, SerDeException, ClassNotFoundException {
  HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
  HashPartition partition = container.getHashPartitions()[partitionId];
// NOTE(review): this span stitches together fragments from several methods and is not
// a single coherent block (unreachable statements after 'break', duplicate declarations
// of 'hashPartitions'). Preserved verbatim — confirm against the full file before editing.
if (pos != conf.getPosBigTable()) {
  firstSmallTable = (HybridHashTableContainer) mapJoinTables[pos];
  numPartitions = firstSmallTable.getHashPartitions().length;
  break;
  hybridHtContainer.dumpStats();
  HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions();
  HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
  if (hashPartitions[i].isHashMapOnDisk()) {
    try {
// NOTE(review): duplicate of the stitched fragment above in the raw input; not a single
// coherent block (unreachable statements after 'break', duplicate declarations of
// 'hashPartitions'). Preserved verbatim — confirm against the full file before editing.
if (pos != conf.getPosBigTable()) {
  firstSmallTable = (HybridHashTableContainer) mapJoinTables[pos];
  numPartitions = firstSmallTable.getHashPartitions().length;
  break;
  hybridHtContainer.dumpStats();
  HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions();
  HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
  if (hashPartitions[i].isHashMapOnDisk()) {
    try {
// NOTE(review): fragment — enclosing method not visible. Fetches the hash partition
// holding spilled rows for this partition id; verify context in the full file.
HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
// NOTE(review): fragment — tail of a method signature plus opening statements; the
// method declaration is outside this view.
    throws IOException, HiveException, SerDeException, ClassNotFoundException {
  HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
  HashPartition partition = container.getHashPartitions()[partitionId];
// NOTE(review): fragment — fetches the spilled big-table row container from the chosen
// hash partition; enclosing method not visible here.
HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
ObjectContainer bigTable = partition.getMatchfileObjContainer();
// NOTE(review): fragment (duplicate in raw input) — fetches the spilled big-table row
// container from the chosen hash partition; enclosing method not visible here.
HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
ObjectContainer bigTable = partition.getMatchfileObjContainer();
/**
 * Postpone processing the big table row temporarily by spilling it to a row container.
 *
 * @param hybridHtContainer Hybrid hashtable container; cast to HybridHashTableContainer below
 * @param row big table row to spill
 * @throws HiveException on spill failure
 */
protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException {
  HybridHashTableContainer ht = (HybridHashTableContainer) hybridHtContainer;
  // The container decides which partition is currently marked for spilling.
  int partitionId = ht.getToSpillPartitionId();
  HashPartition hp = ht.getHashPartitions()[partitionId];
  ObjectContainer bigTable = hp.getMatchfileObjContainer();
  bigTable.add(row);
}
private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, VectorMapJoinHashTableResult hashTableResult) throws IOException { int partitionId = hashTableResult.spillPartitionId(); HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable]; HashPartition hp = ht.getHashPartitions()[partitionId]; VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); // int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); bigTableVectorSerializeRow.serializeWrite(batch, batchIndex); // int length = output.getLength() - offset; rowBytesContainer.finishRow(); // LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); }
/** * Iterate over the big table row container and feed process() with leftover rows * @param partitionId the partition from which to take out spilled big table rows * @throws HiveException */ protected void reProcessBigTable(int partitionId) throws HiveException { // For binary join, firstSmallTable is the only small table; it has reference to spilled big // table rows; // For n-way join, since we only spill once, when processing the first small table, so only the // firstSmallTable has reference to the spilled big table rows. HashPartition partition = firstSmallTable.getHashPartitions()[partitionId]; ObjectContainer bigTable = partition.getMatchfileObjContainer(); while (bigTable.hasNext()) { Object row = bigTable.next(); process(row, conf.getPosBigTable()); } bigTable.clear(); }
// NOTE(review): fragment (duplicate in raw input) — fetches the spilled big-table row
// container from the chosen hash partition; enclosing method not visible here.
HybridHashTableContainer.HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
ObjectContainer bigTable = partition.getMatchfileObjContainer();
// NOTE(review): fragment — enclosing method not visible. Looks up the hash partition
// holding the spilled rows for this partition id; verify context in the full file.
HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
// NOTE(review): fragment — tail of a method signature plus opening statements; the
// method declaration is outside this view.
    throws IOException, HiveException, SerDeException, ClassNotFoundException {
  HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
  HashPartition partition = container.getHashPartitions()[partitionId];