if (this.nwayConf == null ||                           // binary join
    nwayConf.getLoadedContainerList().size() == 0) {   // n-way join, first (biggest) small table
  if (i == 0) { // We unconditionally create a hashmap for the first hash partition
    hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize,
        maxCapacity, true, spillLocalDirs);
    LOG.info("Each new partition will require memory: " + hashPartitions[0].hashMap.memorySize());
  } else {
    // Later partitions only get an in-memory hashmap if, judging by the size of the first
    // partition's hashmap, another one still fits under the memory threshold
    hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize,
        maxCapacity, memoryUsed + hashPartitions[0].hashMap.memorySize() < memoryThreshold,
        spillLocalDirs);
  }
} else {                                               // n-way join, subsequent small tables
  // Follow the spill decisions already made for the previously loaded tables
  if (nwayConf.doSpillOnCreation(i)) {
    hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize,
        maxCapacity, false, spillLocalDirs);
  } else {
    hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize,
        maxCapacity, true, spillLocalDirs);
  }
}
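To make the in-memory check above concrete (the numbers are illustrative only, not taken from the source): the boolean constructor argument decides whether the partition's hash map is materialized in memory (true) or deferred to disk (false). If memoryThreshold were 256 MB and hashPartitions[0].hashMap.memorySize() reported 32 MB, partitions after the first would keep getting in-memory hash maps only while memoryUsed + 32 MB stays below 256 MB; once memoryUsed reaches 224 MB, every remaining partition is created already spilled.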
@Override
public void clear() {
  for (int i = 0; i < hashPartitions.length; i++) {
    HashPartition hp = hashPartitions[i];
    if (hp != null) {
      LOG.info("Going to clear hash partition " + i);
      hp.clear();
    }
  }
  memoryUsed = 0;
}
KeyValueContainer kvContainer = partition.getSidefileKVContainer();
int rowCount = kvContainer.size();
LOG.info("Hybrid Grace Hash Join: Number of rows restored from KeyValueContainer: " + rowCount);

// Rebuild the spilled hash map from disk, using the restored row count as a capacity hint
BytesBytesMultiHashMap restoredHashMap = partition.getHashMapFromDisk(rowCount);
rowCount += restoredHashMap.getNumValues();
LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition...");
if (this.nwayConf == null ||                           // binary join
    nwayConf.getLoadedContainerList().size() == 0) {   // n-way join, first (biggest) small table
  if (i == 0) { // We unconditionally create a hashmap for the first hash partition
    hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize,
        memoryThreshold, true);
  } else {
    // Later partitions only get an in-memory hashmap if another write buffer still fits
    hashPartitions[i] = new HashPartition(initialCapacity, loadFactor, writeBufferSize,
        memoryThreshold, memoryUsed + writeBufferSize < memoryThreshold);
  }
} else {                                               // n-way join, subsequent small tables
  // Follow the spill decisions already made for the previously loaded tables
  if (nwayConf.doSpillOnCreation(i)) {
    hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize,
        memoryThreshold, false);
  } else {
    hashPartitions[i] = new HashPartition(threshold, loadFactor, writeBufferSize,
        memoryThreshold, true);
  }
}
if (!hashPartitions[i].isHashMapOnDisk()) {
  hybridHtContainer.setTotalInMemRowCount(
      hybridHtContainer.getTotalInMemRowCount() -
          hashPartitions[i].getHashMapFromMemory().getNumValues());
  hashPartitions[i].getHashMapFromMemory().clear();
}

if (hashPartitions[i].isHashMapOnDisk()) {
  try {
VectorMapJoinRowBytesContainer bigTable = partition.getMatchfileRowBytesContainer();
bigTable.prepareForReading();
KeyValueContainer kvContainer = hashPartition.getSidefileKVContainer();
kvContainer.add((HiveKey) currentKey, (BytesWritable) currentValue);
} else {
ObjectContainer bigTable = partition.getMatchfileObjContainer();
/**
 * Iterate over the big table row container and feed process() with leftover rows
 * @param partitionId the partition from which to take out spilled big table rows
 * @throws HiveException
 */
protected void reProcessBigTable(int partitionId) throws HiveException {
  // For a binary join, firstSmallTable is the only small table and holds the reference to the
  // spilled big table rows.
  // For an n-way join, spilling happens only once, while processing the first small table,
  // so only firstSmallTable holds the reference to the spilled big table rows.
  HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
  ObjectContainer bigTable = partition.getMatchfileObjContainer();
  while (bigTable.hasNext()) {
    Object row = bigTable.next();
    process(row, conf.getPosBigTable());
  }
  bigTable.clear();
}
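For orientation, here is a minimal sketch of the loop that could drive reProcessBigTable once the regular pass over the big table is done. The posSmallTable variable and the reloadHashTable helper are assumptions made for illustration, not taken from the fragments above.

// Illustrative driver only (assumed shape, not the actual operator code):
// after the first pass, visit each partition whose hash map was spilled,
// rebuild the small-table hash map from disk, then replay the big table
// rows that were parked for that partition.
HashPartition[] partitions = firstSmallTable.getHashPartitions();
for (int pid = 0; pid < partitions.length; pid++) {
  if (partitions[pid].isHashMapOnDisk()) {
    reloadHashTable(posSmallTable, pid); // hypothetical helper: restore the spilled hash map
    reProcessBigTable(pid);              // replay the matching big table rows via process()
  }
}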
public void dumpStats() {
  int numPartitionsInMem = 0;
  int numPartitionsOnDisk = 0;

  for (HashPartition hp : hashPartitions) {
    if (hp.isHashMapOnDisk()) {
      numPartitionsOnDisk++;
    } else {
      numPartitionsInMem++;
    }
  }

  LOG.info("In memory partitions have been processed successfully: " +
      numPartitionsInMem + " partitions in memory have been processed; " +
      numPartitionsOnDisk + " partitions have been spilled to disk and will be processed next.");
}
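For example, with four partitions still in memory and two spilled, the concatenation above produces: "In memory partitions have been processed successfully: 4 partitions in memory have been processed; 2 partitions have been spilled to disk and will be processed next."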
private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex,
    VectorMapJoinHashTableResult hashTableResult) throws IOException {

  int partitionId = hashTableResult.spillPartitionId();

  HybridHashTableContainer ht =
      (HybridHashTableContainer) mapJoinTables[posSingleVectorMapJoinSmallTable];
  HashPartition hp = ht.getHashPartitions()[partitionId];

  VectorMapJoinRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer();
  Output output = rowBytesContainer.getOuputForRowBytes();
  // int offset = output.getLength();
  bigTableVectorSerializeRow.setOutputAppend(output);
  bigTableVectorSerializeRow.serializeWrite(batch, batchIndex);
  // int length = output.getLength() - offset;
  rowBytesContainer.finishRow();

  // LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length);
}
/**
 * Postpone processing the big table row temporarily by spilling it to a row container
 * @param hybridHtContainer Hybrid hashtable container
 * @param row big table row
 */
protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row)
    throws HiveException {
  HybridHashTableContainer ht = (HybridHashTableContainer) hybridHtContainer;
  int partitionId = ht.getToSpillPartitionId();
  HashPartition hp = ht.getHashPartitions()[partitionId];
  ObjectContainer bigTable = hp.getMatchfileObjContainer();
  bigTable.add(row);
}
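A minimal sketch of how a caller might reach this method; the joinResult value, the hybridHtContainer reference and the surrounding flow are assumptions for illustration, not the actual call site.

// Illustrative call site (assumed, not from the fragments above): when probing the
// small table reports that the row's hash partition was spilled, the join is
// postponed by parking the big table row in that partition's matchfile container.
if (joinResult == JoinUtil.JoinResult.SPILL) {
  spillBigTableRow(hybridHtContainer, row); // hybridHtContainer: the small table's container
  return;
}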
@Override
public void clear() {
  for (HashPartition hp : hashPartitions) {
    if (hp != null) {
      hp.clear();
    }
  }
  memoryUsed = 0;
}
} else {                            // destination in memory
  if (!lastPartitionInMem &&        // If this is the only partition in memory, proceed without check
      (hashPartition.size() == 0 || // Destination partition being empty indicates a write buffer

KeyValueContainer kvContainer = hashPartition.getSidefileKVContainer();
kvContainer.add((HiveKey) currentKey, (BytesWritable) currentValue);
} else {