break; case HASH: processingMode = this.new ProcessingModeHashAggregate(); break; case MERGE_PARTIAL:
/** * Locates the aggregation buffer sets to use for each key in the current batch. * The keyWrappersBatch must have evaluated the current batch first. */ private void prepareBatchAggregationBufferSets(VectorizedRowBatch batch) throws HiveException { // The aggregation batch vector needs to know when we start a new batch // to bump its internal version. aggregationBatchInfo.startBatch(); // We now have to probe the global hash and find-or-allocate // the aggregation buffers to use for each key present in the batch VectorHashKeyWrapper[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers(); for (int i=0; i < batch.size; ++i) { VectorHashKeyWrapper kw = keyWrappers[i]; VectorAggregationBufferRow aggregationBuffer = mapKeysAggregationBuffers.get(kw); if (null == aggregationBuffer) { // the probe failed, we must allocate a set of aggregation buffers // and push the (keywrapper,buffers) pair into the hash. // is very important to clone the keywrapper, the one we have from our // keyWrappersBatch is going to be reset/reused on next batch. aggregationBuffer = allocateAggregationBuffer(); mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer); numEntriesHashTable++; numEntriesSinceCheck++; } aggregationBatchInfo.mapAggregationBufferSet(aggregationBuffer, i); } }
prepareBatchAggregationBufferSets(batch); processAggregators(batch); while (shouldFlush(batch)) { flush(false); updateAvgVariableSize(batch); checkHashModeEfficiency();
} else { processingMode = this.new ProcessingModeHashAggregate();
prepareBatchAggregationBufferSets(batch); processAggregators(batch); while (shouldFlush(batch)) { flush(false); updateAvgVariableSize(batch); checkHashModeEfficiency();
computeMemoryLimits(); LOG.info("using hash aggregation processing mode");
/**
 * Locates the aggregation buffer sets to use for each key in the current batch.
 * The keyWrappersBatch must have evaluated the current batch first.
 *
 * @param batch the vectorized row batch whose keys are probed against the hash table
 * @throws HiveException if allocating a new aggregation buffer set fails
 */
private void prepareBatchAggregationBufferSets(VectorizedRowBatch batch) throws HiveException {
  // The aggregation batch vector needs to know when we start a new batch
  // to bump its internal version.
  aggregationBatchInfo.startBatch();

  // We now have to probe the global hash and find-or-allocate
  // the aggregation buffers to use for each key present in the batch
  VectorHashKeyWrapper[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();
  for (int i=0; i < batch.size; ++i) {
    VectorHashKeyWrapper kw = keyWrappers[i];
    VectorAggregationBufferRow aggregationBuffer = mapKeysAggregationBuffers.get(kw);
    if (null == aggregationBuffer) {
      // the probe failed, we must allocate a set of aggregation buffers
      // and push the (keywrapper,buffers) pair into the hash.
      // is very important to clone the keywrapper, the one we have from our
      // keyWrappersBatch is going to be reset/reused on next batch.
      aggregationBuffer = allocateAggregationBuffer();
      mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer);
      // Growth counters consumed by shouldFlush(): total entries and entries
      // added since the last variable-size sampling.
      numEntriesHashTable++;
      numEntriesSinceCheck++;
    }
    aggregationBatchInfo.mapAggregationBufferSet(aggregationBuffer, i);
  }
}
/** * Returns true if the memory threshold for the hash table was reached. */ private boolean shouldFlush(VectorizedRowBatch batch) { if (batch.size == 0) { return false; } //numEntriesSinceCheck is the number of entries added to the hash table // since the last time we checked the average variable size if (numEntriesSinceCheck >= this.checkInterval) { // Were going to update the average variable row size by sampling the current batch updateAvgVariableSize(batch); numEntriesSinceCheck = 0; } if (numEntriesHashTable > this.maxHtEntries || numEntriesHashTable * (fixedHashEntrySize + avgVariableSize) > maxHashTblMemory) { return true; } if (gcCanary.get() == null) { return true; } return false; }
/**
 * Checks if the HT reduces the number of entries by at least minReductionHashAggr factor.
 * If not, flushes everything and switches to streaming mode.
 *
 * @throws HiveException if the flush or mode change fails
 */
private void checkHashModeEfficiency() throws HiveException {
  // Only re-check after enough rows have accumulated since the last check.
  if (lastModeCheckRowCount > numRowsCompareHashAggr) {
    lastModeCheckRowCount = 0;
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d",
          numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * minReductionHashAggr)));
    }
    // Hash aggregation is not reducing the row count enough to pay for the
    // hash table: flush all buffered state and fall back to streaming.
    // NOTE(review): a sibling version of this method in this chunk calls
    // changeToStreamingMode() instead — confirm which symbol the class declares.
    if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) {
      flush(true);
      changeToUnsortedStreamingMode();
    }
  }
}
// Closes the enclosing class — its declaration is outside this chunk's view.
}
/**
 * Flushes all remaining aggregation state on a normal close; an aborted
 * close discards it.
 *
 * @param aborted true when the operator is shutting down due to failure
 * @throws HiveException if the final flush fails
 */
@Override
public void close(boolean aborted) throws HiveException {
  if (aborted) {
    return;
  }
  flush(true);
}
break; case HASH: processingMode = this.new ProcessingModeHashAggregate(); break; case MERGE_PARTIAL:
computeMemoryLimits(); LOG.debug("using hash aggregation processing mode");
prepareBatchAggregationBufferSets(batch); processAggregators(batch); while (shouldFlush(batch)) { flush(false); updateAvgVariableSize(batch); checkHashModeEfficiency();
computeMemoryLimits(); LOG.debug("using hash aggregation processing mode");
aggregationBuffer = allocateAggregationBuffer(); mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer); numEntriesHashTable++;
/**
 * Returns true if the memory threshold for the hash table was reached.
 *
 * @param batch the current batch, used to sample the average variable row size
 * @return true when the hash table should be (partially) flushed
 */
private boolean shouldFlush(VectorizedRowBatch batch) {
  if (batch.size == 0) {
    return false;
  }
  // numEntriesSinceCheck is the number of entries added to the hash table
  // since the last time we checked the average variable size
  if (numEntriesSinceCheck >= this.checkInterval) {
    // We're going to update the average variable row size by sampling the current batch
    updateAvgVariableSize(batch);
    numEntriesSinceCheck = 0;
  }
  // Flush when either the entry-count cap or the estimated memory budget
  // (entries * (fixed + avg variable size)) is exceeded.
  if (numEntriesHashTable > this.maxHtEntries ||
      numEntriesHashTable * (fixedHashEntrySize + avgVariableSize) > maxHashTblMemory) {
    return true;
  }
  // A cleared canary reference indicates GC pressure — flush in that case too.
  if (gcCanary.get() == null) {
    return true;
  }
  return false;
}
/**
 * Returns true if the memory threshold for the hash table was reached.
 *
 * @param batch the current batch, used to sample the average variable row size
 * @return true when the hash table should be (partially) flushed
 */
private boolean shouldFlush(VectorizedRowBatch batch) {
  if (batch.size == 0) {
    return false;
  }
  // numEntriesSinceCheck is the number of entries added to the hash table
  // since the last time we checked the average variable size
  if (numEntriesSinceCheck >= this.checkInterval) {
    // We're going to update the average variable row size by sampling the current batch
    updateAvgVariableSize(batch);
    numEntriesSinceCheck = 0;
  }
  // Flush when either the entry-count cap or the estimated memory budget
  // (entries * (fixed + avg variable size)) is exceeded.
  if (numEntriesHashTable > this.maxHtEntries ||
      numEntriesHashTable * (fixedHashEntrySize + avgVariableSize) > maxHashTblMemory) {
    return true;
  }
  // A cleared canary reference indicates GC pressure — flush in that case too.
  if (gcCanary.get() == null) {
    return true;
  }
  return false;
}
/**
 * Checks if the HT reduces the number of entries by at least minReductionHashAggr factor.
 * If not, flushes everything and switches to streaming mode.
 *
 * @throws HiveException if the flush or mode change fails
 */
private void checkHashModeEfficiency() throws HiveException {
  // Only re-check after enough rows have accumulated since the last check.
  if (lastModeCheckRowCount > numRowsCompareHashAggr) {
    lastModeCheckRowCount = 0;
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d",
          numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * minReductionHashAggr)));
    }
    // Hash aggregation is not reducing the row count enough to pay for the
    // hash table: flush all buffered state and fall back to streaming.
    if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) {
      flush(true);
      changeToStreamingMode();
    }
  }
}
// Closes the enclosing class — its declaration is outside this chunk's view.
}
/**
 * Flushes all remaining aggregation state on a normal close, and emits the
 * grouping-set "summary row" when one is configured but no output was produced.
 *
 * @param aborted true when the operator is shutting down due to failure
 * @throws HiveException if the flush or the summary-row write fails
 */
@Override
public void close(boolean aborted) throws HiveException {
  if (!aborted) {
    flush(true);
  }
  if (!aborted && sumBatchSize == 0 && GroupByOperator.shouldEmitSummaryRow(conf)) {
    // in case the empty grouping set is preset; but no output has done
    // the "summary row" still needs to be emitted
    VectorHashKeyWrapperBase kw = keyWrappersBatch.getVectorHashKeyWrappers()[0];
    kw.setNull();
    int pos = conf.getGroupingSetPosition();
    if (pos >= 0) {
      // NOTE(review): (1L << pos) - 1 sets all grouping-set bits below `pos` —
      // presumably marking every grouping key as aggregated; confirm against
      // GroupByOperator's grouping-set encoding.
      long val = (1L << pos) - 1;
      keyWrappersBatch.setLongValue(kw, pos, val);
    }
    // Empty (freshly allocated) aggregators yield the aggregate identity values.
    VectorAggregationBufferRow groupAggregators = allocateAggregationBuffer();
    writeSingleRow(kw, groupAggregators);
  }
}
/**
 * Checks if the HT reduces the number of entries by at least minReductionHashAggr factor.
 * If not, flushes everything and switches to streaming mode.
 *
 * @throws HiveException if the flush or mode change fails
 */
private void checkHashModeEfficiency() throws HiveException {
  // Only re-check after enough rows have accumulated since the last check.
  if (lastModeCheckRowCount > numRowsCompareHashAggr) {
    lastModeCheckRowCount = 0;
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d",
          numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * minReductionHashAggr)));
    }
    // Hash aggregation is not reducing the row count enough to pay for the
    // hash table: flush all buffered state and fall back to streaming.
    if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) {
      flush(true);
      changeToStreamingMode();
    }
  }
}
// Closes the enclosing class — its declaration is outside this chunk's view.
}